Merge pull request #157 from SDSRV-IDP/dev/semi_correct

Dev/semi correct
This commit is contained in:
Le Van Tuan 2024-07-18 10:40:05 +07:00 committed by GitHub Enterprise
commit c015d2e70e
15 changed files with 398 additions and 14 deletions

View File

@ -3,8 +3,10 @@ FROM python:3.9-slim
WORKDIR /app WORKDIR /app
COPY run.py . COPY run.py .
COPY requirements.txt .
RUN apt-get update && apt-get -y install curl RUN apt-get update && apt-get -y install curl
RUN pip install -r requirements.txt
RUN pip install requests RUN pip install requests
CMD [ "python", "-u", "run.py" ] CMD [ "python", "-u", "run.py" ]

View File

@ -0,0 +1 @@
pytz==2024.1

View File

@ -1,11 +1,15 @@
import os import os
import time import time
import requests import requests
from datetime import datetime from datetime import datetime, timezone, timedelta
import pytz
# Get the proxy URL from the environment variable # Get the proxy URL from the environment variable
interval = 60*60*3 # 1 minute interval = 60*60*3 # 1 minute
update_cost = int(60*2) update_cost = int(60*2)
scan_cost = int(10)
last_scan = None
scan_interval = 24*60*60
proxy_url = os.getenv('PROXY', "localhost") proxy_url = os.getenv('PROXY', "localhost")
user = os.getenv('ADMIN_USER_NAME', "") user = os.getenv('ADMIN_USER_NAME', "")
password = os.getenv('ADMIN_PASSWORD', "") password = os.getenv('ADMIN_PASSWORD', "")
@ -28,7 +32,33 @@ update_data = {
'subsidiary': None 'subsidiary': None
} }
"report_overview_duration" # Define the scan API
scan_list_url = f'{proxy_url}/api/automation/'
scan_create_url = f'{proxy_url}/api/automation/(id)/scan/'
def semi_scan(login_token):
    """Ask the server to run every semi-auto-correction rule on recent data.

    The scan is throttled: it runs on the first call and afterwards only when
    at least ``scan_interval`` seconds have passed since the previous
    completed scan. Every rule is scanned over the same window, which starts
    at the previous scan (or one ``scan_interval`` ago on the first run) and
    ends at the moment this call started, expressed in Singapore time.

    Args:
        login_token: Value sent verbatim in the ``Authorization`` header.

    Raises:
        requests.HTTPError: If the rule list request returns an error status.
    """
    global last_scan
    now_ts = time.time()
    # Throttle: nothing to do until a full interval has elapsed.
    if last_scan and now_ts - last_scan < scan_interval:
        return
    # The very first run has no previous scan, so look back one full interval.
    window_seconds = now_ts - last_scan if last_scan else scan_interval

    sg_tz = pytz.timezone("Asia/Singapore")
    window_end = datetime.fromtimestamp(now_ts, sg_tz)
    window_start = window_end - timedelta(seconds=window_seconds)
    # start_date must be the OLDER bound: the server filters with a range
    # lookup, and a reversed range matches nothing.
    data = {
        "start_date": window_start.isoformat(timespec="milliseconds"),
        "end_date": window_end.isoformat(timespec="milliseconds"),
    }

    headers = {'Authorization': login_token}
    # get all rules:
    list_rules_response = requests.get(scan_list_url, headers=headers)
    # Fail loudly on an error status instead of iterating an error payload.
    list_rules_response.raise_for_status()
    rules = list_rules_response.json()
    print(f"[INFO]: Total {len(rules)} rules returned from server")

    # process rule one by one
    for rule in rules:
        response = requests.post(scan_create_url.replace("(id)", str(rule["id"])), json=data, headers=headers)
        print("[INFO]: scanning rule {} with data: {} status code: {}".format(rule["id"], data, response.status_code))
        # Pause between rules to spread the load on the server.
        time.sleep(scan_cost)

    # Record the window end (not the finish time) so the next window starts
    # exactly where this one stopped and nothing falls in between.
    last_scan = now_ts
# def update_report(login_token, report_overview_duration=["30d", "7d"], subsidiary=["all", "SEAU", "SESP", "SME", "SEPCO", "TSE", "SEIN"]): # def update_report(login_token, report_overview_duration=["30d", "7d"], subsidiary=["all", "SEAU", "SESP", "SME", "SEPCO", "TSE", "SEIN"]):
def update_report(login_token, report_overview_duration=["7d", "30d"], subsidiary=["SEAO", "SEAU", "SESP", "SME", "SEPCO", "TSE", "SEIN"]): def update_report(login_token, report_overview_duration=["7d", "30d"], subsidiary=["SEAO", "SEAU", "SESP", "SME", "SEPCO", "TSE", "SEIN"]):
@ -56,6 +86,7 @@ while True:
# Call the update API # Call the update API
try: try:
semi_scan(login_token)
update_report(login_token) update_report(login_token)
except Exception as e: except Exception as e:
print(f"[ERROR]: {e}") print(f"[ERROR]: {e}")

@ -1 +1 @@
Subproject commit be37541e48bcf2045be3e375319fdb69aa8bcef0 Subproject commit 03bfaeb4441178fe933f65b7a05c35b04779ff07

View File

@ -21,3 +21,58 @@ Sample at `env_sample/example_local_env`
`python manage.py runserver 0.0.0.0:8000` `python manage.py runserver 0.0.0.0:8000`
### 2.2.3 Run Worker ### 2.2.3 Run Worker
`celery -A fwd_api.proj.worker worker -l INFO --without-gossip --without-mingle --without-heartbeat -Ofair --pool=solo` `celery -A fwd_api.proj.worker worker -l INFO --without-gossip --without-mingle --without-heartbeat -Ofair --pool=solo`
## Feature
### Semi correction
**API URL**: /api/automation/
#### **Disclaimer**: This process overwrites the reason and counter measure of every matching request file (image) with the values from the provided rule
#### How to use
Example:
subsidiary: SESP \
Invoice_Purchase Date_Accuracy: 100% \
Invoice_Sold_To_Party_OCR: != "" \
Invoice_Retailer_OCR: Samsung Brand Store \
Reason to fill: Wrong Feedback \
Counter Measure: Update revised result and re-calculate accuracy \
The config would be as follows:
```
{
"subsidiary": "SESP",
"predict_result": {
"retailername": "Samsung Brand Store",
"sold_to_party": "notEmpty"
},
"feedback_accuracy": {
"purchase_date": 1
},
"reason": "Wrong Feedback",
"counter_measures": "Update revised result and re-calculate accuracy"
},
```
Supported special commands: ["<", "Empty", "notEmpty", "starts_with"] \
To use a command with a value, separate the command from the value with `||`. For example, "<||1.0" means <100% and "starts_with||Shopee" means Shopee*
#### Apply a rule to the request files created within a time range:
```
curl -X 'POST' \
'<server_url>/api/automation/<rule_id>/scan/' \
-H 'accept: application/json' \
-H 'Authorization: <token>' \
-H 'Content-Type: application/json' \
-d '{
"start_date": "2024-05-17T07:27:19.087Z",
"end_date": "2024-07-17T07:27:19.087Z"
}'
```
To get the list of rules:
```
curl -X 'GET' \
'<server_url>/api/automation/' \
-H 'accept: application/json' \
-H 'Authorization: <token>'
```
For more information, please refer to the Swagger page.

View File

@ -194,12 +194,6 @@ SPECTACULAR_SETTINGS = {
# Custom Spectacular Settings # Custom Spectacular Settings
"EXCLUDE_PATH": [reverse_lazy("schema")], "EXCLUDE_PATH": [reverse_lazy("schema")],
"EXCLUDE_RELATIVE_PATH": ["/rsa", '/gen-token', '/app/'], "EXCLUDE_RELATIVE_PATH": ["/rsa", '/gen-token', '/app/'],
"TAGS": [
"Login",
"OCR",
"Data",
"System",
],
"TAGS_SORTER": "alpha" "TAGS_SORTER": "alpha"
} }
@ -304,4 +298,13 @@ LOGGING = {
'level': 'INFO', 'level': 'INFO',
} }
}, },
} }
# Presumably maps each review reason to the counter measure recorded with it;
# the consumers of this mapping are not visible here, so confirm.
# NOTE(review): "resutl" in the "Wrong feedback" entry looks like a typo for
# "result". It is a runtime string, so check stored data before changing it.
REASON_SOLUTION_MAP = {"Invalid image": "Remove this image from the evaluation report",
                       "Missing information": "Remove this image from the evaluation report",
                       "Too blurry text": "Remove this image from the evaluation report",
                       "Too small text": "Remove this image from the evaluation report",
                       "Handwritten": "Remove this image from the evaluation report",
                       "Wrong feedback": "Update revised resutl and re-calculate accuracy",
                       "Ocr cannot extract": "Improve OCR",
                       }

View File

@ -0,0 +1,75 @@
from random import choice
from rest_framework import viewsets
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from drf_spectacular.types import OpenApiTypes
from rest_framework.decorators import action
from django.core.paginator import Paginator
from drf_spectacular.utils import extend_schema, OpenApiParameter
from django.conf import settings
from rest_framework import status
from django.db.models import Q
import logging
from fwd_api.utils.subsidiary import map_subsidiary_long_to_short
from fwd_api.utils.auto_correct_language import condition_to_ORM_command
from ..models.SemiAutoCorrection import SemiAutoCorrection
from ..models.SubscriptionRequestFile import SubscriptionRequestFile
from ..serializers.SemiAutoCorrection import SemiAutoCorrectionSerializer, SemiAutoCorrectionScanSerializer
class SemiAutoCorrectionViewSet(viewsets.ModelViewSet):
    """CRUD endpoints for semi-auto-correction rules plus a ``scan`` action.

    ``scan`` writes one rule's ``reason`` and ``counter_measures`` onto every
    request file that matches the rule within a caller-supplied time range.
    """
    queryset = SemiAutoCorrection.objects.all()
    serializer_class = SemiAutoCorrectionSerializer
    # NOTE(review): no permission class is enforced although ``scan``
    # overwrites data; confirm that authentication is handled elsewhere.
    permission_classes = []

    def get_serializer_class(self):
        """Use the scan serializer for ``scan``, the default one otherwise."""
        if self.action in ['scan']:
            return SemiAutoCorrectionScanSerializer
        # Return the default serializer class for other actions
        return super().get_serializer_class()

    def perform_create(self, serializer):
        """Persist a new rule."""
        serializer.save()

    @action(detail=True, url_path="scan", methods=["POST"])
    def scan(self, request, pk=None):
        """Apply the rule identified by ``pk`` to the matching request files.

        The request body must carry ``start_date`` and ``end_date``. Files
        whose ``created_at`` lies in that range and that satisfy the rule's
        subsidiary and field conditions get the rule's ``reason`` and
        ``counter_measures``.

        Returns:
            Response with the updated file ids and their count (HTTP 201).
        """
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        validated_data = serializer.validated_data
        semi_auto_correction_rule = self.get_object()

        # TODO: Make this a background task
        base_query = Q(created_at__range=(validated_data["start_date"], validated_data["end_date"]))
        if semi_auto_correction_rule.subsidiary:
            short_sub = map_subsidiary_long_to_short(
                semi_auto_correction_rule.subsidiary)
            # AND with the date range; a plain assignment would silently drop
            # the range and touch every file of the subsidiary.
            base_query &= Q(request__subsidiary__startswith=short_sub)

        ORM_commands = {"include": base_query,
                        "exclude": None}
        for i_name in ("feedback_result", "reviewed_result", "predict_result",
                       "feedback_accuracy", "reviewed_accuracy"):
            # These JSON fields are nullable: treat None as "no conditions".
            conditions = getattr(semi_auto_correction_rule, i_name) or {}
            for k, v in conditions.items():
                if v is not None:
                    ORM_commands = condition_to_ORM_command(
                        v, k, i_name, ORM_commands)

        images_to_scan = SubscriptionRequestFile.objects.filter(
            ORM_commands["include"]
        )
        if ORM_commands["exclude"]:
            images_to_scan = images_to_scan.exclude(ORM_commands["exclude"])

        requestfile_ids = []
        for image in images_to_scan:
            image.reason = semi_auto_correction_rule.reason
            image.counter_measures = semi_auto_correction_rule.counter_measures
            image.save()
            requestfile_ids.append(image.id)
        return Response(data={"requestfile_ids": requestfile_ids, "count": len(requestfile_ids)}, status=status.HTTP_201_CREATED)

View File

@ -3,16 +3,16 @@ from rest_framework.routers import DefaultRouter, SimpleRouter
from fwd_api.api.ctel_view import CtelViewSet from fwd_api.api.ctel_view import CtelViewSet
from fwd_api.api.accuracy_view import AccuracyViewSet from fwd_api.api.accuracy_view import AccuracyViewSet
from fwd_api.api.ctel_user_view import CtelUserViewSet from fwd_api.api.ctel_user_view import CtelUserViewSet
from fwd_api.api.ctel_template_view import CtelTemplateViewSet from fwd_api.api.ctel_template_view import CtelTemplateViewSet
from fwd_api.api.semi_auto_correction import SemiAutoCorrectionViewSet
if settings.DEBUG: if settings.DEBUG:
router = DefaultRouter() router = DefaultRouter()
else: else:
router = SimpleRouter() router = SimpleRouter()
router.register("automation", SemiAutoCorrectionViewSet, basename="SemiAutoAPI")
router.register("ctel", CtelViewSet, basename="CtelAPI") router.register("ctel", CtelViewSet, basename="CtelAPI")
router.register("ctel", CtelUserViewSet, basename="CtelUserAPI") router.register("ctel", CtelUserViewSet, basename="CtelUserAPI")
router.register("ctel", AccuracyViewSet, basename="AccuracyAPI") router.register("ctel", AccuracyViewSet, basename="AccuracyAPI")

View File

@ -0,0 +1,26 @@
# Generated by Django 4.1.3 on 2024-07-15 07:02
from django.db import migrations, models
class Migration(migrations.Migration):
    """Create the ``SemiAutoCorrection`` model."""

    dependencies = [
        ('fwd_api', '0191_subscriptionrequest_is_required_and_more'),
    ]

    operations = [
        migrations.CreateModel(
            name='SemiAutoCorrection',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                # The JSON defaults below are dict literals, i.e. one shared
                # mutable object per field rather than a callable.
                ('feedback_result', models.JSONField(default={'imei_number': None, 'invoice_no': None, 'purchase_date': None, 'retailername': None, 'sold_to_party': None}, null=True)),
                ('reviewed_result', models.JSONField(default={'imei_number': None, 'invoice_no': None, 'purchase_date': None, 'retailername': None, 'sold_to_party': None}, null=True)),
                ('predict_result', models.JSONField(default={'imei_number': None, 'invoice_no': None, 'purchase_date': None, 'retailername': None, 'sold_to_party': None}, null=True)),
                ('feedback_accuracy', models.JSONField(default={'imei_number': None, 'invoice_no': None, 'purchase_date': None, 'retailername': None, 'sold_to_party': None}, null=True)),
                ('reviewed_accuracy', models.JSONField(default={'imei_number': None, 'invoice_no': None, 'purchase_date': None, 'retailername': None, 'sold_to_party': None}, null=True)),
                ('reason', models.TextField(blank=True)),
                ('counter_measures', models.TextField(blank=True)),
            ],
        ),
    ]

View File

@ -0,0 +1,18 @@
# Generated by Django 4.1.3 on 2024-07-15 09:14
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the nullable ``subsidiary`` column to ``SemiAutoCorrection``."""

    dependencies = [
        ('fwd_api', '0192_semiautocorrection'),
    ]

    operations = [
        migrations.AddField(
            model_name='semiautocorrection',
            name='subsidiary',
            field=models.CharField(max_length=200, null=True),
        ),
    ]

View File

@ -0,0 +1,39 @@
# Generated by Django 4.1.3 on 2024-07-15 09:16
from django.db import migrations, models
import fwd_api.models.SemiAutoCorrection
class Migration(migrations.Migration):
    """Point the five JSON field defaults at the ``default_json_fields`` callable."""

    dependencies = [
        ('fwd_api', '0193_semiautocorrection_subsidiary'),
    ]

    operations = [
        migrations.AlterField(
            model_name='semiautocorrection',
            name='feedback_accuracy',
            field=models.JSONField(default=fwd_api.models.SemiAutoCorrection.default_json_fields, null=True),
        ),
        migrations.AlterField(
            model_name='semiautocorrection',
            name='feedback_result',
            field=models.JSONField(default=fwd_api.models.SemiAutoCorrection.default_json_fields, null=True),
        ),
        migrations.AlterField(
            model_name='semiautocorrection',
            name='predict_result',
            field=models.JSONField(default=fwd_api.models.SemiAutoCorrection.default_json_fields, null=True),
        ),
        migrations.AlterField(
            model_name='semiautocorrection',
            name='reviewed_accuracy',
            field=models.JSONField(default=fwd_api.models.SemiAutoCorrection.default_json_fields, null=True),
        ),
        migrations.AlterField(
            model_name='semiautocorrection',
            name='reviewed_result',
            field=models.JSONField(default=fwd_api.models.SemiAutoCorrection.default_json_fields, null=True),
        ),
    ]

View File

@ -0,0 +1,19 @@
from django.db import models
from django.utils import timezone
def default_json_fields():
    """Return a new dict mapping every tracked field name to ``None``."""
    field_names = (
        "invoice_no",
        "imei_number",
        "retailername",
        "purchase_date",
        "sold_to_party",
    )
    # fromkeys fills every key with None and builds a fresh dict per call.
    return dict.fromkeys(field_names)
class SemiAutoCorrection(models.Model):
    """A semi-auto-correction rule: match conditions plus the values to apply.

    The INPUT fields describe which records the rule targets; the OUTPUT
    fields hold the reason and counter measure associated with the rule.
    """
    # INPUT
    id = models.AutoField(primary_key=True)
    subsidiary = models.CharField(null=True, max_length=200)
    # Each JSON field defaults to a fresh dict with all known keys set to None.
    feedback_result = models.JSONField(null=True, default=default_json_fields)
    reviewed_result = models.JSONField(null=True, default=default_json_fields)
    predict_result = models.JSONField(null=True, default=default_json_fields)
    feedback_accuracy = models.JSONField(null=True, default=default_json_fields)
    # Declared once, after feedback_accuracy: the earlier duplicate
    # declaration was dead code (the later one always won).
    reviewed_accuracy = models.JSONField(null=True, default=default_json_fields)

    # OUTPUT
    reason = models.TextField(blank=True)
    counter_measures = models.TextField(blank=True)

View File

@ -0,0 +1,47 @@
from rest_framework import serializers
from ..models.SemiAutoCorrection import SemiAutoCorrection
def default_json_fields():
    """Build a fresh template dict whose known keys all start as ``None``."""
    known_keys = ["invoice_no", "imei_number", "retailername", "purchase_date", "sold_to_party"]
    template = {}
    for key in known_keys:
        template[key] = None
    return template
class SemiAutoCorrectionSerializer(serializers.ModelSerializer):
    """Serializer for rules that pads every JSON field with the default keys."""

    # JSON fields that ``to_internal_value`` normalises.
    _json_condition_fields = (
        'feedback_result',
        'reviewed_result',
        'predict_result',
        'feedback_accuracy',
        'reviewed_accuracy',
    )

    class Meta:
        model = SemiAutoCorrection
        fields = '__all__'

    def to_internal_value(self, data):
        """
        Customize the deserialization process for the JSONField fields.

        Each JSON field is merged over the default key set. On a partial
        update, fields absent from the payload are left untouched so that a
        PATCH does not reset the stored values.
        """
        internal_value = super().to_internal_value(data)

        # Update the JSONField fields
        for name in self._json_condition_fields:
            if self.partial and name not in data:
                continue
            internal_value[name] = self.update_json_field(data.get(name), name)

        return internal_value

    def update_json_field(self, value, field_name=None):
        """
        Helper method to update the JSONField value.

        Args:
            value: Raw payload value; ``None`` or ``""`` means "use defaults".
            field_name: Optional field name used to key the validation error.

        Returns:
            A new dict holding the default keys overlaid with ``value``.

        Raises:
            serializers.ValidationError: If ``value`` is not a JSON object.
        """
        if value is None or value == "":
            return default_json_fields()
        if not isinstance(value, dict):
            # Reject early: dict.update would otherwise fail with an
            # unrelated error and surface as a server error.
            message = "Expected a JSON object."
            raise serializers.ValidationError(
                {field_name: [message]} if field_name else message)
        _value = default_json_fields()
        _value.update(value)
        return _value
class SemiAutoCorrectionScanSerializer(serializers.ModelSerializer):
    """Serializer exposing a rule ``id`` together with a start/end datetime pair."""
    # Declared explicitly because they are not fields of the model.
    # NOTE(review): nothing here checks that start_date <= end_date.
    start_date = serializers.DateTimeField()
    end_date = serializers.DateTimeField()

    class Meta:
        model = SemiAutoCorrection
        fields = ["id", "start_date", "end_date"]

View File

@ -0,0 +1,68 @@
from django.db.models import Q
# Placeholder meaning "this lookup takes its value from the condition string".
NO_DEFAULT_VALUE = "*&%"
# Separator between a special command and its value, e.g. "starts_with||Shopee".
SEPARATE_KEYWORD = "||"
# Special commands and the lookups each one expands to. Every "orm" entry is
# [<lookup suffix>, <value or NO_DEFAULT_VALUE>, <"include" | "exclude">];
# "operation" says how entries that land in the same bucket are combined.
KEYWORD_TO_ORM = {
    "<": {"orm": [["__0__lt", NO_DEFAULT_VALUE, "include"], ["__exact", [], "exclude"]],
          "operation": "AND"},  # [[<command>, <value>, <is_excluded>],...]
    "notEmpty": {"orm": [["__exact", None, "exclude"], ["__exact", "", "exclude"]],
                 "operation": "OR"},
    "Empty": {"orm": [["__exact", None, "include"], ["__exact", "", "include"]],
              "operation": "OR"},  # operation bw the 2 orm cmd for this only
    "starts_with": {"orm": [["__istartswith", NO_DEFAULT_VALUE, "include"]],
                    "operation": "AND"},
}
def condition_to_ORM_command(condition, keyword, parent=None, ORM_commands=None):
    """Translate one rule condition into Django ``Q`` filters.

    For *result and *accuracy only.

    Supported condition forms (see ``KEYWORD_TO_ORM``):
        ``<||1.0``, ``notEmpty``, ``Empty``, ``starts_with||Shopee``;
    anything else is treated as an exact-match value.

    Args:
        condition: The condition value taken from the rule.
        keyword: Key inside the JSON field the condition applies to.
        parent: Name of the JSON field holding ``keyword``; when given, the
            lookup path is ``<parent>__<keyword>``.
        ORM_commands: Accumulator with ``"include"`` and ``"exclude"`` entries
            (each a ``Q`` or ``None``). It is updated in place when supplied;
            a fresh accumulator is created when omitted.

    Returns:
        The accumulator, with the new condition AND-ed into it.
    """
    # A dict literal as the default would be shared (and mutated) across calls.
    if ORM_commands is None:
        ORM_commands = {"include": None, "exclude": None}

    ORM_command = f"{parent}__{keyword}" if parent else f"{keyword}"
    condition_text = str(condition)

    # NOTE(review): commands are detected by substring, so a plain value that
    # merely contains "<" or "Empty" is also treated as a command - confirm
    # this is intended.
    for command, spec in KEYWORD_TO_ORM.items():
        if command not in condition_text:
            continue

        # Value following the separator: numeric when possible, else text.
        raw_value = condition_text.split(SEPARATE_KEYWORD)[-1]
        try:
            parsed_value = float(raw_value)
        except ValueError:
            parsed_value = raw_value

        this_query = {"include": None, "exclude": None}
        for suffix, fixed_value, bucket in spec["orm"]:
            cmd_value = parsed_value if fixed_value == NO_DEFAULT_VALUE else fixed_value
            lookup = Q(**{ORM_command + suffix: cmd_value})
            if not this_query[bucket]:
                this_query[bucket] = lookup
            elif spec["operation"] == "AND":
                this_query[bucket] &= lookup
            elif spec["operation"] == "OR":
                this_query[bucket] |= lookup

        # Conditions of different keys are always AND-ed together.
        for bucket, query in this_query.items():
            if query:
                if not ORM_commands[bucket]:
                    ORM_commands[bucket] = query
                else:
                    ORM_commands[bucket] &= query
        # Only the first matching command applies.
        return ORM_commands

    # Plain value: exact match. Accuracy conditions are compared as a
    # one-element list.
    if parent and "accuracy" in parent:
        condition = [condition]
    if not ORM_commands["include"]:
        ORM_commands["include"] = Q(**{ORM_command: condition})
    else:
        ORM_commands["include"] &= Q(**{ORM_command: condition})
    return ORM_commands

@ -1 +1 @@
Subproject commit be37541e48bcf2045be3e375319fdb69aa8bcef0 Subproject commit 03bfaeb4441178fe933f65b7a05c35b04779ff07