Merge pull request #157 from SDSRV-IDP/dev/semi_correct
Dev/semi correct
This commit is contained in:
commit
c015d2e70e
@ -3,8 +3,10 @@ FROM python:3.9-slim
|
||||
WORKDIR /app
|
||||
|
||||
COPY run.py .
|
||||
COPY requirements.txt .
|
||||
|
||||
RUN apt-get update && apt-get -y install curl
|
||||
RUN pip install -r requirements.txt
|
||||
RUN pip install requests
|
||||
|
||||
CMD [ "python", "-u", "run.py" ]
|
1
api-cronjob/requirements.txt
Normal file
1
api-cronjob/requirements.txt
Normal file
@ -0,0 +1 @@
|
||||
pytz==2024.1
|
@ -1,11 +1,15 @@
|
||||
import os
|
||||
import time
|
||||
import requests
|
||||
from datetime import datetime
|
||||
|
||||
from datetime import datetime, timezone, timedelta
|
||||
import pytz
|
||||
# Get the proxy URL from the environment variable
|
||||
interval = 60*60*3 # 3 hours, expressed in seconds
|
||||
update_cost = int(60*2)
|
||||
scan_cost = int(10)
|
||||
last_scan = None
|
||||
scan_interval = 24*60*60
|
||||
|
||||
proxy_url = os.getenv('PROXY', "localhost")
|
||||
user = os.getenv('ADMIN_USER_NAME', "")
|
||||
password = os.getenv('ADMIN_PASSWORD', "")
|
||||
@ -28,7 +32,33 @@ update_data = {
|
||||
'subsidiary': None
|
||||
}
|
||||
|
||||
"report_overview_duration"
|
||||
# Define the scan API
|
||||
scan_list_url = f'{proxy_url}/api/automation/'
|
||||
scan_create_url = f'{proxy_url}/api/automation/(id)/scan/'
|
||||
|
||||
def semi_scan(login_token):
    """Ask the server to apply every semi-auto-correction rule to recent data.

    The scan runs at most once every ``scan_interval`` seconds; calls made
    sooner return without contacting the server. Each scan covers the window
    from the previous scan (or one ``scan_interval`` ago on the first run) up
    to the moment this call started, expressed in Asia/Singapore time.

    Args:
        login_token: Value sent unchanged in the ``Authorization`` header.

    Raises:
        requests.HTTPError: If the rule list cannot be fetched successfully.
    """
    global last_scan
    scan_started = time.time()

    if not last_scan:
        # First run: pretend the previous scan happened one interval ago so
        # the first window covers the last ``scan_interval`` seconds.
        last_scan = scan_started - scan_interval
    elif scan_started - last_scan < scan_interval:
        return

    headers = {'Authorization': login_token}
    sg_tz = pytz.timezone("Asia/Singapore")

    # The window must run from the OLDER instant to the NEWER one; the server
    # filters with an inclusive (start, end) range, so a reversed pair matches
    # nothing. Format: 2024-07-17T15:27:19.087+08:00
    window_start = datetime.fromtimestamp(last_scan, sg_tz)
    window_end = datetime.fromtimestamp(scan_started, sg_tz)
    data = {
        "start_date": window_start.isoformat(timespec="milliseconds"),
        "end_date": window_end.isoformat(timespec="milliseconds"),
    }

    # get all rules:
    list_rules_response = requests.get(scan_list_url, headers=headers)
    # Fail loudly on auth/server errors instead of iterating an error payload.
    list_rules_response.raise_for_status()
    rules = list_rules_response.json()
    print(f"[INFO]: Total {len(rules)} rules returned from server")

    # process rule one by one
    for rule in rules:
        response = requests.post(
            scan_create_url.replace("(id)", str(rule["id"])),
            json=data,
            headers=headers,
        )
        print("[INFO]: scanning rule {} with data: {} status code: {}".format(
            rule["id"], data, response.status_code))
        # Give the server some breathing room between rules.
        time.sleep(scan_cost)

    # Record when this scan STARTED (not when it finished) so the next window
    # begins exactly where this one ended and nothing falls into a gap.
    last_scan = scan_started
|
||||
|
||||
|
||||
# def update_report(login_token, report_overview_duration=["30d", "7d"], subsidiary=["all", "SEAU", "SESP", "SME", "SEPCO", "TSE", "SEIN"]):
|
||||
def update_report(login_token, report_overview_duration=["7d", "30d"], subsidiary=["SEAO", "SEAU", "SESP", "SME", "SEPCO", "TSE", "SEIN"]):
|
||||
@ -56,6 +86,7 @@ while True:
|
||||
|
||||
# Call the update API
|
||||
try:
|
||||
semi_scan(login_token)
|
||||
update_report(login_token)
|
||||
except Exception as e:
|
||||
print(f"[ERROR]: {e}")
|
||||
|
@ -1 +1 @@
|
||||
Subproject commit be37541e48bcf2045be3e375319fdb69aa8bcef0
|
||||
Subproject commit 03bfaeb4441178fe933f65b7a05c35b04779ff07
|
@ -21,3 +21,58 @@ Sample at `env_sample/example_local_env`
|
||||
`python manage.py runserver 0.0.0.0:8000`
|
||||
### 2.2.3 Run Worker
|
||||
`celery -A fwd_api.proj.worker worker -l INFO --without-gossip --without-mingle --without-heartbeat -Ofair --pool=solo`
|
||||
|
||||
|
||||
|
||||
## Feature
|
||||
### Semi correction
|
||||
**API URL**: /api/automation/
|
||||
#### **Disclaimer**: This process will overwrite the reason and counter measure of every request file (image) that matches the provided rule
|
||||
#### How to use
|
||||
Example:
|
||||
subsidiary: SESP \
|
||||
Invoice_Purchase Date_Accuracy: 100% \
|
||||
Invoice_Sold_To_Party_OCR: != "" \
|
||||
Invoice_Retailer_OCR: Samsung Brand Store \
|
||||
Reason to fill: Wrong Feedback \
|
||||
Counter Measure: Update revised result and re-calculate accuracy \
|
||||
The config would be as following
|
||||
```
|
||||
{
|
||||
"subsidiary": "SESP",
|
||||
"predict_result": {
|
||||
"retailername": "Samsung Brand Store",
|
||||
"sold_to_party": "notEmpty"
|
||||
},
|
||||
"feedback_accuracy": {
|
||||
"purchase_date": 1
|
||||
},
|
||||
"reason": "Wrong Feedback",
|
||||
"counter_measures": "Update revised result and re-calculate accuracy"
|
||||
},
|
||||
```
|
||||
|
||||
Supported special commands: ["<", "Empty", "notEmpty", "starts_with"] \
|
||||
To use a command with a value, separate the command from the value with `||`. For example, "<||1.0" means <100% and "starts_with||Shopee" means Shopee*
|
||||
|
||||
#### Modify a duration with a rule:
|
||||
```
|
||||
curl -X 'POST' \
|
||||
'<server_url>/api/automation/<rule_id>/scan/' \
|
||||
-H 'accept: application/json' \
|
||||
-H 'Authorization: <token>' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"start_date": "2024-05-17T07:27:19.087Z",
|
||||
"end_date": "2024-07-17T07:27:19.087Z"
|
||||
}'
|
||||
```
|
||||
|
||||
To get the list of rules:
|
||||
```
|
||||
curl -X 'GET' \
|
||||
'<server_url>/api/automation/' \
|
||||
-H 'accept: application/json' \
|
||||
-H 'Authorization: <token>'
|
||||
```
|
||||
For more information, please refer to the Swagger page
|
@ -194,12 +194,6 @@ SPECTACULAR_SETTINGS = {
|
||||
# Custom Spectacular Settings
|
||||
"EXCLUDE_PATH": [reverse_lazy("schema")],
|
||||
"EXCLUDE_RELATIVE_PATH": ["/rsa", '/gen-token', '/app/'],
|
||||
"TAGS": [
|
||||
"Login",
|
||||
"OCR",
|
||||
"Data",
|
||||
"System",
|
||||
],
|
||||
"TAGS_SORTER": "alpha"
|
||||
}
|
||||
|
||||
@ -305,3 +299,12 @@ LOGGING = {
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
# Maps a reason label to the counter-measure text paired with it.
# NOTE(review): the "Wrong feedback" value is spelled "resutl"; it is a runtime
# string, so confirm nothing matches on the exact text before correcting it.
REASON_SOLUTION_MAP = {"Invalid image": "Remove this image from the evaluation report",
                       "Missing information": "Remove this image from the evaluation report",
                       "Too blurry text": "Remove this image from the evaluation report",
                       "Too small text": "Remove this image from the evaluation report",
                       "Handwritten": "Remove this image from the evaluation report",
                       "Wrong feedback": "Update revised resutl and re-calculate accuracy",
                       "Ocr cannot extract": "Improve OCR",
                       }
|
75
cope2n-api/fwd_api/api/semi_auto_correction.py
Normal file
75
cope2n-api/fwd_api/api/semi_auto_correction.py
Normal file
@ -0,0 +1,75 @@
|
||||
from random import choice
|
||||
from rest_framework import viewsets
|
||||
from rest_framework.permissions import IsAuthenticated
|
||||
from rest_framework.response import Response
|
||||
from drf_spectacular.types import OpenApiTypes
|
||||
from rest_framework.decorators import action
|
||||
from django.core.paginator import Paginator
|
||||
from drf_spectacular.utils import extend_schema, OpenApiParameter
|
||||
from django.conf import settings
|
||||
from rest_framework import status
|
||||
from django.db.models import Q
|
||||
import logging
|
||||
|
||||
from fwd_api.utils.subsidiary import map_subsidiary_long_to_short
|
||||
from fwd_api.utils.auto_correct_language import condition_to_ORM_command
|
||||
from ..models.SemiAutoCorrection import SemiAutoCorrection
|
||||
from ..models.SubscriptionRequestFile import SubscriptionRequestFile
|
||||
from ..serializers.SemiAutoCorrection import SemiAutoCorrectionSerializer, SemiAutoCorrectionScanSerializer
|
||||
|
||||
|
||||
class SemiAutoCorrectionViewSet(viewsets.ModelViewSet):
    """CRUD endpoints for semi-auto-correction rules plus a ``scan`` action.

    ``scan`` applies one stored rule to the request files created inside a
    caller-supplied time window and overwrites their ``reason`` and
    ``counter_measures`` with the rule's values.
    """

    queryset = SemiAutoCorrection.objects.all()
    serializer_class = SemiAutoCorrectionSerializer
    # NOTE(review): no permission class is enforced although ``scan`` bulk
    # overwrites data; confirm access is restricted elsewhere, or add
    # ``IsAuthenticated`` here.
    permission_classes = []

    def get_serializer_class(self):
        """Use the scan serializer for ``scan`` and the default otherwise."""
        if self.action in ['scan']:
            return SemiAutoCorrectionScanSerializer
        # Return the default serializer class for other actions
        return super().get_serializer_class()

    def perform_create(self, serializer):
        """Persist a newly created rule."""
        serializer.save()

    @action(detail=True, url_path="scan", methods=["POST"])
    def scan(self, request, pk=None):
        """Apply this rule to every matching request file in a time window.

        The request body must contain ``start_date`` and ``end_date``. Files
        are matched on creation time, on the rule's subsidiary (when set) and
        on every non-null condition stored in the rule's JSON fields.

        Returns:
            A 201 response with the ids of the updated files and their count.
        """
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        validated_data = serializer.validated_data
        semi_auto_correction_rule = self.get_object()

        # TODO: Make this a background task
        base_query = Q(created_at__range=(validated_data["start_date"], validated_data["end_date"]))
        if semi_auto_correction_rule.subsidiary:
            short_sub = map_subsidiary_long_to_short(
                semi_auto_correction_rule.subsidiary)
            # AND with the date range. Plain assignment here would discard the
            # window and rewrite files from all time.
            base_query &= Q(request__subsidiary__startswith=short_sub)

        ORM_commands = {"include": base_query,
                        "exclude": None}
        rule_sections = (
            (semi_auto_correction_rule.feedback_result, "feedback_result"),
            (semi_auto_correction_rule.reviewed_result, "reviewed_result"),
            (semi_auto_correction_rule.predict_result, "predict_result"),
            (semi_auto_correction_rule.feedback_accuracy, "feedback_accuracy"),
            (semi_auto_correction_rule.reviewed_accuracy, "reviewed_accuracy"),
        )
        for item, i_name in rule_sections:
            # The JSON fields are nullable; a null section has no conditions.
            if not item:
                continue
            for k, v in item.items():
                if v is not None:
                    ORM_commands = condition_to_ORM_command(
                        v, k, i_name, ORM_commands)

        images_to_scan = SubscriptionRequestFile.objects.filter(
            ORM_commands["include"]
        )
        if ORM_commands["exclude"]:
            images_to_scan = images_to_scan.exclude(ORM_commands["exclude"])

        requestfile_ids = []
        for image in images_to_scan:
            image.reason = semi_auto_correction_rule.reason
            image.counter_measures = semi_auto_correction_rule.counter_measures
            image.save()
            requestfile_ids.append(image.id)

        return Response(data={"requestfile_ids": requestfile_ids, "count": len(requestfile_ids)}, status=status.HTTP_201_CREATED)
|
@ -3,16 +3,16 @@ from rest_framework.routers import DefaultRouter, SimpleRouter
|
||||
|
||||
from fwd_api.api.ctel_view import CtelViewSet
|
||||
from fwd_api.api.accuracy_view import AccuracyViewSet
|
||||
|
||||
from fwd_api.api.ctel_user_view import CtelUserViewSet
|
||||
|
||||
from fwd_api.api.ctel_template_view import CtelTemplateViewSet
|
||||
from fwd_api.api.semi_auto_correction import SemiAutoCorrectionViewSet
|
||||
|
||||
if settings.DEBUG:
|
||||
router = DefaultRouter()
|
||||
else:
|
||||
router = SimpleRouter()
|
||||
|
||||
router.register("automation", SemiAutoCorrectionViewSet, basename="SemiAutoAPI")
|
||||
router.register("ctel", CtelViewSet, basename="CtelAPI")
|
||||
router.register("ctel", CtelUserViewSet, basename="CtelUserAPI")
|
||||
router.register("ctel", AccuracyViewSet, basename="AccuracyAPI")
|
||||
|
26
cope2n-api/fwd_api/migrations/0192_semiautocorrection.py
Normal file
26
cope2n-api/fwd_api/migrations/0192_semiautocorrection.py
Normal file
@ -0,0 +1,26 @@
|
||||
# Generated by Django 4.1.3 on 2024-07-15 07:02
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Create the ``SemiAutoCorrection`` model."""

    dependencies = [
        ('fwd_api', '0191_subscriptionrequest_is_required_and_more'),
    ]

    operations = [
        migrations.CreateModel(
            name='SemiAutoCorrection',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                # The five JSON fields below all use the same literal dict as default.
                ('feedback_result', models.JSONField(default={'imei_number': None, 'invoice_no': None, 'purchase_date': None, 'retailername': None, 'sold_to_party': None}, null=True)),
                ('reviewed_result', models.JSONField(default={'imei_number': None, 'invoice_no': None, 'purchase_date': None, 'retailername': None, 'sold_to_party': None}, null=True)),
                ('predict_result', models.JSONField(default={'imei_number': None, 'invoice_no': None, 'purchase_date': None, 'retailername': None, 'sold_to_party': None}, null=True)),
                ('feedback_accuracy', models.JSONField(default={'imei_number': None, 'invoice_no': None, 'purchase_date': None, 'retailername': None, 'sold_to_party': None}, null=True)),
                ('reviewed_accuracy', models.JSONField(default={'imei_number': None, 'invoice_no': None, 'purchase_date': None, 'retailername': None, 'sold_to_party': None}, null=True)),
                ('reason', models.TextField(blank=True)),
                ('counter_measures', models.TextField(blank=True)),
            ],
        ),
    ]
|
@ -0,0 +1,18 @@
|
||||
# Generated by Django 4.1.3 on 2024-07-15 09:14
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add the nullable ``subsidiary`` column to ``SemiAutoCorrection``."""

    dependencies = [
        ('fwd_api', '0192_semiautocorrection'),
    ]

    operations = [
        migrations.AddField(
            model_name='semiautocorrection',
            name='subsidiary',
            field=models.CharField(max_length=200, null=True),
        ),
    ]
|
@ -0,0 +1,39 @@
|
||||
# Generated by Django 4.1.3 on 2024-07-15 09:16
|
||||
|
||||
from django.db import migrations, models
|
||||
import fwd_api.models.SemiAutoCorrection
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Switch the JSON field defaults to the ``default_json_fields`` callable."""

    dependencies = [
        ('fwd_api', '0193_semiautocorrection_subsidiary'),
    ]

    operations = [
        migrations.AlterField(
            model_name='semiautocorrection',
            name='feedback_accuracy',
            field=models.JSONField(default=fwd_api.models.SemiAutoCorrection.default_json_fields, null=True),
        ),
        migrations.AlterField(
            model_name='semiautocorrection',
            name='feedback_result',
            field=models.JSONField(default=fwd_api.models.SemiAutoCorrection.default_json_fields, null=True),
        ),
        migrations.AlterField(
            model_name='semiautocorrection',
            name='predict_result',
            field=models.JSONField(default=fwd_api.models.SemiAutoCorrection.default_json_fields, null=True),
        ),
        migrations.AlterField(
            model_name='semiautocorrection',
            name='reviewed_accuracy',
            field=models.JSONField(default=fwd_api.models.SemiAutoCorrection.default_json_fields, null=True),
        ),
        migrations.AlterField(
            model_name='semiautocorrection',
            name='reviewed_result',
            field=models.JSONField(default=fwd_api.models.SemiAutoCorrection.default_json_fields, null=True),
        ),
    ]
|
19
cope2n-api/fwd_api/models/SemiAutoCorrection.py
Normal file
19
cope2n-api/fwd_api/models/SemiAutoCorrection.py
Normal file
@ -0,0 +1,19 @@
|
||||
from django.db import models
|
||||
from django.utils import timezone
|
||||
|
||||
def default_json_fields():
    """Return a fresh dict holding every rule key, each mapped to ``None``."""
    rule_keys = (
        "invoice_no",
        "imei_number",
        "retailername",
        "purchase_date",
        "sold_to_party",
    )
    return dict.fromkeys(rule_keys)
|
||||
|
||||
class SemiAutoCorrection(models.Model):
    """A stored rule that fills in ``reason`` and ``counter_measures``.

    The INPUT fields hold the conditions of the rule; the OUTPUT fields hold
    the values written when the rule is applied.
    """

    # INPUT
    id = models.AutoField(primary_key=True)
    subsidiary = models.CharField(null=True, max_length=200)
    feedback_result = models.JSONField(null=True, default=default_json_fields)
    reviewed_result = models.JSONField(null=True, default=default_json_fields)
    predict_result = models.JSONField(null=True, default=default_json_fields)
    feedback_accuracy = models.JSONField(null=True, default=default_json_fields)
    # Declared once, after ``feedback_accuracy``. An earlier duplicate
    # declaration was dead code because the later one replaced it.
    reviewed_accuracy = models.JSONField(null=True, default=default_json_fields)
    # OUTPUT
    reason = models.TextField(blank=True)
    counter_measures = models.TextField(blank=True)
|
47
cope2n-api/fwd_api/serializers/SemiAutoCorrection.py
Normal file
47
cope2n-api/fwd_api/serializers/SemiAutoCorrection.py
Normal file
@ -0,0 +1,47 @@
|
||||
from rest_framework import serializers
|
||||
|
||||
from ..models.SemiAutoCorrection import SemiAutoCorrection
|
||||
|
||||
def default_json_fields():
    """Return a fresh dict holding every rule key, each mapped to ``None``."""
    rule_keys = (
        "invoice_no",
        "imei_number",
        "retailername",
        "purchase_date",
        "sold_to_party",
    )
    return dict.fromkeys(rule_keys)
|
||||
|
||||
class SemiAutoCorrectionSerializer(serializers.ModelSerializer):
    """Serializer for rules that pads every JSON rule field with default keys."""

    # JSON fields that are normalised to the full default key set on input.
    JSON_RULE_FIELDS = (
        'feedback_result',
        'reviewed_result',
        'predict_result',
        'feedback_accuracy',
        'reviewed_accuracy',
    )

    class Meta:
        model = SemiAutoCorrection
        fields = '__all__'

    def to_internal_value(self, data):
        """
        Customize the deserialization process for the JSONField fields.

        Each JSON rule field is replaced by the default key set merged with
        the submitted value. On a partial update, fields missing from the
        payload are left alone so a PATCH does not reset them to defaults.
        """
        internal_value = super().to_internal_value(data)

        # Update the JSONField fields
        for field_name in self.JSON_RULE_FIELDS:
            if self.partial and field_name not in data:
                continue
            internal_value[field_name] = self.update_json_field(data.get(field_name))

        return internal_value

    def update_json_field(self, value):
        """
        Helper method to update the JSONField value.

        Returns the default key set, overridden by ``value`` when given.

        Raises:
            serializers.ValidationError: If ``value`` is neither empty nor a
                JSON object.
        """
        merged = default_json_fields()
        if value is None or value == "":
            return merged
        if not isinstance(value, dict):
            # Report a validation error instead of crashing in dict.update().
            raise serializers.ValidationError(
                "Rule conditions must be a JSON object.")
        merged.update(value)
        return merged
|
||||
|
||||
|
||||
class SemiAutoCorrectionScanSerializer(serializers.ModelSerializer):
    """Input serializer for the ``scan`` action: the time window to scan."""

    # Both bounds are required date-times; they are not model fields.
    start_date = serializers.DateTimeField()
    end_date = serializers.DateTimeField()

    class Meta:
        model = SemiAutoCorrection
        fields = ["id", "start_date", "end_date"]
|
68
cope2n-api/fwd_api/utils/auto_correct_language.py
Normal file
68
cope2n-api/fwd_api/utils/auto_correct_language.py
Normal file
@ -0,0 +1,68 @@
|
||||
from django.db.models import Q
|
||||
|
||||
# Placeholder meaning "this lookup has no fixed value; take it from the
# condition text after SEPARATE_KEYWORD".
NO_DEFAULT_VALUE = "*&%"
# Separates a special command from its value, e.g. "starts_with||Shopee".
SEPARATE_KEYWORD = "||"
# Maps each special command to the ORM lookups it expands to.
#   "orm":       list of [<lookup suffix>, <value>, <"include" | "exclude">]
#   "operation": how the lookups of ONE command that land in the same bucket
#                are combined with each other ("AND" / "OR")
# Order matters to the caller: "notEmpty" must come before "Empty" because
# commands are detected by substring match and the first hit wins.
KEYWORD_TO_ORM = {
    # NOTE(review): "__0__lt" compares element 0, so this presumably targets
    # list-valued fields; the second lookup excludes empty lists. Confirm.
    "<": {"orm": [["__0__lt", NO_DEFAULT_VALUE, "include"], ["__exact", [], "exclude"]],
          "operation": "AND"}, # [[<command>, <value>, <is_excluded>],...]
    "notEmpty": {"orm": [["__exact", None, "exclude"], ["__exact", "", "exclude"]],
                 "operation": "OR"},
    "Empty": {"orm": [["__exact", None, "include"], ["__exact", "", "include"]],
              "operation": "OR"}, # operation between the 2 ORM commands of this entry only
    "starts_with": {"orm": [["__istartswith", NO_DEFAULT_VALUE, "include"]],
                    "operation": "AND"},
}
|
||||
|
||||
|
||||
def condition_to_ORM_command(condition, keyword, parent=None, ORM_commands=None):
    """Translate one rule condition into Django ``Q`` filters.

    For *result and *accuracy only.

    Args:
        condition: Either a plain value to match exactly, or a special
            command from ``KEYWORD_TO_ORM``, optionally followed by
            ``SEPARATE_KEYWORD`` and a value. Examples: ``notEmpty``,
            ``Empty``, ``<||1.0``, ``starts_with||Shopee``.
        keyword: Key inside the JSON field, e.g. ``"retailername"``.
        parent: Name of the JSON field holding ``keyword``; when given the
            lookup path is ``<parent>__<keyword>``.
        ORM_commands: Dict with ``"include"`` and ``"exclude"`` entries (a
            ``Q`` or ``None``) to extend. It is updated in place. A fresh
            dict is created when omitted.

    Returns:
        The ``ORM_commands`` dict with the new condition AND-ed into it.
    """
    # A dict literal as default would be shared, and mutated, across calls.
    if ORM_commands is None:
        ORM_commands = {"include": None, "exclude": None}

    ORM_command = f"{parent}__{keyword}" if parent else f"{keyword}"
    condition_text = str(condition)

    # map the command by condition
    special_case = False
    for k, v in KEYWORD_TO_ORM.items():
        if k not in condition_text:
            continue
        special_case = True
        this_query = {"include": None,
                      "exclude": None}
        for suffix, fixed_value, bucket in v["orm"]:
            full_cmd = ORM_command + suffix
            if fixed_value != NO_DEFAULT_VALUE:
                cmd_value = fixed_value
            else:
                # The value comes from the condition; use a number if it parses.
                raw_value = condition_text.split(SEPARATE_KEYWORD)[-1]
                try:
                    cmd_value = float(raw_value)
                except ValueError:
                    cmd_value = raw_value
            if not this_query[bucket]:
                this_query[bucket] = Q(**{full_cmd: cmd_value})
            elif v["operation"] == "AND":
                this_query[bucket] &= Q(**{full_cmd: cmd_value})
            elif v["operation"] == "OR":
                this_query[bucket] |= Q(**{full_cmd: cmd_value})
        # Different conditions are always AND-ed together.
        for stat, query in this_query.items():
            if query:
                if not ORM_commands[stat]:
                    ORM_commands[stat] = query
                else:
                    ORM_commands[stat] &= query
        # Only the first matching command applies.
        break

    if not special_case:
        # Plain accuracy values are wrapped in a list before matching.
        # ``parent`` may be None, so guard the membership test.
        if parent and "accuracy" in parent:
            condition = [condition]
        if not ORM_commands["include"]:
            ORM_commands["include"] = Q(**{ORM_command: condition})
        else:
            ORM_commands["include"] &= Q(**{ORM_command: condition})
    return ORM_commands
|
@ -1 +1 @@
|
||||
Subproject commit be37541e48bcf2045be3e375319fdb69aa8bcef0
|
||||
Subproject commit 03bfaeb4441178fe933f65b7a05c35b04779ff07
|
Loading…
Reference in New Issue
Block a user