Fix: partial #62

This commit is contained in:
dx-tan 2024-03-15 08:46:06 +07:00
parent cfdced2618
commit 270008f915
9 changed files with 152 additions and 14 deletions

1
.gitignore vendored
View File

@ -42,3 +42,4 @@ Jan.csv
cope2n-api/reviewed/date.xlsx cope2n-api/reviewed/date.xlsx
cope2n-api/reviewed/retailer.xlsx cope2n-api/reviewed/retailer.xlsx
/scripts/* /scripts/*
scripts/crawl_database.py

View File

@ -4,7 +4,7 @@ import requests
from datetime import datetime from datetime import datetime
# Get the proxy URL from the environment variable # Get the proxy URL from the environment variable
interval = 60*60*1 # 1 minute interval = 60*60*3 # 1 minute
update_cost = int(60*2) update_cost = int(60*2)
proxy_url = os.getenv('PROXY', "localhost") proxy_url = os.getenv('PROXY', "localhost")
user = os.getenv('ADMIN_USER_NAME', "") user = os.getenv('ADMIN_USER_NAME', "")

@ -1 +1 @@
Subproject commit 671d7917c657ad185a06772e0b707b45fe59788a Subproject commit bae5f732e12c5d5307b3ce14a98fa198ea49e372

View File

@ -17,6 +17,7 @@ RUN pip install uvicorn gunicorn Celery
RUN pip install pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116 RUN pip install pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
RUN pip install -U openmim==0.3.7 --no-cache-dir RUN pip install -U openmim==0.3.7 --no-cache-dir
RUN mim install mmcv-full==1.7.2 RUN mim install mmcv-full==1.7.2
RUN pip install fastdeploy-gpu-python==1.0.7 -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html --no-cache-dir
# End integration with sdskvu # End integration with sdskvu
USER ${UID} USER ${UID}

View File

@ -16,7 +16,7 @@ import json
from ..exception.exceptions import InvalidException, RequiredFieldException, NotFoundException from ..exception.exceptions import InvalidException, RequiredFieldException, NotFoundException
from ..models import SubscriptionRequest, Report, ReportFile, SubscriptionRequestFile from ..models import SubscriptionRequest, Report, ReportFile, SubscriptionRequestFile
from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list, extract_report_detail_list, IterAvg from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list, extract_report_detail_list, IterAvg
from ..utils.file import download_from_S3, dict2xlsx, save_report_to_S3, build_S3_url from ..utils.file import download_from_S3, dict2xlsx, save_report_to_S3, build_S3_url, validate_review
from ..utils.redis import RedisUtils from ..utils.redis import RedisUtils
from ..utils.process import string_to_boolean from ..utils.process import string_to_boolean
from ..utils.cache import get_cache, set_cache from ..utils.cache import get_cache, set_cache
@ -695,8 +695,10 @@ class AccuracyViewSet(viewsets.ViewSet):
if not reviewed_result: if not reviewed_result:
reviewed_result = copy.deepcopy(sample_result) reviewed_result = copy.deepcopy(sample_result)
reviewed_result["imei_number"] = [None for _ in range(subscription_request.doc_type.split(",").count("imei"))]
if not feedback_result: if not feedback_result:
feedback_result = copy.deepcopy(sample_result) feedback_result = copy.deepcopy(sample_result)
feedback_result["imei_number"] = [None for _ in range(subscription_request.doc_type.split(",").count("imei"))]
if not predicted_result: if not predicted_result:
predicted_result = copy.deepcopy(sample_result) predicted_result = copy.deepcopy(sample_result)
@ -726,8 +728,10 @@ class AccuracyViewSet(viewsets.ViewSet):
if not reviewed_result: if not reviewed_result:
reviewed_result = copy.deepcopy(sample_result) reviewed_result = copy.deepcopy(sample_result)
reviewed_result["imei_number"] = [None for _ in range(subscription_request.doc_type.split(",").count("imei"))]
if not feedback_result: if not feedback_result:
feedback_result = copy.deepcopy(sample_result) feedback_result = copy.deepcopy(sample_result)
feedback_result["imei_number"] = [None for _ in range(subscription_request.doc_type.split(",").count("imei"))]
if not predicted_result: if not predicted_result:
predicted_result = copy.deepcopy(sample_result) predicted_result = copy.deepcopy(sample_result)
@ -760,25 +764,22 @@ class AccuracyViewSet(viewsets.ViewSet):
elif request.method == 'POST': elif request.method == 'POST':
data = request.data data = request.data
base_query = Q(request_id=request_id) base_query = Q(request_id=request_id)
subscription_request = SubscriptionRequest.objects.filter(base_query) subscription_request = SubscriptionRequest.objects.filter(base_query)
if subscription_request.count() == 0: if subscription_request.count() == 0:
raise NotFoundException(excArgs=request_id) raise NotFoundException(excArgs=request_id)
subscription_request = subscription_request.first() subscription_request = subscription_request.first()
subscription_request_files = SubscriptionRequestFile.objects.filter(request=subscription_request.id) subscription_request_files = SubscriptionRequestFile.objects.filter(request=subscription_request.id)
if "reviewed_result" not in data: if "reviewed_result" not in data:
raise InvalidException(excArgs=f'reviewed_result') raise InvalidException(excArgs=f'reviewed_result')
reviewed_result = data["reviewed_result"] reviewed_result = data["reviewed_result"]
for field in ['retailername', 'sold_to_party', 'invoice_no', 'purchase_date', 'imei_number']: if not subscription_request.predict_result:
if not field in reviewed_result.keys(): raise InvalidException(excArgs=f'request_id')
raise RequiredFieldException(excArgs=f'reviewed_result.{field}') validate_review(reviewed_result, len(subscription_request.predict_result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[3].get("value", [])))
reviewed_result['request_id'] = request_id reviewed_result['request_id'] = request_id
for subscription_request_file in subscription_request_files: for subscription_request_file in subscription_request_files:

View File

@ -0,0 +1,79 @@
# myapp/management/commands/mycustomcommand.py
from django.core.management.base import BaseCommand
from tqdm import tqdm
from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest
from fwd_api.exception.exceptions import InvalidException
from fwd_api.utils.accuracy import predict_result_to_ready
import traceback
import copy
from django.utils import timezone
# IMEI / serial values compared against each request's predicted
# "imei_number" values; a match marks the request as a test request.
# "R52W70BHDWX" appears twice in the original list and is kept as-is so the
# list stays element-for-element identical.
TEST_IMEI_VALUES = [
    "357822611219904",
    "RFAW2022FED",
    "5AWH14MT400396N",
    "0HU33NIW200044K",
    "0GJG4DBW200318X",
    "358975990917032",
    "350731691693549",
    "R52W70BHDWX",
    "R52W70BHDWX",
    "350073345090297",
    "0PBL3NHW500023N",
]

# Retailer names compared against each request's predicted "retailername".
TEST_RETAILER_VALUES = [
    "Best Denki",
    "Arrow Communication",
]

# NOTE(review): not referenced by the command defined below in this file
# section -- confirm whether it is still needed.
KEY = "invoice_no"
class Command(BaseCommand):
    """Scan subscription requests and flag those that look like test traffic.

    A request whose predicted IMEI list contains one of ``TEST_IMEI_VALUES``
    gets ``is_test_request = True`` and is saved. A request whose predicted
    retailer is in ``TEST_RETAILER_VALUES`` but has no matching IMEI is only
    reported as suspicious; it is not modified.
    """

    # NOTE(review): this help text does not describe what the command does
    # (it looks copied from another command) -- left unchanged on purpose.
    help = 'Refactor database for image level'

    def add_arguments(self, parser):
        """Register the positional ``start`` and ``end`` date arguments."""
        parser.add_argument('start', type=str, help='start date, sample: 2023-01-02T00:00:00+0700')
        parser.add_argument('end', type=str, help='end date, sample: 2024-01-25T00:00:00+0800')

    def process_request(self, request, test_requests, supicious_requests):
        """Classify one request and record its id in the matching list.

        Args:
            request: The ``SubscriptionRequest`` row to inspect.
            test_requests: List that receives the ids of requests flagged
                as test requests (mutated in place).
            supicious_requests: List that receives the ids of requests whose
                retailer matches but whose IMEIs do not (mutated in place).
        """
        # Skip ids whose part before the first "." contains no underscore.
        if len(request.request_id.split(".")[0].split("_")) < 2:
            return
        if not request.predict_result:
            self.stdout.write(self.style.WARNING(f"Key predict_result not found in {request.request_id}"))
            return
        if request.predict_result.get("status", 200) != 200:
            # Distinct message: the result exists but reports a failure.
            self.stdout.write(self.style.WARNING(f"predict_result status is not 200 in {request.request_id}"))
            return

        # One malformed row must not abort the scan of the whole table.
        try:
            fields = request.predict_result["content"]["document"][0]["content"]
        except (KeyError, IndexError, TypeError):
            self.stdout.write(self.style.WARNING(f"Unexpected predict_result structure in {request.request_id}"))
            return

        imeis = []
        retailername = None
        for content in fields:
            label = content.get("label")
            if label == "retailername":
                retailername = content.get("value")
            elif label == "imei_number":
                # A null value would make the any() below raise TypeError.
                imeis = content.get("value") or []

        # A test IMEI is decisive with or without a test retailer, so the two
        # original branches that did the same thing are merged.
        if any(imei in TEST_IMEI_VALUES for imei in imeis):
            request.is_test_request = True
            request.save()
            test_requests.append(request.request_id)
        elif retailername in TEST_RETAILER_VALUES:
            supicious_requests.append(request.request_id)

    def handle(self, *args, **options):
        """Run the scan over the requested ``created_at`` window.

        Raises:
            InvalidException: If ``start`` or ``end`` does not match the
                ``%Y-%m-%dT%H:%M:%S%z`` format.
        """
        start = options['start']
        end = options['end']
        test_requests = []
        supicious_requests = []
        if start or end:
            # Timestamps are parsed to second precision with a UTC offset.
            date_format = '%Y-%m-%dT%H:%M:%S%z'
            try:
                start_date = timezone.datetime.strptime(start, date_format)
                end_date = timezone.datetime.strptime(end, date_format)
            except (TypeError, ValueError) as e:
                # ValueError: wrong format; TypeError: one bound is missing.
                print(f"[INFO]: start: {start}")
                print(f"[INFO]: end: {end}")
                raise InvalidException(excArgs="Date format") from e
            subcription_iter = SubscriptionRequest.objects.filter(created_at__range=(start_date, end_date))
        else:
            subcription_iter = SubscriptionRequest.objects.all()

        for request in tqdm(subcription_iter.iterator()):
            self.process_request(request, test_requests, supicious_requests)

        self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!'))
        self.stdout.write(self.style.SUCCESS(f'Test request: \n {test_requests} \n ============================='))
        self.stdout.write(self.style.SUCCESS(f'Supicious request: \n {supicious_requests} \n ============================='))

View File

@ -0,0 +1,49 @@
# myapp/management/commands/mycustomcommand.py
from django.core.management.base import BaseCommand
from tqdm import tqdm
from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest
from fwd_api.exception.exceptions import InvalidException
from fwd_api.utils.accuracy import predict_result_to_ready
import traceback
import copy
from django.utils import timezone
class Command(BaseCommand):
    """Mark requests without feedback or a redemption id as test requests."""

    help = 'Refactor database for image level'

    def add_arguments(self, parser):
        """Register the positional ``start`` and ``end`` date arguments."""
        positional_arguments = (
            ('start', 'start date, sample: 2023-01-02T00:00:00+0700'),
            ('end', 'end date, sample: 2024-01-25T00:00:00+0800'),
        )
        for argument_name, help_text in positional_arguments:
            parser.add_argument(argument_name, type=str, help=help_text)

    def process_request(self, request):
        """Set and save ``is_test_request`` when feedback or redemption id is empty."""
        if request.feedback_result and request.redemption_id:
            # Both values are present: leave the row untouched.
            return
        request.is_test_request = True
        request.save()

    def handle(self, *args, **options):
        """Apply ``process_request`` to every request in the date window.

        Raises ``InvalidException`` when either bound cannot be parsed with
        the ``%Y-%m-%dT%H:%M:%S%z`` format.
        """
        start, end = options['start'], options['end']
        if not (start or end):
            queryset = SubscriptionRequest.objects.all()
        else:
            date_format = '%Y-%m-%dT%H:%M:%S%z'
            try:
                window = (
                    timezone.datetime.strptime(start, date_format),
                    timezone.datetime.strptime(end, date_format),
                )
            except Exception:
                print(f"[INFO]: start: {start}")
                print(f"[INFO]: end: {end}")
                raise InvalidException(excArgs="Date format")
            queryset = SubscriptionRequest.objects.filter(created_at__range=window)

        for subscription_request in tqdm(queryset.iterator()):
            self.process_request(subscription_request)

        self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!'))

View File

@ -536,7 +536,7 @@ def extract_report_detail_list(report_detail_list, lower=False, in_percent=True)
"Request ID": report_file.correspond_request_id, "Request ID": report_file.correspond_request_id,
"Redemption Number": report_file.correspond_redemption_id, "Redemption Number": report_file.correspond_redemption_id,
"Image type": report_file.doc_type, "Image type": report_file.doc_type,
"IMEI_user submitted": first_of_list(report_file.feedback_result.get("imei_number", [None])), "IMEI_user submitted": first_of_list(report_file.feedback_result.get("imei_number", [None])) if report_file.feedback_result else None,
"IMEI_OCR retrieved": first_of_list(report_file.predict_result.get("imei_number", [None])), "IMEI_OCR retrieved": first_of_list(report_file.predict_result.get("imei_number", [None])),
"IMEI Revised": first_of_list(report_file.reviewed_result.get("imei_number", [None])) if report_file.reviewed_result else None, "IMEI Revised": first_of_list(report_file.reviewed_result.get("imei_number", [None])) if report_file.reviewed_result else None,
"IMEI1 Accuracy": first_of_list(report_file.feedback_accuracy.get("imei_number", [None])), "IMEI1 Accuracy": first_of_list(report_file.feedback_accuracy.get("imei_number", [None])),
@ -623,9 +623,9 @@ def predict_result_to_ready(result):
return dict_result return dict_result
dict_result["retailername"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}])[0].get("value", None) dict_result["retailername"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}])[0].get("value", None)
dict_result["sold_to_party"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}])[1].get("value", None) dict_result["sold_to_party"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}])[1].get("value", None)
dict_result["invoice_no"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}])[2].get("value", None) dict_result["purchase_date"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}])[2].get("value", [])
dict_result["purchase_date"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}])[3].get("value", []) dict_result["imei_number"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[3].get("value", [])
dict_result["imei_number"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[4].get("value", []) dict_result["invoice_no"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}, {}])[4].get("value", None)
return dict_result return dict_result
def align_fine_result(ready_predict, fine_result): def align_fine_result(ready_predict, fine_result):

View File

@ -80,6 +80,13 @@ def validate_feedback_file(csv_file_path):
if missing_columns: if missing_columns:
raise RequiredColumnException(excArgs=str(missing_columns)) raise RequiredColumnException(excArgs=str(missing_columns))
def validate_review(review, num_imei):
    """Validate a submitted ``reviewed_result`` payload.

    Args:
        review: The reviewed result sent by the client; must be a dict.
        num_imei: Exact number of entries ``review["imei_number"]`` must hold.

    Raises:
        RequiredFieldException: If a name listed in ``settings.FIELD`` is
            missing from ``review``.
        InvalidException: If ``review`` is not a dict, or ``imei_number`` is
            missing, is not a list, or has a length other than ``num_imei``.
    """
    # The payload comes straight from the request body; a non-dict value
    # would otherwise surface as an AttributeError instead of a client error.
    if not isinstance(review, dict):
        raise InvalidException(excArgs='reviewed_result')

    for field in settings.FIELD:
        if field not in review:
            raise RequiredFieldException(excArgs=f'reviewed_result.{field}')

    # .get() keeps this check independent of whether settings.FIELD happens
    # to list "imei_number": a missing key is reported as invalid, not KeyError.
    imei_numbers = review.get("imei_number")
    if not isinstance(imei_numbers, list) or len(imei_numbers) != num_imei:
        raise InvalidException(excArgs='imei_number')
def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUEST, min_file_num=1, file_field="files"): def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUEST, min_file_num=1, file_field="files"):
total_file_size = 0 total_file_size = 0
if len(files) < min_file_num: if len(files) < min_file_num:
@ -140,7 +147,7 @@ def get_folder_path(rq: SubscriptionRequest):
logger = get_task_logger(__name__) logger = get_task_logger(__name__)
request_id = str(rq.request_id) request_id = str(rq.request_id)
logger.info(f"[DEBUG]: rq.process_type: {rq.process_type}") logger.debug(f"rq.process_type: {rq.process_type}")
p_type = ProcessUtil.map_process_type_to_folder_name(int(rq.process_type)) p_type = ProcessUtil.map_process_type_to_folder_name(int(rq.process_type))
sub_id = str(rq.subscription.id) sub_id = str(rq.subscription.id)
user_id = str(rq.subscription.user.id) user_id = str(rq.subscription.user.id)