Fix: partial #62

This commit is contained in:
dx-tan 2024-03-15 08:46:06 +07:00
parent cfdced2618
commit 270008f915
9 changed files with 152 additions and 14 deletions

1
.gitignore vendored
View File

@ -42,3 +42,4 @@ Jan.csv
cope2n-api/reviewed/date.xlsx
cope2n-api/reviewed/retailer.xlsx
/scripts/*
scripts/crawl_database.py

View File

@ -4,7 +4,7 @@ import requests
from datetime import datetime
# Get the proxy URL from the environment variable
interval = 60*60*1 # 1 minute
interval = 60*60*3 # 3 hours
update_cost = int(60*2)
proxy_url = os.getenv('PROXY', "localhost")
user = os.getenv('ADMIN_USER_NAME', "")

@ -1 +1 @@
Subproject commit 671d7917c657ad185a06772e0b707b45fe59788a
Subproject commit bae5f732e12c5d5307b3ce14a98fa198ea49e372

View File

@ -17,6 +17,7 @@ RUN pip install uvicorn gunicorn Celery
RUN pip install pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
RUN pip install -U openmim==0.3.7 --no-cache-dir
RUN mim install mmcv-full==1.7.2
RUN pip install fastdeploy-gpu-python==1.0.7 -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html --no-cache-dir
# End integration with sdskvu
USER ${UID}

View File

@ -16,7 +16,7 @@ import json
from ..exception.exceptions import InvalidException, RequiredFieldException, NotFoundException
from ..models import SubscriptionRequest, Report, ReportFile, SubscriptionRequestFile
from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list, extract_report_detail_list, IterAvg
from ..utils.file import download_from_S3, dict2xlsx, save_report_to_S3, build_S3_url
from ..utils.file import download_from_S3, dict2xlsx, save_report_to_S3, build_S3_url, validate_review
from ..utils.redis import RedisUtils
from ..utils.process import string_to_boolean
from ..utils.cache import get_cache, set_cache
@ -695,8 +695,10 @@ class AccuracyViewSet(viewsets.ViewSet):
if not reviewed_result:
reviewed_result = copy.deepcopy(sample_result)
reviewed_result["imei_number"] = [None for _ in range(subscription_request.doc_type.split(",").count("imei"))]
if not feedback_result:
feedback_result = copy.deepcopy(sample_result)
feedback_result["imei_number"] = [None for _ in range(subscription_request.doc_type.split(",").count("imei"))]
if not predicted_result:
predicted_result = copy.deepcopy(sample_result)
@ -726,8 +728,10 @@ class AccuracyViewSet(viewsets.ViewSet):
if not reviewed_result:
reviewed_result = copy.deepcopy(sample_result)
reviewed_result["imei_number"] = [None for _ in range(subscription_request.doc_type.split(",").count("imei"))]
if not feedback_result:
feedback_result = copy.deepcopy(sample_result)
feedback_result["imei_number"] = [None for _ in range(subscription_request.doc_type.split(",").count("imei"))]
if not predicted_result:
predicted_result = copy.deepcopy(sample_result)
@ -760,25 +764,22 @@ class AccuracyViewSet(viewsets.ViewSet):
elif request.method == 'POST':
data = request.data
base_query = Q(request_id=request_id)
subscription_request = SubscriptionRequest.objects.filter(base_query)
if subscription_request.count() == 0:
raise NotFoundException(excArgs=request_id)
subscription_request = subscription_request.first()
subscription_request_files = SubscriptionRequestFile.objects.filter(request=subscription_request.id)
if "reviewed_result" not in data:
raise InvalidException(excArgs=f'reviewed_result')
reviewed_result = data["reviewed_result"]
for field in ['retailername', 'sold_to_party', 'invoice_no', 'purchase_date', 'imei_number']:
if not field in reviewed_result.keys():
raise RequiredFieldException(excArgs=f'reviewed_result.{field}')
if not subscription_request.predict_result:
raise InvalidException(excArgs=f'request_id')
validate_review(reviewed_result, len(subscription_request.predict_result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[3].get("value", [])))
reviewed_result['request_id'] = request_id
for subscription_request_file in subscription_request_files:

View File

@ -0,0 +1,79 @@
# myapp/management/commands/mycustomcommand.py
from django.core.management.base import BaseCommand
from tqdm import tqdm
from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest
from fwd_api.exception.exceptions import InvalidException
from fwd_api.utils.accuracy import predict_result_to_ready
import traceback
import copy
from django.utils import timezone
# Values compared against the "imei_number" entries of a request's
# predict_result in Command.process_request; any match flags the request as a
# test request.
# NOTE(review): "R52W70BHDWX" is listed twice — harmless for membership tests.
TEST_IMEI_VALUES = ["357822611219904", "RFAW2022FED", "5AWH14MT400396N", "0HU33NIW200044K", "0GJG4DBW200318X", "358975990917032", "350731691693549", "R52W70BHDWX", "R52W70BHDWX", "350073345090297", "0PBL3NHW500023N"]
# Retailer names compared against the predicted "retailername"; a retailer
# match without an IMEI match only adds the request to the suspicious list.
TEST_RETAILER_VALUES = ["Best Denki", "Arrow Communication"]
# NOTE(review): KEY is not referenced in the visible part of this file —
# confirm it is still needed.
KEY = "invoice_no"
class Command(BaseCommand):
    """Flag subscription requests whose OCR result matches known test data.

    A request is marked ``is_test_request = True`` (and saved) when any of its
    predicted IMEI values is in ``TEST_IMEI_VALUES``. A request whose predicted
    retailer is in ``TEST_RETAILER_VALUES`` but has no matching IMEI is only
    reported as suspicious; it is not modified.
    """

    # NOTE(review): this help text looks copied from another command; it does
    # not describe what this command does. Left unchanged on purpose.
    help = 'Refactor database for image level'

    def add_arguments(self, parser):
        """Register the two positional date arguments (both required)."""
        parser.add_argument('start', type=str, help='start date, sample: 2023-01-02T00:00:00+0700')
        parser.add_argument('end', type=str, help='end date, sample: 2024-01-25T00:00:00+0800')

    @staticmethod
    def _extract_retailer_and_imeis(predict_result):
        """Return ``(retailername, imeis)`` read from a predict_result payload.

        Raises KeyError / IndexError / TypeError when the payload does not
        have the ``content -> document[0] -> content`` layout.
        """
        retailername = None
        imeis = []
        for content in predict_result["content"]["document"][0]["content"]:
            label = content["label"]
            if label == "retailername":
                retailername = content["value"]
            elif label == "imei_number":
                # A null value must not break the membership scan below.
                imeis = content["value"] or []
        return retailername, imeis

    def process_request(self, request, test_requests, supicious_requests):
        """Classify one request and record its id in the matching list.

        Args:
            request: the SubscriptionRequest row to inspect.
            test_requests: list that receives ids of requests flagged as test.
            supicious_requests: list that receives ids of requests whose
                retailer matches but whose IMEIs do not.
        """
        # Request ids without an "_" in the part before the first "." are skipped.
        if len(request.request_id.split(".")[0].split("_")) < 2:
            return
        if not request.predict_result:
            self.stdout.write(self.style.WARNING(f"Key predict_result not found in {request.request_id}"))
            return
        status = request.predict_result.get("status", 200)
        if status != 200:
            self.stdout.write(self.style.WARNING(f"predict_result of {request.request_id} has non-200 status: {status}"))
            return

        try:
            retailername, imeis = self._extract_retailer_and_imeis(request.predict_result)
        except (KeyError, IndexError, TypeError) as e:
            # One malformed row should not abort the whole batch run.
            self.stdout.write(self.style.WARNING(f"Unexpected predict_result layout in {request.request_id}: {e!r}"))
            return

        # An IMEI match is decisive regardless of the retailer.
        if any(imei in TEST_IMEI_VALUES for imei in imeis):
            request.is_test_request = True
            request.save()
            test_requests.append(request.request_id)
        elif retailername in TEST_RETAILER_VALUES:
            supicious_requests.append(request.request_id)

    def handle(self, *args, **options):
        """Scan requests created in [start, end] and report the findings.

        Raises:
            InvalidException: when ``start`` or ``end`` does not match
                ``%Y-%m-%dT%H:%M:%S%z``.
        """
        start = options['start']
        end = options['end']
        test_requests = []
        supicious_requests = []

        if start or end:
            try:
                start_date = timezone.datetime.strptime(start, '%Y-%m-%dT%H:%M:%S%z')
                end_date = timezone.datetime.strptime(end, '%Y-%m-%dT%H:%M:%S%z')
            except (TypeError, ValueError) as e:
                self.stdout.write(f"[INFO]: start: {start}")
                self.stdout.write(f"[INFO]: end: {end}")
                raise InvalidException(excArgs="Date format") from e
            subcription_iter = SubscriptionRequest.objects.filter(created_at__range=(start_date, end_date))
        else:
            subcription_iter = SubscriptionRequest.objects.all()

        for request in tqdm(subcription_iter.iterator()):
            self.process_request(request, test_requests, supicious_requests)

        self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!'))
        self.stdout.write(self.style.SUCCESS(f'Test request: \n {test_requests} \n ============================='))
        self.stdout.write(self.style.SUCCESS(f'Supicious request: \n {supicious_requests} \n ============================='))

View File

@ -0,0 +1,49 @@
# myapp/management/commands/mycustomcommand.py
from django.core.management.base import BaseCommand
from tqdm import tqdm
from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest
from fwd_api.exception.exceptions import InvalidException
from fwd_api.utils.accuracy import predict_result_to_ready
import traceback
import copy
from django.utils import timezone
class Command(BaseCommand):
    """Mark requests lacking feedback or a redemption id as test requests.

    Every SubscriptionRequest in the selected range whose ``feedback_result``
    or ``redemption_id`` is falsy gets ``is_test_request = True`` and is saved.
    """

    help = 'Refactor database for image level'

    def add_arguments(self, parser):
        """Declare the positional ``start`` and ``end`` date arguments."""
        parser.add_argument(
            'start',
            type=str,
            help='start date, sample: 2023-01-02T00:00:00+0700',
        )
        parser.add_argument(
            'end',
            type=str,
            help='end date, sample: 2024-01-25T00:00:00+0800',
        )

    def process_request(self, request):
        """Flag and save ``request`` unless it has both feedback and redemption id."""
        if request.feedback_result and request.redemption_id:
            return
        request.is_test_request = True
        request.save()

    def handle(self, *args, **options):
        """Walk the requests created between ``start`` and ``end`` and flag them.

        Raises InvalidException when either date fails to parse with
        ``%Y-%m-%dT%H:%M:%S%z``.
        """
        start, end = options['start'], options['end']
        date_format = '%Y-%m-%dT%H:%M:%S%z'

        if not (start or end):
            queryset = SubscriptionRequest.objects.all()
        else:
            try:
                window = (
                    timezone.datetime.strptime(start, date_format),
                    timezone.datetime.strptime(end, date_format),
                )
            except Exception:
                print(f"[INFO]: start: {start}")
                print(f"[INFO]: end: {end}")
                raise InvalidException(excArgs="Date format")
            queryset = SubscriptionRequest.objects.filter(created_at__range=window)

        for subscription_request in tqdm(queryset.iterator()):
            self.process_request(subscription_request)

        self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!'))

View File

@ -536,7 +536,7 @@ def extract_report_detail_list(report_detail_list, lower=False, in_percent=True)
"Request ID": report_file.correspond_request_id,
"Redemption Number": report_file.correspond_redemption_id,
"Image type": report_file.doc_type,
"IMEI_user submitted": first_of_list(report_file.feedback_result.get("imei_number", [None])),
"IMEI_user submitted": first_of_list(report_file.feedback_result.get("imei_number", [None])) if report_file.feedback_result else None,
"IMEI_OCR retrieved": first_of_list(report_file.predict_result.get("imei_number", [None])),
"IMEI Revised": first_of_list(report_file.reviewed_result.get("imei_number", [None])) if report_file.reviewed_result else None,
"IMEI1 Accuracy": first_of_list(report_file.feedback_accuracy.get("imei_number", [None])),
@ -623,9 +623,9 @@ def predict_result_to_ready(result):
return dict_result
dict_result["retailername"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}])[0].get("value", None)
dict_result["sold_to_party"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}])[1].get("value", None)
dict_result["invoice_no"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}])[2].get("value", None)
dict_result["purchase_date"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}])[3].get("value", [])
dict_result["imei_number"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[4].get("value", [])
dict_result["purchase_date"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}])[2].get("value", [])
dict_result["imei_number"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[3].get("value", [])
dict_result["invoice_no"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}, {}])[4].get("value", None)
return dict_result
def align_fine_result(ready_predict, fine_result):

View File

@ -80,6 +80,13 @@ def validate_feedback_file(csv_file_path):
if missing_columns:
raise RequiredColumnException(excArgs=str(missing_columns))
def validate_review(review, num_imei):
    """Validate a ``reviewed_result`` payload before it is stored.

    Args:
        review: the submitted reviewed result; must be a dict.
        num_imei: the number of entries ``review["imei_number"]`` must have.

    Raises:
        InvalidException: if ``review`` is not a dict, or ``imei_number`` is
            not a list of exactly ``num_imei`` items.
        RequiredFieldException: if a field listed in ``settings.FIELD`` is
            missing from ``review``.
    """
    # A non-dict payload would otherwise surface as an AttributeError.
    if not isinstance(review, dict):
        raise InvalidException(excArgs='reviewed_result')
    for field in settings.FIELD:
        if field not in review:
            raise RequiredFieldException(excArgs=f'reviewed_result.{field}')
    # .get() keeps this a validation error even if "imei_number" is not part
    # of settings.FIELD and is absent from the payload.
    imei_numbers = review.get("imei_number")
    if not isinstance(imei_numbers, list) or len(imei_numbers) != num_imei:
        raise InvalidException(excArgs='imei_number')
def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUEST, min_file_num=1, file_field="files"):
total_file_size = 0
if len(files) < min_file_num:
@ -140,7 +147,7 @@ def get_folder_path(rq: SubscriptionRequest):
logger = get_task_logger(__name__)
request_id = str(rq.request_id)
logger.info(f"[DEBUG]: rq.process_type: {rq.process_type}")
logger.debug(f"rq.process_type: {rq.process_type}")
p_type = ProcessUtil.map_process_type_to_folder_name(int(rq.process_type))
sub_id = str(rq.subscription.id)
user_id = str(rq.subscription.user.id)