Fix: partial #62
This commit is contained in:
parent
cfdced2618
commit
270008f915
1
.gitignore
vendored
1
.gitignore
vendored
@ -42,3 +42,4 @@ Jan.csv
|
|||||||
cope2n-api/reviewed/date.xlsx
|
cope2n-api/reviewed/date.xlsx
|
||||||
cope2n-api/reviewed/retailer.xlsx
|
cope2n-api/reviewed/retailer.xlsx
|
||||||
/scripts/*
|
/scripts/*
|
||||||
|
scripts/crawl_database.py
|
||||||
|
@ -4,7 +4,7 @@ import requests
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
# Get the proxy URL from the environment variable
|
# Get the proxy URL from the environment variable
|
||||||
interval = 60*60*1 # 1 minute
|
interval = 60*60*3 # 3 hours (assuming the unit is seconds)
|
||||||
update_cost = int(60*2)
|
update_cost = int(60*2)
|
||||||
proxy_url = os.getenv('PROXY', "localhost")
|
proxy_url = os.getenv('PROXY', "localhost")
|
||||||
user = os.getenv('ADMIN_USER_NAME', "")
|
user = os.getenv('ADMIN_USER_NAME', "")
|
||||||
|
@ -1 +1 @@
|
|||||||
Subproject commit 671d7917c657ad185a06772e0b707b45fe59788a
|
Subproject commit bae5f732e12c5d5307b3ce14a98fa198ea49e372
|
@ -17,6 +17,7 @@ RUN pip install uvicorn gunicorn Celery
|
|||||||
RUN pip install pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
|
RUN pip install pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
|
||||||
RUN pip install -U openmim==0.3.7 --no-cache-dir
|
RUN pip install -U openmim==0.3.7 --no-cache-dir
|
||||||
RUN mim install mmcv-full==1.7.2
|
RUN mim install mmcv-full==1.7.2
|
||||||
|
RUN pip install fastdeploy-gpu-python==1.0.7 -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html --no-cache-dir
|
||||||
# End intergration with sdskvu
|
# End intergration with sdskvu
|
||||||
|
|
||||||
USER ${UID}
|
USER ${UID}
|
||||||
|
@ -16,7 +16,7 @@ import json
|
|||||||
from ..exception.exceptions import InvalidException, RequiredFieldException, NotFoundException
|
from ..exception.exceptions import InvalidException, RequiredFieldException, NotFoundException
|
||||||
from ..models import SubscriptionRequest, Report, ReportFile, SubscriptionRequestFile
|
from ..models import SubscriptionRequest, Report, ReportFile, SubscriptionRequestFile
|
||||||
from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list, extract_report_detail_list, IterAvg
|
from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list, extract_report_detail_list, IterAvg
|
||||||
from ..utils.file import download_from_S3, dict2xlsx, save_report_to_S3, build_S3_url
|
from ..utils.file import download_from_S3, dict2xlsx, save_report_to_S3, build_S3_url, validate_review
|
||||||
from ..utils.redis import RedisUtils
|
from ..utils.redis import RedisUtils
|
||||||
from ..utils.process import string_to_boolean
|
from ..utils.process import string_to_boolean
|
||||||
from ..utils.cache import get_cache, set_cache
|
from ..utils.cache import get_cache, set_cache
|
||||||
@ -695,8 +695,10 @@ class AccuracyViewSet(viewsets.ViewSet):
|
|||||||
|
|
||||||
if not reviewed_result:
|
if not reviewed_result:
|
||||||
reviewed_result = copy.deepcopy(sample_result)
|
reviewed_result = copy.deepcopy(sample_result)
|
||||||
|
reviewed_result["imei_number"] = [None for _ in range(subscription_request.doc_type.split(",").count("imei"))]
|
||||||
if not feedback_result:
|
if not feedback_result:
|
||||||
feedback_result = copy.deepcopy(sample_result)
|
feedback_result = copy.deepcopy(sample_result)
|
||||||
|
feedback_result["imei_number"] = [None for _ in range(subscription_request.doc_type.split(",").count("imei"))]
|
||||||
if not predicted_result:
|
if not predicted_result:
|
||||||
predicted_result = copy.deepcopy(sample_result)
|
predicted_result = copy.deepcopy(sample_result)
|
||||||
|
|
||||||
@ -726,8 +728,10 @@ class AccuracyViewSet(viewsets.ViewSet):
|
|||||||
|
|
||||||
if not reviewed_result:
|
if not reviewed_result:
|
||||||
reviewed_result = copy.deepcopy(sample_result)
|
reviewed_result = copy.deepcopy(sample_result)
|
||||||
|
reviewed_result["imei_number"] = [None for _ in range(subscription_request.doc_type.split(",").count("imei"))]
|
||||||
if not feedback_result:
|
if not feedback_result:
|
||||||
feedback_result = copy.deepcopy(sample_result)
|
feedback_result = copy.deepcopy(sample_result)
|
||||||
|
feedback_result["imei_number"] = [None for _ in range(subscription_request.doc_type.split(",").count("imei"))]
|
||||||
if not predicted_result:
|
if not predicted_result:
|
||||||
predicted_result = copy.deepcopy(sample_result)
|
predicted_result = copy.deepcopy(sample_result)
|
||||||
|
|
||||||
@ -760,25 +764,22 @@ class AccuracyViewSet(viewsets.ViewSet):
|
|||||||
|
|
||||||
elif request.method == 'POST':
|
elif request.method == 'POST':
|
||||||
data = request.data
|
data = request.data
|
||||||
|
|
||||||
base_query = Q(request_id=request_id)
|
base_query = Q(request_id=request_id)
|
||||||
|
|
||||||
subscription_request = SubscriptionRequest.objects.filter(base_query)
|
subscription_request = SubscriptionRequest.objects.filter(base_query)
|
||||||
|
|
||||||
if subscription_request.count() == 0:
|
if subscription_request.count() == 0:
|
||||||
raise NotFoundException(excArgs=request_id)
|
raise NotFoundException(excArgs=request_id)
|
||||||
|
|
||||||
subscription_request = subscription_request.first()
|
subscription_request = subscription_request.first()
|
||||||
|
|
||||||
subscription_request_files = SubscriptionRequestFile.objects.filter(request=subscription_request.id)
|
subscription_request_files = SubscriptionRequestFile.objects.filter(request=subscription_request.id)
|
||||||
|
|
||||||
if "reviewed_result" not in data:
|
if "reviewed_result" not in data:
|
||||||
raise InvalidException(excArgs=f'reviewed_result')
|
raise InvalidException(excArgs=f'reviewed_result')
|
||||||
|
|
||||||
reviewed_result = data["reviewed_result"]
|
reviewed_result = data["reviewed_result"]
|
||||||
for field in ['retailername', 'sold_to_party', 'invoice_no', 'purchase_date', 'imei_number']:
|
if not subscription_request.predict_result:
|
||||||
if not field in reviewed_result.keys():
|
raise InvalidException(excArgs=f'request_id')
|
||||||
raise RequiredFieldException(excArgs=f'reviewed_result.{field}')
|
validate_review(reviewed_result, len(subscription_request.predict_result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[3].get("value", [])))
|
||||||
reviewed_result['request_id'] = request_id
|
reviewed_result['request_id'] = request_id
|
||||||
|
|
||||||
for subscription_request_file in subscription_request_files:
|
for subscription_request_file in subscription_request_files:
|
||||||
|
@ -0,0 +1,79 @@
|
|||||||
|
# myapp/management/commands/mycustomcommand.py
|
||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
from tqdm import tqdm
|
||||||
|
from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest
|
||||||
|
from fwd_api.exception.exceptions import InvalidException
|
||||||
|
from fwd_api.utils.accuracy import predict_result_to_ready
|
||||||
|
import traceback
|
||||||
|
import copy
|
||||||
|
from django.utils import timezone
|
||||||
|
|
||||||
|
# IMEI values that mark a request as a test request when any of them appears
# in the predicted "imei_number" field. Each value is listed exactly once.
TEST_IMEI_VALUES = [
    "357822611219904",
    "RFAW2022FED",
    "5AWH14MT400396N",
    "0HU33NIW200044K",
    "0GJG4DBW200318X",
    "358975990917032",
    "350731691693549",
    "R52W70BHDWX",
    "350073345090297",
    "0PBL3NHW500023N",
]
# Retailer names that make a request "suspicious" when no test IMEI matches.
TEST_RETAILER_VALUES = ["Best Denki", "Arrow Communication"]
# NOTE(review): KEY is not referenced in the visible part of this command -
# confirm whether it is still needed.
KEY = "invoice_no"
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Flag subscription requests whose predicted result contains test data.

    A request is saved with ``is_test_request = True`` when any predicted
    IMEI is in ``TEST_IMEI_VALUES``. A request whose predicted retailer name
    is in ``TEST_RETAILER_VALUES`` but has no test IMEI is only reported as
    suspicious; it is not modified.
    """

    help = 'Mark subscription requests containing known test IMEI values as test requests'

    # Format accepted for the ``start`` / ``end`` arguments,
    # e.g. 2023-01-02T00:00:00+0700.
    _DATE_FORMAT = '%Y-%m-%dT%H:%M:%S%z'

    def add_arguments(self, parser):
        """Register the positional ``start`` and ``end`` date arguments."""
        parser.add_argument('start', type=str, help='start date, sample: 2023-01-02T00:00:00+0700')
        parser.add_argument('end', type=str, help='end date, sample: 2024-01-25T00:00:00+0800')

    def process_request(self, request, test_requests, supicious_requests):
        """Classify one request and record its id in the matching list.

        Args:
            request: object exposing ``request_id``, ``predict_result``,
                ``is_test_request`` and ``save()``.
            test_requests: list that receives the ids of requests flagged as
                test requests (mutated in place).
            supicious_requests: list that receives the ids of requests whose
                retailer matches a test retailer but whose IMEIs do not
                (mutated in place).
        """
        # Skip ids whose part before the first "." has fewer than two
        # "_"-separated pieces.
        if len(request.request_id.split(".")[0].split("_")) < 2:
            return

        predict_result = request.predict_result
        if not predict_result:
            self.stdout.write(self.style.WARNING(f"Key predict_result not found in {request.request_id}"))
            return

        status = predict_result.get("status", 200)
        if status != 200:
            self.stdout.write(self.style.WARNING(
                f"predict_result status is {status} (expected 200) in {request.request_id}"))
            return

        # Walk the first document defensively: one malformed record must not
        # abort the whole batch run.
        documents = (predict_result.get("content") or {}).get("document") or [{}]
        fields = documents[0].get("content") or []

        imeis = []
        retailername = None
        for content in fields:
            label = content.get("label")
            if label == "retailername":
                retailername = content.get("value")
            elif label == "imei_number":
                # A null value is treated as "no IMEIs" instead of crashing.
                imeis = content.get("value") or []

        # A test IMEI flags the request whether or not the retailer matches.
        if any(imei in TEST_IMEI_VALUES for imei in imeis):
            request.is_test_request = True
            request.save()
            test_requests.append(request.request_id)
        elif retailername in TEST_RETAILER_VALUES:
            supicious_requests.append(request.request_id)

    def handle(self, *args, **options):
        """Run the classification over the requests created in the given range.

        Raises:
            InvalidException: if ``start`` or ``end`` does not match
                ``_DATE_FORMAT``.
        """
        # Local import: django.utils.timezone only re-exports ``datetime``
        # as an implementation detail.
        from datetime import datetime

        start = options['start']
        end = options['end']

        test_requests = []
        supicious_requests = []

        if start or end:
            try:
                start_date = datetime.strptime(start, self._DATE_FORMAT)
                end_date = datetime.strptime(end, self._DATE_FORMAT)
            except (TypeError, ValueError) as err:
                self.stdout.write(f"[INFO]: start: {start}")
                self.stdout.write(f"[INFO]: end: {end}")
                raise InvalidException(excArgs="Date format") from err
            subcription_iter = SubscriptionRequest.objects.filter(created_at__range=(start_date, end_date))
        else:
            subcription_iter = SubscriptionRequest.objects.all()

        for request in tqdm(subcription_iter.iterator()):
            self.process_request(request, test_requests, supicious_requests)

        self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!'))
        self.stdout.write(self.style.SUCCESS(f'Test request: \n {test_requests} \n ============================='))
        self.stdout.write(self.style.SUCCESS(f'Supicious request: \n {supicious_requests} \n ============================='))
|
||||||
|
|
@ -0,0 +1,49 @@
|
|||||||
|
# myapp/management/commands/mycustomcommand.py
|
||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
from tqdm import tqdm
|
||||||
|
from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest
|
||||||
|
from fwd_api.exception.exceptions import InvalidException
|
||||||
|
from fwd_api.utils.accuracy import predict_result_to_ready
|
||||||
|
import traceback
|
||||||
|
import copy
|
||||||
|
from django.utils import timezone
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Flag subscription requests lacking feedback or a redemption id.

    Every request in the selected range whose ``feedback_result`` or
    ``redemption_id`` is empty is saved with ``is_test_request = True``.
    """

    help = 'Mark subscription requests without feedback_result or redemption_id as test requests'

    # Format accepted for the ``start`` / ``end`` arguments,
    # e.g. 2023-01-02T00:00:00+0700.
    _DATE_FORMAT = '%Y-%m-%dT%H:%M:%S%z'

    def add_arguments(self, parser):
        """Register the positional ``start`` and ``end`` date arguments."""
        parser.add_argument('start', type=str, help='start date, sample: 2023-01-02T00:00:00+0700')
        parser.add_argument('end', type=str, help='end date, sample: 2024-01-25T00:00:00+0800')

    def process_request(self, request):
        """Mark ``request`` as a test request when it has no feedback or redemption id.

        Args:
            request: object exposing ``feedback_result``, ``redemption_id``,
                ``is_test_request`` and ``save()``.
        """
        if not request.feedback_result or not request.redemption_id:
            request.is_test_request = True
            request.save()

    def handle(self, *args, **options):
        """Apply ``process_request`` to the requests created in the given range.

        Raises:
            InvalidException: if ``start`` or ``end`` does not match
                ``_DATE_FORMAT``.
        """
        # Local import: django.utils.timezone only re-exports ``datetime``
        # as an implementation detail.
        from datetime import datetime

        start = options['start']
        end = options['end']

        if start or end:
            try:
                start_date = datetime.strptime(start, self._DATE_FORMAT)
                end_date = datetime.strptime(end, self._DATE_FORMAT)
            except (TypeError, ValueError) as err:
                self.stdout.write(f"[INFO]: start: {start}")
                self.stdout.write(f"[INFO]: end: {end}")
                raise InvalidException(excArgs="Date format") from err
            subcription_iter = SubscriptionRequest.objects.filter(created_at__range=(start_date, end_date))
        else:
            subcription_iter = SubscriptionRequest.objects.all()

        for request in tqdm(subcription_iter.iterator()):
            self.process_request(request)

        self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!'))
|
@ -536,7 +536,7 @@ def extract_report_detail_list(report_detail_list, lower=False, in_percent=True)
|
|||||||
"Request ID": report_file.correspond_request_id,
|
"Request ID": report_file.correspond_request_id,
|
||||||
"Redemption Number": report_file.correspond_redemption_id,
|
"Redemption Number": report_file.correspond_redemption_id,
|
||||||
"Image type": report_file.doc_type,
|
"Image type": report_file.doc_type,
|
||||||
"IMEI_user submitted": first_of_list(report_file.feedback_result.get("imei_number", [None])),
|
"IMEI_user submitted": first_of_list(report_file.feedback_result.get("imei_number", [None])) if report_file.feedback_result else None,
|
||||||
"IMEI_OCR retrieved": first_of_list(report_file.predict_result.get("imei_number", [None])),
|
"IMEI_OCR retrieved": first_of_list(report_file.predict_result.get("imei_number", [None])),
|
||||||
"IMEI Revised": first_of_list(report_file.reviewed_result.get("imei_number", [None])) if report_file.reviewed_result else None,
|
"IMEI Revised": first_of_list(report_file.reviewed_result.get("imei_number", [None])) if report_file.reviewed_result else None,
|
||||||
"IMEI1 Accuracy": first_of_list(report_file.feedback_accuracy.get("imei_number", [None])),
|
"IMEI1 Accuracy": first_of_list(report_file.feedback_accuracy.get("imei_number", [None])),
|
||||||
@ -623,9 +623,9 @@ def predict_result_to_ready(result):
|
|||||||
return dict_result
|
return dict_result
|
||||||
dict_result["retailername"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}])[0].get("value", None)
|
dict_result["retailername"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}])[0].get("value", None)
|
||||||
dict_result["sold_to_party"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}])[1].get("value", None)
|
dict_result["sold_to_party"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}])[1].get("value", None)
|
||||||
dict_result["invoice_no"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}])[2].get("value", None)
|
dict_result["purchase_date"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}])[2].get("value", [])
|
||||||
dict_result["purchase_date"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}])[3].get("value", [])
|
dict_result["imei_number"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[3].get("value", [])
|
||||||
dict_result["imei_number"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[4].get("value", [])
|
dict_result["invoice_no"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}, {}])[4].get("value", None)
|
||||||
return dict_result
|
return dict_result
|
||||||
|
|
||||||
def align_fine_result(ready_predict, fine_result):
|
def align_fine_result(ready_predict, fine_result):
|
||||||
|
@ -80,6 +80,13 @@ def validate_feedback_file(csv_file_path):
|
|||||||
if missing_columns:
|
if missing_columns:
|
||||||
raise RequiredColumnException(excArgs=str(missing_columns))
|
raise RequiredColumnException(excArgs=str(missing_columns))
|
||||||
|
|
||||||
|
def validate_review(review, num_imei):
    """Validate a ``reviewed_result`` payload.

    Args:
        review: dict submitted as the reviewed result.
        num_imei: exact number of entries ``review["imei_number"]`` must hold.

    Raises:
        InvalidException: if ``review`` is not a dict, or ``imei_number`` is
            not a list containing exactly ``num_imei`` items.
        RequiredFieldException: if a field named in ``settings.FIELD`` is
            missing from ``review``.
    """
    # Reject non-dict payloads up front so they surface as a validation
    # error rather than an AttributeError/TypeError further down.
    if not isinstance(review, dict):
        raise InvalidException(excArgs='reviewed_result')

    for field in settings.FIELD:
        if field not in review:
            raise RequiredFieldException(excArgs=f'reviewed_result.{field}')

    # .get() keeps this check safe even if "imei_number" is not listed in
    # settings.FIELD and is absent from the payload.
    imei_numbers = review.get("imei_number")
    if not isinstance(imei_numbers, list) or len(imei_numbers) != num_imei:
        raise InvalidException(excArgs='imei_number')
|
||||||
|
|
||||||
def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUEST, min_file_num=1, file_field="files"):
|
def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUEST, min_file_num=1, file_field="files"):
|
||||||
total_file_size = 0
|
total_file_size = 0
|
||||||
if len(files) < min_file_num:
|
if len(files) < min_file_num:
|
||||||
@ -140,7 +147,7 @@ def get_folder_path(rq: SubscriptionRequest):
|
|||||||
logger = get_task_logger(__name__)
|
logger = get_task_logger(__name__)
|
||||||
|
|
||||||
request_id = str(rq.request_id)
|
request_id = str(rq.request_id)
|
||||||
logger.info(f"[DEBUG]: rq.process_type: {rq.process_type}")
|
logger.debug(f"rq.process_type: {rq.process_type}")
|
||||||
p_type = ProcessUtil.map_process_type_to_folder_name(int(rq.process_type))
|
p_type = ProcessUtil.map_process_type_to_folder_name(int(rq.process_type))
|
||||||
sub_id = str(rq.subscription.id)
|
sub_id = str(rq.subscription.id)
|
||||||
user_id = str(rq.subscription.user.id)
|
user_id = str(rq.subscription.user.id)
|
||||||
|
Loading…
Reference in New Issue
Block a user