Fix: partial #62
This commit is contained in:
parent
cfdced2618
commit
270008f915
1
.gitignore
vendored
1
.gitignore
vendored
@ -42,3 +42,4 @@ Jan.csv
|
||||
cope2n-api/reviewed/date.xlsx
|
||||
cope2n-api/reviewed/retailer.xlsx
|
||||
/scripts/*
|
||||
scripts/crawl_database.py
|
||||
|
@ -4,7 +4,7 @@ import requests
|
||||
from datetime import datetime
|
||||
|
||||
# Get the proxy URL from the environment variable
|
||||
interval = 60*60*1 # 1 minute
|
||||
interval = 60*60*3 # 3 hours (in seconds)
|
||||
update_cost = int(60*2)
|
||||
proxy_url = os.getenv('PROXY', "localhost")
|
||||
user = os.getenv('ADMIN_USER_NAME', "")
|
||||
|
@ -1 +1 @@
|
||||
Subproject commit 671d7917c657ad185a06772e0b707b45fe59788a
|
||||
Subproject commit bae5f732e12c5d5307b3ce14a98fa198ea49e372
|
@ -17,6 +17,7 @@ RUN pip install uvicorn gunicorn Celery
|
||||
RUN pip install pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
|
||||
RUN pip install -U openmim==0.3.7 --no-cache-dir
|
||||
RUN mim install mmcv-full==1.7.2
|
||||
RUN pip install fastdeploy-gpu-python==1.0.7 -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html --no-cache-dir
|
||||
# End integration with sdskvu
|
||||
|
||||
USER ${UID}
|
||||
|
@ -16,7 +16,7 @@ import json
|
||||
from ..exception.exceptions import InvalidException, RequiredFieldException, NotFoundException
|
||||
from ..models import SubscriptionRequest, Report, ReportFile, SubscriptionRequestFile
|
||||
from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list, extract_report_detail_list, IterAvg
|
||||
from ..utils.file import download_from_S3, dict2xlsx, save_report_to_S3, build_S3_url
|
||||
from ..utils.file import download_from_S3, dict2xlsx, save_report_to_S3, build_S3_url, validate_review
|
||||
from ..utils.redis import RedisUtils
|
||||
from ..utils.process import string_to_boolean
|
||||
from ..utils.cache import get_cache, set_cache
|
||||
@ -695,8 +695,10 @@ class AccuracyViewSet(viewsets.ViewSet):
|
||||
|
||||
if not reviewed_result:
|
||||
reviewed_result = copy.deepcopy(sample_result)
|
||||
reviewed_result["imei_number"] = [None for _ in range(subscription_request.doc_type.split(",").count("imei"))]
|
||||
if not feedback_result:
|
||||
feedback_result = copy.deepcopy(sample_result)
|
||||
feedback_result["imei_number"] = [None for _ in range(subscription_request.doc_type.split(",").count("imei"))]
|
||||
if not predicted_result:
|
||||
predicted_result = copy.deepcopy(sample_result)
|
||||
|
||||
@ -726,8 +728,10 @@ class AccuracyViewSet(viewsets.ViewSet):
|
||||
|
||||
if not reviewed_result:
|
||||
reviewed_result = copy.deepcopy(sample_result)
|
||||
reviewed_result["imei_number"] = [None for _ in range(subscription_request.doc_type.split(",").count("imei"))]
|
||||
if not feedback_result:
|
||||
feedback_result = copy.deepcopy(sample_result)
|
||||
feedback_result["imei_number"] = [None for _ in range(subscription_request.doc_type.split(",").count("imei"))]
|
||||
if not predicted_result:
|
||||
predicted_result = copy.deepcopy(sample_result)
|
||||
|
||||
@ -760,25 +764,22 @@ class AccuracyViewSet(viewsets.ViewSet):
|
||||
|
||||
elif request.method == 'POST':
|
||||
data = request.data
|
||||
|
||||
base_query = Q(request_id=request_id)
|
||||
|
||||
subscription_request = SubscriptionRequest.objects.filter(base_query)
|
||||
|
||||
if subscription_request.count() == 0:
|
||||
raise NotFoundException(excArgs=request_id)
|
||||
|
||||
subscription_request = subscription_request.first()
|
||||
|
||||
subscription_request_files = SubscriptionRequestFile.objects.filter(request=subscription_request.id)
|
||||
|
||||
if "reviewed_result" not in data:
|
||||
raise InvalidException(excArgs=f'reviewed_result')
|
||||
|
||||
reviewed_result = data["reviewed_result"]
|
||||
for field in ['retailername', 'sold_to_party', 'invoice_no', 'purchase_date', 'imei_number']:
|
||||
if not field in reviewed_result.keys():
|
||||
raise RequiredFieldException(excArgs=f'reviewed_result.{field}')
|
||||
if not subscription_request.predict_result:
|
||||
raise InvalidException(excArgs=f'request_id')
|
||||
validate_review(reviewed_result, len(subscription_request.predict_result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[3].get("value", [])))
|
||||
reviewed_result['request_id'] = request_id
|
||||
|
||||
for subscription_request_file in subscription_request_files:
|
||||
|
@ -0,0 +1,79 @@
|
||||
# myapp/management/commands/mycustomcommand.py
|
||||
from django.core.management.base import BaseCommand
|
||||
from tqdm import tqdm
|
||||
from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest
|
||||
from fwd_api.exception.exceptions import InvalidException
|
||||
from fwd_api.utils.accuracy import predict_result_to_ready
|
||||
import traceback
|
||||
import copy
|
||||
from django.utils import timezone
|
||||
|
||||
TEST_IMEI_VALUES = ["357822611219904", "RFAW2022FED", "5AWH14MT400396N", "0HU33NIW200044K", "0GJG4DBW200318X", "358975990917032", "350731691693549", "R52W70BHDWX", "R52W70BHDWX", "350073345090297", "0PBL3NHW500023N"]
|
||||
TEST_RETAILER_VALUES = ["Best Denki", "Arrow Communication"]
|
||||
KEY = "invoice_no"
|
||||
|
||||
class Command(BaseCommand):
    """Flag subscription requests that look like test traffic.

    A request is marked ``is_test_request = True`` when any of its predicted
    IMEI values appears in ``TEST_IMEI_VALUES``. Requests whose predicted
    retailer is in ``TEST_RETAILER_VALUES`` but whose IMEIs are not test
    values are only reported as suspicious and are not modified.
    """

    # NOTE(review): this help text does not describe what the command does
    # (it flags test requests) — confirm and update if it was copied over.
    help = 'Refactor database for image level'

    def add_arguments(self, parser):
        """Register the positional ``start`` and ``end`` date arguments."""
        parser.add_argument('start', type=str, help='start date, sample: 2023-01-02T00:00:00+0700')
        parser.add_argument('end', type=str, help='end date, sample: 2024-01-25T00:00:00+0800')

    def process_request(self, request, test_requests, supicious_requests):
        """Classify one request and record its id in the matching list.

        Args:
            request: the SubscriptionRequest row to inspect; it is saved
                in place when flagged as a test request.
            test_requests: list receiving the ids of flagged requests.
            supicious_requests: list receiving the ids of requests whose
                retailer matches a test retailer but whose IMEIs do not.
        """
        # Skip ids whose part before the first "." has no "_" separator.
        if len(request.request_id.split(".")[0].split("_")) < 2:
            return

        if not request.predict_result:
            self.stdout.write(self.style.WARNING(f"Key predict_result not found in {request.request_id}"))
            return
        if request.predict_result.get("status", 200) != 200:
            self.stdout.write(self.style.WARNING(f"Non-200 status in predict_result of {request.request_id}"))
            return

        # A single malformed row must not abort the whole backfill.
        try:
            contents = request.predict_result["content"]["document"][0]["content"]
        except (KeyError, IndexError, TypeError):
            self.stdout.write(self.style.WARNING(f"Malformed predict_result in {request.request_id}"))
            return

        imeis = []
        retailername = None
        for content in contents:
            label = content.get("label")
            if label == "retailername":
                retailername = content.get("value")
            elif label == "imei_number":
                # The stored value may be None; treat that as "no IMEIs".
                imeis = content.get("value") or []

        # A test IMEI alone is enough to flag the request; the retailer only
        # matters when no test IMEI was found.
        if any(imei in TEST_IMEI_VALUES for imei in imeis):
            request.is_test_request = True
            request.save()
            test_requests.append(request.request_id)
        elif retailername in TEST_RETAILER_VALUES:
            supicious_requests.append(request.request_id)

    def handle(self, *args, **options):
        """Run the classification over requests created in [start, end].

        Raises:
            InvalidException: if either date does not match
                ``%Y-%m-%dT%H:%M:%S%z``.
        """
        start = options['start']
        end = options['end']

        test_requests = []
        supicious_requests = []

        if start or end:
            date_format = '%Y-%m-%dT%H:%M:%S%z'  # explicit UTC offset required
            try:
                start_date = timezone.datetime.strptime(start, date_format)
                end_date = timezone.datetime.strptime(end, date_format)
            except (TypeError, ValueError) as err:
                self.stdout.write(f"[INFO]: start: {start}")
                self.stdout.write(f"[INFO]: end: {end}")
                raise InvalidException(excArgs="Date format") from err
            subcription_iter = SubscriptionRequest.objects.filter(created_at__range=(start_date, end_date))
        else:
            subcription_iter = SubscriptionRequest.objects.all()

        for request in tqdm(subcription_iter.iterator()):
            self.process_request(request, test_requests, supicious_requests)

        self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!'))
        self.stdout.write(self.style.SUCCESS(f'Test request: \n {test_requests} \n ============================='))
        self.stdout.write(self.style.SUCCESS(f'Supicious request: \n {supicious_requests} \n ============================='))
|
||||
|
@ -0,0 +1,49 @@
|
||||
# myapp/management/commands/mycustomcommand.py
|
||||
from django.core.management.base import BaseCommand
|
||||
from tqdm import tqdm
|
||||
from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest
|
||||
from fwd_api.exception.exceptions import InvalidException
|
||||
from fwd_api.utils.accuracy import predict_result_to_ready
|
||||
import traceback
|
||||
import copy
|
||||
from django.utils import timezone
|
||||
|
||||
class Command(BaseCommand):
    """Mark requests without feedback or a redemption id as test requests.

    Every SubscriptionRequest in the given date range whose
    ``feedback_result`` or ``redemption_id`` is empty gets
    ``is_test_request = True`` and is saved.
    """

    # NOTE(review): this help text does not describe what the command does
    # (it flags test requests) — confirm and update if it was copied over.
    help = 'Refactor database for image level'

    def add_arguments(self, parser):
        """Register the positional ``start`` and ``end`` date arguments."""
        parser.add_argument('start', type=str, help='start date, sample: 2023-01-02T00:00:00+0700')
        parser.add_argument('end', type=str, help='end date, sample: 2024-01-25T00:00:00+0800')

    def process_request(self, request):
        """Flag and save ``request`` when it lacks feedback or a redemption id."""
        if request.feedback_result and request.redemption_id:
            return
        request.is_test_request = True
        request.save()

    def handle(self, *args, **options):
        """Apply ``process_request`` to requests created in [start, end].

        Raises:
            InvalidException: if either date does not match
                ``%Y-%m-%dT%H:%M:%S%z``.
        """
        start = options['start']
        end = options['end']

        if start or end:
            date_format = '%Y-%m-%dT%H:%M:%S%z'  # explicit UTC offset required
            try:
                start_date = timezone.datetime.strptime(start, date_format)
                end_date = timezone.datetime.strptime(end, date_format)
            except (TypeError, ValueError) as err:
                self.stdout.write(f"[INFO]: start: {start}")
                self.stdout.write(f"[INFO]: end: {end}")
                raise InvalidException(excArgs="Date format") from err
            subcription_iter = SubscriptionRequest.objects.filter(created_at__range=(start_date, end_date))
        else:
            subcription_iter = SubscriptionRequest.objects.all()

        for request in tqdm(subcription_iter.iterator()):
            self.process_request(request)

        self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!'))
|
@ -536,7 +536,7 @@ def extract_report_detail_list(report_detail_list, lower=False, in_percent=True)
|
||||
"Request ID": report_file.correspond_request_id,
|
||||
"Redemption Number": report_file.correspond_redemption_id,
|
||||
"Image type": report_file.doc_type,
|
||||
"IMEI_user submitted": first_of_list(report_file.feedback_result.get("imei_number", [None])),
|
||||
"IMEI_user submitted": first_of_list(report_file.feedback_result.get("imei_number", [None])) if report_file.feedback_result else None,
|
||||
"IMEI_OCR retrieved": first_of_list(report_file.predict_result.get("imei_number", [None])),
|
||||
"IMEI Revised": first_of_list(report_file.reviewed_result.get("imei_number", [None])) if report_file.reviewed_result else None,
|
||||
"IMEI1 Accuracy": first_of_list(report_file.feedback_accuracy.get("imei_number", [None])),
|
||||
@ -623,9 +623,9 @@ def predict_result_to_ready(result):
|
||||
return dict_result
|
||||
dict_result["retailername"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}])[0].get("value", None)
|
||||
dict_result["sold_to_party"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}])[1].get("value", None)
|
||||
dict_result["invoice_no"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}])[2].get("value", None)
|
||||
dict_result["purchase_date"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}])[3].get("value", [])
|
||||
dict_result["imei_number"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[4].get("value", [])
|
||||
dict_result["purchase_date"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}])[2].get("value", [])
|
||||
dict_result["imei_number"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[3].get("value", [])
|
||||
dict_result["invoice_no"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}, {}])[4].get("value", None)
|
||||
return dict_result
|
||||
|
||||
def align_fine_result(ready_predict, fine_result):
|
||||
|
@ -80,6 +80,13 @@ def validate_feedback_file(csv_file_path):
|
||||
if missing_columns:
|
||||
raise RequiredColumnException(excArgs=str(missing_columns))
|
||||
|
||||
def validate_review(review, num_imei):
    """Validate a reviewer-submitted result payload.

    Args:
        review: the submitted ``reviewed_result`` payload; must be a dict
            containing every key listed in ``settings.FIELD``.
        num_imei: the exact number of entries ``review["imei_number"]``
            must contain.

    Raises:
        InvalidException: if ``review`` is not a dict, or ``imei_number`` is
            not a list of exactly ``num_imei`` items.
        RequiredFieldException: if a key from ``settings.FIELD`` is missing.
    """
    # The payload comes from request data, so reject non-dict values with a
    # validation error instead of failing on attribute access.
    if not isinstance(review, dict):
        raise InvalidException(excArgs='reviewed_result')

    for field in settings.FIELD:
        if field not in review:
            raise RequiredFieldException(excArgs=f'reviewed_result.{field}')

    # .get() keeps this a validation error even if "imei_number" is not one
    # of the required fields above.
    imei_numbers = review.get("imei_number")
    if not isinstance(imei_numbers, list) or len(imei_numbers) != num_imei:
        raise InvalidException(excArgs='imei_number')
|
||||
|
||||
def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUEST, min_file_num=1, file_field="files"):
|
||||
total_file_size = 0
|
||||
if len(files) < min_file_num:
|
||||
@ -140,7 +147,7 @@ def get_folder_path(rq: SubscriptionRequest):
|
||||
logger = get_task_logger(__name__)
|
||||
|
||||
request_id = str(rq.request_id)
|
||||
logger.info(f"[DEBUG]: rq.process_type: {rq.process_type}")
|
||||
logger.debug(f"rq.process_type: {rq.process_type}")
|
||||
p_type = ProcessUtil.map_process_type_to_folder_name(int(rq.process_type))
|
||||
sub_id = str(rq.subscription.id)
|
||||
user_id = str(rq.subscription.user.id)
|
||||
|
Loading…
Reference in New Issue
Block a user