Merge pull request #51 from SDSRV-IDP/dev/all_rq_for_acc

Dev/all rq for acc
Phan Thành Trung 2024-02-29 14:38:26 +07:00 committed by GitHub Enterprise
commit 605d9f32c8
24 changed files with 542 additions and 169 deletions


@@ -15,7 +15,9 @@ RUN pip install mmcv==1.6.0 -f https://download.openmmlab.com/mmcv/dst/cu116/tor
 RUN ln -s /opt/conda/lib/python3.10/site-packages/torch/lib/libcudnn.so.8 /usr/lib/libcudnn.so && \
     ln -s /opt/conda/lib/libcublas.so /usr/lib/libcublas.so
-RUN python -m pip install paddlepaddle-gpu==2.4.2.post116 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html --no-cache-dir
+# RUN python -m pip install paddlepaddle-gpu==2.4.2.post116 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html --no-cache-dir
+RUN pip install fastdeploy-gpu-python==1.0.7 -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html --no-cache-dir
 RUN python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' --no-cache-dir

@@ -1 +1 @@
-Subproject commit d01de312ab86db554ffa2f1b01396ef8d56b78ed
+Subproject commit 671d7917c657ad185a06772e0b707b45fe59788a


@@ -224,7 +224,7 @@ OVERVIEW_REFRESH_INTERVAL = 2
 OVERVIEW_REPORT_ROOT = "overview"
 OVERVIEW_REPORT_DURATION = ["30d", "7d"]
-ACC_EXCLUDE_RESEASONS = ["Invalid Input", "Handwritten information", "handwritten"]
+ACC_EXCLUDE_RESEASONS = ["Invalid Input", "Handwritten information", "handwritten", "invalid_image", "missing_information", "too_blurry_text", "too_small_text"]
 SUBS = {
     "SEAU": "AU",
@@ -233,13 +233,17 @@ SUBS = {
     "SEPCO": "PH",
     "TSE": "TH",
     "SEIN": "ID",
-    "ALL": "all"
+    "ALL": "all", # all_detail
+    "SEAO": "seao"
 }
+BAD_THRESHOLD = 0.75
+NEED_REVIEW = 1.0
+SUB_FOR_BILLING = ["all", "seao"]
 CACHES = {
     'default': {
         'BACKEND': 'django.core.cache.backends.dummy.DummyCache',
     }
 }
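These settings feed the changes below: BAD_THRESHOLD marks a file as a bad image, NEED_REVIEW is the feedback-accuracy bar under which a file still needs review, and SUB_FOR_BILLING lists the pseudo-subsidiaries ("all", "seao") that skip the redemption_id prefix filter. A minimal sketch of the review_status convention they drive (-1: no need, 0: not reviewed, 1: reviewed), mirroring calculate_subcription_file further down:

BAD_THRESHOLD = 0.75
NEED_REVIEW = 1.0

def review_status(avg_feedback, avg_reviewed):
    status = -1                      # default: nothing to review
    if avg_feedback is not None and avg_feedback < NEED_REVIEW:
        status = 0                   # imperfect feedback accuracy -> pending review
    if avg_reviewed is not None:
        status = 1                   # a reviewed accuracy exists -> reviewed
    return status

assert review_status(1.0, None) == -1
assert review_status(0.9, None) == 0
assert review_status(0.9, 0.95) == 1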


@@ -7,6 +7,7 @@ from django.utils import timezone
 from django.db.models import Q
 import uuid
 import os
+import copy
 import pytz
 from fwd import settings
 from drf_spectacular.utils import extend_schema, OpenApiParameter, OpenApiTypes
@@ -15,11 +16,12 @@ import json
 from ..exception.exceptions import InvalidException, RequiredFieldException, NotFoundException
 from ..models import SubscriptionRequest, Report, ReportFile, SubscriptionRequestFile
 from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list, extract_report_detail_list, IterAvg
-from ..utils.file import download_from_S3, convert_date_string, build_media_url_v2, build_url, build_S3_url
+from ..utils.file import download_from_S3, dict2xlsx, save_report_to_S3, build_S3_url
 from ..utils.redis import RedisUtils
 from ..utils.process import string_to_boolean
 from ..request.ReportCreationSerializer import ReportCreationSerializer
 from ..utils.subsidiary import map_subsidiary_long_to_short, map_subsidiary_short_to_long
+from ..utils.report import aggregate_overview
 from fwd_api.utils.accuracy import predict_result_to_ready
 import copy
@@ -455,8 +457,8 @@ class AccuracyViewSet(viewsets.ViewSet):
         ],
         responses=None, tags=['Accuracy']
     )
-    @action(detail=False, url_path="overview", methods=["GET"])
-    def overview(self, request):
+    @action(detail=False, url_path="overview_sumary", methods=["GET"])
+    def overview_sumary(self, request):
         if request.method == 'GET':
             _subsidiary = request.GET.get('subsidiary', "ALL")
             duration = request.GET.get('duration', "")
@@ -472,6 +474,89 @@ class AccuracyViewSet(viewsets.ViewSet):
             return JsonResponse(response, status=200)
         return JsonResponse({'error': 'Invalid request method.'}, status=405)
+    @extend_schema(
+        parameters=[
+            OpenApiParameter(
+                name='duration',
+                location=OpenApiParameter.QUERY,
+                description='one of [30d, 7d]',
+                type=OpenApiTypes.STR,
+                default='30d',
+            ),
+            OpenApiParameter(
+                name='subsidiary',
+                location=OpenApiParameter.QUERY,
+                description='Subsidiary',
+                type=OpenApiTypes.STR,
+            )
+        ],
+        responses=None, tags=['Accuracy']
+    )
+    @action(detail=False, url_path="overview", methods=["GET"])
+    def overview(self, request):
+        if request.method == 'GET':
+            _subsidiary = request.GET.get('subsidiary', "ALL")
+            duration = request.GET.get('duration', "")
+            subsidiary = map_subsidiary_long_to_short(_subsidiary)
+            if _subsidiary == "ALL":
+                # aggregate_overview from subsibdiaries
+                subsidiaries_to_include = list(settings.SUBS.values())
+                subsidiaries_to_include.remove("all")
+                # subsidiaries_to_include.remove("seao")
+                subsidiary_overview_reports = []
+                for sub in subsidiaries_to_include:
+                    key = f"{sub}_{duration}"
+                    try:
+                        this_overview = json.loads(redis_client.get_specific_cache(settings.OVERVIEW_REPORT_ROOT, key)).get("data", [])
+                        if sub != "seao":
+                            this_overview = [d for d in this_overview if d.get("subs") != "+"]
+                        else:
+                            for item in this_overview:
+                                if item.get("subs") == "+":
+                                    item["extraction_date"] = item["extraction_date"].replace("Subtotal ", "").replace("(", "").replace(")", "") + "-32"
+                        subsidiary_overview_reports += this_overview
+                    except Exception as e:
+                        print(f"[WARM]: Unable to retrive data {key} from Redis, skipping...")
+                data = aggregate_overview(subsidiary_overview_reports)
+                for item in data:
+                    if item.get("subs") == "+":
+                        item["extraction_date"] = "Subtotal (" + item["extraction_date"].replace("-32", "") + ")"
+                # Do the saving process
+                report_fine_data = copy.deepcopy(data)
+                for i, dat in enumerate(report_fine_data):
+                    keys = [x for x in list(dat.keys()) if "accuracy" in x.lower()]
+                    keys_percent = "images_quality"
+                    for x_key in report_fine_data[i][keys_percent].keys():
+                        if "percent" not in x_key:
+                            continue
+                        report_fine_data[i][keys_percent][x_key] = report_fine_data[i][keys_percent][x_key]*100
+                    for key in keys:
+                        if report_fine_data[i][key]:
+                            for x_key in report_fine_data[i][key].keys():
+                                report_fine_data[i][key][x_key] = report_fine_data[i][key][x_key]*100
+                overview_filename = _subsidiary + "_" + duration + ".xlsx"
+                data_workbook = dict2xlsx(report_fine_data, _type='report')
+                folder_path = os.path.join(settings.MEDIA_ROOT, "report", settings.OVERVIEW_REPORT_ROOT)
+                os.makedirs(folder_path, exist_ok = True)
+                file_path = os.path.join(folder_path, overview_filename)
+                data_workbook.save(file_path)
+                s3_key=save_report_to_S3(None, file_path)
+                # redis_client.set_cache(settings.OVERVIEW_REPORT_ROOT, overview_filename.replace(".xlsx", ""), json.dumps(save_data))
+            else:
+                # Retrive data from Redis
+                key = f"{subsidiary}_{duration}"
+                data = json.loads(redis_client.get_specific_cache(settings.OVERVIEW_REPORT_ROOT, key)).get("data", [])
+            response = {
+                'overview_data': data,
+            }
+            return JsonResponse(response, status=200)
+        return JsonResponse({'error': 'Invalid request method.'}, status=405)
+
     @extend_schema(
         parameters=[
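For reference, a sketch of how a client might call the rebuilt endpoint; the host and route prefix below are assumptions, only the trailing url_path="overview" and the two query parameters come from the diff:

import requests

resp = requests.get(
    "https://idp.example.com/api/accuracy/overview",   # hypothetical prefix
    params={"duration": "30d", "subsidiary": "ALL"},
    headers={"Authorization": login_token},            # token obtained as in the update script below
)
print(resp.json()["overview_data"][:2])                # merged, date-sorted rows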


@@ -119,7 +119,7 @@ def process_csv_feedback(csv_file_path, feedback_id):
                 continue
             _predict_result = copy.deepcopy(predict_result_to_ready(sub_rq.predict_result))
             _feedback_result = copy.deepcopy(sub_rq.feedback_result)
-            _reviewed_result = copy.deepcopy(sub_rq.reviewed_result)
+            # _reviewed_result = copy.deepcopy(sub_rq.reviewed_result)
             try:
                 image.processing_time = time_cost.get(image.doc_type, [0 for _ in range(image.index_in_request)])[image.index_in_request]
             except Exception as e:
@@ -132,19 +132,19 @@ def process_csv_feedback(csv_file_path, feedback_id):
                 if _feedback_result:
                     _feedback_result["imei_number"] = []
-                if _reviewed_result:
-                    _reviewed_result["imei_number"] = []
+                # if _reviewed_result:
+                #     _reviewed_result["imei_number"] = []
             else:
                 try:
                     _predict_result = {"retailername": None, "sold_to_party": None, "purchase_date": [], "imei_number": [_predict_result["imei_number"][image.index_in_request]]}
                     _feedback_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_feedback_result["imei_number"][image.index_in_request]]} if _feedback_result else None
-                    _reviewed_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_reviewed_result["imei_number"][image.index_in_request]]} if _reviewed_result else None
+                    # _reviewed_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_reviewed_result["imei_number"][image.index_in_request]]} if _reviewed_result else None
                 except Exception as e:
                     print (f"[ERROR]: {request_id} - {e}")
             image.predict_result = _predict_result
             image.feedback_result = _feedback_result
-            image.reviewed_result = _reviewed_result
+            # image.reviewed_result = _reviewed_result
             image.save()
     # update log into database
     feedback_rq = FeedbackRequest.objects.filter(feedback_id=feedback_id).first()
@@ -264,10 +264,11 @@ def upload_report_to_s3(local_file_path, s3_key, report_id, delay):
     try:
         time.sleep(delay)
        s3_client.upload_file(local_file_path, s3_key)
-        report = Report.objects.filter(report_id=report_id)[0]
-        report.S3_uploaded = True
-        report.S3_file_name = s3_key
-        report.save()
+        if report_id:
+            report = Report.objects.filter(report_id=report_id)[0]
+            report.S3_uploaded = True
+            report.S3_file_name = s3_key
+            report.save()
     except Exception as e:
         logger.error(f"Unable to set S3: {e}")
         print(f"Unable to set S3: {e}")


@@ -180,7 +180,7 @@ def make_a_report_2(report_id, query_set):
     if query_set["include_test"]:
         base_query = ~base_query
     if isinstance(query_set["subsidiary"], str):
-        if query_set["subsidiary"] and query_set["subsidiary"].lower().replace(" ", "")!="all":
+        if query_set["subsidiary"] and query_set["subsidiary"].lower().replace(" ", "") not in settings.SUB_FOR_BILLING:
             base_query &= Q(redemption_id__startswith=query_set["subsidiary"])
     if isinstance(query_set["is_reviewed"], str):
         if query_set["is_reviewed"] == "reviewed":
@@ -208,6 +208,7 @@ def make_a_report_2(report_id, query_set):
     bad_image_list = []
     number_images = 0
     number_bad_images = 0
+    review_progress = []
     # TODO: Multithreading
     # Calculate accuracy, processing time, ....Then save.
     subscription_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at')
@@ -245,6 +246,7 @@ def make_a_report_2(report_id, query_set):
         errors += request_att["err"]
         num_request += 1
+        review_progress += request_att.get("is_reviewed", [])
     report_fine_data, _save_data = report_engine.save(report.report_id, query_set.get("is_daily_report", False), query_set["include_test"])
     transaction_att = count_transactions(start_date, end_date, report.subsidiary)
@@ -277,6 +279,10 @@ def make_a_report_2(report_id, query_set):
     report.feedback_accuracy = acumulated_acc["feedback"]
     report.reviewed_accuracy = acumulated_acc["reviewed"]
     report.combined_accuracy = acumulated_acc["acumulated"]
+    report.num_reviewed = review_progress.count(1)
+    report.num_not_reviewed = review_progress.count(0)
+    report.num_no_reviewed = review_progress.count(-1)
     report.errors = "|".join(errors)
     report.status = "Ready"
@@ -292,7 +298,9 @@ def make_a_report_2(report_id, query_set):
     # Save overview dashboard
     # multiple accuracy by 100
     save_data = copy.deepcopy(_save_data)
+    review_key = "review_progress"
     for i, dat in enumerate(report_fine_data):
+        report_fine_data[i][review_key] = report_fine_data[i][review_key]*100
         keys = [x for x in list(dat.keys()) if "accuracy" in x.lower()]
         keys_percent = "images_quality"
         for x_key in report_fine_data[i][keys_percent].keys():
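The review_progress list gathers one -1/0/1 flag per report file (the ReportFile.review_status convention), so the three new Report counters are plain count() calls; a small illustration with made-up flags:

review_progress = [1, 0, -1, 1, 0, 0]          # hypothetical flags, one per file
num_reviewed = review_progress.count(1)        # 2
num_not_reviewed = review_progress.count(0)    # 3
num_no_reviewed = review_progress.count(-1)    # 1 (no review needed)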


@ -0,0 +1,128 @@
# myapp/management/commands/mycustomcommand.py
from io import StringIO
from django.core.management.base import BaseCommand
from tqdm import tqdm
from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest
from fwd_api.utils.accuracy import predict_result_to_ready
import traceback
import copy
import csv
from fwd_api.constant.common import FileCategory
import re
PREDICT_INDEX = 4
FEEDBACK_INDEX = 3
REVIEWED_INDEX = 5
REASON_INDEX = 6
COUNTER_INDEX = 9
def detect_date_format(date_string):
pattern = r'^\d{2}/\d{2}/\d{4}$'
match = re.match(pattern, date_string)
if match:
return True
else:
return False
class Command(BaseCommand):
help = 'Refactor database for image level'
def add_arguments(self, parser):
# Add your command-line arguments here
parser.add_argument('test', type=str, help='Value for the argument')
def process_request(self, request_id):
request = SubscriptionRequest.objects.filter(request_id=request_id).first()
images = SubscriptionRequestFile.objects.filter(request=request, file_category=FileCategory.Origin.value).order_by('index_in_request')
request_review = {"imei_number": [], "retailername": None, "purchase_date": None, "sold_to_party": None}
for image in images:
if image.doc_type == "imei":
request_review["imei_number"] += image.reviewed_result.get("imei_number", [])
elif image.doc_type == "invoice":
request_review["retailername"] = image.reviewed_result.get("retailername", None)
request_review["purchase_date"] = image.reviewed_result.get("purchase_date", None)
request_review["sold_to_party"] = image.reviewed_result.get("sold_to_party", None)
request.reviewed_result = request_review
request.save()
def process_requestfile(self, request_list, traversed_requestfiles, request_filename, predict_result, user_feedback, reviewed_result, reason=None, counter=None):
image = SubscriptionRequestFile.objects.filter(file_name=request_filename).first()
try:
if image.doc_type == "imei":
if request_filename not in traversed_requestfiles:
image.reviewed_result = {"imei_number": [reviewed_result], "retailername": None, "purchase_date": None, "sold_to_party": None}
else:
image.reviewed_result["imei_number"].append(reviewed_result)
if request_filename not in traversed_requestfiles:
image.feedback_result = {"imei_number": [user_feedback], "retailername": None, "purchase_date": None, "sold_to_party": None}
else:
image.feedback_result["imei_number"].append(user_feedback)
if request_filename not in traversed_requestfiles:
image.predict_result = {"imei_number": [predict_result], "retailername": None, "purchase_date": [], "sold_to_party": None}
else:
image.predict_result["imei_number"].append(predict_result)
elif image.doc_type == "invoice":
if detect_date_format(reviewed_result):
if not image.reviewed_result:
image.reviewed_result = {"imei_number": [], "retailername": None, "purchase_date": reviewed_result, "sold_to_party": None}
else:
image.reviewed_result["purchase_date"] = reviewed_result
if not image.feedback_result:
image.feedback_result = {"imei_number": [], "retailername": None, "purchase_date": user_feedback, "sold_to_party": None}
else:
image.feedback_result["purchase_date"] = user_feedback
if not image.predict_result:
image.predict_result = {"imei_number": [], "retailername": None, "purchase_date": [predict_result], "sold_to_party": None}
else:
image.predict_result["purchase_date"] = [predict_result]
else:
if not image.reviewed_result:
image.reviewed_result = {"imei_number": [], "retailername": reviewed_result, "purchase_date": None, "sold_to_party": None}
else:
image.reviewed_result["retailername"] = reviewed_result
if not image.feedback_result:
image.feedback_result = {"imei_number": [], "retailername": user_feedback, "purchase_date": None, "sold_to_party": None}
else:
image.feedback_result["retailername"] = user_feedback
if not image.predict_result:
image.predict_result = {"imei_number": [], "retailername": predict_result, "purchase_date": [], "sold_to_party": None}
else:
image.predict_result["retailername"] = predict_result
if reason:
image.reason = reason
if counter:
image.counter_measures = counter
image.save()
request_list.append(image.request.request_id)
traversed_requestfiles.append(request_filename)
if request_filename == "temp_imei_SAP_20240201130151_7e7fa87017af40c1bd079b7da6950193_0.pdf":
print(f"[INFO]: {image.reviewed_result}")
print(f"[INFO]: {image.predict_result}")
print(f"[INFO]: {image.feedback_result}")
except Exception as e:
self.stdout.write(self.style.ERROR(f"Request File: {request_filename} failed with {e}"))
print(traceback.format_exc())
def handle(self, *args, **options):
test = options['test']
request_list = []
traversed_requestfiles = []
#open csv file
with open(test, 'r') as csvfile:
reader = csv.reader(csvfile)
index = 0
for row in reader:
if index != 0:
# request_list, traversed_requestfiles, request_filename, predict_result, user_feedback, reviewed_result, reason=None, counter=None
self.process_requestfile(request_list, traversed_requestfiles, row[2], row[PREDICT_INDEX], row[FEEDBACK_INDEX], row[REVIEWED_INDEX])
index += 1
self.stdout.write(self.style.SUCCESS(f"Reverting {len(list(set(request_list)))} from images"))
for request in request_list:
self.process_request(request)
self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!'))
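Going by the scripts file added at the end of this PR, this command appears to be registered as migrate-csv-revert (the module path that would fix the actual command name is not shown in the diff):

python manage.py migrate-csv-revert reviewed/0131-0206-Mai-.csv

Column 2 of the CSV is the file name; per the *_INDEX constants, columns 3, 4 and 5 carry the feedback, predict and reviewed values.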


@@ -1,4 +1,5 @@
 # myapp/management/commands/mycustomcommand.py
+from io import StringIO
 from django.core.management.base import BaseCommand
 from tqdm import tqdm
 from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest
@@ -7,6 +8,12 @@ import traceback
 import copy
 import csv
+PREDICT_INDEX = 3
+FEEDBACK_INDEX = 2
+REVIEWED_INDEX = 4
+REASON_INDEX = 6
+COUNTER_INDEX = 9
+
 class Command(BaseCommand):
     help = 'Refactor database for image level'
@@ -14,21 +21,12 @@ class Command(BaseCommand):
         # Add your command-line arguments here
         parser.add_argument('test', type=str, help='Value for the argument')

-    def process_request(self, request, predict_result, user_feedback, reviewed_result):
+    def process_request(self, total, failed, request, predict_result, user_feedback, reviewed_result, reason, counter):
         if len(request.request_id.split(".")[0].split("_")) < 2:
             return
+        total[0] += 1
-        request_feedback = copy.deepcopy(request.feedback_result)
         request_review = copy.deepcopy(request.reviewed_result)
-        if not request_feedback:
-            request_feedback = {
-                "request_id": request.request_id,
-                "imei_number": [],
-                "retailername": "",
-                "purchase_date": "",
-                "sold_to_party": ""
-            }
         if not request_review:
             request_review = {
@@ -53,87 +51,60 @@ class Command(BaseCommand):
                 is_match = True
                 if field == 'imei_number':
-                    if not reviewed_result in request_review["imei_number"]:
+                    if not (reviewed_result in request_review["imei_number"]):
                         request_review["imei_number"].append(reviewed_result)
-                    if not user_feedback in request_feedback["imei_number"]:
-                        request_feedback["imei_number"].append(user_feedback)
                 else:
                     if not reviewed_result == request_review[field]:
                         request_review[field] = reviewed_result
-                    if not user_feedback == request_feedback[field]:
-                        request_feedback[field] = user_feedback
-                _predict_result = copy.deepcopy(predict_result_to_ready(request.predict_result))
-                _feedback_result = copy.deepcopy(request.feedback_result)
                 _reviewed_result = copy.deepcopy(request.reviewed_result)
-                if not _feedback_result:
-                    _feedback_result = {
-                        "imei_number": [],
-                        "retailername": "",
-                        "purchase_date": "",
-                        "sold_to_party": ""
-                    }
                 if not _reviewed_result:
                     _reviewed_result = {
+                        "request_id": image.request_id,
                         "imei_number": [],
                         "retailername": "",
                         "purchase_date": "",
                         "sold_to_party": ""
                     }
-                if image.doc_type == "invoice":
-                    _predict_result[field] = predict_result
-                    _predict_result["imei_number"] = []
-                    if _feedback_result:
-                        _feedback_result[field] = user_feedback
-                        _feedback_result["imei_number"] = []
-                    else:
-                        None
+                if image.doc_type == "invoice" and field in ['retailername', 'purchase_date']:
                     if _reviewed_result:
                         _reviewed_result[field] = reviewed_result
                         _reviewed_result["imei_number"] = []
                     else:
                         None
-                else:
-                    _predict_result = {
-                        "retailername": None,
-                        "sold_to_party": None,
-                        "purchase_date": [],
-                        "imei_number": [predict_result]
-                    }
-                    _feedback_result = {
-                        "retailername": None,
-                        "sold_to_party": None,
-                        "purchase_date": None,
-                        "imei_number": [user_feedback]
-                    } if _feedback_result else None
+                elif image.doc_type == "imei" and field == "imei_number":
                     _reviewed_result = {
                         "retailername": None,
                         "sold_to_party": None,
                         "purchase_date": None,
                         "imei_number": [reviewed_result]
                     } if _reviewed_result else None
-                image.predict_result = _predict_result
-                image.feedback_result = _feedback_result
                 image.reviewed_result = _reviewed_result
+                if reason:
+                    image.reason = reason
+                if counter:
+                    image.counter_measures = counter
                 image.save()
-            request.feedback_result = request_feedback
             request.reviewed_result = request_review
-            request.feedback_result["request_id"] = request.request_id
             request.reviewed_result["request_id"] = request.request_id
             request.is_reviewed = True
             request.save()
         except Exception as e:
             self.stdout.write(self.style.ERROR(f"Request: {request.request_id} failed with {e}"))
+            failed[0] += 1
             print(traceback.format_exc())
         if not is_match:
+            failed[0] += 1
             print("FAIL =====>", image.feedback_result, predict_result, user_feedback, reviewed_result)

     def handle(self, *args, **options):
         test = options['test']
+        total = [0]
+        failed = [0]
         #open csv file
         with open(test, 'r') as csvfile:
             reader = csv.reader(csvfile)
@@ -144,7 +115,10 @@ class Command(BaseCommand):
                     if not request:
                         print("Not found ====>", row)
                     else:
-                        self.process_request(request, row[3], row[2], row[4])
+                        # request, predict_result, user_feedback, reviewed_result, reason, counter
+                        self.process_request(total, failed, request, row[PREDICT_INDEX], row[FEEDBACK_INDEX], row[REVIEWED_INDEX], row[REASON_INDEX], row[COUNTER_INDEX])
                     index += 1
+        self.stdout.write(self.style.SUCCESS(f"Failed/Total: {failed[0]}/{total[0]}"))
         self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!'))
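Aside on the new counters: total and failed are passed as one-element lists so process_request can update the caller's tallies in place, since rebinding a plain int argument would not propagate back; a minimal sketch of the pattern:

total = [0]
def tick(counter):
    counter[0] += 1   # mutates the shared list; `counter += 1` on an int would be lost
tick(total)
assert total == [1]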


@@ -2,17 +2,57 @@
 from django.core.management.base import BaseCommand
 from tqdm import tqdm
 from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest
+from fwd_api.exception.exceptions import InvalidException
 from fwd_api.utils.accuracy import predict_result_to_ready
 import traceback
 import copy
+from django.utils import timezone
+
+RETAILER_LIST = [
+    '',
+    'Amazon.sg',
+    'Gain City (TV/AV)',
+    'Harvey Norman (TV/AV)',
+    'KRIS SHOP',
+    'Lazada (Samsung Brand Store)',
+    'M1 Shop',
+    'Mohamed Mustafa & Samsuddin CO (TV/AV)',
+    'Parisilk (TV/AV)',
+    'Shopee (Samsung Brand Store)',
+    'StarHub Shop',
+    'M1 Shop',
+    'Ectri',
+    'Whandising',
+    '3 Mobile',
+    'Mister Mobile',
+    'Lazada',
+    'Mister Mobile',
+    'Samsung Experience Store',
+    'A & Samsuddin Co.',
+    'Parisilk',
+    'Samsung Brand Store',
+    'Shopee',
+    'M1 Shop',
+    'Onephone Online',
+    '3 Mobile',
+    'Samsung Experience Store',
+    'Challenger',
+    'Eas Marketing',
+    'Ing Mobile',
+    'Starhub Shop',
+    'Mister Mobile',
+    'Onephone Online',
+    'Starho'
+]
+RETAILER_LIST = list(set(RETAILER_LIST))
+
 class Command(BaseCommand):
     help = 'Refactor database for image level'

     def add_arguments(self, parser):
         # Add your command-line arguments here
-        parser.add_argument('test', type=str, help='Value for the argument')
+        parser.add_argument('start', type=str, help='start date, sample: 2023-01-02T00:00:00+0700')
+        parser.add_argument('end', type=str, help='end date, sample: 2023-01-03T00:00:00+0700')

     def process_request(self, request):
         if len(request.request_id.split(".")[0].split("_")) < 2:
@@ -29,7 +69,7 @@ class Command(BaseCommand):
         for i, image in enumerate(images):
             # temp_imei_SAP_20240127223644_a493434edbf84fc08aeb87ef6cdde102_0.jpg
             try:
-                image.index_in_request = int(image.file_name.split(".")[0].split("_")[-1]) if len(image.file_name.split(".")[0].split("_")) > 4 else 0
+                # image.index_in_request = int(image.file_name.split(".")[0].split("_")[-1]) if len(image.file_name.split(".")[0].split("_")) > 4 else 0
                 image.doc_type = image.file_name.split(".")[0].split("_")[1] if len(image.file_name.split(".")[0].split("_")) > 4 else "all"
                 image.processing_time = time_cost[image.doc_type][image.index_in_request]
                 if not request.predict_result:
@@ -53,7 +93,13 @@ class Command(BaseCommand):
                 else:
                     _predict_result = {"retailername": None, "sold_to_party": None, "purchase_date": [], "imei_number": [_predict_result["imei_number"][image.index_in_request]]}
                     _feedback_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_feedback_result["imei_number"][image.index_in_request]]} if _feedback_result else None
-                    _reviewed_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_reviewed_result["imei_number"][image.index_in_request]]} if _reviewed_result else None
+                    if isinstance(_reviewed_result, dict) and (len(_reviewed_result.get("imei_number", [])) or any(element in RETAILER_LIST for element in _reviewed_result.get("imei_number", []))) :
+                        _reviewed_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_reviewed_result["imei_number"][image.index_in_request]]} if _reviewed_result else None
+                    else:
+                        _reviewed_result = None
+                        request.reviewed_result = None
+                        request.is_reviewed = False
+                        request.save()
                 image.predict_result = _predict_result
                 image.feedback_result = _feedback_result
                 image.reviewed_result = _reviewed_result
@@ -64,8 +110,28 @@ class Command(BaseCommand):
                 continue

     def handle(self, *args, **options):
-        test = options['test']
-        subcription_iter = SubscriptionRequest.objects.all()
+        start = options['start']
+        end = options['end']
+        white_list = [
+        ]
+        if start or end:
+            try:
+                start_date = timezone.datetime.strptime(start, '%Y-%m-%dT%H:%M:%S%z') # We care only about day precision only
+                end_date = timezone.datetime.strptime(end, '%Y-%m-%dT%H:%M:%S%z')
+                # start_date = timezone.make_aware(start_date)
+                # end_date = timezone.make_aware(end_date)
+            except Exception as e:
+                print(f"[INFO]: start: {start}")
+                print(f"[INFO]: end: {end}")
+                raise InvalidException(excArgs="Date format")
+            subcription_iter = SubscriptionRequest.objects.filter(created_at__range=(start_date, end_date))
+        else:
+            subcription_iter = SubscriptionRequest.objects.all()
         for request in tqdm(subcription_iter.iterator()):
+            if request.request_id not in white_list:
+                continue
             self.process_request(request)
         self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!'))


@@ -0,0 +1,33 @@
+# Generated by Django 4.1.3 on 2024-02-28 09:30
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('fwd_api', '0182_report_combined_accuracy'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='report',
+            name='num_no_reviewed',
+            field=models.IntegerField(default=0),
+        ),
+        migrations.AddField(
+            model_name='report',
+            name='num_not_reviewed',
+            field=models.IntegerField(default=0),
+        ),
+        migrations.AddField(
+            model_name='report',
+            name='num_reviewed',
+            field=models.IntegerField(default=0),
+        ),
+        migrations.AddField(
+            model_name='reportfile',
+            name='review_status',
+            field=models.IntegerField(default=-1),
+        ),
+    ]


@@ -28,6 +28,9 @@ class Report(models.Model):
     number_bad_images = models.IntegerField(default=0)
     number_imei = models.IntegerField(default=0)
     number_invoice = models.IntegerField(default=0)
+    num_reviewed = models.IntegerField(default=0)
+    num_not_reviewed = models.IntegerField(default=0)
+    num_no_reviewed = models.IntegerField(default=0)
     number_imei_transaction = models.IntegerField(default=0)
     number_invoice_transaction = models.IntegerField(default=0)


@@ -32,6 +32,7 @@ class ReportFile(models.Model):
     bad_image_reason = models.TextField(default="")
     counter_measures = models.TextField(default="")
     error = models.TextField(default="")
+    review_status = models.IntegerField(default=-1) # -1: No need review, 0: not reviewed, 1: reviewed


@@ -17,8 +17,6 @@ from fwd import settings
 from ..models import SubscriptionRequest, Report, ReportFile
 import json

-BAD_THRESHOLD = 0.75
-
 valid_keys = ["retailername", "sold_to_party", "purchase_date", "imei_number"]

 class ReportAccumulateByRequest:
@@ -67,7 +65,8 @@ class ReportAccumulateByRequest:
                 'retailername': IterAvg(),
                 'sold_to_party': IterAvg()
             },
-            'num_request': 0
+            'num_request': 0,
+            "review_progress": []
         }
         self.day_format = {
             'subs': sub,
@@ -110,7 +109,8 @@ class ReportAccumulateByRequest:
                 'sold_to_party': IterAvg()
             },
             "report_files": [],
-            'num_request': 0
+            "num_request": 0,
+            "review_progress": []
         },
     @staticmethod
@@ -155,7 +155,7 @@ class ReportAccumulateByRequest:
         total["usage"]["imei"] += 1 if doc_type == "imei" else 0
         total["usage"]["invoice"] += 1 if doc_type == "invoice" else 0
         total["usage"]["total_images"] += 1
-
+        total["review_progress"].append(report_file.review_status)
         return total

     @staticmethod
@@ -192,7 +192,7 @@ class ReportAccumulateByRequest:
             print(f"[WARM]: Weird doctype: {report_file.doc_type}")
             day_data["average_processing_time"][report_file.doc_type] = IterAvg()
         day_data["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
-
+        day_data["review_progress"].append(report_file.review_status)
         return day_data

     def add(self, request, report_files):
@@ -212,9 +212,18 @@ class ReportAccumulateByRequest:
         self.data[this_month][1][this_day]['num_request'] += 1
         self.data[this_month][0]['num_request'] += 1
         for report_file in report_files:
-            self.data[this_month][0] = self.update_total(self.data[this_month][0], report_file) # Update the subtotal within the month
-            self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], report_file) # Update the subtotal of the day
+            _report_file = copy.deepcopy(report_file)
+            if _report_file.is_bad_image or _report_file.bad_image_reason in settings.ACC_EXCLUDE_RESEASONS:
+                _report_file.acc = None
+                for t in _report_file.feedback_accuracy.keys():
+                    _report_file.feedback_accuracy[t] = []
+                for t in _report_file.reviewed_accuracy.keys():
+                    _report_file.reviewed_accuracy[t] = []
+            self.data[this_month][0] = self.update_total(self.data[this_month][0], _report_file) # Update the subtotal within the month
+            self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], _report_file) # Update the subtotal of the day

     def count_transactions_within_day(self, date_string):
         # convert this day into timezone.datetime at UTC
@@ -310,6 +319,7 @@ class ReportAccumulateByRequest:
             _data[month][1][day]["reviewed_accuracy"]["purchase_date"] = _data[month][1][day]["reviewed_accuracy"]["purchase_date"]()
             _data[month][1][day]["reviewed_accuracy"]["retailername"] = _data[month][1][day]["reviewed_accuracy"]["retailername"]()
             _data[month][1][day]["reviewed_accuracy"]["sold_to_party"] = _data[month][1][day]["reviewed_accuracy"]["sold_to_party"]()
+            _data[month][1][day]["review_progress"] = _data[month][1][day]["review_progress"].count(1)/(_data[month][1][day]["review_progress"].count(0) + _data[month][1][day]["review_progress"].count(1)) if (_data[month][1][day]["review_progress"].count(0) + _data[month][1][day]["review_progress"].count(1)) > 0 else 0
             _data[month][1][day].pop("report_files")
             _data[month][1][day]["images_quality"]["successful_percent"] = _data[month][1][day]["images_quality"]["successful"]/_data[month][1][day]["total_images"] if _data[month][1][day]["total_images"] > 0 else 0
@@ -333,6 +343,7 @@ class ReportAccumulateByRequest:
             _data[month][0]["reviewed_accuracy"]["purchase_date"] = _data[month][0]["reviewed_accuracy"]["purchase_date"]()
             _data[month][0]["reviewed_accuracy"]["retailername"] = _data[month][0]["reviewed_accuracy"]["retailername"]()
             _data[month][0]["reviewed_accuracy"]["sold_to_party"] = _data[month][0]["reviewed_accuracy"]["sold_to_party"]()
+            _data[month][0]["review_progress"] = _data[month][0]["review_progress"].count(1)/(_data[month][0]["review_progress"].count(0) + _data[month][0]["review_progress"].count(1)) if (_data[month][0]["review_progress"].count(0) + _data[month][0]["review_progress"].count(1)) > 0 else 0
         return _data
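Unwinding the long conditional above: review progress is the share of reviewed files among those that needed review, with -1 ("no need to review") excluded from the denominator. A minimal equivalent:

def review_ratio(flags):
    # flags holds review_status values: -1 (no need), 0 (pending), 1 (reviewed)
    reviewed, pending = flags.count(1), flags.count(0)
    return reviewed / (reviewed + pending) if (reviewed + pending) > 0 else 0

assert review_ratio([1, 1, 0, -1]) == 2 / 3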
@@ -575,7 +586,7 @@ def extract_report_detail_list(report_detail_list, lower=False, in_percent=True):
 def count_transactions(start_date, end_date, subsidiary="all"):
     base_query = Q(created_at__range=(start_date, end_date))
     base_query &= Q(is_test_request=False)
-    if subsidiary and subsidiary.lower().replace(" ", "")!="all":
+    if subsidiary and subsidiary.lower().replace(" ", "") not in settings.SUB_FOR_BILLING:
         base_query &= Q(redemption_id__startswith=subsidiary)
     transaction_att = {}
@@ -707,6 +718,7 @@ def calculate_avg_accuracy(acc, type, keys=[]):
     return sum(acc_list)/len(acc_list) if len(acc_list) > 0 else None

+# Deprecated
 def calculate_and_save_subcription_file(report, request):
     request_att = {"acc": {"feedback": {"imei_number": [],
                            "purchase_date": [],
@@ -772,11 +784,18 @@ def calculate_and_save_subcription_file(report, request):
     return request_att

+# def result_maximize_list_values(result, acc):
+#     for k in acc.keys():
+#         if isinstance(acc[k], list) and len(acc[k]) > 0:
+
 def acc_maximize_list_values(acc):
+    pos = {}
     for k in acc.keys():
+        pos[k] = 0
         if isinstance(acc[k], list) and len(acc[k]) > 0:
             acc[k] = [max(acc[k])]
-    return acc
+            pos[k] = acc[k].index(acc[k][0])
+    return acc, pos

 def calculate_a_request(report, request):
     request_att = {"acc": {"feedback": {"imei_number": [],
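A quick illustration of the new acc_maximize_list_values signature, which now also returns a per-key index dict (note that, as written, pos is computed on the already-collapsed single-element list):

acc = {"imei_number": [0.8, 1.0], "retailername": []}
acc, pos = acc_maximize_list_values(acc)
# acc == {"imei_number": [1.0], "retailername": []}
# pos == {"imei_number": 0, "retailername": 0}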
@@ -795,15 +814,23 @@ def calculate_a_request(report, request):
                            "sold_to_party": [],
                            }},
                    "err": [],
-                   "time_cost": {},
+                   "time_cost": {"imei": [],
+                                 "invoice": []},
                    "total_images": 0,
                    "bad_images": 0,
                    "bad_image_list": [],
+                   "is_reviewed": [], # -1: No need to review, 0: Not reviewed, 1: Reviewed
                    }
     images = SubscriptionRequestFile.objects.filter(request=request, file_category=FileCategory.Origin.value)
     report_files = []
     for image in images:
         status, att = calculate_subcription_file(image)
+        att["acc"]["feedback"], fb_max_indexes = acc_maximize_list_values(att["acc"]["feedback"])
+        att["acc"]["reviewed"], rv_max_indexes = acc_maximize_list_values(att["acc"]["reviewed"])
+        _att = copy.deepcopy(att)
         if status != 200:
             continue
         image.feedback_accuracy = att["acc"]["feedback"] # dict {key: [values]}
@@ -820,6 +847,17 @@ def calculate_a_request(report, request):
             _sub = map_subsidiary_short_to_long(request.redemption_id[:2])
         else:
             print(f"[WARM]: empty redemption_id, check request: {request.request_id}")
+        # Little trick to replace purchase date to normalized
+        if len(att["normalized_data"]["feedback"].get("purchase_date", [])) > 0:
+            image.predict_result["purchase_date"] = [att["normalized_data"]["feedback"]["purchase_date"][i][0] for i in range(len(att["normalized_data"]["feedback"]["purchase_date"]))]
+            image.feedback_result["purchase_date"] = att["normalized_data"]["feedback"]["purchase_date"][fb_max_indexes["purchase_date"]][1]
+        if len(att["normalized_data"]["reviewed"].get("purchase_date", [])) > 0:
+            image.predict_result["purchase_date"] = [att["normalized_data"]["reviewed"]["purchase_date"][i][0] for i in range(len(att["normalized_data"]["reviewed"]["purchase_date"]))]
+            image.reviewed_result["purchase_date"] = att["normalized_data"]["reviewed"]["purchase_date"][rv_max_indexes["purchase_date"]][1]
+        if request.is_reviewed:
+            att["is_reviewed"] = 1
+        request_att["is_reviewed"].append(att["is_reviewed"])
         new_report_file = ReportFile(report=report,
                                      subsidiary=_sub,
                                      correspond_request_id=request.request_id,
@@ -828,31 +866,34 @@ def calculate_a_request(report, request):
                                      predict_result=image.predict_result,
                                      feedback_result=image.feedback_result,
                                      reviewed_result=image.reviewed_result,
-                                     feedback_accuracy=acc_maximize_list_values(att["acc"]["feedback"]),
-                                     reviewed_accuracy=acc_maximize_list_values(att["acc"]["reviewed"]),
+                                     feedback_accuracy=att["acc"]["feedback"],
+                                     reviewed_accuracy=att["acc"]["reviewed"],
                                      acc=att["avg_acc"],
                                      is_bad_image=att["is_bad_image"],
-                                     is_reviewed="Yes" if request.is_reviewed else "No",
+                                     is_reviewed= "Yes" if request.is_reviewed else "No",
                                      time_cost=image.processing_time,
                                      bad_image_reason=image.reason,
                                      counter_measures=image.counter_measures,
-                                     error="|".join(att["err"])
+                                     error="|".join(att["err"]),
+                                     review_status=att["is_reviewed"],
                                      )
         report_files.append(new_report_file)
-        _att = copy.deepcopy(att)
         if att["is_bad_image"] or image.reason in settings.ACC_EXCLUDE_RESEASONS:
-            request_att["bad_image_list"].append(image.file_name)
+            if att["is_bad_image"]:
+                request_att["bad_image_list"].append(image.file_name)
+            # if image.reason in settings.ACC_EXCLUDE_RESEASONS:
+            #     print(f"[DEBUG]: {image.reason}")
             # TODO: Exclude bad image accuracy from average accuracy
             _att["avg_acc"] = None
-            for t in ["feedback", "reviewed"]:
-                for k in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
+            for t in _att["acc"].keys():
+                for k in _att["acc"][t].keys():
                     _att["acc"][t][k] = []
-        if request_att["time_cost"].get(image.doc_type, None):
-            request_att["time_cost"][image.doc_type].append(image.processing_time)
         else:
-            request_att["time_cost"][image.doc_type] = [image.processing_time]
+            if request_att["time_cost"].get(image.doc_type, None):
+                request_att["time_cost"][image.doc_type].append(image.processing_time)
+            else:
+                request_att["time_cost"][image.doc_type] = [image.processing_time]

         try:
             request_att["acc"]["feedback"]["imei_number"] += _att["acc"]["feedback"]["imei_number"]
@@ -865,13 +906,14 @@ def calculate_a_request(report, request):
             request_att["acc"]["reviewed"]["retailername"] += _att["acc"]["reviewed"]["retailername"]
             request_att["acc"]["reviewed"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"]

-            request_att["acc"]["acumulated"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if att["acc"]["reviewed"]["imei_number"] else att["acc"]["feedback"]["imei_number"]
-            request_att["acc"]["acumulated"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if att["acc"]["reviewed"]["purchase_date"] else att["acc"]["feedback"]["purchase_date"]
-            request_att["acc"]["acumulated"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if att["acc"]["reviewed"]["retailername"] else att["acc"]["feedback"]["retailername"]
-            request_att["acc"]["acumulated"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if att["acc"]["reviewed"]["sold_to_party"] else att["acc"]["feedback"]["sold_to_party"]
+            request_att["acc"]["acumulated"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if _att["acc"]["reviewed"]["imei_number"] else _att["acc"]["feedback"]["imei_number"]
+            request_att["acc"]["acumulated"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if _att["acc"]["reviewed"]["purchase_date"] else _att["acc"]["feedback"]["purchase_date"]
+            request_att["acc"]["acumulated"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if _att["acc"]["reviewed"]["retailername"] else _att["acc"]["feedback"]["retailername"]
+            request_att["acc"]["acumulated"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if _att["acc"]["reviewed"]["sold_to_party"] else _att["acc"]["feedback"]["sold_to_party"]

-            request_att["bad_images"] += int(_att["is_bad_image"])
-            request_att["total_images"] += 1
+            if image.reason not in settings.ACC_EXCLUDE_RESEASONS:
+                request_att["bad_images"] += int(_att["is_bad_image"])
+                request_att["total_images"] += 1
             request_att["err"] += _att["err"]
         except Exception as e:
             print(f"[ERROR]: failed to calculate request: {request.request_id} - request_file: {image.file_name} because of {e}")
@@ -882,9 +924,13 @@ def calculate_a_request(report, request):
 def calculate_subcription_file(subcription_request_file):
     att = {"acc": {"feedback": {},
                    "reviewed": {}},
+           "normalized_data": {"feedback": {},
+                               "reviewed": {}},
            "err": [],
            "is_bad_image": False,
-           "avg_acc": None}
+           "avg_acc": None,
+           "is_reviewed": -1, # -1: No need to review, 0: Not reviewed, 1: Reviewed
+           }
     if not subcription_request_file.predict_result:
         return 400, att
@@ -898,32 +944,29 @@ def calculate_subcription_file(subcription_request_file):
     for key_name in valid_keys:
         try:
-            att["acc"]["feedback"][key_name], _ = calculate_accuracy(key_name, inference_result, feedback_result)
-            att["acc"]["reviewed"][key_name], _ = calculate_accuracy(key_name, inference_result, reviewed_result)
+            att["acc"]["feedback"][key_name], att["normalized_data"]["feedback"][key_name] = calculate_accuracy(key_name, inference_result, feedback_result)
+            att["acc"]["reviewed"][key_name], att["normalized_data"]["reviewed"][key_name] = calculate_accuracy(key_name, inference_result, reviewed_result)
         except Exception as e:
             att["err"].append(str(e))
             # print(f"[DEBUG]: predict_result: {subcription_request_file.predict_result}")
             # print(f"[DEBUG]: e: {e} -key_name: {key_name}")
     subcription_request_file.feedback_accuracy = att["acc"]["feedback"]
     subcription_request_file.reviewed_accuracy = att["acc"]["reviewed"]
-    subcription_request_file.save()
     avg_reviewed = calculate_avg_accuracy(att["acc"], "reviewed", ["retailername", "sold_to_party", "purchase_date", "imei_number"])
     avg_feedback = calculate_avg_accuracy(att["acc"], "feedback", ["retailername", "sold_to_party", "purchase_date", "imei_number"])
     if avg_feedback is not None or avg_reviewed is not None:
         avg_acc = 0
         if avg_feedback is not None:
             avg_acc = avg_feedback
+            if avg_feedback < settings.NEED_REVIEW:
+                att["is_reviewed"] = 0
         if avg_reviewed is not None:
             avg_acc = avg_reviewed
+            att["is_reviewed"] = 1
         att["avg_acc"] = avg_acc
-        if avg_acc < BAD_THRESHOLD:
+        if avg_acc < settings.BAD_THRESHOLD:
             att["is_bad_image"] = True
-            # exclude bad images
-            # for key_name in valid_keys:
-            #     att["acc"]["feedback"][key_name] = []
-            #     att["acc"]["reviewed"][key_name] = []
-            #     att["avg_acc"] = None
     return 200, att

 def calculate_attributions(request): # for one request, return in order
@@ -969,11 +1012,11 @@ def calculate_attributions(request): # for one request, return in order
     avg_invoice_feedback = calculate_avg_accuracy(acc, "feedback", ["retailername", "sold_to_party", "purchase_date"])
     avg_invoice_reviewed = calculate_avg_accuracy(acc, "reviewed", ["retailername", "sold_to_party", "purchase_date"])
     if avg_invoice_feedback is not None or avg_invoice_reviewed is not None:
-        if max([x for x in [avg_invoice_feedback, avg_invoice_reviewed] if x is not None]) < BAD_THRESHOLD:
+        if max([x for x in [avg_invoice_feedback, avg_invoice_reviewed] if x is not None]) < settings.BAD_THRESHOLD:
             image_quality_num[1] += 1
     for i, _ in enumerate(acc["feedback"]["imei_number"]):
         if acc["feedback"]["imei_number"][i] is not None and acc["reviewed"]["imei_number"][i] is not None:
-            if max([x for x in [acc["feedback"]["imei_number"][i], acc["reviewed"]["imei_number"][i]] if x is not None]) < BAD_THRESHOLD:
+            if max([x for x in [acc["feedback"]["imei_number"][i], acc["reviewed"]["imei_number"][i]] if x is not None]) < settings.BAD_THRESHOLD:
                 image_quality_num[1] += 1
     # time cost and quality calculation
     # TODO: to be deprecated, doc_type would be in file level in the future


@@ -489,7 +489,7 @@ def dict2xlsx(input: json, _type='report'):
'O': 'average_accuracy_rate.retailer_name',
'P': 'average_processing_time.imei',
'Q': 'average_processing_time.invoice',
-'R': 'preview_progress'
+'R': 'review_progress'
}
start_index = 5
@@ -529,13 +529,13 @@ def dict2xlsx(input: json, _type='report'):
ws[key + str(start_index)] = "-"
ws[key + str(start_index)].border = border
ws[key + str(start_index)].font = font_black
-if 'accuracy' in mapping[key] or 'time' in mapping[key] or 'percent' in mapping[key] or 'speed' in mapping[key]:
+if 'accuracy' in mapping[key] or 'time' in mapping[key] or 'percent' in mapping[key] or 'speed' in mapping[key] or mapping[key] in ["review_progress"]:
ws[key + str(start_index)].number_format = '0.0'
if _type == 'report':
if subtotal['subs'] == '+':
ws[key + str(start_index)].font = font_black_bold
-if key in ['A', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q']:
+if key in ['A', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R']:
ws[key + str(start_index)].fill = fill_gray
elif key == 'B':
ws[key + str(start_index)].fill = fill_green
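For context, a hedged openpyxl sketch of the formatting rule the first change extends: any column whose mapped field names a rate, time, percent, or speed — and now review_progress — is rendered with one decimal place. The mapping excerpt and cell values below are invented; only the predicate mirrors the patched line (assumes openpyxl is installed).

from openpyxl import Workbook

wb = Workbook()
ws = wb.active

# Invented excerpt of the report mapping; 'R' is the new review_progress column.
mapping = {'Q': 'average_processing_time.invoice', 'R': 'review_progress'}
values = {'Q': 1.2345, 'R': 87.5}
start_index = 5

for key, field in mapping.items():
    cell = ws[key + str(start_index)]
    cell.value = values[key]
    # Same predicate as the patched line: rate/time/percent/speed fields,
    # plus review_progress, are displayed with one decimal place.
    if ('accuracy' in field or 'time' in field or 'percent' in field
            or 'speed' in field or field in ["review_progress"]):
        cell.number_format = '0.0'

wb.save('sketch.xlsx')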
View File
@@ -0,0 +1,7 @@
+CAT_VALUES = {
+"ALL": "ZZZZZZZZ",
+"SEAO": "ZZ"
+}
+def aggregate_overview(overview_list):
+overview_list = sorted(overview_list, key=lambda x: x["extraction_date"] + CAT_VALUES.get(x["subs"], x["subs"]), reverse=True)
+return overview_list
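The sort key concatenates extraction_date with a per-subsidiary string, so rows are ordered by date first, and the aggregate rows ("ALL" padded to "ZZZZZZZZ", "SEAO" padded to "ZZ") float above plain subsidiary codes that share the same date. A small usage sketch; the sample rows are invented and the function is restated here only to keep the snippet self-contained.

# Functionally equivalent to the new helper above, exercised on invented rows.
CAT_VALUES = {"ALL": "ZZZZZZZZ", "SEAO": "ZZ"}

def aggregate_overview(overview_list):
    return sorted(
        overview_list,
        key=lambda x: x["extraction_date"] + CAT_VALUES.get(x["subs"], x["subs"]),
        reverse=True,
    )

rows = [
    {"extraction_date": "2024-02-26", "subs": "AU"},
    {"extraction_date": "2024-02-26", "subs": "ALL"},
    {"extraction_date": "2024-02-25", "subs": "SEAO"},
]
# Newest date first; within 2024-02-26, "ALL" wins because its padded key
# "2024-02-26ZZZZZZZZ" sorts above "2024-02-26AU" in descending order.
for r in aggregate_overview(rows):
    print(r["extraction_date"], r["subs"])
# 2024-02-26 ALL
# 2024-02-26 AU
# 2024-02-25 SEAO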
View File

@@ -1 +1 @@
-Subproject commit d01de312ab86db554ffa2f1b01396ef8d56b78ed
+Subproject commit 671d7917c657ad185a06772e0b707b45fe59788a
View File
@@ -2,10 +2,10 @@ from fwd.settings import SUBS
def map_subsidiary_long_to_short(long_sub):
short_sub = SUBS.get(long_sub.upper(), "all")
-return short_sub.upper()
+return short_sub

def map_subsidiary_short_to_long(short_sub):
for k, v in SUBS.items():
-if v == short_sub.upper():
+if v == short_sub.upper() or v == short_sub:
return k
-return "ALL"
+return "SEAO"
View File
@@ -0,0 +1,2 @@
+python manage.py migrate-csv-revert reviewed/0131-0206-Mai-.csv
+python manage.py migrate-database-010224 2024-01-28T00:00:00+0700 2024-02-07T00:00:00+0700
View File
@@ -5,7 +5,7 @@ from datetime import datetime
# Get the proxy URL from the environment variable
interval = 60*60*1 # 1 hour
-update_cost = int(60*1.5)
+update_cost = int(60*2)
proxy_url = os.getenv('PROXY', "localhost")

# Define the login API URL
@@ -15,8 +15,8 @@ login_token = None
# Define the login credentials
login_credentials = {
'username': 'sbt',
-# 'password': '7Eg4AbWIXDnufgn'
-'password': 'abc'
+'password': '7Eg4AbWIXDnufgn'
+# 'password': 'abc'
}

# Define the command to call the update API
@@ -29,7 +29,8 @@ update_data = {
"report_overview_duration"
-def update_report(login_token, report_overview_duration=["30d", "7d"], subsidiary=["all", "SEAU", "SESP", "SME", "SEPCO", "TSE", "SEIN"]):
+# def update_report(login_token, report_overview_duration=["30d", "7d"], subsidiary=["all", "SEAU", "SESP", "SME", "SEPCO", "TSE", "SEIN"]):
+def update_report(login_token, report_overview_duration=["7d", "30d"], subsidiary=["SEAO", "SEAU", "SESP", "SME", "SEPCO", "TSE", "SEIN"]):
headers = {'Authorization': login_token}
for dur in report_overview_duration:
for sub in subsidiary:
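For reviewers, a hedged sketch of what this loop ends up doing: one authorized refresh call per (duration, subsidiary) pair, with SEAO replacing "all" in the default list. The endpoint URL and query-parameter names below are placeholders, not the project's actual API.

import requests

def update_report(login_token,
                  report_overview_duration=["7d", "30d"],
                  subsidiary=["SEAO", "SEAU", "SESP", "SME", "SEPCO", "TSE", "SEIN"]):
    headers = {'Authorization': login_token}
    for dur in report_overview_duration:
        for sub in subsidiary:
            # One refresh request per (duration, subsidiary) pair.
            # Placeholder URL and parameter names, for illustration only.
            resp = requests.get(
                "http://localhost/placeholder-update-endpoint",
                headers=headers,
                params={"duration": dur, "subsidiary": sub},
            )
            resp.raise_for_status()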
View File
@@ -21,6 +21,7 @@ interface DataType {
invoiceAPT: number;
snImeiTC: number; // TC: transaction count
invoiceTC: number;
+reviewProgress: number;
}

const columns: TableColumnsType<DataType> = [
@@ -213,7 +214,7 @@ const columns: TableColumnsType<DataType> = [
],
},
{
-title: 'Average Processing Per Image (Seconds)',
+title: 'Average Processing Time Per Image (Seconds)',
children: [
{
title: 'SN/IMEI',
@@ -223,7 +224,7 @@ const columns: TableColumnsType<DataType> = [
const isAbnormal = ensureMax(record.snImeiAPT, 2);
return (
<span style={{ color: isAbnormal ? 'red' : '' }}>
-{record?.snImeiAPT?.toFixed(2)}
+{record?.snImeiAPT?.toFixed(1)}
</span>
);
},
@@ -236,13 +237,26 @@ const columns: TableColumnsType<DataType> = [
const isAbnormal = ensureMax(record.invoiceAPT, 2);
return (
<span style={{ color: isAbnormal ? 'red' : '' }}>
-{record?.invoiceAPT?.toFixed(2)}
+{record?.invoiceAPT?.toFixed(1)}
</span>
);
},
},
],
},
+{
+title: 'Review Progress',
+dataIndex: 'review_progress',
+key: 'review_progress',
+width: '100px',
+render: (_, record) => {
+return (
+<span>
+{formatPercent(record.reviewProgress) === '-' ? 0 : formatPercent(record.reviewProgress)}
+</span>
+);
+},
+},
];

interface ReportOverViewTableProps {
@@ -275,6 +289,7 @@ const ReportOverViewTable: React.FC<ReportOverViewTableProps> = ({
invoiceAPT: item.average_processing_time.invoice,
snImeiTC: item.usage.imei,
invoiceTC: item.usage.invoice,
+reviewProgress: item.review_progress,
};
},
);
View File
@@ -91,7 +91,7 @@ const ReportTable: React.FC = () => {
},
},
{
-title: 'Purchase Date Acc',
+title: 'Purchase Date Accuracy',
dataIndex: 'Purchase Date Acc',
key: 'Purchase Date Acc',
render: (_, record) => {
@@ -105,7 +105,7 @@ const ReportTable: React.FC = () => {
},
{
-title: 'Retailer Acc',
+title: 'Retailer Accuracy',
dataIndex: 'Retailer Acc',
key: 'Retailer Acc',
render: (_, record) => {
@@ -118,7 +118,7 @@ const ReportTable: React.FC = () => {
},
},
{
-title: 'IMEI Acc',
+title: 'IMEI Accuracy',
dataIndex: 'IMEI Acc',
key: 'IMEI Acc',
render: (_, record) => {
View File
@@ -6,8 +6,8 @@ tag=$1
echo "[INFO] Tag received from Python: $tag"

-echo "[INFO] Updating everything the remote..."
-git submodule update --recursive --remote
+# echo "[INFO] Updating everything the remote..."
+# git submodule update --recursive --remote

echo "[INFO] Pushing AI image with tag: $tag..."
docker compose -f docker-compose-dev.yml build cope2n-fi-sbt
View File
@@ -77,7 +77,7 @@ services:
networks:
- ctel-sbt
volumes:
-# - BE_media:${MEDIA_ROOT}
+- BE_media:${MEDIA_ROOT}
- BE_static:/app/static
- ./cope2n-api:/app
working_dir: /app
@@ -170,12 +170,12 @@ services:
rabbitmq-sbt:
condition: service_started
volumes:
-# - BE_media:${MEDIA_ROOT}
+- BE_media:${MEDIA_ROOT}
- ./cope2n-api:/app
working_dir: /app
-# command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5"
-command: bash -c "tail -f > /dev/null"
+command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5"
+# command: bash -c "tail -f > /dev/null"
# Back-end persistent
db-sbt:
View File
@@ -10,9 +10,9 @@ from dotenv import load_dotenv
load_dotenv("../.env_prod")

-OUTPUT_NAME = "Jan"
-START_DATE = datetime(2024, 1, 1, tzinfo=timezone('Asia/Ho_Chi_Minh'))
-END_DATE = datetime(2024, 2, 1, tzinfo=timezone('Asia/Ho_Chi_Minh'))
+OUTPUT_NAME = "all_0219_0226"
+START_DATE = datetime(2024, 2, 19, tzinfo=timezone('Asia/Ho_Chi_Minh'))
+END_DATE = datetime(2024, 2, 27, tzinfo=timezone('Asia/Ho_Chi_Minh'))

# Database connection details
db_host = os.environ.get('DB_HOST', "")
@@ -63,31 +63,31 @@ cursor.close()
conn.close()

# # Download folders from S3
-# s3_client = boto3.client(
-# 's3',
-# aws_access_key_id=access_key,
-# aws_secret_access_key=secret_key
-# )
-# request_ids = []
-# for rq in data:
-# rq_id = rq[3]
-# request_ids.append(rq_id)
-# for request_id in tqdm(request_ids):
-# folder_key = f"{s3_folder_prefix}/{request_id}/" # Assuming folder structure like: s3_bucket_name/s3_folder_prefix/request_id/
-# local_folder_path = f"{OUTPUT_NAME}/{request_id}/" # Path to the local folder to save the downloaded files
-# os.makedirs(OUTPUT_NAME, exist_ok=True)
-# os.makedirs(local_folder_path, exist_ok=True)
-# # List objects in the S3 folder
-# response = s3_client.list_objects_v2(Bucket=s3_bucket_name, Prefix=folder_key)
-# objects = response.get('Contents', [])
-# for s3_object in objects:
-# object_key = s3_object['Key']
-# local_file_path = local_folder_path + object_key.split('/')[-1] # Extracting the file name from the object key
-# # Download the S3 object to the local file
-# s3_client.download_file(s3_bucket_name, object_key, local_file_path)
+s3_client = boto3.client(
+'s3',
+aws_access_key_id=access_key,
+aws_secret_access_key=secret_key
+)
+request_ids = []
+for rq in data:
+rq_id = rq[3]
+request_ids.append(rq_id)
+for request_id in tqdm(request_ids):
+folder_key = f"{s3_folder_prefix}/{request_id}/" # Assuming folder structure like: s3_bucket_name/s3_folder_prefix/request_id/
+local_folder_path = f"{OUTPUT_NAME}/{request_id}/" # Path to the local folder to save the downloaded files
+os.makedirs(OUTPUT_NAME, exist_ok=True)
+os.makedirs(local_folder_path, exist_ok=True)
+# List objects in the S3 folder
+response = s3_client.list_objects_v2(Bucket=s3_bucket_name, Prefix=folder_key)
+objects = response.get('Contents', [])
+for s3_object in objects:
+object_key = s3_object['Key']
+local_file_path = local_folder_path + object_key.split('/')[-1] # Extracting the file name from the object key
+# Download the S3 object to the local file
+s3_client.download_file(s3_bucket_name, object_key, local_file_path)
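One caveat on the re-enabled block worth flagging in review: list_objects_v2 returns at most 1,000 keys per call, so a request folder holding more objects would be silently truncated. A hedged variant of the listing/download loop using boto3's built-in paginator; it reuses the script's client, bucket, and path names and is a sketch, not a drop-in commit.

# Sketch: same download loop as above, but robust to folders holding
# more than the 1,000-object cap of a single list_objects_v2 call.
def download_folder(s3_client, bucket, folder_key, local_folder_path):
    paginator = s3_client.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket, Prefix=folder_key):
        for s3_object in page.get('Contents', []):
            object_key = s3_object['Key']
            local_file_path = local_folder_path + object_key.split('/')[-1]
            s3_client.download_file(bucket, object_key, local_file_path)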