diff --git a/api-cronjob/Dockerfile b/api-cronjob/Dockerfile new file mode 100644 index 0000000..606072c --- /dev/null +++ b/api-cronjob/Dockerfile @@ -0,0 +1,9 @@ +FROM python:3.9-slim + +WORKDIR /app + +COPY script.py . + +RUN apt-get update && apt-get -y install curl + +CMD [ "python", "script.py" ] \ No newline at end of file diff --git a/cope2n-api/fwd/settings.py b/cope2n-api/fwd/settings.py index e2ddf3a..dd5801c 100755 --- a/cope2n-api/fwd/settings.py +++ b/cope2n-api/fwd/settings.py @@ -143,8 +143,8 @@ LANGUAGE_CODE = "en-us" USE_I18N = True CELERY_ENABLE_UTC = False -CELERY_TIMEZONE = "Asia/Ho_Chi_Minh" -TIME_ZONE = "Asia/Ho_Chi_Minh" +CELERY_TIMEZONE = "Asia/Singapore" +TIME_ZONE = "Asia/Singapore" USE_TZ = True # Static files (CSS, JavaScript, Images) @@ -220,6 +220,20 @@ SIZE_TO_COMPRESS = 2 * 1024 * 1024 MAX_NUMBER_OF_TEMPLATE = 3 MAX_PAGES_OF_PDF_FILE = 50 +OVERVIEW_REFRESH_INTERVAL = 2 +OVERVIEW_REPORT_ROOT = "overview" +OVERVIEW_REPORT_DURATION = ["30d", "7d"] + +SUBS = { + "SEAU": "AU", + "SESP": "SG", + "SME": "MY", + "SEPCO": "PH", + "TSE": "TH", + "SEIN": "ID", + "ALL": "all" + } + CACHES = { 'default': { 'BACKEND': 'django.core.cache.backends.dummy.DummyCache', diff --git a/cope2n-api/fwd_api/api/accuracy_view.py b/cope2n-api/fwd_api/api/accuracy_view.py index e1b8f69..d7e47eb 100644 --- a/cope2n-api/fwd_api/api/accuracy_view.py +++ b/cope2n-api/fwd_api/api/accuracy_view.py @@ -14,9 +14,12 @@ import json from ..exception.exceptions import InvalidException, RequiredFieldException, NotFoundException from ..models import SubscriptionRequest, Report, ReportFile from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list, extract_report_detail_list, IterAvg -from ..utils.file import download_from_S3 +from ..utils.file import download_from_S3, convert_date_string +from ..utils.redis import RedisUtils from ..utils.process import string_to_boolean -from ..celery_worker.client_connector import c_connector +from ..utils.subsidiary import map_subsidiary_long_to_short, map_subsidiary_short_to_long + +redis_client = RedisUtils() class AccuracyViewSet(viewsets.ViewSet): lookup_field = "username" @@ -226,6 +229,12 @@ class AccuracyViewSet(viewsets.ViewSet): description='Subsidiary', type=OpenApiTypes.STR, ), + OpenApiParameter( + name='report_overview_duration', + location=OpenApiParameter.QUERY, + description=f'open of {settings.OVERVIEW_REPORT_DURATION}', + type=OpenApiTypes.STR, + ), ], responses=None, tags=['Accuracy'] ) @@ -240,12 +249,26 @@ class AccuracyViewSet(viewsets.ViewSet): include_test = string_to_boolean(request.GET.get('include_test', "false")) subsidiary = request.GET.get("subsidiary", "all") is_daily_report = string_to_boolean(request.GET.get('is_daily_report', "false")) - - try: - start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z') - end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z') - except ValueError: - raise InvalidException(excArgs="Date format") + report_overview_duration = request.GET.get("report_overview_duration", "") + subsidiary = map_subsidiary_long_to_short(subsidiary) + + if is_daily_report: + if report_overview_duration not in settings.OVERVIEW_REPORT_DURATION: + raise InvalidException(excArgs="overview duration") + end_date = timezone.now() + if report_overview_duration == "30d": + start_date = end_date - timezone.timedelta(days=30) + else: + start_date = end_date - timezone.timedelta(days=7) + start_date = start_date.replace(hour=0, minute=0, second=0, microsecond=0) + start_date_str = start_date.strftime('%Y-%m-%dT%H:%M:%S%z') + end_date_str = end_date.strftime('%Y-%m-%dT%H:%M:%S%z') + else: + try: + start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z') + end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z') + except ValueError: + raise InvalidException(excArgs="Date format") query_set = {"start_date_str": start_date_str, "end_date_str": end_date_str, @@ -255,7 +278,11 @@ class AccuracyViewSet(viewsets.ViewSet): "include_test": include_test, "subsidiary": subsidiary, "is_daily_report": is_daily_report, + "report_overview_duration": report_overview_duration } + # if is_daily_report: + # if (end_date-start_date) > timezone.timedelta(days=1): + # raise InvalidException(excArgs="Date range") report_id = "report" + "_" + timezone.datetime.now().strftime("%Y%m%d%H%M%S%z") + "_" + uuid.uuid4().hex new_report: Report = Report( @@ -268,8 +295,6 @@ class AccuracyViewSet(viewsets.ViewSet): end_at=end_date, status="Processing", ) - if is_daily_report: - new_report.created_at = end_date new_report.save() # Background job to calculate accuracy shadow_report(report_id, query_set) @@ -318,7 +343,7 @@ class AccuracyViewSet(viewsets.ViewSet): response = { 'report_detail': data, - 'metadata': {"subsidiary": report.subsidiary, + 'metadata': {"subsidiary": map_subsidiary_short_to_long(report.subsidiary), "start_at": report.start_at, "end_at": report.end_at}, 'page': { @@ -380,7 +405,7 @@ class AccuracyViewSet(viewsets.ViewSet): page_size = int(request.GET.get('page_size', 10)) if not start_date_str or not end_date_str: - reports = Report.objects.all() + reports = Report.objects.all().order_by('created_at').reverse() else: try: start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z') @@ -390,26 +415,35 @@ class AccuracyViewSet(viewsets.ViewSet): base_query = Q(created_at__range=(start_date, end_date)) if daily_report_only: base_query &= Q(is_daily_report=True) - reports = Report.objects.filter(base_query).order_by('created_at') + reports = Report.objects.filter(base_query).order_by('created_at').reverse() - paginator = Paginator(reports, page_size) page = paginator.get_page(page_number) + data = [] for report in page: + acc_keys = ["purchase_date", "retailername", "imei_number", "avg"] + acc = {} + for key in acc_keys: + fb = report.feedback_accuracy.get(key, 0) if report.feedback_accuracy else 0 + rv = report.reviewed_accuracy.get(key, 0) if report.reviewed_accuracy else 0 + acc[key] = max([fb, rv]) data.append({ "ID": report.id, "Created Date": report.created_at, + "Start Date": report.start_at, + "End Date": report.end_at, "No. Requests": report.number_request, "Status": report.status, - "Purchase Date Acc": report.reviewed_accuracy.get("purchase_date", None) if report.reviewed_accuracy else None, - "Retailer Acc": report.feedback_accuracy.get("retailername", None) if report.reviewed_accuracy else None, - "IMEI Acc": report.feedback_accuracy.get("imei_number", None) if report.reviewed_accuracy else None, - "Avg. Accuracy": report.feedback_accuracy.get("avg", None) if report.reviewed_accuracy else None, + "Purchase Date Acc": acc["purchase_date"], + "Retailer Acc": acc["retailername"], + "IMEI Acc": acc["imei_number"], + "Avg. Accuracy": acc["avg"], "Avg. Client Request Time": report.average_client_time.get("avg", 0) if report.average_client_time else 0, "Avg. OCR Processing Time": report.average_OCR_time.get("avg", 0) if report.average_OCR_time else 0, "report_id": report.report_id, + "Subsidiary": map_subsidiary_short_to_long(report.subsidiary), }) response = { @@ -427,103 +461,79 @@ class AccuracyViewSet(viewsets.ViewSet): @extend_schema( parameters=[ OpenApiParameter( - name='start_date', + name='duration', location=OpenApiParameter.QUERY, - description='Start date (YYYY-mm-DDTHH:MM:SSZ)', - type=OpenApiTypes.DATE, - default='2023-01-02T00:00:00+0700', - ), - OpenApiParameter( - name='end_date', - location=OpenApiParameter.QUERY, - description='End date (YYYY-mm-DDTHH:MM:SSZ)', - type=OpenApiTypes.DATE, - default='2024-01-10T00:00:00+0700', + description='one of [30d, 7d]', + type=OpenApiTypes.STR, + default='30d', ), OpenApiParameter( name='subsidiary', location=OpenApiParameter.QUERY, description='Subsidiary', type=OpenApiTypes.STR, - ), - OpenApiParameter( - name='page', - location=OpenApiParameter.QUERY, - description='Page number', - type=OpenApiTypes.INT, - required=False - ), - OpenApiParameter( - name='page_size', - location=OpenApiParameter.QUERY, - description='Number of items per page', - type=OpenApiTypes.INT, - required=False - ), + ) ], responses=None, tags=['Accuracy'] ) @action(detail=False, url_path="overview", methods=["GET"]) def overview(self, request): if request.method == 'GET': - subsidiary = request.GET.get('subsidiary', None) - start_date_str = request.GET.get('start_date', "") - end_date_str = request.GET.get('end_date', "") - page_number = int(request.GET.get('page', 1)) - page_size = int(request.GET.get('page_size', 10)) + subsidiary = request.GET.get('subsidiary', "ALL") + duration = request.GET.get('duration', "") - base_query = Q() - - if start_date_str and end_date_str: - try: - start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z') - end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z') - except ValueError: - raise InvalidException(excArgs="Date format") - base_query &= Q(created_at__range=(start_date, end_date)) - - if subsidiary: - base_query &= Q(subsidiary=subsidiary) - base_query &= Q(is_daily_report=True) - reports = Report.objects.filter(base_query).order_by('created_at') - - paginator = Paginator(reports, page_size) - page = paginator.get_page(page_number) - - data = [] - this_month_report = MonthReportAccumulate() - for report in page: - res = this_month_report.add(report) - if not(res): - _, _data, total = this_month_report() - data += [total] - data += _data - this_month_report = MonthReportAccumulate() - this_month_report.add(report) - else: - continue - _, _data, total = this_month_report() - data += [total] - data += _data - # Generate xlsx file - # workbook = dict2xlsx(data, _type="report") - # tmp_file = f"/tmp/{str(uuid.uuid4())}.xlsx" - # os.makedirs(os.path.dirname(tmp_file), exist_ok=True) - # workbook.save(tmp_file) - # c_connector.remove_local_file((tmp_file, "fake_request_id")) + subsidiary = map_subsidiary_long_to_short(subsidiary) + # Retrive data from Redis + key = f"{subsidiary}_{duration}" + data = json.loads(redis_client.get_specific_cache(settings.OVERVIEW_REPORT_ROOT, key)).get("data", []) response = { - # 'file': load_xlsx_file(), 'overview_data': data, - 'page': { - 'number': page.number, - 'total_pages': page.paginator.num_pages, - 'count': page.paginator.count, - } } return JsonResponse(response, status=200) return JsonResponse({'error': 'Invalid request method.'}, status=405) + + @extend_schema( + parameters=[ + OpenApiParameter( + name='duration', + location=OpenApiParameter.QUERY, + description='one of [30d, 7d]', + type=OpenApiTypes.STR, + default='30d', + ), + OpenApiParameter( + name='subsidiary', + location=OpenApiParameter.QUERY, + description='Subsidiary', + type=OpenApiTypes.STR, + ) + ], + responses=None, tags=['Accuracy'] + ) + @action(detail=False, url_path="overview_download_file", methods=["GET"]) + def overview_download_file(self, request): + if request.method == 'GET': + subsidiary = request.GET.get('subsidiary', "ALL") + duration = request.GET.get('duration', "") + + subsidiary = map_subsidiary_long_to_short(subsidiary) + + s3_key = f"{subsidiary}_{duration}.xlsx" + + tmp_file = "/tmp/" + s3_key + os.makedirs("/tmp", exist_ok=True) + download_from_S3("report/" + settings.OVERVIEW_REPORT_ROOT + "/" + s3_key, tmp_file) + file = open(tmp_file, 'rb') + response = FileResponse(file, status=200) + + # Set the content type and content disposition headers + response['Content-Type'] = 'application/octet-stream' + response['Content-Disposition'] = 'attachment; filename="{0}"'.format(os.path.basename(tmp_file)) + return response + + return JsonResponse({'error': 'Invalid request method.'}, status=405) @extend_schema( parameters=[], @@ -541,7 +551,7 @@ class AccuracyViewSet(viewsets.ViewSet): raise NotFoundException(excArgs=f"report: {report_id}") report = Report.objects.filter(report_id=report_id).first() # download from s3 to local - tmp_file = "/tmp/" + "report_" + uuid.uuid4().hex + ".xlsx" + tmp_file = "/tmp/" + report.subsidiary + "_" + report.start_at.strftime("%Y%m%d") + "_" + report.end_at.strftime("%Y%m%d") + "_created_on_" + report.created_at.strftime("%Y%m%d") + ".xlsx" os.makedirs("/tmp", exist_ok=True) if not report.S3_file_name: raise NotFoundException(excArgs="S3 file name") diff --git a/cope2n-api/fwd_api/celery_worker/client_connector.py b/cope2n-api/fwd_api/celery_worker/client_connector.py index c10cbdd..5394c8e 100755 --- a/cope2n-api/fwd_api/celery_worker/client_connector.py +++ b/cope2n-api/fwd_api/celery_worker/client_connector.py @@ -36,6 +36,8 @@ class CeleryConnector: 'remove_local_file': {'queue': "remove_local_file"}, 'csv_feedback': {'queue': "csv_feedback"}, 'make_a_report': {'queue': "report"}, + 'make_a_report_2': {'queue': "report_2"}, + } app = Celery( @@ -45,12 +47,16 @@ class CeleryConnector: ) def make_a_report(self, args): return self.send_task('make_a_report', args) + + def make_a_report_2(self, args): + return self.send_task('make_a_report_2', args) + def csv_feedback(self, args): return self.send_task('csv_feedback', args) def do_pdf(self, args): return self.send_task('do_pdf', args) - def upload_file_to_s3(self, args): - return self.send_task('upload_file_to_s3', args) + def upload_feedback_to_s3(self, args): + return self.send_task('upload_feedback_to_s3', args) def upload_file_to_s3(self, args): return self.send_task('upload_file_to_s3', args) def upload_report_to_s3(self, args): @@ -59,6 +65,7 @@ class CeleryConnector: return self.send_task('upload_obj_to_s3', args) def remove_local_file(self, args): return self.send_task('remove_local_file', args, countdown=280) # nearest execution of this task in 280 seconds + def process_fi(self, args): return self.send_task('process_fi_invoice', args) def process_fi_result(self, args): diff --git a/cope2n-api/fwd_api/celery_worker/internal_task.py b/cope2n-api/fwd_api/celery_worker/internal_task.py index bf12b3f..dc5e7cd 100755 --- a/cope2n-api/fwd_api/celery_worker/internal_task.py +++ b/cope2n-api/fwd_api/celery_worker/internal_task.py @@ -13,10 +13,13 @@ from fwd_api.models import SubscriptionRequestFile, FeedbackRequest, Report from ..utils import file as FileUtils from ..utils import process as ProcessUtil from ..utils import s3 as S3Util +from ..utils.accuracy import validate_feedback_file from fwd_api.constant.common import ProcessType import csv import json +import copy +from fwd_api.utils.accuracy import predict_result_to_ready from celery.utils.log import get_task_logger from fwd import settings @@ -79,6 +82,7 @@ def process_csv_feedback(csv_file_path, feedback_id): continue else: sub_rq = sub_rqs[0] + images = SubscriptionRequestFile.objects.filter(request=sub_rq) fb = {} # update user result (with validate) redemption_id = row.get('redemptionNumber') @@ -99,6 +103,42 @@ def process_csv_feedback(csv_file_path, feedback_id): if len(redemption_id) > 0: sub_rq.redemption_id = redemption_id sub_rq.save() + # Update files + time_cost = {"imei": [], "invoice": [], "all": []} + imei_count = 0 + if sub_rq.ai_inference_profile is None: + time_cost["imei"] = [-1 for _ in range(len(images))] + time_cost["invoice"] = [-1] + time_cost["all"] = [-1] + else: + for k, v in sub_rq.ai_inference_profile.items(): + time_cost[k.split("_")[0]].append(v["inference"][1][0] - v["inference"][0] + (v["postprocess"][1]-v["postprocess"][0])) + for i, image in enumerate(images): + _predict_result = copy.deepcopy(predict_result_to_ready(sub_rq.predict_result)) + _feedback_result = copy.deepcopy(sub_rq.feedback_result) + _reviewed_result = copy.deepcopy(sub_rq.reviewed_result) + image.processing_time = time_cost.get(image.doc_type, [0 for _ in range(image.index_in_request)])[image.index_in_request] + if not validate_feedback_file(_feedback_result, _predict_result): + status[request_id] = "Missalign imei number between feedback and predict" + continue + if image.doc_type == "invoice": + _predict_result["imei_number"] = [] + if _feedback_result: + _feedback_result["imei_number"] = [] + else: + None + if _reviewed_result: + _reviewed_result["imei_number"] = [] + else: + None + else: + _predict_result = {"retailername": None, "sold_to_party": None, "purchase_date": [], "imei_number": [_predict_result["imei_number"][image.index_in_request]]} + _feedback_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_feedback_result["imei_number"][image.index_in_request]]} if _feedback_result else None + _reviewed_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_reviewed_result["imei_number"][image.index_in_request]]} if _reviewed_result else None + image.predict_result = _predict_result + image.feedback_result = _feedback_result + image.reviewed_result = _reviewed_result + image.save() # update log into database feedback_rq = FeedbackRequest.objects.filter(feedback_id=feedback_id).first() feedback_rq.error_status = status diff --git a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py index 5f72781..28a1e06 100644 --- a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py +++ b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py @@ -3,14 +3,19 @@ import traceback from fwd_api.models import SubscriptionRequest, Report, ReportFile from fwd_api.celery_worker.worker import app from ..utils import s3 as S3Util -from ..utils.accuracy import update_temp_accuracy, IterAvg, calculate_and_save_subcription_file, count_transactions, extract_report_detail_list +from ..utils.accuracy import update_temp_accuracy, IterAvg, calculate_and_save_subcription_file, count_transactions, extract_report_detail_list, calculate_a_request, ReportAccumulateByRequest from ..utils.file import dict2xlsx, save_workbook_file, save_report_to_S3 +from ..utils import time_stuff +from ..utils.redis import RedisUtils from django.utils import timezone from django.db.models import Q +import json +import copy from celery.utils.log import get_task_logger from fwd import settings +redis_client = RedisUtils() logger = get_task_logger(__name__) @@ -29,6 +34,7 @@ def mean_list(l): @app.task(name='make_a_report') def make_a_report(report_id, query_set): + # TODO: to be deprecated try: start_date = timezone.datetime.strptime(query_set["start_date_str"], '%Y-%m-%dT%H:%M:%S%z') end_date = timezone.datetime.strptime(query_set["end_date_str"], '%Y-%m-%dT%H:%M:%S%z') @@ -105,7 +111,7 @@ def make_a_report(report_id, query_set): errors += request_att["err"] num_request += 1 - transaction_att = count_transactions(start_date, end_date) + transaction_att = count_transactions(start_date, end_date, report.subsidiary) # Do saving process report.number_request = num_request report.number_images = number_images @@ -151,4 +157,155 @@ def make_a_report(report_id, query_set): except Exception as e: print("[ERROR]: an error occured while processing report: ", report_id) traceback.print_exc() - return 400 \ No newline at end of file + return 400 + +@app.task(name='make_a_report_2') +def make_a_report_2(report_id, query_set): + try: + start_date = timezone.datetime.strptime(query_set["start_date_str"], '%Y-%m-%dT%H:%M:%S%z') + end_date = timezone.datetime.strptime(query_set["end_date_str"], '%Y-%m-%dT%H:%M:%S%z') + base_query = Q(created_at__range=(start_date, end_date)) + if query_set["request_id"]: + base_query &= Q(request_id=query_set["request_id"]) + if query_set["redemption_id"]: + base_query &= Q(redemption_id=query_set["redemption_id"]) + base_query &= Q(is_test_request=False) + if isinstance(query_set["include_test"], str): + query_set["include_test"] = True if query_set["include_test"].lower() in ["true", "yes", "1"] else False + if query_set["include_test"]: + # base_query = ~base_query + base_query.children = base_query.children[:-1] + + elif isinstance(query_set["include_test"], bool): + if query_set["include_test"]: + base_query = ~base_query + if isinstance(query_set["subsidiary"], str): + if query_set["subsidiary"] and query_set["subsidiary"].lower().replace(" ", "")!="all": + base_query &= Q(redemption_id__startswith=query_set["subsidiary"]) + if isinstance(query_set["is_reviewed"], str): + if query_set["is_reviewed"] == "reviewed": + base_query &= Q(is_reviewed=True) + elif query_set["is_reviewed"] == "not reviewed": + base_query &= Q(is_reviewed=False) + # elif query_set["is_reviewed"] == "all": + # pass + + errors = [] + # Create a placeholder to fill + accuracy = {"feedback" :{"imei_number": IterAvg(), + "purchase_date": IterAvg(), + "retailername": IterAvg(), + "sold_to_party": IterAvg(),}, + "reviewed" :{"imei_number": IterAvg(), + "purchase_date": IterAvg(), + "retailername": IterAvg(), + "sold_to_party": IterAvg(),} + } # {"imei": {"acc": 0.1, count: 1}, ...} + time_cost = {"invoice": IterAvg(), + "imei": IterAvg()} + number_images = 0 + number_bad_images = 0 + # TODO: Multithreading + # Calculate accuracy, processing time, ....Then save. + subscription_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at') + report: Report = \ + Report.objects.filter(report_id=report_id).first() + # TODO: number of transaction by doc type + num_request = 0 + report_files = [] + report_engine = ReportAccumulateByRequest(report.subsidiary) + for request in subscription_requests: + if request.status != 200 or not (request.reviewed_result or request.feedback_result): + # Failed requests or lack of reviewed_result/feedback_result + continue + request_att, _report_files = calculate_a_request(report, request) + report_files += _report_files + report_engine.add(request, _report_files) + request.feedback_accuracy = {"imei_number" : mean_list(request_att["acc"]["feedback"].get("imei_number", [None])), + "purchase_date" : mean_list(request_att["acc"]["feedback"].get("purchase_date", [None])), + "retailername" : mean_list(request_att["acc"]["feedback"].get("retailername", [None])), + "sold_to_party" : mean_list(request_att["acc"]["feedback"].get("sold_to_party", [None]))} + request.reviewed_accuracy = {"imei_number" : mean_list(request_att["acc"]["reviewed"].get("imei_number", [None])), + "purchase_date" : mean_list(request_att["acc"]["reviewed"].get("purchase_date", [None])), + "retailername" : mean_list(request_att["acc"]["reviewed"].get("retailername", [None])), + "sold_to_party" : mean_list(request_att["acc"]["reviewed"].get("sold_to_party", [None]))} + request.save() + number_images += request_att["total_images"] + number_bad_images += request_att["bad_images"] + update_temp_accuracy(accuracy["feedback"], request_att["acc"]["feedback"], keys=["imei_number", "purchase_date", "retailername", "sold_to_party"]) + update_temp_accuracy(accuracy["reviewed"], request_att["acc"]["reviewed"], keys=["imei_number", "purchase_date", "retailername", "sold_to_party"]) + + time_cost["imei"].add(request_att["time_cost"].get("imei", [])) + time_cost["invoice"].add(request_att["time_cost"].get("invoice", [])) + + errors += request_att["err"] + num_request += 1 + + report_fine_data, _save_data = report_engine.save(report.report_id, query_set.get("is_daily_report", False), query_set["include_test"]) + transaction_att = count_transactions(start_date, end_date, report.subsidiary) + # Do saving process + report.number_request = num_request + report.number_images = number_images + report.number_imei = time_cost["imei"].count + report.number_invoice = time_cost["invoice"].count + report.number_bad_images = number_bad_images + # FIXME: refactor this data stream for endurability + report.average_OCR_time = {"invoice": time_cost["invoice"](), "imei": time_cost["imei"](), + "invoice_count": time_cost["invoice"].count, "imei_count": time_cost["imei"].count} + + report.average_OCR_time["avg"] = (report.average_OCR_time["invoice"]*report.average_OCR_time["invoice_count"] + report.average_OCR_time["imei"]*report.average_OCR_time["imei_count"])/(report.average_OCR_time["imei_count"] + report.average_OCR_time["invoice_count"]) if (report.average_OCR_time["imei_count"] + report.average_OCR_time["invoice_count"]) > 0 else None + + report.number_imei_transaction = transaction_att.get("imei", 0) + report.number_invoice_transaction = transaction_att.get("invoice", 0) + + acumulated_acc = {"feedback": {}, + "reviewed": {}} + for acc_type in ["feedback", "reviewed"]: + avg_acc = IterAvg() + for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]: + acumulated_acc[acc_type][key] = accuracy[acc_type][key]() + acumulated_acc[acc_type][key+"_count"] = accuracy[acc_type][key].count + avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key+"_count"]) + acumulated_acc[acc_type]["avg"] = avg_acc() + + report.feedback_accuracy = acumulated_acc["feedback"] + report.reviewed_accuracy = acumulated_acc["reviewed"] + + report.errors = "|".join(errors) + report.status = "Ready" + report.save() + # Saving a xlsx file + report_files = ReportFile.objects.filter(report=report) + data = extract_report_detail_list(report_files, lower=True) + data_workbook = dict2xlsx(data, _type='report_detail') + local_workbook = save_workbook_file(report.report_id + ".xlsx", report, data_workbook) + s3_key=save_report_to_S3(report.report_id, local_workbook) + if query_set["is_daily_report"]: + # Save overview dashboard + # multiple accuracy by 100 + save_data = copy.deepcopy(_save_data) + for i, dat in enumerate(report_fine_data): + keys = [x for x in list(dat.keys()) if "accuracy" in x.lower()] + keys_percent = "images_quality" + for x_key in report_fine_data[i][keys_percent].keys(): + if "percent" not in x_key: + continue + report_fine_data[i][keys_percent][x_key] = report_fine_data[i][keys_percent][x_key]*100 + for key in keys: + if report_fine_data[i][key]: + for x_key in report_fine_data[i][key].keys(): + report_fine_data[i][key][x_key] = report_fine_data[i][key][x_key]*100 + data_workbook = dict2xlsx(report_fine_data, _type='report') + overview_filename = query_set["subsidiary"] + "_" + query_set["report_overview_duration"] + ".xlsx" + local_workbook = save_workbook_file(overview_filename, report, data_workbook, settings.OVERVIEW_REPORT_ROOT) + s3_key=save_report_to_S3(report.report_id, local_workbook) + redis_client.set_cache(settings.OVERVIEW_REPORT_ROOT, overview_filename.replace(".xlsx", ""), json.dumps(save_data)) + + except IndexError as e: + print(e) + traceback.print_exc() + print("NotFound request by report id, %d", report_id) + except Exception as e: + print("[ERROR]: an error occured while processing report: ", report_id) + traceback.print_exc() + return 400 diff --git a/cope2n-api/fwd_api/celery_worker/worker.py b/cope2n-api/fwd_api/celery_worker/worker.py index 5bb6963..31ad456 100755 --- a/cope2n-api/fwd_api/celery_worker/worker.py +++ b/cope2n-api/fwd_api/celery_worker/worker.py @@ -42,7 +42,7 @@ app.conf.update({ Queue('remove_local_file'), Queue('csv_feedback'), Queue('report'), - + Queue('report_2'), ], 'task_routes': { 'process_sap_invoice_result': {'queue': 'invoice_sap_rs'}, @@ -61,6 +61,7 @@ app.conf.update({ 'remove_local_file': {'queue': "remove_local_file"}, 'csv_feedback': {'queue': "csv_feedback"}, 'make_a_report': {'queue': "report"}, + 'make_a_report_2': {'queue': "report_2"}, } }) diff --git a/cope2n-api/fwd_api/migrations/0179_reportfile_is_bad_image.py b/cope2n-api/fwd_api/migrations/0179_reportfile_is_bad_image.py new file mode 100644 index 0000000..72f95a7 --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0179_reportfile_is_bad_image.py @@ -0,0 +1,18 @@ +# Generated by Django 4.1.3 on 2024-02-04 23:32 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0178_alter_reportfile_acc'), + ] + + operations = [ + migrations.AddField( + model_name='reportfile', + name='is_bad_image', + field=models.BooleanField(default=False), + ), + ] diff --git a/cope2n-api/fwd_api/migrations/0180_alter_reportfile_time_cost.py b/cope2n-api/fwd_api/migrations/0180_alter_reportfile_time_cost.py new file mode 100644 index 0000000..a646220 --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0180_alter_reportfile_time_cost.py @@ -0,0 +1,18 @@ +# Generated by Django 4.1.3 on 2024-02-05 02:44 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0179_reportfile_is_bad_image'), + ] + + operations = [ + migrations.AlterField( + model_name='reportfile', + name='time_cost', + field=models.FloatField(default=None, null=True), + ), + ] diff --git a/cope2n-api/fwd_api/models/ReportFile.py b/cope2n-api/fwd_api/models/ReportFile.py index 86e9270..9599d5d 100644 --- a/cope2n-api/fwd_api/models/ReportFile.py +++ b/cope2n-api/fwd_api/models/ReportFile.py @@ -16,6 +16,7 @@ class ReportFile(models.Model): # Data S3_uploaded = models.BooleanField(default=False) doc_type = models.CharField(max_length=200) + is_bad_image = models.BooleanField(default=False) predict_result = models.JSONField(null=True) feedback_result = models.JSONField(null=True) @@ -25,7 +26,7 @@ class ReportFile(models.Model): reviewed_accuracy = models.JSONField(null=True) acc = models.FloatField(default=0, null=True) - time_cost = models.FloatField(default=0) + time_cost = models.FloatField(default=None, null=True) is_reviewed = models.CharField(default="NA", max_length=5) # NA, No, Yes bad_image_reason = models.TextField(default="") counter_measures = models.TextField(default="") diff --git a/cope2n-api/fwd_api/utils/accuracy.py b/cope2n-api/fwd_api/utils/accuracy.py index 3ba1efd..6d500fd 100644 --- a/cope2n-api/fwd_api/utils/accuracy.py +++ b/cope2n-api/fwd_api/utils/accuracy.py @@ -5,14 +5,307 @@ import copy from typing import Any from .ocr_utils.ocr_metrics import eval_ocr_metric from .ocr_utils.sbt_report import post_processing_str +import uuid from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile, ReportFile from ..celery_worker.client_connector import c_connector +from ..utils.file import dict2xlsx, save_workbook_file, save_report_to_S3 from django.db.models import Q +from django.utils import timezone +import redis +from fwd import settings +from ..models import SubscriptionRequest, Report, ReportFile +import json BAD_THRESHOLD = 0.75 valid_keys = ["retailername", "sold_to_party", "purchase_date", "imei_number"] +class ReportAccumulateByRequest: + def __init__(self, sub): + # self.redis_client = redis.Redis(host=settings.REDIS_HOST, port=settings.REDIS_PORT, decode_responses=True) + self.sub = sub + self.current_time = None + self.data = {} # {"month": [total, {"day": day_data}]} + self.total_format = { + 'subs': "+", + 'extraction_date': "Subtotal ()", + 'total_images': 0, + 'images_quality': { + 'successful': 0, + 'successful_percent': 0, + 'bad': 0, + 'bad_percent': 0 + }, + 'average_accuracy_rate': { + 'imei': IterAvg(), + 'purchase_date': IterAvg(), + 'retailer_name': IterAvg(), + 'sold_to_party': IterAvg() + }, + 'average_processing_time': { + 'imei': IterAvg(), + 'invoice': IterAvg() + }, + 'usage': { + 'imei':0, + 'invoice': 0, + 'request': 0 + }, + 'feedback_accuracy': { + 'imei_number': IterAvg(), + 'purchase_date': IterAvg(), + 'retailername': IterAvg(), + 'sold_to_party': IterAvg() + }, + 'reviewed_accuracy': { + 'imei_number': IterAvg(), + 'purchase_date': IterAvg(), + 'retailername': IterAvg(), + 'sold_to_party': IterAvg() + }, + 'num_request': 0 + } + self.day_format = { + 'subs': sub, + 'extraction_date': "", + 'num_imei': 0, + 'num_invoice': 0, + 'total_images': 0, + 'images_quality': { + 'successful': 0, + 'successful_percent': 0, + 'bad': 0, + 'bad_percent': 0 + }, + 'average_accuracy_rate': { + 'imei': IterAvg(), + 'purchase_date': IterAvg(), + 'retailer_name': IterAvg(), + 'sold_to_party': IterAvg() + }, + 'average_processing_time': { + 'imei': IterAvg(), + 'invoice': IterAvg() + }, + 'usage': { + 'imei': 0, + 'invoice': 0, + 'request': 0 + }, + 'feedback_accuracy': { + 'imei_number': IterAvg(), + 'purchase_date': IterAvg(), + 'retailername': IterAvg(), + 'sold_to_party': IterAvg() + }, + 'reviewed_accuracy': { + 'imei_number': IterAvg(), + 'purchase_date': IterAvg(), + 'retailername': IterAvg(), + 'sold_to_party': IterAvg() + }, + "report_files": [], + 'num_request': 0 + }, + + @staticmethod + def update_total(total, report_file): + total["total_images"] += 1 + total["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0 + total["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0 + # total["report_files"].append(report_file) + + if sum([len(report_file.reviewed_accuracy[x]) for x in report_file.reviewed_accuracy.keys() if "_count" not in x]) > 0 : + total["average_accuracy_rate"]["imei"].add(report_file.reviewed_accuracy.get("imei_number", [])) + total["average_accuracy_rate"]["purchase_date"].add(report_file.reviewed_accuracy.get("purchase_date", [])) + total["average_accuracy_rate"]["retailer_name"].add(report_file.reviewed_accuracy.get("retailername", [])) + total["average_accuracy_rate"]["sold_to_party"].add(report_file.reviewed_accuracy.get("sold_to_party", [])) + elif sum([len(report_file.feedback_accuracy[x]) for x in report_file.feedback_accuracy.keys() if "_count" not in x]) > 0: + total["average_accuracy_rate"]["imei"].add(report_file.feedback_accuracy.get("imei_number", [])) + total["average_accuracy_rate"]["purchase_date"].add(report_file.feedback_accuracy.get("purchase_date", [])) + total["average_accuracy_rate"]["retailer_name"].add(report_file.feedback_accuracy.get("retailername", [])) + total["average_accuracy_rate"]["sold_to_party"].add(report_file.feedback_accuracy.get("sold_to_party", [])) + + for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]: + total["feedback_accuracy"][key].add(report_file.feedback_accuracy.get(key, [])) + for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]: + total["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, [])) + + if not total["average_processing_time"].get(report_file.doc_type, None): + print(f"[WARM]: Weird doctype: {report_file.doc_type}") + total["average_processing_time"] = IterAvg() + total["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0 + + total["usage"]["imei"] += 1 if report_file.doc_type == "imei" else 0 + total["usage"]["invoice"] += 1 if report_file.doc_type == "invoice" else 0 + + return total + + @staticmethod + def update_day(day_data, report_file): + day_data["total_images"] += 1 + day_data["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0 + day_data["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0 + day_data["num_imei"] += 1 if report_file.doc_type == "imei" else 0 + day_data["num_invoice"] += 1 if report_file.doc_type == "invoice" else 0 + day_data["report_files"].append(report_file) + + if sum([len(report_file.reviewed_accuracy[x]) for x in report_file.reviewed_accuracy.keys() if "_count" not in x]) > 0 : + day_data["average_accuracy_rate"]["imei"].add(report_file.reviewed_accuracy.get("imei_number", 0)) + day_data["average_accuracy_rate"]["purchase_date"].add(report_file.reviewed_accuracy.get("purchase_date", 0)) + day_data["average_accuracy_rate"]["retailer_name"].add(report_file.reviewed_accuracy.get("retailername", 0)) + day_data["average_accuracy_rate"]["sold_to_party"].add(report_file.reviewed_accuracy.get("sold_to_party", 0)) + elif sum([len(report_file.feedback_accuracy[x]) for x in report_file.feedback_accuracy.keys() if "_count" not in x]) > 0: + day_data["average_accuracy_rate"]["imei"].add(report_file.feedback_accuracy.get("imei_number", 0)) + day_data["average_accuracy_rate"]["purchase_date"].add(report_file.feedback_accuracy.get("purchase_date", 0)) + day_data["average_accuracy_rate"]["retailer_name"].add(report_file.feedback_accuracy.get("retailername", 0)) + day_data["average_accuracy_rate"]["sold_to_party"].add(report_file.feedback_accuracy.get("sold_to_party", 0)) + + for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]: + day_data["feedback_accuracy"][key].add(report_file.feedback_accuracy.get(key, 0)) + for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]: + day_data["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, 0)) + + if not day_data["average_processing_time"].get(report_file.doc_type, None): + print(f"[WARM]: Weird doctype: {report_file.doc_type}") + day_data["average_processing_time"] = IterAvg() + day_data["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0 + + return day_data + + def add(self, request, report_files): + this_month = request.created_at.strftime("%Y%m") + this_day = request.created_at.strftime("%Y%m%d") + if not self.data.get(this_month, None): + self.data[this_month] = [copy.deepcopy(self.total_format), {}] + if not self.data[this_month][1].get(this_day, None): + self.data[this_month][1][this_day] = copy.deepcopy(self.day_format)[0] + self.data[this_month][1][this_day]['extraction_date'] = request.created_at.strftime("%Y-%m-%d") + usage = self.count_transactions_within_day(this_day) + self.data[this_month][1][this_day]["usage"]["imei"] = usage.get("imei", 0) + self.data[this_month][1][this_day]["usage"]["invoice"] = usage.get("invoice", 0) + self.data[this_month][1][this_day]["usage"]["request"] = usage.get("request", 0) + + self.data[this_month][1][this_day]['num_request'] += 1 + self.data[this_month][0]['num_request'] += 1 + for report_file in report_files: + self.data[this_month][0] = self.update_total(self.data[this_month][0], report_file) # Update the subtotal within the month + self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], report_file) # Update the subtotal of the day + + def count_transactions_within_day(self, date_string): + # convert this day into timezone.datetime at UTC + start_date = datetime.strptime(date_string, "%Y%m%d") + start_date_with_timezone = timezone.make_aware(start_date) + end_date_with_timezone = start_date_with_timezone + timezone.timedelta(days=1) + return count_transactions(start_date_with_timezone, end_date_with_timezone, self.sub) + + def save(self, root_report_id, is_daily_report=False, include_test=False): + report_data = self.get() + fine_data = [] + save_data = {"file": {"overview": f"{root_report_id}/{root_report_id}.xlsx"}, + "data": fine_data} # {"sub_report_id": "S3 location", "data": fine_data} + # extract data + for month in report_data.keys(): + fine_data.append(report_data[month][0]) + for day in report_data[month][1].keys(): + fine_data.append(report_data[month][1][day]) + # save daily reports + report_id = root_report_id + "_" + day + start_date = datetime.strptime(day, "%Y%m%d") + start_date_with_timezone = timezone.make_aware(start_date) + end_date_with_timezone = start_date_with_timezone + timezone.timedelta(days=1) + _average_OCR_time = {"invoice": self.data[month][1][day]["average_processing_time"]["invoice"](), "imei": self.data[month][1][day]["average_processing_time"]["imei"](), + "invoice_count": self.data[month][1][day]["average_processing_time"]["invoice"].count, "imei_count": self.data[month][1][day]["average_processing_time"]["imei"].count} + + _average_OCR_time["avg"] = (_average_OCR_time["invoice"]*_average_OCR_time["invoice_count"] + _average_OCR_time["imei"]*_average_OCR_time["imei_count"])/(_average_OCR_time["imei_count"] + _average_OCR_time["invoice_count"]) if (_average_OCR_time["imei_count"] + _average_OCR_time["invoice_count"]) > 0 else None + acumulated_acc = {"feedback_accuracy": {}, + "reviewed_accuracy": {}} + for acc_type in ["feedback_accuracy", "reviewed_accuracy"]: + avg_acc = IterAvg() + for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]: + acumulated_acc[acc_type][key] = self.data[month][1][day][acc_type][key]() + acumulated_acc[acc_type][key+"_count"] = self.data[month][1][day][acc_type][key].count + avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key+"_count"]) + acumulated_acc[acc_type]["avg"] = avg_acc() + acumulated_acc[acc_type]["avg_count"] = avg_acc.count + new_report: Report = Report( + report_id=report_id, + is_daily_report=is_daily_report, + subsidiary=self.sub.lower().replace(" ", ""), + include_test=include_test, + start_at=start_date_with_timezone, + end_at=end_date_with_timezone, + status="Ready", + number_request=report_data[month][1][day]["num_request"], + number_images=report_data[month][1][day]["total_images"], + number_imei=report_data[month][1][day]["num_imei"], + number_invoice=report_data[month][1][day]["num_invoice"], + number_bad_images=report_data[month][1][day]["images_quality"]["bad"], + average_OCR_time=_average_OCR_time, + number_imei_transaction=report_data[month][1][day]["usage"]["imei"], + number_invoice_transaction=report_data[month][1][day]["usage"]["invoice"], + feedback_accuracy=acumulated_acc["feedback_accuracy"], + reviewed_accuracy=acumulated_acc["reviewed_accuracy"], + ) + new_report.save() + data = extract_report_detail_list(self.data[month][1][day]["report_files"], lower=True) + data_workbook = dict2xlsx(data, _type='report_detail') + local_workbook = save_workbook_file(report_id + ".xlsx", new_report, data_workbook) + s3_key=save_report_to_S3(report_id, local_workbook) + return fine_data, save_data + + def get(self) -> Any: + # FIXME: This looks like a junk + _data = copy.deepcopy(self.data) + for month in _data.keys(): + _data[month][0]["images_quality"]["successful_percent"] = _data[month][0]["images_quality"]["successful"]/_data[month][0]["total_images"] if _data[month][0]["total_images"] > 0 else 0 + _data[month][0]["images_quality"]["bad_percent"] = _data[month][0]["images_quality"]["bad"]/_data[month][0]["total_images"] if _data[month][0]["total_images"] > 0 else 0 + num_transaction_imei = 0 + num_transaction_invoice = 0 + for day in _data[month][1].keys(): + num_transaction_imei += _data[month][1][day]["usage"].get("imei", 0) + num_transaction_invoice += _data[month][1][day]["usage"].get("invoice", 0) + _data[month][1][day]["average_accuracy_rate"]["imei"] = _data[month][1][day]["average_accuracy_rate"]["imei"]() + _data[month][1][day]["average_accuracy_rate"]["purchase_date"] = _data[month][1][day]["average_accuracy_rate"]["purchase_date"]() + _data[month][1][day]["average_accuracy_rate"]["retailer_name"] = _data[month][1][day]["average_accuracy_rate"]["retailer_name"]() + _data[month][1][day]["average_accuracy_rate"]["sold_to_party"] = _data[month][1][day]["average_accuracy_rate"]["sold_to_party"]() + _data[month][1][day]["average_processing_time"]["imei"] = _data[month][1][day]["average_processing_time"]["imei"]() + _data[month][1][day]["average_processing_time"]["invoice"] = _data[month][1][day]["average_processing_time"]["invoice"]() + + _data[month][1][day]["feedback_accuracy"]["imei_number"] = _data[month][1][day]["feedback_accuracy"]["imei_number"]() + _data[month][1][day]["feedback_accuracy"]["purchase_date"] = _data[month][1][day]["feedback_accuracy"]["purchase_date"]() + _data[month][1][day]["feedback_accuracy"]["retailername"] = _data[month][1][day]["feedback_accuracy"]["retailername"]() + _data[month][1][day]["feedback_accuracy"]["sold_to_party"] = _data[month][1][day]["feedback_accuracy"]["sold_to_party"]() + _data[month][1][day]["reviewed_accuracy"]["imei_number"] = _data[month][1][day]["reviewed_accuracy"]["imei_number"]() + _data[month][1][day]["reviewed_accuracy"]["purchase_date"] = _data[month][1][day]["reviewed_accuracy"]["purchase_date"]() + _data[month][1][day]["reviewed_accuracy"]["retailername"] = _data[month][1][day]["reviewed_accuracy"]["retailername"]() + _data[month][1][day]["reviewed_accuracy"]["sold_to_party"] = _data[month][1][day]["reviewed_accuracy"]["sold_to_party"]() + _data[month][1][day].pop("report_files") + + _data[month][1][day]["images_quality"]["successful_percent"] = _data[month][1][day]["images_quality"]["successful"]/_data[month][1][day]["total_images"] if _data[month][1][day]["total_images"] > 0 else 0 + _data[month][1][day]["images_quality"]["bad_percent"] = _data[month][1][day]["images_quality"]["bad"]/_data[month][1][day]["total_images"] if _data[month][1][day]["total_images"] > 0 else 0 + + _data[month][0]["usage"]["imei"] = num_transaction_imei + _data[month][0]["usage"]["invoice"] = num_transaction_invoice + _data[month][0]["average_accuracy_rate"]["imei"] = _data[month][0]["average_accuracy_rate"]["imei"]() + _data[month][0]["average_accuracy_rate"]["purchase_date"] = _data[month][0]["average_accuracy_rate"]["purchase_date"]() + _data[month][0]["average_accuracy_rate"]["retailer_name"] = _data[month][0]["average_accuracy_rate"]["retailer_name"]() + _data[month][0]["average_accuracy_rate"]["sold_to_party"] = _data[month][0]["average_accuracy_rate"]["sold_to_party"]() + _data[month][0]["average_processing_time"]["imei"] = _data[month][0]["average_processing_time"]["imei"]() + _data[month][0]["average_processing_time"]["invoice"] = _data[month][0]["average_processing_time"]["invoice"]() + + _data[month][0]["feedback_accuracy"]["imei_number"] = _data[month][0]["feedback_accuracy"]["imei_number"]() + _data[month][0]["feedback_accuracy"]["purchase_date"] = _data[month][0]["feedback_accuracy"]["purchase_date"]() + _data[month][0]["feedback_accuracy"]["retailername"] = _data[month][0]["feedback_accuracy"]["retailername"]() + _data[month][0]["feedback_accuracy"]["sold_to_party"] = _data[month][0]["feedback_accuracy"]["sold_to_party"]() + _data[month][0]["reviewed_accuracy"]["imei_number"] = _data[month][0]["reviewed_accuracy"]["imei_number"]() + _data[month][0]["reviewed_accuracy"]["purchase_date"] = _data[month][0]["reviewed_accuracy"]["purchase_date"]() + _data[month][0]["reviewed_accuracy"]["retailername"] = _data[month][0]["reviewed_accuracy"]["retailername"]() + _data[month][0]["reviewed_accuracy"]["sold_to_party"] = _data[month][0]["reviewed_accuracy"]["sold_to_party"]() + + return _data + + class MonthReportAccumulate: def __init__(self): self.month = None @@ -89,7 +382,7 @@ class MonthReportAccumulate: self.total["usage"]["invoice"] += report.number_invoice_transaction def add(self, report): - report_month = report.created_at.month + report_month = report.start_at.month if self.month is None: self.month = report_month @@ -103,7 +396,7 @@ class MonthReportAccumulate: new_data = copy.deepcopy(self.data_format)[0] new_data["num_imei"] = report.number_imei new_data["subs"] = report.subsidiary - new_data["extraction_date"] = report.created_at + new_data["extraction_date"] = report.start_at new_data["num_invoice"] = report.number_invoice new_data["total_images"] = report.number_images new_data["images_quality"]["successful"] = report.number_images - report.number_bad_images @@ -130,10 +423,38 @@ class MonthReportAccumulate: self.accumulate(report) return True + def clear(self): + self.month = None + self.total = { + 'subs': "+", + 'extraction_date': "Subtotal ()", + 'total_images': 0, + 'images_quality': { + 'successful': 0, + 'successful_percent': 0, + 'bad': 0, + 'bad_percent': 0 + }, + 'average_accuracy_rate': { + 'imei': IterAvg(), + 'purchase_date': IterAvg(), + 'retailer_name': IterAvg() + }, + 'average_processing_time': { + 'imei': IterAvg(), + 'invoice': IterAvg() + }, + 'usage': { + 'imei':0, + 'invoice': 0 + } + } + self.data = [] + def __call__(self): - self.total["images_quality"]["successful_percent"] += self.total["images_quality"]["successful"]/self.total["total_images"] if self.total["total_images"] else 0 - self.total["images_quality"]["bad_percent"] += self.total["images_quality"]["bad"]/self.total["total_images"] if self.total["total_images"] else 0 total = copy.deepcopy(self.total) + total["images_quality"]["successful_percent"] = total["images_quality"]["successful"]/total["total_images"] if total["total_images"] else 0 + total["images_quality"]["bad_percent"] = total["images_quality"]["bad"]/total["total_images"] if total["total_images"] else 0 total["average_accuracy_rate"]["imei"] = total["average_accuracy_rate"]["imei"]() total["average_accuracy_rate"]["purchase_date"] = total["average_accuracy_rate"]["purchase_date"]() total["average_accuracy_rate"]["retailer_name"] = total["average_accuracy_rate"]["retailer_name"]() @@ -167,6 +488,16 @@ class IterAvg: def __call__(self): return self.avg +def validate_feedback_file(feedback, predict): + if feedback: + imei_feedback = feedback.get("imei_number", []) + imei_feedback = [x for x in imei_feedback if x != ""] + num_imei_feedback = len(imei_feedback) + num_imei_predict = len(predict.get("imei_number", [])) + if num_imei_feedback != num_imei_predict: + return False + return True + def first_of_list(the_list): if not the_list: return None @@ -210,9 +541,11 @@ def extract_report_detail_list(report_detail_list, lower=False, in_percent=True) data[i][key] = data[i][key]*100 return data -def count_transactions(start_date, end_date): +def count_transactions(start_date, end_date, subsidiary="all"): base_query = Q(created_at__range=(start_date, end_date)) base_query &= Q(is_test_request=False) + if subsidiary and subsidiary.lower().replace(" ", "")!="all": + base_query &= Q(redemption_id__startswith=subsidiary) transaction_att = {} print(f"[DEBUG]: atracting transactions attribute...") @@ -226,6 +559,10 @@ def count_transactions(start_date, end_date): transaction_att[doc_type] = 1 else: transaction_att[doc_type] += 1 + if not transaction_att.get("request", None): + transaction_att["request"] = 1 + else: + transaction_att["request"] += 1 return transaction_att def convert_datetime_format(date_string: str, is_gt=False) -> str: @@ -359,6 +696,7 @@ def calculate_and_save_subcription_file(report, request): reviewed_accuracy=att["acc"]["reviewed"], acc=att["avg_acc"], time_cost=image.processing_time, + is_bad_image=att["is_bad_image"], bad_image_reason=image.reason, counter_measures=image.counter_measures, error="|".join(att["err"]) @@ -387,6 +725,72 @@ def calculate_and_save_subcription_file(report, request): continue return request_att + +def calculate_a_request(report, request): + request_att = {"acc": {"feedback": {"imei_number": [], + "purchase_date": [], + "retailername": [], + "sold_to_party": [], + }, + "reviewed": {"imei_number": [], + "purchase_date": [], + "retailername": [], + "sold_to_party": [], + }}, + "err": [], + "time_cost": {}, + "total_images": 0, + "bad_images": 0} + images = SubscriptionRequestFile.objects.filter(request=request) + report_files = [] + for image in images: + status, att = calculate_subcription_file(image) + if status != 200: + continue + image.feedback_accuracy = att["acc"]["feedback"] + image.reviewed_accuracy = att["acc"]["reviewed"] + image.is_bad_image_quality = att["is_bad_image"] + image.save() + new_report_file = ReportFile(report=report, + correspond_request_id=request.request_id, + correspond_redemption_id=request.redemption_id, + doc_type=image.doc_type, + predict_result=image.predict_result, + feedback_result=image.feedback_result, + reviewed_result=image.reviewed_result, + feedback_accuracy=att["acc"]["feedback"], + reviewed_accuracy=att["acc"]["reviewed"], + acc=att["avg_acc"], + is_bad_image=att["is_bad_image"], + time_cost=image.processing_time, + bad_image_reason=image.reason, + counter_measures=image.counter_measures, + error="|".join(att["err"]) + ) + report_files.append(new_report_file) + if request_att["time_cost"].get(image.doc_type, None): + request_att["time_cost"][image.doc_type].append(image.processing_time) + else: + request_att["time_cost"][image.doc_type] = [image.processing_time] + try: + request_att["acc"]["feedback"]["imei_number"] += att["acc"]["feedback"]["imei_number"] + request_att["acc"]["feedback"]["purchase_date"] += att["acc"]["feedback"]["purchase_date"] + request_att["acc"]["feedback"]["retailername"] += att["acc"]["feedback"]["retailername"] + request_att["acc"]["feedback"]["sold_to_party"] += att["acc"]["feedback"]["sold_to_party"] + + request_att["acc"]["reviewed"]["imei_number"] += att["acc"]["reviewed"]["imei_number"] + request_att["acc"]["reviewed"]["purchase_date"] += att["acc"]["reviewed"]["purchase_date"] + request_att["acc"]["reviewed"]["retailername"] += att["acc"]["reviewed"]["retailername"] + request_att["acc"]["reviewed"]["sold_to_party"] += att["acc"]["reviewed"]["sold_to_party"] + + request_att["bad_images"] += int(att["is_bad_image"]) + request_att["total_images"] += 1 + request_att["err"] += att["err"] + except Exception as e: + print(e) + continue + + return request_att, report_files def calculate_subcription_file(subcription_request_file): @@ -490,5 +894,5 @@ def calculate_attributions(request): # for one request, return in order return acc, data, time_cost, image_quality_num, error def shadow_report(report_id, query): - c_connector.make_a_report( + c_connector.make_a_report_2( (report_id, query)) \ No newline at end of file diff --git a/cope2n-api/fwd_api/utils/file.py b/cope2n-api/fwd_api/utils/file.py index 3f44694..f7434d9 100644 --- a/cope2n-api/fwd_api/utils/file.py +++ b/cope2n-api/fwd_api/utils/file.py @@ -7,6 +7,7 @@ import json from PIL import Image, ExifTags from django.core.files.uploadedfile import TemporaryUploadedFile from django.utils import timezone +from datetime import datetime from fwd import settings from ..utils import s3 as S3Util @@ -30,6 +31,16 @@ s3_client = S3Util.MinioS3Client( bucket_name=settings.S3_BUCKET_NAME ) +def convert_date_string(date_string): + # Parse the input date string + date_format = "%Y-%m-%d %H:%M:%S.%f %z" + parsed_date = datetime.strptime(date_string, date_format) + + # Format the date as "YYYYMMDD" + formatted_date = parsed_date.strftime("%Y%m%d") + + return formatted_date + def validate_report_list(request): start_date_str = request.GET.get('start_date') end_date_str = request.GET.get('end_date') @@ -190,10 +201,13 @@ def save_feedback_file(file_name: str, rq: FeedbackRequest, uploaded_file: dict) csvfile.write(file_contents) return file_path -def save_workbook_file(file_name: str, rp: Report, workbook): +def save_workbook_file(file_name: str, rp: Report, workbook, prefix=""): report_id = str(rp.report_id) - folder_path = os.path.join(settings.MEDIA_ROOT, "report", report_id) + if not prefix: + folder_path = os.path.join(settings.MEDIA_ROOT, "report", report_id) + else: + folder_path = os.path.join(settings.MEDIA_ROOT, "report", prefix) os.makedirs(folder_path, exist_ok = True) file_path = os.path.join(folder_path, file_name) @@ -388,12 +402,17 @@ def build_media_url_v2(media_id: str, user_id: int, sub_id: int, u_sync_id: str) def get_value(_dict, keys): keys = keys.split('.') value = _dict - for key in keys: - if not key in value.keys(): - return "-" - else: - value = value.get(key, {}) - + try: + for key in keys: + if not key in value.keys(): + return "-" + else: + value = value.get(key, {}) + except Exception as e: + print(f"[ERROR]: {e}") + print(f"[ERROR]: value: {value}") + print(f"[ERROR]: keys: {keys}") + if not value: return "-" elif isinstance(value, list): @@ -475,13 +494,23 @@ def dict2xlsx(input: json, _type='report'): ws[key + str(start_index)].border = border if _type == 'report': - ws[key + str(start_index)].font = font_black_bold - if key_index == 0 or (key_index >= 9 and key_index <= 15): - ws[key + str(start_index)].fill = fill_gray - elif key_index == 1: - ws[key + str(start_index)].fill = fill_green - elif key_index >= 4 and key_index <= 8: - ws[key + str(start_index)].fill = fill_yellow + if subtotal['subs'] == '+': + ws[key + str(start_index)].font = font_black_bold + if key_index == 0 or (key_index >= 9 and key_index <= 15): + ws[key + str(start_index)].fill = fill_gray + elif key_index == 1: + ws[key + str(start_index)].fill = fill_green + elif key_index >= 4 and key_index <= 8: + ws[key + str(start_index)].fill = fill_yellow + else: + if 'average_accuracy_rate' in mapping[key] and type(value) in [int, float] and value < 95: + ws[key + str(start_index)].style = normal_cell_red + elif 'average_processing_time' in mapping[key] and type(value) in [int, float] and value > 2.0: + ws[key + str(start_index)].style = normal_cell_red + elif 'bad_percent' in mapping[key] and type(value) in [int, float] and value > 10: + ws[key + str(start_index)].style = normal_cell_red + else : + ws[key + str(start_index)].style = normal_cell elif _type == 'report_detail': if 'accuracy' in mapping[key] and type(value) in [int, float] and value < 75: ws[key + str(start_index)].style = normal_cell_red @@ -491,21 +520,5 @@ def dict2xlsx(input: json, _type='report'): ws[key + str(start_index)].style = normal_cell start_index += 1 - - if 'data' in subtotal.keys(): - for record in subtotal['data']: - for key in mapping.keys(): - value = get_value(record, mapping[key]) - ws[key + str(start_index)] = value - if 'average_accuracy_rate' in mapping[key] and type(value) in [int, float] and value < 95: - ws[key + str(start_index)].style = normal_cell_red - elif 'average_processing_time' in mapping[key] and type(value) in [int, float] and value > 2.0: - ws[key + str(start_index)].style = normal_cell_red - elif 'bad_percent' in mapping[key] and type(value) in [int, float] and value > 10: - ws[key + str(start_index)].style = normal_cell_red - else : - ws[key + str(start_index)].style = normal_cell - - start_index += 1 return wb diff --git a/cope2n-api/fwd_api/utils/redis.py b/cope2n-api/fwd_api/utils/redis.py index ff65035..d8d74e1 100644 --- a/cope2n-api/fwd_api/utils/redis.py +++ b/cope2n-api/fwd_api/utils/redis.py @@ -22,6 +22,9 @@ class RedisUtils: for key, value in self.redis_client.hgetall(request_id).items(): resutlt[key] = json.loads(value) return resutlt + + def get_specific_cache(self, request_id, key): + return json.loads(self.redis_client.hget(request_id, key)) def get_size(self, request_id): return self.redis_client.hlen(request_id) diff --git a/cope2n-api/fwd_api/utils/subsidiary.py b/cope2n-api/fwd_api/utils/subsidiary.py new file mode 100644 index 0000000..d10c879 --- /dev/null +++ b/cope2n-api/fwd_api/utils/subsidiary.py @@ -0,0 +1,11 @@ +from fwd.settings import SUBS + +def map_subsidiary_long_to_short(long_sub): + short_sub = SUBS.get(long_sub.upper(), "all") + return short_sub.upper() + +def map_subsidiary_short_to_long(short_sub): + for k, v in SUBS.items(): + if v == short_sub.upper(): + return k + return "ALL" \ No newline at end of file diff --git a/cope2n-api/fwd_api/utils/time_stuff.py b/cope2n-api/fwd_api/utils/time_stuff.py new file mode 100644 index 0000000..bbdf6cf --- /dev/null +++ b/cope2n-api/fwd_api/utils/time_stuff.py @@ -0,0 +1,9 @@ +def is_the_same_day(first_day, second_day): + if first_day.day == second_day.day and first_day.month == second_day.month and first_day.year == second_day.year: + return True + return False + +def is_the_same_month(first_day, second_day): + if first_day.month == second_day.month and first_day.year == second_day.year: + return True + return False \ No newline at end of file diff --git a/cope2n-api/scripts/script.py b/cope2n-api/scripts/script.py new file mode 100644 index 0000000..713c925 --- /dev/null +++ b/cope2n-api/scripts/script.py @@ -0,0 +1,68 @@ +import os +import time +import requests +from datetime import datetime + +# Get the proxy URL from the environment variable +interval = 60*60*1 # 1 minute +update_cost = 60*3 +proxy_url = os.getenv('PROXY', "localhost") + +# Define the login API URL +login_url = f'{proxy_url}/api/ctel/login/' +login_token = None + +# Define the login credentials +login_credentials = { + 'username': 'sbt', + 'password': '7Eg4AbWIXDnufgn' +} + +# Define the command to call the update API +update_url = f'{proxy_url}/api/ctel/make_report/' +update_params = { + 'is_daily_report': 'true', + 'report_overview_duration': '', + 'subsidiary': None +} + +"report_overview_duration" + +def update_report(login_token, report_overview_duration=["30d", "7d"], subsidiary=["all", "SEAU", "SESP", "SME", "SEPCO", "TSE", "SEIN"]): + headers = {'Authorization': login_token} + for dur in report_overview_duration: + for sub in subsidiary: + update_params["report_overview_duration"] = dur + update_params["subsidiary"] = sub + update_response = requests.get(update_url, params=update_params, headers=headers) + print("[INFO]: update_response at {} by {} - {} with status {}".format(datetime.now(), dur, sub, update_response.status_code)) + update_response.raise_for_status() + time.sleep(update_cost) + +# Define the interval in seconds between API calls +# time.sleep(60) + +while True: + # Call the login API and retrieve the login token + if not login_token: + login_response = requests.post(login_url, data=login_credentials) + # login_response.raise_for_status() + if login_response.status_code == 200: + login_token = login_response.json()['token'] + print("[INFO] relogged in at {}".format(datetime.now())) + + # Call the update API + try: + update_report(login_token) + except Exception as e: + print(f"[ERROR]: {e}") + print(f"[ERROR]: Failed to update_response, retrying...") + login_response = requests.post(login_url, data=login_credentials) + # login_response.raise_for_status() + if login_response.status_code == 200: + login_token = login_response.json()['token'] + print("[INFO] relogged in at {}".format(datetime.now())) + update_report(login_token) + + # Wait for the specified interval + time.sleep(interval) \ No newline at end of file diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 44f8c94..1d9e946 100755 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -84,12 +84,12 @@ services: depends_on: db-sbt: condition: service_started - # command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input && - # python manage.py makemigrations && - # python manage.py migrate && - # python manage.py compilemessages && - # gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod - command: bash -c "tail -f > /dev/null" + command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input && + python manage.py makemigrations && + python manage.py migrate && + python manage.py compilemessages && + gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod + # command: bash -c "tail -f > /dev/null" minio: image: minio/minio @@ -175,6 +175,7 @@ services: working_dir: /app command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5" + # command: bash -c "tail -f > /dev/null" # Back-end persistent db-sbt: diff --git a/document-classification-kv-demo b/document-classification-kv-demo new file mode 160000 index 0000000..220954c --- /dev/null +++ b/document-classification-kv-demo @@ -0,0 +1 @@ +Subproject commit 220954c5c6bfed15e93e26b2adacf28ff8b75baf diff --git a/junk_tests/date_compare.py b/junk_tests/date_compare.py new file mode 100644 index 0000000..1e5e4f4 --- /dev/null +++ b/junk_tests/date_compare.py @@ -0,0 +1,17 @@ +from datetime import datetime + +# Assuming you have two datetime objects for the same day in different months +date_jan = datetime(2022, 2, 15, 12, 30, 0) +date_feb = datetime(2022, 2, 15, 8, 45, 0) + +# Check if they are the same day +if date_jan.day == date_feb.day and date_jan.month == date_feb.month and date_jan.year == date_feb.year: + print("They are the same day") +else: + print("They are different days") + +# Check if they are the same month +if date_jan.month == date_feb.month and date_jan.year == date_feb.year: + print("They are the same month") +else: + print("They are different months") \ No newline at end of file