Merge pull request #16 from SDSRV-IDP/enhancment/accuracy_calculation

Enhancement/accuracy calculation
Nguyen Viet Anh, 2024-02-06 10:17:27 +07:00 (committed by GitHub Enterprise)
commit 333815e2d5
19 changed files with 949 additions and 147 deletions

===== api-cronjob/Dockerfile (new file) =====

@@ -0,0 +1,9 @@
FROM python:3.9-slim
WORKDIR /app
COPY script.py .
RUN apt-get update && apt-get -y install curl
CMD [ "python", "script.py" ]
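Note: script.py itself is not part of this diff, so the cron container's behavior has to be inferred. Since the image only adds curl and runs a single Python script, a plausible minimal sketch is a loop that periodically triggers the report endpoint added later in this commit; the URL, interval, and stdlib-only approach below are assumptions, not values from the commit:

# Hypothetical script.py (illustrative sketch only; the committed file is not shown in this diff)
import time
import urllib.request

TRIGGER_URL = "http://backend:9000/api/ctel/make_report/"  # assumed host and route
INTERVAL_SECONDS = 60 * 60  # assumed schedule

while True:
    try:
        with urllib.request.urlopen(TRIGGER_URL, timeout=30) as resp:
            print("triggered report job:", resp.status)
    except Exception as e:
        # keep the container alive across transient network errors
        print("trigger failed:", e)
    time.sleep(INTERVAL_SECONDS)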

===== next file =====

@@ -143,8 +143,8 @@ LANGUAGE_CODE = "en-us"
USE_I18N = True
CELERY_ENABLE_UTC = False
-CELERY_TIMEZONE = "Asia/Ho_Chi_Minh"
-TIME_ZONE = "Asia/Ho_Chi_Minh"
+CELERY_TIMEZONE = "Asia/Singapore"
+TIME_ZONE = "Asia/Singapore"
USE_TZ = True

# Static files (CSS, JavaScript, Images)

@@ -220,6 +220,20 @@ SIZE_TO_COMPRESS = 2 * 1024 * 1024
MAX_NUMBER_OF_TEMPLATE = 3
MAX_PAGES_OF_PDF_FILE = 50
+OVERVIEW_REFRESH_INTERVAL = 2
+OVERVIEW_REPORT_ROOT = "overview"
+OVERVIEW_REPORT_DURATION = ["30d", "7d"]
+SUBS = {
+    "SEAU": "AU",
+    "SESP": "SG",
+    "SME": "MY",
+    "SEPCO": "PH",
+    "TSE": "TH",
+    "SEIN": "ID",
+    "ALL": "all"
+}
CACHES = {
    'default': {
        'BACKEND': 'django.core.cache.backends.dummy.DummyCache',
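Note: the new SUBS table backs the map_subsidiary_long_to_short / map_subsidiary_short_to_long helpers imported by the views below. Their implementation (in ..utils.subsidiary) is not among the hunks shown; assuming they are plain lookups over settings.SUBS, they would reduce to roughly:

# Sketch only - the committed helpers are not shown in this diff.
from fwd import settings

def map_subsidiary_long_to_short(long_sub):
    # e.g. "SEAU" -> "AU"; falling back to "all" for unknown names is an assumption
    return settings.SUBS.get(str(long_sub).upper(), "all")

def map_subsidiary_short_to_long(short_sub):
    # invert the table: "AU" -> "SEAU"
    for long_sub, short in settings.SUBS.items():
        if short.lower() == str(short_sub).lower():
            return long_sub
    return short_sub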

===== next file =====

@@ -14,9 +14,12 @@ import json
from ..exception.exceptions import InvalidException, RequiredFieldException, NotFoundException
from ..models import SubscriptionRequest, Report, ReportFile
from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list, extract_report_detail_list, IterAvg
-from ..utils.file import download_from_S3
+from ..utils.file import download_from_S3, convert_date_string
+from ..utils.redis import RedisUtils
from ..utils.process import string_to_boolean
-from ..celery_worker.client_connector import c_connector
+from ..utils.subsidiary import map_subsidiary_long_to_short, map_subsidiary_short_to_long

+redis_client = RedisUtils()

class AccuracyViewSet(viewsets.ViewSet):
    lookup_field = "username"
@@ -226,6 +229,12 @@ class AccuracyViewSet(viewsets.ViewSet):
                description='Subsidiary',
                type=OpenApiTypes.STR,
            ),
+           OpenApiParameter(
+               name='report_overview_duration',
+               location=OpenApiParameter.QUERY,
+               description=f'one of {settings.OVERVIEW_REPORT_DURATION}',
+               type=OpenApiTypes.STR,
+           ),
        ],
        responses=None, tags=['Accuracy']
    )
@@ -240,12 +249,26 @@ class AccuracyViewSet(viewsets.ViewSet):
        include_test = string_to_boolean(request.GET.get('include_test', "false"))
        subsidiary = request.GET.get("subsidiary", "all")
        is_daily_report = string_to_boolean(request.GET.get('is_daily_report', "false"))
+       report_overview_duration = request.GET.get("report_overview_duration", "")
+       subsidiary = map_subsidiary_long_to_short(subsidiary)

-       try:
-           start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z')
-           end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z')
-       except ValueError:
-           raise InvalidException(excArgs="Date format")
+       if is_daily_report:
+           if report_overview_duration not in settings.OVERVIEW_REPORT_DURATION:
+               raise InvalidException(excArgs="overview duration")
+           end_date = timezone.now()
+           if report_overview_duration == "30d":
+               start_date = end_date - timezone.timedelta(days=30)
+           else:
+               start_date = end_date - timezone.timedelta(days=7)
+           start_date = start_date.replace(hour=0, minute=0, second=0, microsecond=0)
+           start_date_str = start_date.strftime('%Y-%m-%dT%H:%M:%S%z')
+           end_date_str = end_date.strftime('%Y-%m-%dT%H:%M:%S%z')
+       else:
+           try:
+               start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z')
+               end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z')
+           except ValueError:
+               raise InvalidException(excArgs="Date format")
        query_set = {"start_date_str": start_date_str,
                     "end_date_str": end_date_str,

@@ -255,7 +278,11 @@ class AccuracyViewSet(viewsets.ViewSet):
                     "include_test": include_test,
                     "subsidiary": subsidiary,
                     "is_daily_report": is_daily_report,
+                    "report_overview_duration": report_overview_duration
                     }
+       # if is_daily_report:
+       #     if (end_date-start_date) > timezone.timedelta(days=1):
+       #         raise InvalidException(excArgs="Date range")

        report_id = "report" + "_" + timezone.datetime.now().strftime("%Y%m%d%H%M%S%z") + "_" + uuid.uuid4().hex
        new_report: Report = Report(

@@ -268,8 +295,6 @@ class AccuracyViewSet(viewsets.ViewSet):
            end_at=end_date,
            status="Processing",
        )
-       if is_daily_report:
-           new_report.created_at = end_date
        new_report.save()
        # Background job to calculate accuracy
        shadow_report(report_id, query_set)
@@ -318,7 +343,7 @@ class AccuracyViewSet(viewsets.ViewSet):
        response = {
            'report_detail': data,
-           'metadata': {"subsidiary": report.subsidiary,
+           'metadata': {"subsidiary": map_subsidiary_short_to_long(report.subsidiary),
                         "start_at": report.start_at,
                         "end_at": report.end_at},
            'page': {
@@ -380,7 +405,7 @@ class AccuracyViewSet(viewsets.ViewSet):
        page_size = int(request.GET.get('page_size', 10))

        if not start_date_str or not end_date_str:
-           reports = Report.objects.all()
+           reports = Report.objects.all().order_by('created_at').reverse()
        else:
            try:
                start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z')
@@ -390,26 +415,35 @@
            base_query = Q(created_at__range=(start_date, end_date))
            if daily_report_only:
                base_query &= Q(is_daily_report=True)
-           reports = Report.objects.filter(base_query).order_by('created_at')
+           reports = Report.objects.filter(base_query).order_by('created_at').reverse()

        paginator = Paginator(reports, page_size)
        page = paginator.get_page(page_number)
        data = []
        for report in page:
+           acc_keys = ["purchase_date", "retailername", "imei_number", "avg"]
+           acc = {}
+           for key in acc_keys:
+               fb = report.feedback_accuracy.get(key, 0) if report.feedback_accuracy else 0
+               rv = report.reviewed_accuracy.get(key, 0) if report.reviewed_accuracy else 0
+               acc[key] = max([fb, rv])
            data.append({
                "ID": report.id,
                "Created Date": report.created_at,
+               "Start Date": report.start_at,
+               "End Date": report.end_at,
                "No. Requests": report.number_request,
                "Status": report.status,
-               "Purchase Date Acc": report.reviewed_accuracy.get("purchase_date", None) if report.reviewed_accuracy else None,
-               "Retailer Acc": report.feedback_accuracy.get("retailername", None) if report.reviewed_accuracy else None,
-               "IMEI Acc": report.feedback_accuracy.get("imei_number", None) if report.reviewed_accuracy else None,
-               "Avg. Accuracy": report.feedback_accuracy.get("avg", None) if report.reviewed_accuracy else None,
+               "Purchase Date Acc": acc["purchase_date"],
+               "Retailer Acc": acc["retailername"],
+               "IMEI Acc": acc["imei_number"],
+               "Avg. Accuracy": acc["avg"],
                "Avg. Client Request Time": report.average_client_time.get("avg", 0) if report.average_client_time else 0,
                "Avg. OCR Processing Time": report.average_OCR_time.get("avg", 0) if report.average_OCR_time else 0,
                "report_id": report.report_id,
+               "Subsidiary": map_subsidiary_short_to_long(report.subsidiary),
            })
        response = {
@@ -427,103 +461,79 @@
    @extend_schema(
        parameters=[
            OpenApiParameter(
-               name='start_date',
+               name='duration',
                location=OpenApiParameter.QUERY,
-               description='Start date (YYYY-mm-DDTHH:MM:SSZ)',
-               type=OpenApiTypes.DATE,
-               default='2023-01-02T00:00:00+0700',
+               description='one of [30d, 7d]',
+               type=OpenApiTypes.STR,
+               default='30d',
-           ),
-           OpenApiParameter(
-               name='end_date',
-               location=OpenApiParameter.QUERY,
-               description='End date (YYYY-mm-DDTHH:MM:SSZ)',
-               type=OpenApiTypes.DATE,
-               default='2024-01-10T00:00:00+0700',
            ),
            OpenApiParameter(
                name='subsidiary',
                location=OpenApiParameter.QUERY,
                description='Subsidiary',
                type=OpenApiTypes.STR,
-           ),
-           OpenApiParameter(
-               name='page',
-               location=OpenApiParameter.QUERY,
-               description='Page number',
-               type=OpenApiTypes.INT,
-               required=False
-           ),
-           OpenApiParameter(
-               name='page_size',
-               location=OpenApiParameter.QUERY,
-               description='Number of items per page',
-               type=OpenApiTypes.INT,
-               required=False
-           ),
+           )
        ],
        responses=None, tags=['Accuracy']
    )
    @action(detail=False, url_path="overview", methods=["GET"])
    def overview(self, request):
        if request.method == 'GET':
-           subsidiary = request.GET.get('subsidiary', None)
-           start_date_str = request.GET.get('start_date', "")
-           end_date_str = request.GET.get('end_date', "")
-           page_number = int(request.GET.get('page', 1))
-           page_size = int(request.GET.get('page_size', 10))
-           base_query = Q()
-           if start_date_str and end_date_str:
-               try:
-                   start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z')
-                   end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z')
-               except ValueError:
-                   raise InvalidException(excArgs="Date format")
-               base_query &= Q(created_at__range=(start_date, end_date))
-           if subsidiary:
-               base_query &= Q(subsidiary=subsidiary)
-           base_query &= Q(is_daily_report=True)
-           reports = Report.objects.filter(base_query).order_by('created_at')
-
-           paginator = Paginator(reports, page_size)
-           page = paginator.get_page(page_number)
-           data = []
-
-           this_month_report = MonthReportAccumulate()
-           for report in page:
-               res = this_month_report.add(report)
-               if not(res):
-                   _, _data, total = this_month_report()
-                   data += [total]
-                   data += _data
-                   this_month_report = MonthReportAccumulate()
-                   this_month_report.add(report)
-               else:
-                   continue
-           _, _data, total = this_month_report()
-           data += [total]
-           data += _data
-           # Generate xlsx file
-           # workbook = dict2xlsx(data, _type="report")
-           # tmp_file = f"/tmp/{str(uuid.uuid4())}.xlsx"
-           # os.makedirs(os.path.dirname(tmp_file), exist_ok=True)
-           # workbook.save(tmp_file)
-           # c_connector.remove_local_file((tmp_file, "fake_request_id"))
+           subsidiary = request.GET.get('subsidiary', "ALL")
+           duration = request.GET.get('duration', "")
+           subsidiary = map_subsidiary_long_to_short(subsidiary)
+
+           # Retrieve data from Redis
+           key = f"{subsidiary}_{duration}"
+           data = json.loads(redis_client.get_specific_cache(settings.OVERVIEW_REPORT_ROOT, key)).get("data", [])

            response = {
+               # 'file': load_xlsx_file(),
                'overview_data': data,
-               'page': {
-                   'number': page.number,
-                   'total_pages': page.paginator.num_pages,
-                   'count': page.paginator.count,
-               }
            }
            return JsonResponse(response, status=200)

        return JsonResponse({'error': 'Invalid request method.'}, status=405)

+   @extend_schema(
+       parameters=[
+           OpenApiParameter(
+               name='duration',
+               location=OpenApiParameter.QUERY,
+               description='one of [30d, 7d]',
+               type=OpenApiTypes.STR,
+               default='30d',
+           ),
+           OpenApiParameter(
+               name='subsidiary',
+               location=OpenApiParameter.QUERY,
+               description='Subsidiary',
+               type=OpenApiTypes.STR,
+           )
+       ],
+       responses=None, tags=['Accuracy']
+   )
+   @action(detail=False, url_path="overview_download_file", methods=["GET"])
+   def overview_download_file(self, request):
+       if request.method == 'GET':
+           subsidiary = request.GET.get('subsidiary', "ALL")
+           duration = request.GET.get('duration', "")
+           subsidiary = map_subsidiary_long_to_short(subsidiary)
+
+           s3_key = f"{subsidiary}_{duration}.xlsx"
+           tmp_file = "/tmp/" + s3_key
+           os.makedirs("/tmp", exist_ok=True)
+           download_from_S3("report/" + settings.OVERVIEW_REPORT_ROOT + "/" + s3_key, tmp_file)
+           file = open(tmp_file, 'rb')
+           response = FileResponse(file, status=200)
+           # Set the content type and content disposition headers
+           response['Content-Type'] = 'application/octet-stream'
+           response['Content-Disposition'] = 'attachment; filename="{0}"'.format(os.path.basename(tmp_file))
+           return response
+
+       return JsonResponse({'error': 'Invalid request method.'}, status=405)
    @extend_schema(
        parameters=[],

@@ -541,7 +551,7 @@
            raise NotFoundException(excArgs=f"report: {report_id}")
        report = Report.objects.filter(report_id=report_id).first()
        # download from s3 to local
-       tmp_file = "/tmp/" + "report_" + uuid.uuid4().hex + ".xlsx"
+       tmp_file = "/tmp/" + report.subsidiary + "_" + report.start_at.strftime("%Y%m%d") + "_" + report.end_at.strftime("%Y%m%d") + "_created_on_" + report.created_at.strftime("%Y%m%d") + ".xlsx"
        os.makedirs("/tmp", exist_ok=True)
        if not report.S3_file_name:
            raise NotFoundException(excArgs="S3 file name")
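Note: RedisUtils is introduced by this commit but its definition is not among the hunks shown. The call sites (get_specific_cache(settings.OVERVIEW_REPORT_ROOT, key) here, set_cache(root, key, json.dumps(...)) in the report worker below), together with the commented-out redis.Redis(host=settings.REDIS_HOST, port=settings.REDIS_PORT, decode_responses=True) constructor in utils/accuracy.py, suggest a thin wrapper along these lines; treat it as a sketch, not the committed code:

# Sketch of RedisUtils inferred from its call sites in this commit.
import redis
from fwd import settings

class RedisUtils:
    def __init__(self):
        # mirrors the commented-out constructor in utils/accuracy.py
        self.client = redis.Redis(host=settings.REDIS_HOST, port=settings.REDIS_PORT, decode_responses=True)

    def set_cache(self, root, key, value):
        # one Redis hash per overview root, one field per "{subsidiary}_{duration}" key (assumed layout)
        self.client.hset(root, key, value)

    def get_specific_cache(self, root, key):
        return self.client.hget(root, key)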

===== next file =====

@@ -36,6 +36,8 @@ class CeleryConnector:
        'remove_local_file': {'queue': "remove_local_file"},
        'csv_feedback': {'queue': "csv_feedback"},
        'make_a_report': {'queue': "report"},
+       'make_a_report_2': {'queue': "report_2"},
    }
    app = Celery(

@@ -45,12 +47,16 @@
    )
    def make_a_report(self, args):
        return self.send_task('make_a_report', args)
+   def make_a_report_2(self, args):
+       return self.send_task('make_a_report_2', args)
    def csv_feedback(self, args):
        return self.send_task('csv_feedback', args)
    def do_pdf(self, args):
        return self.send_task('do_pdf', args)
-   def upload_file_to_s3(self, args):
-       return self.send_task('upload_file_to_s3', args)
+   def upload_feedback_to_s3(self, args):
+       return self.send_task('upload_feedback_to_s3', args)
    def upload_file_to_s3(self, args):
        return self.send_task('upload_file_to_s3', args)
    def upload_report_to_s3(self, args):

@@ -59,6 +65,7 @@
        return self.send_task('upload_obj_to_s3', args)
    def remove_local_file(self, args):
        return self.send_task('remove_local_file', args, countdown=280) # nearest execution of this task in 280 seconds
    def process_fi(self, args):
        return self.send_task('process_fi_invoice', args)
    def process_fi_result(self, args):
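Note: with the new route registered above, dispatching onto the report_2 queue is a single send_task call. This is exactly how the shadow_report helper in utils/accuracy.py (further down in this commit) now enqueues report generation:

# From utils/accuracy.py later in this diff: report generation now goes to the new worker.
def shadow_report(report_id, query):
    c_connector.make_a_report_2((report_id, query))  # routed to the "report_2" queue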

===== next file =====

@@ -13,10 +13,13 @@ from fwd_api.models import SubscriptionRequestFile, FeedbackRequest, Report
from ..utils import file as FileUtils
from ..utils import process as ProcessUtil
from ..utils import s3 as S3Util
+from ..utils.accuracy import validate_feedback_file
from fwd_api.constant.common import ProcessType
import csv
import json
+import copy
+from fwd_api.utils.accuracy import predict_result_to_ready
from celery.utils.log import get_task_logger

from fwd import settings

@@ -79,6 +82,7 @@ def process_csv_feedback(csv_file_path, feedback_id):
                continue
            else:
                sub_rq = sub_rqs[0]
+           images = SubscriptionRequestFile.objects.filter(request=sub_rq)
            fb = {}
            # update user result (with validate)
            redemption_id = row.get('redemptionNumber')

@@ -99,6 +103,42 @@ def process_csv_feedback(csv_file_path, feedback_id):
            if len(redemption_id) > 0:
                sub_rq.redemption_id = redemption_id
            sub_rq.save()
# Update files
time_cost = {"imei": [], "invoice": [], "all": []}
imei_count = 0
if sub_rq.ai_inference_profile is None:
time_cost["imei"] = [-1 for _ in range(len(images))]
time_cost["invoice"] = [-1]
time_cost["all"] = [-1]
else:
for k, v in sub_rq.ai_inference_profile.items():
time_cost[k.split("_")[0]].append(v["inference"][1][0] - v["inference"][0] + (v["postprocess"][1]-v["postprocess"][0]))
            for i, image in enumerate(images):
                _predict_result = copy.deepcopy(predict_result_to_ready(sub_rq.predict_result))
                _feedback_result = copy.deepcopy(sub_rq.feedback_result)
                _reviewed_result = copy.deepcopy(sub_rq.reviewed_result)
                # default list covers indices 0..index_in_request when the doc type is missing from time_cost
                image.processing_time = time_cost.get(image.doc_type, [0 for _ in range(image.index_in_request + 1)])[image.index_in_request]
                if not validate_feedback_file(_feedback_result, _predict_result):
                    status[request_id] = "Misaligned imei number between feedback and predict"
                    continue
                if image.doc_type == "invoice":
                    _predict_result["imei_number"] = []
                    if _feedback_result:
                        _feedback_result["imei_number"] = []
                    if _reviewed_result:
                        _reviewed_result["imei_number"] = []
                else:
                    _predict_result = {"retailername": None, "sold_to_party": None, "purchase_date": [], "imei_number": [_predict_result["imei_number"][image.index_in_request]]}
                    _feedback_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_feedback_result["imei_number"][image.index_in_request]]} if _feedback_result else None
                    _reviewed_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_reviewed_result["imei_number"][image.index_in_request]]} if _reviewed_result else None
image.predict_result = _predict_result
image.feedback_result = _feedback_result
image.reviewed_result = _reviewed_result
image.save()
        # update log into database
        feedback_rq = FeedbackRequest.objects.filter(feedback_id=feedback_id).first()
        feedback_rq.error_status = status
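Note: the time-cost arithmetic above implies a specific shape for ai_inference_profile. The layout below is inferred from the indexing (v["inference"][1][0], v["inference"][0], v["postprocess"][0..1]) and is an assumption, since the code that writes the profile is not part of this diff:

# Hypothetical ai_inference_profile entry; keys look like "<doc_type>_<index>".
profile = {
    "imei_0": {
        "inference": [1000.0, [1002.5]],  # [start, [end, ...]] timestamps in seconds (assumed)
        "postprocess": [1002.5, 1003.0],  # [start, end] (assumed)
    }
}
v = profile["imei_0"]
cost = v["inference"][1][0] - v["inference"][0] + (v["postprocess"][1] - v["postprocess"][0])
# 2.5 + 0.5 == 3.0 seconds, which is what gets appended to time_cost["imei"]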

===== next file =====

@@ -3,14 +3,19 @@ import traceback
from fwd_api.models import SubscriptionRequest, Report, ReportFile
from fwd_api.celery_worker.worker import app
from ..utils import s3 as S3Util
-from ..utils.accuracy import update_temp_accuracy, IterAvg, calculate_and_save_subcription_file, count_transactions, extract_report_detail_list
+from ..utils.accuracy import update_temp_accuracy, IterAvg, calculate_and_save_subcription_file, count_transactions, extract_report_detail_list, calculate_a_request, ReportAccumulateByRequest
from ..utils.file import dict2xlsx, save_workbook_file, save_report_to_S3
+from ..utils import time_stuff
+from ..utils.redis import RedisUtils
from django.utils import timezone
from django.db.models import Q
+import json
+import copy
from celery.utils.log import get_task_logger
from fwd import settings

+redis_client = RedisUtils()

logger = get_task_logger(__name__)
@@ -29,6 +34,7 @@ def mean_list(l):

@app.task(name='make_a_report')
def make_a_report(report_id, query_set):
+   # TODO: to be deprecated
    try:
        start_date = timezone.datetime.strptime(query_set["start_date_str"], '%Y-%m-%dT%H:%M:%S%z')
        end_date = timezone.datetime.strptime(query_set["end_date_str"], '%Y-%m-%dT%H:%M:%S%z')
@@ -105,7 +111,7 @@ def make_a_report(report_id, query_set):
            errors += request_att["err"]
            num_request += 1

-       transaction_att = count_transactions(start_date, end_date)
+       transaction_att = count_transactions(start_date, end_date, report.subsidiary)
        # Do saving process
        report.number_request = num_request
        report.number_images = number_images
@@ -151,4 +157,155 @@
    except Exception as e:
        print("[ERROR]: an error occured while processing report: ", report_id)
        traceback.print_exc()
        return 400
@app.task(name='make_a_report_2')
def make_a_report_2(report_id, query_set):
try:
start_date = timezone.datetime.strptime(query_set["start_date_str"], '%Y-%m-%dT%H:%M:%S%z')
end_date = timezone.datetime.strptime(query_set["end_date_str"], '%Y-%m-%dT%H:%M:%S%z')
base_query = Q(created_at__range=(start_date, end_date))
if query_set["request_id"]:
base_query &= Q(request_id=query_set["request_id"])
if query_set["redemption_id"]:
base_query &= Q(redemption_id=query_set["redemption_id"])
base_query &= Q(is_test_request=False)
if isinstance(query_set["include_test"], str):
query_set["include_test"] = True if query_set["include_test"].lower() in ["true", "yes", "1"] else False
if query_set["include_test"]:
# base_query = ~base_query
base_query.children = base_query.children[:-1]
elif isinstance(query_set["include_test"], bool):
if query_set["include_test"]:
base_query = ~base_query
if isinstance(query_set["subsidiary"], str):
if query_set["subsidiary"] and query_set["subsidiary"].lower().replace(" ", "")!="all":
base_query &= Q(redemption_id__startswith=query_set["subsidiary"])
if isinstance(query_set["is_reviewed"], str):
if query_set["is_reviewed"] == "reviewed":
base_query &= Q(is_reviewed=True)
elif query_set["is_reviewed"] == "not reviewed":
base_query &= Q(is_reviewed=False)
# elif query_set["is_reviewed"] == "all":
# pass
errors = []
# Create a placeholder to fill
accuracy = {"feedback" :{"imei_number": IterAvg(),
"purchase_date": IterAvg(),
"retailername": IterAvg(),
"sold_to_party": IterAvg(),},
"reviewed" :{"imei_number": IterAvg(),
"purchase_date": IterAvg(),
"retailername": IterAvg(),
"sold_to_party": IterAvg(),}
} # {"imei": {"acc": 0.1, count: 1}, ...}
time_cost = {"invoice": IterAvg(),
"imei": IterAvg()}
number_images = 0
number_bad_images = 0
# TODO: Multithreading
# Calculate accuracy, processing time, ....Then save.
subscription_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at')
report: Report = \
Report.objects.filter(report_id=report_id).first()
# TODO: number of transaction by doc type
num_request = 0
report_files = []
report_engine = ReportAccumulateByRequest(report.subsidiary)
for request in subscription_requests:
if request.status != 200 or not (request.reviewed_result or request.feedback_result):
# Failed requests or lack of reviewed_result/feedback_result
continue
request_att, _report_files = calculate_a_request(report, request)
report_files += _report_files
report_engine.add(request, _report_files)
request.feedback_accuracy = {"imei_number" : mean_list(request_att["acc"]["feedback"].get("imei_number", [None])),
"purchase_date" : mean_list(request_att["acc"]["feedback"].get("purchase_date", [None])),
"retailername" : mean_list(request_att["acc"]["feedback"].get("retailername", [None])),
"sold_to_party" : mean_list(request_att["acc"]["feedback"].get("sold_to_party", [None]))}
request.reviewed_accuracy = {"imei_number" : mean_list(request_att["acc"]["reviewed"].get("imei_number", [None])),
"purchase_date" : mean_list(request_att["acc"]["reviewed"].get("purchase_date", [None])),
"retailername" : mean_list(request_att["acc"]["reviewed"].get("retailername", [None])),
"sold_to_party" : mean_list(request_att["acc"]["reviewed"].get("sold_to_party", [None]))}
request.save()
number_images += request_att["total_images"]
number_bad_images += request_att["bad_images"]
update_temp_accuracy(accuracy["feedback"], request_att["acc"]["feedback"], keys=["imei_number", "purchase_date", "retailername", "sold_to_party"])
update_temp_accuracy(accuracy["reviewed"], request_att["acc"]["reviewed"], keys=["imei_number", "purchase_date", "retailername", "sold_to_party"])
time_cost["imei"].add(request_att["time_cost"].get("imei", []))
time_cost["invoice"].add(request_att["time_cost"].get("invoice", []))
errors += request_att["err"]
num_request += 1
report_fine_data, _save_data = report_engine.save(report.report_id, query_set.get("is_daily_report", False), query_set["include_test"])
transaction_att = count_transactions(start_date, end_date, report.subsidiary)
# Do saving process
report.number_request = num_request
report.number_images = number_images
report.number_imei = time_cost["imei"].count
report.number_invoice = time_cost["invoice"].count
report.number_bad_images = number_bad_images
# FIXME: refactor this data stream for endurability
report.average_OCR_time = {"invoice": time_cost["invoice"](), "imei": time_cost["imei"](),
"invoice_count": time_cost["invoice"].count, "imei_count": time_cost["imei"].count}
report.average_OCR_time["avg"] = (report.average_OCR_time["invoice"]*report.average_OCR_time["invoice_count"] + report.average_OCR_time["imei"]*report.average_OCR_time["imei_count"])/(report.average_OCR_time["imei_count"] + report.average_OCR_time["invoice_count"]) if (report.average_OCR_time["imei_count"] + report.average_OCR_time["invoice_count"]) > 0 else None
report.number_imei_transaction = transaction_att.get("imei", 0)
report.number_invoice_transaction = transaction_att.get("invoice", 0)
acumulated_acc = {"feedback": {},
"reviewed": {}}
for acc_type in ["feedback", "reviewed"]:
avg_acc = IterAvg()
for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
acumulated_acc[acc_type][key] = accuracy[acc_type][key]()
acumulated_acc[acc_type][key+"_count"] = accuracy[acc_type][key].count
avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key+"_count"])
acumulated_acc[acc_type]["avg"] = avg_acc()
report.feedback_accuracy = acumulated_acc["feedback"]
report.reviewed_accuracy = acumulated_acc["reviewed"]
report.errors = "|".join(errors)
report.status = "Ready"
report.save()
# Saving a xlsx file
report_files = ReportFile.objects.filter(report=report)
data = extract_report_detail_list(report_files, lower=True)
data_workbook = dict2xlsx(data, _type='report_detail')
local_workbook = save_workbook_file(report.report_id + ".xlsx", report, data_workbook)
s3_key=save_report_to_S3(report.report_id, local_workbook)
if query_set["is_daily_report"]:
# Save overview dashboard
            # multiply accuracy by 100
save_data = copy.deepcopy(_save_data)
for i, dat in enumerate(report_fine_data):
keys = [x for x in list(dat.keys()) if "accuracy" in x.lower()]
keys_percent = "images_quality"
for x_key in report_fine_data[i][keys_percent].keys():
if "percent" not in x_key:
continue
report_fine_data[i][keys_percent][x_key] = report_fine_data[i][keys_percent][x_key]*100
for key in keys:
if report_fine_data[i][key]:
for x_key in report_fine_data[i][key].keys():
report_fine_data[i][key][x_key] = report_fine_data[i][key][x_key]*100
data_workbook = dict2xlsx(report_fine_data, _type='report')
overview_filename = query_set["subsidiary"] + "_" + query_set["report_overview_duration"] + ".xlsx"
local_workbook = save_workbook_file(overview_filename, report, data_workbook, settings.OVERVIEW_REPORT_ROOT)
s3_key=save_report_to_S3(report.report_id, local_workbook)
redis_client.set_cache(settings.OVERVIEW_REPORT_ROOT, overview_filename.replace(".xlsx", ""), json.dumps(save_data))
except IndexError as e:
print(e)
traceback.print_exc()
print("NotFound request by report id, %d", report_id)
except Exception as e:
print("[ERROR]: an error occured while processing report: ", report_id)
traceback.print_exc()
return 400
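Note: the "avg" fields computed above are count-weighted means, not simple means of the two per-type averages. A quick numeric check of the average_OCR_time formula:

# Count-weighted OCR-time average, with illustrative numbers:
invoice_avg, invoice_count = 4.0, 10   # seconds, images
imei_avg, imei_count = 2.0, 30
avg = (invoice_avg * invoice_count + imei_avg * imei_count) / (invoice_count + imei_count)
assert avg == 2.5  # a naive (4.0 + 2.0) / 2 would give 3.0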

===== next file =====

@@ -42,7 +42,7 @@ app.conf.update({
        Queue('remove_local_file'),
        Queue('csv_feedback'),
        Queue('report'),
+       Queue('report_2'),
    ],
    'task_routes': {
        'process_sap_invoice_result': {'queue': 'invoice_sap_rs'},

@@ -61,6 +61,7 @@ app.conf.update({
        'remove_local_file': {'queue': "remove_local_file"},
        'csv_feedback': {'queue': "csv_feedback"},
        'make_a_report': {'queue': "report"},
+       'make_a_report_2': {'queue': "report_2"},
    }
})

===== next file =====

@@ -0,0 +1,18 @@
# Generated by Django 4.1.3 on 2024-02-04 23:32
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0178_alter_reportfile_acc'),
]
operations = [
migrations.AddField(
model_name='reportfile',
name='is_bad_image',
field=models.BooleanField(default=False),
),
]

===== next file =====

@@ -0,0 +1,18 @@
# Generated by Django 4.1.3 on 2024-02-05 02:44
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0179_reportfile_is_bad_image'),
]
operations = [
migrations.AlterField(
model_name='reportfile',
name='time_cost',
field=models.FloatField(default=None, null=True),
),
]

===== next file =====

@@ -16,6 +16,7 @@ class ReportFile(models.Model):
    # Data
    S3_uploaded = models.BooleanField(default=False)
    doc_type = models.CharField(max_length=200)
+   is_bad_image = models.BooleanField(default=False)

    predict_result = models.JSONField(null=True)
    feedback_result = models.JSONField(null=True)

@@ -25,7 +26,7 @@
    reviewed_accuracy = models.JSONField(null=True)

    acc = models.FloatField(default=0, null=True)
-   time_cost = models.FloatField(default=0)
+   time_cost = models.FloatField(default=None, null=True)
    is_reviewed = models.CharField(default="NA", max_length=5) # NA, No, Yes
    bad_image_reason = models.TextField(default="")
    counter_measures = models.TextField(default="")

===== next file =====

@@ -5,14 +5,307 @@ import copy
from typing import Any
from .ocr_utils.ocr_metrics import eval_ocr_metric
from .ocr_utils.sbt_report import post_processing_str
+import uuid
from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile, ReportFile
from ..celery_worker.client_connector import c_connector
+from ..utils.file import dict2xlsx, save_workbook_file, save_report_to_S3
from django.db.models import Q
+from django.utils import timezone
+import redis
+from fwd import settings
+from ..models import SubscriptionRequest, Report, ReportFile
+import json

BAD_THRESHOLD = 0.75

valid_keys = ["retailername", "sold_to_party", "purchase_date", "imei_number"]
class ReportAccumulateByRequest:
def __init__(self, sub):
# self.redis_client = redis.Redis(host=settings.REDIS_HOST, port=settings.REDIS_PORT, decode_responses=True)
self.sub = sub
self.current_time = None
self.data = {} # {"month": [total, {"day": day_data}]}
self.total_format = {
'subs': "+",
'extraction_date': "Subtotal ()",
'total_images': 0,
'images_quality': {
'successful': 0,
'successful_percent': 0,
'bad': 0,
'bad_percent': 0
},
'average_accuracy_rate': {
'imei': IterAvg(),
'purchase_date': IterAvg(),
'retailer_name': IterAvg(),
'sold_to_party': IterAvg()
},
'average_processing_time': {
'imei': IterAvg(),
'invoice': IterAvg()
},
'usage': {
'imei':0,
'invoice': 0,
'request': 0
},
'feedback_accuracy': {
'imei_number': IterAvg(),
'purchase_date': IterAvg(),
'retailername': IterAvg(),
'sold_to_party': IterAvg()
},
'reviewed_accuracy': {
'imei_number': IterAvg(),
'purchase_date': IterAvg(),
'retailername': IterAvg(),
'sold_to_party': IterAvg()
},
'num_request': 0
}
self.day_format = {
'subs': sub,
'extraction_date': "",
'num_imei': 0,
'num_invoice': 0,
'total_images': 0,
'images_quality': {
'successful': 0,
'successful_percent': 0,
'bad': 0,
'bad_percent': 0
},
'average_accuracy_rate': {
'imei': IterAvg(),
'purchase_date': IterAvg(),
'retailer_name': IterAvg(),
'sold_to_party': IterAvg()
},
'average_processing_time': {
'imei': IterAvg(),
'invoice': IterAvg()
},
'usage': {
'imei': 0,
'invoice': 0,
'request': 0
},
'feedback_accuracy': {
'imei_number': IterAvg(),
'purchase_date': IterAvg(),
'retailername': IterAvg(),
'sold_to_party': IterAvg()
},
'reviewed_accuracy': {
'imei_number': IterAvg(),
'purchase_date': IterAvg(),
'retailername': IterAvg(),
'sold_to_party': IterAvg()
},
"report_files": [],
'num_request': 0
},
@staticmethod
def update_total(total, report_file):
total["total_images"] += 1
total["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
total["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0
# total["report_files"].append(report_file)
if sum([len(report_file.reviewed_accuracy[x]) for x in report_file.reviewed_accuracy.keys() if "_count" not in x]) > 0 :
total["average_accuracy_rate"]["imei"].add(report_file.reviewed_accuracy.get("imei_number", []))
total["average_accuracy_rate"]["purchase_date"].add(report_file.reviewed_accuracy.get("purchase_date", []))
total["average_accuracy_rate"]["retailer_name"].add(report_file.reviewed_accuracy.get("retailername", []))
total["average_accuracy_rate"]["sold_to_party"].add(report_file.reviewed_accuracy.get("sold_to_party", []))
elif sum([len(report_file.feedback_accuracy[x]) for x in report_file.feedback_accuracy.keys() if "_count" not in x]) > 0:
total["average_accuracy_rate"]["imei"].add(report_file.feedback_accuracy.get("imei_number", []))
total["average_accuracy_rate"]["purchase_date"].add(report_file.feedback_accuracy.get("purchase_date", []))
total["average_accuracy_rate"]["retailer_name"].add(report_file.feedback_accuracy.get("retailername", []))
total["average_accuracy_rate"]["sold_to_party"].add(report_file.feedback_accuracy.get("sold_to_party", []))
for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
total["feedback_accuracy"][key].add(report_file.feedback_accuracy.get(key, []))
for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
total["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, []))
        if not total["average_processing_time"].get(report_file.doc_type, None):
            print(f"[WARN]: Weird doctype: {report_file.doc_type}")
            total["average_processing_time"][report_file.doc_type] = IterAvg()
        total["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
total["usage"]["imei"] += 1 if report_file.doc_type == "imei" else 0
total["usage"]["invoice"] += 1 if report_file.doc_type == "invoice" else 0
return total
@staticmethod
def update_day(day_data, report_file):
day_data["total_images"] += 1
day_data["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
day_data["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0
day_data["num_imei"] += 1 if report_file.doc_type == "imei" else 0
day_data["num_invoice"] += 1 if report_file.doc_type == "invoice" else 0
day_data["report_files"].append(report_file)
if sum([len(report_file.reviewed_accuracy[x]) for x in report_file.reviewed_accuracy.keys() if "_count" not in x]) > 0 :
day_data["average_accuracy_rate"]["imei"].add(report_file.reviewed_accuracy.get("imei_number", 0))
day_data["average_accuracy_rate"]["purchase_date"].add(report_file.reviewed_accuracy.get("purchase_date", 0))
day_data["average_accuracy_rate"]["retailer_name"].add(report_file.reviewed_accuracy.get("retailername", 0))
day_data["average_accuracy_rate"]["sold_to_party"].add(report_file.reviewed_accuracy.get("sold_to_party", 0))
elif sum([len(report_file.feedback_accuracy[x]) for x in report_file.feedback_accuracy.keys() if "_count" not in x]) > 0:
day_data["average_accuracy_rate"]["imei"].add(report_file.feedback_accuracy.get("imei_number", 0))
day_data["average_accuracy_rate"]["purchase_date"].add(report_file.feedback_accuracy.get("purchase_date", 0))
day_data["average_accuracy_rate"]["retailer_name"].add(report_file.feedback_accuracy.get("retailername", 0))
day_data["average_accuracy_rate"]["sold_to_party"].add(report_file.feedback_accuracy.get("sold_to_party", 0))
for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
day_data["feedback_accuracy"][key].add(report_file.feedback_accuracy.get(key, 0))
for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
day_data["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, 0))
        if not day_data["average_processing_time"].get(report_file.doc_type, None):
            print(f"[WARN]: Weird doctype: {report_file.doc_type}")
            day_data["average_processing_time"][report_file.doc_type] = IterAvg()
        day_data["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
return day_data
def add(self, request, report_files):
this_month = request.created_at.strftime("%Y%m")
this_day = request.created_at.strftime("%Y%m%d")
if not self.data.get(this_month, None):
self.data[this_month] = [copy.deepcopy(self.total_format), {}]
if not self.data[this_month][1].get(this_day, None):
self.data[this_month][1][this_day] = copy.deepcopy(self.day_format)[0]
self.data[this_month][1][this_day]['extraction_date'] = request.created_at.strftime("%Y-%m-%d")
usage = self.count_transactions_within_day(this_day)
self.data[this_month][1][this_day]["usage"]["imei"] = usage.get("imei", 0)
self.data[this_month][1][this_day]["usage"]["invoice"] = usage.get("invoice", 0)
self.data[this_month][1][this_day]["usage"]["request"] = usage.get("request", 0)
self.data[this_month][1][this_day]['num_request'] += 1
self.data[this_month][0]['num_request'] += 1
for report_file in report_files:
self.data[this_month][0] = self.update_total(self.data[this_month][0], report_file) # Update the subtotal within the month
self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], report_file) # Update the subtotal of the day
def count_transactions_within_day(self, date_string):
# convert this day into timezone.datetime at UTC
start_date = datetime.strptime(date_string, "%Y%m%d")
start_date_with_timezone = timezone.make_aware(start_date)
end_date_with_timezone = start_date_with_timezone + timezone.timedelta(days=1)
return count_transactions(start_date_with_timezone, end_date_with_timezone, self.sub)
def save(self, root_report_id, is_daily_report=False, include_test=False):
report_data = self.get()
fine_data = []
save_data = {"file": {"overview": f"{root_report_id}/{root_report_id}.xlsx"},
"data": fine_data} # {"sub_report_id": "S3 location", "data": fine_data}
# extract data
for month in report_data.keys():
fine_data.append(report_data[month][0])
for day in report_data[month][1].keys():
fine_data.append(report_data[month][1][day])
# save daily reports
report_id = root_report_id + "_" + day
start_date = datetime.strptime(day, "%Y%m%d")
start_date_with_timezone = timezone.make_aware(start_date)
end_date_with_timezone = start_date_with_timezone + timezone.timedelta(days=1)
_average_OCR_time = {"invoice": self.data[month][1][day]["average_processing_time"]["invoice"](), "imei": self.data[month][1][day]["average_processing_time"]["imei"](),
"invoice_count": self.data[month][1][day]["average_processing_time"]["invoice"].count, "imei_count": self.data[month][1][day]["average_processing_time"]["imei"].count}
_average_OCR_time["avg"] = (_average_OCR_time["invoice"]*_average_OCR_time["invoice_count"] + _average_OCR_time["imei"]*_average_OCR_time["imei_count"])/(_average_OCR_time["imei_count"] + _average_OCR_time["invoice_count"]) if (_average_OCR_time["imei_count"] + _average_OCR_time["invoice_count"]) > 0 else None
acumulated_acc = {"feedback_accuracy": {},
"reviewed_accuracy": {}}
for acc_type in ["feedback_accuracy", "reviewed_accuracy"]:
avg_acc = IterAvg()
for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
acumulated_acc[acc_type][key] = self.data[month][1][day][acc_type][key]()
acumulated_acc[acc_type][key+"_count"] = self.data[month][1][day][acc_type][key].count
avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key+"_count"])
acumulated_acc[acc_type]["avg"] = avg_acc()
acumulated_acc[acc_type]["avg_count"] = avg_acc.count
new_report: Report = Report(
report_id=report_id,
is_daily_report=is_daily_report,
subsidiary=self.sub.lower().replace(" ", ""),
include_test=include_test,
start_at=start_date_with_timezone,
end_at=end_date_with_timezone,
status="Ready",
number_request=report_data[month][1][day]["num_request"],
number_images=report_data[month][1][day]["total_images"],
number_imei=report_data[month][1][day]["num_imei"],
number_invoice=report_data[month][1][day]["num_invoice"],
number_bad_images=report_data[month][1][day]["images_quality"]["bad"],
average_OCR_time=_average_OCR_time,
number_imei_transaction=report_data[month][1][day]["usage"]["imei"],
number_invoice_transaction=report_data[month][1][day]["usage"]["invoice"],
feedback_accuracy=acumulated_acc["feedback_accuracy"],
reviewed_accuracy=acumulated_acc["reviewed_accuracy"],
)
new_report.save()
data = extract_report_detail_list(self.data[month][1][day]["report_files"], lower=True)
data_workbook = dict2xlsx(data, _type='report_detail')
local_workbook = save_workbook_file(report_id + ".xlsx", new_report, data_workbook)
s3_key=save_report_to_S3(report_id, local_workbook)
return fine_data, save_data
def get(self) -> Any:
# FIXME: This looks like a junk
_data = copy.deepcopy(self.data)
for month in _data.keys():
_data[month][0]["images_quality"]["successful_percent"] = _data[month][0]["images_quality"]["successful"]/_data[month][0]["total_images"] if _data[month][0]["total_images"] > 0 else 0
_data[month][0]["images_quality"]["bad_percent"] = _data[month][0]["images_quality"]["bad"]/_data[month][0]["total_images"] if _data[month][0]["total_images"] > 0 else 0
num_transaction_imei = 0
num_transaction_invoice = 0
for day in _data[month][1].keys():
num_transaction_imei += _data[month][1][day]["usage"].get("imei", 0)
num_transaction_invoice += _data[month][1][day]["usage"].get("invoice", 0)
_data[month][1][day]["average_accuracy_rate"]["imei"] = _data[month][1][day]["average_accuracy_rate"]["imei"]()
_data[month][1][day]["average_accuracy_rate"]["purchase_date"] = _data[month][1][day]["average_accuracy_rate"]["purchase_date"]()
_data[month][1][day]["average_accuracy_rate"]["retailer_name"] = _data[month][1][day]["average_accuracy_rate"]["retailer_name"]()
_data[month][1][day]["average_accuracy_rate"]["sold_to_party"] = _data[month][1][day]["average_accuracy_rate"]["sold_to_party"]()
_data[month][1][day]["average_processing_time"]["imei"] = _data[month][1][day]["average_processing_time"]["imei"]()
_data[month][1][day]["average_processing_time"]["invoice"] = _data[month][1][day]["average_processing_time"]["invoice"]()
_data[month][1][day]["feedback_accuracy"]["imei_number"] = _data[month][1][day]["feedback_accuracy"]["imei_number"]()
_data[month][1][day]["feedback_accuracy"]["purchase_date"] = _data[month][1][day]["feedback_accuracy"]["purchase_date"]()
_data[month][1][day]["feedback_accuracy"]["retailername"] = _data[month][1][day]["feedback_accuracy"]["retailername"]()
_data[month][1][day]["feedback_accuracy"]["sold_to_party"] = _data[month][1][day]["feedback_accuracy"]["sold_to_party"]()
_data[month][1][day]["reviewed_accuracy"]["imei_number"] = _data[month][1][day]["reviewed_accuracy"]["imei_number"]()
_data[month][1][day]["reviewed_accuracy"]["purchase_date"] = _data[month][1][day]["reviewed_accuracy"]["purchase_date"]()
_data[month][1][day]["reviewed_accuracy"]["retailername"] = _data[month][1][day]["reviewed_accuracy"]["retailername"]()
_data[month][1][day]["reviewed_accuracy"]["sold_to_party"] = _data[month][1][day]["reviewed_accuracy"]["sold_to_party"]()
_data[month][1][day].pop("report_files")
_data[month][1][day]["images_quality"]["successful_percent"] = _data[month][1][day]["images_quality"]["successful"]/_data[month][1][day]["total_images"] if _data[month][1][day]["total_images"] > 0 else 0
_data[month][1][day]["images_quality"]["bad_percent"] = _data[month][1][day]["images_quality"]["bad"]/_data[month][1][day]["total_images"] if _data[month][1][day]["total_images"] > 0 else 0
_data[month][0]["usage"]["imei"] = num_transaction_imei
_data[month][0]["usage"]["invoice"] = num_transaction_invoice
_data[month][0]["average_accuracy_rate"]["imei"] = _data[month][0]["average_accuracy_rate"]["imei"]()
_data[month][0]["average_accuracy_rate"]["purchase_date"] = _data[month][0]["average_accuracy_rate"]["purchase_date"]()
_data[month][0]["average_accuracy_rate"]["retailer_name"] = _data[month][0]["average_accuracy_rate"]["retailer_name"]()
_data[month][0]["average_accuracy_rate"]["sold_to_party"] = _data[month][0]["average_accuracy_rate"]["sold_to_party"]()
_data[month][0]["average_processing_time"]["imei"] = _data[month][0]["average_processing_time"]["imei"]()
_data[month][0]["average_processing_time"]["invoice"] = _data[month][0]["average_processing_time"]["invoice"]()
_data[month][0]["feedback_accuracy"]["imei_number"] = _data[month][0]["feedback_accuracy"]["imei_number"]()
_data[month][0]["feedback_accuracy"]["purchase_date"] = _data[month][0]["feedback_accuracy"]["purchase_date"]()
_data[month][0]["feedback_accuracy"]["retailername"] = _data[month][0]["feedback_accuracy"]["retailername"]()
_data[month][0]["feedback_accuracy"]["sold_to_party"] = _data[month][0]["feedback_accuracy"]["sold_to_party"]()
_data[month][0]["reviewed_accuracy"]["imei_number"] = _data[month][0]["reviewed_accuracy"]["imei_number"]()
_data[month][0]["reviewed_accuracy"]["purchase_date"] = _data[month][0]["reviewed_accuracy"]["purchase_date"]()
_data[month][0]["reviewed_accuracy"]["retailername"] = _data[month][0]["reviewed_accuracy"]["retailername"]()
_data[month][0]["reviewed_accuracy"]["sold_to_party"] = _data[month][0]["reviewed_accuracy"]["sold_to_party"]()
return _data
class MonthReportAccumulate:
    def __init__(self):
        self.month = None
@@ -89,7 +382,7 @@ class MonthReportAccumulate:
        self.total["usage"]["invoice"] += report.number_invoice_transaction

    def add(self, report):
-       report_month = report.created_at.month
+       report_month = report.start_at.month

        if self.month is None:
            self.month = report_month
@@ -103,7 +396,7 @@
        new_data = copy.deepcopy(self.data_format)[0]
        new_data["num_imei"] = report.number_imei
        new_data["subs"] = report.subsidiary
-       new_data["extraction_date"] = report.created_at
+       new_data["extraction_date"] = report.start_at
        new_data["num_invoice"] = report.number_invoice
        new_data["total_images"] = report.number_images
        new_data["images_quality"]["successful"] = report.number_images - report.number_bad_images
@@ -130,10 +423,38 @@
            self.accumulate(report)
        return True
def clear(self):
self.month = None
self.total = {
'subs': "+",
'extraction_date': "Subtotal ()",
'total_images': 0,
'images_quality': {
'successful': 0,
'successful_percent': 0,
'bad': 0,
'bad_percent': 0
},
'average_accuracy_rate': {
'imei': IterAvg(),
'purchase_date': IterAvg(),
'retailer_name': IterAvg()
},
'average_processing_time': {
'imei': IterAvg(),
'invoice': IterAvg()
},
'usage': {
'imei':0,
'invoice': 0
}
}
self.data = []
    def __call__(self):
-       self.total["images_quality"]["successful_percent"] += self.total["images_quality"]["successful"]/self.total["total_images"] if self.total["total_images"] else 0
-       self.total["images_quality"]["bad_percent"] += self.total["images_quality"]["bad"]/self.total["total_images"] if self.total["total_images"] else 0
        total = copy.deepcopy(self.total)
+       total["images_quality"]["successful_percent"] = total["images_quality"]["successful"]/total["total_images"] if total["total_images"] else 0
+       total["images_quality"]["bad_percent"] = total["images_quality"]["bad"]/total["total_images"] if total["total_images"] else 0
        total["average_accuracy_rate"]["imei"] = total["average_accuracy_rate"]["imei"]()
        total["average_accuracy_rate"]["purchase_date"] = total["average_accuracy_rate"]["purchase_date"]()
        total["average_accuracy_rate"]["retailer_name"] = total["average_accuracy_rate"]["retailer_name"]()
@@ -167,6 +488,16 @@ class IterAvg:
    def __call__(self):
        return self.avg
def validate_feedback_file(feedback, predict):
if feedback:
imei_feedback = feedback.get("imei_number", [])
imei_feedback = [x for x in imei_feedback if x != ""]
num_imei_feedback = len(imei_feedback)
num_imei_predict = len(predict.get("imei_number", []))
if num_imei_feedback != num_imei_predict:
return False
return True
def first_of_list(the_list):
    if not the_list:
        return None
@@ -210,9 +541,11 @@ def extract_report_detail_list(report_detail_list, lower=False, in_percent=True)
            data[i][key] = data[i][key]*100
    return data

-def count_transactions(start_date, end_date):
+def count_transactions(start_date, end_date, subsidiary="all"):
    base_query = Q(created_at__range=(start_date, end_date))
    base_query &= Q(is_test_request=False)
+   if subsidiary and subsidiary.lower().replace(" ", "")!="all":
+       base_query &= Q(redemption_id__startswith=subsidiary)
    transaction_att = {}

    print(f"[DEBUG]: atracting transactions attribute...")
@@ -226,6 +559,10 @@
            transaction_att[doc_type] = 1
        else:
            transaction_att[doc_type] += 1
+   if not transaction_att.get("request", None):
+       transaction_att["request"] = 1
+   else:
+       transaction_att["request"] += 1
    return transaction_att
def convert_datetime_format(date_string: str, is_gt=False) -> str:
@@ -359,6 +696,7 @@ def calculate_and_save_subcription_file(report, request):
                reviewed_accuracy=att["acc"]["reviewed"],
                acc=att["avg_acc"],
                time_cost=image.processing_time,
+               is_bad_image=att["is_bad_image"],
                bad_image_reason=image.reason,
                counter_measures=image.counter_measures,
                error="|".join(att["err"])
@@ -387,6 +725,72 @@
            continue
    return request_att
def calculate_a_request(report, request):
request_att = {"acc": {"feedback": {"imei_number": [],
"purchase_date": [],
"retailername": [],
"sold_to_party": [],
},
"reviewed": {"imei_number": [],
"purchase_date": [],
"retailername": [],
"sold_to_party": [],
}},
"err": [],
"time_cost": {},
"total_images": 0,
"bad_images": 0}
images = SubscriptionRequestFile.objects.filter(request=request)
report_files = []
for image in images:
status, att = calculate_subcription_file(image)
if status != 200:
continue
image.feedback_accuracy = att["acc"]["feedback"]
image.reviewed_accuracy = att["acc"]["reviewed"]
image.is_bad_image_quality = att["is_bad_image"]
image.save()
new_report_file = ReportFile(report=report,
correspond_request_id=request.request_id,
correspond_redemption_id=request.redemption_id,
doc_type=image.doc_type,
predict_result=image.predict_result,
feedback_result=image.feedback_result,
reviewed_result=image.reviewed_result,
feedback_accuracy=att["acc"]["feedback"],
reviewed_accuracy=att["acc"]["reviewed"],
acc=att["avg_acc"],
is_bad_image=att["is_bad_image"],
time_cost=image.processing_time,
bad_image_reason=image.reason,
counter_measures=image.counter_measures,
error="|".join(att["err"])
)
report_files.append(new_report_file)
if request_att["time_cost"].get(image.doc_type, None):
request_att["time_cost"][image.doc_type].append(image.processing_time)
else:
request_att["time_cost"][image.doc_type] = [image.processing_time]
try:
request_att["acc"]["feedback"]["imei_number"] += att["acc"]["feedback"]["imei_number"]
request_att["acc"]["feedback"]["purchase_date"] += att["acc"]["feedback"]["purchase_date"]
request_att["acc"]["feedback"]["retailername"] += att["acc"]["feedback"]["retailername"]
request_att["acc"]["feedback"]["sold_to_party"] += att["acc"]["feedback"]["sold_to_party"]
request_att["acc"]["reviewed"]["imei_number"] += att["acc"]["reviewed"]["imei_number"]
request_att["acc"]["reviewed"]["purchase_date"] += att["acc"]["reviewed"]["purchase_date"]
request_att["acc"]["reviewed"]["retailername"] += att["acc"]["reviewed"]["retailername"]
request_att["acc"]["reviewed"]["sold_to_party"] += att["acc"]["reviewed"]["sold_to_party"]
request_att["bad_images"] += int(att["is_bad_image"])
request_att["total_images"] += 1
request_att["err"] += att["err"]
except Exception as e:
print(e)
continue
return request_att, report_files
def calculate_subcription_file(subcription_request_file):
@ -490,5 +894,5 @@ def calculate_attributions(request): # for one request, return in order
    return acc, data, time_cost, image_quality_num, error

def shadow_report(report_id, query):
-    c_connector.make_a_report(
+    c_connector.make_a_report_2(
        (report_id, query))
@@ -7,6 +7,7 @@ import json
from PIL import Image, ExifTags
from django.core.files.uploadedfile import TemporaryUploadedFile
from django.utils import timezone
from datetime import datetime

from fwd import settings
from ..utils import s3 as S3Util
@@ -30,6 +31,16 @@ s3_client = S3Util.MinioS3Client(
    bucket_name=settings.S3_BUCKET_NAME
)
def convert_date_string(date_string):
# Parse the input date string
date_format = "%Y-%m-%d %H:%M:%S.%f %z"
parsed_date = datetime.strptime(date_string, date_format)
# Format the date as "YYYYMMDD"
formatted_date = parsed_date.strftime("%Y%m%d")
return formatted_date
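# Illustrative example (not part of the commit), assuming a timezone-aware
# timestamp in the format expected above:
#   convert_date_string("2024-02-06 10:17:27.000000 +0700")  # -> "20240206"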
def validate_report_list(request):
    start_date_str = request.GET.get('start_date')
    end_date_str = request.GET.get('end_date')
@@ -190,10 +201,13 @@ def save_feedback_file(file_name: str, rq: FeedbackRequest, uploaded_file: dict):
        csvfile.write(file_contents)
    return file_path

-def save_workbook_file(file_name: str, rp: Report, workbook):
+def save_workbook_file(file_name: str, rp: Report, workbook, prefix=""):
    report_id = str(rp.report_id)
-    folder_path = os.path.join(settings.MEDIA_ROOT, "report", report_id)
+    if not prefix:
+        folder_path = os.path.join(settings.MEDIA_ROOT, "report", report_id)
+    else:
+        folder_path = os.path.join(settings.MEDIA_ROOT, "report", prefix)
    os.makedirs(folder_path, exist_ok=True)
    file_path = os.path.join(folder_path, file_name)
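# Illustrative note (not part of the commit): a non-empty prefix such as
# "overview" (cf. OVERVIEW_REPORT_ROOT in settings) would place the workbook
# under MEDIA_ROOT/report/overview/<file_name> instead of the per-report
# folder MEDIA_ROOT/report/<report_id>/<file_name>.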
@@ -388,12 +402,17 @@ def build_media_url_v2(media_id: str, user_id: int, sub_id: int, u_sync_id: str):
def get_value(_dict, keys):
    keys = keys.split('.')
    value = _dict
-    for key in keys:
-        if not key in value.keys():
-            return "-"
-        else:
-            value = value.get(key, {})
+    try:
+        for key in keys:
+            if not key in value.keys():
+                return "-"
+            else:
+                value = value.get(key, {})
+    except Exception as e:
+        print(f"[ERROR]: {e}")
+        print(f"[ERROR]: value: {value}")
+        print(f"[ERROR]: keys: {keys}")
    if not value:
        return "-"
    elif isinstance(value, list):
@@ -475,13 +494,23 @@ def dict2xlsx(input: json, _type='report'):
        ws[key + str(start_index)].border = border
        if _type == 'report':
-            ws[key + str(start_index)].font = font_black_bold
-            if key_index == 0 or (key_index >= 9 and key_index <= 15):
-                ws[key + str(start_index)].fill = fill_gray
-            elif key_index == 1:
-                ws[key + str(start_index)].fill = fill_green
-            elif key_index >= 4 and key_index <= 8:
-                ws[key + str(start_index)].fill = fill_yellow
+            if subtotal['subs'] == '+':
+                ws[key + str(start_index)].font = font_black_bold
+                if key_index == 0 or (key_index >= 9 and key_index <= 15):
+                    ws[key + str(start_index)].fill = fill_gray
+                elif key_index == 1:
+                    ws[key + str(start_index)].fill = fill_green
+                elif key_index >= 4 and key_index <= 8:
+                    ws[key + str(start_index)].fill = fill_yellow
+            else:
+                if 'average_accuracy_rate' in mapping[key] and type(value) in [int, float] and value < 95:
+                    ws[key + str(start_index)].style = normal_cell_red
+                elif 'average_processing_time' in mapping[key] and type(value) in [int, float] and value > 2.0:
+                    ws[key + str(start_index)].style = normal_cell_red
+                elif 'bad_percent' in mapping[key] and type(value) in [int, float] and value > 10:
+                    ws[key + str(start_index)].style = normal_cell_red
+                else:
+                    ws[key + str(start_index)].style = normal_cell
        elif _type == 'report_detail':
            if 'accuracy' in mapping[key] and type(value) in [int, float] and value < 75:
                ws[key + str(start_index)].style = normal_cell_red
@@ -491,21 +520,5 @@ def dict2xlsx(input: json, _type='report'):
                ws[key + str(start_index)].style = normal_cell
        start_index += 1
-    if 'data' in subtotal.keys():
-        for record in subtotal['data']:
-            for key in mapping.keys():
-                value = get_value(record, mapping[key])
-                ws[key + str(start_index)] = value
-                if 'average_accuracy_rate' in mapping[key] and type(value) in [int, float] and value < 95:
-                    ws[key + str(start_index)].style = normal_cell_red
-                elif 'average_processing_time' in mapping[key] and type(value) in [int, float] and value > 2.0:
-                    ws[key + str(start_index)].style = normal_cell_red
-                elif 'bad_percent' in mapping[key] and type(value) in [int, float] and value > 10:
-                    ws[key + str(start_index)].style = normal_cell_red
-                else :
-                    ws[key + str(start_index)].style = normal_cell
-                start_index += 1
    return wb
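# Illustrative summary (not part of the commit): in 'report' mode, subtotal
# rows (subs == '+') keep the bold font and gray/green/yellow fills, while
# ordinary rows turn red when average_accuracy_rate < 95,
# average_processing_time > 2.0, or bad_percent > 10.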
@@ -22,6 +22,9 @@ class RedisUtils:
        for key, value in self.redis_client.hgetall(request_id).items():
            resutlt[key] = json.loads(value)
        return resutlt

    def get_specific_cache(self, request_id, key):
        return json.loads(self.redis_client.hget(request_id, key))
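    # Illustrative usage (not part of the commit; the key name is hypothetical):
    #   overview = redis_utils.get_specific_cache(report_id, "overview")
    # hget returns None for a missing field, which json.loads would reject, so
    # callers are expected to request only keys they previously stored.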
    def get_size(self, request_id):
        return self.redis_client.hlen(request_id)
@@ -0,0 +1,11 @@
from fwd.settings import SUBS
def map_subsidiary_long_to_short(long_sub):
short_sub = SUBS.get(long_sub.upper(), "all")
return short_sub.upper()
def map_subsidiary_short_to_long(short_sub):
for k, v in SUBS.items():
if v == short_sub.upper():
return k
return "ALL"
@@ -0,0 +1,9 @@
def is_the_same_day(first_day, second_day):
if first_day.day == second_day.day and first_day.month == second_day.month and first_day.year == second_day.year:
return True
return False
def is_the_same_month(first_day, second_day):
if first_day.month == second_day.month and first_day.year == second_day.year:
return True
return False
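# Illustrative usage (not part of the commit), given datetime objects:
#   is_the_same_day(datetime(2022, 2, 15, 12, 30), datetime(2022, 2, 15, 8, 45))  # True
#   is_the_same_month(datetime(2022, 1, 15), datetime(2022, 2, 15))               # False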
@@ -0,0 +1,68 @@
import os
import time
import requests
from datetime import datetime
interval = 60*60*1  # 1 hour between update cycles
update_cost = 60*3  # 3 minutes between individual report-update calls
# Get the proxy URL from the environment variable
proxy_url = os.getenv('PROXY', "localhost")
# Define the login API URL
login_url = f'{proxy_url}/api/ctel/login/'
login_token = None
# Define the login credentials
login_credentials = {
'username': 'sbt',
'password': '7Eg4AbWIXDnufgn'
}
# Define the command to call the update API
update_url = f'{proxy_url}/api/ctel/make_report/'
update_params = {
'is_daily_report': 'true',
'report_overview_duration': '',
'subsidiary': None
}
"report_overview_duration"
def update_report(login_token, report_overview_duration=["30d", "7d"], subsidiary=["all", "SEAU", "SESP", "SME", "SEPCO", "TSE", "SEIN"]):
headers = {'Authorization': login_token}
for dur in report_overview_duration:
for sub in subsidiary:
update_params["report_overview_duration"] = dur
update_params["subsidiary"] = sub
update_response = requests.get(update_url, params=update_params, headers=headers)
print("[INFO]: update_response at {} by {} - {} with status {}".format(datetime.now(), dur, sub, update_response.status_code))
update_response.raise_for_status()
time.sleep(update_cost)
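# Illustrative timing note (not part of the commit): 2 durations x 7
# subsidiaries is 14 calls per pass, and with update_cost = 180s a full pass
# takes roughly 42 minutes, which fits inside the 1-hour interval.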
# Main loop: log in, trigger the report updates, then sleep until the next cycle
while True:
# Call the login API and retrieve the login token
if not login_token:
login_response = requests.post(login_url, data=login_credentials)
# login_response.raise_for_status()
if login_response.status_code == 200:
login_token = login_response.json()['token']
print("[INFO] relogged in at {}".format(datetime.now()))
# Call the update API
try:
update_report(login_token)
except Exception as e:
print(f"[ERROR]: {e}")
print("[ERROR]: Failed to update the report, re-logging in and retrying...")
login_response = requests.post(login_url, data=login_credentials)
# login_response.raise_for_status()
if login_response.status_code == 200:
login_token = login_response.json()['token']
print("[INFO] relogged in at {}".format(datetime.now()))
update_report(login_token)
# Wait for the specified interval
time.sleep(interval)
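# Illustrative note (not part of the commit): the token is refreshed only when
# update_report() raises; raise_for_status() turns an HTTP error such as a 401
# into an exception, which triggers the re-login branch above.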
@@ -84,12 +84,12 @@ services:
    depends_on:
      db-sbt:
        condition: service_started
-    # command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
-    #            python manage.py makemigrations &&
-    #            python manage.py migrate &&
-    #            python manage.py compilemessages &&
-    #            gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod
-    command: bash -c "tail -f > /dev/null"
+    command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
+               python manage.py makemigrations &&
+               python manage.py migrate &&
+               python manage.py compilemessages &&
+               gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod
+    # command: bash -c "tail -f > /dev/null"
  minio:
    image: minio/minio
@@ -175,6 +175,7 @@ services:
    working_dir: /app
    command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5"
    # command: bash -c "tail -f > /dev/null"
  # Back-end persistent
  db-sbt:
@@ -0,0 +1 @@
Subproject commit 220954c5c6bfed15e93e26b2adacf28ff8b75baf
@@ -0,0 +1,17 @@
from datetime import datetime
# Two datetime objects sharing the same day-of-month in different months
date_jan = datetime(2022, 1, 15, 12, 30, 0)
date_feb = datetime(2022, 2, 15, 8, 45, 0)
# Check if they are the same day
if date_jan.day == date_feb.day and date_jan.month == date_feb.month and date_jan.year == date_feb.year:
print("They are the same day")
else:
print("They are different days")
# Check if they are the same month
if date_jan.month == date_feb.month and date_jan.year == date_feb.year:
print("They are the same month")
else:
print("They are different months")
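# With the January/February values above, both checks take the "different"
# branch: the day comparison also requires a matching month and year.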