Merge pull request #16 from SDSRV-IDP/enhancment/accuracy_calculation

Enhancement/accuracy calculation
Nguyen Viet Anh 2024-02-06 10:17:27 +07:00 committed by GitHub Enterprise
commit 333815e2d5
19 changed files with 949 additions and 147 deletions

api-cronjob/Dockerfile

@ -0,0 +1,9 @@
FROM python:3.9-slim
WORKDIR /app
COPY script.py .
RUN apt-get update && apt-get -y install curl
CMD [ "python", "script.py" ]


@ -143,8 +143,8 @@ LANGUAGE_CODE = "en-us"
USE_I18N = True
CELERY_ENABLE_UTC = False
CELERY_TIMEZONE = "Asia/Ho_Chi_Minh"
TIME_ZONE = "Asia/Ho_Chi_Minh"
CELERY_TIMEZONE = "Asia/Singapore"
TIME_ZONE = "Asia/Singapore"
USE_TZ = True
# Static files (CSS, JavaScript, Images)
@ -220,6 +220,20 @@ SIZE_TO_COMPRESS = 2 * 1024 * 1024
MAX_NUMBER_OF_TEMPLATE = 3
MAX_PAGES_OF_PDF_FILE = 50
OVERVIEW_REFRESH_INTERVAL = 2
OVERVIEW_REPORT_ROOT = "overview"
OVERVIEW_REPORT_DURATION = ["30d", "7d"]
SUBS = {
"SEAU": "AU",
"SESP": "SG",
"SME": "MY",
"SEPCO": "PH",
"TSE": "TH",
"SEIN": "ID",
"ALL": "all"
}
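These values feed the overview cache and report naming: a long subsidiary name is mapped to its short code via SUBS and combined with one of OVERVIEW_REPORT_DURATION to form keys such as "SG_30d". A minimal sketch of that key construction, assuming the mapping helpers added later in this diff:
# Sketch: cache/S3 key construction used by the overview endpoints (helper defined below).
subsidiary = "SESP"                         # long name sent by the client
duration = "30d"                            # must be one of OVERVIEW_REPORT_DURATION
key = f"{SUBS[subsidiary]}_{duration}"      # -> "SG_30d"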
CACHES = {
'default': {
'BACKEND': 'django.core.cache.backends.dummy.DummyCache',


@ -14,9 +14,12 @@ import json
from ..exception.exceptions import InvalidException, RequiredFieldException, NotFoundException
from ..models import SubscriptionRequest, Report, ReportFile
from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list, extract_report_detail_list, IterAvg
from ..utils.file import download_from_S3
from ..utils.file import download_from_S3, convert_date_string
from ..utils.redis import RedisUtils
from ..utils.process import string_to_boolean
from ..celery_worker.client_connector import c_connector
from ..utils.subsidiary import map_subsidiary_long_to_short, map_subsidiary_short_to_long
redis_client = RedisUtils()
class AccuracyViewSet(viewsets.ViewSet):
lookup_field = "username"
@ -226,6 +229,12 @@ class AccuracyViewSet(viewsets.ViewSet):
description='Subsidiary',
type=OpenApiTypes.STR,
),
OpenApiParameter(
name='report_overview_duration',
location=OpenApiParameter.QUERY,
description=f'one of {settings.OVERVIEW_REPORT_DURATION}',
type=OpenApiTypes.STR,
),
],
responses=None, tags=['Accuracy']
)
@ -240,12 +249,26 @@ class AccuracyViewSet(viewsets.ViewSet):
include_test = string_to_boolean(request.GET.get('include_test', "false"))
subsidiary = request.GET.get("subsidiary", "all")
is_daily_report = string_to_boolean(request.GET.get('is_daily_report', "false"))
try:
start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z')
end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z')
except ValueError:
raise InvalidException(excArgs="Date format")
report_overview_duration = request.GET.get("report_overview_duration", "")
subsidiary = map_subsidiary_long_to_short(subsidiary)
if is_daily_report:
if report_overview_duration not in settings.OVERVIEW_REPORT_DURATION:
raise InvalidException(excArgs="overview duration")
end_date = timezone.now()
if report_overview_duration == "30d":
start_date = end_date - timezone.timedelta(days=30)
else:
start_date = end_date - timezone.timedelta(days=7)
start_date = start_date.replace(hour=0, minute=0, second=0, microsecond=0)
start_date_str = start_date.strftime('%Y-%m-%dT%H:%M:%S%z')
end_date_str = end_date.strftime('%Y-%m-%dT%H:%M:%S%z')
else:
try:
start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z')
end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z')
except ValueError:
raise InvalidException(excArgs="Date format")
query_set = {"start_date_str": start_date_str,
"end_date_str": end_date_str,
@ -255,7 +278,11 @@ class AccuracyViewSet(viewsets.ViewSet):
"include_test": include_test,
"subsidiary": subsidiary,
"is_daily_report": is_daily_report,
"report_overview_duration": report_overview_duration
}
# if is_daily_report:
# if (end_date-start_date) > timezone.timedelta(days=1):
# raise InvalidException(excArgs="Date range")
report_id = "report" + "_" + timezone.datetime.now().strftime("%Y%m%d%H%M%S%z") + "_" + uuid.uuid4().hex
new_report: Report = Report(
@ -268,8 +295,6 @@ class AccuracyViewSet(viewsets.ViewSet):
end_at=end_date,
status="Processing",
)
if is_daily_report:
new_report.created_at = end_date
new_report.save()
# Background job to calculate accuracy
shadow_report(report_id, query_set)
@ -318,7 +343,7 @@ class AccuracyViewSet(viewsets.ViewSet):
response = {
'report_detail': data,
'metadata': {"subsidiary": report.subsidiary,
'metadata': {"subsidiary": map_subsidiary_short_to_long(report.subsidiary),
"start_at": report.start_at,
"end_at": report.end_at},
'page': {
@ -380,7 +405,7 @@ class AccuracyViewSet(viewsets.ViewSet):
page_size = int(request.GET.get('page_size', 10))
if not start_date_str or not end_date_str:
reports = Report.objects.all()
reports = Report.objects.all().order_by('created_at').reverse()
else:
try:
start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z')
@ -390,26 +415,35 @@ class AccuracyViewSet(viewsets.ViewSet):
base_query = Q(created_at__range=(start_date, end_date))
if daily_report_only:
base_query &= Q(is_daily_report=True)
reports = Report.objects.filter(base_query).order_by('created_at')
reports = Report.objects.filter(base_query).order_by('created_at').reverse()
paginator = Paginator(reports, page_size)
page = paginator.get_page(page_number)
data = []
for report in page:
acc_keys = ["purchase_date", "retailername", "imei_number", "avg"]
acc = {}
for key in acc_keys:
fb = report.feedback_accuracy.get(key, 0) if report.feedback_accuracy else 0
rv = report.reviewed_accuracy.get(key, 0) if report.reviewed_accuracy else 0
acc[key] = max([fb, rv])
data.append({
"ID": report.id,
"Created Date": report.created_at,
"Start Date": report.start_at,
"End Date": report.end_at,
"No. Requests": report.number_request,
"Status": report.status,
"Purchase Date Acc": report.reviewed_accuracy.get("purchase_date", None) if report.reviewed_accuracy else None,
"Retailer Acc": report.feedback_accuracy.get("retailername", None) if report.reviewed_accuracy else None,
"IMEI Acc": report.feedback_accuracy.get("imei_number", None) if report.reviewed_accuracy else None,
"Avg. Accuracy": report.feedback_accuracy.get("avg", None) if report.reviewed_accuracy else None,
"Purchase Date Acc": acc["purchase_date"],
"Retailer Acc": acc["retailername"],
"IMEI Acc": acc["imei_number"],
"Avg. Accuracy": acc["avg"],
"Avg. Client Request Time": report.average_client_time.get("avg", 0) if report.average_client_time else 0,
"Avg. OCR Processing Time": report.average_OCR_time.get("avg", 0) if report.average_OCR_time else 0,
"report_id": report.report_id,
"Subsidiary": map_subsidiary_short_to_long(report.subsidiary),
})
response = {
@ -427,103 +461,79 @@ class AccuracyViewSet(viewsets.ViewSet):
@extend_schema(
parameters=[
OpenApiParameter(
name='start_date',
name='duration',
location=OpenApiParameter.QUERY,
description='Start date (YYYY-mm-DDTHH:MM:SSZ)',
type=OpenApiTypes.DATE,
default='2023-01-02T00:00:00+0700',
),
OpenApiParameter(
name='end_date',
location=OpenApiParameter.QUERY,
description='End date (YYYY-mm-DDTHH:MM:SSZ)',
type=OpenApiTypes.DATE,
default='2024-01-10T00:00:00+0700',
description='one of [30d, 7d]',
type=OpenApiTypes.STR,
default='30d',
),
OpenApiParameter(
name='subsidiary',
location=OpenApiParameter.QUERY,
description='Subsidiary',
type=OpenApiTypes.STR,
),
OpenApiParameter(
name='page',
location=OpenApiParameter.QUERY,
description='Page number',
type=OpenApiTypes.INT,
required=False
),
OpenApiParameter(
name='page_size',
location=OpenApiParameter.QUERY,
description='Number of items per page',
type=OpenApiTypes.INT,
required=False
),
)
],
responses=None, tags=['Accuracy']
)
@action(detail=False, url_path="overview", methods=["GET"])
def overview(self, request):
if request.method == 'GET':
subsidiary = request.GET.get('subsidiary', None)
start_date_str = request.GET.get('start_date', "")
end_date_str = request.GET.get('end_date', "")
page_number = int(request.GET.get('page', 1))
page_size = int(request.GET.get('page_size', 10))
subsidiary = request.GET.get('subsidiary', "ALL")
duration = request.GET.get('duration', "")
base_query = Q()
if start_date_str and end_date_str:
try:
start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z')
end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z')
except ValueError:
raise InvalidException(excArgs="Date format")
base_query &= Q(created_at__range=(start_date, end_date))
if subsidiary:
base_query &= Q(subsidiary=subsidiary)
base_query &= Q(is_daily_report=True)
reports = Report.objects.filter(base_query).order_by('created_at')
paginator = Paginator(reports, page_size)
page = paginator.get_page(page_number)
data = []
this_month_report = MonthReportAccumulate()
for report in page:
res = this_month_report.add(report)
if not(res):
_, _data, total = this_month_report()
data += [total]
data += _data
this_month_report = MonthReportAccumulate()
this_month_report.add(report)
else:
continue
_, _data, total = this_month_report()
data += [total]
data += _data
# Generate xlsx file
# workbook = dict2xlsx(data, _type="report")
# tmp_file = f"/tmp/{str(uuid.uuid4())}.xlsx"
# os.makedirs(os.path.dirname(tmp_file), exist_ok=True)
# workbook.save(tmp_file)
# c_connector.remove_local_file((tmp_file, "fake_request_id"))
subsidiary = map_subsidiary_long_to_short(subsidiary)
# Retrieve data from Redis
key = f"{subsidiary}_{duration}"
data = json.loads(redis_client.get_specific_cache(settings.OVERVIEW_REPORT_ROOT, key)).get("data", [])
response = {
# 'file': load_xlsx_file(),
'overview_data': data,
'page': {
'number': page.number,
'total_pages': page.paginator.num_pages,
'count': page.paginator.count,
}
}
return JsonResponse(response, status=200)
return JsonResponse({'error': 'Invalid request method.'}, status=405)
@extend_schema(
parameters=[
OpenApiParameter(
name='duration',
location=OpenApiParameter.QUERY,
description='one of [30d, 7d]',
type=OpenApiTypes.STR,
default='30d',
),
OpenApiParameter(
name='subsidiary',
location=OpenApiParameter.QUERY,
description='Subsidiary',
type=OpenApiTypes.STR,
)
],
responses=None, tags=['Accuracy']
)
@action(detail=False, url_path="overview_download_file", methods=["GET"])
def overview_download_file(self, request):
if request.method == 'GET':
subsidiary = request.GET.get('subsidiary', "ALL")
duration = request.GET.get('duration', "")
subsidiary = map_subsidiary_long_to_short(subsidiary)
s3_key = f"{subsidiary}_{duration}.xlsx"
tmp_file = "/tmp/" + s3_key
os.makedirs("/tmp", exist_ok=True)
download_from_S3("report/" + settings.OVERVIEW_REPORT_ROOT + "/" + s3_key, tmp_file)
file = open(tmp_file, 'rb')
response = FileResponse(file, status=200)
# Set the content type and content disposition headers
response['Content-Type'] = 'application/octet-stream'
response['Content-Disposition'] = 'attachment; filename="{0}"'.format(os.path.basename(tmp_file))
return response
return JsonResponse({'error': 'Invalid request method.'}, status=405)
@extend_schema(
parameters=[],
@ -541,7 +551,7 @@ class AccuracyViewSet(viewsets.ViewSet):
raise NotFoundException(excArgs=f"report: {report_id}")
report = Report.objects.filter(report_id=report_id).first()
# download from s3 to local
tmp_file = "/tmp/" + "report_" + uuid.uuid4().hex + ".xlsx"
tmp_file = "/tmp/" + report.subsidiary + "_" + report.start_at.strftime("%Y%m%d") + "_" + report.end_at.strftime("%Y%m%d") + "_created_on_" + report.created_at.strftime("%Y%m%d") + ".xlsx"
os.makedirs("/tmp", exist_ok=True)
if not report.S3_file_name:
raise NotFoundException(excArgs="S3 file name")


@ -36,6 +36,8 @@ class CeleryConnector:
'remove_local_file': {'queue': "remove_local_file"},
'csv_feedback': {'queue': "csv_feedback"},
'make_a_report': {'queue': "report"},
'make_a_report_2': {'queue': "report_2"},
}
app = Celery(
@ -45,12 +47,16 @@ class CeleryConnector:
)
def make_a_report(self, args):
return self.send_task('make_a_report', args)
def make_a_report_2(self, args):
return self.send_task('make_a_report_2', args)
def csv_feedback(self, args):
return self.send_task('csv_feedback', args)
def do_pdf(self, args):
return self.send_task('do_pdf', args)
def upload_file_to_s3(self, args):
return self.send_task('upload_file_to_s3', args)
def upload_feedback_to_s3(self, args):
return self.send_task('upload_feedback_to_s3', args)
def upload_file_to_s3(self, args):
return self.send_task('upload_file_to_s3', args)
def upload_report_to_s3(self, args):
@ -59,6 +65,7 @@ class CeleryConnector:
return self.send_task('upload_obj_to_s3', args)
def remove_local_file(self, args):
return self.send_task('remove_local_file', args, countdown=280) # nearest execution of this task in 280 seconds
def process_fi(self, args):
return self.send_task('process_fi_invoice', args)
def process_fi_result(self, args):


@ -13,10 +13,13 @@ from fwd_api.models import SubscriptionRequestFile, FeedbackRequest, Report
from ..utils import file as FileUtils
from ..utils import process as ProcessUtil
from ..utils import s3 as S3Util
from ..utils.accuracy import validate_feedback_file
from fwd_api.constant.common import ProcessType
import csv
import json
import copy
from fwd_api.utils.accuracy import predict_result_to_ready
from celery.utils.log import get_task_logger
from fwd import settings
@ -79,6 +82,7 @@ def process_csv_feedback(csv_file_path, feedback_id):
continue
else:
sub_rq = sub_rqs[0]
images = SubscriptionRequestFile.objects.filter(request=sub_rq)
fb = {}
# update user result (with validate)
redemption_id = row.get('redemptionNumber')
@ -99,6 +103,42 @@ def process_csv_feedback(csv_file_path, feedback_id):
if len(redemption_id) > 0:
sub_rq.redemption_id = redemption_id
sub_rq.save()
# Update files
time_cost = {"imei": [], "invoice": [], "all": []}
imei_count = 0
if sub_rq.ai_inference_profile is None:
time_cost["imei"] = [-1 for _ in range(len(images))]
time_cost["invoice"] = [-1]
time_cost["all"] = [-1]
else:
for k, v in sub_rq.ai_inference_profile.items():
time_cost[k.split("_")[0]].append(v["inference"][1][0] - v["inference"][0] + (v["postprocess"][1]-v["postprocess"][0]))
for i, image in enumerate(images):
_predict_result = copy.deepcopy(predict_result_to_ready(sub_rq.predict_result))
_feedback_result = copy.deepcopy(sub_rq.feedback_result)
_reviewed_result = copy.deepcopy(sub_rq.reviewed_result)
image.processing_time = time_cost.get(image.doc_type, [0 for _ in range(image.index_in_request)])[image.index_in_request]
if not validate_feedback_file(_feedback_result, _predict_result):
status[request_id] = "Misaligned imei number between feedback and predict"
continue
if image.doc_type == "invoice":
_predict_result["imei_number"] = []
if _feedback_result:
_feedback_result["imei_number"] = []
else:
None
if _reviewed_result:
_reviewed_result["imei_number"] = []
else:
None
else:
_predict_result = {"retailername": None, "sold_to_party": None, "purchase_date": [], "imei_number": [_predict_result["imei_number"][image.index_in_request]]}
_feedback_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_feedback_result["imei_number"][image.index_in_request]]} if _feedback_result else None
_reviewed_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_reviewed_result["imei_number"][image.index_in_request]]} if _reviewed_result else None
image.predict_result = _predict_result
image.feedback_result = _feedback_result
image.reviewed_result = _reviewed_result
image.save()
# update log into database
feedback_rq = FeedbackRequest.objects.filter(feedback_id=feedback_id).first()
feedback_rq.error_status = status


@ -3,14 +3,19 @@ import traceback
from fwd_api.models import SubscriptionRequest, Report, ReportFile
from fwd_api.celery_worker.worker import app
from ..utils import s3 as S3Util
from ..utils.accuracy import update_temp_accuracy, IterAvg, calculate_and_save_subcription_file, count_transactions, extract_report_detail_list
from ..utils.accuracy import update_temp_accuracy, IterAvg, calculate_and_save_subcription_file, count_transactions, extract_report_detail_list, calculate_a_request, ReportAccumulateByRequest
from ..utils.file import dict2xlsx, save_workbook_file, save_report_to_S3
from ..utils import time_stuff
from ..utils.redis import RedisUtils
from django.utils import timezone
from django.db.models import Q
import json
import copy
from celery.utils.log import get_task_logger
from fwd import settings
redis_client = RedisUtils()
logger = get_task_logger(__name__)
@ -29,6 +34,7 @@ def mean_list(l):
@app.task(name='make_a_report')
def make_a_report(report_id, query_set):
# TODO: to be deprecated
try:
start_date = timezone.datetime.strptime(query_set["start_date_str"], '%Y-%m-%dT%H:%M:%S%z')
end_date = timezone.datetime.strptime(query_set["end_date_str"], '%Y-%m-%dT%H:%M:%S%z')
@ -105,7 +111,7 @@ def make_a_report(report_id, query_set):
errors += request_att["err"]
num_request += 1
transaction_att = count_transactions(start_date, end_date)
transaction_att = count_transactions(start_date, end_date, report.subsidiary)
# Do saving process
report.number_request = num_request
report.number_images = number_images
@ -151,4 +157,155 @@ def make_a_report(report_id, query_set):
except Exception as e:
print("[ERROR]: an error occured while processing report: ", report_id)
traceback.print_exc()
return 400
return 400
@app.task(name='make_a_report_2')
def make_a_report_2(report_id, query_set):
try:
start_date = timezone.datetime.strptime(query_set["start_date_str"], '%Y-%m-%dT%H:%M:%S%z')
end_date = timezone.datetime.strptime(query_set["end_date_str"], '%Y-%m-%dT%H:%M:%S%z')
base_query = Q(created_at__range=(start_date, end_date))
if query_set["request_id"]:
base_query &= Q(request_id=query_set["request_id"])
if query_set["redemption_id"]:
base_query &= Q(redemption_id=query_set["redemption_id"])
base_query &= Q(is_test_request=False)
if isinstance(query_set["include_test"], str):
query_set["include_test"] = True if query_set["include_test"].lower() in ["true", "yes", "1"] else False
if query_set["include_test"]:
# base_query = ~base_query
base_query.children = base_query.children[:-1]
elif isinstance(query_set["include_test"], bool):
if query_set["include_test"]:
base_query = ~base_query
if isinstance(query_set["subsidiary"], str):
if query_set["subsidiary"] and query_set["subsidiary"].lower().replace(" ", "")!="all":
base_query &= Q(redemption_id__startswith=query_set["subsidiary"])
if isinstance(query_set["is_reviewed"], str):
if query_set["is_reviewed"] == "reviewed":
base_query &= Q(is_reviewed=True)
elif query_set["is_reviewed"] == "not reviewed":
base_query &= Q(is_reviewed=False)
# elif query_set["is_reviewed"] == "all":
# pass
errors = []
# Create a placeholder to fill
accuracy = {"feedback" :{"imei_number": IterAvg(),
"purchase_date": IterAvg(),
"retailername": IterAvg(),
"sold_to_party": IterAvg(),},
"reviewed" :{"imei_number": IterAvg(),
"purchase_date": IterAvg(),
"retailername": IterAvg(),
"sold_to_party": IterAvg(),}
} # {"imei": {"acc": 0.1, count: 1}, ...}
time_cost = {"invoice": IterAvg(),
"imei": IterAvg()}
number_images = 0
number_bad_images = 0
# TODO: Multithreading
# Calculate accuracy, processing time, ....Then save.
subscription_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at')
report: Report = \
Report.objects.filter(report_id=report_id).first()
# TODO: number of transaction by doc type
num_request = 0
report_files = []
report_engine = ReportAccumulateByRequest(report.subsidiary)
for request in subscription_requests:
if request.status != 200 or not (request.reviewed_result or request.feedback_result):
# Failed requests or lack of reviewed_result/feedback_result
continue
request_att, _report_files = calculate_a_request(report, request)
report_files += _report_files
report_engine.add(request, _report_files)
request.feedback_accuracy = {"imei_number" : mean_list(request_att["acc"]["feedback"].get("imei_number", [None])),
"purchase_date" : mean_list(request_att["acc"]["feedback"].get("purchase_date", [None])),
"retailername" : mean_list(request_att["acc"]["feedback"].get("retailername", [None])),
"sold_to_party" : mean_list(request_att["acc"]["feedback"].get("sold_to_party", [None]))}
request.reviewed_accuracy = {"imei_number" : mean_list(request_att["acc"]["reviewed"].get("imei_number", [None])),
"purchase_date" : mean_list(request_att["acc"]["reviewed"].get("purchase_date", [None])),
"retailername" : mean_list(request_att["acc"]["reviewed"].get("retailername", [None])),
"sold_to_party" : mean_list(request_att["acc"]["reviewed"].get("sold_to_party", [None]))}
request.save()
number_images += request_att["total_images"]
number_bad_images += request_att["bad_images"]
update_temp_accuracy(accuracy["feedback"], request_att["acc"]["feedback"], keys=["imei_number", "purchase_date", "retailername", "sold_to_party"])
update_temp_accuracy(accuracy["reviewed"], request_att["acc"]["reviewed"], keys=["imei_number", "purchase_date", "retailername", "sold_to_party"])
time_cost["imei"].add(request_att["time_cost"].get("imei", []))
time_cost["invoice"].add(request_att["time_cost"].get("invoice", []))
errors += request_att["err"]
num_request += 1
report_fine_data, _save_data = report_engine.save(report.report_id, query_set.get("is_daily_report", False), query_set["include_test"])
transaction_att = count_transactions(start_date, end_date, report.subsidiary)
# Do saving process
report.number_request = num_request
report.number_images = number_images
report.number_imei = time_cost["imei"].count
report.number_invoice = time_cost["invoice"].count
report.number_bad_images = number_bad_images
# FIXME: refactor this data stream for endurability
report.average_OCR_time = {"invoice": time_cost["invoice"](), "imei": time_cost["imei"](),
"invoice_count": time_cost["invoice"].count, "imei_count": time_cost["imei"].count}
report.average_OCR_time["avg"] = (report.average_OCR_time["invoice"]*report.average_OCR_time["invoice_count"] + report.average_OCR_time["imei"]*report.average_OCR_time["imei_count"])/(report.average_OCR_time["imei_count"] + report.average_OCR_time["invoice_count"]) if (report.average_OCR_time["imei_count"] + report.average_OCR_time["invoice_count"]) > 0 else None
report.number_imei_transaction = transaction_att.get("imei", 0)
report.number_invoice_transaction = transaction_att.get("invoice", 0)
acumulated_acc = {"feedback": {},
"reviewed": {}}
for acc_type in ["feedback", "reviewed"]:
avg_acc = IterAvg()
for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
acumulated_acc[acc_type][key] = accuracy[acc_type][key]()
acumulated_acc[acc_type][key+"_count"] = accuracy[acc_type][key].count
avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key+"_count"])
acumulated_acc[acc_type]["avg"] = avg_acc()
report.feedback_accuracy = acumulated_acc["feedback"]
report.reviewed_accuracy = acumulated_acc["reviewed"]
report.errors = "|".join(errors)
report.status = "Ready"
report.save()
# Saving a xlsx file
report_files = ReportFile.objects.filter(report=report)
data = extract_report_detail_list(report_files, lower=True)
data_workbook = dict2xlsx(data, _type='report_detail')
local_workbook = save_workbook_file(report.report_id + ".xlsx", report, data_workbook)
s3_key=save_report_to_S3(report.report_id, local_workbook)
if query_set["is_daily_report"]:
# Save overview dashboard
# multiple accuracy by 100
save_data = copy.deepcopy(_save_data)
for i, dat in enumerate(report_fine_data):
keys = [x for x in list(dat.keys()) if "accuracy" in x.lower()]
keys_percent = "images_quality"
for x_key in report_fine_data[i][keys_percent].keys():
if "percent" not in x_key:
continue
report_fine_data[i][keys_percent][x_key] = report_fine_data[i][keys_percent][x_key]*100
for key in keys:
if report_fine_data[i][key]:
for x_key in report_fine_data[i][key].keys():
report_fine_data[i][key][x_key] = report_fine_data[i][key][x_key]*100
data_workbook = dict2xlsx(report_fine_data, _type='report')
overview_filename = query_set["subsidiary"] + "_" + query_set["report_overview_duration"] + ".xlsx"
local_workbook = save_workbook_file(overview_filename, report, data_workbook, settings.OVERVIEW_REPORT_ROOT)
s3_key=save_report_to_S3(report.report_id, local_workbook)
redis_client.set_cache(settings.OVERVIEW_REPORT_ROOT, overview_filename.replace(".xlsx", ""), json.dumps(save_data))
except IndexError as e:
print(e)
traceback.print_exc()
print("NotFound request by report id, %d", report_id)
except Exception as e:
print("[ERROR]: an error occured while processing report: ", report_id)
traceback.print_exc()
return 400
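For context, this task is published to the new report_2 queue; a minimal sketch of the producer side (module path assumed), matching the client connector and shadow_report changes below:
# Sketch: enqueue the new report task; routed to the "report_2" queue per the Celery config.
from fwd_api.celery_worker.client_connector import c_connector
c_connector.make_a_report_2((report_id, query_set))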


@ -42,7 +42,7 @@ app.conf.update({
Queue('remove_local_file'),
Queue('csv_feedback'),
Queue('report'),
Queue('report_2'),
],
'task_routes': {
'process_sap_invoice_result': {'queue': 'invoice_sap_rs'},
@ -61,6 +61,7 @@ app.conf.update({
'remove_local_file': {'queue': "remove_local_file"},
'csv_feedback': {'queue': "csv_feedback"},
'make_a_report': {'queue': "report"},
'make_a_report_2': {'queue': "report_2"},
}
})


@ -0,0 +1,18 @@
# Generated by Django 4.1.3 on 2024-02-04 23:32
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0178_alter_reportfile_acc'),
]
operations = [
migrations.AddField(
model_name='reportfile',
name='is_bad_image',
field=models.BooleanField(default=False),
),
]


@ -0,0 +1,18 @@
# Generated by Django 4.1.3 on 2024-02-05 02:44
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0179_reportfile_is_bad_image'),
]
operations = [
migrations.AlterField(
model_name='reportfile',
name='time_cost',
field=models.FloatField(default=None, null=True),
),
]


@ -16,6 +16,7 @@ class ReportFile(models.Model):
# Data
S3_uploaded = models.BooleanField(default=False)
doc_type = models.CharField(max_length=200)
is_bad_image = models.BooleanField(default=False)
predict_result = models.JSONField(null=True)
feedback_result = models.JSONField(null=True)
@ -25,7 +26,7 @@ class ReportFile(models.Model):
reviewed_accuracy = models.JSONField(null=True)
acc = models.FloatField(default=0, null=True)
time_cost = models.FloatField(default=0)
time_cost = models.FloatField(default=None, null=True)
is_reviewed = models.CharField(default="NA", max_length=5) # NA, No, Yes
bad_image_reason = models.TextField(default="")
counter_measures = models.TextField(default="")


@ -5,14 +5,307 @@ import copy
from typing import Any
from .ocr_utils.ocr_metrics import eval_ocr_metric
from .ocr_utils.sbt_report import post_processing_str
import uuid
from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile, ReportFile
from ..celery_worker.client_connector import c_connector
from ..utils.file import dict2xlsx, save_workbook_file, save_report_to_S3
from django.db.models import Q
from django.utils import timezone
import redis
from fwd import settings
from ..models import SubscriptionRequest, Report, ReportFile
import json
BAD_THRESHOLD = 0.75
valid_keys = ["retailername", "sold_to_party", "purchase_date", "imei_number"]
class ReportAccumulateByRequest:
def __init__(self, sub):
# self.redis_client = redis.Redis(host=settings.REDIS_HOST, port=settings.REDIS_PORT, decode_responses=True)
self.sub = sub
self.current_time = None
self.data = {} # {"month": [total, {"day": day_data}]}
self.total_format = {
'subs': "+",
'extraction_date': "Subtotal ()",
'total_images': 0,
'images_quality': {
'successful': 0,
'successful_percent': 0,
'bad': 0,
'bad_percent': 0
},
'average_accuracy_rate': {
'imei': IterAvg(),
'purchase_date': IterAvg(),
'retailer_name': IterAvg(),
'sold_to_party': IterAvg()
},
'average_processing_time': {
'imei': IterAvg(),
'invoice': IterAvg()
},
'usage': {
'imei':0,
'invoice': 0,
'request': 0
},
'feedback_accuracy': {
'imei_number': IterAvg(),
'purchase_date': IterAvg(),
'retailername': IterAvg(),
'sold_to_party': IterAvg()
},
'reviewed_accuracy': {
'imei_number': IterAvg(),
'purchase_date': IterAvg(),
'retailername': IterAvg(),
'sold_to_party': IterAvg()
},
'num_request': 0
}
self.day_format = {
'subs': sub,
'extraction_date': "",
'num_imei': 0,
'num_invoice': 0,
'total_images': 0,
'images_quality': {
'successful': 0,
'successful_percent': 0,
'bad': 0,
'bad_percent': 0
},
'average_accuracy_rate': {
'imei': IterAvg(),
'purchase_date': IterAvg(),
'retailer_name': IterAvg(),
'sold_to_party': IterAvg()
},
'average_processing_time': {
'imei': IterAvg(),
'invoice': IterAvg()
},
'usage': {
'imei': 0,
'invoice': 0,
'request': 0
},
'feedback_accuracy': {
'imei_number': IterAvg(),
'purchase_date': IterAvg(),
'retailername': IterAvg(),
'sold_to_party': IterAvg()
},
'reviewed_accuracy': {
'imei_number': IterAvg(),
'purchase_date': IterAvg(),
'retailername': IterAvg(),
'sold_to_party': IterAvg()
},
"report_files": [],
'num_request': 0
},
@staticmethod
def update_total(total, report_file):
total["total_images"] += 1
total["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
total["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0
# total["report_files"].append(report_file)
if sum([len(report_file.reviewed_accuracy[x]) for x in report_file.reviewed_accuracy.keys() if "_count" not in x]) > 0 :
total["average_accuracy_rate"]["imei"].add(report_file.reviewed_accuracy.get("imei_number", []))
total["average_accuracy_rate"]["purchase_date"].add(report_file.reviewed_accuracy.get("purchase_date", []))
total["average_accuracy_rate"]["retailer_name"].add(report_file.reviewed_accuracy.get("retailername", []))
total["average_accuracy_rate"]["sold_to_party"].add(report_file.reviewed_accuracy.get("sold_to_party", []))
elif sum([len(report_file.feedback_accuracy[x]) for x in report_file.feedback_accuracy.keys() if "_count" not in x]) > 0:
total["average_accuracy_rate"]["imei"].add(report_file.feedback_accuracy.get("imei_number", []))
total["average_accuracy_rate"]["purchase_date"].add(report_file.feedback_accuracy.get("purchase_date", []))
total["average_accuracy_rate"]["retailer_name"].add(report_file.feedback_accuracy.get("retailername", []))
total["average_accuracy_rate"]["sold_to_party"].add(report_file.feedback_accuracy.get("sold_to_party", []))
for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
total["feedback_accuracy"][key].add(report_file.feedback_accuracy.get(key, []))
for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
total["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, []))
if not total["average_processing_time"].get(report_file.doc_type, None):
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
total["average_processing_time"] = IterAvg()
total["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
total["usage"]["imei"] += 1 if report_file.doc_type == "imei" else 0
total["usage"]["invoice"] += 1 if report_file.doc_type == "invoice" else 0
return total
@staticmethod
def update_day(day_data, report_file):
day_data["total_images"] += 1
day_data["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
day_data["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0
day_data["num_imei"] += 1 if report_file.doc_type == "imei" else 0
day_data["num_invoice"] += 1 if report_file.doc_type == "invoice" else 0
day_data["report_files"].append(report_file)
if sum([len(report_file.reviewed_accuracy[x]) for x in report_file.reviewed_accuracy.keys() if "_count" not in x]) > 0 :
day_data["average_accuracy_rate"]["imei"].add(report_file.reviewed_accuracy.get("imei_number", 0))
day_data["average_accuracy_rate"]["purchase_date"].add(report_file.reviewed_accuracy.get("purchase_date", 0))
day_data["average_accuracy_rate"]["retailer_name"].add(report_file.reviewed_accuracy.get("retailername", 0))
day_data["average_accuracy_rate"]["sold_to_party"].add(report_file.reviewed_accuracy.get("sold_to_party", 0))
elif sum([len(report_file.feedback_accuracy[x]) for x in report_file.feedback_accuracy.keys() if "_count" not in x]) > 0:
day_data["average_accuracy_rate"]["imei"].add(report_file.feedback_accuracy.get("imei_number", 0))
day_data["average_accuracy_rate"]["purchase_date"].add(report_file.feedback_accuracy.get("purchase_date", 0))
day_data["average_accuracy_rate"]["retailer_name"].add(report_file.feedback_accuracy.get("retailername", 0))
day_data["average_accuracy_rate"]["sold_to_party"].add(report_file.feedback_accuracy.get("sold_to_party", 0))
for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
day_data["feedback_accuracy"][key].add(report_file.feedback_accuracy.get(key, 0))
for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
day_data["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, 0))
if not day_data["average_processing_time"].get(report_file.doc_type, None):
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
day_data["average_processing_time"] = IterAvg()
day_data["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
return day_data
def add(self, request, report_files):
this_month = request.created_at.strftime("%Y%m")
this_day = request.created_at.strftime("%Y%m%d")
if not self.data.get(this_month, None):
self.data[this_month] = [copy.deepcopy(self.total_format), {}]
if not self.data[this_month][1].get(this_day, None):
self.data[this_month][1][this_day] = copy.deepcopy(self.day_format)[0]
self.data[this_month][1][this_day]['extraction_date'] = request.created_at.strftime("%Y-%m-%d")
usage = self.count_transactions_within_day(this_day)
self.data[this_month][1][this_day]["usage"]["imei"] = usage.get("imei", 0)
self.data[this_month][1][this_day]["usage"]["invoice"] = usage.get("invoice", 0)
self.data[this_month][1][this_day]["usage"]["request"] = usage.get("request", 0)
self.data[this_month][1][this_day]['num_request'] += 1
self.data[this_month][0]['num_request'] += 1
for report_file in report_files:
self.data[this_month][0] = self.update_total(self.data[this_month][0], report_file) # Update the subtotal within the month
self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], report_file) # Update the subtotal of the day
def count_transactions_within_day(self, date_string):
# convert this day into timezone.datetime at UTC
start_date = datetime.strptime(date_string, "%Y%m%d")
start_date_with_timezone = timezone.make_aware(start_date)
end_date_with_timezone = start_date_with_timezone + timezone.timedelta(days=1)
return count_transactions(start_date_with_timezone, end_date_with_timezone, self.sub)
def save(self, root_report_id, is_daily_report=False, include_test=False):
report_data = self.get()
fine_data = []
save_data = {"file": {"overview": f"{root_report_id}/{root_report_id}.xlsx"},
"data": fine_data} # {"sub_report_id": "S3 location", "data": fine_data}
# extract data
for month in report_data.keys():
fine_data.append(report_data[month][0])
for day in report_data[month][1].keys():
fine_data.append(report_data[month][1][day])
# save daily reports
report_id = root_report_id + "_" + day
start_date = datetime.strptime(day, "%Y%m%d")
start_date_with_timezone = timezone.make_aware(start_date)
end_date_with_timezone = start_date_with_timezone + timezone.timedelta(days=1)
_average_OCR_time = {"invoice": self.data[month][1][day]["average_processing_time"]["invoice"](), "imei": self.data[month][1][day]["average_processing_time"]["imei"](),
"invoice_count": self.data[month][1][day]["average_processing_time"]["invoice"].count, "imei_count": self.data[month][1][day]["average_processing_time"]["imei"].count}
_average_OCR_time["avg"] = (_average_OCR_time["invoice"]*_average_OCR_time["invoice_count"] + _average_OCR_time["imei"]*_average_OCR_time["imei_count"])/(_average_OCR_time["imei_count"] + _average_OCR_time["invoice_count"]) if (_average_OCR_time["imei_count"] + _average_OCR_time["invoice_count"]) > 0 else None
acumulated_acc = {"feedback_accuracy": {},
"reviewed_accuracy": {}}
for acc_type in ["feedback_accuracy", "reviewed_accuracy"]:
avg_acc = IterAvg()
for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
acumulated_acc[acc_type][key] = self.data[month][1][day][acc_type][key]()
acumulated_acc[acc_type][key+"_count"] = self.data[month][1][day][acc_type][key].count
avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key+"_count"])
acumulated_acc[acc_type]["avg"] = avg_acc()
acumulated_acc[acc_type]["avg_count"] = avg_acc.count
new_report: Report = Report(
report_id=report_id,
is_daily_report=is_daily_report,
subsidiary=self.sub.lower().replace(" ", ""),
include_test=include_test,
start_at=start_date_with_timezone,
end_at=end_date_with_timezone,
status="Ready",
number_request=report_data[month][1][day]["num_request"],
number_images=report_data[month][1][day]["total_images"],
number_imei=report_data[month][1][day]["num_imei"],
number_invoice=report_data[month][1][day]["num_invoice"],
number_bad_images=report_data[month][1][day]["images_quality"]["bad"],
average_OCR_time=_average_OCR_time,
number_imei_transaction=report_data[month][1][day]["usage"]["imei"],
number_invoice_transaction=report_data[month][1][day]["usage"]["invoice"],
feedback_accuracy=acumulated_acc["feedback_accuracy"],
reviewed_accuracy=acumulated_acc["reviewed_accuracy"],
)
new_report.save()
data = extract_report_detail_list(self.data[month][1][day]["report_files"], lower=True)
data_workbook = dict2xlsx(data, _type='report_detail')
local_workbook = save_workbook_file(report_id + ".xlsx", new_report, data_workbook)
s3_key=save_report_to_S3(report_id, local_workbook)
return fine_data, save_data
def get(self) -> Any:
# FIXME: This looks like junk
_data = copy.deepcopy(self.data)
for month in _data.keys():
_data[month][0]["images_quality"]["successful_percent"] = _data[month][0]["images_quality"]["successful"]/_data[month][0]["total_images"] if _data[month][0]["total_images"] > 0 else 0
_data[month][0]["images_quality"]["bad_percent"] = _data[month][0]["images_quality"]["bad"]/_data[month][0]["total_images"] if _data[month][0]["total_images"] > 0 else 0
num_transaction_imei = 0
num_transaction_invoice = 0
for day in _data[month][1].keys():
num_transaction_imei += _data[month][1][day]["usage"].get("imei", 0)
num_transaction_invoice += _data[month][1][day]["usage"].get("invoice", 0)
_data[month][1][day]["average_accuracy_rate"]["imei"] = _data[month][1][day]["average_accuracy_rate"]["imei"]()
_data[month][1][day]["average_accuracy_rate"]["purchase_date"] = _data[month][1][day]["average_accuracy_rate"]["purchase_date"]()
_data[month][1][day]["average_accuracy_rate"]["retailer_name"] = _data[month][1][day]["average_accuracy_rate"]["retailer_name"]()
_data[month][1][day]["average_accuracy_rate"]["sold_to_party"] = _data[month][1][day]["average_accuracy_rate"]["sold_to_party"]()
_data[month][1][day]["average_processing_time"]["imei"] = _data[month][1][day]["average_processing_time"]["imei"]()
_data[month][1][day]["average_processing_time"]["invoice"] = _data[month][1][day]["average_processing_time"]["invoice"]()
_data[month][1][day]["feedback_accuracy"]["imei_number"] = _data[month][1][day]["feedback_accuracy"]["imei_number"]()
_data[month][1][day]["feedback_accuracy"]["purchase_date"] = _data[month][1][day]["feedback_accuracy"]["purchase_date"]()
_data[month][1][day]["feedback_accuracy"]["retailername"] = _data[month][1][day]["feedback_accuracy"]["retailername"]()
_data[month][1][day]["feedback_accuracy"]["sold_to_party"] = _data[month][1][day]["feedback_accuracy"]["sold_to_party"]()
_data[month][1][day]["reviewed_accuracy"]["imei_number"] = _data[month][1][day]["reviewed_accuracy"]["imei_number"]()
_data[month][1][day]["reviewed_accuracy"]["purchase_date"] = _data[month][1][day]["reviewed_accuracy"]["purchase_date"]()
_data[month][1][day]["reviewed_accuracy"]["retailername"] = _data[month][1][day]["reviewed_accuracy"]["retailername"]()
_data[month][1][day]["reviewed_accuracy"]["sold_to_party"] = _data[month][1][day]["reviewed_accuracy"]["sold_to_party"]()
_data[month][1][day].pop("report_files")
_data[month][1][day]["images_quality"]["successful_percent"] = _data[month][1][day]["images_quality"]["successful"]/_data[month][1][day]["total_images"] if _data[month][1][day]["total_images"] > 0 else 0
_data[month][1][day]["images_quality"]["bad_percent"] = _data[month][1][day]["images_quality"]["bad"]/_data[month][1][day]["total_images"] if _data[month][1][day]["total_images"] > 0 else 0
_data[month][0]["usage"]["imei"] = num_transaction_imei
_data[month][0]["usage"]["invoice"] = num_transaction_invoice
_data[month][0]["average_accuracy_rate"]["imei"] = _data[month][0]["average_accuracy_rate"]["imei"]()
_data[month][0]["average_accuracy_rate"]["purchase_date"] = _data[month][0]["average_accuracy_rate"]["purchase_date"]()
_data[month][0]["average_accuracy_rate"]["retailer_name"] = _data[month][0]["average_accuracy_rate"]["retailer_name"]()
_data[month][0]["average_accuracy_rate"]["sold_to_party"] = _data[month][0]["average_accuracy_rate"]["sold_to_party"]()
_data[month][0]["average_processing_time"]["imei"] = _data[month][0]["average_processing_time"]["imei"]()
_data[month][0]["average_processing_time"]["invoice"] = _data[month][0]["average_processing_time"]["invoice"]()
_data[month][0]["feedback_accuracy"]["imei_number"] = _data[month][0]["feedback_accuracy"]["imei_number"]()
_data[month][0]["feedback_accuracy"]["purchase_date"] = _data[month][0]["feedback_accuracy"]["purchase_date"]()
_data[month][0]["feedback_accuracy"]["retailername"] = _data[month][0]["feedback_accuracy"]["retailername"]()
_data[month][0]["feedback_accuracy"]["sold_to_party"] = _data[month][0]["feedback_accuracy"]["sold_to_party"]()
_data[month][0]["reviewed_accuracy"]["imei_number"] = _data[month][0]["reviewed_accuracy"]["imei_number"]()
_data[month][0]["reviewed_accuracy"]["purchase_date"] = _data[month][0]["reviewed_accuracy"]["purchase_date"]()
_data[month][0]["reviewed_accuracy"]["retailername"] = _data[month][0]["reviewed_accuracy"]["retailername"]()
_data[month][0]["reviewed_accuracy"]["sold_to_party"] = _data[month][0]["reviewed_accuracy"]["sold_to_party"]()
return _data
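Put together, the accumulator is driven exactly as in make_a_report_2 above; a condensed usage sketch:
# Sketch: accumulate per-request ReportFiles, then persist one daily Report + xlsx per day.
report_engine = ReportAccumulateByRequest(report.subsidiary)
for request in subscription_requests:
    _, report_files = calculate_a_request(report, request)
    report_engine.add(request, report_files)      # updates the month subtotal and the day bucket
fine_data, save_data = report_engine.save(report.report_id, is_daily_report=True, include_test=False)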
class MonthReportAccumulate:
def __init__(self):
self.month = None
@ -89,7 +382,7 @@ class MonthReportAccumulate:
self.total["usage"]["invoice"] += report.number_invoice_transaction
def add(self, report):
report_month = report.created_at.month
report_month = report.start_at.month
if self.month is None:
self.month = report_month
@ -103,7 +396,7 @@ class MonthReportAccumulate:
new_data = copy.deepcopy(self.data_format)[0]
new_data["num_imei"] = report.number_imei
new_data["subs"] = report.subsidiary
new_data["extraction_date"] = report.created_at
new_data["extraction_date"] = report.start_at
new_data["num_invoice"] = report.number_invoice
new_data["total_images"] = report.number_images
new_data["images_quality"]["successful"] = report.number_images - report.number_bad_images
@ -130,10 +423,38 @@ class MonthReportAccumulate:
self.accumulate(report)
return True
def clear(self):
self.month = None
self.total = {
'subs': "+",
'extraction_date': "Subtotal ()",
'total_images': 0,
'images_quality': {
'successful': 0,
'successful_percent': 0,
'bad': 0,
'bad_percent': 0
},
'average_accuracy_rate': {
'imei': IterAvg(),
'purchase_date': IterAvg(),
'retailer_name': IterAvg()
},
'average_processing_time': {
'imei': IterAvg(),
'invoice': IterAvg()
},
'usage': {
'imei':0,
'invoice': 0
}
}
self.data = []
def __call__(self):
self.total["images_quality"]["successful_percent"] += self.total["images_quality"]["successful"]/self.total["total_images"] if self.total["total_images"] else 0
self.total["images_quality"]["bad_percent"] += self.total["images_quality"]["bad"]/self.total["total_images"] if self.total["total_images"] else 0
total = copy.deepcopy(self.total)
total["images_quality"]["successful_percent"] = total["images_quality"]["successful"]/total["total_images"] if total["total_images"] else 0
total["images_quality"]["bad_percent"] = total["images_quality"]["bad"]/total["total_images"] if total["total_images"] else 0
total["average_accuracy_rate"]["imei"] = total["average_accuracy_rate"]["imei"]()
total["average_accuracy_rate"]["purchase_date"] = total["average_accuracy_rate"]["purchase_date"]()
total["average_accuracy_rate"]["retailer_name"] = total["average_accuracy_rate"]["retailer_name"]()
@ -167,6 +488,16 @@ class IterAvg:
def __call__(self):
return self.avg
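IterAvg is only partially shown here; assuming the add/add_avg/count members used throughout this diff, a small usage sketch:
# Sketch of IterAvg as used in this diff (add, add_avg and count are assumed members).
avg = IterAvg()
avg.add([0.9, 1.0])       # accumulate raw samples
avg.add_avg(0.95, 2)      # merge a pre-computed average with its sample count
current_mean, n = avg(), avg.count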
def validate_feedback_file(feedback, predict):
if feedback:
imei_feedback = feedback.get("imei_number", [])
imei_feedback = [x for x in imei_feedback if x != ""]
num_imei_feedback = len(imei_feedback)
num_imei_predict = len(predict.get("imei_number", []))
if num_imei_feedback != num_imei_predict:
return False
return True
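For example, assuming the result-dict shape used elsewhere in this diff:
# Sketch: feedback is valid only when its non-empty IMEI count matches the prediction's.
predict = {"imei_number": ["356789104563218", "356789104563219"]}
feedback = {"imei_number": ["356789104563218", ""]}
validate_feedback_file(feedback, predict)   # False: 1 non-empty feedback IMEI vs 2 predicted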
def first_of_list(the_list):
if not the_list:
return None
@ -210,9 +541,11 @@ def extract_report_detail_list(report_detail_list, lower=False, in_percent=True)
data[i][key] = data[i][key]*100
return data
def count_transactions(start_date, end_date):
def count_transactions(start_date, end_date, subsidiary="all"):
base_query = Q(created_at__range=(start_date, end_date))
base_query &= Q(is_test_request=False)
if subsidiary and subsidiary.lower().replace(" ", "")!="all":
base_query &= Q(redemption_id__startswith=subsidiary)
transaction_att = {}
print(f"[DEBUG]: atracting transactions attribute...")
@ -226,6 +559,10 @@ def count_transactions(start_date, end_date):
transaction_att[doc_type] = 1
else:
transaction_att[doc_type] += 1
if not transaction_att.get("request", None):
transaction_att["request"] = 1
else:
transaction_att["request"] += 1
return transaction_att
def convert_datetime_format(date_string: str, is_gt=False) -> str:
@ -359,6 +696,7 @@ def calculate_and_save_subcription_file(report, request):
reviewed_accuracy=att["acc"]["reviewed"],
acc=att["avg_acc"],
time_cost=image.processing_time,
is_bad_image=att["is_bad_image"],
bad_image_reason=image.reason,
counter_measures=image.counter_measures,
error="|".join(att["err"])
@ -387,6 +725,72 @@ def calculate_and_save_subcription_file(report, request):
continue
return request_att
def calculate_a_request(report, request):
request_att = {"acc": {"feedback": {"imei_number": [],
"purchase_date": [],
"retailername": [],
"sold_to_party": [],
},
"reviewed": {"imei_number": [],
"purchase_date": [],
"retailername": [],
"sold_to_party": [],
}},
"err": [],
"time_cost": {},
"total_images": 0,
"bad_images": 0}
images = SubscriptionRequestFile.objects.filter(request=request)
report_files = []
for image in images:
status, att = calculate_subcription_file(image)
if status != 200:
continue
image.feedback_accuracy = att["acc"]["feedback"]
image.reviewed_accuracy = att["acc"]["reviewed"]
image.is_bad_image_quality = att["is_bad_image"]
image.save()
new_report_file = ReportFile(report=report,
correspond_request_id=request.request_id,
correspond_redemption_id=request.redemption_id,
doc_type=image.doc_type,
predict_result=image.predict_result,
feedback_result=image.feedback_result,
reviewed_result=image.reviewed_result,
feedback_accuracy=att["acc"]["feedback"],
reviewed_accuracy=att["acc"]["reviewed"],
acc=att["avg_acc"],
is_bad_image=att["is_bad_image"],
time_cost=image.processing_time,
bad_image_reason=image.reason,
counter_measures=image.counter_measures,
error="|".join(att["err"])
)
report_files.append(new_report_file)
if request_att["time_cost"].get(image.doc_type, None):
request_att["time_cost"][image.doc_type].append(image.processing_time)
else:
request_att["time_cost"][image.doc_type] = [image.processing_time]
try:
request_att["acc"]["feedback"]["imei_number"] += att["acc"]["feedback"]["imei_number"]
request_att["acc"]["feedback"]["purchase_date"] += att["acc"]["feedback"]["purchase_date"]
request_att["acc"]["feedback"]["retailername"] += att["acc"]["feedback"]["retailername"]
request_att["acc"]["feedback"]["sold_to_party"] += att["acc"]["feedback"]["sold_to_party"]
request_att["acc"]["reviewed"]["imei_number"] += att["acc"]["reviewed"]["imei_number"]
request_att["acc"]["reviewed"]["purchase_date"] += att["acc"]["reviewed"]["purchase_date"]
request_att["acc"]["reviewed"]["retailername"] += att["acc"]["reviewed"]["retailername"]
request_att["acc"]["reviewed"]["sold_to_party"] += att["acc"]["reviewed"]["sold_to_party"]
request_att["bad_images"] += int(att["is_bad_image"])
request_att["total_images"] += 1
request_att["err"] += att["err"]
except Exception as e:
print(e)
continue
return request_att, report_files
def calculate_subcription_file(subcription_request_file):
@ -490,5 +894,5 @@ def calculate_attributions(request): # for one request, return in order
return acc, data, time_cost, image_quality_num, error
def shadow_report(report_id, query):
c_connector.make_a_report(
c_connector.make_a_report_2(
(report_id, query))


@ -7,6 +7,7 @@ import json
from PIL import Image, ExifTags
from django.core.files.uploadedfile import TemporaryUploadedFile
from django.utils import timezone
from datetime import datetime
from fwd import settings
from ..utils import s3 as S3Util
@ -30,6 +31,16 @@ s3_client = S3Util.MinioS3Client(
bucket_name=settings.S3_BUCKET_NAME
)
def convert_date_string(date_string):
# Parse the input date string
date_format = "%Y-%m-%d %H:%M:%S.%f %z"
parsed_date = datetime.strptime(date_string, date_format)
# Format the date as "YYYYMMDD"
formatted_date = parsed_date.strftime("%Y%m%d")
return formatted_date
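For example, with the input format declared above:
# "YYYY-mm-dd HH:MM:SS.ffffff +zzzz" -> "YYYYMMDD"
convert_date_string("2024-02-06 10:17:27.123456 +0700")   # -> "20240206"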
def validate_report_list(request):
start_date_str = request.GET.get('start_date')
end_date_str = request.GET.get('end_date')
@ -190,10 +201,13 @@ def save_feedback_file(file_name: str, rq: FeedbackRequest, uploaded_file: dict)
csvfile.write(file_contents)
return file_path
def save_workbook_file(file_name: str, rp: Report, workbook):
def save_workbook_file(file_name: str, rp: Report, workbook, prefix=""):
report_id = str(rp.report_id)
folder_path = os.path.join(settings.MEDIA_ROOT, "report", report_id)
if not prefix:
folder_path = os.path.join(settings.MEDIA_ROOT, "report", report_id)
else:
folder_path = os.path.join(settings.MEDIA_ROOT, "report", prefix)
os.makedirs(folder_path, exist_ok = True)
file_path = os.path.join(folder_path, file_name)
@ -388,12 +402,17 @@ def build_media_url_v2(media_id: str, user_id: int, sub_id: int, u_sync_id: str)
def get_value(_dict, keys):
keys = keys.split('.')
value = _dict
for key in keys:
if not key in value.keys():
return "-"
else:
value = value.get(key, {})
try:
for key in keys:
if not key in value.keys():
return "-"
else:
value = value.get(key, {})
except Exception as e:
print(f"[ERROR]: {e}")
print(f"[ERROR]: value: {value}")
print(f"[ERROR]: keys: {keys}")
if not value:
return "-"
elif isinstance(value, list):
@ -475,13 +494,23 @@ def dict2xlsx(input: json, _type='report'):
ws[key + str(start_index)].border = border
if _type == 'report':
ws[key + str(start_index)].font = font_black_bold
if key_index == 0 or (key_index >= 9 and key_index <= 15):
ws[key + str(start_index)].fill = fill_gray
elif key_index == 1:
ws[key + str(start_index)].fill = fill_green
elif key_index >= 4 and key_index <= 8:
ws[key + str(start_index)].fill = fill_yellow
if subtotal['subs'] == '+':
ws[key + str(start_index)].font = font_black_bold
if key_index == 0 or (key_index >= 9 and key_index <= 15):
ws[key + str(start_index)].fill = fill_gray
elif key_index == 1:
ws[key + str(start_index)].fill = fill_green
elif key_index >= 4 and key_index <= 8:
ws[key + str(start_index)].fill = fill_yellow
else:
if 'average_accuracy_rate' in mapping[key] and type(value) in [int, float] and value < 95:
ws[key + str(start_index)].style = normal_cell_red
elif 'average_processing_time' in mapping[key] and type(value) in [int, float] and value > 2.0:
ws[key + str(start_index)].style = normal_cell_red
elif 'bad_percent' in mapping[key] and type(value) in [int, float] and value > 10:
ws[key + str(start_index)].style = normal_cell_red
else :
ws[key + str(start_index)].style = normal_cell
elif _type == 'report_detail':
if 'accuracy' in mapping[key] and type(value) in [int, float] and value < 75:
ws[key + str(start_index)].style = normal_cell_red
@ -491,21 +520,5 @@ def dict2xlsx(input: json, _type='report'):
ws[key + str(start_index)].style = normal_cell
start_index += 1
if 'data' in subtotal.keys():
for record in subtotal['data']:
for key in mapping.keys():
value = get_value(record, mapping[key])
ws[key + str(start_index)] = value
if 'average_accuracy_rate' in mapping[key] and type(value) in [int, float] and value < 95:
ws[key + str(start_index)].style = normal_cell_red
elif 'average_processing_time' in mapping[key] and type(value) in [int, float] and value > 2.0:
ws[key + str(start_index)].style = normal_cell_red
elif 'bad_percent' in mapping[key] and type(value) in [int, float] and value > 10:
ws[key + str(start_index)].style = normal_cell_red
else :
ws[key + str(start_index)].style = normal_cell
start_index += 1
return wb


@ -22,6 +22,9 @@ class RedisUtils:
for key, value in self.redis_client.hgetall(request_id).items():
resutlt[key] = json.loads(value)
return resutlt
def get_specific_cache(self, request_id, key):
return json.loads(self.redis_client.hget(request_id, key))
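This is the read side of the overview cache written by make_a_report_2; a minimal usage sketch matching the overview view above:
# Sketch: hash name is settings.OVERVIEW_REPORT_ROOT, field is "<SUB>_<duration>".
cached = redis_client.get_specific_cache(settings.OVERVIEW_REPORT_ROOT, "SG_30d")
# the overview view unwraps its "data" field before returning it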
def get_size(self, request_id):
return self.redis_client.hlen(request_id)


@ -0,0 +1,11 @@
from fwd.settings import SUBS
def map_subsidiary_long_to_short(long_sub):
short_sub = SUBS.get(long_sub.upper(), "all")
return short_sub.upper()
def map_subsidiary_short_to_long(short_sub):
for k, v in SUBS.items():
if v == short_sub.upper():
return k
return "ALL"


@ -0,0 +1,9 @@
def is_the_same_day(first_day, second_day):
if first_day.day == second_day.day and first_day.month == second_day.month and first_day.year == second_day.year:
return True
return False
def is_the_same_month(first_day, second_day):
if first_day.month == second_day.month and first_day.year == second_day.year:
return True
return False


@ -0,0 +1,68 @@
import os
import time
import requests
from datetime import datetime
# Get the proxy URL from the environment variable
interval = 60*60*1 # 1 hour
update_cost = 60*3
proxy_url = os.getenv('PROXY', "localhost")
# Define the login API URL
login_url = f'{proxy_url}/api/ctel/login/'
login_token = None
# Define the login credentials
login_credentials = {
'username': 'sbt',
'password': '7Eg4AbWIXDnufgn'
}
# Define the command to call the update API
update_url = f'{proxy_url}/api/ctel/make_report/'
update_params = {
'is_daily_report': 'true',
'report_overview_duration': '',
'subsidiary': None
}
"report_overview_duration"
def update_report(login_token, report_overview_duration=["30d", "7d"], subsidiary=["all", "SEAU", "SESP", "SME", "SEPCO", "TSE", "SEIN"]):
headers = {'Authorization': login_token}
for dur in report_overview_duration:
for sub in subsidiary:
update_params["report_overview_duration"] = dur
update_params["subsidiary"] = sub
update_response = requests.get(update_url, params=update_params, headers=headers)
print("[INFO]: update_response at {} by {} - {} with status {}".format(datetime.now(), dur, sub, update_response.status_code))
update_response.raise_for_status()
time.sleep(update_cost)
# Define the interval in seconds between API calls
# time.sleep(60)
while True:
# Call the login API and retrieve the login token
if not login_token:
login_response = requests.post(login_url, data=login_credentials)
# login_response.raise_for_status()
if login_response.status_code == 200:
login_token = login_response.json()['token']
print("[INFO] relogged in at {}".format(datetime.now()))
# Call the update API
try:
update_report(login_token)
except Exception as e:
print(f"[ERROR]: {e}")
print(f"[ERROR]: Failed to update_response, retrying...")
login_response = requests.post(login_url, data=login_credentials)
# login_response.raise_for_status()
if login_response.status_code == 200:
login_token = login_response.json()['token']
print("[INFO] relogged in at {}".format(datetime.now()))
update_report(login_token)
# Wait for the specified interval
time.sleep(interval)


@ -84,12 +84,12 @@ services:
depends_on:
db-sbt:
condition: service_started
# command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
# python manage.py makemigrations &&
# python manage.py migrate &&
# python manage.py compilemessages &&
# gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod
command: bash -c "tail -f > /dev/null"
command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
python manage.py makemigrations &&
python manage.py migrate &&
python manage.py compilemessages &&
gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod
# command: bash -c "tail -f > /dev/null"
minio:
image: minio/minio
@ -175,6 +175,7 @@ services:
working_dir: /app
command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5"
# command: bash -c "tail -f > /dev/null"
# Back-end persistent
db-sbt:

@ -0,0 +1 @@
Subproject commit 220954c5c6bfed15e93e26b2adacf28ff8b75baf


@ -0,0 +1,17 @@
from datetime import datetime
# Assuming you have two datetime objects for the same day
date_jan = datetime(2022, 2, 15, 12, 30, 0)
date_feb = datetime(2022, 2, 15, 8, 45, 0)
# Check if they are the same day
if date_jan.day == date_feb.day and date_jan.month == date_feb.month and date_jan.year == date_feb.year:
print("They are the same day")
else:
print("They are different days")
# Check if they are the same month
if date_jan.month == date_feb.month and date_jan.year == date_feb.year:
print("They are the same month")
else:
print("They are different months")