Add: multiple APIs

This commit is contained in:
dx-tan 2024-02-01 14:32:20 +07:00
parent dd206c4a3c
commit 394af1067c
11 changed files with 273 additions and 88 deletions

View File

@ -2,24 +2,21 @@ from rest_framework import status, viewsets
from rest_framework.decorators import action
from rest_framework.response import Response
from django.core.paginator import Paginator
from django.http import JsonResponse
from django.http import JsonResponse, FileResponse, HttpResponse
from django.utils import timezone
from django.db.models import Q
import uuid
import os
from fwd import settings
from drf_spectacular.utils import extend_schema, OpenApiParameter, OpenApiTypes
# from drf_spectacular.types import OpenApiString
import json
from ..exception.exceptions import InvalidException, RequiredFieldException
from ..exception.exceptions import InvalidException, RequiredFieldException, NotFoundException
from ..models import SubscriptionRequest, Report, ReportFile
from ..utils.accuracy import shadow_report, MonthReportAccumulate
from ..utils.file import validate_report_list
from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list, extract_report_detail_list, IterAvg
from ..utils.file import download_from_S3
from ..utils.process import string_to_boolean
# Return the first element of `the_list`, or None when the list is empty/falsy.
# Used to unwrap single-value result lists (e.g. accuracy fields stored as lists).
def first_of_list(the_list):
if not the_list:
return None
return the_list[0]
from ..celery_worker.client_connector import c_connector
class AccuracyViewSet(viewsets.ViewSet):
lookup_field = "username"
@ -239,10 +236,10 @@ class AccuracyViewSet(viewsets.ViewSet):
end_date_str = request.GET.get('end_date')
request_id = request.GET.get('request_id', None)
redemption_id = request.GET.get('redemption_id', None)
is_reviewed = string_to_boolean(request.data.get('is_reviewed', "false"))
include_test = string_to_boolean(request.data.get('include_test', "false"))
is_reviewed = string_to_boolean(request.GET.get('is_reviewed', "false"))
include_test = string_to_boolean(request.GET.get('include_test', "false"))
subsidiary = request.GET.get("subsidiary", "all")
is_daily_report = string_to_boolean(request.data.get('is_daily_report', "false"))
is_daily_report = string_to_boolean(request.GET.get('is_daily_report', "false"))
try:
start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z')
@ -269,7 +266,10 @@ class AccuracyViewSet(viewsets.ViewSet):
include_reviewed=is_reviewed,
start_at=start_date,
end_at=end_date,
status="Processing",
)
if is_daily_report:
new_report.created_at = end_date
new_report.save()
# Background job to calculate accuracy
shadow_report(report_id, query_set)
@ -314,33 +314,13 @@ class AccuracyViewSet(viewsets.ViewSet):
paginator = Paginator(report_files, page_size)
page = paginator.get_page(page_number)
data = []
for report_file in page:
data.append({
"Request ID": report_file.correspond_request_id,
"Redemption Number": report_file.correspond_redemption_id,
"Image type": report_file.doc_type,
"IMEI_user submitted": first_of_list(report_file.feedback_result.get("imei_number", [None])),
"IMEI_OCR retrieved": first_of_list(report_file.predict_result.get("imei_number", [None])),
"IMEI1 Accuracy": first_of_list(report_file.feedback_accuracy.get("imei_number", [None])),
"Invoice_Purchase Date_Consumer": report_file.feedback_result.get("purchase_date", None),
"Invoice_Purchase Date_OCR": report_file.predict_result.get("purchase_date", []),
"Invoice_Purchase Date Accuracy": first_of_list(report_file.feedback_accuracy.get("purchase_date", [None])),
"Invoice_Retailer_Consumer": report_file.feedback_result.get("retailername", None),
"Invoice_Retailer_OCR": report_file.predict_result.get("retailername", None),
"Invoice_Retailer Accuracy": first_of_list(report_file.feedback_accuracy.get("retailername", [None])),
"OCR Image Accuracy": report_file.acc,
"OCR Image Speed (seconds)": report_file.time_cost,
"Reviewed?": "No",
"Bad Image Reasons": report_file.bad_image_reason,
"Countermeasures": report_file.counter_measures,
"IMEI_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("imei_number", [None])),
"Purchase Date_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("purchase_date", [None])),
"Retailer_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("retailername", [None])),
})
data = extract_report_detail_list(page, in_percent=False)
response = {
'report_detail': data,
'metadata': {"subsidiary": report.subsidiary,
"start_at": report.start_at,
"end_at": report.end_at},
'page': {
'number': page.number,
'total_pages': page.paginator.num_pages,
@ -428,7 +408,7 @@ class AccuracyViewSet(viewsets.ViewSet):
"IMEI Acc": report.feedback_accuracy.get("imei_number", None) if report.reviewed_accuracy else None,
"Avg. Accuracy": report.feedback_accuracy.get("avg", None) if report.reviewed_accuracy else None,
"Avg. Client Request Time": report.average_client_time.get("avg", 0) if report.average_client_time else 0,
"Avg. OCR Processing Time": report.average_OCR_time.get("avg", 0) if report.average_client_time else 0,
"Avg. OCR Processing Time": report.average_OCR_time.get("avg", 0) if report.average_OCR_time else 0,
"report_id": report.report_id,
})
@ -492,20 +472,20 @@ class AccuracyViewSet(viewsets.ViewSet):
page_number = int(request.GET.get('page', 1))
page_size = int(request.GET.get('page_size', 10))
base_query = Q()
if not start_date_str or not end_date_str:
reports = Report.objects.all()
else:
if start_date_str and end_date_str:
try:
start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z')
end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z')
except ValueError:
raise InvalidException(excArgs="Date format")
base_query = Q(created_at__range=(start_date, end_date))
if subsidiary:
base_query &= Q(subsidiary=subsidiary)
base_query &= Q(is_daily_report=True)
reports = Report.objects.filter(base_query).order_by('created_at')
base_query &= Q(created_at__range=(start_date, end_date))
if subsidiary:
base_query &= Q(subsidiary=subsidiary)
base_query &= Q(is_daily_report=True)
reports = Report.objects.filter(base_query).order_by('created_at')
paginator = Paginator(reports, page_size)
page = paginator.get_page(page_number)
@ -525,8 +505,15 @@ class AccuracyViewSet(viewsets.ViewSet):
_, _data, total = this_month_report()
data += [total]
data += _data
# Generate xlsx file
# workbook = dict2xlsx(data, _type="report")
# tmp_file = f"/tmp/{str(uuid.uuid4())}.xlsx"
# os.makedirs(os.path.dirname(tmp_file), exist_ok=True)
# workbook.save(tmp_file)
# c_connector.remove_local_file((tmp_file, "fake_request_id"))
response = {
# 'file': load_xlsx_file(),
'overview_data': data,
'page': {
'number': page.number,
@ -538,28 +525,59 @@ class AccuracyViewSet(viewsets.ViewSet):
return JsonResponse({'error': 'Invalid request method.'}, status=405)
@extend_schema(
    parameters=[],
    responses=None, tags=['Accuracy']
)
@action(detail=False, url_path=r"get_report_file/(?P<report_id>[\w\-]+)", methods=["GET"])
def get_report_file(self, request, report_id):
    """Download a report's generated xlsx file.

    Fetches the workbook previously uploaded to S3 for the given report,
    stages it in a local temp file, and streams it back as an attachment.

    Raises:
        RequiredFieldException: when report_id is missing from the URL.
        NotFoundException: when the report does not exist or has no S3 file.
    """
    if request.method == 'GET':
        if not report_id:
            raise RequiredFieldException(excArgs="report_id")
        # Single .first() call instead of count() + first(): one DB round-trip,
        # identical 404 semantics.
        report = Report.objects.filter(report_id=report_id).first()
        if report is None:
            raise NotFoundException(excArgs=f"report: {report_id}")
        if not report.S3_file_name:
            raise NotFoundException(excArgs="S3 file name")
        # Stage the S3 object in a uniquely-named local temp file.
        tmp_file = "/tmp/" + "report_" + uuid.uuid4().hex + ".xlsx"
        os.makedirs("/tmp", exist_ok=True)
        download_from_S3(report.S3_file_name, tmp_file)
        # FileResponse takes ownership of the handle and closes it once the
        # response body has been streamed.
        file = open(tmp_file, 'rb')
        response = FileResponse(file, status=200)
        response['Content-Type'] = 'application/octet-stream'
        response['Content-Disposition'] = 'attachment; filename="{0}"'.format(os.path.basename(tmp_file))
        return response
    return JsonResponse({'error': 'Invalid request method.'}, status=405)
class RequestViewSet(viewsets.ViewSet):
lookup_field = "username"
@extend_schema(request = {
@extend_schema(
request={
'multipart/form-data': {
'type': 'object',
'properties': {
'reviewed_result': {
'type': 'string',
'default': '''{"request_id": "Sample request_id", "imei_number": ["sample_imei1", "sample_imei2"], "retailername": "Sample Retailer", "purchase_date": "01/01/1970", "sold_to_party": "Sample party"}''',
},
}
},
},
}, responses=None, tags=['Request']
},
responses=None,
tags=['Request']
)
@action(detail=False, url_path=r"request/(?P<request_id>[\w\-]+)", methods=["GET", "POST"])
def get_subscription_request(self, request, request_id=None):
if request.method == 'GET':
base_query = Q(request_id=request_id)
subscription_request = SubscriptionRequest.objects.filter(base_query).first()
data = []
imeis = []
@ -611,7 +629,7 @@ class RequestViewSet(viewsets.ViewSet):
subscription_request = SubscriptionRequest.objects.filter(base_query).first()
reviewed_result = json.loads(data["reviewed_result"][1:-1])
reviewed_result = json.loads(data["reviewed_result"])
for field in ['retailername', 'sold_to_party', 'purchase_date', 'imei_number']:
if not field in reviewed_result.keys():
raise RequiredFieldException(excArgs=f'reviewed_result.{field}')

View File

@ -32,6 +32,7 @@ class CeleryConnector:
'upload_file_to_s3': {'queue': "upload_file_to_s3"},
'upload_feedback_to_s3': {'queue': "upload_feedback_to_s3"},
'upload_obj_to_s3': {'queue': "upload_obj_to_s3"},
'upload_report_to_s3': {'queue': "upload_report_to_s3"},
'remove_local_file': {'queue': "remove_local_file"},
'csv_feedback': {'queue': "csv_feedback"},
'make_a_report': {'queue': "report"},
@ -50,8 +51,10 @@ class CeleryConnector:
return self.send_task('do_pdf', args)
def upload_file_to_s3(self, args):
return self.send_task('upload_file_to_s3', args)
def upload_feedback_to_s3(self, args):
return self.send_task('upload_feedback_to_s3', args)
def upload_file_to_s3(self, args):
return self.send_task('upload_file_to_s3', args)
def upload_report_to_s3(self, args):
return self.send_task('upload_report_to_s3', args)
def upload_obj_to_s3(self, args):
return self.send_task('upload_obj_to_s3', args)
def remove_local_file(self, args):

View File

@ -9,7 +9,7 @@ from fwd_api.models import SubscriptionRequest, UserProfile
from fwd_api.celery_worker.worker import app
from ..constant.common import FolderFileType, image_extensions
from ..exception.exceptions import FileContentInvalidException
from fwd_api.models import SubscriptionRequestFile, FeedbackRequest
from fwd_api.models import SubscriptionRequestFile, FeedbackRequest, Report
from ..utils import file as FileUtils
from ..utils import process as ProcessUtil
from ..utils import s3 as S3Util
@ -211,6 +211,22 @@ def upload_feedback_to_s3(local_file_path, s3_key, feedback_id):
else:
logger.info(f"S3 is not available, skipping,...")
@app.task(name='upload_report_to_s3')
def upload_report_to_s3(local_file_path, s3_key, report_id):
    """Upload a rendered report workbook to S3 and mark the Report row as uploaded.

    Best-effort: failures are logged but never raised, matching the other
    upload tasks in this worker.
    """
    if s3_client.s3_client is None:
        logger.info(f"S3 is not available, skipping,...")
        return
    try:
        s3_client.upload_file(local_file_path, s3_key)
        # .first() instead of [0]: avoids an IndexError if the report row was
        # deleted between scheduling and execution of this task.
        report = Report.objects.filter(report_id=report_id).first()
        if report is None:
            logger.error(f"Report {report_id} not found, cannot record S3 upload")
            return
        report.S3_uploaded = True
        report.S3_file_name = s3_key
        report.save()
    except Exception as e:
        logger.error(f"Unable to set S3: {e}")
@app.task(name='remove_local_file')
def remove_local_file(local_file_path, request_id):
print(f"[INFO] Removing local file: {local_file_path}, ...")

View File

@ -1,24 +1,12 @@
import time
import uuid
import os
import base64
import traceback
from multiprocessing.pool import ThreadPool
from fwd_api.models import SubscriptionRequest, UserProfile
from fwd_api.models import SubscriptionRequest, Report, ReportFile
from fwd_api.celery_worker.worker import app
from ..constant.common import FolderFileType, image_extensions
from ..exception.exceptions import FileContentInvalidException
from fwd_api.models import SubscriptionRequestFile, FeedbackRequest, Report
from ..utils import file as FileUtils
from ..utils import process as ProcessUtil
from ..utils import s3 as S3Util
from ..utils.accuracy import update_temp_accuracy, IterAvg, calculate_and_save_subcription_file
from fwd_api.constant.common import ProcessType
from ..utils.accuracy import update_temp_accuracy, IterAvg, calculate_and_save_subcription_file, count_transactions, extract_report_detail_list
from ..utils.file import dict2xlsx, save_workbook_file, save_report_to_S3
from django.utils import timezone
from django.db.models import Q
import csv
import json
from celery.utils.log import get_task_logger
from fwd import settings
@ -117,28 +105,45 @@ def make_a_report(report_id, query_set):
errors += request_att["err"]
num_request += 1
transaction_att = count_transactions(start_date, end_date)
# Do saving process
report.number_request = num_request
report.number_images = number_images
report.number_imei = time_cost["imei"].count
report.number_invoice = time_cost["invoice"].count
report.number_bad_images = number_bad_images
# FIXME: refactor this data stream for endurability
report.average_OCR_time = {"invoice": time_cost["invoice"](), "imei": time_cost["imei"](),
"invoice_count": time_cost["invoice"].count, "imei_count": time_cost["imei"].count}
report.average_OCR_time["avg"] = (report.average_OCR_time["invoice"]*report.average_OCR_time["invoice_count"] + report.average_OCR_time["imei"]*report.average_OCR_time["imei_count"])/(report.average_OCR_time["imei_count"] + report.average_OCR_time["invoice_count"])
report.number_imei_transaction = transaction_att.get("imei", 0)
report.number_invoice_transaction = transaction_att.get("invoice", 0)
acumulated_acc = {"feedback": {},
"reviewed": {}}
for acc_type in ["feedback", "reviewed"]:
avg_acc = IterAvg()
for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
acumulated_acc[acc_type][key] = accuracy[acc_type][key]()
acumulated_acc[acc_type][key+"_count"] = accuracy[acc_type][key].count
avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key+"_count"])
acumulated_acc[acc_type]["avg"] = avg_acc()
report.feedback_accuracy = acumulated_acc["feedback"]
report.reviewed_accuracy = acumulated_acc["reviewed"]
report.errors = "|".join(errors)
report.status = "Ready"
report.save()
# Saving a xlsx file
report_files = ReportFile.objects.filter(report=report)
data = extract_report_detail_list(report_files, lower=True)
data_workbook = dict2xlsx(data, _type='report_detail')
local_workbook = save_workbook_file(report.report_id + ".xlsx", report, data_workbook)
s3_key=save_report_to_S3(report.report_id, local_workbook)
except IndexError as e:
print(e)
traceback.print_exc()

View File

@ -38,6 +38,7 @@ app.conf.update({
Queue('upload_file_to_s3'),
Queue('upload_feedback_to_s3'),
Queue('upload_obj_to_s3'),
Queue('upload_report_to_s3'),
Queue('remove_local_file'),
Queue('csv_feedback'),
Queue('report'),
@ -56,6 +57,7 @@ app.conf.update({
'upload_file_to_s3': {'queue': "upload_file_to_s3"},
'upload_feedback_to_s3': {'queue': "upload_feedback_to_s3"},
'upload_obj_to_s3': {'queue': "upload_obj_to_s3"},
'upload_report_to_s3': {'queue': "upload_report_to_s3"},
'remove_local_file': {'queue': "remove_local_file"},
'csv_feedback': {'queue': "csv_feedback"},
'make_a_report': {'queue': "report"},

View File

@ -0,0 +1,18 @@
# Generated by Django 4.1.3 on 2024-01-31 09:31
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0175_rename_number_ivoice_transaction_report_number_imei_and_more'),
]
operations = [
migrations.AddField(
model_name='report',
name='S3_file_name',
field=models.TextField(default=None, null=True),
),
]

View File

@ -0,0 +1,18 @@
# Generated by Django 4.1.3 on 2024-02-01 03:27
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0176_report_s3_file_name'),
]
operations = [
migrations.AlterField(
model_name='report',
name='subsidiary',
field=models.CharField(default='', max_length=200, null=True),
),
]

View File

@ -16,12 +16,13 @@ class Report(models.Model):
status = models.CharField(max_length=100)
is_daily_report = models.BooleanField(default=False)
errors = models.TextField(default="", null=True)
subsidiary = models.TextField(default="", null=True)
include_reviewed = models.TextField(default="", null=True)
subsidiary = models.CharField(default="", null=True, max_length=200)
include_reviewed = models.TextField(default="", null=True, )
include_test = models.CharField(max_length=200, default="")
# Data
S3_uploaded = models.BooleanField(default=False)
S3_file_name = models.TextField(default=None, null=True)
number_request = models.IntegerField(default=0)
number_images = models.IntegerField(default=0)
number_bad_images = models.IntegerField(default=0)

View File

@ -2,10 +2,12 @@ import re
from datetime import datetime
import copy
from typing import Any
from .ocr_utils.ocr_metrics import eval_ocr_metric
from .ocr_utils.sbt_report import post_processing_str
from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile, ReportFile
from ..celery_worker.client_connector import c_connector
from django.db.models import Q
BAD_THRESHOLD = 0.75
@ -40,6 +42,8 @@ class MonthReportAccumulate:
}
self.data = []
self.data_format = {
'subs': "",
'extraction_date': "",
'num_imei': 0,
'num_invoice': 0,
'total_images': 0,
@ -69,6 +73,7 @@ class MonthReportAccumulate:
self.total["images_quality"]["successful"] += report.number_images - report.number_bad_images
self.total["images_quality"]["bad"] += report.number_bad_images
if sum([report.reviewed_accuracy[x] for x in report.reviewed_accuracy.keys() if "_count" not in x]) > 0 :
self.total["average_accuracy_rate"]["imei"].add_avg(report.reviewed_accuracy.get("imei_number", 0), report.reviewed_accuracy.get("imei_number_count", 0))
self.total["average_accuracy_rate"]["purchase_date"].add_avg(report.reviewed_accuracy.get("purchase_date", 0), report.reviewed_accuracy.get("purchase_date_count", 0))
@ -78,8 +83,8 @@ class MonthReportAccumulate:
self.total["average_accuracy_rate"]["purchase_date"].add_avg(report.feedback_accuracy.get("purchase_date", 0), report.feedback_accuracy.get("purchase_date_count", 0))
self.total["average_accuracy_rate"]["retailer_name"].add_avg(report.feedback_accuracy.get("retailername", 0), report.feedback_accuracy.get("retailername_count", 0))
self.total["average_processing_time"]["imei"].add_avg(report.average_OCR_time.get("imei", 0), report.average_OCR_time.get("imei_count", 0))
self.total["average_processing_time"]["invoice"].add_avg(report.average_OCR_time.get("invoice", 0), report.average_OCR_time.get("invoice_count", 0))
self.total["average_processing_time"]["imei"].add_avg(report.average_OCR_time.get("imei", 0), report.average_OCR_time.get("imei_count", 0)) if report.average_OCR_time else 0
self.total["average_processing_time"]["invoice"].add_avg(report.average_OCR_time.get("invoice", 0), report.average_OCR_time.get("invoice_count", 0)) if report.average_OCR_time else 0
self.total["usage"]["imei"] += report.number_imei_transaction
self.total["usage"]["invoice"] += report.number_invoice_transaction
@ -89,6 +94,7 @@ class MonthReportAccumulate:
if self.month is None:
self.month = report_month
self.total["extraction_date"] = f"Subtotal ({self.month})"
elif self.month != report_month:
self.total["images_quality"]["successful_percent"] += self.total["images_quality"]["successful"]/self.total["total_images"]
self.total["images_quality"]["bad_percent"] += self.total["images_quality"]["bad"]/self.total["total_images"]
@ -96,11 +102,15 @@ class MonthReportAccumulate:
# accumulate fields
new_data = copy.deepcopy(self.data_format)[0]
new_data["num_imei"] = report.number_imei
new_data["subs"] = report.subsidiary
new_data["extraction_date"] = report.created_at
new_data["num_invoice"] = report.number_invoice
new_data["total_images"] = report.number_images
new_data["images_quality"]["successful"] = report.number_images - report.number_bad_images
new_data["images_quality"]["bad"] = report.number_bad_images
report.reviewed_accuracy = {} if report.reviewed_accuracy is None else report.reviewed_accuracy
report.feedback_accuracy = {} if report.feedback_accuracy is None else report.feedback_accuracy
if sum([ report.reviewed_accuracy[x] for x in report.reviewed_accuracy.keys() if "_count" not in x]):
new_data["average_accuracy_rate"]["imei"] = report.reviewed_accuracy.get("imei_number", None)
new_data["average_accuracy_rate"]["purchase_date"] = report.reviewed_accuracy.get("purchase_date", None)
@ -109,20 +119,20 @@ class MonthReportAccumulate:
new_data["average_accuracy_rate"]["imei"] = report.feedback_accuracy.get("imei_number", None)
new_data["average_accuracy_rate"]["purchase_date"] = report.feedback_accuracy.get("purchase_date", None)
new_data["average_accuracy_rate"]["retailer_name"] = report.feedback_accuracy.get("retailername", None)
new_data["average_processing_time"]["imei"] = report.average_OCR_time.get("imei", 0)
new_data["average_processing_time"]["invoice"] = report.average_OCR_time.get("invoice", 0)
new_data["average_processing_time"]["imei"] = report.average_OCR_time.get("imei", 0) if report.average_OCR_time else 0
new_data["average_processing_time"]["invoice"] = report.average_OCR_time.get("invoice", 0) if report.average_OCR_time else 0
new_data["usage"]["imei"] = report.number_imei_transaction
new_data["usage"]["invoice"] = report.number_invoice_transaction
new_data["images_quality"]["successful_percent"] += new_data["images_quality"]["successful"]/new_data["total_images"]
new_data["images_quality"]["bad_percent"] += new_data["images_quality"]["bad"]/new_data["total_images"]
new_data["images_quality"]["successful_percent"] += new_data["images_quality"]["successful"]/new_data["total_images"] if new_data["total_images"] else 0
new_data["images_quality"]["bad_percent"] += new_data["images_quality"]["bad"]/new_data["total_images"] if new_data["total_images"] else 0
self.data.append(new_data)
self.accumulate(report)
return True
def __call__(self):
self.total["images_quality"]["successful_percent"] += self.total["images_quality"]["successful"]/self.total["total_images"]
self.total["images_quality"]["bad_percent"] += self.total["images_quality"]["bad"]/self.total["total_images"]
self.total["images_quality"]["successful_percent"] += self.total["images_quality"]["successful"]/self.total["total_images"] if self.total["total_images"] else 0
self.total["images_quality"]["bad_percent"] += self.total["images_quality"]["bad"]/self.total["total_images"] if self.total["total_images"] else 0
total = copy.deepcopy(self.total)
total["average_accuracy_rate"]["imei"] = total["average_accuracy_rate"]["imei"]()
total["average_accuracy_rate"]["purchase_date"] = total["average_accuracy_rate"]["purchase_date"]()
@ -157,6 +167,67 @@ class IterAvg:
def __call__(self):
return self.avg
def first_of_list(the_list):
    """Return the first element of `the_list`, or None for an empty/falsy list."""
    return the_list[0] if the_list else None


def extract_report_detail_list(report_detail_list, lower=False, in_percent=True):
    """Serialize report-file rows into spreadsheet-ready dicts.

    Args:
        report_detail_list: iterable of ReportFile-like objects.
        lower: when True, normalize keys to snake_case (lowercase, spaces -> underscores).
        in_percent: when True, scale every truthy *accuracy* value by 100.

    Returns:
        A list of one dict per input row.
    """
    rows = []
    for rf in report_detail_list:
        rows.append({
            "Request ID": rf.correspond_request_id,
            "Redemption Number": rf.correspond_redemption_id,
            "Image type": rf.doc_type,
            "IMEI_user submitted": first_of_list(rf.feedback_result.get("imei_number", [None])),
            "IMEI_OCR retrieved": first_of_list(rf.predict_result.get("imei_number", [None])),
            "IMEI1 Accuracy": first_of_list(rf.feedback_accuracy.get("imei_number", [None])),
            "Invoice_Purchase Date_Consumer": rf.feedback_result.get("purchase_date", None),
            "Invoice_Purchase Date_OCR": rf.predict_result.get("purchase_date", []),
            "Invoice_Purchase Date Accuracy": first_of_list(rf.feedback_accuracy.get("purchase_date", [None])),
            "Invoice_Retailer_Consumer": rf.feedback_result.get("retailername", None),
            "Invoice_Retailer_OCR": rf.predict_result.get("retailername", None),
            "Invoice_Retailer Accuracy": first_of_list(rf.feedback_accuracy.get("retailername", [None])),
            "OCR Image Accuracy": rf.acc,
            "OCR Image Speed (seconds)": rf.time_cost,
            "Reviewed?": "No",
            "Bad Image Reasons": rf.bad_image_reason,
            "Countermeasures": rf.counter_measures,
            "IMEI_Revised Accuracy": first_of_list(rf.reviewed_accuracy.get("imei_number", [None])),
            "Purchase Date_Revised Accuracy": first_of_list(rf.reviewed_accuracy.get("purchase_date", [None])),
            "Retailer_Revised Accuracy": first_of_list(rf.reviewed_accuracy.get("retailername", [None])),
        })
    if lower:
        # Rebuild each dict; insertion order of the normalized keys matches
        # the original column order.
        rows = [
            {key.lower().replace(" ", "_"): val for key, val in row.items()}
            for row in rows
        ]
    if in_percent:
        # Scale only truthy accuracy values (None/0 stay untouched).
        for row in rows:
            for key in list(row.keys()):
                if "accuracy" in key.lower() and row[key]:
                    row[key] = row[key] * 100
    return rows
def count_transactions(start_date, end_date):
    """Count non-test transactions per doc_type in [start_date, end_date].

    Each SubscriptionRequest may carry several comma-separated doc types
    ("imei,invoice"); every entry counts as one transaction of that type.

    Returns:
        dict mapping doc_type -> transaction count.
    """
    base_query = Q(created_at__range=(start_date, end_date))
    base_query &= Q(is_test_request=False)
    transaction_att = {}
    print(f"[DEBUG]: extracting transactions attribute...")
    total_transaction_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at')
    for request in total_transaction_requests:
        if not request.doc_type:
            continue
        # dict.get with default replaces the `== None` branch-and-init pattern.
        for doc_type in request.doc_type.split(","):
            transaction_att[doc_type] = transaction_att.get(doc_type, 0) + 1
    return transaction_att
def convert_datetime_format(date_string: str, is_gt=False) -> str:
# pattern_date_string = "2023-02-28"
input_format = "%Y-%m-%d"

View File

@ -9,10 +9,11 @@ from django.core.files.uploadedfile import TemporaryUploadedFile
from django.utils import timezone
from fwd import settings
from ..utils import s3 as S3Util
from fwd_api.constant.common import allowed_file_extensions
from fwd_api.exception.exceptions import GeneralException, RequiredFieldException, InvalidException, \
ServiceUnavailableException, FileFormatInvalidException, LimitReachedException, InvalidDecompressedSizeException, RequiredColumnException
from fwd_api.models import SubscriptionRequest, OcrTemplate, FeedbackRequest, SubscriptionRequestFile
from fwd_api.models import SubscriptionRequest, OcrTemplate, FeedbackRequest, SubscriptionRequestFile, Report, ReportFile
from fwd_api.utils import process as ProcessUtil
from fwd_api.utils.crypto import image_authenticator
from fwd_api.utils.image import resize
@ -22,6 +23,13 @@ import csv
from openpyxl import load_workbook
from openpyxl.styles import Font, Border, Side, PatternFill, NamedStyle
# Module-level MinIO/S3 client used by the file helpers below
# (e.g. download_from_S3); configured from the project settings.
s3_client = S3Util.MinioS3Client(
endpoint=settings.S3_ENDPOINT,
access_key=settings.S3_ACCESS_KEY,
secret_key=settings.S3_SECRET_KEY,
bucket_name=settings.S3_BUCKET_NAME
)
def validate_report_list(request):
start_date_str = request.GET.get('start_date')
end_date_str = request.GET.get('end_date')
@ -182,6 +190,16 @@ def save_feedback_file(file_name: str, rq: FeedbackRequest, uploaded_file: dict)
csvfile.write(file_contents)
return file_path
def save_workbook_file(file_name: str, rp: Report, workbook):
    """Persist a workbook under MEDIA_ROOT/report/<report_id>/ and return its path."""
    target_dir = os.path.join(settings.MEDIA_ROOT, "report", str(rp.report_id))
    os.makedirs(target_dir, exist_ok=True)
    target_path = os.path.join(target_dir, file_name)
    workbook.save(target_path)
    return target_path
def delete_file_with_path(file_path: str) -> bool:
try:
os.remove(file_path)
@ -266,6 +284,19 @@ def save_feedback_to_S3(file_name, id, local_file_path):
print(f"[ERROR]: {e}")
raise ServiceUnavailableException()
def save_report_to_S3(id, local_file_path):
    """Queue background upload of a report file to S3, then delete the local copy.

    The S3 key mirrors the last two path components ("report/<report_id>/<file>").
    Returns the computed s3_key; raises ServiceUnavailableException on failure.
    """
    try:
        parts = local_file_path.split("/")
        s3_key = os.path.join("report", parts[-2], parts[-1])
        # Both steps run asynchronously on the celery workers.
        c_connector.upload_report_to_s3((local_file_path, s3_key, id))
        c_connector.remove_local_file((local_file_path, id))
        return s3_key
    except Exception as e:
        print(f"[ERROR]: {e}")
        raise ServiceUnavailableException()
# Fetch the object stored at `s3_key` into `local_file_path` using the
# module-level S3 client. Thin wrapper; any S3 errors propagate to the caller.
def download_from_S3(s3_key, local_file_path):
s3_client.download_file(s3_key, local_file_path)
def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path):
try:
file_path = os.path.join(folder_path, file_name)
@ -363,10 +394,11 @@ def get_value(_dict, keys):
else:
value = value.get(key, {})
if value != 0:
return value
else:
if not value:
return "-"
elif isinstance(value, list):
value = str(value)
return value
def dict2xlsx(input: json, _type='report'):

View File

@ -67,6 +67,7 @@ services:
- ADMIN_PASSWORD=${ADMIN_PASSWORD}
- STANDARD_USER_NAME=${STANDARD_USER_NAME}
- STANDARD_PASSWORD=${STANDARD_PASSWORD}
- S3_ENDPOINT=${S3_ENDPOINT}
- S3_ACCESS_KEY=${S3_ACCESS_KEY}
- S3_SECRET_KEY=${S3_SECRET_KEY}
- S3_BUCKET_NAME=${S3_BUCKET_NAME}