Merge branch 'main' of https://code.sdsdev.co.kr/SDSRV-IDP/sbt-idp into vietanh99-update-xlsx
This commit is contained in:
commit
6cf90f1d19
1
.gitignore
vendored
1
.gitignore
vendored
@ -38,3 +38,4 @@ cope2n-ai-fi/Dockerfile_old_work
|
|||||||
/feedback/
|
/feedback/
|
||||||
cope2n-api/public/SBT_report_20240122.csv
|
cope2n-api/public/SBT_report_20240122.csv
|
||||||
Jan.csv
|
Jan.csv
|
||||||
|
*.csv
|
||||||
|
@ -1 +1 @@
|
|||||||
Subproject commit 6907ea0183b141e3b4f3c21758c9123f1e9b2a27
|
Subproject commit b6d4fab46f7f8689dd6b050cfbff2faa6a6f3fec
|
@ -433,10 +433,10 @@ class AccuracyViewSet(viewsets.ViewSet):
|
|||||||
@action(detail=False, url_path="overview", methods=["GET"])
|
@action(detail=False, url_path="overview", methods=["GET"])
|
||||||
def overview(self, request):
|
def overview(self, request):
|
||||||
if request.method == 'GET':
|
if request.method == 'GET':
|
||||||
subsidiary = request.GET.get('subsidiary', "ALL")
|
_subsidiary = request.GET.get('subsidiary', "ALL")
|
||||||
duration = request.GET.get('duration', "")
|
duration = request.GET.get('duration', "")
|
||||||
|
|
||||||
subsidiary = map_subsidiary_long_to_short(subsidiary)
|
subsidiary = map_subsidiary_long_to_short(_subsidiary)
|
||||||
|
|
||||||
# Retrieve data from Redis
|
# Retrieve data from Redis
|
||||||
key = f"{subsidiary}_{duration}"
|
key = f"{subsidiary}_{duration}"
|
||||||
|
18
cope2n-api/fwd_api/migrations/0181_reportfile_subsidiary.py
Normal file
18
cope2n-api/fwd_api/migrations/0181_reportfile_subsidiary.py
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
# Generated by Django 4.1.3 on 2024-02-15 09:12
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Add the ``subsidiary`` column to the ``reportfile`` model.

    The new column is a ``CharField`` (max length 200) that defaults to the
    empty string and also accepts NULL.
    """

    # Must run after the migration that altered ReportFile.time_cost.
    dependencies = [("fwd_api", "0180_alter_reportfile_time_cost")]

    operations = [
        migrations.AddField(
            model_name="reportfile",
            name="subsidiary",
            field=models.CharField(default="", max_length=200, null=True),
        ),
    ]
|
@ -9,6 +9,7 @@ class ReportFile(models.Model):
|
|||||||
id = models.AutoField(primary_key=True)
|
id = models.AutoField(primary_key=True)
|
||||||
correspond_request_id = models.CharField(max_length=200, default="")
|
correspond_request_id = models.CharField(max_length=200, default="")
|
||||||
correspond_redemption_id = models.CharField(max_length=200, default="")
|
correspond_redemption_id = models.CharField(max_length=200, default="")
|
||||||
|
subsidiary = models.CharField(default="", null=True, max_length=200)
|
||||||
created_at = models.DateTimeField(default=timezone.now, db_index=True)
|
created_at = models.DateTimeField(default=timezone.now, db_index=True)
|
||||||
updated_at = models.DateTimeField(auto_now=True)
|
updated_at = models.DateTimeField(auto_now=True)
|
||||||
report = models.ForeignKey(Report, related_name="files", on_delete=models.CASCADE)
|
report = models.ForeignKey(Report, related_name="files", on_delete=models.CASCADE)
|
||||||
|
@ -9,6 +9,7 @@ import uuid
|
|||||||
from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile, ReportFile
|
from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile, ReportFile
|
||||||
from ..celery_worker.client_connector import c_connector
|
from ..celery_worker.client_connector import c_connector
|
||||||
from ..utils.file import dict2xlsx, save_workbook_file, save_report_to_S3
|
from ..utils.file import dict2xlsx, save_workbook_file, save_report_to_S3
|
||||||
|
from ..utils.subsidiary import map_subsidiary_short_to_long
|
||||||
from django.db.models import Q
|
from django.db.models import Q
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
import redis
|
import redis
|
||||||
@ -29,6 +30,8 @@ class ReportAccumulateByRequest:
|
|||||||
self.total_format = {
|
self.total_format = {
|
||||||
'subs': "+",
|
'subs': "+",
|
||||||
'extraction_date': "Subtotal ()",
|
'extraction_date': "Subtotal ()",
|
||||||
|
'num_imei': 0,
|
||||||
|
'num_invoice': 0,
|
||||||
'total_images': 0,
|
'total_images': 0,
|
||||||
'images_quality': {
|
'images_quality': {
|
||||||
'successful': 0,
|
'successful': 0,
|
||||||
@ -49,6 +52,7 @@ class ReportAccumulateByRequest:
|
|||||||
'usage': {
|
'usage': {
|
||||||
'imei':0,
|
'imei':0,
|
||||||
'invoice': 0,
|
'invoice': 0,
|
||||||
|
'total_images': 0,
|
||||||
'request': 0
|
'request': 0
|
||||||
},
|
},
|
||||||
'feedback_accuracy': {
|
'feedback_accuracy': {
|
||||||
@ -90,6 +94,7 @@ class ReportAccumulateByRequest:
|
|||||||
'usage': {
|
'usage': {
|
||||||
'imei': 0,
|
'imei': 0,
|
||||||
'invoice': 0,
|
'invoice': 0,
|
||||||
|
'total_images': 0,
|
||||||
'request': 0
|
'request': 0
|
||||||
},
|
},
|
||||||
'feedback_accuracy': {
|
'feedback_accuracy': {
|
||||||
@ -113,7 +118,13 @@ class ReportAccumulateByRequest:
|
|||||||
total["total_images"] += 1
|
total["total_images"] += 1
|
||||||
total["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
|
total["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
|
||||||
total["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0
|
total["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0
|
||||||
# total["report_files"].append(report_file)
|
doc_type = "imei"
|
||||||
|
if report_file.doc_type in ["imei", "invoice", "all"]:
|
||||||
|
doc_type = report_file.doc_type
|
||||||
|
else:
|
||||||
|
print(f"[WARM]: Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
|
||||||
|
total["num_imei"] += 1 if doc_type == "imei" else 0
|
||||||
|
total["num_invoice"] += 1 if doc_type == "invoice" else 0
|
||||||
|
|
||||||
if sum([len(report_file.reviewed_accuracy[x]) for x in report_file.reviewed_accuracy.keys() if "_count" not in x]) > 0 :
|
if sum([len(report_file.reviewed_accuracy[x]) for x in report_file.reviewed_accuracy.keys() if "_count" not in x]) > 0 :
|
||||||
total["average_accuracy_rate"]["imei"].add(report_file.reviewed_accuracy.get("imei_number", []))
|
total["average_accuracy_rate"]["imei"].add(report_file.reviewed_accuracy.get("imei_number", []))
|
||||||
@ -136,8 +147,14 @@ class ReportAccumulateByRequest:
|
|||||||
total["average_processing_time"][report_file.doc_type] = IterAvg()
|
total["average_processing_time"][report_file.doc_type] = IterAvg()
|
||||||
total["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
|
total["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
|
||||||
|
|
||||||
total["usage"]["imei"] += 1 if report_file.doc_type == "imei" else 0
|
doc_type = "imei"
|
||||||
total["usage"]["invoice"] += 1 if report_file.doc_type == "invoice" else 0
|
if report_file.doc_type in ["imei", "invoice", "all"]:
|
||||||
|
doc_type = report_file.doc_type
|
||||||
|
else:
|
||||||
|
print(f"[WARM]: Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
|
||||||
|
total["usage"]["imei"] += 1 if doc_type == "imei" else 0
|
||||||
|
total["usage"]["invoice"] += 1 if doc_type == "invoice" else 0
|
||||||
|
total["usage"]["total_images"] += 1
|
||||||
|
|
||||||
return total
|
return total
|
||||||
|
|
||||||
@ -146,8 +163,13 @@ class ReportAccumulateByRequest:
|
|||||||
day_data["total_images"] += 1
|
day_data["total_images"] += 1
|
||||||
day_data["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
|
day_data["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
|
||||||
day_data["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0
|
day_data["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0
|
||||||
day_data["num_imei"] += 1 if report_file.doc_type == "imei" else 0
|
doc_type = "imei"
|
||||||
day_data["num_invoice"] += 1 if report_file.doc_type == "invoice" else 0
|
if report_file.doc_type in ["imei", "invoice", "all"]:
|
||||||
|
doc_type = report_file.doc_type
|
||||||
|
else:
|
||||||
|
print(f"[WARM]: Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
|
||||||
|
day_data["num_imei"] += 1 if doc_type == "imei" else 0
|
||||||
|
day_data["num_invoice"] += 1 if doc_type == "invoice" else 0
|
||||||
day_data["report_files"].append(report_file)
|
day_data["report_files"].append(report_file)
|
||||||
|
|
||||||
if sum([len(report_file.reviewed_accuracy[x]) for x in report_file.reviewed_accuracy.keys() if "_count" not in x]) > 0 :
|
if sum([len(report_file.reviewed_accuracy[x]) for x in report_file.reviewed_accuracy.keys() if "_count" not in x]) > 0 :
|
||||||
@ -186,6 +208,7 @@ class ReportAccumulateByRequest:
|
|||||||
self.data[this_month][1][this_day]["usage"]["imei"] = usage.get("imei", 0)
|
self.data[this_month][1][this_day]["usage"]["imei"] = usage.get("imei", 0)
|
||||||
self.data[this_month][1][this_day]["usage"]["invoice"] = usage.get("invoice", 0)
|
self.data[this_month][1][this_day]["usage"]["invoice"] = usage.get("invoice", 0)
|
||||||
self.data[this_month][1][this_day]["usage"]["request"] = usage.get("request", 0)
|
self.data[this_month][1][this_day]["usage"]["request"] = usage.get("request", 0)
|
||||||
|
self.data[this_month][1][this_day]["usage"]["total_images"] = usage.get("imei", 0) + usage.get("invoice", 0)
|
||||||
|
|
||||||
self.data[this_month][1][this_day]['num_request'] += 1
|
self.data[this_month][1][this_day]['num_request'] += 1
|
||||||
self.data[this_month][0]['num_request'] += 1
|
self.data[this_month][0]['num_request'] += 1
|
||||||
@ -213,6 +236,7 @@ class ReportAccumulateByRequest:
|
|||||||
day_keys = list(report_data[month][1].keys())
|
day_keys = list(report_data[month][1].keys())
|
||||||
day_keys.sort(reverse = True)
|
day_keys.sort(reverse = True)
|
||||||
for day in day_keys:
|
for day in day_keys:
|
||||||
|
report_data[month][1][day]['subs'] = map_subsidiary_short_to_long(report_data[month][1][day]['subs'])
|
||||||
fine_data.append(report_data[month][1][day])
|
fine_data.append(report_data[month][1][day])
|
||||||
# save daily reports
|
# save daily reports
|
||||||
report_id = root_report_id + "_" + day
|
report_id = root_report_id + "_" + day
|
||||||
@ -254,10 +278,10 @@ class ReportAccumulateByRequest:
|
|||||||
)
|
)
|
||||||
if is_daily_report:
|
if is_daily_report:
|
||||||
new_report.save()
|
new_report.save()
|
||||||
data = extract_report_detail_list(self.data[month][1][day]["report_files"], lower=True)
|
data = extract_report_detail_list(self.data[month][1][day]["report_files"], lower=True)
|
||||||
data_workbook = dict2xlsx(data, _type='report_detail')
|
data_workbook = dict2xlsx(data, _type='report_detail')
|
||||||
local_workbook = save_workbook_file(report_id + ".xlsx", new_report, data_workbook)
|
local_workbook = save_workbook_file(report_id + ".xlsx", new_report, data_workbook)
|
||||||
s3_key=save_report_to_S3(report_id, local_workbook)
|
s3_key=save_report_to_S3(report_id, local_workbook)
|
||||||
return fine_data, save_data
|
return fine_data, save_data
|
||||||
|
|
||||||
def get(self) -> Any:
|
def get(self) -> Any:
|
||||||
@ -293,6 +317,7 @@ class ReportAccumulateByRequest:
|
|||||||
|
|
||||||
_data[month][0]["usage"]["imei"] = num_transaction_imei
|
_data[month][0]["usage"]["imei"] = num_transaction_imei
|
||||||
_data[month][0]["usage"]["invoice"] = num_transaction_invoice
|
_data[month][0]["usage"]["invoice"] = num_transaction_invoice
|
||||||
|
_data[month][0]["usage"]["total_images"] = num_transaction_invoice + num_transaction_imei
|
||||||
_data[month][0]["average_accuracy_rate"]["imei"] = _data[month][0]["average_accuracy_rate"]["imei"]()
|
_data[month][0]["average_accuracy_rate"]["imei"] = _data[month][0]["average_accuracy_rate"]["imei"]()
|
||||||
_data[month][0]["average_accuracy_rate"]["purchase_date"] = _data[month][0]["average_accuracy_rate"]["purchase_date"]()
|
_data[month][0]["average_accuracy_rate"]["purchase_date"] = _data[month][0]["average_accuracy_rate"]["purchase_date"]()
|
||||||
_data[month][0]["average_accuracy_rate"]["retailer_name"] = _data[month][0]["average_accuracy_rate"]["retailer_name"]()
|
_data[month][0]["average_accuracy_rate"]["retailer_name"] = _data[month][0]["average_accuracy_rate"]["retailer_name"]()
|
||||||
@ -311,7 +336,6 @@ class ReportAccumulateByRequest:
|
|||||||
|
|
||||||
return _data
|
return _data
|
||||||
|
|
||||||
|
|
||||||
class MonthReportAccumulate:
|
class MonthReportAccumulate:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.month = None
|
self.month = None
|
||||||
@ -513,6 +537,7 @@ def extract_report_detail_list(report_detail_list, lower=False, in_percent=True)
|
|||||||
data = []
|
data = []
|
||||||
for report_file in report_detail_list:
|
for report_file in report_detail_list:
|
||||||
data.append({
|
data.append({
|
||||||
|
"Subs": report_file.subsidiary,
|
||||||
"Request ID": report_file.correspond_request_id,
|
"Request ID": report_file.correspond_request_id,
|
||||||
"Redemption Number": report_file.correspond_redemption_id,
|
"Redemption Number": report_file.correspond_redemption_id,
|
||||||
"Image type": report_file.doc_type,
|
"Image type": report_file.doc_type,
|
||||||
@ -600,6 +625,9 @@ def align_fine_result(ready_predict, fine_result):
|
|||||||
# print(f"[DEBUG]: fine_result: {fine_result}")
|
# print(f"[DEBUG]: fine_result: {fine_result}")
|
||||||
# print(f"[DEBUG]: ready_predict: {ready_predict}")
|
# print(f"[DEBUG]: ready_predict: {ready_predict}")
|
||||||
if fine_result:
|
if fine_result:
|
||||||
|
if isinstance(ready_predict["purchase_date"], str):
|
||||||
|
ready_predict["purchase_date"] = [ready_predict["purchase_date"]]
|
||||||
|
# ready_predict.save()
|
||||||
if fine_result["purchase_date"] and len(ready_predict["purchase_date"]) == 0:
|
if fine_result["purchase_date"] and len(ready_predict["purchase_date"]) == 0:
|
||||||
ready_predict["purchase_date"] = [None]
|
ready_predict["purchase_date"] = [None]
|
||||||
if fine_result["retailername"] and not ready_predict["retailername"]:
|
if fine_result["retailername"] and not ready_predict["retailername"]:
|
||||||
@ -616,6 +644,7 @@ def update_temp_accuracy(accuracy, acc, keys):
|
|||||||
for key in keys:
|
for key in keys:
|
||||||
accuracy[key].add(acc[key])
|
accuracy[key].add(acc[key])
|
||||||
return accuracy
|
return accuracy
|
||||||
|
|
||||||
def calculate_accuracy(key_name, inference, target):
|
def calculate_accuracy(key_name, inference, target):
|
||||||
"""_summary_
|
"""_summary_
|
||||||
|
|
||||||
@ -661,7 +690,10 @@ def calculate_avg_accuracy(acc, type, keys=[]):
|
|||||||
acc_list = []
|
acc_list = []
|
||||||
# print(f"[DEBUG]: type: {type} - acc: {acc}")
|
# print(f"[DEBUG]: type: {type} - acc: {acc}")
|
||||||
for key in keys:
|
for key in keys:
|
||||||
acc_list += acc.get(type, {}).get(key, [])
|
this_acc = acc.get(type, {}).get(key, [])
|
||||||
|
if len(this_acc) > 0:
|
||||||
|
this_acc = [max(this_acc)]
|
||||||
|
acc_list += this_acc
|
||||||
|
|
||||||
acc_list = [x for x in acc_list if x is not None]
|
acc_list = [x for x in acc_list if x is not None]
|
||||||
return sum(acc_list)/len(acc_list) if len(acc_list) > 0 else None
|
return sum(acc_list)/len(acc_list) if len(acc_list) > 0 else None
|
||||||
@ -732,6 +764,12 @@ def calculate_and_save_subcription_file(report, request):
|
|||||||
|
|
||||||
return request_att
|
return request_att
|
||||||
|
|
||||||
|
def acc_maximize_list_values(acc):
    """Collapse every non-empty list value of ``acc`` to its single best score.

    Values that are not lists, and empty lists, are left untouched.

    Args:
        acc: dict mapping a key to a list of accuracy scores. Individual
            scores may be ``None`` (presumably "no score available" --
            confirm against the code that builds these lists).

    Returns:
        The same ``acc`` object, updated in place, so the call can be used
        inline as an argument.
    """
    for key, values in acc.items():
        if not isinstance(values, list) or not values:
            continue
        # max() cannot order None against numbers (TypeError), so only the
        # real scores take part in the comparison.
        scored = [value for value in values if value is not None]
        # A list holding nothing but None stays a single None, which is what
        # max() already produced for the one-element [None] case.
        acc[key] = [max(scored)] if scored else [None]
    return acc
|
||||||
|
|
||||||
def calculate_a_request(report, request):
|
def calculate_a_request(report, request):
|
||||||
request_att = {"acc": {"feedback": {"imei_number": [],
|
request_att = {"acc": {"feedback": {"imei_number": [],
|
||||||
"purchase_date": [],
|
"purchase_date": [],
|
||||||
@ -753,19 +791,30 @@ def calculate_a_request(report, request):
|
|||||||
status, att = calculate_subcription_file(image)
|
status, att = calculate_subcription_file(image)
|
||||||
if status != 200:
|
if status != 200:
|
||||||
continue
|
continue
|
||||||
image.feedback_accuracy = att["acc"]["feedback"]
|
image.feedback_accuracy = att["acc"]["feedback"] # dict {key: [values]}
|
||||||
image.reviewed_accuracy = att["acc"]["reviewed"]
|
image.reviewed_accuracy = att["acc"]["reviewed"] # dict {key: [values]}
|
||||||
image.is_bad_image_quality = att["is_bad_image"]
|
image.is_bad_image_quality = att["is_bad_image"]
|
||||||
|
if not image.doc_type:
|
||||||
|
# try to recover the doc type from the filename
|
||||||
|
_doc_type = image.file_name.split("_")[1]
|
||||||
|
if _doc_type in ["imei", "invoice"]:
|
||||||
|
image.doc_type = _doc_type
|
||||||
image.save()
|
image.save()
|
||||||
|
_sub = "NA"
|
||||||
|
if request.redemption_id:
|
||||||
|
_sub = map_subsidiary_short_to_long(request.redemption_id[:2])
|
||||||
|
else:
|
||||||
|
print(f"[WARM]: empty redemption_id, check request: {request.request_id}")
|
||||||
new_report_file = ReportFile(report=report,
|
new_report_file = ReportFile(report=report,
|
||||||
|
subsidiary=_sub,
|
||||||
correspond_request_id=request.request_id,
|
correspond_request_id=request.request_id,
|
||||||
correspond_redemption_id=request.redemption_id,
|
correspond_redemption_id=request.redemption_id,
|
||||||
doc_type=image.doc_type,
|
doc_type=image.doc_type,
|
||||||
predict_result=image.predict_result,
|
predict_result=image.predict_result,
|
||||||
feedback_result=image.feedback_result,
|
feedback_result=image.feedback_result,
|
||||||
reviewed_result=image.reviewed_result,
|
reviewed_result=image.reviewed_result,
|
||||||
feedback_accuracy=att["acc"]["feedback"],
|
feedback_accuracy=acc_maximize_list_values(att["acc"]["feedback"]),
|
||||||
reviewed_accuracy=att["acc"]["reviewed"],
|
reviewed_accuracy=acc_maximize_list_values(att["acc"]["reviewed"]),
|
||||||
acc=att["avg_acc"],
|
acc=att["avg_acc"],
|
||||||
is_bad_image=att["is_bad_image"],
|
is_bad_image=att["is_bad_image"],
|
||||||
time_cost=image.processing_time,
|
time_cost=image.processing_time,
|
||||||
@ -797,7 +846,6 @@ def calculate_a_request(report, request):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
return request_att, report_files
|
return request_att, report_files
|
||||||
|
|
||||||
|
|
||||||
def calculate_subcription_file(subcription_request_file):
|
def calculate_subcription_file(subcription_request_file):
|
||||||
att = {"acc": {"feedback": {},
|
att = {"acc": {"feedback": {},
|
||||||
|
@ -17,8 +17,8 @@ login_token = None
|
|||||||
# Define the login credentials
|
# Define the login credentials
|
||||||
login_credentials = {
|
login_credentials = {
|
||||||
'username': 'sbt',
|
'username': 'sbt',
|
||||||
'password': '7Eg4AbWIXDnufgn'
|
# 'password': '7Eg4AbWIXDnufgn'
|
||||||
# 'password': 'abc'
|
'password': 'abc'
|
||||||
}
|
}
|
||||||
|
|
||||||
# Define the command to call the update API
|
# Define the command to call the update API
|
||||||
|
@ -5,7 +5,7 @@ from datetime import datetime
|
|||||||
|
|
||||||
# Get the proxy URL from the environment variable
|
# Get the proxy URL from the environment variable
|
||||||
interval = 60*60*1 # 3600; one hour if this value is in seconds (not 1 minute) - TODO confirm unit
|
interval = 60*60*1 # 3600; one hour if this value is in seconds (not 1 minute) - TODO confirm unit
|
||||||
update_cost = 60*2
|
update_cost = int(60*1.5)
|
||||||
proxy_url = os.getenv('PROXY', "localhost")
|
proxy_url = os.getenv('PROXY', "localhost")
|
||||||
|
|
||||||
# Define the login API URL
|
# Define the login API URL
|
||||||
@ -15,8 +15,8 @@ login_token = None
|
|||||||
# Define the login credentials
|
# Define the login credentials
|
||||||
login_credentials = {
|
login_credentials = {
|
||||||
'username': 'sbt',
|
'username': 'sbt',
|
||||||
# 'password': '7Eg4AbWIXDnufgn'
|
'password': '7Eg4AbWIXDnufgn'
|
||||||
'password': 'abc'
|
# 'password': 'abc'
|
||||||
}
|
}
|
||||||
|
|
||||||
# Define the command to call the update API
|
# Define the command to call the update API
|
||||||
|
@ -7,7 +7,7 @@ tag=$1
|
|||||||
echo "[INFO] Tag received from Python: $tag"
|
echo "[INFO] Tag received from Python: $tag"
|
||||||
|
|
||||||
# echo "[INFO] Updating everything the remote..."
|
# echo "[INFO] Updating everything the remote..."
|
||||||
# git submodule update --recursive --remote
|
git submodule update --recursive --remote
|
||||||
|
|
||||||
echo "[INFO] Pushing AI image with tag: $tag..."
|
echo "[INFO] Pushing AI image with tag: $tag..."
|
||||||
docker compose -f docker-compose-dev.yml build cope2n-fi-sbt
|
docker compose -f docker-compose-dev.yml build cope2n-fi-sbt
|
||||||
|
@ -84,12 +84,12 @@ services:
|
|||||||
depends_on:
|
depends_on:
|
||||||
db-sbt:
|
db-sbt:
|
||||||
condition: service_started
|
condition: service_started
|
||||||
# command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
|
command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
|
||||||
# python manage.py makemigrations &&
|
python manage.py makemigrations &&
|
||||||
# python manage.py migrate &&
|
python manage.py migrate &&
|
||||||
# python manage.py compilemessages &&
|
python manage.py compilemessages &&
|
||||||
# gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod
|
gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod
|
||||||
command: bash -c "tail -f > /dev/null"
|
# command: bash -c "tail -f > /dev/null"
|
||||||
|
|
||||||
minio:
|
minio:
|
||||||
image: minio/minio
|
image: minio/minio
|
||||||
@ -174,8 +174,8 @@ services:
|
|||||||
- ./cope2n-api:/app
|
- ./cope2n-api:/app
|
||||||
|
|
||||||
working_dir: /app
|
working_dir: /app
|
||||||
command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5"
|
# command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5"
|
||||||
# command: bash -c "tail -f > /dev/null"
|
command: bash -c "tail -f > /dev/null"
|
||||||
|
|
||||||
# Back-end persistent
|
# Back-end persistent
|
||||||
db-sbt:
|
db-sbt:
|
||||||
|
@ -10,23 +10,34 @@ from pytz import timezone
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
load_dotenv("../.env_prod")
|
load_dotenv("../.env_prod")
|
||||||
|
# load_dotenv(".env_prod")
|
||||||
# load_dotenv("../.env")
|
# load_dotenv("../.env")
|
||||||
|
|
||||||
OUTPUT_NAME = "0131-0206"
|
OUTPUT_NAME = "0116-0216"
|
||||||
START_DATE = datetime(2024, 1, 31, tzinfo=timezone('Asia/Singapore'))
|
START_DATE = datetime(2024, 1, 16, tzinfo=timezone('Asia/Singapore'))
|
||||||
END_DATE = datetime(2024, 2, 6, tzinfo=timezone('Asia/Singapore'))
|
END_DATE = datetime(2024, 2, 16, tzinfo=timezone('Asia/Singapore'))
|
||||||
BAD_THRESHOLD = 0.75
|
BAD_THRESHOLD = 0.75
|
||||||
|
# ("requestId", "redemptionNumber", "fileName", "userSubmitResults", "OCRResults", "revisedResults_by_SDSRV", "accuracy")
|
||||||
|
REQUEST_ID_COL = 3
|
||||||
|
REQUEST_NUMBER_COL = 6
|
||||||
|
REQUEST_REDEMPTION_COL = 27
|
||||||
|
FILE_NAME_COL = 1
|
||||||
|
OCR_RESULT_COL = 16
|
||||||
|
FEEDBACK_RESULT_COL = 15
|
||||||
|
REVIEWED_RESULT_COL = 17
|
||||||
|
|
||||||
REVIEW_ACC_COL = 19
|
REVIEW_ACC_COL = 19
|
||||||
FEEDBACK_ACC_COL = 18
|
FEEDBACK_ACC_COL = 18
|
||||||
REQUEST_ID_COL = 6
|
|
||||||
|
|
||||||
# Database connection details
|
# Database connection details
|
||||||
db_host = os.environ.get('DB_HOST', "")
|
db_host = os.environ.get('DB_HOST', "")
|
||||||
# db_host = "42.96.42.13"
|
|
||||||
db_name = os.environ.get('DB_SCHEMA', "")
|
db_name = os.environ.get('DB_SCHEMA', "")
|
||||||
db_user = os.environ.get('DB_USER', "")
|
db_user = os.environ.get('DB_USER', "")
|
||||||
db_password = os.environ.get('DB_PASSWORD', "")
|
db_password = os.environ.get('DB_PASSWORD', "")
|
||||||
|
# db_host = "sbt.cxetpslawu4p.ap-southeast-1.rds.amazonaws.com"
|
||||||
|
# db_name = "sbt2"
|
||||||
|
# db_user = "sbt"
|
||||||
|
# db_password = "sbtCH240"
|
||||||
|
|
||||||
# S3 bucket details
|
# S3 bucket details
|
||||||
s3_bucket_name = os.environ.get('S3_BUCKET_NAME', "")
|
s3_bucket_name = os.environ.get('S3_BUCKET_NAME', "")
|
||||||
@ -36,40 +47,6 @@ s3_folder_prefix = 'sbt_invoice'
|
|||||||
access_key = os.environ.get('S3_ACCESS_KEY', "")
|
access_key = os.environ.get('S3_ACCESS_KEY', "")
|
||||||
secret_key = os.environ.get('S3_SECRET_KEY', "")
|
secret_key = os.environ.get('S3_SECRET_KEY', "")
|
||||||
|
|
||||||
class RequestAtt:
|
|
||||||
def __init__(self) -> None:
|
|
||||||
self.feedback_accuracy = []
|
|
||||||
self.reiviewed_accuracy = []
|
|
||||||
self.acc = 0
|
|
||||||
self.request_id = None
|
|
||||||
self.is_bad = False
|
|
||||||
self.data = []
|
|
||||||
|
|
||||||
def add_file(self, file):
|
|
||||||
self.data.append(file)
|
|
||||||
if file[REVIEW_ACC_COL]:
|
|
||||||
for key in file[REVIEW_ACC_COL].keys():
|
|
||||||
self.feedback_accuracy += file[REVIEW_ACC_COL][key]
|
|
||||||
if file[FEEDBACK_ACC_COL]:
|
|
||||||
for key in file[FEEDBACK_ACC_COL].keys():
|
|
||||||
self.feedback_accuracy += file[FEEDBACK_ACC_COL][key]
|
|
||||||
|
|
||||||
def is_bad_image(self):
|
|
||||||
fb = min(self.feedback_accuracy)/len(self.feedback_accuracy) if len(self.feedback_accuracy) else None
|
|
||||||
rv = min(self.reiviewed_accuracy)/len(self.reiviewed_accuracy) if len(self.reiviewed_accuracy) else None
|
|
||||||
if not fb and not rv:
|
|
||||||
self.is_bad = False
|
|
||||||
return False
|
|
||||||
elif fb and rv is None:
|
|
||||||
self.is_bad = fb < BAD_THRESHOLD
|
|
||||||
self.acc = fb
|
|
||||||
return fb < BAD_THRESHOLD
|
|
||||||
elif fb and rv:
|
|
||||||
self.is_bad = rv < BAD_THRESHOLD
|
|
||||||
self.acc = rv
|
|
||||||
return rv < BAD_THRESHOLD
|
|
||||||
return False
|
|
||||||
|
|
||||||
def get_request(cursor, request_in_id):
|
def get_request(cursor, request_in_id):
|
||||||
query = "SELECT * FROM fwd_api_subscriptionrequest WHERE id = %s"
|
query = "SELECT * FROM fwd_api_subscriptionrequest WHERE id = %s"
|
||||||
cursor.execute(query, (request_in_id,))
|
cursor.execute(query, (request_in_id,))
|
||||||
@ -99,44 +76,62 @@ def main():
|
|||||||
|
|
||||||
# Define the CSV file path
|
# Define the CSV file path
|
||||||
csv_file_path = f'{OUTPUT_NAME}.csv'
|
csv_file_path = f'{OUTPUT_NAME}.csv'
|
||||||
data_dict = {}
|
|
||||||
|
bad_image_list = [] # [("requestId", "redemptionNumber", "fileName", "userSubmitResults", "OCRResults", "revisedResults_by_SDSRV", "accuracy"), ...]
|
||||||
|
request_ids = [] # for crawling images
|
||||||
# Filter out requests that have quality < 75%
|
# Filter out requests that have quality < 75%
|
||||||
for i, _d in enumerate(data):
|
for i, _d in enumerate(data):
|
||||||
if not data_dict.get(_d[REQUEST_ID_COL], None):
|
if _d[FEEDBACK_ACC_COL] and _d[FEEDBACK_RESULT_COL]:
|
||||||
data_dict[_d[REQUEST_ID_COL]] = RequestAtt()
|
acc_len = 0
|
||||||
data_dict[_d[REQUEST_ID_COL]].request_id = _d[REQUEST_ID_COL]
|
for key in _d[FEEDBACK_ACC_COL].keys():
|
||||||
data_dict[_d[REQUEST_ID_COL]].add_file(_d)
|
if key == "purchase_date":
|
||||||
|
continue
|
||||||
bad_images = []
|
acc_len += len(_d[FEEDBACK_ACC_COL][key])
|
||||||
for k in data_dict.keys():
|
if len(_d[FEEDBACK_ACC_COL][key]):
|
||||||
if data_dict[k].is_bad_image():
|
if min(_d[FEEDBACK_ACC_COL][key]) < BAD_THRESHOLD:
|
||||||
bad_images.append(data_dict[k])
|
parent_request = get_request(cursor, _d[REQUEST_NUMBER_COL])
|
||||||
|
requestId = parent_request[REQUEST_ID_COL]
|
||||||
|
redemptionNumber = parent_request[REQUEST_REDEMPTION_COL]
|
||||||
|
fileName = _d[FILE_NAME_COL]
|
||||||
|
userSubmitResults = str(_d[FEEDBACK_RESULT_COL][key]) if _d[FEEDBACK_RESULT_COL] else ""
|
||||||
|
OCRResults = str(_d[OCR_RESULT_COL][key]) if _d[OCR_RESULT_COL] else ""
|
||||||
|
revisedResults_by_SDSRV = str(_d[REVIEWED_RESULT_COL][key]) if _d[REVIEWED_RESULT_COL] else ""
|
||||||
|
accuracy = _d[FEEDBACK_ACC_COL][key]
|
||||||
|
bad_image_list.append((requestId, redemptionNumber, fileName, userSubmitResults, OCRResults, revisedResults_by_SDSRV, accuracy))
|
||||||
|
request_ids.append(requestId)
|
||||||
|
if acc_len == 0: # This is the request with acc < 0.75
|
||||||
|
for key in _d[FEEDBACK_ACC_COL].keys():
|
||||||
|
if key == "purchase_date":
|
||||||
|
continue
|
||||||
|
# if not
|
||||||
|
if str(_d[FEEDBACK_RESULT_COL][key]) == str(_d[OCR_RESULT_COL][key]):
|
||||||
|
continue
|
||||||
|
parent_request = get_request(cursor, _d[REQUEST_NUMBER_COL])
|
||||||
|
requestId = parent_request[REQUEST_ID_COL]
|
||||||
|
redemptionNumber = parent_request[REQUEST_REDEMPTION_COL]
|
||||||
|
fileName = _d[FILE_NAME_COL]
|
||||||
|
userSubmitResults = str(_d[FEEDBACK_RESULT_COL][key]) if _d[FEEDBACK_RESULT_COL] else ""
|
||||||
|
OCRResults = str(_d[OCR_RESULT_COL][key]) if _d[OCR_RESULT_COL] else ""
|
||||||
|
revisedResults_by_SDSRV = str(_d[REVIEWED_RESULT_COL][key]) if _d[REVIEWED_RESULT_COL] else ""
|
||||||
|
accuracy = "Unknown (avg request acc < 0.75 is excluded from the acc report)"
|
||||||
|
bad_image_list.append((requestId, redemptionNumber, fileName, userSubmitResults, OCRResults, revisedResults_by_SDSRV, accuracy))
|
||||||
|
request_ids.append(requestId)
|
||||||
|
|
||||||
request_ids = []
|
|
||||||
# Write the data to the CSV file
|
# Write the data to the CSV file
|
||||||
for bad_image in bad_images:
|
# for bad_image in bad_images:
|
||||||
request = get_request(cursor, bad_image.request_id)
|
# request = get_request(cursor, bad_image.request_id)
|
||||||
if request:
|
# if request:
|
||||||
request_ids.append(request[3])
|
# request_ids.append(request[3])
|
||||||
|
|
||||||
# ###################### Get bad requests ######################
|
# ###################### Get bad requests ######################
|
||||||
placeholders = ','.join(['%s'] * len(request_ids))
|
|
||||||
|
|
||||||
# Execute the SELECT query with the filter
|
|
||||||
query = f"SELECT * FROM fwd_api_subscriptionrequest WHERE request_id IN ({placeholders})"
|
|
||||||
cursor.execute(query, request_ids)
|
|
||||||
|
|
||||||
# Fetch the filtered data
|
|
||||||
data = cursor.fetchall()
|
|
||||||
|
|
||||||
# Define the CSV file path
|
# Define the CSV file path
|
||||||
csv_file_path = f'{OUTPUT_NAME}.csv'
|
csv_file_path = f'{OUTPUT_NAME}.csv'
|
||||||
|
|
||||||
# Write the data to the CSV file
|
# Write the data to the CSV file
|
||||||
with open(csv_file_path, 'w', newline='') as csv_file:
|
with open(csv_file_path, 'w', newline='') as csv_file:
|
||||||
writer = csv.writer(csv_file)
|
writer = csv.writer(csv_file)
|
||||||
writer.writerow([desc[0] for desc in cursor.description]) # Write column headers
|
writer.writerow(["requestId", "redemptionNumber", "fileName", "userSubmitResults", "OCRResults", "revisedResults_by_SDSRV", "accuracy"]) # Write column headers
|
||||||
writer.writerows(data) # Write the filtered data rows
|
writer.writerows(bad_image_list) # Write the filtered data rows
|
||||||
|
|
||||||
# Close the cursor and database connection
|
# Close the cursor and database connection
|
||||||
cursor.close()
|
cursor.close()
|
||||||
@ -149,6 +144,8 @@ def main():
|
|||||||
aws_secret_access_key=secret_key
|
aws_secret_access_key=secret_key
|
||||||
)
|
)
|
||||||
|
|
||||||
|
request_ids = list(set(request_ids))
|
||||||
|
|
||||||
for request_id in tqdm(request_ids):
|
for request_id in tqdm(request_ids):
|
||||||
folder_key = f"{s3_folder_prefix}/{request_id}/" # Assuming folder structure like: s3_bucket_name/s3_folder_prefix/request_id/
|
folder_key = f"{s3_folder_prefix}/{request_id}/" # Assuming folder structure like: s3_bucket_name/s3_folder_prefix/request_id/
|
||||||
local_folder_path = f"{OUTPUT_NAME}/{request_id}/" # Path to the local folder to save the downloaded files
|
local_folder_path = f"{OUTPUT_NAME}/{request_id}/" # Path to the local folder to save the downloaded files
|
||||||
|
Loading…
Reference in New Issue
Block a user