Merge pull request #42 from SDSRV-IDP/dev/all_rq_for_acc

Update: request_list API
Nguyen Viet Anh 2024-02-21 12:50:11 +07:00 committed by GitHub Enterprise
commit c82fac1572
9 changed files with 87 additions and 45 deletions

.gitignore

@@ -41,3 +41,4 @@ Jan.csv
 *.csv
 cope2n-api/reviewed/date.xlsx
 cope2n-api/reviewed/retailer.xlsx
+/scripts/*


@@ -43,7 +43,7 @@ class AccuracyViewSet(viewsets.ViewSet):
 default='2024-01-10T00:00:00+0700',
 ),
 OpenApiParameter(
-name='include_test',
+name='includes_test',
 location=OpenApiParameter.QUERY,
 description='Whether to include test record or not',
 type=OpenApiTypes.BOOL,
@@ -53,7 +53,14 @@ class AccuracyViewSet(viewsets.ViewSet):
 location=OpenApiParameter.QUERY,
 description='Which records to be query',
 type=OpenApiTypes.STR,
-enum=['reviewed', 'not reviewed', 'all'],
+enum=['reviewed', 'not_reviewed', 'all'],
 ),
+OpenApiParameter(
+name='subsidiary',
+location=OpenApiParameter.QUERY,
+description='Which subsidiary to be included',
+type=OpenApiTypes.STR,
+enum=list(settings.SUBS.keys()),
+),
 OpenApiParameter(
 name='request_id',
@@ -94,15 +101,27 @@ class AccuracyViewSet(viewsets.ViewSet):
 request_id = request.GET.get('request_id', None)
 redemption_id = request.GET.get('redemption_id', None)
 is_reviewed = request.GET.get('is_reviewed', None)
-include_test = request.GET.get('include_test', False)
+include_test = request.GET.get('includes_test', False)
+subsidiary = request.data.get("subsidiary", "all")
+subsidiary = map_subsidiary_long_to_short(subsidiary)
-try:
-start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z')
-end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z')
-except ValueError:
-raise InvalidException(excArgs="Date format")
+base_query = Q()
+if start_date_str or end_date_str:
+try:
+start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%d') # We care only about day precision only
+end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%d')
+# Round:
+# end_date_str to the beginning of the next day
+# start_date_str to the start of the date
+start_date = timezone.make_aware(start_date)
+end_date = timezone.make_aware(end_date)
+start_date_str = start_date.strftime('%Y-%m-%dT%H:%M:%S%z') # inside logic will include second precision with timezone for calculation
+end_date_str = (end_date + timezone.timedelta(days=1)).strftime('%Y-%m-%dT%H:%M:%S%z')
+base_query &= Q(created_at__range=(start_date, end_date))
+except Exception as e:
+raise InvalidException(excArgs="Date format")
-base_query = Q(created_at__range=(start_date, end_date))
 if request_id:
 base_query &= Q(request_id=request_id)
 if redemption_id:
@@ -113,17 +132,19 @@ class AccuracyViewSet(viewsets.ViewSet):
 if include_test:
 # base_query = ~base_query
 base_query.children = base_query.children[:-1]
 elif isinstance(include_test, bool):
 if include_test:
 base_query = ~base_query
 if isinstance(is_reviewed, str):
 if is_reviewed == "reviewed":
 base_query &= Q(is_reviewed=True)
-elif is_reviewed == "not reviewed":
+elif is_reviewed == "not_reviewed":
 base_query &= Q(is_reviewed=False)
 elif is_reviewed == "all":
 pass
+if isinstance(subsidiary, str):
+if subsidiary and subsidiary.lower().replace(" ", "")!="all":
+base_query &= Q(redemption_id__startswith=subsidiary)
 subscription_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at')
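
For context, the new date handling above accepts day-precision inputs, makes them timezone-aware, re-emits the string forms with second and timezone precision (end_date_str rounded forward to the start of the next day), and narrows base_query on created_at. A minimal sketch of that flow, assuming the same Django helpers the view already uses (timezone, Q); names and the helper itself are illustrative only:

    from django.db.models import Q
    from django.utils import timezone

    def build_date_filter(start_date_str, end_date_str):
        # Illustrative only: mirrors the view's day-precision handling.
        # (The view wraps this in try/except and raises InvalidException on bad input.)
        base_query = Q()
        if start_date_str or end_date_str:
            start_date = timezone.make_aware(
                timezone.datetime.strptime(start_date_str, "%Y-%m-%d"))
            end_date = timezone.make_aware(
                timezone.datetime.strptime(end_date_str, "%Y-%m-%d"))
            # Strings are rebuilt with full precision; the end string is pushed to the next day.
            start_date_str = start_date.strftime("%Y-%m-%dT%H:%M:%S%z")
            end_date_str = (end_date + timezone.timedelta(days=1)).strftime("%Y-%m-%dT%H:%M:%S%z")
            base_query &= Q(created_at__range=(start_date, end_date))
        return base_query, start_date_str, end_date_str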


@@ -259,9 +259,10 @@ def upload_feedback_to_s3(local_file_path, s3_key, feedback_id):
 logger.info(f"S3 is not available, skipping,...")
 @app.task(name='upload_report_to_s3')
-def upload_report_to_s3(local_file_path, s3_key, report_id):
+def upload_report_to_s3(local_file_path, s3_key, report_id, delay):
 if s3_client.s3_client is not None:
 try:
+time.sleep(delay)
 s3_client.upload_file(local_file_path, s3_key)
 report = Report.objects.filter(report_id=report_id)[0]
 report.S3_uploaded = True


@@ -287,7 +287,7 @@ def make_a_report_2(report_id, query_set):
 data = extract_report_detail_list(report_files, lower=True)
 data_workbook = dict2xlsx(data, _type='report_detail')
 local_workbook = save_workbook_file(report.report_id + ".xlsx", report, data_workbook)
-s3_key=save_report_to_S3(report.report_id, local_workbook)
+s3_key=save_report_to_S3(report.report_id, local_workbook, 5)
 if query_set["is_daily_report"]:
 # Save overview dashboard
 # multiple accuracy by 100


@@ -552,7 +552,7 @@ def extract_report_detail_list(report_detail_list, lower=False, in_percent=True)
 "Invoice_Retailer Accuracy": first_of_list(report_file.feedback_accuracy.get("retailername", [None])),
 "OCR Image Accuracy": report_file.acc,
 "OCR Image Speed (seconds)": report_file.time_cost,
-"Reviewed?": "No",
+"Is Reviewed": report_file.is_reviewed,
 "Bad Image Reasons": report_file.bad_image_reason,
 "Countermeasures": report_file.counter_measures,
 "IMEI_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("imei_number", [None])),
@@ -801,9 +801,6 @@ def calculate_a_request(report, request):
 images = SubscriptionRequestFile.objects.filter(request=request, file_category=FileCategory.Origin.value)
 report_files = []
 for image in images:
-if image.reason in settings.ACC_EXCLUDE_RESEASONS:
-continue
 status, att = calculate_subcription_file(image)
 if status != 200:
 continue
@@ -821,8 +818,6 @@ def calculate_a_request(report, request):
 _sub = map_subsidiary_short_to_long(request.redemption_id[:2])
 else:
 print(f"[WARM]: empty redemption_id, check request: {request.request_id}")
-if att["is_bad_image"]:
-request_att["bad_image_list"].append(image.file_name)
 new_report_file = ReportFile(report=report,
 subsidiary=_sub,
 correspond_request_id=request.request_id,
@@ -835,35 +830,47 @@ def calculate_a_request(report, request):
 reviewed_accuracy=acc_maximize_list_values(att["acc"]["reviewed"]),
 acc=att["avg_acc"],
 is_bad_image=att["is_bad_image"],
+is_reviewed="Yes" if request.is_reviewed else "No",
 time_cost=image.processing_time,
 bad_image_reason=image.reason,
 counter_measures=image.counter_measures,
 error="|".join(att["err"])
 )
 report_files.append(new_report_file)
+_att = copy.deepcopy(att)
+if att["is_bad_image"] or image.reason in settings.ACC_EXCLUDE_RESEASONS:
+request_att["bad_image_list"].append(image.file_name)
+# TODO: Exclude bad image accuracy from average accuracy
+_att["avg_acc"] = None
+for t in ["feedback", "reviewed"]:
+for k in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
+_att["acc"][t][k] = []
 if request_att["time_cost"].get(image.doc_type, None):
 request_att["time_cost"][image.doc_type].append(image.processing_time)
 else:
 request_att["time_cost"][image.doc_type] = [image.processing_time]
 try:
-request_att["acc"]["feedback"]["imei_number"] += att["acc"]["feedback"]["imei_number"]
-request_att["acc"]["feedback"]["purchase_date"] += att["acc"]["feedback"]["purchase_date"]
-request_att["acc"]["feedback"]["retailername"] += att["acc"]["feedback"]["retailername"]
-request_att["acc"]["feedback"]["sold_to_party"] += att["acc"]["feedback"]["sold_to_party"]
+request_att["acc"]["feedback"]["imei_number"] += _att["acc"]["feedback"]["imei_number"]
+request_att["acc"]["feedback"]["purchase_date"] += _att["acc"]["feedback"]["purchase_date"]
+request_att["acc"]["feedback"]["retailername"] += _att["acc"]["feedback"]["retailername"]
+request_att["acc"]["feedback"]["sold_to_party"] += _att["acc"]["feedback"]["sold_to_party"]
-request_att["acc"]["reviewed"]["imei_number"] += att["acc"]["reviewed"]["imei_number"]
-request_att["acc"]["reviewed"]["purchase_date"] += att["acc"]["reviewed"]["purchase_date"]
-request_att["acc"]["reviewed"]["retailername"] += att["acc"]["reviewed"]["retailername"]
-request_att["acc"]["reviewed"]["sold_to_party"] += att["acc"]["reviewed"]["sold_to_party"]
+request_att["acc"]["reviewed"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"]
+request_att["acc"]["reviewed"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"]
+request_att["acc"]["reviewed"]["retailername"] += _att["acc"]["reviewed"]["retailername"]
+request_att["acc"]["reviewed"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"]
-request_att["acc"]["acumulated"]["imei_number"] += att["acc"]["reviewed"]["imei_number"] if att["acc"]["reviewed"]["imei_number"] else att["acc"]["feedback"]["imei_number"]
-request_att["acc"]["acumulated"]["purchase_date"] += att["acc"]["reviewed"]["purchase_date"] if att["acc"]["reviewed"]["purchase_date"] else att["acc"]["feedback"]["purchase_date"]
-request_att["acc"]["acumulated"]["retailername"] += att["acc"]["reviewed"]["retailername"] if att["acc"]["reviewed"]["retailername"] else att["acc"]["feedback"]["retailername"]
-request_att["acc"]["acumulated"]["sold_to_party"] += att["acc"]["reviewed"]["sold_to_party"] if att["acc"]["reviewed"]["sold_to_party"] else att["acc"]["feedback"]["sold_to_party"]
+request_att["acc"]["acumulated"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if att["acc"]["reviewed"]["imei_number"] else att["acc"]["feedback"]["imei_number"]
+request_att["acc"]["acumulated"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if att["acc"]["reviewed"]["purchase_date"] else att["acc"]["feedback"]["purchase_date"]
+request_att["acc"]["acumulated"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if att["acc"]["reviewed"]["retailername"] else att["acc"]["feedback"]["retailername"]
+request_att["acc"]["acumulated"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if att["acc"]["reviewed"]["sold_to_party"] else att["acc"]["feedback"]["sold_to_party"]
-request_att["bad_images"] += int(att["is_bad_image"])
+request_att["bad_images"] += int(_att["is_bad_image"])
 request_att["total_images"] += 1
-request_att["err"] += att["err"]
+request_att["err"] += _att["err"]
 except Exception as e:
 print(f"[ERROR]: failed to calculate request: {request.request_id} - request_file: {image.file_name} because of {e}")
 continue
@@ -907,18 +914,18 @@ def calculate_subcription_file(subcription_request_file):
 if avg_reviewed is not None:
 avg_acc = avg_reviewed
+att["avg_acc"] = avg_acc
 if avg_acc < BAD_THRESHOLD:
 att["is_bad_image"] = True
-# exclude bad images
-for key_name in valid_keys:
-att["acc"]["feedback"][key_name] = []
-att["acc"]["reviewed"][key_name] = []
-att["avg_acc"] = None
-else:
-att["avg_acc"] = avg_acc
+# for key_name in valid_keys:
+# att["acc"]["feedback"][key_name] = []
+# att["acc"]["reviewed"][key_name] = []
+# att["avg_acc"] = None
 return 200, att
 def calculate_attributions(request): # for one request, return in order
 # Deprecated
 acc = {"feedback": {},
 "reviewed": {}} # {"feedback": {"retailername": [0.1], "sold_to_party":[0.9], "purchase_date":[0.6], "imei_number":[0.8]},
 # "reviewed": {"retailername": [0.1], "sold_to_party":[0.9], "purchase_date":[0.6], "imei_number":[0.8]}}


@@ -329,10 +329,10 @@ def save_feedback_to_S3(file_name, id, local_file_path):
 print(f"[ERROR]: {e}")
 raise ServiceUnavailableException()
-def save_report_to_S3(id, local_file_path):
+def save_report_to_S3(id, local_file_path, delay=0):
 try:
 s3_key = os.path.join("report", local_file_path.split("/")[-2], local_file_path.split("/")[-1])
-c_connector.upload_report_to_s3((local_file_path, s3_key, id))
+c_connector.upload_report_to_s3((local_file_path, s3_key, id, delay))
 c_connector.remove_local_file((local_file_path, id))
 return s3_key
 except Exception as e:
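
The new delay argument defaults to 0, so existing callers keep their old behaviour; it is forwarded through c_connector into the upload_report_to_s3 task, which now sleeps that many seconds before uploading (see the Celery hunk above). make_a_report_2 passes 5. A hypothetical caller, for illustration only:

    # Default: the worker uploads the file without waiting.
    s3_key = save_report_to_S3(report.report_id, local_workbook)
    # Report generation: make the worker wait 5 seconds before uploading, as in make_a_report_2.
    s3_key = save_report_to_S3(report.report_id, local_workbook, 5)
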
@@ -444,7 +444,7 @@ def get_value(_dict, keys):
 print(f"[ERROR]: value: {value}")
 print(f"[ERROR]: keys: {keys}")
-if not value:
+if value is None:
 return "-"
 elif isinstance(value, list):
 value = str(value)
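
Switching from "if not value:" to "if value is None:" narrows when get_value falls back to the "-" placeholder: falsy-but-present values such as 0, an empty string, or an empty list are now kept instead of being replaced. A quick illustration of the difference:

    # Only None triggers the placeholder under the new check.
    for value in (None, 0, "", []):
        old_rule = not value        # True for all four values
        new_rule = value is None    # True only for None
        print(repr(value), old_rule, new_rule)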

@@ -1 +1 @@
-Subproject commit b6d4fab46f7f8689dd6b050cfbff2faa6a6f3fec
+Subproject commit d01de312ab86db554ffa2f1b01396ef8d56b78ed


@@ -174,8 +174,8 @@ services:
 - ./cope2n-api:/app
 working_dir: /app
-command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5"
-# command: bash -c "tail -f > /dev/null"
+# command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5"
+command: bash -c "tail -f > /dev/null"
 # Back-end persistent
 db-sbt:

unset_env.sh (new file)

@@ -0,0 +1,12 @@
+#!/bin/bash
+# Load .env file into current shell session
+set -a
+source .env
+set +a
+# Unset each variable defined in .env
+while IFS= read -r line || [[ -n "$line" ]]; do
+variable=$(echo "$line" | cut -d= -f1)
+unset "$variable"
+done < .env