This commit is contained in:
TannedCung 2024-06-12 14:45:41 +07:00
commit 51a04d38e4
6 changed files with 304 additions and 89 deletions

View File

@ -12,8 +12,8 @@ docker compose --profile local up -d
## Run tests ## Run tests
```bash ```bash
pip3 install pytest requests deepdiff pip install pytest requests deepdiff
IDP_HOST=https://sbt.idp.sdsrv.ai IDP_USERNAME=sbt IDP_PASSWORD=7Eg4AbWIXDnufgn python3 -m pytest --log-cli-level=DEBUG --capture=sys tests IDP_HOST=https://sbt.idp.sdsrv.ai IDP_USERNAME=sbt IDP_PASSWORD=7Eg4AbWIXDnufgn python3 -m pytest --log-cli-level=DEBUG --capture=sys tests
``` ```
IDP_HOST=http://ec2-13-212-90-87.ap-southeast-1.compute.amazonaws.com:9881 IDP_USERNAME=sbt IDP_PASSWORD=abc python3 -m pytest --log-cli-level=DEBUG --capture=sys tests IDP_HOST=http://ec2-54-169-227-39.ap-southeast-1.compute.amazonaws.com:9881 IDP_USERNAME=sbt IDP_PASSWORD=abc python3 -m pytest --log-cli-level=DEBUG --capture=sys tests

File diff suppressed because one or more lines are too long

View File

@ -136,14 +136,15 @@ def create_accuracy_report(report_id, **kwargs):
# rq_accuracy["feedback"] += rpf.feedback_accuracy[cl] # rq_accuracy["feedback"] += rpf.feedback_accuracy[cl]
# for cl in rpf.reviewed_accuracy.keys(): # for cl in rpf.reviewed_accuracy.keys():
# rq_accuracy["reviewed"] += rpf.reviewed_accuracy[cl] # rq_accuracy["reviewed"] += rpf.reviewed_accuracy[cl]
request.is_required = False
if len(rq_accuracy["reviewed"]) > 0: if len(rq_accuracy["reviewed"]) > 0:
request.raw_accuracy = mean_list(rq_accuracy["reviewed"]) request.raw_accuracy = mean_list(rq_accuracy["reviewed"])
elif len(rq_accuracy["feedback"]) > 0: elif len(rq_accuracy["feedback"]) > 0:
request.raw_accuracy = mean_list(rq_accuracy["feedback"]) request.raw_accuracy = mean_list(rq_accuracy["feedback"])
request.is_required = request.raw_accuracy < settings.BAD_THRESHOLD if isinstance(request.raw_accuracy, float) else False # No need to review by default request.is_required = request.raw_accuracy < settings.NEED_REVIEW if isinstance(request.raw_accuracy, float) else False # No need to review by default
request.save() request.save()
number_images += request_att["total_images"] number_images += request_att["total_images"]
number_bad_images += request_att["bad_images"] number_bad_images += request_att["bad_images"]
bad_image_list += request_att["bad_image_list"] bad_image_list += request_att["bad_image_list"]
@ -188,9 +189,9 @@ def create_accuracy_report(report_id, **kwargs):
avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key+"_count"]) avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key+"_count"])
acumulated_acc[acc_type]["avg"] = avg_acc() acumulated_acc[acc_type]["avg"] = avg_acc()
report.feedback_accuracy = acumulated_acc["feedback"] report.feedback_accuracy = _save_data["report"]["feedback_accuracy"]
report.reviewed_accuracy = acumulated_acc["reviewed"] report.reviewed_accuracy = _save_data["report"]["reviewed_accuracy"]
report.combined_accuracy = acumulated_acc["acumulated"] report.combined_accuracy = _save_data["report"]["average_accuracy_rate"]
report.num_reviewed = review_progress.count(1) report.num_reviewed = review_progress.count(1)
report.num_not_reviewed = review_progress.count(0) report.num_not_reviewed = review_progress.count(0)

View File

@ -27,7 +27,7 @@ class ReportAccumulateByRequest:
self.sub = sub self.sub = sub
self.current_time = None self.current_time = None
self.data = {} # {"month": [total, {"day": day_data}]} self.data = {} # {"month": [total, {"day": day_data}]}
self.total_format = { self.month_format = {
'subs': "+", 'subs': "+",
'extraction_date': "Subtotal ()", 'extraction_date': "Subtotal ()",
'num_imei': 0, 'num_imei': 0,
@ -122,9 +122,11 @@ class ReportAccumulateByRequest:
"num_request": 0, "num_request": 0,
"review_progress": [] "review_progress": []
}, },
self.report = copy.deepcopy(self.month_format)
@staticmethod @staticmethod
def update_total(total, report_file): def update_total(total, report_file):
# Update total = update total
if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS: if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS:
total["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0 total["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
total["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0 total["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0
@ -162,6 +164,46 @@ class ReportAccumulateByRequest:
total["review_progress"].append(report_file.review_status) total["review_progress"].append(report_file.review_status)
return total return total
@staticmethod
def update_month(month, report_file):
    """Fold one report file into a month's running subtotal.

    Args:
        month: Mutable subtotal dict. The keys read and updated here are
            ``images_quality``, ``total_images``, ``num_imei``,
            ``num_invoice``, ``average_accuracy_rate``, ``feedback_accuracy``,
            ``reviewed_accuracy``, ``average_processing_time``, ``usage`` and
            ``review_progress``.
        report_file: Object exposing ``bad_image_reason``, ``is_bad_image``,
            ``doc_type``, ``correspond_request_id``, ``reviewed_accuracy``,
            ``feedback_accuracy``, ``time_cost`` and ``review_status``.

    Returns:
        The same ``month`` dict, updated in place.
    """

    def _has_values(accuracy):
        # Entries whose key contains "_count" are ignored when deciding
        # whether an accuracy dict actually carries any samples.
        return any(
            len(values) > 0
            for name, values in accuracy.items()
            if "_count" not in name
        )

    # Image quality counters skip files whose bad-image reason is excluded.
    if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS:
        if report_file.is_bad_image:
            month["images_quality"]["bad"] += 1
        else:
            month["images_quality"]["successful"] += 1
    month["total_images"] += 1

    # Normalise the doc type once; unexpected values are counted as "imei".
    doc_type = report_file.doc_type
    if doc_type not in ("imei", "invoice", "all"):
        print(f"[WARM]: Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
        doc_type = "imei"
    imei_increment = 1 if doc_type == "imei" else 0
    invoice_increment = 1 if doc_type == "invoice" else 0

    month["num_imei"] += imei_increment
    month["num_invoice"] += invoice_increment

    # Reviewed accuracy takes precedence over feedback accuracy for the
    # combined average; the choice does not depend on the field, so make it
    # once instead of once per field.
    if _has_values(report_file.reviewed_accuracy):
        preferred_accuracy = report_file.reviewed_accuracy
    elif _has_values(report_file.feedback_accuracy):
        preferred_accuracy = report_file.feedback_accuracy
    else:
        preferred_accuracy = None

    for key in settings.FIELD:
        if preferred_accuracy is not None:
            month["average_accuracy_rate"][key].add(preferred_accuracy.get(key, []))
        month["feedback_accuracy"][key].add(report_file.feedback_accuracy.get(key, []))
        month["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, []))

    # Processing time is tracked under the raw (non-normalised) doc type.
    processing_time = month["average_processing_time"]
    if not processing_time.get(report_file.doc_type, None):
        print(f"[WARM]: Weird doctype: {report_file.doc_type}")
        processing_time[report_file.doc_type] = IterAvg()
    if report_file.time_cost:
        processing_time[report_file.doc_type].add_avg(report_file.time_cost, 1)
        processing_time["avg"].add_avg(report_file.time_cost, 1)

    month["usage"]["imei"] += imei_increment
    month["usage"]["invoice"] += invoice_increment
    month["usage"]["total_images"] += 1

    month["review_progress"].append(report_file.review_status)
    return month
@staticmethod @staticmethod
def update_day(day_data, report_file): def update_day(day_data, report_file):
if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS: if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS:
@ -197,7 +239,7 @@ class ReportAccumulateByRequest:
this_month = timezone.localtime(request.created_at).strftime("%Y%m") this_month = timezone.localtime(request.created_at).strftime("%Y%m")
this_day = timezone.localtime(request.created_at).strftime("%Y%m%d") this_day = timezone.localtime(request.created_at).strftime("%Y%m%d")
if not self.data.get(this_month, None): if not self.data.get(this_month, None):
self.data[this_month] = [copy.deepcopy(self.total_format), {}] self.data[this_month] = [copy.deepcopy(self.month_format), {}]
self.data[this_month][0]["extraction_date"] = "Subtotal (" + timezone.localtime(request.created_at).strftime("%Y-%m") + ")" self.data[this_month][0]["extraction_date"] = "Subtotal (" + timezone.localtime(request.created_at).strftime("%Y-%m") + ")"
if not self.data[this_month][1].get(this_day, None): if not self.data[this_month][1].get(this_day, None):
print(f"[INFO] Adding a new day: {this_day} for report: {report.id} ...") print(f"[INFO] Adding a new day: {this_day} for report: {report.id} ...")
@ -221,7 +263,8 @@ class ReportAccumulateByRequest:
for t in _report_file.reviewed_accuracy.keys(): for t in _report_file.reviewed_accuracy.keys():
_report_file.reviewed_accuracy[t] = [] _report_file.reviewed_accuracy[t] = []
self.data[this_month][0] = self.update_total(self.data[this_month][0], _report_file) # Update the subtotal within the month self.report = self.update_total(self.report, _report_file)
self.data[this_month][0] = self.update_month(self.data[this_month][0], _report_file) # Update the subtotal within the month
self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], _report_file) # Update the subtotal of the day self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], _report_file) # Update the subtotal of the day
def count_transactions_within_day(self, date_string): def count_transactions_within_day(self, date_string):
@ -231,10 +274,11 @@ class ReportAccumulateByRequest:
return count_transactions(start_date_with_timezone, end_date_with_timezone, self.sub) return count_transactions(start_date_with_timezone, end_date_with_timezone, self.sub)
def save(self, root_report_id, is_daily_report=False, include_test=False): def save(self, root_report_id, is_daily_report=False, include_test=False):
report_data = self.get() report_data, overall_report = self.get()
fine_data = [] fine_data = []
save_data = {"file": {"overview": f"{root_report_id}/{root_report_id}.xlsx"}, save_data = {"file": {"overview": f"{root_report_id}/{root_report_id}.xlsx"},
"data": fine_data} # {"sub_report_id": "S3 location", "data": fine_data} "data": fine_data, # {"sub_report_id": "S3 location", "data": fine_data}
"report": overall_report}
# extract data # extract data
month_keys = list(report_data.keys()) month_keys = list(report_data.keys())
month_keys.sort(reverse=True) month_keys.sort(reverse=True)
@ -297,6 +341,26 @@ class ReportAccumulateByRequest:
def get(self) -> Any: def get(self) -> Any:
# FIXME: This looks like a junk # FIXME: This looks like a junk
_data = copy.deepcopy(self.data) _data = copy.deepcopy(self.data)
_report = copy.deepcopy(self.report)
# export report data
for key in _report["average_processing_time"].keys():
_report["average_processing_time"][key] = _report["average_processing_time"][key]()
avg_acc = 0
count_acc = 0
for key in settings.FIELD:
_report["average_accuracy_rate"][key] = _report["average_accuracy_rate"][key]()
for accuracy_type in ["feedback_accuracy", "reviewed_accuracy"]:
avg_acc = (avg_acc*count_acc + _report[accuracy_type][key].avg*_report[accuracy_type][key].count) / (_report[accuracy_type][key].count + count_acc)
count_acc += _report[accuracy_type][key].count
_report[accuracy_type][key] = _report[accuracy_type][key]()
_report["average_accuracy_rate"]["avg"] = avg_acc
_report["review_progress"] = _report["review_progress"].count(1)/(_report["review_progress"].count(0)+ _report["review_progress"].count(1)) if (_report["review_progress"].count(0)+ _report["review_progress"].count(1)) >0 else 0
_report["images_quality"]["successful_percent"] = _report["images_quality"]["successful"]/_report["total_images"] if _report["total_images"] > 0 else 0
_report["images_quality"]["bad_percent"] = _report["images_quality"]["bad"]/_report["total_images"] if _report["total_images"] > 0 else 0
# export data for dashboard
for month in _data.keys(): for month in _data.keys():
_data[month][0]["images_quality"]["successful_percent"] = _data[month][0]["images_quality"]["successful"]/_data[month][0]["total_images"] if _data[month][0]["total_images"] > 0 else 0 _data[month][0]["images_quality"]["successful_percent"] = _data[month][0]["images_quality"]["successful"]/_data[month][0]["total_images"] if _data[month][0]["total_images"] > 0 else 0
_data[month][0]["images_quality"]["bad_percent"] = _data[month][0]["images_quality"]["bad"]/_data[month][0]["total_images"] if _data[month][0]["total_images"] > 0 else 0 _data[month][0]["images_quality"]["bad_percent"] = _data[month][0]["images_quality"]["bad"]/_data[month][0]["total_images"] if _data[month][0]["total_images"] > 0 else 0
@ -328,7 +392,7 @@ class ReportAccumulateByRequest:
for accuracy_type in ["feedback_accuracy", "reviewed_accuracy"]: for accuracy_type in ["feedback_accuracy", "reviewed_accuracy"]:
_data[month][0][accuracy_type][key] = _data[month][0][accuracy_type][key]() _data[month][0][accuracy_type][key] = _data[month][0][accuracy_type][key]()
_data[month][0]["review_progress"] = _data[month][0]["review_progress"].count(1)/(_data[month][0]["review_progress"].count(0)+ _data[month][0]["review_progress"].count(1)) if (_data[month][0]["review_progress"].count(0)+ _data[month][0]["review_progress"].count(1)) >0 else 0 _data[month][0]["review_progress"] = _data[month][0]["review_progress"].count(1)/(_data[month][0]["review_progress"].count(0)+ _data[month][0]["review_progress"].count(1)) if (_data[month][0]["review_progress"].count(0)+ _data[month][0]["review_progress"].count(1)) >0 else 0
return _data return _data, _report
class MonthReportAccumulate: class MonthReportAccumulate:
def __init__(self): def __init__(self):
@ -846,7 +910,7 @@ def calculate_a_request(report, request):
fb_avg_acc = avg_dict(att["acc"]["feedback"]) fb_avg_acc = avg_dict(att["acc"]["feedback"])
rv_avg_acc = avg_dict(att["acc"]["reviewed"]) rv_avg_acc = avg_dict(att["acc"]["reviewed"])
image.is_required = fb_avg_acc < settings.BAD_THRESHOLD image.is_required = fb_avg_acc < settings.NEED_REVIEW
if image.processing_time < 0: if image.processing_time < 0:
continue continue

View File

@ -24,7 +24,7 @@ jsonschema==4.17.1
MarkupSafe==2.1.1 MarkupSafe==2.1.1
packaging==21.3 packaging==21.3
Pillow==9.3.0 Pillow==9.3.0
psycopg2==2.9.5 # psycopg2==2.9.5
psycopg2-binary==2.9.5 psycopg2-binary==2.9.5
pycparser==2.21 pycparser==2.21
pyparsing==3.0.9 pyparsing==3.0.9

View File

@ -17,25 +17,26 @@ services:
- ctel-sbt - ctel-sbt
privileged: true privileged: true
image: sidp/cope2n-ai-fi-sbt:latest image: sidp/cope2n-ai-fi-sbt:latest
# runtime: nvidia
environment: environment:
- PYTHONPATH=${PYTHONPATH}:/workspace/cope2n-ai-fi # For import module - PYTHONPATH=${PYTHONPATH}:/workspace/cope2n-ai-fi # For import module
- CELERY_BROKER=amqp://${RABBITMQ_DEFAULT_USER}:${RABBITMQ_DEFAULT_PASS}@rabbitmq-sbt:5672 - CELERY_BROKER=amqp://${RABBITMQ_DEFAULT_USER}:${RABBITMQ_DEFAULT_PASS}@rabbitmq-sbt:5672
- CUDA_VISIBLE_DEVICES=1 # - CUDA_VISIBLE_DEVICES=0
volumes: volumes:
- ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only - ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only
working_dir: /workspace/cope2n-ai-fi working_dir: /workspace/cope2n-ai-fi
# deploy: deploy:
# resources: resources:
# reservations: reservations:
# devices: devices:
# - driver: nvidia - driver: nvidia
# count: 1 count: 1
# capabilities: [gpu] capabilities: [gpu]
# command: bash -c "tail -f > /dev/null" # command: bash -c "tail -f > /dev/null"
command: bash run.sh command: bash run.sh
deploy: # deploy:
mode: replicated # mode: replicated
replicas: 1 # replicas: 1
# Back-end services # Back-end services
be-ctel-sbt: be-ctel-sbt:
build: build:
@ -89,12 +90,12 @@ services:
depends_on: depends_on:
db-sbt: db-sbt:
condition: service_started condition: service_started
# command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input && command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
# python manage.py makemigrations && python manage.py makemigrations &&
# python manage.py migrate && python manage.py migrate &&
# python manage.py compilemessages && python manage.py compilemessages &&
# gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod
command: "sleep infinity" # command: "sleep infinity"
minio: minio:
image: minio/minio image: minio/minio
@ -179,8 +180,8 @@ services:
- ./cope2n-api:/app - ./cope2n-api:/app
working_dir: /app working_dir: /app
# command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5" command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5"
command: bash -c "tail -f > /dev/null" # command: bash -c "tail -f > /dev/null"
# Back-end persistent # Back-end persistent
db-sbt: db-sbt:
@ -196,7 +197,7 @@ services:
- POSTGRES_PASSWORD=${DB_PASSWORD} - POSTGRES_PASSWORD=${DB_PASSWORD}
- POSTGRES_DB=${DB_SCHEMA} - POSTGRES_DB=${DB_SCHEMA}
# ports: # ports:
# - 15432:5432 # - 54321:5432
rabbitmq-sbt: rabbitmq-sbt:
mem_reservation: 600m mem_reservation: 600m
@ -220,7 +221,7 @@ services:
shm_size: 10gb shm_size: 10gb
privileged: true privileged: true
ports: ports:
- 59881:80 - 9881:80
depends_on: depends_on:
be-ctel-sbt: be-ctel-sbt:
condition: service_started condition: service_started