Merge branch 'main' of https://code.sdsdev.co.kr/SDSRV-IDP/sbt-idp into main

2024-06-12 14:45:41 +07:00 · 2024-06-12 14:45:41 +07:00 · 51a04d38e4
commit 51a04d38e4
parent 23bd648deb c0efb767e8
6 changed files with 304 additions and 89 deletions
--- a/README.md
+++ b/README.md
@ -12,8 +12,8 @@ docker compose --profile local up -d
 ## Run tests
 ```bash
-pip3 install pytest requests deepdiff
+pip install pytest requests deepdiff
 IDP_HOST=https://sbt.idp.sdsrv.ai IDP_USERNAME=sbt IDP_PASSWORD=7Eg4AbWIXDnufgn python3 -m pytest --log-cli-level=DEBUG --capture=sys tests
 ```
-IDP_HOST=http://ec2-13-212-90-87.ap-southeast-1.compute.amazonaws.com:9881 IDP_USERNAME=sbt IDP_PASSWORD=abc python3 -m pytest --log-cli-level=DEBUG --capture=sys tests
+IDP_HOST=http://ec2-54-169-227-39.ap-southeast-1.compute.amazonaws.com:9881 IDP_USERNAME=sbt IDP_PASSWORD=abc python3 -m pytest --log-cli-level=DEBUG --capture=sys tests
--- a/cope2n-api/EDA/processing_time.ipynb
+++ b/cope2n-api/EDA/processing_time.ipynb
--- a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py
+++ b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py
@ -136,14 +136,15 @@ def create_accuracy_report(report_id, **kwargs):
            #         rq_accuracy["feedback"] += rpf.feedback_accuracy[cl]
            #     for cl in rpf.reviewed_accuracy.keys():
            #         rq_accuracy["reviewed"] += rpf.reviewed_accuracy[cl]
-            
+            request.is_required = False
            if len(rq_accuracy["reviewed"]) > 0:
                request.raw_accuracy = mean_list(rq_accuracy["reviewed"])
            elif len(rq_accuracy["feedback"]) > 0:
                request.raw_accuracy = mean_list(rq_accuracy["feedback"])
-            request.is_required = request.raw_accuracy < settings.BAD_THRESHOLD if isinstance(request.raw_accuracy, float) else False # No need to review by default
+                request.is_required = request.raw_accuracy < settings.NEED_REVIEW if isinstance(request.raw_accuracy, float) else False # No need to review by default 
            request.save()
            number_images += request_att["total_images"]
            number_bad_images += request_att["bad_images"]
            bad_image_list += request_att["bad_image_list"]
@ -188,9 +189,9 @@ def create_accuracy_report(report_id, **kwargs):
                avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key+"_count"])
            acumulated_acc[acc_type]["avg"] = avg_acc()
-        report.feedback_accuracy = acumulated_acc["feedback"]
+        report.feedback_accuracy = _save_data["report"]["feedback_accuracy"]
-        report.reviewed_accuracy = acumulated_acc["reviewed"]
+        report.reviewed_accuracy = _save_data["report"]["reviewed_accuracy"]
-        report.combined_accuracy = acumulated_acc["acumulated"]
+        report.combined_accuracy = _save_data["report"]["average_accuracy_rate"]
        report.num_reviewed = review_progress.count(1)
        report.num_not_reviewed = review_progress.count(0)
--- a/cope2n-api/fwd_api/utils/accuracy.py
+++ b/cope2n-api/fwd_api/utils/accuracy.py
@ -27,7 +27,7 @@ class ReportAccumulateByRequest:
        self.sub = sub
        self.current_time = None
        self.data = {} # {"month": [total, {"day": day_data}]}
-        self.total_format = {
+        self.month_format = {
                    'subs': "+",
                    'extraction_date': "Subtotal ()",
                    'num_imei': 0,
@ -122,9 +122,11 @@ class ReportAccumulateByRequest:
                    "num_request": 0,
                    "review_progress": []
                },
        self.report = copy.deepcopy(self.month_format)
    @staticmethod
    def update_total(total, report_file):
        # Update total = update total
        if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS:
            total["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
            total["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0
@ -162,6 +164,46 @@ class ReportAccumulateByRequest:
            total["review_progress"].append(report_file.review_status)
        return total
    @staticmethod
    def update_month(month, report_file):
        """Fold one report file into the running subtotal of a month.

        Files whose ``bad_image_reason`` is listed in
        ``settings.ACC_EXCLUDE_RESEASONS`` are ignored entirely.

        Args:
            month: The month subtotal dict. The keys touched here are
                ``images_quality``, ``total_images``, ``num_imei``,
                ``num_invoice``, ``average_accuracy_rate``,
                ``feedback_accuracy``, ``reviewed_accuracy``,
                ``average_processing_time``, ``usage`` and ``review_progress``.
            report_file: The report file record being accumulated. Read
                attributes: ``bad_image_reason``, ``is_bad_image``,
                ``doc_type``, ``correspond_request_id``, ``reviewed_accuracy``,
                ``feedback_accuracy``, ``time_cost`` and ``review_status``.

        Returns:
            The same ``month`` dict, updated in place.
        """
        if report_file.bad_image_reason in settings.ACC_EXCLUDE_RESEASONS:
            return month

        def _has_scores(accuracy):
            # True when any non-"_count" entry holds at least one value.
            return sum(len(values) for name, values in accuracy.items() if "_count" not in name) > 0

        # Image quality counters.
        month["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
        month["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0
        month["total_images"] += 1

        # Resolve the counting doc type once; unknown types are counted as
        # "imei" and reported a single time per file.
        if report_file.doc_type in ["imei", "invoice", "all"]:
            doc_type = report_file.doc_type
        else:
            doc_type = "imei"
            print(f"[WARM]: Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
        is_imei = doc_type == "imei"
        is_invoice = doc_type == "invoice"

        month["num_imei"] += 1 if is_imei else 0
        month["num_invoice"] += 1 if is_invoice else 0

        # The average accuracy prefers reviewed scores and falls back to
        # feedback scores; the choice does not depend on the field, so it is
        # made once before the loop.
        if _has_scores(report_file.reviewed_accuracy):
            preferred_accuracy = report_file.reviewed_accuracy
        elif _has_scores(report_file.feedback_accuracy):
            preferred_accuracy = report_file.feedback_accuracy
        else:
            preferred_accuracy = None

        for key in settings.FIELD:
            if preferred_accuracy is not None:
                month["average_accuracy_rate"][key].add(preferred_accuracy.get(key, []))
            month["feedback_accuracy"][key].add(report_file.feedback_accuracy.get(key, []))
            month["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, []))

        # Processing time is tracked under the raw doc type (not the
        # normalised one); an accumulator is created on first sight.
        processing_time = month["average_processing_time"]
        if not processing_time.get(report_file.doc_type, None):
            print(f"[WARM]: Weird doctype: {report_file.doc_type}")
            processing_time[report_file.doc_type] = IterAvg()
        if report_file.time_cost:
            processing_time[report_file.doc_type].add_avg(report_file.time_cost, 1)
            processing_time["avg"].add_avg(report_file.time_cost, 1)

        # Usage counters and review bookkeeping.
        month["usage"]["imei"] += 1 if is_imei else 0
        month["usage"]["invoice"] += 1 if is_invoice else 0
        month["usage"]["total_images"] += 1
        month["review_progress"].append(report_file.review_status)
        return month
    @staticmethod
    def update_day(day_data, report_file):
        if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS:
@ -197,7 +239,7 @@ class ReportAccumulateByRequest:
        this_month = timezone.localtime(request.created_at).strftime("%Y%m")
        this_day = timezone.localtime(request.created_at).strftime("%Y%m%d")
        if not self.data.get(this_month, None):
-            self.data[this_month] = [copy.deepcopy(self.total_format), {}]
+            self.data[this_month] = [copy.deepcopy(self.month_format), {}]
            self.data[this_month][0]["extraction_date"] = "Subtotal (" + timezone.localtime(request.created_at).strftime("%Y-%m") + ")"
        if not self.data[this_month][1].get(this_day, None):
            print(f"[INFO] Adding a new day: {this_day} for report: {report.id} ...")
@ -221,7 +263,8 @@ class ReportAccumulateByRequest:
                for t in _report_file.reviewed_accuracy.keys():
                    _report_file.reviewed_accuracy[t] = []
-            self.data[this_month][0] = self.update_total(self.data[this_month][0], _report_file) # Update the subtotal within the month
+            self.report = self.update_total(self.report, _report_file)
            self.data[this_month][0] = self.update_month(self.data[this_month][0], _report_file) # Update the subtotal within the month
            self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], _report_file) # Update the subtotal of the day
    def count_transactions_within_day(self, date_string):
@ -231,10 +274,11 @@ class ReportAccumulateByRequest:
        return count_transactions(start_date_with_timezone, end_date_with_timezone, self.sub)
    def save(self, root_report_id, is_daily_report=False, include_test=False):
-        report_data = self.get()
+        report_data, overall_report = self.get()
        fine_data = []
        save_data = {"file": {"overview": f"{root_report_id}/{root_report_id}.xlsx"},
-                    "data": fine_data} # {"sub_report_id": "S3 location", "data": fine_data}
+                    "data": fine_data, # {"sub_report_id": "S3 location", "data": fine_data}
                    "report": overall_report}
        # extract data
        month_keys = list(report_data.keys())
        month_keys.sort(reverse=True)
@ -297,6 +341,26 @@ class ReportAccumulateByRequest:
    def get(self) -> Any:
        # FIXME: This looks like a junk
        _data = copy.deepcopy(self.data)
        _report = copy.deepcopy(self.report)
        # export report data
        for key in _report["average_processing_time"].keys():
            _report["average_processing_time"][key] = _report["average_processing_time"][key]()
        avg_acc = 0
        count_acc = 0
        for key in settings.FIELD:
            _report["average_accuracy_rate"][key] = _report["average_accuracy_rate"][key]()
            for accuracy_type in ["feedback_accuracy", "reviewed_accuracy"]:
                avg_acc = (avg_acc*count_acc + _report[accuracy_type][key].avg*_report[accuracy_type][key].count) / (_report[accuracy_type][key].count + count_acc)
                count_acc += _report[accuracy_type][key].count
                _report[accuracy_type][key] = _report[accuracy_type][key]()
        _report["average_accuracy_rate"]["avg"] = avg_acc
        _report["review_progress"] = _report["review_progress"].count(1)/(_report["review_progress"].count(0)+ _report["review_progress"].count(1)) if (_report["review_progress"].count(0)+ _report["review_progress"].count(1)) >0 else 0
        _report["images_quality"]["successful_percent"] = _report["images_quality"]["successful"]/_report["total_images"]  if _report["total_images"] > 0 else 0
        _report["images_quality"]["bad_percent"] = _report["images_quality"]["bad"]/_report["total_images"] if _report["total_images"] > 0 else 0
        # export data for dashboard
        for month in _data.keys():
            _data[month][0]["images_quality"]["successful_percent"] = _data[month][0]["images_quality"]["successful"]/_data[month][0]["total_images"] if _data[month][0]["total_images"] > 0 else 0
            _data[month][0]["images_quality"]["bad_percent"] = _data[month][0]["images_quality"]["bad"]/_data[month][0]["total_images"] if _data[month][0]["total_images"] > 0 else 0
@ -328,7 +392,7 @@ class ReportAccumulateByRequest:
                for accuracy_type in ["feedback_accuracy", "reviewed_accuracy"]:
                    _data[month][0][accuracy_type][key] = _data[month][0][accuracy_type][key]()
            _data[month][0]["review_progress"] = _data[month][0]["review_progress"].count(1)/(_data[month][0]["review_progress"].count(0)+ _data[month][0]["review_progress"].count(1)) if (_data[month][0]["review_progress"].count(0)+ _data[month][0]["review_progress"].count(1)) >0 else 0
-        return _data
+        return _data, _report
 class MonthReportAccumulate:
    def __init__(self):
@ -846,7 +910,7 @@ def calculate_a_request(report, request):
        fb_avg_acc = avg_dict(att["acc"]["feedback"])
        rv_avg_acc = avg_dict(att["acc"]["reviewed"])
-        image.is_required = fb_avg_acc < settings.BAD_THRESHOLD
+        image.is_required = fb_avg_acc < settings.NEED_REVIEW
        if image.processing_time < 0:
            continue
--- a/cope2n-api/requirements.txt
+++ b/cope2n-api/requirements.txt
@ -24,7 +24,7 @@ jsonschema==4.17.1
 MarkupSafe==2.1.1
 packaging==21.3
 Pillow==9.3.0
-psycopg2==2.9.5
+# psycopg2==2.9.5
 psycopg2-binary==2.9.5
 pycparser==2.21
 pyparsing==3.0.9
--- a/docker-compose-dev.yml
+++ b/docker-compose-dev.yml
@ -17,25 +17,26 @@ services:
      - ctel-sbt
    privileged: true
    image: sidp/cope2n-ai-fi-sbt:latest
    # runtime: nvidia
    environment:
      - PYTHONPATH=${PYTHONPATH}:/workspace/cope2n-ai-fi  # For import module
      - CELERY_BROKER=amqp://${RABBITMQ_DEFAULT_USER}:${RABBITMQ_DEFAULT_PASS}@rabbitmq-sbt:5672
-      - CUDA_VISIBLE_DEVICES=1
+      # - CUDA_VISIBLE_DEVICES=0
    volumes:
      - ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only
    working_dir: /workspace/cope2n-ai-fi
-    # deploy:
+    deploy:
-    #   resources:
+      resources:
-    #     reservations:
+        reservations:
-    #       devices:
+          devices:
-    #         - driver: nvidia
+            - driver: nvidia
-    #           count: 1
+              count: 1
-    #           capabilities: [gpu]
+              capabilities: [gpu]
    # command: bash -c "tail -f > /dev/null"
    command: bash run.sh
-    deploy:
+    # deploy:
-      mode: replicated
+    #   mode: replicated
-      replicas: 1
+    #   replicas: 1
  # Back-end services
  be-ctel-sbt:
    build:
@ -89,12 +90,12 @@ services:
    depends_on:
      db-sbt:
        condition: service_started
-    # command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
+    command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
-    #               python manage.py makemigrations &&
+                  python manage.py makemigrations &&
-    #               python manage.py migrate &&
+                  python manage.py migrate &&
-    #               python manage.py compilemessages &&
+                  python manage.py compilemessages &&
-    #               gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod
+                  gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod
-    command: "sleep infinity"
+    # command: "sleep infinity"
  minio:
    image: minio/minio
@ -179,8 +180,8 @@ services:
      - ./cope2n-api:/app
    working_dir: /app
-    # command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5"
+    command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5"
-    command: bash -c "tail -f > /dev/null"
+    # command: bash -c "tail -f > /dev/null"
  # Back-end persistent
  db-sbt:
@ -196,7 +197,7 @@ services:
      - POSTGRES_PASSWORD=${DB_PASSWORD}
      - POSTGRES_DB=${DB_SCHEMA}
    # ports:
-    #   - 15432:5432
+    #   - 54321:5432
  rabbitmq-sbt:
    mem_reservation: 600m
@ -220,7 +221,7 @@ services:
    shm_size: 10gb
    privileged: true
    ports:
-      - 59881:80
+      - 9881:80
    depends_on:
      be-ctel-sbt:
        condition: service_started