From e30ba0f40b43e47481cbc5aba33628a1849fe5dd Mon Sep 17 00:00:00 2001 From: dx-tan Date: Fri, 17 May 2024 19:39:31 +0700 Subject: [PATCH] Update: API, logic --- README.md | 5 +- cope2n-ai-fi/modules/sdsvkvu | 2 +- cope2n-api/Dockerfile | 6 +- cope2n-api/add_user.py | 10 -- cope2n-api/celery_config.txt | 2 +- cope2n-api/fwd_api/api/accuracy_view.py | 104 +++++++++++------- .../celery_worker/process_report_tasks.py | 1 + .../migrate-datebase-edit-Lazada-reason.py | 67 +++++++++++ .../migrate-datebase-edit-None-invoice-no.py | 1 - ...ubscriptionrequest_is_required_and_more.py | 28 +++++ .../fwd_api/models/SubscriptionRequest.py | 1 + .../fwd_api/models/SubscriptionRequestFile.py | 2 + cope2n-api/fwd_api/utils/accuracy.py | 15 ++- cope2n-api/fwd_api/utils/sdsvkvu | 2 +- cope2n-api/modified.txt | 0 docker-compose-dev.yml | 22 ++-- 16 files changed, 196 insertions(+), 72 deletions(-) delete mode 100755 cope2n-api/add_user.py create mode 100644 cope2n-api/fwd_api/management/commands/migrate-datebase-edit-Lazada-reason.py create mode 100644 cope2n-api/fwd_api/migrations/0191_subscriptionrequest_is_required_and_more.py create mode 100644 cope2n-api/modified.txt diff --git a/README.md b/README.md index 7735cea..cc16762 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,8 @@ docker compose --profile local up -d ## Run tests ```bash -pip3 install pytest requests -IDP_HOST=https://sbt.idp.sdsrv.ai IDP_USERNAME= IDP_PASSWORD= python3 -m pytest --log-cli-level=DEBUG --capture=sys tests +pip3 install pytest requests deepdiff +IDP_HOST=https://sbt.idp.sdsrv.ai IDP_USERNAME=sbt IDP_PASSWORD=7Eg4AbWIXDnufgn python3 -m pytest --log-cli-level=DEBUG --capture=sys tests ``` +IDP_HOST=http://ec2-13-212-90-87.ap-southeast-1.compute.amazonaws.com:9881 IDP_USERNAME=sbt IDP_PASSWORD=abc python3 -m pytest --log-cli-level=DEBUG --capture=sys tests diff --git a/cope2n-ai-fi/modules/sdsvkvu b/cope2n-ai-fi/modules/sdsvkvu index 5eedd64..ec0d9a0 160000 --- a/cope2n-ai-fi/modules/sdsvkvu +++ b/cope2n-ai-fi/modules/sdsvkvu @@ -1 +1 @@ -Subproject commit 5eedd64e87c405918ad9fc37cb6163f8d04ce49d +Subproject commit ec0d9a0a5a8c17c9a78165d1f78103951a2e138b diff --git a/cope2n-api/Dockerfile b/cope2n-api/Dockerfile index ed843a2..20eeeb2 100755 --- a/cope2n-api/Dockerfile +++ b/cope2n-api/Dockerfile @@ -32,9 +32,9 @@ RUN pip install -r requirements.txt --no-cache-dir COPY --chown=${UID}:${GID} . /app -RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsv_dewarp && pip3 install -v -e . --no-cache-dir -RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsvtd && pip3 install -v -e . --no-cache-dir -RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsvtr && pip3 install -v -e . --no-cache-dir +RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsv_dewarp && pip3 install -v -e . --no-cache-dir +RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsvtd && pip3 install -v -e . --no-cache-dir +RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsvtr && pip3 install -v -e . --no-cache-dir RUN cd /app/fwd_api/utils/sdsvkvu && pip3 install -v -e . --no-cache-dir # For intergration with sdskvu diff --git a/cope2n-api/add_user.py b/cope2n-api/add_user.py deleted file mode 100755 index 13ae49d..0000000 --- a/cope2n-api/add_user.py +++ /dev/null @@ -1,10 +0,0 @@ -from fwd_api.models.UserProfile import UserProfile -from fwd_api.models.UserProfile import Subscription -from fwd_api.models.UserProfile import PricingPlan - -def add_user(user_name, _id): - pricing_plan = PricingPlan(id=_id, code=10, token_limitations=1000000000, duration=1000000000) - user = UserProfile(id=_id, full_name=user_name) - subscription = Subscription(id=_id, current_token=0, limit_token=1000000000, pricing_plan=pricing_plan, ) - - diff --git a/cope2n-api/celery_config.txt b/cope2n-api/celery_config.txt index ac2992d..ec5c509 100644 --- a/cope2n-api/celery_config.txt +++ b/cope2n-api/celery_config.txt @@ -1317,7 +1317,7 @@ "queue": "csv_feedback" }, "do_pdf": { - "queue": "do_pdf" + "queue": "do_pdf" }, "make_a_report": { "queue": "report" diff --git a/cope2n-api/fwd_api/api/accuracy_view.py b/cope2n-api/fwd_api/api/accuracy_view.py index 54f0a3c..2d32d69 100755 --- a/cope2n-api/fwd_api/api/accuracy_view.py +++ b/cope2n-api/fwd_api/api/accuracy_view.py @@ -38,16 +38,16 @@ class AccuracyViewSet(viewsets.ViewSet): OpenApiParameter( name='start_date', location=OpenApiParameter.QUERY, - description='Start date (YYYY-mm-DDTHH:MM:SSZ)', + description='Start date (YYYY-mm-DD)', type=OpenApiTypes.DATE, - default='2023-01-02T00:00:00+0700', + default='2023-01-02', ), OpenApiParameter( name='end_date', location=OpenApiParameter.QUERY, - description='End date (YYYY-mm-DDTHH:MM:SSZ)', + description='End date (YYYY-mm-DD', type=OpenApiTypes.DATE, - default='2024-01-10T00:00:00+0700', + default='2024-01-10', ), OpenApiParameter( name='includes_test', @@ -62,6 +62,13 @@ class AccuracyViewSet(viewsets.ViewSet): type=OpenApiTypes.STR, enum=['reviewed', 'not_reviewed', 'all'], ), + OpenApiParameter( + name='is_required', + location=OpenApiParameter.QUERY, + description='Which records to be query', + type=OpenApiTypes.STR, + enum=['required', 'not_required'], + ), OpenApiParameter( name='subsidiary', location=OpenApiParameter.QUERY, @@ -115,6 +122,7 @@ class AccuracyViewSet(viewsets.ViewSet): request_id = request.GET.get('request_id', None) with_redemption_id = request.GET.get('with_redemption_id', True) is_reviewed = request.GET.get('is_reviewed', None) + is_required = request.GET.get('is_required', None) include_test = request.GET.get('includes_test', False) subsidiary = request.GET.get("subsidiary", "all") max_accuracy = float(request.GET.get("max_accuracy", 100)) @@ -169,6 +177,11 @@ class AccuracyViewSet(viewsets.ViewSet): base_query &= Q(is_reviewed=False) elif is_reviewed == "all": pass + if isinstance(is_required, str): + if is_required == "required": + base_query &= Q(is_required=True) + elif is_required == "not_required": + base_query &= Q(is_required=False) if isinstance(subsidiary, str): if subsidiary.lower() != "seao": if subsidiary not in list(settings.SUBS.keys()): @@ -721,9 +734,13 @@ class AccuracyViewSet(viewsets.ViewSet): 'multipart/form-data': { 'type': 'object', 'properties': { - 'reviewed_result': { - 'type': 'string', - 'default': '''{"request_id": "Sample request_id", "imei_number": ["sample_imei1", "sample_imei2"], "retailername": "Sample Retailer", "purchase_date": "01/01/1970", "sold_to_party": "Sample party", "invoice_no": "Sample Invoice no"}''', + 'request_file_results': { + 'type': 'array', + 'items': { + 'type': 'string', + # 'format': 'binary', + 'default': '''{"request_file_id": "Sample request_id", "imei_number": "sample_imei1", "retailername": "Sample Retailer", "purchase_date": "01/01/1970", "sold_to_party": "Sample party", "invoice_no": "Sample Invoice no"}''', + } }, }, }, @@ -851,43 +868,48 @@ class AccuracyViewSet(viewsets.ViewSet): if subscription_request.count() == 0: raise NotFoundException(excArgs=request_id) + if "request_file_results" not in data or not isinstance(data.get("request_file_results", None), list): + raise InvalidException(excArgs='request_file_results') + # TODO: check if requestfiles all comes from the request_id subscription_request = subscription_request.first() - subscription_request_files = SubscriptionRequestFile.objects.filter( - request=subscription_request.id) + reviewed_result = data["request_file_results"] - if "reviewed_result" not in data: - raise InvalidException(excArgs=f'reviewed_result') - - reviewed_result = data["reviewed_result"] - if not subscription_request.predict_result: - raise InvalidException(excArgs=f'request_id') - validate_review(reviewed_result, len(subscription_request.predict_result.get( - "content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[3].get("value", []))) - reviewed_result['request_id'] = request_id - - for subscription_request_file in subscription_request_files: - if subscription_request_file.doc_type == 'invoice': - subscription_request_file.reviewed_result = copy.deepcopy( - reviewed_result) - subscription_request_file.reviewed_result['imei_number'] = [ - ] - elif subscription_request_file.doc_type == 'imei': - subscription_request_file.reviewed_result = { - "retailername": None, - "sold_to_party": None, - "invoice_no": None, - "purchase_date": [], - "imei_number": []} - if len(reviewed_result["imei_number"])-1 >= subscription_request_file.index_in_request: - subscription_request_file.reviewed_result["imei_number"] = [reviewed_result[ - "imei_number"][subscription_request_file.index_in_request]] - subscription_request_file.save() - - subscription_request.reviewed_result = reviewed_result - subscription_request.reviewed_result['request_id'] = request_id - subscription_request.is_reviewed = True - subscription_request.save() + requestfiles = [] + for file_result in reviewed_result: + request_file_id = file_result.get("request_file_id", None) + if request_file_id is None: + raise InvalidException("request_file_id") + subscription_request_files = SubscriptionRequestFile.objects.filter( + request=request_file_id) + if subscription_request_files.count() == 0: + raise InvalidException(excArgs=f'request_file_id: {request_file_id}') + + this_file = subscription_request_files.first() + requestfiles.append(this_file) + sample_result = { + "request_id": request_id, + "retailername": None, + "sold_to_party": None, + "invoice_no": None, + "purchase_date": None, + "imei_number": [] + } + for k, v in file_result.items(): + if k in sample_result.keys(): + if not isinstance(sample_result[k], list): + sample_result[k] = v + else: + sample_result[k].append(v) + + if this_file.reviewed_result is not None: + this_file.reviewed_result.update(sample_result) + else: + this_file.reviewed_result = sample_result + + for rf in requestfiles: + rf.is_reviewed = True + rf.save() return JsonResponse({'message': 'success.'}, status=200) else: diff --git a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py index c738c82..ac4e25a 100755 --- a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py +++ b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py @@ -141,6 +141,7 @@ def create_accuracy_report(report_id, **kwargs): request.raw_accuracy = mean_list(rq_accuracy["reviewed"]) elif len(rq_accuracy["feedback"]) > 0: request.raw_accuracy = mean_list(rq_accuracy["feedback"]) + request.is_required = request.raw_accuracy < settings.BAD_THRESHOLD request.save() number_images += request_att["total_images"] diff --git a/cope2n-api/fwd_api/management/commands/migrate-datebase-edit-Lazada-reason.py b/cope2n-api/fwd_api/management/commands/migrate-datebase-edit-Lazada-reason.py new file mode 100644 index 0000000..44652f7 --- /dev/null +++ b/cope2n-api/fwd_api/management/commands/migrate-datebase-edit-Lazada-reason.py @@ -0,0 +1,67 @@ +from django.core.management.base import BaseCommand +from tqdm import tqdm +from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest +from fwd_api.exception.exceptions import InvalidException +from fwd_api.utils.accuracy import predict_result_to_ready +import traceback +import copy +from django.utils import timezone + +KEY = "retailername" +VALUE = "Lazada" +REASONS_TO_CONSIDER = ["invalid_image", "Invalid Input"] + +class Command(BaseCommand): + help = 'Refactor database for image level' + + def add_arguments(self, parser): + # Add your command-line arguments here + parser.add_argument('start', type=str, help='start date, sample: 2023-01-02T00:00:00+0700') + parser.add_argument('end', type=str, help='end date, sample: 2023-01-03T00:00:00+0700') + + def process_request(self, request, file): + if len(request.request_id.split(".")[0].split("_")) < 2: + return + images = SubscriptionRequestFile.objects.filter(request=request) + + if not request.predict_result: + self.stdout.write(self.style.WARNING(f"Key predict_result not found in {request.request_id}")) + return + if request.predict_result.get("status", 200) != 200: + self.stdout.write(self.style.WARNING(f"Not a sucess request {request.request_id}")) + return + + for i, image in enumerate(images): + try: + if isinstance(image.reviewed_result, dict): + # print(f"[INFO]: dict ... {request.predict_result}") + if image.reviewed_result.get(KEY, None)==VALUE and image.reason in REASONS_TO_CONSIDER: + image.reason = "" + image.counter_measures = "" + image.save() + file.write(str(image.code) + "\n") + except Exception as e: + self.stdout.write(self.style.ERROR(f"Request: {request.request_id} failed with {e}")) + print(traceback.format_exc()) + continue + + def handle(self, *args, **options): + start = options['start'] + end = options['end'] + if start or end: + try: + start_date = timezone.datetime.strptime(start, '%Y-%m-%dT%H:%M:%S%z') # We care only about day precision only + end_date = timezone.datetime.strptime(end, '%Y-%m-%dT%H:%M:%S%z') + except Exception as e: + print(f"[INFO]: start: {start}") + print(f"[INFO]: end: {end}") + raise InvalidException(excArgs="Date format") + subcription_iter = SubscriptionRequest.objects.filter(created_at__range=(start_date, end_date)) + else: + subcription_iter = SubscriptionRequest.objects.all() + + file = open("modified.txt", "w") + for request in tqdm(subcription_iter.iterator()): + self.process_request(request, file) + file.close() + self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!')) \ No newline at end of file diff --git a/cope2n-api/fwd_api/management/commands/migrate-datebase-edit-None-invoice-no.py b/cope2n-api/fwd_api/management/commands/migrate-datebase-edit-None-invoice-no.py index 90bfb37..7230cca 100644 --- a/cope2n-api/fwd_api/management/commands/migrate-datebase-edit-None-invoice-no.py +++ b/cope2n-api/fwd_api/management/commands/migrate-datebase-edit-None-invoice-no.py @@ -48,7 +48,6 @@ class Command(BaseCommand): for i, image in enumerate(images): try: if isinstance(image.predict_result, dict): - # print(f"[INFO]: dict ... {request.predict_result}") image.predict_result[KEY] = None if isinstance(image.feedback_result, dict): image.feedback_result[KEY] = None diff --git a/cope2n-api/fwd_api/migrations/0191_subscriptionrequest_is_required_and_more.py b/cope2n-api/fwd_api/migrations/0191_subscriptionrequest_is_required_and_more.py new file mode 100644 index 0000000..dc15a55 --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0191_subscriptionrequest_is_required_and_more.py @@ -0,0 +1,28 @@ +# Generated by Django 4.1.3 on 2024-05-15 09:57 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0190_rename_accuracy_subscriptionrequest_raw_accuracy'), + ] + + operations = [ + migrations.AddField( + model_name='subscriptionrequest', + name='is_required', + field=models.BooleanField(default=True), + ), + migrations.AddField( + model_name='subscriptionrequestfile', + name='is_required', + field=models.BooleanField(default=True), + ), + migrations.AddField( + model_name='subscriptionrequestfile', + name='is_reviewed', + field=models.BooleanField(default=False), + ), + ] diff --git a/cope2n-api/fwd_api/models/SubscriptionRequest.py b/cope2n-api/fwd_api/models/SubscriptionRequest.py index c8de868..80bb44f 100755 --- a/cope2n-api/fwd_api/models/SubscriptionRequest.py +++ b/cope2n-api/fwd_api/models/SubscriptionRequest.py @@ -37,4 +37,5 @@ class SubscriptionRequest(models.Model): total_memory = models.FloatField(default=-1) gpu_stats = models.CharField(max_length=100, null=True) is_reviewed = models.BooleanField(default=False) + is_required = models.BooleanField(default=True) subsidiary = models.CharField(default="", null=True, max_length=200) diff --git a/cope2n-api/fwd_api/models/SubscriptionRequestFile.py b/cope2n-api/fwd_api/models/SubscriptionRequestFile.py index cf873b4..bed1a31 100755 --- a/cope2n-api/fwd_api/models/SubscriptionRequestFile.py +++ b/cope2n-api/fwd_api/models/SubscriptionRequestFile.py @@ -25,6 +25,8 @@ class SubscriptionRequestFile(models.Model): processing_time = models.FloatField(default=-1) # in milisecond reason = models.TextField(blank=True) counter_measures = models.TextField(blank=True) + is_reviewed = models.BooleanField(default=False) + is_required = models.BooleanField(default=True) # subsidiary = models.CharField(default="", null=True, max_length=200) # save already in SubscriptionRequest predict_result = models.JSONField(null=True) diff --git a/cope2n-api/fwd_api/utils/accuracy.py b/cope2n-api/fwd_api/utils/accuracy.py index 1d5e938..d5954c4 100755 --- a/cope2n-api/fwd_api/utils/accuracy.py +++ b/cope2n-api/fwd_api/utils/accuracy.py @@ -791,6 +791,14 @@ def create_billing_data(subscription_requests): }) return billing_data + +def avg_dict(data): + values = [] + for k, v in data.items(): + if isinstance(v, list): + values += v + return sum(values)/len(values) if len(values) > 0 else -1 + def calculate_a_request(report, request): def review_status_map(input): review_status = {-1: "Not Required", @@ -827,7 +835,6 @@ def calculate_a_request(report, request): images = SubscriptionRequestFile.objects.filter(request=request, file_category=FileCategory.Origin.value) report_files = [] for image in images: - status, att = calculate_subcription_file(image) atts.append(att) att["acc"]["feedback"], fb_max_indexes = acc_maximize_list_values(att["acc"]["feedback"]) @@ -835,6 +842,12 @@ def calculate_a_request(report, request): _att = copy.deepcopy(att) + fb_avg_acc = avg_dict(att["acc"]["feedback"]) + rv_avg_acc = avg_dict(att["acc"]["reviewed"]) + + image.is_required = fb_avg_acc < settings.BAD_THRESHOLD + image.is_reviewed = rv_avg_acc > 0 + if image.processing_time < 0: continue if status != 200: diff --git a/cope2n-api/fwd_api/utils/sdsvkvu b/cope2n-api/fwd_api/utils/sdsvkvu index 5eedd64..ec0d9a0 160000 --- a/cope2n-api/fwd_api/utils/sdsvkvu +++ b/cope2n-api/fwd_api/utils/sdsvkvu @@ -1 +1 @@ -Subproject commit 5eedd64e87c405918ad9fc37cb6163f8d04ce49d +Subproject commit ec0d9a0a5a8c17c9a78165d1f78103951a2e138b diff --git a/cope2n-api/modified.txt b/cope2n-api/modified.txt new file mode 100644 index 0000000..e69de29 diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 3d31685..611a33c 100755 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -20,7 +20,7 @@ services: environment: - PYTHONPATH=${PYTHONPATH}:/workspace/cope2n-ai-fi # For import module - CELERY_BROKER=amqp://${RABBITMQ_DEFAULT_USER}:${RABBITMQ_DEFAULT_PASS}@rabbitmq-sbt:5672 - - CUDA_VISIBLE_DEVICES=0 + - CUDA_VISIBLE_DEVICES=1 volumes: - ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only working_dir: /workspace/cope2n-ai-fi @@ -89,12 +89,12 @@ services: depends_on: db-sbt: condition: service_started - command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input && - python manage.py makemigrations && - python manage.py migrate && - python manage.py compilemessages && - gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod - # command: "sleep infinity" + # command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input && + # python manage.py makemigrations && + # python manage.py migrate && + # python manage.py compilemessages && + # gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod + command: "sleep infinity" minio: image: minio/minio @@ -179,8 +179,8 @@ services: - ./cope2n-api:/app working_dir: /app - command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5" - # command: bash -c "tail -f > /dev/null" + # command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5" + command: bash -c "tail -f > /dev/null" # Back-end persistent db-sbt: @@ -196,7 +196,7 @@ services: - POSTGRES_PASSWORD=${DB_PASSWORD} - POSTGRES_DB=${DB_SCHEMA} ports: - - 15432:5432 + - 54321:5432 rabbitmq-sbt: mem_reservation: 600m @@ -220,7 +220,7 @@ services: shm_size: 10gb privileged: true ports: - - 9881:80 + - 29881:80 depends_on: be-ctel-sbt: condition: service_started