diff --git a/.gitignore b/.gitignore
index 7cffb73..ff69baa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -35,3 +35,6 @@ cope2n-ai-fi/Dockerfile_old_work
 *.sql
 *.sql
 .env_prod
+/feedback/
+cope2n-api/public/SBT_report_20240122.csv
+Jan.csv
diff --git a/cope2n-api/fwd_api/api/accuracy_view.py b/cope2n-api/fwd_api/api/accuracy_view.py
index 27caeae..d5e9df2 100644
--- a/cope2n-api/fwd_api/api/accuracy_view.py
+++ b/cope2n-api/fwd_api/api/accuracy_view.py
@@ -17,6 +17,7 @@ from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list
 from ..utils.file import download_from_S3, convert_date_string
 from ..utils.redis import RedisUtils
 from ..utils.process import string_to_boolean
+from ..request.ReportCreationSerializer import ReportCreationSerializer
 from ..utils.subsidiary import map_subsidiary_long_to_short, map_subsidiary_short_to_long
 
 redis_client = RedisUtils()
@@ -177,79 +178,21 @@ class AccuracyViewSet(viewsets.ViewSet):
         return JsonResponse({'error': 'Invalid request method.'}, status=405)
 
     @extend_schema(
-        parameters=[
-            OpenApiParameter(
-                name='is_daily_report',
-                location=OpenApiParameter.QUERY,
-                description='Whether to include test record or not',
-                type=OpenApiTypes.BOOL,
-            ),
-            OpenApiParameter(
-                name='start_date',
-                location=OpenApiParameter.QUERY,
-                description='Start date (YYYY-mm-DDTHH:MM:SSZ)',
-                type=OpenApiTypes.DATE,
-                default='2023-01-02T00:00:00+0700',
-            ),
-            OpenApiParameter(
-                name='end_date',
-                location=OpenApiParameter.QUERY,
-                description='End date (YYYY-mm-DDTHH:MM:SSZ)',
-                type=OpenApiTypes.DATE,
-                default='2024-01-10T00:00:00+0700',
-            ),
-            OpenApiParameter(
-                name='include_test',
-                location=OpenApiParameter.QUERY,
-                description='Whether to include test record or not',
-                type=OpenApiTypes.BOOL,
-            ),
-            OpenApiParameter(
-                name='is_reviewed',
-                location=OpenApiParameter.QUERY,
-                description='Which records to be query',
-                type=OpenApiTypes.STR,
-                enum=['reviewed', 'not reviewed', 'all'],
-            ),
-            OpenApiParameter(
-                name='request_id',
-                location=OpenApiParameter.QUERY,
-                description='Specific request id',
-                type=OpenApiTypes.STR,
-            ),
-            OpenApiParameter(
-                name='redemption_id',
-                location=OpenApiParameter.QUERY,
-                description='Specific redemption id',
-                type=OpenApiTypes.STR,
-            ),
-            OpenApiParameter(
-                name='subsidiary',
-                location=OpenApiParameter.QUERY,
-                description='Subsidiary',
-                type=OpenApiTypes.STR,
-            ),
-            OpenApiParameter(
-                name='report_overview_duration',
-                location=OpenApiParameter.QUERY,
-                description=f'open of {settings.OVERVIEW_REPORT_DURATION}',
-                type=OpenApiTypes.STR,
-            ),
-        ],
+        request=ReportCreationSerializer(),
         responses=None,
         tags=['Accuracy']
     )
-    @action(detail=False, url_path="make_report", methods=["GET"])
+    @action(detail=False, url_path="make_report", methods=["POST"])
     def make_report(self, request):
-        if request.method == 'GET':
-            start_date_str = request.GET.get('start_date')
-            end_date_str = request.GET.get('end_date')
-            request_id = request.GET.get('request_id', None)
-            redemption_id = request.GET.get('redemption_id', None)
-            is_reviewed = string_to_boolean(request.GET.get('is_reviewed', "false"))
-            include_test = string_to_boolean(request.GET.get('include_test', "false"))
-            subsidiary = request.GET.get("subsidiary", "all")
-            is_daily_report = string_to_boolean(request.GET.get('is_daily_report', "false"))
-            report_overview_duration = request.GET.get("report_overview_duration", "")
+        if request.method == 'POST':
+            start_date_str = request.data.get('start_date')
+            end_date_str = request.data.get('end_date')
+            request_id = request.data.get('request_id', None)
+            redemption_id = request.data.get('redemption_id', None)
+            is_reviewed = request.data.get('is_reviewed', False)
+            include_test = request.data.get('include_test', False)
+            subsidiary = request.data.get("subsidiary", "all")
+            is_daily_report = request.data.get('is_daily_report', False)
+            report_overview_duration = request.data.get("report_overview_duration", "")
             subsidiary = map_subsidiary_long_to_short(subsidiary)
 
             if is_daily_report:
@@ -261,12 +204,20 @@ class AccuracyViewSet(viewsets.ViewSet):
                 else:
                     start_date = end_date - timezone.timedelta(days=7)
                     start_date = start_date.replace(hour=0, minute=0, second=0, microsecond=0)
-                start_date_str = start_date.strftime('%Y-%m-%dT%H:%M:%S%z')
+                start_date_str = start_date.strftime('%Y-%m-%dT%H:%M:%S%z')  # the downstream logic expects second precision with timezone for its calculations
                 end_date_str = end_date.strftime('%Y-%m-%dT%H:%M:%S%z')
             else:
                 try:
-                    start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z')
-                    end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z')
+                    start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%d')  # we only care about day precision here
+                    end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%d')
+                    # Round:
+                    #   start_date_str down to the start of its day
+                    #   end_date_str up to the beginning of the next day
+                    start_date = timezone.make_aware(start_date)
+                    end_date = timezone.make_aware(end_date)
+
+                    start_date_str = start_date.strftime('%Y-%m-%dT%H:%M:%S%z')  # the downstream logic expects second precision with timezone for its calculations
+                    end_date_str = (end_date + timezone.timedelta(days=1)).strftime('%Y-%m-%dT%H:%M:%S%z')
                 except ValueError:
                     raise InvalidException(excArgs="Date format")
diff --git a/cope2n-api/fwd_api/request/ReportCreationSerializer.py b/cope2n-api/fwd_api/request/ReportCreationSerializer.py
new file mode 100644
index 0000000..fac7c8a
--- /dev/null
+++ b/cope2n-api/fwd_api/request/ReportCreationSerializer.py
@@ -0,0 +1,39 @@
+from rest_framework import serializers
+from django.conf import settings
+
+class ReportCreationSerializer(serializers.Serializer):
+    is_daily_report = serializers.BooleanField(
+        help_text='Whether this is the automated daily report or not',
+        default=False
+    )
+    start_date = serializers.DateField(
+        help_text='Start date (YYYY-mm-DD)',
+        default='2024-01-02'
+    )
+    end_date = serializers.DateField(
+        help_text='End date (YYYY-mm-DD)',
+        default='2024-01-10'
+    )
+    include_test = serializers.BooleanField(
+        help_text='Whether to include test records or not',
+        default=False
+    )
+    # is_reviewed = serializers.ChoiceField(
+    #     help_text='Which records to query',
+    #     # choices=['reviewed', 'not reviewed', 'all'],
+    #     default=False
+    # )
+    # request_id = serializers.CharField(
+    #     help_text='Specific request id'
+    # )
+    # redemption_id = serializers.CharField(
+    #     help_text='Specific redemption id'
+    # )
+    subsidiary = serializers.CharField(
+        help_text='Subsidiary',
+        default="all"
+    )
+    report_overview_duration = serializers.CharField(
+        help_text=f'One of {settings.OVERVIEW_REPORT_DURATION}',
+        default=None
+    )
\ No newline at end of file
diff --git a/cope2n-api/fwd_api/utils/accuracy.py b/cope2n-api/fwd_api/utils/accuracy.py
index 0c8d98e..a1ab5e0 100644
--- a/cope2n-api/fwd_api/utils/accuracy.py
+++ b/cope2n-api/fwd_api/utils/accuracy.py
@@ -133,7 +133,7 @@ class ReportAccumulateByRequest:
         if not total["average_processing_time"].get(report_file.doc_type, None):
             print(f"[WARM]: Weird doctype: {report_file.doc_type}")
- total["average_processing_time"] = IterAvg() + total["average_processing_time"][report_file.doc_type] = IterAvg() total["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0 total["usage"]["imei"] += 1 if report_file.doc_type == "imei" else 0 @@ -168,7 +168,7 @@ class ReportAccumulateByRequest: if not day_data["average_processing_time"].get(report_file.doc_type, None): print(f"[WARM]: Weird doctype: {report_file.doc_type}") - day_data["average_processing_time"] = IterAvg() + day_data["average_processing_time"][report_file.doc_type] = IterAvg() day_data["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0 return day_data @@ -274,8 +274,8 @@ class ReportAccumulateByRequest: _data[month][1][day]["average_accuracy_rate"]["purchase_date"] = _data[month][1][day]["average_accuracy_rate"]["purchase_date"]() _data[month][1][day]["average_accuracy_rate"]["retailer_name"] = _data[month][1][day]["average_accuracy_rate"]["retailer_name"]() _data[month][1][day]["average_accuracy_rate"]["sold_to_party"] = _data[month][1][day]["average_accuracy_rate"]["sold_to_party"]() - _data[month][1][day]["average_processing_time"]["imei"] = _data[month][1][day]["average_processing_time"]["imei"]() - _data[month][1][day]["average_processing_time"]["invoice"] = _data[month][1][day]["average_processing_time"]["invoice"]() + for key in _data[month][1][day]["average_processing_time"].keys(): + _data[month][1][day]["average_processing_time"][key] = _data[month][1][day]["average_processing_time"][key]() _data[month][1][day]["feedback_accuracy"]["imei_number"] = _data[month][1][day]["feedback_accuracy"]["imei_number"]() _data[month][1][day]["feedback_accuracy"]["purchase_date"] = _data[month][1][day]["feedback_accuracy"]["purchase_date"]() @@ -296,8 +296,8 @@ class ReportAccumulateByRequest: _data[month][0]["average_accuracy_rate"]["purchase_date"] = _data[month][0]["average_accuracy_rate"]["purchase_date"]() _data[month][0]["average_accuracy_rate"]["retailer_name"] = _data[month][0]["average_accuracy_rate"]["retailer_name"]() _data[month][0]["average_accuracy_rate"]["sold_to_party"] = _data[month][0]["average_accuracy_rate"]["sold_to_party"]() - _data[month][0]["average_processing_time"]["imei"] = _data[month][0]["average_processing_time"]["imei"]() - _data[month][0]["average_processing_time"]["invoice"] = _data[month][0]["average_processing_time"]["invoice"]() + for key in _data[month][0]["average_processing_time"].keys(): + _data[month][0]["average_processing_time"][key] = _data[month][0]["average_processing_time"][key]() _data[month][0]["feedback_accuracy"]["imei_number"] = _data[month][0]["feedback_accuracy"]["imei_number"]() _data[month][0]["feedback_accuracy"]["purchase_date"] = _data[month][0]["feedback_accuracy"]["purchase_date"]() diff --git a/cope2n-api/scripts/re_feedback.py b/cope2n-api/scripts/re_feedback.py index 34f82a7..34ce227 100644 --- a/cope2n-api/scripts/re_feedback.py +++ b/cope2n-api/scripts/re_feedback.py @@ -17,8 +17,8 @@ login_token = None # Define the login credentials login_credentials = { 'username': 'sbt', - # 'password': '7Eg4AbWIXDnufgn' - 'password': 'abc' + 'password': '7Eg4AbWIXDnufgn' + # 'password': 'abc' } # Define the command to call the update API diff --git a/cope2n-api/scripts/script.py b/cope2n-api/scripts/script.py index 7ff2d96..fa554c6 100644 --- a/cope2n-api/scripts/script.py +++ b/cope2n-api/scripts/script.py @@ -21,8 +21,8 @@ login_credentials = { # 
Define the command to call the update API update_url = f'{proxy_url}/api/ctel/make_report/' -update_params = { - 'is_daily_report': 'true', +update_data = { + 'is_daily_report': True, 'report_overview_duration': '', 'subsidiary': None } @@ -33,9 +33,9 @@ def update_report(login_token, report_overview_duration=["30d", "7d"], subsidiar headers = {'Authorization': login_token} for dur in report_overview_duration: for sub in subsidiary: - update_params["report_overview_duration"] = dur - update_params["subsidiary"] = sub - update_response = requests.get(update_url, params=update_params, headers=headers) + update_data["report_overview_duration"] = dur + update_data["subsidiary"] = sub + update_response = requests.post(update_url, data=update_data, headers=headers) print("[INFO]: update_response at {} by {} - {} with status {}".format(datetime.now(), dur, sub, update_response.status_code)) update_response.raise_for_status() time.sleep(update_cost) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 1d9e946..613a8e7 100755 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -16,7 +16,7 @@ services: networks: - ctel-sbt privileged: true - image: sidp/cope2n-ai-fi-sbt + image: sidp/cope2n-ai-fi-sbt:latest environment: - PYTHONPATH=${PYTHONPATH}:/workspace/cope2n-ai-fi # For import module - CELERY_BROKER=amqp://${RABBITMQ_DEFAULT_USER}:${RABBITMQ_DEFAULT_PASS}@rabbitmq-sbt:5672 @@ -41,7 +41,7 @@ services: build: context: cope2n-api dockerfile: Dockerfile - image: sidp/cope2n-be-fi-sbt + image: sidp/cope2n-be-fi-sbt:latest environment: - MEDIA_ROOT=${MEDIA_ROOT} - DB_ENGINE=${DB_ENGINE} @@ -84,12 +84,12 @@ services: depends_on: db-sbt: condition: service_started - command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input && - python manage.py makemigrations && - python manage.py migrate && - python manage.py compilemessages && - gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod - # command: bash -c "tail -f > /dev/null" + # command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input && + # python manage.py makemigrations && + # python manage.py migrate && + # python manage.py compilemessages && + # gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod + command: bash -c "tail -f > /dev/null" minio: image: minio/minio @@ -135,7 +135,7 @@ services: build: context: cope2n-api dockerfile: Dockerfile - image: sidp/cope2n-be-fi-sbt + image: sidp/cope2n-be-fi-sbt:latest environment: - MEDIA_ROOT=${MEDIA_ROOT} - PYTHONPATH=${PYTHONPATH}:/app # For import module @@ -211,7 +211,7 @@ services: context: cope2n-fe shm_size: 10gb dockerfile: Dockerfile - image: sidp/cope2n-fe-fi-sbt + image: sidp/cope2n-fe-fi-sbt:latest shm_size: 10gb privileged: true ports: diff --git a/scripts/crawl_database_by_time_with_accuracy_contrain.py b/scripts/crawl_database_by_time_with_accuracy_contrain.py new file mode 100644 index 0000000..afb0b71 --- /dev/null +++ b/scripts/crawl_database_by_time_with_accuracy_contrain.py @@ -0,0 +1,171 @@ +import csv +from typing import Any +import psycopg2 +import boto3 +import os +from tqdm import tqdm +from datetime import datetime, timedelta +from pytz import timezone + +from dotenv import load_dotenv + +load_dotenv("../.env_prod") +# load_dotenv("../.env") + +OUTPUT_NAME = "0131-0206" +START_DATE = datetime(2024, 1, 31, tzinfo=timezone('Asia/Singapore')) +END_DATE = 
datetime(2024, 2, 6, tzinfo=timezone('Asia/Singapore')) +BAD_THRESHOLD = 0.75 + +REVIEW_ACC_COL = 19 +FEEDBACK_ACC_COL = 18 +REQUEST_ID_COL = 6 + +# Database connection details +db_host = os.environ.get('DB_HOST', "") +# db_host = "42.96.42.13" +db_name = os.environ.get('DB_SCHEMA', "") +db_user = os.environ.get('DB_USER', "") +db_password = os.environ.get('DB_PASSWORD', "") + +# S3 bucket details +s3_bucket_name = os.environ.get('S3_BUCKET_NAME', "") +s3_folder_prefix = 'sbt_invoice' + +# S3 access credentials +access_key = os.environ.get('S3_ACCESS_KEY', "") +secret_key = os.environ.get('S3_SECRET_KEY', "") + +class RequestAtt: + def __init__(self) -> None: + self.feedback_accuracy = [] + self.reiviewed_accuracy = [] + self.acc = 0 + self.request_id = None + self.is_bad = False + self.data = [] + + def add_file(self, file): + self.data.append(file) + if file[REVIEW_ACC_COL]: + for key in file[REVIEW_ACC_COL].keys(): + self.feedback_accuracy += file[REVIEW_ACC_COL][key] + if file[FEEDBACK_ACC_COL]: + for key in file[FEEDBACK_ACC_COL].keys(): + self.feedback_accuracy += file[FEEDBACK_ACC_COL][key] + + def is_bad_image(self): + fb = min(self.feedback_accuracy)/len(self.feedback_accuracy) if len(self.feedback_accuracy) else None + rv = min(self.reiviewed_accuracy)/len(self.reiviewed_accuracy) if len(self.reiviewed_accuracy) else None + if not fb and not rv: + self.is_bad = False + return False + elif fb and rv is None: + self.is_bad = fb < BAD_THRESHOLD + self.acc = fb + return fb < BAD_THRESHOLD + elif fb and rv: + self.is_bad = rv < BAD_THRESHOLD + self.acc = rv + return rv < BAD_THRESHOLD + return False + +def get_request(cursor, request_in_id): + query = "SELECT * FROM fwd_api_subscriptionrequest WHERE id = %s" + cursor.execute(query, (request_in_id,)) + data = cursor.fetchone() + return data if data else None + +# Request IDs for filtering +def main(): + # Connect to the PostgreSQL database + conn = psycopg2.connect( + host=db_host, + database=db_name, + user=db_user, + password=db_password + ) + + # Create a cursor + cursor = conn.cursor() + + + # Execute the SELECT query with the filter + query = "SELECT * FROM fwd_api_subscriptionrequestfile WHERE created_at >= %s AND created_at <= %s AND feedback_accuracy IS NOT NULL" + cursor.execute(query, (START_DATE, END_DATE)) + + # Fetch the filtered data + data = cursor.fetchall() + + # Define the CSV file path + csv_file_path = f'{OUTPUT_NAME}.csv' + data_dict = {} + # Filter out requests request that has quality < 75% + for i, _d in enumerate(data): + if not data_dict.get(_d[REQUEST_ID_COL], None): + data_dict[_d[REQUEST_ID_COL]] = RequestAtt() + data_dict[_d[REQUEST_ID_COL]].request_id = _d[REQUEST_ID_COL] + data_dict[_d[REQUEST_ID_COL]].add_file(_d) + + bad_images = [] + for k in data_dict.keys(): + if data_dict[k].is_bad_image(): + bad_images.append(data_dict[k]) + + request_ids = [] + # Write the data to the CSV file + for bad_image in bad_images: + request = get_request(cursor, bad_image.request_id) + if request: + request_ids.append(request[3]) + + # ###################### Get bad requests ###################### + placeholders = ','.join(['%s'] * len(request_ids)) + + # Execute the SELECT query with the filter + query = f"SELECT * FROM fwd_api_subscriptionrequest WHERE request_id IN ({placeholders})" + cursor.execute(query, request_ids) + + # Fetch the filtered data + data = cursor.fetchall() + + # Define the CSV file path + csv_file_path = f'{OUTPUT_NAME}.csv' + + # Write the data to the CSV file + with open(csv_file_path, 'w', 
newline='') as csv_file: + writer = csv.writer(csv_file) + writer.writerow([desc[0] for desc in cursor.description]) # Write column headers + writer.writerows(data) # Write the filtered data rows + + # Close the cursor and database connection + cursor.close() + conn.close() + + # Download folders from S3 + s3_client = boto3.client( + 's3', + aws_access_key_id=access_key, + aws_secret_access_key=secret_key + ) + + for request_id in tqdm(request_ids): + folder_key = f"{s3_folder_prefix}/{request_id}/" # Assuming folder structure like: s3_bucket_name/s3_folder_prefix/request_id/ + local_folder_path = f"{OUTPUT_NAME}/{request_id}/" # Path to the local folder to save the downloaded files + os.makedirs(OUTPUT_NAME, exist_ok=True) + os.makedirs(local_folder_path, exist_ok=True) + + + # List objects in the S3 folder + response = s3_client.list_objects_v2(Bucket=s3_bucket_name, Prefix=folder_key) + objects = response.get('Contents', []) + + for s3_object in objects: + object_key = s3_object['Key'] + local_file_path = local_folder_path + object_key.split('/')[-1] # Extracting the file name from the object key + + # Download the S3 object to the local file + s3_client.download_file(s3_bucket_name, object_key, local_file_path) + +if __name__ == "__main__": + main() \ No newline at end of file
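
Usage note: with this change, make_report is exercised with a POST body instead of GET query parameters. The snippet below is a minimal illustrative sketch and is not part of the changeset; the endpoint path and field names come from scripts/script.py and ReportCreationSerializer above, while proxy_url and the Authorization token are placeholders.

    import requests

    proxy_url = 'http://localhost:9000'  # placeholder; use the same base URL as cope2n-api/scripts/script.py
    login_token = '<token obtained from the existing login flow>'  # placeholder

    payload = {
        'is_daily_report': False,
        'start_date': '2024-01-02',  # day precision; the view expands these to full-day bounds
        'end_date': '2024-01-10',
        'include_test': False,
        'subsidiary': 'all',
        'report_overview_duration': '',
    }
    response = requests.post(f'{proxy_url}/api/ctel/make_report/',
                             json=payload,
                             headers={'Authorization': login_token})
    response.raise_for_status()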