From ac654753566c6ad593b07ae0fce52e36e06d3f98 Mon Sep 17 00:00:00 2001 From: dx-tan Date: Fri, 5 Jan 2024 14:18:16 +0700 Subject: [PATCH] Add: API list request --- cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py | 1 + .../celery_worker/mock_process_tasks_fi.py | 1 + cope2n-ai-fi/modules/sdsvkvu | 2 +- cope2n-api/fwd_api/api/accuracy_view.py | 183 ++++++++++++++++++ cope2n-api/fwd_api/api/ctel_view.py | 3 +- cope2n-api/fwd_api/api_router.py | 3 + ...ionrequest_client_request_time_and_more.py | 28 +++ .../fwd_api/models/SubscriptionRequest.py | 3 + deploy_images.sh | 23 ++- docker-compose-dev.yml | 9 +- scripts/crawl_database.py | 80 ++++++-- scripts/crawl_database_by_time.py | 93 +++++++++ 12 files changed, 402 insertions(+), 27 deletions(-) create mode 100644 cope2n-api/fwd_api/api/accuracy_view.py create mode 100644 cope2n-api/fwd_api/migrations/0164_subscriptionrequest_client_request_time_and_more.py create mode 100644 scripts/crawl_database_by_time.py diff --git a/cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py b/cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py index 49038d9..c03b041 100755 --- a/cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py +++ b/cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py @@ -33,6 +33,7 @@ def sbt_predict(image_url, engine) -> None: img = cv2.imdecode(arr, -1) save_dir = "./tmp_results" + os.makedirs(save_dir, exist_ok=True) # image_path = os.path.join(save_dir, f"{image_url}.jpg") os.makedirs(save_dir, exist_ok = True) tmp_image_path = os.path.join(save_dir, f"{uuid.uuid4()}.jpg") diff --git a/cope2n-ai-fi/celery_worker/mock_process_tasks_fi.py b/cope2n-ai-fi/celery_worker/mock_process_tasks_fi.py index ef16d45..b114330 100755 --- a/cope2n-ai-fi/celery_worker/mock_process_tasks_fi.py +++ b/cope2n-ai-fi/celery_worker/mock_process_tasks_fi.py @@ -69,6 +69,7 @@ def process_sbt_invoice(rq_id, list_url, metadata): c_connector.process_sbt_invoice_result((rq_id, hoadon, metadata)) return {"rq_id": rq_id} except Exception as e: + print(f"[ERROR]: Failed to extract invoice: {e}") print(e) hoadon = {"status": 404, "content": {}} c_connector.process_sbt_invoice_result((rq_id, hoadon, metadata)) diff --git a/cope2n-ai-fi/modules/sdsvkvu b/cope2n-ai-fi/modules/sdsvkvu index 11fb958..6907ea0 160000 --- a/cope2n-ai-fi/modules/sdsvkvu +++ b/cope2n-ai-fi/modules/sdsvkvu @@ -1 +1 @@ -Subproject commit 11fb9588df7e6cb03e7a761e3f728f11045bee09 +Subproject commit 6907ea0183b141e3b4f3c21758c9123f1e9b2a27 diff --git a/cope2n-api/fwd_api/api/accuracy_view.py b/cope2n-api/fwd_api/api/accuracy_view.py new file mode 100644 index 0000000..e482e58 --- /dev/null +++ b/cope2n-api/fwd_api/api/accuracy_view.py @@ -0,0 +1,183 @@ +from rest_framework import status, viewsets +from rest_framework.decorators import action +from rest_framework.response import Response +from django.core.paginator import Paginator +from django.http import JsonResponse +from datetime import datetime +from django.utils import timezone +from django.db.models import Q + +from drf_spectacular.utils import extend_schema, OpenApiParameter, OpenApiTypes +# from drf_spectacular.types import OpenApiString +from ..models import SubscriptionRequest + + +class AccuracyViewSet(viewsets.ViewSet): + lookup_field = "username" + + @extend_schema( + parameters=[ + OpenApiParameter( + name='start_date', + location=OpenApiParameter.QUERY, + description='Start date (YYYY-mm-DDTHH:MM:SS)', + type=OpenApiTypes.DATE, + default='2023-01-02T00:00:00', + ), + OpenApiParameter( + name='end_date', + location=OpenApiParameter.QUERY, + description='End date 
(YYYY-mm-DDTHH:MM:SS)', + type=OpenApiTypes.DATE, + default='2024-01-10T00:00:00', + ), + OpenApiParameter( + name='include_test', + location=OpenApiParameter.QUERY, + description='Whether to include test record or not', + type=OpenApiTypes.BOOL, + ), + OpenApiParameter( + name='is_reviewed', + location=OpenApiParameter.QUERY, + description='Which records to be query', + type=OpenApiTypes.STR, + enum=['reviewed', 'not reviewed', 'all'], + ), + OpenApiParameter( + name='request_id', + location=OpenApiParameter.QUERY, + description='Specific request id', + type=OpenApiTypes.STR, + ), + OpenApiParameter( + name='redemption_id', + location=OpenApiParameter.QUERY, + description='Specific redemption id', + type=OpenApiTypes.STR, + ), + OpenApiParameter( + name='quality', + location=OpenApiParameter.QUERY, + description='One or more of [bad, good, all]', + type=OpenApiTypes.STR, + enum=['bad', 'good', 'all'], + ), + OpenApiParameter( + name='page', + location=OpenApiParameter.QUERY, + description='Page number', + type=OpenApiTypes.INT, + required=False + ), + OpenApiParameter( + name='page_size', + location=OpenApiParameter.QUERY, + description='Number of items per page', + type=OpenApiTypes.INT, + required=False + ), + ], + responses=None, tags=['Accuracy'] + ) + @action(detail=False, url_path="request_list", methods=["GET"]) + def get_subscription_requests(self, request): + if request.method == 'GET': + start_date_str = request.GET.get('start_date') + end_date_str = request.GET.get('end_date') + page_number = int(request.GET.get('page', 1)) + page_size = int(request.GET.get('page_size', 10)) + request_id = request.GET.get('request_id', None) + redemption_id = request.GET.get('redemption_id', None) + is_reviewed = request.GET.get('is_reviewed', None) + include_test = request.GET.get('include_test', False) + quality = request.GET.get('quality', None) + + try: + start_date = datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S') + end_date = datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S') + except ValueError: + return JsonResponse({'error': 'Invalid date format. 
Please use YYYY-MM-DD.'}, status=400) + + base_query = Q(created_at__range=(start_date, end_date)) + if request_id: + base_query &= Q(request_id=request_id) + if redemption_id: + base_query &= Q(redemption_id=redemption_id) + base_query &= Q(is_test_request=False) + if isinstance(include_test, str): + include_test = True if include_test=="true" else False + if include_test: + # base_query = ~base_query + base_query.children = base_query.children[:-1] + + elif isinstance(include_test, bool): + if include_test: + base_query = ~base_query + if isinstance(is_reviewed, str): + if is_reviewed == "reviewed": + base_query &= Q(is_reviewed=True) + elif is_reviewed == "not reviewed": + base_query &= Q(is_reviewed=False) + elif is_reviewed == "all": + pass + if isinstance(quality, str): + if quality == "good": + base_query &= Q(is_bad_image_quality=False) + elif quality == "bad": + base_query &= Q(is_bad_image_quality=True) + elif quality == "all": + pass + + subscription_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at') + + paginator = Paginator(subscription_requests, page_size) + page = paginator.get_page(page_number) + + data = [] + for request in page: + imeis = [] + purchase_date = [] + retailer = "" + try: + if request.reviewed_result is not None: + imeis = request.reviewed_result.get("imei_number", []) + purchase_date = request.reviewed_result.get("purchase_date", []) + retailer = request.reviewed_result.get("retailername", "") + elif request.feedback_result is not None : + imeis = request.feedback_result.get("imei_number", []) + purchase_date = request.feedback_result.get("purchase_date", []) + retailer = request.feedback_result.get("retailername", "") + elif request.predict_result is not None: + if request.predict_result.get("status", 404) == 200: + imeis = request.predict_result.get("content", {}).get("document", [])[0].get("content", [])[3].get("value", []) + purchase_date = request.predict_result.get("content", {}).get("document", [])[0].get("content", [])[2].get("value", []) + retailer = request.predict_result.get("content", {}).get("document", [])[0].get("content", [])[0].get("value", []) + except Exception as e: + print(f"[ERROR]: {e}") + print(f"[ERROR]: {request}") + data.append({ + 'RequestID': request.request_id, + 'RedemptionID': request.redemption_id, + 'IMEIs': imeis, + 'Purchase Date': purchase_date, + 'Retailer': retailer, + 'Client Request Time (ms)': request.client_request_time, + 'Server Processing Time (ms)': request.preprocessing_time + request.ai_inference_time, + 'Is Reviewed': request.is_reviewed, + 'Is Bad Quality': request.is_bad_image_quality, + 'created_at': request.created_at.isoformat() + }) + + response = { + 'subscription_requests': data, + 'page': { + 'number': page.number, + 'total_pages': page.paginator.num_pages, + 'count': page.paginator.count, + } + } + + return JsonResponse(response) + + return JsonResponse({'error': 'Invalid request method.'}, status=405) \ No newline at end of file diff --git a/cope2n-api/fwd_api/api/ctel_view.py b/cope2n-api/fwd_api/api/ctel_view.py index 57155e2..6b77471 100755 --- a/cope2n-api/fwd_api/api/ctel_view.py +++ b/cope2n-api/fwd_api/api/ctel_view.py @@ -347,8 +347,7 @@ class CtelViewSet(viewsets.ViewSet): S3_path = FileUtils.save_to_S3(file_name, subcription_request, file_path) return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id}) - - + @extend_schema(request=None, responses=None, tags=['Data']) @extend_schema(request=None, responses=None, tags=['templates'], 
methods=['GET']) @action(detail=False, url_path=r"media/(?P\w+)/(?P\w+)", methods=["GET"]) diff --git a/cope2n-api/fwd_api/api_router.py b/cope2n-api/fwd_api/api_router.py index 6743957..9a466dc 100755 --- a/cope2n-api/fwd_api/api_router.py +++ b/cope2n-api/fwd_api/api_router.py @@ -2,6 +2,8 @@ from django.conf import settings from rest_framework.routers import DefaultRouter, SimpleRouter from fwd_api.api.ctel_view import CtelViewSet +from fwd_api.api.accuracy_view import AccuracyViewSet + from fwd_api.api.ctel_user_view import CtelUserViewSet from fwd_api.api.ctel_template_view import CtelTemplateViewSet @@ -13,6 +15,7 @@ else: router.register("ctel", CtelViewSet, basename="CtelAPI") router.register("ctel", CtelUserViewSet, basename="CtelUserAPI") +router.register("ctel", AccuracyViewSet, basename="AccuracyAPI") app_name = "api" urlpatterns = router.urls diff --git a/cope2n-api/fwd_api/migrations/0164_subscriptionrequest_client_request_time_and_more.py b/cope2n-api/fwd_api/migrations/0164_subscriptionrequest_client_request_time_and_more.py new file mode 100644 index 0000000..b86ff1e --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0164_subscriptionrequest_client_request_time_and_more.py @@ -0,0 +1,28 @@ +# Generated by Django 4.1.3 on 2024-01-04 08:24 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0163_subscriptionrequest_ai_inference_profile'), + ] + + operations = [ + migrations.AddField( + model_name='subscriptionrequest', + name='client_request_time', + field=models.FloatField(default=-1), + ), + migrations.AddField( + model_name='subscriptionrequest', + name='redemption_id', + field=models.CharField(max_length=200, null=True), + ), + migrations.AddField( + model_name='subscriptionrequest', + name='reviewed_result', + field=models.JSONField(null=True), + ), + ] diff --git a/cope2n-api/fwd_api/models/SubscriptionRequest.py b/cope2n-api/fwd_api/models/SubscriptionRequest.py index a852104..6018274 100755 --- a/cope2n-api/fwd_api/models/SubscriptionRequest.py +++ b/cope2n-api/fwd_api/models/SubscriptionRequest.py @@ -10,10 +10,12 @@ class SubscriptionRequest(models.Model): pages_left: int = models.IntegerField(default=1) doc_type: str = models.CharField(max_length=100) request_id = models.CharField(max_length=200) # Change to request_id + redemption_id = models.CharField(max_length=200, null=True) # Change to request_id process_type = models.CharField(max_length=200) # driver/id/invoice provider_code = models.CharField(max_length=200, default="Guest") # Request source FWD/CTel predict_result = models.JSONField(null=True) feedback_result = models.JSONField(null=True) + reviewed_result = models.JSONField(null=True) status = models.IntegerField() # 1: Processing(Pending) 2: PredictCompleted 3: ReturnCompleted subscription = models.ForeignKey(Subscription, on_delete=models.CASCADE) created_at = models.DateTimeField(default=timezone.now, db_index=True) @@ -23,6 +25,7 @@ class SubscriptionRequest(models.Model): ai_inference_profile = models.JSONField(null=True) preprocessing_time = models.FloatField(default=-1) + client_request_time = models.FloatField(default=-1) ai_inference_start_time = models.FloatField(default=0) ai_inference_time = models.FloatField(default=0) cpu_percent = models.FloatField(default=-1) diff --git a/deploy_images.sh b/deploy_images.sh index 75d720d..cb89a0e 100755 --- a/deploy_images.sh +++ b/deploy_images.sh @@ -2,26 +2,33 @@ set -e tag=$1 +is_prod=${$2:-False} echo "[INFO] Tag received from 
Python: $tag" -echo "[INFO] Updating everything the remote..." -git submodule update --recursive --remote +# echo "[INFO] Updating everything the remote..." +# git submodule update --recursive --remote echo "[INFO] Pushing AI image with tag: $tag..." docker compose -f docker-compose-dev.yml build cope2n-fi-sbt -docker tag sidp/cope2n-ai-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-ai-fi-sbt:${tag} -docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-ai-fi-sbt:${tag} +docker tag sidp/cope2n-ai-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-ai-fi-sbt:${tag} +docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-ai-fi-sbt:${tag} +# docker tag sidp/cope2n-ai-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-ai-fi-sbt:production +# docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-ai-fi-sbt:production echo "[INFO] Pushing BE image with tag: $tag..." docker compose -f docker-compose-dev.yml build be-ctel-sbt -docker tag sidp/cope2n-be-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-be-fi-sbt:${tag} -docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-be-fi-sbt:${tag} +docker tag sidp/cope2n-be-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-be-fi-sbt:${tag} +# docker tag sidp/cope2n-be-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-be-fi-sbt:production +docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-be-fi-sbt:${tag} +# docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-be-fi-sbt:production echo "[INFO] Pushing FE image with tag: $tag..." docker compose -f docker-compose-dev.yml build fe-sbt -docker tag sidp/cope2n-fe-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-fe-fi-sbt:${tag} -docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-fe-fi-sbt:${tag} +docker tag sidp/cope2n-fe-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-fe-fi-sbt:${tag} +# docker tag sidp/cope2n-fe-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-fe-fi-sbt:production +docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-fe-fi-sbt:${tag} +# docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-fe-fi-sbt:production cp ./docker-compose-prod.yml ./docker-compose_${tag}.yml sed -i "s/{{tag}}/$tag/g" ./docker-compose_${tag}.yml diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 5244ab2..5d58c16 100755 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -75,14 +75,14 @@ services: networks: - ctel-sbt volumes: - # - ${HOST_MEDIA_FOLDER}:${MEDIA_ROOT} + # - BE_media:${MEDIA_ROOT} - BE_static:/app/static - ./cope2n-api:/app working_dir: /app depends_on: db-sbt: condition: service_started - command: sh -c "chmod -R 777 /app/static; sleep 5; python manage.py collectstatic --no-input && + command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input && python manage.py makemigrations && python manage.py migrate && python manage.py compilemessages && @@ -165,7 +165,7 @@ services: rabbitmq-sbt: condition: service_started volumes: - # - ${HOST_MEDIA_FOLDER}:${MEDIA_ROOT} + # - BE_media:${MEDIA_ROOT} - ./cope2n-api:/app working_dir: /app @@ -223,4 +223,5 @@ services: volumes: db_data: - BE_static: \ No newline at end of file + BE_static: + BE_media: \ No newline at end of file diff --git a/scripts/crawl_database.py b/scripts/crawl_database.py index 7a06cf0..7cfacd5 100644 --- 
a/scripts/crawl_database.py +++ b/scripts/crawl_database.py @@ -4,7 +4,11 @@ import boto3 import os from tqdm import tqdm -OUTPUT_NAME = "issue_7" +from dotenv import load_dotenv + +load_dotenv("../.env_prod") + +OUTPUT_NAME = "5Jan" # Database connection details db_host = os.environ.get('DB_HOST', "") @@ -22,17 +26,69 @@ secret_key = os.environ.get('S3_SECRET_KEY', "") # Request IDs for filtering request_ids = [ -'SAPe39e970592394b27a17d4a64c39f7ed0', -'SAP477a02a21faf41ecbd1a0bb21636e644', -'SAP459d58a7dba84e7195f5ad8f46fc1530', -'SAPa5aaa0e1ce8c4824a7b0ded2e550caec', -'SAP492c063db44049c6b1e44f59c531f8d8', -'SAP3d0bdd5cb4ce4291b0cb77d7de0a48e9', -'SAP7e2c673e49c441a991661d1227342131', -'SAPc26974bcac2649b28227981459a427aa', -'SAP25b12dde6b854c70b512ac79059ac1d4', -'SAP_20240102194138_bf4a3cc4e0304d0385126b6592c2632d', -'SAP_20240102214550_8389ec1b84a249738eed9d2152bf0922', +'SAP_20240104082259_85c7f4dd262946d183dbec826fc6709e', +'SAP_20240104082709_c05319c56fd3422dbf133aee33fc3e10', +'SAP_20240104091512_23ae1a81f1314be0a27ebeae0e8fa0d7', +'SAP_20240104091512_23ae1a81f1314be0a27ebeae0e8fa0d7', +'SAP_20240104091816_025c90b9789246ed811772003622fa0d', +'SAP_20240104092541_5c71e535f07c4cc8803b45336ec70f77', +'SAP_20240104100259_5a667d33cb914e7ba5a4447b9e17d649', +'SAP_20240104101145_a7010bac159f47bc95d5866e6c5f5bdf', +'SAP_20240104105702_95252229252b4e238add117919ce882a', +'SAP_20240104112108_34b2cca84a42473ca77bc316e787fe2e', +'SAP_20240104114038_dd57ecf7982c4a5eaf1409f5ef050fab', +'SAP_20240104115942_1b77f411791940a4a85c838c2e9931ad', +'SAP_20240104120746_d63319f4cde343d894f9b89706756a9d', +'SAP_20240104123607_48d25c04fec6411dbf013c6a19054e77', +'SAP_20240104130957_ece21bad331b4f2cad0887693331aa3a', +'SAP_20240104131228_edebee4000ae4bd382feaea5d6c82031', +'SAP_20240104132641_97909efd013f45e89d83d36a5ea35c52', +'SAP_20240104133527_ad55f6ee667643ba8ae65e9ef1c32418', +'SAP_20240104134014_2d2cdbc1b06a44868ce1b32cdb53864f', +'SAP_20240104134425_9b37555ef8094153838e6048f7c63c9b', +'SAP_20240104134457_55a1cf1e371146d995c8849cc0ba7c7b', +'SAP_20240104134609_3f7d308e467d43dbb59a7bcc02e3a7d2', +'SAP_20240104134709_c708daf83f7e4aa69ab9696afe1a9081', +'SAP_20240104135007_44b7a30c5e9c41a0b8065ac4e7000223', +'SAP_20240104141547_7203ddb915274e99a08ae6e54ec49cbd', +'SAP_20240104141559_62fd19a6179248ecb4ff15b33338b294', +'SAP_20240104142352_68699cbe140f4264b858981a3ac67e40', +'SAP_20240104143937_801931cc1f344a4ca8384dfe13d1accc', +'SAP_20240104144730_3180a8919e604e26a188ce051465c392', +'SAP_20240104144933_3380f64019634769befed49e9a671bc6', +'SAP_20240104151239_76ae2f1d02444f7fabbc104eb77fe45f', +'SAP_20240104151243_61775c88685d434d98bb9fc7a9889b8e', +'SAP_20240104151243_61775c88685d434d98bb9fc7a9889b8e', +'SAP_20240104151243_61775c88685d434d98bb9fc7a9889b8e', +'SAP_20240104151638_a08a61448a58459a8f2209f64e54c213', +'SAP_20240104152030_479259e84c5b449499df2cb1023e91ac', +'SAP_20240104160108_a03634c80583454494b77efcdecbcc71', +'SAP_20240104160108_a03634c80583454494b77efcdecbcc71', +'SAP_20240104160311_e7cb02a11bbd4ea1906b3758e97f33ab', +'SAP_20240104161305_89c5518563224ab89345439dffd504a5', +'SAP_20240104161305_89c5518563224ab89345439dffd504a5', +'SAP_20240104164022_0b94af24db9d4ebe9af2086a4bd3cd7e', +'SAP_20240104170837_58165ec9f88d4e4aa3095ba3dda201d7', +'SAP_20240104171740_10279cfebbf344f184bbb429cb9a15ad', +'SAP_20240104175202_247892a4dc7f40f28eafac9c2ad85971', +'SAP_20240104180517_8ce7a1981dc743e08e09284fd904d536', +'SAP_20240104182034_406bac0ab0684727b9efb1bb9b422026', 
+'SAP_20240104182426_92a48bb4b85a4c3abb48e0d7cf727777', +'SAP_20240104183506_aa1fa7d6774a4509a142a6f4a7b5af29', +'SAP_20240104185716_f9d464e42c314370910913b37133e6c3', +'SAP_20240104190220_573244d03bb8408dbca422ff60eb527a', +'SAP_20240104191236_deedcc588b7b4928a950f7dc2ce4230c', +'SAP_20240104191236_deedcc588b7b4928a950f7dc2ce4230c', +'SAP_20240104192614_990bf10c38e144a7bf489548d356720e', +'SAP_20240104192614_990bf10c38e144a7bf489548d356720e', +'SAP_20240104212143_f8c1b4a6e6e443fcb5e882c7a5b917f3', +'SAP_20240104212924_ee1998a60d6848af9576292ac383037f', +'SAP_20240104214418_f8e1abf808c8499097ecddf014d401c7', +'SAP_20240104214619_8d27c05a9ce74b738b20195cb816bfbf', +'SAP_20240104215037_477863cdc0aa4d5fa1f05bbb0ae673ed', +'SAP_20240104221543_37605982df624324ad2594e268054361', +'SAP_20240104225026_acacd06ea6de4a738bc47683dc53f378', +'SAP_20240104235743_b48aa3e744ed428795171d84066adefe', ] # Connect to the PostgreSQL database diff --git a/scripts/crawl_database_by_time.py b/scripts/crawl_database_by_time.py new file mode 100644 index 0000000..17f6570 --- /dev/null +++ b/scripts/crawl_database_by_time.py @@ -0,0 +1,93 @@ +import csv +import psycopg2 +import boto3 +import os +from tqdm import tqdm +from datetime import datetime, timedelta +from pytz import timezone + +from dotenv import load_dotenv + +load_dotenv("../.env_prod") + +OUTPUT_NAME = "missing_records" +START_DATE = datetime(2023, 12, 28, tzinfo=timezone('Asia/Ho_Chi_Minh')) +END_DATE = datetime(2024, 1, 3, tzinfo=timezone('Asia/Ho_Chi_Minh')) + +# Database connection details +db_host = os.environ.get('DB_HOST', "") +db_name = os.environ.get('DB_SCHEMA', "") +db_user = os.environ.get('DB_USER', "") +db_password = os.environ.get('DB_PASSWORD', "") + +# S3 bucket details +s3_bucket_name = os.environ.get('S3_BUCKET_NAME', "") +s3_folder_prefix = 'sbt_invoice' + +# S3 access credentials +access_key = os.environ.get('S3_ACCESS_KEY', "") +secret_key = os.environ.get('S3_SECRET_KEY', "") + +# Request IDs for filtering + +# Connect to the PostgreSQL database +conn = psycopg2.connect( + host=db_host, + database=db_name, + user=db_user, + password=db_password +) + +# Create a cursor +cursor = conn.cursor() + + +# Execute the SELECT query with the filter +query = "SELECT * FROM fwd_api_subscriptionrequest WHERE created_at >= %s AND created_at <= %s" +cursor.execute(query, (START_DATE, END_DATE)) + +# Fetch the filtered data +data = cursor.fetchall() + +# Define the CSV file path +csv_file_path = f'{OUTPUT_NAME}.csv' + +# Write the data to the CSV file +with open(csv_file_path, 'w', newline='') as csv_file: + writer = csv.writer(csv_file) + writer.writerow([desc[0] for desc in cursor.description]) # Write column headers + writer.writerows(data) # Write the filtered data rows + +# Close the cursor and database connection +cursor.close() +conn.close() + +# Download folders from S3 +s3_client = boto3.client( + 's3', + aws_access_key_id=access_key, + aws_secret_access_key=secret_key +) + +request_ids = [] +for rq in data: + rq_id = rq[3] + request_ids.append(rq_id) + +for request_id in tqdm(request_ids): + folder_key = f"{s3_folder_prefix}/{request_id}/" # Assuming folder structure like: s3_bucket_name/s3_folder_prefix/request_id/ + local_folder_path = f"{OUTPUT_NAME}/{request_id}/" # Path to the local folder to save the downloaded files + os.makedirs(OUTPUT_NAME, exist_ok=True) + os.makedirs(local_folder_path, exist_ok=True) + + + # List objects in the S3 folder + response = s3_client.list_objects_v2(Bucket=s3_bucket_name, Prefix=folder_key) + 
+    objects = response.get('Contents', [])
+
+    for s3_object in objects:
+        object_key = s3_object['Key']
+        local_file_path = local_folder_path + object_key.split('/')[-1]  # Extract the file name from the object key
+
+        # Download the S3 object to the local file
+        s3_client.download_file(s3_bucket_name, object_key, local_file_path)
\ No newline at end of file
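
Usage sketch for the new request_list action, for anyone exercising this patch locally. The base URL, port, trailing-slash behaviour, and Authorization header are assumptions (the patch only registers AccuracyViewSet under the "ctel" router prefix); the query parameters and response keys mirror the extend_schema declaration and the JSON built in get_subscription_requests.

import requests

# Assumed deployment prefix and auth scheme; adjust to the actual gateway and router settings.
BASE_URL = "http://localhost:9000/api/ctel"
HEADERS = {"Authorization": "Bearer <token>"}  # hypothetical token

params = {
    "start_date": "2024-01-02T00:00:00",  # YYYY-mm-DDTHH:MM:SS, as parsed by the view
    "end_date": "2024-01-10T00:00:00",
    "include_test": "false",
    "is_reviewed": "all",
    "quality": "all",
    "page": 1,
    "page_size": 10,
}

resp = requests.get(f"{BASE_URL}/request_list", headers=HEADERS, params=params)
resp.raise_for_status()
payload = resp.json()
print(payload["page"])  # {'number': ..., 'total_pages': ..., 'count': ...}
for row in payload["subscription_requests"]:
    print(row["RequestID"], row["Retailer"], row["IMEIs"], row["Is Reviewed"])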
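
For context on the three columns added by migration 0164 (client_request_time, redemption_id, reviewed_result), a minimal ORM sketch of how a review flow might write them and how the accuracy endpoint reads them back. The reviewer workflow and example values are assumptions; only the field names, types, and the reviewed_result keys (imei_number, purchase_date, retailername) come from this patch.

from fwd_api.models import SubscriptionRequest

# Hypothetical review flow: attach the human-corrected result to a request.
req = SubscriptionRequest.objects.get(request_id="SAP_example_request_id")  # placeholder id
req.redemption_id = "RD-0001"  # assumed redemption id format
req.reviewed_result = {
    "imei_number": ["356789104563218"],
    "purchase_date": ["2024-01-04"],
    "retailername": "Example Retailer",
}
req.is_reviewed = True  # field already filtered on by the new accuracy view
req.save(update_fields=["redemption_id", "reviewed_result", "is_reviewed"])

# The accuracy endpoint reports client time next to server processing time:
print(req.client_request_time, req.preprocessing_time + req.ai_inference_time)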