Fix: align start, end date

parent: efdfbf7295
commit: 0070cc1e5f

.gitignore (vendored, 3 lines changed)
@@ -35,3 +35,6 @@ cope2n-ai-fi/Dockerfile_old_work
 *.sql
 *.sql
 .env_prod
+/feedback/
+cope2n-api/public/SBT_report_20240122.csv
+Jan.csv

@@ -17,6 +17,7 @@ from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list
 from ..utils.file import download_from_S3, convert_date_string
 from ..utils.redis import RedisUtils
 from ..utils.process import string_to_boolean
+from ..request.ReportCreationSerializer import ReportCreationSerializer
 from ..utils.subsidiary import map_subsidiary_long_to_short, map_subsidiary_short_to_long

 redis_client = RedisUtils()
@@ -177,79 +178,21 @@ class AccuracyViewSet(viewsets.ViewSet):
         return JsonResponse({'error': 'Invalid request method.'}, status=405)

     @extend_schema(
-        parameters=[
-            OpenApiParameter(
-                name='is_daily_report',
-                location=OpenApiParameter.QUERY,
-                description='Whether to include test record or not',
-                type=OpenApiTypes.BOOL,
-            ),
-            OpenApiParameter(
-                name='start_date',
-                location=OpenApiParameter.QUERY,
-                description='Start date (YYYY-mm-DDTHH:MM:SSZ)',
-                type=OpenApiTypes.DATE,
-                default='2023-01-02T00:00:00+0700',
-            ),
-            OpenApiParameter(
-                name='end_date',
-                location=OpenApiParameter.QUERY,
-                description='End date (YYYY-mm-DDTHH:MM:SSZ)',
-                type=OpenApiTypes.DATE,
-                default='2024-01-10T00:00:00+0700',
-            ),
-            OpenApiParameter(
-                name='include_test',
-                location=OpenApiParameter.QUERY,
-                description='Whether to include test record or not',
-                type=OpenApiTypes.BOOL,
-            ),
-            OpenApiParameter(
-                name='is_reviewed',
-                location=OpenApiParameter.QUERY,
-                description='Which records to be query',
-                type=OpenApiTypes.STR,
-                enum=['reviewed', 'not reviewed', 'all'],
-            ),
-            OpenApiParameter(
-                name='request_id',
-                location=OpenApiParameter.QUERY,
-                description='Specific request id',
-                type=OpenApiTypes.STR,
-            ),
-            OpenApiParameter(
-                name='redemption_id',
-                location=OpenApiParameter.QUERY,
-                description='Specific redemption id',
-                type=OpenApiTypes.STR,
-            ),
-            OpenApiParameter(
-                name='subsidiary',
-                location=OpenApiParameter.QUERY,
-                description='Subsidiary',
-                type=OpenApiTypes.STR,
-            ),
-            OpenApiParameter(
-                name='report_overview_duration',
-                location=OpenApiParameter.QUERY,
-                description=f'open of {settings.OVERVIEW_REPORT_DURATION}',
-                type=OpenApiTypes.STR,
-            ),
-        ],
+        request=ReportCreationSerializer(),
         responses=None, tags=['Accuracy']
     )
-    @action(detail=False, url_path="make_report", methods=["GET"])
+    @action(detail=False, url_path="make_report", methods=["POST"])
     def make_report(self, request):
-        if request.method == 'GET':
-            start_date_str = request.GET.get('start_date')
-            end_date_str = request.GET.get('end_date')
-            request_id = request.GET.get('request_id', None)
-            redemption_id = request.GET.get('redemption_id', None)
-            is_reviewed = string_to_boolean(request.GET.get('is_reviewed', "false"))
-            include_test = string_to_boolean(request.GET.get('include_test', "false"))
-            subsidiary = request.GET.get("subsidiary", "all")
-            is_daily_report = string_to_boolean(request.GET.get('is_daily_report', "false"))
-            report_overview_duration = request.GET.get("report_overview_duration", "")
+        if request.method == 'POST':
+            start_date_str = request.data.get('start_date')
+            end_date_str = request.data.get('end_date')
+            request_id = request.data.get('request_id', None)
+            redemption_id = request.data.get('redemption_id', None)
+            is_reviewed = request.data.get('is_reviewed', False)
+            include_test = request.data.get('include_test', False)
+            subsidiary = request.data.get("subsidiary", "all")
+            is_daily_report = request.data.get('is_daily_report', False)
+            report_overview_duration = request.data.get("report_overview_duration", "")
             subsidiary = map_subsidiary_long_to_short(subsidiary)

             if is_daily_report:
@@ -261,12 +204,20 @@ class AccuracyViewSet(viewsets.ViewSet):
                 else:
                     start_date = end_date - timezone.timedelta(days=7)
                     start_date = start_date.replace(hour=0, minute=0, second=0, microsecond=0)
-                    start_date_str = start_date.strftime('%Y-%m-%dT%H:%M:%S%z')
+                    start_date_str = start_date.strftime('%Y-%m-%dT%H:%M:%S%z') # inside logic will include second precision with timezone for calculation
                     end_date_str = end_date.strftime('%Y-%m-%dT%H:%M:%S%z')
             else:
                 try:
-                    start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z')
-                    end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z')
+                    start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%d') # We care only about day precision only
+                    end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%d')
+                    # Round:
+                    # end_date_str to the beginning of the next day
+                    # start_date_str to the start of the date
+                    start_date = timezone.make_aware(start_date)
+                    end_date = timezone.make_aware(end_date)
+
+                    start_date_str = start_date.strftime('%Y-%m-%dT%H:%M:%S%z') # inside logic will include second precision with timezone for calculation
+                    end_date_str = (end_date + timezone.timedelta(days=1)).strftime('%Y-%m-%dT%H:%M:%S%z')
                 except ValueError:
                     raise InvalidException(excArgs="Date format")

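With the endpoint switched from GET query parameters to a POST body, clients now send the report options as request data. A minimal sketch of such a call is below; the host and token are placeholders, the field values are examples, and the path /api/ctel/make_report/ is the one used by the update script later in this diff:

import requests

# Hypothetical client call against the reworked endpoint; BASE_URL and TOKEN are placeholders.
BASE_URL = "http://localhost:9000"
TOKEN = "<login token>"

payload = {
    "start_date": "2024-01-02",   # day precision, matching the new '%Y-%m-%d' parsing
    "end_date": "2024-01-10",
    "is_daily_report": False,
    "include_test": False,
    "subsidiary": "all",
    "report_overview_duration": "",
}
resp = requests.post(f"{BASE_URL}/api/ctel/make_report/", data=payload,
                     headers={"Authorization": TOKEN})
print(resp.status_code, resp.text)
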
cope2n-api/fwd_api/request/ReportCreationSerializer.py (new file, 39 lines)
@@ -0,0 +1,39 @@
+from rest_framework import serializers
+from django.conf import settings
+
+class ReportCreationSerializer(serializers.Serializer):
+    is_daily_report = serializers.BooleanField(
+        help_text='Whether to include test record or not',
+        default=False
+    )
+    start_date = serializers.DateField(
+        help_text='Start date (YYYY-mm-DD)',
+        default='2024-01-02'
+    )
+    end_date = serializers.DateField(
+        help_text='End date (YYYY-mm-DD)',
+        default='2024-01-10'
+    )
+    include_test = serializers.BooleanField(
+        help_text='Whether to include test record or not',
+        default=False
+    )
+    # is_reviewed = serializers.ChoiceField(
+    #     help_text='Which records to be query',
+    #     # choices=['reviewed', 'not reviewed', 'all'],
+    #     default=False
+    # )
+    # request_id = serializers.CharField(
+    #     help_text='Specific request id'
+    # )
+    # redemption_id = serializers.CharField(
+    #     help_text='Specific redemption id'
+    # )
+    subsidiary = serializers.CharField(
+        help_text='Subsidiary',
+        default="all"
+    )
+    report_overview_duration = serializers.CharField(
+        help_text=f'open of {settings.OVERVIEW_REPORT_DURATION}',
+        default=None
+    )
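In this commit the serializer is only attached to @extend_schema, so it drives the generated OpenAPI documentation while the view keeps reading request.data directly. It can still be exercised on its own to check a payload against the declared fields; a small sketch, assuming a configured Django/DRF environment (e.g. python manage.py shell) and made-up payload values:

# Hypothetical standalone validation of a report-creation payload.
from fwd_api.request.ReportCreationSerializer import ReportCreationSerializer

payload = {
    "start_date": "2024-01-02",
    "end_date": "2024-01-10",
    "is_daily_report": True,
    "subsidiary": "all",
}
serializer = ReportCreationSerializer(data=payload)
print(serializer.is_valid())        # True when the dates parse as YYYY-MM-DD, etc.
print(serializer.validated_data)    # defaults filled in for omitted fields
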
@ -133,7 +133,7 @@ class ReportAccumulateByRequest:
|
|||||||
|
|
||||||
if not total["average_processing_time"].get(report_file.doc_type, None):
|
if not total["average_processing_time"].get(report_file.doc_type, None):
|
||||||
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
|
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
|
||||||
total["average_processing_time"] = IterAvg()
|
total["average_processing_time"][report_file.doc_type] = IterAvg()
|
||||||
total["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
|
total["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
|
||||||
|
|
||||||
total["usage"]["imei"] += 1 if report_file.doc_type == "imei" else 0
|
total["usage"]["imei"] += 1 if report_file.doc_type == "imei" else 0
|
||||||
@ -168,7 +168,7 @@ class ReportAccumulateByRequest:
|
|||||||
|
|
||||||
if not day_data["average_processing_time"].get(report_file.doc_type, None):
|
if not day_data["average_processing_time"].get(report_file.doc_type, None):
|
||||||
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
|
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
|
||||||
day_data["average_processing_time"] = IterAvg()
|
day_data["average_processing_time"][report_file.doc_type] = IterAvg()
|
||||||
day_data["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
|
day_data["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
|
||||||
|
|
||||||
return day_data
|
return day_data
|
||||||
@ -274,8 +274,8 @@ class ReportAccumulateByRequest:
|
|||||||
_data[month][1][day]["average_accuracy_rate"]["purchase_date"] = _data[month][1][day]["average_accuracy_rate"]["purchase_date"]()
|
_data[month][1][day]["average_accuracy_rate"]["purchase_date"] = _data[month][1][day]["average_accuracy_rate"]["purchase_date"]()
|
||||||
_data[month][1][day]["average_accuracy_rate"]["retailer_name"] = _data[month][1][day]["average_accuracy_rate"]["retailer_name"]()
|
_data[month][1][day]["average_accuracy_rate"]["retailer_name"] = _data[month][1][day]["average_accuracy_rate"]["retailer_name"]()
|
||||||
_data[month][1][day]["average_accuracy_rate"]["sold_to_party"] = _data[month][1][day]["average_accuracy_rate"]["sold_to_party"]()
|
_data[month][1][day]["average_accuracy_rate"]["sold_to_party"] = _data[month][1][day]["average_accuracy_rate"]["sold_to_party"]()
|
||||||
_data[month][1][day]["average_processing_time"]["imei"] = _data[month][1][day]["average_processing_time"]["imei"]()
|
for key in _data[month][1][day]["average_processing_time"].keys():
|
||||||
_data[month][1][day]["average_processing_time"]["invoice"] = _data[month][1][day]["average_processing_time"]["invoice"]()
|
_data[month][1][day]["average_processing_time"][key] = _data[month][1][day]["average_processing_time"][key]()
|
||||||
|
|
||||||
_data[month][1][day]["feedback_accuracy"]["imei_number"] = _data[month][1][day]["feedback_accuracy"]["imei_number"]()
|
_data[month][1][day]["feedback_accuracy"]["imei_number"] = _data[month][1][day]["feedback_accuracy"]["imei_number"]()
|
||||||
_data[month][1][day]["feedback_accuracy"]["purchase_date"] = _data[month][1][day]["feedback_accuracy"]["purchase_date"]()
|
_data[month][1][day]["feedback_accuracy"]["purchase_date"] = _data[month][1][day]["feedback_accuracy"]["purchase_date"]()
|
||||||
@ -296,8 +296,8 @@ class ReportAccumulateByRequest:
|
|||||||
_data[month][0]["average_accuracy_rate"]["purchase_date"] = _data[month][0]["average_accuracy_rate"]["purchase_date"]()
|
_data[month][0]["average_accuracy_rate"]["purchase_date"] = _data[month][0]["average_accuracy_rate"]["purchase_date"]()
|
||||||
_data[month][0]["average_accuracy_rate"]["retailer_name"] = _data[month][0]["average_accuracy_rate"]["retailer_name"]()
|
_data[month][0]["average_accuracy_rate"]["retailer_name"] = _data[month][0]["average_accuracy_rate"]["retailer_name"]()
|
||||||
_data[month][0]["average_accuracy_rate"]["sold_to_party"] = _data[month][0]["average_accuracy_rate"]["sold_to_party"]()
|
_data[month][0]["average_accuracy_rate"]["sold_to_party"] = _data[month][0]["average_accuracy_rate"]["sold_to_party"]()
|
||||||
_data[month][0]["average_processing_time"]["imei"] = _data[month][0]["average_processing_time"]["imei"]()
|
for key in _data[month][0]["average_processing_time"].keys():
|
||||||
_data[month][0]["average_processing_time"]["invoice"] = _data[month][0]["average_processing_time"]["invoice"]()
|
_data[month][0]["average_processing_time"][key] = _data[month][0]["average_processing_time"][key]()
|
||||||
|
|
||||||
_data[month][0]["feedback_accuracy"]["imei_number"] = _data[month][0]["feedback_accuracy"]["imei_number"]()
|
_data[month][0]["feedback_accuracy"]["imei_number"] = _data[month][0]["feedback_accuracy"]["imei_number"]()
|
||||||
_data[month][0]["feedback_accuracy"]["purchase_date"] = _data[month][0]["feedback_accuracy"]["purchase_date"]()
|
_data[month][0]["feedback_accuracy"]["purchase_date"] = _data[month][0]["feedback_accuracy"]["purchase_date"]()
|
||||||
|
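The pattern fixed in these hunks: average_processing_time is a dict of per-doc_type accumulators, and assigning a bare IterAvg() to the dict itself wiped every existing accumulator whenever an unexpected doc_type appeared; keying by doc_type only adds the missing slot, and finalisation now iterates over whatever keys exist instead of hard-coding "imei"/"invoice". A minimal sketch of the idea with a stand-in accumulator (the real IterAvg lives in the project's accuracy utilities and is only assumed to expose add_avg() and to return its current value when called):

# Stand-in for IterAvg: accumulates a weighted mean and returns it when called.
class IterAvgSketch:
    def __init__(self):
        self.total, self.count = 0.0, 0

    def add_avg(self, value, count):
        self.total += value * count
        self.count += count

    def __call__(self):
        return self.total / self.count if self.count else 0.0

average_processing_time = {"imei": IterAvgSketch(), "invoice": IterAvgSketch()}
average_processing_time["imei"].add_avg(1.2, 1)

doc_type = "weird_type"
if not average_processing_time.get(doc_type, None):
    # Old code: average_processing_time = IterAvgSketch()  -> dropped "imei"/"invoice" entirely
    average_processing_time[doc_type] = IterAvgSketch()     # fixed: only add the missing key

# Finalisation over all keys, mirroring the new for-key loops above.
snapshot = {key: avg() for key, avg in average_processing_time.items()}
print(snapshot)
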
@@ -17,8 +17,8 @@ login_token = None
 # Define the login credentials
 login_credentials = {
     'username': 'sbt',
-    # 'password': '7Eg4AbWIXDnufgn'
-    'password': 'abc'
+    'password': '7Eg4AbWIXDnufgn'
+    # 'password': 'abc'
 }

 # Define the command to call the update API

@@ -21,8 +21,8 @@ login_credentials = {

 # Define the command to call the update API
 update_url = f'{proxy_url}/api/ctel/make_report/'
-update_params = {
-    'is_daily_report': 'true',
+update_data = {
+    'is_daily_report': True,
     'report_overview_duration': '',
     'subsidiary': None
 }

@@ -33,9 +33,9 @@ def update_report(login_token, report_overview_duration=["30d", "7d"], subsidiar
     headers = {'Authorization': login_token}
     for dur in report_overview_duration:
         for sub in subsidiary:
-            update_params["report_overview_duration"] = dur
-            update_params["subsidiary"] = sub
-            update_response = requests.get(update_url, params=update_params, headers=headers)
+            update_data["report_overview_duration"] = dur
+            update_data["subsidiary"] = sub
+            update_response = requests.post(update_url, data=update_data, headers=headers)
             print("[INFO]: update_response at {} by {} - {} with status {}".format(datetime.now(), dur, sub, update_response.status_code))
             update_response.raise_for_status()
             time.sleep(update_cost)

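For completeness, the overview-report refresh is driven by calling update_report with the token obtained from the login step. A sketch of a one-off run; the login helper is not shown in this hunk, so get_login_token() below is hypothetical, and ["all"] is a placeholder subsidiary list:

# Hypothetical driver; get_login_token() stands in for whatever the script uses
# to exchange login_credentials for the 'Authorization' token.
if __name__ == "__main__":
    login_token = get_login_token(login_credentials)
    # Refresh the 30-day and 7-day overview reports for the subsidiaries of interest.
    update_report(login_token, report_overview_duration=["30d", "7d"], subsidiary=["all"])
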
@@ -16,7 +16,7 @@ services:
     networks:
       - ctel-sbt
     privileged: true
-    image: sidp/cope2n-ai-fi-sbt
+    image: sidp/cope2n-ai-fi-sbt:latest
     environment:
       - PYTHONPATH=${PYTHONPATH}:/workspace/cope2n-ai-fi # For import module
       - CELERY_BROKER=amqp://${RABBITMQ_DEFAULT_USER}:${RABBITMQ_DEFAULT_PASS}@rabbitmq-sbt:5672

@@ -41,7 +41,7 @@ services:
     build:
       context: cope2n-api
       dockerfile: Dockerfile
-    image: sidp/cope2n-be-fi-sbt
+    image: sidp/cope2n-be-fi-sbt:latest
     environment:
       - MEDIA_ROOT=${MEDIA_ROOT}
       - DB_ENGINE=${DB_ENGINE}

@@ -84,12 +84,12 @@ services:
     depends_on:
       db-sbt:
         condition: service_started
-    command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
-      python manage.py makemigrations &&
-      python manage.py migrate &&
-      python manage.py compilemessages &&
-      gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod
-    # command: bash -c "tail -f > /dev/null"
+    # command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
+    #   python manage.py makemigrations &&
+    #   python manage.py migrate &&
+    #   python manage.py compilemessages &&
+    #   gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod
+    command: bash -c "tail -f > /dev/null"

   minio:
     image: minio/minio

@@ -135,7 +135,7 @@ services:
     build:
       context: cope2n-api
       dockerfile: Dockerfile
-    image: sidp/cope2n-be-fi-sbt
+    image: sidp/cope2n-be-fi-sbt:latest
    environment:
      - MEDIA_ROOT=${MEDIA_ROOT}
      - PYTHONPATH=${PYTHONPATH}:/app # For import module

@@ -211,7 +211,7 @@ services:
       context: cope2n-fe
       shm_size: 10gb
       dockerfile: Dockerfile
-    image: sidp/cope2n-fe-fi-sbt
+    image: sidp/cope2n-fe-fi-sbt:latest
     shm_size: 10gb
     privileged: true
     ports:

scripts/crawl_database_by_time_with_accuracy_contrain.py (new file, 171 lines)
@@ -0,0 +1,171 @@
+import csv
+from typing import Any
+import psycopg2
+import boto3
+import os
+from tqdm import tqdm
+from datetime import datetime, timedelta
+from pytz import timezone
+
+from dotenv import load_dotenv
+
+load_dotenv("../.env_prod")
+# load_dotenv("../.env")
+
+OUTPUT_NAME = "0131-0206"
+START_DATE = datetime(2024, 1, 31, tzinfo=timezone('Asia/Singapore'))
+END_DATE = datetime(2024, 2, 6, tzinfo=timezone('Asia/Singapore'))
+BAD_THRESHOLD = 0.75
+
+REVIEW_ACC_COL = 19
+FEEDBACK_ACC_COL = 18
+REQUEST_ID_COL = 6
+
+# Database connection details
+db_host = os.environ.get('DB_HOST', "")
+# db_host = "42.96.42.13"
+db_name = os.environ.get('DB_SCHEMA', "")
+db_user = os.environ.get('DB_USER', "")
+db_password = os.environ.get('DB_PASSWORD', "")
+
+# S3 bucket details
+s3_bucket_name = os.environ.get('S3_BUCKET_NAME', "")
+s3_folder_prefix = 'sbt_invoice'
+
+# S3 access credentials
+access_key = os.environ.get('S3_ACCESS_KEY', "")
+secret_key = os.environ.get('S3_SECRET_KEY', "")
+
+class RequestAtt:
+    def __init__(self) -> None:
+        self.feedback_accuracy = []
+        self.reiviewed_accuracy = []
+        self.acc = 0
+        self.request_id = None
+        self.is_bad = False
+        self.data = []
+
+    def add_file(self, file):
+        self.data.append(file)
+        if file[REVIEW_ACC_COL]:
+            for key in file[REVIEW_ACC_COL].keys():
+                self.feedback_accuracy += file[REVIEW_ACC_COL][key]
+        if file[FEEDBACK_ACC_COL]:
+            for key in file[FEEDBACK_ACC_COL].keys():
+                self.feedback_accuracy += file[FEEDBACK_ACC_COL][key]
+
+    def is_bad_image(self):
+        fb = min(self.feedback_accuracy)/len(self.feedback_accuracy) if len(self.feedback_accuracy) else None
+        rv = min(self.reiviewed_accuracy)/len(self.reiviewed_accuracy) if len(self.reiviewed_accuracy) else None
+        if not fb and not rv:
+            self.is_bad = False
+            return False
+        elif fb and rv is None:
+            self.is_bad = fb < BAD_THRESHOLD
+            self.acc = fb
+            return fb < BAD_THRESHOLD
+        elif fb and rv:
+            self.is_bad = rv < BAD_THRESHOLD
+            self.acc = rv
+            return rv < BAD_THRESHOLD
+        return False
+
+def get_request(cursor, request_in_id):
+    query = "SELECT * FROM fwd_api_subscriptionrequest WHERE id = %s"
+    cursor.execute(query, (request_in_id,))
+    data = cursor.fetchone()
+    return data if data else None
+
+# Request IDs for filtering
+def main():
+    # Connect to the PostgreSQL database
+    conn = psycopg2.connect(
+        host=db_host,
+        database=db_name,
+        user=db_user,
+        password=db_password
+    )
+
+    # Create a cursor
+    cursor = conn.cursor()
+
+
+    # Execute the SELECT query with the filter
+    query = "SELECT * FROM fwd_api_subscriptionrequestfile WHERE created_at >= %s AND created_at <= %s AND feedback_accuracy IS NOT NULL"
+    cursor.execute(query, (START_DATE, END_DATE))
+
+    # Fetch the filtered data
+    data = cursor.fetchall()
+
+    # Define the CSV file path
+    csv_file_path = f'{OUTPUT_NAME}.csv'
+    data_dict = {}
+    # Filter out requests request that has quality < 75%
+    for i, _d in enumerate(data):
+        if not data_dict.get(_d[REQUEST_ID_COL], None):
+            data_dict[_d[REQUEST_ID_COL]] = RequestAtt()
+            data_dict[_d[REQUEST_ID_COL]].request_id = _d[REQUEST_ID_COL]
+        data_dict[_d[REQUEST_ID_COL]].add_file(_d)
+
+    bad_images = []
+    for k in data_dict.keys():
+        if data_dict[k].is_bad_image():
+            bad_images.append(data_dict[k])
+
+    request_ids = []
+    # Write the data to the CSV file
+    for bad_image in bad_images:
+        request = get_request(cursor, bad_image.request_id)
+        if request:
+            request_ids.append(request[3])
+
+    # ###################### Get bad requests ######################
+    placeholders = ','.join(['%s'] * len(request_ids))
+
+    # Execute the SELECT query with the filter
+    query = f"SELECT * FROM fwd_api_subscriptionrequest WHERE request_id IN ({placeholders})"
+    cursor.execute(query, request_ids)
+
+    # Fetch the filtered data
+    data = cursor.fetchall()
+
+    # Define the CSV file path
+    csv_file_path = f'{OUTPUT_NAME}.csv'
+
+    # Write the data to the CSV file
+    with open(csv_file_path, 'w', newline='') as csv_file:
+        writer = csv.writer(csv_file)
+        writer.writerow([desc[0] for desc in cursor.description]) # Write column headers
+        writer.writerows(data) # Write the filtered data rows
+
+    # Close the cursor and database connection
+    cursor.close()
+    conn.close()
+
+    # Download folders from S3
+    s3_client = boto3.client(
+        's3',
+        aws_access_key_id=access_key,
+        aws_secret_access_key=secret_key
+    )
+
+    for request_id in tqdm(request_ids):
+        folder_key = f"{s3_folder_prefix}/{request_id}/" # Assuming folder structure like: s3_bucket_name/s3_folder_prefix/request_id/
+        local_folder_path = f"{OUTPUT_NAME}/{request_id}/" # Path to the local folder to save the downloaded files
+        os.makedirs(OUTPUT_NAME, exist_ok=True)
+        os.makedirs(local_folder_path, exist_ok=True)
+
+
+        # List objects in the S3 folder
+        response = s3_client.list_objects_v2(Bucket=s3_bucket_name, Prefix=folder_key)
+        objects = response.get('Contents', [])
+
+        for s3_object in objects:
+            object_key = s3_object['Key']
+            local_file_path = local_folder_path + object_key.split('/')[-1] # Extracting the file name from the object key
+
+            # Download the S3 object to the local file
+            s3_client.download_file(s3_bucket_name, object_key, local_file_path)
+
+if __name__ == "__main__":
+    main()