Fix: align start, end date

This commit is contained in:
dx-tan 2024-02-07 12:39:24 +07:00
parent efdfbf7295
commit 0070cc1e5f
8 changed files with 260 additions and 96 deletions

.gitignore
View File

@@ -35,3 +35,6 @@ cope2n-ai-fi/Dockerfile_old_work
*.sql
*.sql
.env_prod
/feedback/
cope2n-api/public/SBT_report_20240122.csv
Jan.csv

View File

@@ -17,6 +17,7 @@ from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list
from ..utils.file import download_from_S3, convert_date_string
from ..utils.redis import RedisUtils
from ..utils.process import string_to_boolean
from ..request.ReportCreationSerializer import ReportCreationSerializer
from ..utils.subsidiary import map_subsidiary_long_to_short, map_subsidiary_short_to_long
redis_client = RedisUtils()
@@ -177,79 +178,21 @@ class AccuracyViewSet(viewsets.ViewSet):
return JsonResponse({'error': 'Invalid request method.'}, status=405)
@extend_schema(
parameters=[
OpenApiParameter(
name='is_daily_report',
location=OpenApiParameter.QUERY,
description='Whether to include test record or not',
type=OpenApiTypes.BOOL,
),
OpenApiParameter(
name='start_date',
location=OpenApiParameter.QUERY,
description='Start date (YYYY-mm-DDTHH:MM:SSZ)',
type=OpenApiTypes.DATE,
default='2023-01-02T00:00:00+0700',
),
OpenApiParameter(
name='end_date',
location=OpenApiParameter.QUERY,
description='End date (YYYY-mm-DDTHH:MM:SSZ)',
type=OpenApiTypes.DATE,
default='2024-01-10T00:00:00+0700',
),
OpenApiParameter(
name='include_test',
location=OpenApiParameter.QUERY,
description='Whether to include test record or not',
type=OpenApiTypes.BOOL,
),
OpenApiParameter(
name='is_reviewed',
location=OpenApiParameter.QUERY,
description='Which records to be query',
type=OpenApiTypes.STR,
enum=['reviewed', 'not reviewed', 'all'],
),
OpenApiParameter(
name='request_id',
location=OpenApiParameter.QUERY,
description='Specific request id',
type=OpenApiTypes.STR,
),
OpenApiParameter(
name='redemption_id',
location=OpenApiParameter.QUERY,
description='Specific redemption id',
type=OpenApiTypes.STR,
),
OpenApiParameter(
name='subsidiary',
location=OpenApiParameter.QUERY,
description='Subsidiary',
type=OpenApiTypes.STR,
),
OpenApiParameter(
name='report_overview_duration',
location=OpenApiParameter.QUERY,
description=f'open of {settings.OVERVIEW_REPORT_DURATION}',
type=OpenApiTypes.STR,
),
],
request=ReportCreationSerializer(),
responses=None, tags=['Accuracy']
)
@action(detail=False, url_path="make_report", methods=["GET"])
@action(detail=False, url_path="make_report", methods=["POST"])
def make_report(self, request):
if request.method == 'GET':
start_date_str = request.GET.get('start_date')
end_date_str = request.GET.get('end_date')
request_id = request.GET.get('request_id', None)
redemption_id = request.GET.get('redemption_id', None)
is_reviewed = string_to_boolean(request.GET.get('is_reviewed', "false"))
include_test = string_to_boolean(request.GET.get('include_test', "false"))
subsidiary = request.GET.get("subsidiary", "all")
is_daily_report = string_to_boolean(request.GET.get('is_daily_report', "false"))
report_overview_duration = request.GET.get("report_overview_duration", "")
if request.method == 'POST':
start_date_str = request.data.get('start_date')
end_date_str = request.data.get('end_date')
request_id = request.data.get('request_id', None)
redemption_id = request.data.get('redemption_id', None)
is_reviewed = request.data.get('is_reviewed', False)
include_test = request.data.get('include_test', False)
subsidiary = request.data.get("subsidiary", "all")
is_daily_report = request.data.get('is_daily_report', False)
report_overview_duration = request.data.get("report_overview_duration", "")
subsidiary = map_subsidiary_long_to_short(subsidiary)
if is_daily_report:
@@ -261,12 +204,20 @@ class AccuracyViewSet(viewsets.ViewSet):
else:
start_date = end_date - timezone.timedelta(days=7)
start_date = start_date.replace(hour=0, minute=0, second=0, microsecond=0)
start_date_str = start_date.strftime('%Y-%m-%dT%H:%M:%S%z')
start_date_str = start_date.strftime('%Y-%m-%dT%H:%M:%S%z') # downstream logic needs second precision with timezone for calculation
end_date_str = end_date.strftime('%Y-%m-%dT%H:%M:%S%z')
else:
try:
start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z')
end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z')
start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%d') # We care only about day precision only
end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%d')
# Round:
# end_date_str to the beginning of the next day
# start_date_str to the start of its day
start_date = timezone.make_aware(start_date)
end_date = timezone.make_aware(end_date)
start_date_str = start_date.strftime('%Y-%m-%dT%H:%M:%S%z') # downstream logic needs second precision with timezone for calculation
end_date_str = (end_date + timezone.timedelta(days=1)).strftime('%Y-%m-%dT%H:%M:%S%z')
except ValueError:
raise InvalidException(excArgs="Date format")
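For reference, the new window handling above can be read as a small standalone helper. The following is a minimal sketch under the same assumptions the view makes (Django's timezone utilities with a default timezone configured, day-precision inputs); align_report_window is an illustrative name, not something defined in the codebase.

from django.utils import timezone

def align_report_window(start_date_str, end_date_str):
    # Parse day-precision inputs such as "2024-01-02".
    start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%d')
    end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%d')
    # Attach the current timezone so downstream comparisons use aware datetimes.
    start_date = timezone.make_aware(start_date)
    end_date = timezone.make_aware(end_date)
    # Start stays at 00:00:00 of its day; end is pushed to 00:00:00 of the next
    # day so the last requested day is fully included in the report window.
    start_str = start_date.strftime('%Y-%m-%dT%H:%M:%S%z')
    end_str = (end_date + timezone.timedelta(days=1)).strftime('%Y-%m-%dT%H:%M:%S%z')
    return start_str, end_str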

View File

@@ -0,0 +1,39 @@
from rest_framework import serializers
from django.conf import settings
class ReportCreationSerializer(serializers.Serializer):
is_daily_report = serializers.BooleanField(
help_text='Whether this is a daily report or not',
default=False
)
start_date = serializers.DateField(
help_text='Start date (YYYY-mm-DD)',
default='2024-01-02'
)
end_date = serializers.DateField(
help_text='End date (YYYY-mm-DD)',
default='2024-01-10'
)
include_test = serializers.BooleanField(
help_text='Whether to include test record or not',
default=False
)
# is_reviewed = serializers.ChoiceField(
# help_text='Which records to be query',
# # choices=['reviewed', 'not reviewed', 'all'],
# default=False
# )
# request_id = serializers.CharField(
# help_text='Specific request id'
# )
# redemption_id = serializers.CharField(
# help_text='Specific redemption id'
# )
subsidiary = serializers.CharField(
help_text='Subsidiary',
default="all"
)
report_overview_duration = serializers.CharField(
help_text=f'one of {settings.OVERVIEW_REPORT_DURATION}',
default=None
)
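The make_report view shown earlier reads request.data directly, so this serializer mainly documents the POST schema for the API docs. As an illustrative check (example values, not taken from the commit), a payload matching that schema validates like this:

payload = {
    "is_daily_report": False,
    "start_date": "2024-01-02",
    "end_date": "2024-01-10",
    "include_test": False,
    "subsidiary": "all",
    "report_overview_duration": "30d",
}
serializer = ReportCreationSerializer(data=payload)
serializer.is_valid(raise_exception=True)
# start_date and end_date come back as datetime.date objects in validated_data.
print(serializer.validated_data)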

View File

@@ -133,7 +133,7 @@ class ReportAccumulateByRequest:
if not total["average_processing_time"].get(report_file.doc_type, None):
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
total["average_processing_time"] = IterAvg()
total["average_processing_time"][report_file.doc_type] = IterAvg()
total["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
total["usage"]["imei"] += 1 if report_file.doc_type == "imei" else 0
@@ -168,7 +168,7 @@ class ReportAccumulateByRequest:
if not day_data["average_processing_time"].get(report_file.doc_type, None):
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
day_data["average_processing_time"] = IterAvg()
day_data["average_processing_time"][report_file.doc_type] = IterAvg()
day_data["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
return day_data
@@ -274,8 +274,8 @@ class ReportAccumulateByRequest:
_data[month][1][day]["average_accuracy_rate"]["purchase_date"] = _data[month][1][day]["average_accuracy_rate"]["purchase_date"]()
_data[month][1][day]["average_accuracy_rate"]["retailer_name"] = _data[month][1][day]["average_accuracy_rate"]["retailer_name"]()
_data[month][1][day]["average_accuracy_rate"]["sold_to_party"] = _data[month][1][day]["average_accuracy_rate"]["sold_to_party"]()
_data[month][1][day]["average_processing_time"]["imei"] = _data[month][1][day]["average_processing_time"]["imei"]()
_data[month][1][day]["average_processing_time"]["invoice"] = _data[month][1][day]["average_processing_time"]["invoice"]()
for key in _data[month][1][day]["average_processing_time"].keys():
_data[month][1][day]["average_processing_time"][key] = _data[month][1][day]["average_processing_time"][key]()
_data[month][1][day]["feedback_accuracy"]["imei_number"] = _data[month][1][day]["feedback_accuracy"]["imei_number"]()
_data[month][1][day]["feedback_accuracy"]["purchase_date"] = _data[month][1][day]["feedback_accuracy"]["purchase_date"]()
@@ -296,8 +296,8 @@ class ReportAccumulateByRequest:
_data[month][0]["average_accuracy_rate"]["purchase_date"] = _data[month][0]["average_accuracy_rate"]["purchase_date"]()
_data[month][0]["average_accuracy_rate"]["retailer_name"] = _data[month][0]["average_accuracy_rate"]["retailer_name"]()
_data[month][0]["average_accuracy_rate"]["sold_to_party"] = _data[month][0]["average_accuracy_rate"]["sold_to_party"]()
_data[month][0]["average_processing_time"]["imei"] = _data[month][0]["average_processing_time"]["imei"]()
_data[month][0]["average_processing_time"]["invoice"] = _data[month][0]["average_processing_time"]["invoice"]()
for key in _data[month][0]["average_processing_time"].keys():
_data[month][0]["average_processing_time"][key] = _data[month][0]["average_processing_time"][key]()
_data[month][0]["feedback_accuracy"]["imei_number"] = _data[month][0]["feedback_accuracy"]["imei_number"]()
_data[month][0]["feedback_accuracy"]["purchase_date"] = _data[month][0]["feedback_accuracy"]["purchase_date"]()

View File

@@ -17,8 +17,8 @@ login_token = None
# Define the login credentials
login_credentials = {
'username': 'sbt',
# 'password': '7Eg4AbWIXDnufgn'
'password': 'abc'
'password': '7Eg4AbWIXDnufgn'
# 'password': 'abc'
}
# Define the command to call the update API

View File

@@ -21,8 +21,8 @@ login_credentials = {
# Define the command to call the update API
update_url = f'{proxy_url}/api/ctel/make_report/'
update_params = {
'is_daily_report': 'true',
update_data = {
'is_daily_report': True,
'report_overview_duration': '',
'subsidiary': None
}
@@ -33,9 +33,9 @@ def update_report(login_token, report_overview_duration=["30d", "7d"], subsidiar
headers = {'Authorization': login_token}
for dur in report_overview_duration:
for sub in subsidiary:
update_params["report_overview_duration"] = dur
update_params["subsidiary"] = sub
update_response = requests.get(update_url, params=update_params, headers=headers)
update_data["report_overview_duration"] = dur
update_data["subsidiary"] = sub
update_response = requests.post(update_url, data=update_data, headers=headers)
print("[INFO]: update_response at {} by {} - {} with status {}".format(datetime.now(), dur, sub, update_response.status_code))
update_response.raise_for_status()
time.sleep(update_cost)
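One detail of the switch from query params to a form-encoded body, noted here as an aside rather than something in the commit: requests serializes data= values to strings, so True reaches the API as the string 'True'. That is still truthy for request.data.get('is_daily_report', False) in the view, but posting json=update_data would preserve the boolean type; with DRF's default parsers, request.data accepts either body format.

# Illustrative alternative preserving JSON types (same endpoint, same headers):
update_response = requests.post(update_url, json=update_data, headers=headers)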

View File

@@ -16,7 +16,7 @@ services:
networks:
- ctel-sbt
privileged: true
image: sidp/cope2n-ai-fi-sbt
image: sidp/cope2n-ai-fi-sbt:latest
environment:
- PYTHONPATH=${PYTHONPATH}:/workspace/cope2n-ai-fi # For import module
- CELERY_BROKER=amqp://${RABBITMQ_DEFAULT_USER}:${RABBITMQ_DEFAULT_PASS}@rabbitmq-sbt:5672
@@ -41,7 +41,7 @@ services:
build:
context: cope2n-api
dockerfile: Dockerfile
image: sidp/cope2n-be-fi-sbt
image: sidp/cope2n-be-fi-sbt:latest
environment:
- MEDIA_ROOT=${MEDIA_ROOT}
- DB_ENGINE=${DB_ENGINE}
@@ -84,12 +84,12 @@ services:
depends_on:
db-sbt:
condition: service_started
command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
python manage.py makemigrations &&
python manage.py migrate &&
python manage.py compilemessages &&
gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod
# command: bash -c "tail -f > /dev/null"
# command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
# python manage.py makemigrations &&
# python manage.py migrate &&
# python manage.py compilemessages &&
# gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod
command: bash -c "tail -f > /dev/null"
minio:
image: minio/minio
@@ -135,7 +135,7 @@ services:
build:
context: cope2n-api
dockerfile: Dockerfile
image: sidp/cope2n-be-fi-sbt
image: sidp/cope2n-be-fi-sbt:latest
environment:
- MEDIA_ROOT=${MEDIA_ROOT}
- PYTHONPATH=${PYTHONPATH}:/app # For import module
@@ -211,7 +211,7 @@ services:
context: cope2n-fe
shm_size: 10gb
dockerfile: Dockerfile
image: sidp/cope2n-fe-fi-sbt
image: sidp/cope2n-fe-fi-sbt:latest
shm_size: 10gb
privileged: true
ports:

View File

@@ -0,0 +1,171 @@
import csv
from typing import Any
import psycopg2
import boto3
import os
from tqdm import tqdm
from datetime import datetime, timedelta
from pytz import timezone
from dotenv import load_dotenv
load_dotenv("../.env_prod")
# load_dotenv("../.env")
OUTPUT_NAME = "0131-0206"
START_DATE = datetime(2024, 1, 31, tzinfo=timezone('Asia/Singapore'))
END_DATE = datetime(2024, 2, 6, tzinfo=timezone('Asia/Singapore'))
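# Note: constructing datetime(..., tzinfo=pytz.timezone(...)) attaches the zone's
# LMT offset rather than the standard +08:00; pytz expects
# timezone('Asia/Singapore').localize(datetime(...)) for correct offsets.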
BAD_THRESHOLD = 0.75
REVIEW_ACC_COL = 19
FEEDBACK_ACC_COL = 18
REQUEST_ID_COL = 6
# Database connection details
db_host = os.environ.get('DB_HOST', "")
# db_host = "42.96.42.13"
db_name = os.environ.get('DB_SCHEMA', "")
db_user = os.environ.get('DB_USER', "")
db_password = os.environ.get('DB_PASSWORD', "")
# S3 bucket details
s3_bucket_name = os.environ.get('S3_BUCKET_NAME', "")
s3_folder_prefix = 'sbt_invoice'
# S3 access credentials
access_key = os.environ.get('S3_ACCESS_KEY', "")
secret_key = os.environ.get('S3_SECRET_KEY', "")
class RequestAtt:
def __init__(self) -> None:
self.feedback_accuracy = []
self.reiviewed_accuracy = []
self.acc = 0
self.request_id = None
self.is_bad = False
self.data = []
def add_file(self, file):
self.data.append(file)
if file[REVIEW_ACC_COL]:
for key in file[REVIEW_ACC_COL].keys():
self.reiviewed_accuracy += file[REVIEW_ACC_COL][key]
if file[FEEDBACK_ACC_COL]:
for key in file[FEEDBACK_ACC_COL].keys():
self.feedback_accuracy += file[FEEDBACK_ACC_COL][key]
def is_bad_image(self):
fb = min(self.feedback_accuracy)/len(self.feedback_accuracy) if len(self.feedback_accuracy) else None
rv = min(self.reiviewed_accuracy)/len(self.reiviewed_accuracy) if len(self.reiviewed_accuracy) else None
if not fb and not rv:
self.is_bad = False
return False
elif fb and rv is None:
self.is_bad = fb < BAD_THRESHOLD
self.acc = fb
return fb < BAD_THRESHOLD
elif fb and rv:
self.is_bad = rv < BAD_THRESHOLD
self.acc = rv
return rv < BAD_THRESHOLD
return False
def get_request(cursor, request_in_id):
query = "SELECT * FROM fwd_api_subscriptionrequest WHERE id = %s"
cursor.execute(query, (request_in_id,))
data = cursor.fetchone()
return data if data else None
# Request IDs for filtering
def main():
# Connect to the PostgreSQL database
conn = psycopg2.connect(
host=db_host,
database=db_name,
user=db_user,
password=db_password
)
# Create a cursor
cursor = conn.cursor()
# Execute the SELECT query with the filter
query = "SELECT * FROM fwd_api_subscriptionrequestfile WHERE created_at >= %s AND created_at <= %s AND feedback_accuracy IS NOT NULL"
cursor.execute(query, (START_DATE, END_DATE))
# Fetch the filtered data
data = cursor.fetchall()
# Define the CSV file path
csv_file_path = f'{OUTPUT_NAME}.csv'
data_dict = {}
# Filter out requests that have quality < 75%
for i, _d in enumerate(data):
if not data_dict.get(_d[REQUEST_ID_COL], None):
data_dict[_d[REQUEST_ID_COL]] = RequestAtt()
data_dict[_d[REQUEST_ID_COL]].request_id = _d[REQUEST_ID_COL]
data_dict[_d[REQUEST_ID_COL]].add_file(_d)
bad_images = []
for k in data_dict.keys():
if data_dict[k].is_bad_image():
bad_images.append(data_dict[k])
request_ids = []
# Write the data to the CSV file
for bad_image in bad_images:
request = get_request(cursor, bad_image.request_id)
if request:
request_ids.append(request[3])
# ###################### Get bad requests ######################
placeholders = ','.join(['%s'] * len(request_ids))
# Execute the SELECT query with the filter
query = f"SELECT * FROM fwd_api_subscriptionrequest WHERE request_id IN ({placeholders})"
cursor.execute(query, request_ids)
# Fetch the filtered data
data = cursor.fetchall()
# Define the CSV file path
csv_file_path = f'{OUTPUT_NAME}.csv'
# Write the data to the CSV file
with open(csv_file_path, 'w', newline='') as csv_file:
writer = csv.writer(csv_file)
writer.writerow([desc[0] for desc in cursor.description]) # Write column headers
writer.writerows(data) # Write the filtered data rows
# Close the cursor and database connection
cursor.close()
conn.close()
# Download folders from S3
s3_client = boto3.client(
's3',
aws_access_key_id=access_key,
aws_secret_access_key=secret_key
)
for request_id in tqdm(request_ids):
folder_key = f"{s3_folder_prefix}/{request_id}/" # Assuming folder structure like: s3_bucket_name/s3_folder_prefix/request_id/
local_folder_path = f"{OUTPUT_NAME}/{request_id}/" # Path to the local folder to save the downloaded files
os.makedirs(OUTPUT_NAME, exist_ok=True)
os.makedirs(local_folder_path, exist_ok=True)
# List objects in the S3 folder
response = s3_client.list_objects_v2(Bucket=s3_bucket_name, Prefix=folder_key)
objects = response.get('Contents', [])
for s3_object in objects:
object_key = s3_object['Key']
local_file_path = local_folder_path + object_key.split('/')[-1] # Extracting the file name from the object key
# Download the S3 object to the local file
s3_client.download_file(s3_bucket_name, object_key, local_file_path)
if __name__ == "__main__":
main()
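A quick worked example of the thresholding in is_bad_image, using toy numbers and the RequestAtt class defined above (note the score is min(...) divided by the list length, exactly as the script computes it):

r = RequestAtt()
r.feedback_accuracy = [0.6, 0.9]   # min/len = 0.6 / 2 = 0.3
print(r.is_bad_image())            # True, since 0.3 < BAD_THRESHOLD (0.75)
print(r.acc)                       # 0.3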