Merge pull request #145 from SDSRV-IDP/dev/logging

Dev/logging
Phan Thành Trung 2024-06-26 15:53:24 +07:00 committed by GitHub Enterprise
commit f93e9813ca
28 changed files with 451 additions and 317 deletions

View File

@ -1,4 +1,5 @@
MEDIA_ROOT=/app/media
LOG_ROOT=/app/log
# DATABASE django setup
DB_ENGINE=django.db.backends.postgresql_psycopg2
DB_SCHEMA=sbt_prod_20240422_1

.gitignore vendored
View File

@ -45,3 +45,4 @@ cope2n-api/reviewed/retailer.xlsx
scripts/crawl_database.py
redundant_models/
junk.json
cope2n-api/result.json

View File

@ -13,7 +13,7 @@ docker compose --profile local up -d
```bash
pip install pytest requests deepdiff
IDP_HOST=https://sbt.idp.sdsrv.ai IDP_USERNAME=sbt IDP_PASSWORD=7Eg4AbWIXDnufgn python3 -m pytest --log-cli-level=DEBUG --capture=sys tests
IDP_HOST=http://ocrnlb-stagging-671b98778cba8a08.elb.ap-southeast-1.amazonaws.com IDP_USERNAME=sbt IDP_PASSWORD=7Eg4AbWIXDnufgn python3 -m pytest --log-cli-level=DEBUG --capture=sys tests
```
IDP_HOST=http://ec2-54-169-227-39.ap-southeast-1.compute.amazonaws.com:9881 IDP_USERNAME=sbt IDP_PASSWORD=abc python3 -m pytest --log-cli-level=DEBUG --capture=sys tests

File diff suppressed because one or more lines are too long

View File

@ -76,6 +76,7 @@ MIDDLEWARE = [
'corsheaders.middleware.CorsMiddleware',
"whitenoise.middleware.WhiteNoiseMiddleware",
"django.middleware.locale.LocaleMiddleware",
"fwd_api.middleware.logging_request_response_middleware.LoggingMiddleware"
]
LOCALE_PATHS = [
@ -207,6 +208,7 @@ FILE_UPLOAD_HANDLERS = [
CORS_ORIGIN_ALLOW_ALL = True
MEDIA_ROOT = env.str("MEDIA_ROOT", default=r"/var/www/example.com/media/")
LOG_ROOT = env.str("LOG_ROOT", default="/app/log")
BROKER_URL = env.str("BROKER_URL", default="amqp://test:test@107.120.70.226:5672//")
CELERY_TASK_TRACK_STARTED = True
# CELERY_TASK_TIME_LIMIT = 30 * 60
@ -256,3 +258,42 @@ CACHES = {
'BACKEND': 'django.core.cache.backends.dummy.DummyCache',
}
}
LOGGING = {
'version': 1,
'disable_existing_loggers': False,
'formatters': {
'verbose': {
'format': '{levelname} {asctime} {module} {message}',
'style': '{',
},
'simple': {
'format': '{levelname} {message}',
'style': '{',
},
},
'handlers': {
'console': {
'class': 'logging.StreamHandler',
'formatter': 'verbose',
},
'file': {
"class": 'logging.handlers.TimedRotatingFileHandler',
'filename': f'{LOG_ROOT}/sbt_idp_BE.log', # Specify the path and filename for the log file
"when": "midnight",
"interval": 1,
'backupCount': 10,
'formatter': 'verbose',
},
},
'loggers': {
'django': {
'handlers': ['console', 'file'],
'level': 'INFO',
},
'': {
'handlers': ['console', 'file'],
'level': 'INFO',
}
},
}
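
For reference, a minimal sketch (not part of the diff; module and function names are hypothetical) of how application code picks this configuration up — the catch-all `''` logger routes any `logging.getLogger(__name__)` call to both the console handler and the midnight-rotating file under `LOG_ROOT`:

```python
# Illustrative only; assumes the LOGGING dict above has been loaded by Django.
import logging

logger = logging.getLogger(__name__)  # resolves through the root ('') logger above

def handle_request(request_id: str) -> None:
    # INFO and above end up on stdout and in LOG_ROOT/sbt_idp_BE.log,
    # which rotates at midnight and keeps 10 backups per the config above.
    logger.info("received request %s", request_id)
    try:
        raise ValueError("demo failure")
    except ValueError:
        logger.exception("failed to process request %s", request_id)
```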

View File

@ -26,6 +26,9 @@ from ..utils.subsidiary import map_subsidiary_long_to_short, map_subsidiary_shor
from ..utils.report import aggregate_overview
from fwd_api.utils.accuracy import predict_result_to_ready
import copy
import logging
logger = logging.getLogger(__name__)
redis_client = RedisUtils()
@ -227,8 +230,8 @@ class AccuracyViewSet(viewsets.ViewSet):
retailer = rq.predict_result.get("content", {}).get("document", [])[
0].get("content", [])[0].get("value", [])
except Exception as e:
print(f"[ERROR]: {e}")
print(f"[ERROR]: {rq}")
logger.error(f"{e}")
logger.error(f"{rq}")
data.append({
'RequestID': rq.request_id,
'RedemptionID': rq.redemption_id,
@ -603,8 +606,8 @@ class AccuracyViewSet(viewsets.ViewSet):
subsidiary_overview_reports += this_overview
except Exception as e:
print(
f"[WARM]: Unable to retrive data {key} from Redis, skipping...")
logger.warning(
f"Unable to retrieve data {key} from Redis, skipping...")
data = aggregate_overview(subsidiary_overview_reports)
for item in data:
if item.get("subs") == "+":

View File

@ -21,7 +21,9 @@ from ..utils import file as FileUtils
from ..utils import process as ProcessUtil
from ..utils.process import UserData
from drf_spectacular.utils import extend_schema
import logging
logger = logging.getLogger(__name__)
class CtelTemplateViewSet(viewsets.ViewSet):
lookup_field = "username"
@ -121,7 +123,7 @@ class CtelTemplateViewSet(viewsets.ViewSet):
serializer = CreateTemplateRequest(data=request.data)
serializer.is_valid()
if not serializer.is_valid():
print(serializer.errors)
logger.error(serializer.errors)
raise InvalidException(excArgs=list(serializer.errors.keys()))
data = serializer.validated_data
@ -156,7 +158,7 @@ class CtelTemplateViewSet(viewsets.ViewSet):
serializer = UpdateTemplateRequest(data=data)
serializer.is_valid()
if not serializer.is_valid():
print(serializer.errors)
logger.error(serializer.errors)
raise InvalidException(excArgs=list(serializer.errors.keys()))
data = serializer.validated_data

View File

@ -21,6 +21,9 @@ from ..utils.crypto import sds_authenticator, admin_sds_authenticator, SdsAuthen
import datetime
from ..request.LoginRequest import LoginRequest
from ..utils.date import default_zone
import logging
logger = logging.getLogger(__name__)
from fwd import settings
@ -45,7 +48,7 @@ class CtelUserViewSet(viewsets.ViewSet):
@action(detail=False, url_path="login", methods=["POST"])
def login(self, request):
serializer = LoginRequest(data=request.data)
print(serializer.is_valid(raise_exception=True))
logger.error(serializer.is_valid(raise_exception=True))
data = serializer.validated_data
token_limit = 999999
@ -157,7 +160,7 @@ class CtelUserViewSet(viewsets.ViewSet):
serializer = UpsertUserRequest(data=request.data)
serializer.is_valid()
if not serializer.is_valid():
print(serializer.errors)
logger.error(serializer.errors)
raise InvalidException(excArgs=list(serializer.errors.keys()))
data = serializer.validated_data

View File

@ -24,6 +24,9 @@ from ..models import SubscriptionRequest, SubscriptionRequestFile, OcrTemplate,
from ..response.ReportSerializer import ReportSerializer
from ..utils import file as FileUtils
from ..utils import process as ProcessUtil
import logging
logger = logging.getLogger(__name__)
class CtelViewSet(viewsets.ViewSet):
lookup_field = "username"
@ -92,8 +95,8 @@ class CtelViewSet(viewsets.ViewSet):
elif file_extension in image_extensions:
b_url = ProcessUtil.process_image_file(file_name, file_obj, new_request, user, "all", 0)
j_time = time.time()
print(f"[INFO]: Duration of Pre-processing: {j_time - s_time}s")
print(f"[INFO]: b_url: {b_url}")
logger.info(f"Duration of Pre-processing: {j_time - s_time}s")
logger.info(f"b_url: {b_url}")
if p_type in standard_ocr_list:
ProcessUtil.send_to_queue2(rq_id + "_sub_0", sub.id, b_url, user.id, p_type)
if p_type == ProcessType.TEMPLATE_MATCHING.value:
@ -441,7 +444,7 @@ class CtelViewSet(viewsets.ViewSet):
content_type = 'application/pdf' if temp.file_name.split(".")[-1] in pdf_extensions else content_type
if user.id != user_data['internal_id'] or user.status != EntityStatus.ACTIVE.value:
raise PermissionDeniedException()
print(temp.file_path)
logger.info(temp.file_path)
return HttpResponse(FileWrapper(FileUtils.get_file(temp.file_path)), status=status.HTTP_200_OK,
headers={'Content-Disposition': 'filename={fn}'.format(fn=temp.file_name)},
content_type=content_type)
@ -459,7 +462,6 @@ class CtelViewSet(viewsets.ViewSet):
if user.id != user_data['internal_id'] or user.status != EntityStatus.ACTIVE.value:
raise PermissionDeniedException()
# print(f"[DEBUG]: rq: {rq}, file_name: {file_name}")
file_data = SubscriptionRequestFile.objects.filter(request=rq, file_name=file_name)[0]
except IndexError:
raise NotFoundException(excArgs='file')
@ -527,7 +529,6 @@ class CtelViewSet(viewsets.ViewSet):
serializer: ReportSerializer = ReportSerializer(data=report_filter, many=True)
serializer.is_valid()
# print(f"[DEBUG]: result: {serializer.data[0]}")
if report_filter[0].status == 400:
raise FileContentInvalidException()
if report_filter[0].status == 100: # continue, only return when result is fullfilled

View File

@ -2,8 +2,25 @@ from celery import Celery
from fwd import settings
from fwd_api.exception.exceptions import GeneralException
from kombu.utils.uuid import uuid
from celery.utils.log import get_task_logger
logger = get_task_logger(__name__)
def is_it_an_index(id):
ret = True
if not isinstance(id, str):
logger.info("NOT A STRING")
return False
if "/" in id:
logger.info("/ in ID")
return False
if id.count("_") > 5 or id.count("_") < 1:
_c = id.count("_")
logger.info(f"_ HAS {_c}")
return False
return ret
class CeleryConnector:
task_routes = {
# save result
@ -110,6 +127,8 @@ class CeleryConnector:
def send_task(self, name=None, args=None, countdown=None):
if name not in self.task_routes or 'queue' not in self.task_routes[name]:
raise GeneralException("System")
return self.app.send_task(name, args, queue=self.task_routes[name]['queue'], expires=300, countdown=countdown)
task_id = args[0] + "_" + uuid()[:4] if isinstance(args, tuple) and is_it_an_index(args[0]) else uuid()
logger.info(f"SEND task name: {name} - {task_id} | args: {args} | countdown: {countdown}")
return self.app.send_task(name, args, queue=self.task_routes[name]['queue'], expires=300, countdown=countdown, task_id=task_id)
c_connector = CeleryConnector()
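
As a hedged illustration of the new `task_id` scheme (the request id and URL below are fabricated): `is_it_an_index` accepts plain strings with one to five underscores and no slash, so such ids get a short uuid suffix and stay greppable across the BE and Celery logs, while anything else falls back to a plain uuid.

```python
# Example values only; real request ids are generated elsewhere in the codebase.
from kombu.utils.uuid import uuid

args = ("SBT_20240626_0001_sub_0", "http://example.local/file.jpg")  # hypothetical task args
suffix = uuid()[:4]

# "SBT_20240626_0001_sub_0" is a str, has no "/" and 4 underscores, so is_it_an_index() is True
# and the Celery task id becomes e.g. "SBT_20240626_0001_sub_0_a1b2" instead of a bare uuid.
task_id = args[0] + "_" + suffix if isinstance(args, tuple) else uuid()
print(task_id)
```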

View File

@ -7,6 +7,7 @@ from multiprocessing.pool import ThreadPool
from fwd_api.models import SubscriptionRequest, UserProfile
from fwd_api.celery_worker.worker import app
from fwd_api.celery_worker.task_warpper import VerboseTask
from ..constant.common import FolderFileType, image_extensions
from ..exception.exceptions import FileContentInvalidException
from fwd_api.models import SubscriptionRequestFile, FeedbackRequest, Report
@ -23,7 +24,6 @@ from fwd_api.utils.accuracy import predict_result_to_ready
from celery.utils.log import get_task_logger
from fwd import settings
logger = get_task_logger(__name__)
s3_client = S3Util.MinioS3Client(
@ -66,7 +66,7 @@ def process_image_file(file_name: str, file_path, request, user) -> list:
'request_file_id': new_request_file.code
}]
@app.task(name="csv_feedback")
@app.task(base=VerboseTask, name="csv_feedback")
def process_csv_feedback(csv_file_path, feedback_id):
# load file to RAM
status = {}
@ -124,7 +124,7 @@ def process_csv_feedback(csv_file_path, feedback_id):
try:
image.processing_time = time_cost.get(image.doc_type, [0 for _ in range(image.index_in_request)])[image.index_in_request]
except Exception as e:
print(f"[ERROR] image.doc_type: {image.doc_type} - image.index_in_request: {image.index_in_request} - time_cost: {time_cost} - {e}")
logger.error(f"image.doc_type: {image.doc_type} - image.index_in_request: {image.index_in_request} - time_cost: {time_cost} - {e}")
if not validate_feedback_file(_feedback_result, _predict_result):
status[request_id] = "Missalign imei number between feedback and predict"
# continue
@ -138,7 +138,7 @@ def process_csv_feedback(csv_file_path, feedback_id):
_predict_result = {"retailername": None, "sold_to_party": None, "invoice_no": None, "purchase_date": [], "imei_number": [_predict_result["imei_number"][image.index_in_request]]}
_feedback_result = {"retailername": None, "sold_to_party": None, "invoice_no": None, "purchase_date": None, "imei_number": [_feedback_result["imei_number"][image.index_in_request]]} if _feedback_result and len(_feedback_result["imei_number"]) > image.index_in_request else None
except Exception as e:
print (f"[ERROR]: {request_id} - {e}")
logger.error(f"{request_id} - {e}")
image.predict_result = _predict_result
image.feedback_result = _feedback_result
# image.reviewed_result = _reviewed_result
@ -160,10 +160,9 @@ def process_csv_feedback(csv_file_path, feedback_id):
os.remove(file_path)
except Exception as e:
logger.error(f"Unable to set S3: {e}")
print(f"Unable to set S3: {e}")
feedback_rq.save()
@app.task(name='do_pdf')
@app.task(base=VerboseTask, name='do_pdf')
def process_pdf(rq_id, sub_id, p_type, user_id, files):
"""
files: [{
@ -229,7 +228,7 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files):
for sub_rq_id, sub_id, urls, user_id, p_type, metadata in to_queue:
ProcessUtil.send_to_queue2(sub_rq_id, sub_id, urls, user_id, p_type, metadata)
@app.task(name='upload_file_to_s3')
@app.task(base=VerboseTask, name='upload_file_to_s3')
def upload_file_to_s3(local_file_path, s3_key, request_id):
if s3_client.s3_client is not None:
try:
@ -239,12 +238,11 @@ def upload_file_to_s3(local_file_path, s3_key, request_id):
sub_request.save()
except Exception as e:
logger.error(f"Unable to set S3: {e}")
print(f"Unable to set S3: {e}")
return
else:
logger.info(f"S3 is not available, skipping,...")
@app.task(name='upload_feedback_to_s3')
@app.task(base=VerboseTask, name='upload_feedback_to_s3')
def upload_feedback_to_s3(local_file_path, s3_key, feedback_id):
if s3_client.s3_client is not None:
try:
@ -254,12 +252,11 @@ def upload_feedback_to_s3(local_file_path, s3_key, feedback_id):
feed_request.save()
except Exception as e:
logger.error(f"Unable to set S3: {e}")
print(f"Unable to set S3: {e}")
return
else:
logger.info(f"S3 is not available, skipping,...")
@app.task(name='upload_report_to_s3')
@app.task(base=VerboseTask, name='upload_report_to_s3')
def upload_report_to_s3(local_file_path, s3_key, report_id, delay):
if s3_client.s3_client is not None:
try:
@ -271,20 +268,19 @@ def upload_report_to_s3(local_file_path, s3_key, report_id, delay):
report.save()
except Exception as e:
logger.error(f"Unable to set S3: {e}")
print(f"Unable to set S3: {e}")
return
else:
logger.info(f"S3 is not available, skipping,...")
@app.task(name='remove_local_file')
@app.task(base=VerboseTask, name='remove_local_file')
def remove_local_file(local_file_path, request_id):
print(f"[INFO] Removing local file: {local_file_path}, ...")
logger.info(f"Removing local file: {local_file_path}, ...")
try:
os.remove(local_file_path)
except Exception as e:
logger.info(f"Unable to remove local file: {e}")
@app.task(name='upload_obj_to_s3')
@app.task(base=VerboseTask, name='upload_obj_to_s3')
def upload_obj_to_s3(byte_obj, s3_key):
if s3_client.s3_client is not None:
obj = base64.b64decode(byte_obj)

View File

@ -2,6 +2,7 @@ import traceback
from fwd_api.models import SubscriptionRequest, Report, ReportFile
from fwd_api.celery_worker.worker import app
from fwd_api.celery_worker.task_warpper import VerboseTask
from ..utils import s3 as S3Util
from ..utils.accuracy import (update_temp_accuracy, IterAvg,
count_transactions, extract_report_detail_list, calculate_a_request,
@ -37,7 +38,7 @@ def mean_list(l):
return 0
return sum(l)/len(l)
@app.task(name='make_a_report_2')
@app.task(base=VerboseTask, name='make_a_report_2')
def make_a_report_2(report_id, query_set):
report_type = query_set.pop("report_type", "accuracy")
if report_type == "accuracy":
@ -236,11 +237,11 @@ def create_accuracy_report(report_id, **kwargs):
s3_key = save_report_to_S3(report.report_id, local_workbook)
except IndexError as e:
print(e)
logger.error(e)
traceback.print_exc()
print("NotFound request by report id, %d", report_id)
logger.error("NotFound request by report id, %d", report_id)
except Exception as e:
print("[ERROR]: an error occured while processing report: ", report_id)
logger.error("An error occured while processing report: ", report_id)
traceback.print_exc()
return 400
@ -286,10 +287,10 @@ def create_billing_report(report_id, **kwargs):
local_workbook = save_workbook_file(detail_file_name, report, data_workbook)
s3_key = save_report_to_S3(report.report_id, local_workbook)
except IndexError as e:
print(e)
logger.error(e)
traceback.print_exc()
print("NotFound request by report id, %d", report_id)
logger.error("NotFound request by report id, %d", report_id)
except Exception as e:
print("[ERROR]: an error occured while processing report: ", report_id)
logger.error("An error occured while processing report: ", report_id)
traceback.print_exc()
return 400

View File

@ -12,6 +12,10 @@ from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile
from fwd_api.constant.common import ProcessType
from fwd_api.utils.redis import RedisUtils
from fwd_api.utils import process as ProcessUtil
from fwd_api.celery_worker.task_warpper import VerboseTask
from celery.utils.log import get_task_logger
logger = get_task_logger(__name__)
redis_client = RedisUtils()
@ -55,11 +59,10 @@ def aggregate_result(results):
return des_result
def print_id(rq_id):
print(" [x] Received {rq}".format(rq=rq_id))
logger.info(" [x] Received {rq}".format(rq=rq_id))
def to_status(result):
print('X')
if 'status' in result and result['status'] not in [200, 201, 202]:
return 4
return 3
@ -72,7 +75,7 @@ def update_user(rq: SubscriptionRequest):
sub.current_token += ProcessUtil.token_value(int(rq.process_type))
sub.save()
@app.task(name='process_sap_invoice_result')
@app.task(base=VerboseTask, name='process_sap_invoice_result')
def process_invoice_sap_result(rq_id, result):
try:
rq: SubscriptionRequest = \
@ -85,16 +88,16 @@ def process_invoice_sap_result(rq_id, result):
update_user(rq)
except IndexError as e:
print(e)
print("NotFound request by requestId, %d", rq_id)
logger.error(e)
logger.error("NotFound request by requestId, %d", rq_id)
except Exception as e:
print(e)
print("Fail Invoice %d", rq_id)
logger.error(e)
logger.error("Fail Invoice %d", rq_id)
traceback.print_exc()
return "FailInvoice"
@app.task(name='process_fi_invoice_result')
@app.task(base=VerboseTask, name='process_fi_invoice_result')
def process_invoice_fi_result(rq_id, result):
try:
rq: SubscriptionRequest = \
@ -107,15 +110,15 @@ def process_invoice_fi_result(rq_id, result):
update_user(rq)
except IndexError as e:
print(e)
print("NotFound request by requestId, %d", rq_id)
logger.error(e)
logger.error("NotFound request by requestId, %d", rq_id)
except Exception as e:
print(e)
print("Fail Invoice %d", rq_id)
logger.error(e)
logger.error("Fail Invoice %d", rq_id)
traceback.print_exc()
return "FailInvoice"
@app.task(name='process_manulife_invoice_result')
@app.task(base=VerboseTask, name='process_manulife_invoice_result')
def process_invoice_manulife_result(rq_id, result):
try:
rq: SubscriptionRequest = \
@ -128,17 +131,17 @@ def process_invoice_manulife_result(rq_id, result):
update_user(rq)
except IndexError as e:
print(e)
print("NotFound request by requestId, %d", rq_id)
logger.error(e)
logger.error("NotFound request by requestId, %d", rq_id)
except Exception as e:
print(e)
print("Fail Invoice %d", rq_id)
logger.error(e)
logger.error("Fail Invoice %d", rq_id)
traceback.print_exc()
return "FailInvoice"
random_processor_name = None
@app.task(name='process_sbt_invoice_result')
@app.task(base=VerboseTask, name='process_sbt_invoice_result')
def process_invoice_sbt_result(rq_id, result, metadata):
global random_processor_name
if random_processor_name is None:
@ -161,7 +164,6 @@ def process_invoice_sbt_result(rq_id, result, metadata):
rq.status = 200 # stop waiting
results = redis_client.get_all_cache(rq_id)
rq.predict_result = aggregate_result(results)
# print(f"[DEBUG]: rq.predict_result: {rq.predict_result}")
ai_inference_profile = {}
doc_type_string = ""
for idx, result in results.items():
@ -180,13 +182,13 @@ def process_invoice_sbt_result(rq_id, result, metadata):
rq.save()
update_user(rq)
except IndexError as e:
print(e)
print("NotFound request by requestId, %d", rq_id)
logger.error(e)
logger.error("NotFound request by requestId, %d", rq_id)
rq.ai_inference_time = 0
rq.save()
except Exception as e:
print(e)
print("Fail Invoice %d", rq_id)
logger.error(e)
logger.error("Fail Invoice %d", rq_id)
traceback.print_exc()
rq.ai_inference_time = 0
rq.save()

View File

@ -0,0 +1,16 @@
from celery import Task
from celery.utils.log import get_task_logger
logger = get_task_logger(__name__)
class VerboseTask(Task):
abstract = True
def on_failure(self, exc, task_id, args, kwargs, einfo):
# Task failed. What do you want to do?
logger.error('FAILURE: Task raised an exception: {}'.format(exc))
def on_success(self, retval, task_id, args, kwargs):
logger.info(f"SUCCESS: Task: {task_id} succeeded | retval: {retval} | args: {args} | kwargs: {kwargs}")
def before_start(self, task_id, args, kwargs):
logger.info(f"BEFORE_START: Task: {task_id} | args: {args} | kwargs: {kwargs}")

View File

@ -2,7 +2,9 @@ import os
import django
from celery import Celery
from celery.signals import setup_logging # noqa
from kombu import Queue
import copy
from fwd import settings
@ -19,13 +21,15 @@ app: Celery = Celery(
app.config_from_object("django.conf:settings", namespace="CELERY")
app.autodiscover_tasks()
@app.on_after_finalize.connect
def setup_periodic_tasks(sender, **kwargs):
from fwd_api.bg_tasks.clean_local_files import clean_media
sender.add_periodic_task(
10.0, clean_media.s(), expires=120.0
)
@setup_logging.connect
def config_loggers(*args, **kwargs):
from logging.config import dictConfig # noqa
from django.conf import settings # noqa
log_config = copy.deepcopy(settings.LOGGING)
if log_config.get("handlers", {}).get("file", {}).get("filename", None):
log_config["handlers"]["file"]["filename"] = log_config["handlers"]["file"]["filename"].replace(".log", "_celery.log")
dictConfig(log_config)
app.conf.update({
'task_queues':
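
The `config_loggers` hook above only re-points the file handler; a sketch of the resulting filenames, assuming the `LOG_ROOT=/app/log` default from settings:

```python
# Assuming LOG_ROOT=/app/log: Django keeps writing sbt_idp_BE.log,
# while every Celery worker gets its own rotated sbt_idp_BE_celery.log.
filename = "/app/log/sbt_idp_BE.log"
celery_filename = filename.replace(".log", "_celery.log")
assert celery_filename == "/app/log/sbt_idp_BE_celery.log"
```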

View File

@ -9,7 +9,9 @@ from fwd_api.annotation.api import throw_on_failure
from fwd_api.utils import date as DateUtil
from fwd_api.utils.crypto import ctel_cryptor, sds_authenticator, image_authenticator
from fwd_api.exception.exceptions import InvalidException, TokenExpiredException, PermissionDeniedException
import logging
logger = logging.getLogger(__name__)
@throw_on_failure(TokenExpiredException())
def authenticate_by_param_token(request):
@ -27,8 +29,7 @@ def authenticate_by_param_token(request):
# @throw_on_failure(TokenExpiredException())
def authenticate_by_authorization_header(request):
token = request.headers['Authorization']
print(f"[INFO]: recived request at {datetime.datetime.utcnow()}, with path {request.path}")
# print(f"[DEBUG]: token: {token}")
logger.info(f"recived request at {datetime.datetime.utcnow()}, with path {request.path}")
data = sds_authenticator.decode_data(token)
if 'expired_at' not in data or 'status' not in data or 'subscription_id' not in data:
raise PermissionDeniedException()
@ -72,7 +73,7 @@ class AuthFilter(authentication.BaseAuthentication):
def authenticate(self, request):
if request.path in self.white_list_path:
print("App API")
logger.debug("App API")
return None, None
if '/api/ctel/media/' in request.path or '/api/ctel/v2/media/' in request.path:
return authenticate_by_param_token(request)

View File

@ -151,8 +151,8 @@ class Command(BaseCommand):
prepared_data_copy = copy.deepcopy(prepared_data)
s3_client = MinioS3Client(
# endpoint='http://107.120.133.27:9884',
access_key='AKIA3AFPFVWZHTZHB6FW',
secret_key='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',
access_key='secret',
secret_key='secret+HRcfOsbXhx0YSNOLxdW',
bucket_name='ocr-sds'
)
# file = open("modified.txt", "w")

View File

@ -0,0 +1,34 @@
import logging
import uuid
from django.utils.deprecation import MiddlewareMixin
logger = logging.getLogger(__name__)
class LoggingMiddleware(MiddlewareMixin):
def process_request(self, request):
trace_id = request.headers.get('X-Trace-ID', str(uuid.uuid4()))
request.META['X-Trace-ID'] = trace_id
request_body = ""
content_type = request.headers.get("Content-Type", "")
if request.method in ('POST', 'PUT', 'PATCH') and not content_type.startswith("multipart/form-data"):
request_body = request.body.decode(encoding="utf-8", errors="ignore")
logger.info(f"Request: {request.method} {request.path} | Trace ID: {trace_id} | "
f"Body: {request_body} | Headers: {request.headers}")
def process_response(self, request, response):
trace_id = request.META.get('X-Trace-ID', str(uuid.uuid4()))
response_body = response.content.decode("utf-8", errors="ignore") if response.content else ""
logger.info(f"Response: {request.method} {request.path} | Trace ID: {trace_id} | "
f"Status: {response.status_code} | Body: {response_body}")
response['X-Trace-ID'] = trace_id
return response
def process_exception(self, request, exception):
trace_id = request.META.get('X-Trace-ID', str(uuid.uuid4()))
logger.error(f"Exception: {request.method} {request.path} | Trace ID: {trace_id} | "
f"Error: {exception}")

View File

@ -16,6 +16,9 @@ from django.utils import timezone
from fwd import settings
from ..models import SubscriptionRequest, Report, ReportFile
from typing import Union, List, Dict
import logging
logger = logging.getLogger(__name__)
VALID_KEYS = ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"]
OPTIONAL_KEYS = ['invoice_no']
@ -145,7 +148,7 @@ class ReportAccumulateByRequest:
if report_file.doc_type in ["imei", "invoice", "all"]:
doc_type = report_file.doc_type
else:
print(f"[WARM]: Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
logger.warning(f"Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
total["num_imei"] += 1 if doc_type == "imei" else 0
total["num_invoice"] += 1 if doc_type == "invoice" else 0
@ -160,13 +163,13 @@ class ReportAccumulateByRequest:
total["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, []))
if not total["average_processing_time"].get(report_file.doc_type, None):
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
logger.warning(f"Weird doctype: {report_file.doc_type}")
total["average_processing_time"][report_file.doc_type] = IterAvg()
total["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
total["average_processing_time"]["avg"].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
if not total["file_average_processing_time"].get(report_file.doc_type, None):
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
logger.warning(f"Weird doctype: {report_file.doc_type}")
total["file_average_processing_time"][report_file.doc_type] = IterAvg()
total["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0
total["file_average_processing_time"]["avg"].add_avg(image_avg_cost, 1) if image_avg_cost else 0
@ -175,7 +178,7 @@ class ReportAccumulateByRequest:
if report_file.doc_type in ["imei", "invoice", "all"]:
doc_type = report_file.doc_type
else:
print(f"[WARM]: Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
logger.warning(f"Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
total["usage"]["imei"] += 1 if doc_type == "imei" else 0
total["usage"]["invoice"] += 1 if doc_type == "invoice" else 0
total["usage"]["total_images"] += 1
@ -193,7 +196,7 @@ class ReportAccumulateByRequest:
if report_file.doc_type in ["imei", "invoice", "all"]:
doc_type = report_file.doc_type
else:
print(f"[WARM]: Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
logger.warning(f"Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
month["num_imei"] += 1 if doc_type == "imei" else 0
month["num_invoice"] += 1 if doc_type == "invoice" else 0
@ -206,13 +209,13 @@ class ReportAccumulateByRequest:
month["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, []))
if not month["average_processing_time"].get(report_file.doc_type, None):
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
logger.warning(f"Weird doctype: {report_file.doc_type}")
month["average_processing_time"][report_file.doc_type] = IterAvg()
month["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
month["average_processing_time"]["avg"].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
if not month["file_average_processing_time"].get(report_file.doc_type, None):
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
logger.warning(f"Weird doctype: {report_file.doc_type}")
month["file_average_processing_time"][report_file.doc_type] = IterAvg()
month["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0
month["file_average_processing_time"]["avg"].add_avg(image_avg_cost, 1) if image_avg_cost else 0
@ -221,7 +224,7 @@ class ReportAccumulateByRequest:
if report_file.doc_type in ["imei", "invoice", "all"]:
doc_type = report_file.doc_type
else:
print(f"[WARM]: Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
logger.warning(f"Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
month["usage"]["imei"] += 1 if doc_type == "imei" else 0
month["usage"]["invoice"] += 1 if doc_type == "invoice" else 0
month["usage"]["total_images"] += 1
@ -238,7 +241,7 @@ class ReportAccumulateByRequest:
if report_file.doc_type in ["imei", "invoice", "all"]:
doc_type = report_file.doc_type
else:
print(f"[WARM]: Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
logger.warning(f"Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
day_data["num_imei"] += 1 if doc_type == "imei" else 0
day_data["num_invoice"] += 1 if doc_type == "invoice" else 0
day_data["report_files"].append(report_file)
@ -252,13 +255,13 @@ class ReportAccumulateByRequest:
day_data["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, []))
if not day_data["average_processing_time"].get(report_file.doc_type, None):
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
logger.warning(f"Weird doctype: {report_file.doc_type}")
day_data["average_processing_time"][report_file.doc_type] = IterAvg()
day_data["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
day_data["average_processing_time"]['avg'].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
if not day_data["file_average_processing_time"].get(report_file.doc_type, None):
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
logger.warning(f"Weird doctype: {report_file.doc_type}")
day_data["file_average_processing_time"][report_file.doc_type] = IterAvg()
day_data["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0
day_data["file_average_processing_time"]['avg'].add_avg(image_avg_cost, 1) if image_avg_cost else 0
@ -273,7 +276,7 @@ class ReportAccumulateByRequest:
self.data[this_month] = [copy.deepcopy(self.month_format), {}]
self.data[this_month][0]["extraction_date"] = "Subtotal (" + timezone.localtime(request.created_at).strftime("%Y-%m") + ")"
if not self.data[this_month][1].get(this_day, None):
print(f"[INFO] Adding a new day: {this_day} for report: {report.id} ...")
logger.info(f" Adding a new day: {this_day} for report: {report.id} ...")
self.data[this_month][1][this_day] = copy.deepcopy(self.day_format)[0]
self.data[this_month][1][this_day]['extraction_date'] = timezone.localtime(request.created_at).strftime("%Y-%m-%d")
usage = self.count_transactions_within_day(this_day)
@ -289,6 +292,7 @@ class ReportAccumulateByRequest:
_ai_cost = request.ai_inference_time
processing_time_by_averaging_request_cost = (_be_cost + _ai_cost)/_number_of_file if _number_of_file > 0 else 0
for report_file in report_files:
# report_file.time_cost = processing_time_by_averaging_request_cost
_report_file = copy.deepcopy(report_file)
if _report_file.is_bad_image or _report_file.bad_image_reason in settings.ACC_EXCLUDE_RESEASONS:
_report_file.acc = None
@ -844,7 +848,6 @@ def calculate_accuracy(key_name: str, inference: Dict[str, Union[str, List]], ta
def calculate_avg_accuracy(acc, type, keys=[]):
acc_list = []
# print(f"[DEBUG]: type: {type} - acc: {acc}")
for key in keys:
this_acc = acc.get(type, {}).get(key, [])
if len(this_acc) > 0:
@ -985,7 +988,7 @@ def calculate_a_request(report, request):
if request.redemption_id:
_sub = map_subsidiary_short_to_long(request.redemption_id[:2])
else:
print(f"[WARN]: empty redemption_id, check request: {request.request_id}")
logger.warning(f"empty redemption_id, check request: {request.request_id}")
# Little trick to replace purchase date to normalized
if len(att["normalized_data"]["feedback"].get("purchase_date", [])) > 0:
@ -1008,9 +1011,6 @@ def calculate_a_request(report, request):
if att["is_bad_image"] or image.reason in settings.ACC_EXCLUDE_RESEASONS:
if image.reason in settings.ACC_EXCLUDE_RESEASONS:
# if image.reason in settings.ACC_EXCLUDE_RESEASONS:
# print(f"[DEBUG]: {image.reason}")
# TODO: Exclude bad image accuracy from average accuracy
_att["avg_acc"] = None
for t in _att["acc"].keys():
for k in _att["acc"][t].keys():
@ -1073,7 +1073,7 @@ def calculate_a_request(report, request):
request_att["total_images"] += 1
request_att["err"] += _att["err"]
except Exception as e:
print(f"[ERROR]: failed to calculate request: {request.request_id} - request_file: {image.file_name} because of {e}")
logger.error(f"Failed to calculate request: {request.request_id} - request_file: {image.file_name} because of {e}")
continue
return request_att, report_files, atts

View File

@ -23,6 +23,9 @@ import imagesize
import csv
from openpyxl import load_workbook
from openpyxl.styles import Font, Border, Side, PatternFill, NamedStyle, numbers, Alignment
import logging
logger = logging.getLogger(__name__)
s3_client = S3Util.MinioS3Client(
endpoint=settings.S3_ENDPOINT,
@ -96,7 +99,6 @@ def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUES
for f in files:
if not isinstance(f, TemporaryUploadedFile):
# print(f'[DEBUG]: {f.name}')
raise InvalidException(excArgs="files")
extension = f.name.split(".")[-1].lower() in allowed_file_extensions
if not extension or "." not in f.name:
@ -116,7 +118,6 @@ def validate_csv_feedback(files, max_file_num=1, min_file_num=1, file_field="csv
for f in files:
if not isinstance(f, TemporaryUploadedFile):
# print(f'[DEBUG]: {f.name}')
raise InvalidException(excArgs="files")
extension = f.name.split(".")[-1].lower() in ["csv"]
if not extension or "." not in f.name:
@ -131,7 +132,7 @@ def get_file(file_path: str):
try:
return open(file_path, 'rb')
except Exception as e:
print(e)
logger.error(e)
raise GeneralException("System")
@ -226,7 +227,7 @@ def delete_file_with_path(file_path: str) -> bool:
os.remove(file_path)
return True
except Exception as e:
print(e)
logger.error(e)
return False
@ -239,7 +240,7 @@ def save_template_file(file_name: str, rq: OcrTemplate, file: TemporaryUploadedF
os.makedirs(folder_path)
return save_file_with_path(file_name, file, quality, folder_path)
except Exception as e:
print(e)
logger.error(e)
raise ServiceUnavailableException()
def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path: str):
@ -253,7 +254,7 @@ def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, fo
except InvalidDecompressedSizeException as e:
raise e
except Exception as e:
print(e)
logger.error(e)
raise ServiceUnavailableException()
return file_path
@ -306,7 +307,7 @@ def resize_and_save_file(file_name: str, rq: SubscriptionRequest, file: Temporar
except InvalidDecompressedSizeException as e:
raise e
except Exception as e:
print(f"[ERROR]: {e}")
logger.error(f"{e}")
raise ServiceUnavailableException()
def save_to_S3(file_name, rq, local_file_path):
@ -319,21 +320,19 @@ def save_to_S3(file_name, rq, local_file_path):
c_connector.remove_local_file((local_file_path, request_id))
return s3_key
except Exception as e:
print(f"[ERROR]: {e}")
logger.error(f"{e}")
raise ServiceUnavailableException()
def save_feedback_to_S3(file_name, id, local_file_path):
try:
# print(f"[DEBUG]: Uploading feedback to S3 with local path {local_file_path}, id: {id}, file_name: {file_name}")
assert len(local_file_path.split("/")) >= 3, "file_path must have at least feedback_folder and feedback_id"
# s3_key = os.path.join(local_file_path.split("/")[-3], local_file_path.split("/")[-2], file_name)
s3_key = os.path.join("feedback", local_file_path.split("/")[-2], file_name)
# print(f"[DEBUG]: Uploading feedback to S3 with s3_key {s3_key}")
c_connector.upload_feedback_to_s3((local_file_path, s3_key, id))
c_connector.remove_local_file((local_file_path, id))
return s3_key
except Exception as e:
print(f"[ERROR]: {e}")
logger.error(f"{e}")
raise ServiceUnavailableException()
def save_report_to_S3(id, local_file_path, delay=0):
@ -343,7 +342,7 @@ def save_report_to_S3(id, local_file_path, delay=0):
c_connector.remove_local_file((local_file_path, id))
return s3_key
except Exception as e:
print(f"[ERROR]: {e}")
logger.error(f"{e}")
raise ServiceUnavailableException()
def download_from_S3(s3_key, local_file_path):
@ -361,7 +360,7 @@ def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, fo
except InvalidDecompressedSizeException as e:
raise e
except Exception as e:
print(e)
logger.error(e)
raise ServiceUnavailableException()
return file_path
@ -400,8 +399,8 @@ def save_img(file_path: str, file: TemporaryUploadedFile, quality):
elif orientation == 8:
image = image.transpose(Image.ROTATE_90)
except Exception as ex:
print(ex)
print("Rotation Error")
logger.error(ex)
logger.error("Rotation Error")
traceback.print_exc()
image = resize(image, max_w=settings.TARGET_MAX_IMAGE_SIZE[0], max_h=settings.TARGET_MAX_IMAGE_SIZE[1])
@ -449,9 +448,9 @@ def get_value(_dict, keys):
else:
value = value.get(key, {})
except Exception as e:
print(f"[ERROR]: {e}")
print(f"[ERROR]: value: {value}")
print(f"[ERROR]: keys: {keys}")
logger.error(f"{e}")
logger.error(f"value: {value}")
logger.error(f"keys: {keys}")
if value is None:
return "-"

View File

@ -5,7 +5,9 @@ from terminaltables import AsciiTable
from rapidfuzz.distance import Levenshtein
from .wiki_diff import inline_diff
import logging
logger = logging.getLogger(__name__)
def is_type_list(x, type):
@ -381,5 +383,5 @@ def eval_kie(preds_e2e: dict[str, dict[str, str]], gt_e2e: dict[str, dict[str, s
)
table = AsciiTable(table_data)
print(table.table)
logger.debug(table.table)
return results, fail_cases

View File

@ -14,6 +14,9 @@ from .ocr_metrics import eval_ocr_metric
import sys
# sys.path.append(os.path.dirname(__file__))
from sdsvkvu.utils.query.sbt_v2 import get_seller, post_process_seller
import logging
logger = logging.getLogger(__name__)
def read_json(file_path: str):
@ -108,7 +111,7 @@ def convert_predict_from_csv(
n_pages = request['pages']
req_id = request['request_id']
if not isinstance(request['doc_type'], str) or not isinstance(request['predict_result'], str):
print(f"[WARNING] Skipped request id {req_id}")
logger.warning(f"] Skipped request id {req_id}")
continue
doc_type_list = request['doc_type'].split(',')
@ -178,7 +181,7 @@ def init_csv(
for request_id in list_request_id:
gt_path = os.path.join(gt_dir, request_id, request_id+".json")
if not os.path.exists(gt_path):
print(f"[WARNING] Skipped request id {os.path.basename(os.path.dirname(gt_path))}")
logger.warning(f"] Skipped request id {os.path.basename(os.path.dirname(gt_path))}")
continue
gt_data = read_json(gt_path)
json_file_list = glob.glob(os.path.join(pred_dir, request_id, "temp_*.json"))
@ -306,7 +309,7 @@ def pick_sample_to_revise(
img_path = [ff for ff in glob.glob(img_path_folder.replace(".json", ".*")) if ".json" not in ff]
if len(img_path) == 0:
print(f"[WARNING] Skipped request id {request_id}")
logger.warning(f"] Skipped request id {request_id}")
continue
img_path = img_path[0]
# img_path = [ff for ff in glob.glob(json_path.replace(".json", ".*"))][0]
@ -354,41 +357,41 @@ if __name__ == "__main__":
csv_path_end_user = "logs/eval_20240115/OCR_15Jan2024.csv"
# Step 1: Convert a csv file to get user submitted results for each request
print("[INFO] Starting convert csv from customer to json")
logger.info(f" Starting convert csv from customer to json")
os.system(f"sudo chmod -R 777 {save_path}")
convert_groundtruth_from_csv(csv_path=csv_path_end_user, save_dir=save_path)
print("[INFO] Converted")
logger.info(f" Converted")
# # Step 2: Convert a csv file to get predict OCR results for each image
print("[INFO] Starting convert csv from SDSV to json")
logger.info(f" Starting convert csv from SDSV to json")
convert_predict_from_csv(csv_path=csv_path, save_dir=save_path)
print("[INFO] Converted")
logger.info(f" Converted")
# # Step 3: Gen initial csv file and calculate OCR result between submitted results and ocr results
print("[INFO] Starting generate csv to get performance")
logger.info(f" Starting generate csv to get performance")
gt_path = save_path
pred_path = save_path
req_to_red_dict = gen_req_to_red_dict(csv_path_end_user)
init_data = init_csv(gt_dir=gt_path, pred_dir=pred_path, req_to_red=req_to_red_dict)
pd.DataFrame(init_data).to_csv(os.path.join(save_csv, "init1.csv"), index=False)
print("[INFO] Done")
logger.info(f" Done")
# # Step 4: Split requests whose accuracy is less than 1 to revise
# print("[INFO] Starting split data to review")
# logger.info(f" Starting split data to review")
# revised_path = os.path.join(save_csv, "revised")
# # shutil.rmtree(revised_path)
# pick_sample_to_revise(ocr_accuracy=init_data, gt_dir=save_path, save_dir=revised_path)
# print("[INFO] Done")
# logger.info(f" Done")
# # Step 5: Merge revised results to gt folder
# print("[INFO] Merging revised data to ground truth folder")
# logger.info(f" Merging revised data to ground truth folder")
# revised_path = os.path.join(save_csv, "revised")
# revised_path = [f'{revised_path}/empty_results', f'{revised_path}/diff_results']
# merge_revised_sample(revised_path_list=revised_path, save_dir=save_path)
# print("Done")
# # Step 6: Caculate OCR result between ocr results and revised results
# print("[INFO] Exporting OCR report")
# logger.info(f" Exporting OCR report")
# init_csv_path = os.path.join(save_csv, "init1.csv")
# report = export_report(init_csv=init_csv_path)
# error_path = os.path.join(save_csv, "errors")
@ -427,6 +430,6 @@ if __name__ == "__main__":
# report.drop(columns=["file_path", "class_name"]).to_csv(os.path.join(save_csv, f"SBT_report_{time.strftime('%Y%m%d')}.csv"), index=False)
# print("[INFO] Done")
# logger.info(f" Done")

View File

@ -4,6 +4,9 @@ import unidecode
import os
import glob
import pandas as pd
import logging
logger = logging.getLogger(__name__)
VOWELS = 'aeouiy' + 'AEOUIY'
CONSONANTS = 'bcdfghjklmnpqrstvxwz' + 'BCDFGHJKLMNPQRSTVXWZ'
@ -194,7 +197,7 @@ def main(overwrite=False):
df.to_csv(f'{SAVE_PATH}/wiki_diff.csv', sep='\t')
df_ = pd.DataFrame(ddata_specialchars)
df_.to_csv(f'{SAVE_PATH}/wiki_diff_specialchars.csv', sep='\t')
print(TOTAL_WORD)
logger.info(TOTAL_WORD)
if __name__ == '__main__':

View File

@ -25,10 +25,9 @@ from ..exception.exceptions import InvalidException, NotFoundException, \
from ..models import UserProfile, OcrTemplate, OcrTemplateBox, \
Subscription, SubscriptionRequestFile, SubscriptionRequest
from ..celery_worker.client_connector import c_connector
import logging
from celery.utils.log import get_task_logger
logger = get_task_logger(__name__)
logger = logging.getLogger(__name__)
class UserData:
user: UserProfile = None
@ -107,7 +106,6 @@ def validate_ocr_request_and_get(request, subscription):
# validated_data['is_test_request'] = bool(request.data.get('is_test_request', False))
validated_data['is_test_request'] = string_to_boolean(request.data.get('is_test_request', "false"))
# print(f"[DEBUG]: is_test_request: ", validated_data['is_test_request'])
return validated_data
@ -143,7 +141,6 @@ def sbt_validate_ocr_request_and_get(request, subscription):
validated_data['invoice_file'] = invoice_file
validated_data['redemption_ID'] = redemption_ID
validated_data['is_test_request'] = string_to_boolean(request.data.get('is_test_request', "false"))
# print(f"[DEBUG]: is_test_request: ", validated_data['is_test_request'])
subsidiary = request.data.get("subsidiary", None)
valid_subs = list(settings.SUBS.keys())[:-2] # remove "ALL" and "SEAO"
@ -246,7 +243,7 @@ def get_random_string(length):
# choose from all lowercase letter
letters = string.ascii_lowercase
result_str = ''.join(random.choice(letters) for _ in range(length))
print("Random string of length", length, "is:", result_str)
logger.debug("Random string of length", length, "is:", result_str)
return result_str
@ -346,7 +343,7 @@ def send_to_queue2(rq_id, sub_id, file_url, user_id, typez, metadata={}):
elif typez == ProcessType.SBT_INVOICE.value:
c_connector.process_invoice_sbt((rq_id, file_url, metadata))
except Exception as e:
print(e)
logger.error(e)
raise BadGatewayException()
def build_template_matching_data(template):
@ -383,7 +380,7 @@ def send_template_queue(rq_id, file_url, template: OcrTemplate, uid):
c_connector.process_template_matching(
(rq_id, template.subscription.id, folder_name, file_url, template_data, uid))
except Exception as e:
print(e)
logger.error(e)
raise BadGatewayException()
def process_feedback(feedback_id, local_file_path):
@ -451,7 +448,7 @@ def pdf_to_images_urls(doc_path, request: SubscriptionRequest, user, dpi: int =
image = get_first_page_pdf(doc_path, 300)
image = resize(image, max_w=settings.TARGET_MAX_IMAGE_SIZE[0], max_h=settings.TARGET_MAX_IMAGE_SIZE[1])
image.save(saving_path)
print(f"Saving {saving_path}")
logger.debug(f"Saving {saving_path}")
new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=saving_path,
request=request,
file_name=break_file_name,

View File

@ -1,4 +1,7 @@
import boto3
import logging
logger = logging.getLogger(__name__)
class MinioS3Client:
def __init__(self, access_key, secret_key, bucket_name, endpoint=""):
@ -25,7 +28,7 @@ class MinioS3Client:
's3')
except Exception as e:
print(f"[WARM] Unable to create an s3 client, {e}")
logger.warning(f"Unable to create an s3 client, {e}")
self.s3_client = None
def update_object(self, s3_key, content):
@ -38,7 +41,7 @@ class MinioS3Client:
# print(f"Object '{s3_key}' updated in S3 with res: {res}")
return res
except Exception as e:
print(f"Error updating object in S3: {str(e)}")
logger.error(f"Error updating object in S3: {str(e)}")
def upload_file(self, local_file_path, s3_key):
try:
@ -46,7 +49,7 @@ class MinioS3Client:
# print(f"File '{local_file_path}' uploaded to S3 with key '{s3_key}'")
return res
except Exception as e:
print(f"Error uploading file to S3: {str(e)}")
logger.error(f"Error uploading file to S3: {str(e)}")
def download_file(self, s3_key, local_file_path):
try:
@ -54,7 +57,7 @@ class MinioS3Client:
# print(f"File '{s3_key}' downloaded from S3 to '{local_file_path}'")
return res
except Exception as e:
print(f"Error downloading file from S3: {str(e)}")
logger.error(f"Error downloading file from S3: {str(e)}")
def create_url_with_expiration(self, s3_key, expiration_time):
try:
@ -68,7 +71,7 @@ class MinioS3Client:
# print(f"URL for file '{s3_key}' expires in {expiration_time} seconds")
return res
except Exception as e:
print(f"Error generating URL for file '{s3_key}': {str(e)}")
logger.error(f"Error generating URL for file '{s3_key}': {str(e)}")
if __name__=="__main__":

View File

@ -45,6 +45,7 @@ services:
image: sidp/cope2n-be-fi-sbt:latest
environment:
- MEDIA_ROOT=${MEDIA_ROOT}
- LOG_ROOT=${LOG_ROOT}
- DB_ENGINE=${DB_ENGINE}
- DB_SCHEMA=${DB_SCHEMA}
- DB_USER=${DB_USER}
@ -84,13 +85,15 @@ services:
- ctel-sbt
volumes:
- BE_media:${MEDIA_ROOT}
- BE_log:${LOG_ROOT}
- BE_static:/app/static
- /mnt/hdd4T/TannedCung/OCR/Data/SBT_lost_data:/external_data
- ./cope2n-api:/app
working_dir: /app
depends_on:
db-sbt:
condition: service_started
command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
command: sh -c "sudo chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
python manage.py makemigrations &&
python manage.py migrate &&
python manage.py compilemessages &&
@ -105,7 +108,8 @@ services:
- MINIO_ACCESS_KEY=${S3_ACCESS_KEY}
- MINIO_SECRET_KEY=${S3_SECRET_KEY}
volumes:
- ./data/minio_data:/data
# - ./data/minio_data:/data
- /home/dxtan/TannedCung/persistent/minio:/data
ports:
- 9884:9884
- 9885:9885
@ -144,6 +148,7 @@ services:
image: sidp/cope2n-be-fi-sbt:latest
environment:
- MEDIA_ROOT=${MEDIA_ROOT}
- LOG_ROOT=${LOG_ROOT}
- PYTHONPATH=${PYTHONPATH}:/app # For import module
- PYTHONUNBUFFERED=1 # For show print log
- DB_ENGINE=${DB_ENGINE}
@ -177,6 +182,7 @@ services:
condition: service_started
volumes:
- BE_media:${MEDIA_ROOT}
- BE_log:${LOG_ROOT}
- ./cope2n-api:/app
working_dir: /app
@ -189,7 +195,8 @@ services:
mem_reservation: 500m
image: postgres:15.4-alpine
volumes:
- ./data/postgres_data:/var/lib/postgresql/data
# - ./data/postgres_data:/var/lib/postgresql/data
- /home/dxtan/TannedCung/persistent/postgres:/var/lib/postgresql/data
networks:
- ctel-sbt
environment:
@ -255,3 +262,4 @@ volumes:
db_data:
BE_static:
BE_media:
BE_log:

View File

@ -25,6 +25,7 @@ services:
be-ctel-sbt:
environment:
- MEDIA_ROOT=${MEDIA_ROOT}
- LOG_ROOT=${LOG_ROOT}
- DB_ENGINE=${DB_ENGINE}
- DB_SCHEMA=${DB_SCHEMA}
- DB_USER=${DB_USER}
@ -112,6 +113,7 @@ services:
be-celery-sbt:
environment:
- MEDIA_ROOT=${MEDIA_ROOT}
- LOG_ROOT=${LOG_ROOT}
- PYTHONPATH=${PYTHONPATH}:/app # For import module
- PYTHONUNBUFFERED=1 # For show print log
- DB_ENGINE=${DB_ENGINE}