Merge pull request #145 from SDSRV-IDP/dev/logging

Dev/logging
Phan Thành Trung 2024-06-26 15:53:24 +07:00 committed by GitHub Enterprise
commit f93e9813ca
28 changed files with 451 additions and 317 deletions

View File

@ -1,4 +1,5 @@
MEDIA_ROOT=/app/media
LOG_ROOT=/app/log
# DATABASE django setup
DB_ENGINE=django.db.backends.postgresql_psycopg2
DB_SCHEMA=sbt_prod_20240422_1

.gitignore vendored
View File

@ -45,3 +45,4 @@ cope2n-api/reviewed/retailer.xlsx
scripts/crawl_database.py
redundant_models/
junk.json
cope2n-api/result.json

View File

@ -13,7 +13,7 @@ docker compose --profile local up -d
```bash
pip install pytest requests deepdiff
IDP_HOST=https://sbt.idp.sdsrv.ai IDP_USERNAME=sbt IDP_PASSWORD=7Eg4AbWIXDnufgn python3 -m pytest --log-cli-level=DEBUG --capture=sys tests
IDP_HOST=http://ocrnlb-stagging-671b98778cba8a08.elb.ap-southeast-1.amazonaws.com IDP_USERNAME=sbt IDP_PASSWORD=7Eg4AbWIXDnufgn python3 -m pytest --log-cli-level=DEBUG --capture=sys tests
```
IDP_HOST=http://ec2-54-169-227-39.ap-southeast-1.compute.amazonaws.com:9881 IDP_USERNAME=sbt IDP_PASSWORD=abc python3 -m pytest --log-cli-level=DEBUG --capture=sys tests

File diff suppressed because one or more lines are too long

View File

@ -76,6 +76,7 @@ MIDDLEWARE = [
'corsheaders.middleware.CorsMiddleware',
"whitenoise.middleware.WhiteNoiseMiddleware",
"django.middleware.locale.LocaleMiddleware",
"fwd_api.middleware.logging_request_response_middleware.LoggingMiddleware"
]
LOCALE_PATHS = [
@ -207,6 +208,7 @@ FILE_UPLOAD_HANDLERS = [
CORS_ORIGIN_ALLOW_ALL = True
MEDIA_ROOT = env.str("MEDIA_ROOT", default=r"/var/www/example.com/media/")
LOG_ROOT = env.str("LOG_ROOT", default="/app/log")
BROKER_URL = env.str("BROKER_URL", default="amqp://test:test@107.120.70.226:5672//")
CELERY_TASK_TRACK_STARTED = True
# CELERY_TASK_TIME_LIMIT = 30 * 60
@ -256,3 +258,42 @@ CACHES = {
'BACKEND': 'django.core.cache.backends.dummy.DummyCache',
}
}
LOGGING = {
'version': 1,
'disable_existing_loggers': False,
'formatters': {
'verbose': {
'format': '{levelname} {asctime} {module} {message}',
'style': '{',
},
'simple': {
'format': '{levelname} {message}',
'style': '{',
},
},
'handlers': {
'console': {
'class': 'logging.StreamHandler',
'formatter': 'verbose',
},
'file': {
"class": 'logging.handlers.TimedRotatingFileHandler',
'filename': f'{LOG_ROOT}/sbt_idp_BE.log', # Specify the path and filename for the log file
"when": "midnight",
"interval": 1,
'backupCount': 10,
'formatter': 'verbose',
},
},
'loggers': {
'django': {
'handlers': ['console', 'file'],
'level': 'INFO',
},
'': {
'handlers': ['console', 'file'],
'level': 'INFO',
}
},
}
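
For reference, a minimal sketch (not part of the diff; module and function names are hypothetical) of how application code picks this configuration up — the catch-all `''` logger routes any `logging.getLogger(__name__)` call to both the console handler and the midnight-rotating file under `LOG_ROOT`:

```python
# Illustrative only; assumes the LOGGING dict above has been loaded by Django.
import logging

logger = logging.getLogger(__name__)  # resolves through the root ('') logger above

def handle_request(request_id: str) -> None:
    # INFO and above end up on stdout and in LOG_ROOT/sbt_idp_BE.log,
    # which rotates at midnight and keeps 10 backups per the config above.
    logger.info("received request %s", request_id)
    try:
        raise ValueError("demo failure")
    except ValueError:
        logger.exception("failed to process request %s", request_id)
```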

View File

@ -26,6 +26,9 @@ from ..utils.subsidiary import map_subsidiary_long_to_short, map_subsidiary_shor
from ..utils.report import aggregate_overview
from fwd_api.utils.accuracy import predict_result_to_ready
import copy
import logging
logger = logging.getLogger(__name__)
redis_client = RedisUtils()
@ -227,8 +230,8 @@ class AccuracyViewSet(viewsets.ViewSet):
retailer = rq.predict_result.get("content", {}).get("document", [])[
0].get("content", [])[0].get("value", [])
except Exception as e:
print(f"[ERROR]: {e}")
print(f"[ERROR]: {rq}")
logger.error(f"{e}")
logger.error(f"{rq}")
data.append({
'RequestID': rq.request_id,
'RedemptionID': rq.redemption_id,
@ -603,8 +606,8 @@ class AccuracyViewSet(viewsets.ViewSet):
subsidiary_overview_reports += this_overview
except Exception as e:
print(
f"[WARM]: Unable to retrive data {key} from Redis, skipping...")
logger.warning(
f"Unable to retrieve data {key} from Redis, skipping...")
data = aggregate_overview(subsidiary_overview_reports)
for item in data:
if item.get("subs") == "+":

View File

@ -21,7 +21,9 @@ from ..utils import file as FileUtils
from ..utils import process as ProcessUtil
from ..utils.process import UserData
from drf_spectacular.utils import extend_schema
import logging
logger = logging.getLogger(__name__)
class CtelTemplateViewSet(viewsets.ViewSet):
lookup_field = "username"
@ -121,7 +123,7 @@ class CtelTemplateViewSet(viewsets.ViewSet):
serializer = CreateTemplateRequest(data=request.data)
serializer.is_valid()
if not serializer.is_valid():
print(serializer.errors)
logger.error(serializer.errors)
raise InvalidException(excArgs=list(serializer.errors.keys()))
data = serializer.validated_data
@ -156,7 +158,7 @@ class CtelTemplateViewSet(viewsets.ViewSet):
serializer = UpdateTemplateRequest(data=data)
serializer.is_valid()
if not serializer.is_valid():
print(serializer.errors)
logger.error(serializer.errors)
raise InvalidException(excArgs=list(serializer.errors.keys()))
data = serializer.validated_data

View File

@ -21,6 +21,9 @@ from ..utils.crypto import sds_authenticator, admin_sds_authenticator, SdsAuthen
import datetime
from ..request.LoginRequest import LoginRequest
from ..utils.date import default_zone
import logging
logger = logging.getLogger(__name__)
from fwd import settings
@ -45,7 +48,7 @@ class CtelUserViewSet(viewsets.ViewSet):
@action(detail=False, url_path="login", methods=["POST"])
def login(self, request):
serializer = LoginRequest(data=request.data)
print(serializer.is_valid(raise_exception=True))
logger.error(serializer.is_valid(raise_exception=True))
data = serializer.validated_data
token_limit = 999999
@ -157,7 +160,7 @@ class CtelUserViewSet(viewsets.ViewSet):
serializer = UpsertUserRequest(data=request.data)
serializer.is_valid()
if not serializer.is_valid():
print(serializer.errors)
logger.error(serializer.errors)
raise InvalidException(excArgs=list(serializer.errors.keys()))
data = serializer.validated_data

View File

@ -24,6 +24,9 @@ from ..models import SubscriptionRequest, SubscriptionRequestFile, OcrTemplate,
from ..response.ReportSerializer import ReportSerializer
from ..utils import file as FileUtils
from ..utils import process as ProcessUtil
import logging
logger = logging.getLogger(__name__)
class CtelViewSet(viewsets.ViewSet):
lookup_field = "username"
@ -92,8 +95,8 @@ class CtelViewSet(viewsets.ViewSet):
elif file_extension in image_extensions:
b_url = ProcessUtil.process_image_file(file_name, file_obj, new_request, user, "all", 0)
j_time = time.time()
print(f"[INFO]: Duration of Pre-processing: {j_time - s_time}s")
print(f"[INFO]: b_url: {b_url}")
logger.info(f"Duration of Pre-processing: {j_time - s_time}s")
logger.info(f"b_url: {b_url}")
if p_type in standard_ocr_list:
ProcessUtil.send_to_queue2(rq_id + "_sub_0", sub.id, b_url, user.id, p_type)
if p_type == ProcessType.TEMPLATE_MATCHING.value:
@ -441,7 +444,7 @@ class CtelViewSet(viewsets.ViewSet):
content_type = 'application/pdf' if temp.file_name.split(".")[-1] in pdf_extensions else content_type
if user.id != user_data['internal_id'] or user.status != EntityStatus.ACTIVE.value:
raise PermissionDeniedException()
print(temp.file_path)
logger.info(temp.file_path)
return HttpResponse(FileWrapper(FileUtils.get_file(temp.file_path)), status=status.HTTP_200_OK,
headers={'Content-Disposition': 'filename={fn}'.format(fn=temp.file_name)},
content_type=content_type)
@ -459,7 +462,6 @@ class CtelViewSet(viewsets.ViewSet):
if user.id != user_data['internal_id'] or user.status != EntityStatus.ACTIVE.value:
raise PermissionDeniedException()
# print(f"[DEBUG]: rq: {rq}, file_name: {file_name}")
file_data = SubscriptionRequestFile.objects.filter(request=rq, file_name=file_name)[0]
except IndexError:
raise NotFoundException(excArgs='file')
@ -527,7 +529,6 @@ class CtelViewSet(viewsets.ViewSet):
serializer: ReportSerializer = ReportSerializer(data=report_filter, many=True)
serializer.is_valid()
# print(f"[DEBUG]: result: {serializer.data[0]}")
if report_filter[0].status == 400:
raise FileContentInvalidException()
if report_filter[0].status == 100: # continue, only return when result is fullfilled

View File

@ -2,8 +2,25 @@ from celery import Celery
from fwd import settings
from fwd_api.exception.exceptions import GeneralException
from kombu.utils.uuid import uuid
from celery.utils.log import get_task_logger
logger = get_task_logger(__name__)
def is_it_an_index(id):
ret = True
if not isinstance(id, str):
logger.info("NOT A STRING")
return False
if "/" in id:
logger.info("/ in ID")
return False
if id.count("_") > 5 or id.count("_") < 1:
_c = id.count("_")
logger.info(f"_ HAS {_c}")
return False
return ret
class CeleryConnector:
task_routes = {
# save result
@ -110,6 +127,8 @@ class CeleryConnector:
def send_task(self, name=None, args=None, countdown=None):
if name not in self.task_routes or 'queue' not in self.task_routes[name]:
raise GeneralException("System")
return self.app.send_task(name, args, queue=self.task_routes[name]['queue'], expires=300, countdown=countdown)
task_id = args[0] + "_" + uuid()[:4] if isinstance(args, tuple) and is_it_an_index(args[0]) else uuid()
logger.info(f"SEND task name: {name} - {task_id} | args: {args} | countdown: {countdown}")
return self.app.send_task(name, args, queue=self.task_routes[name]['queue'], expires=300, countdown=countdown, task_id=task_id)
c_connector = CeleryConnector()
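
As a hedged illustration of the new `task_id` scheme (the request id and URL below are fabricated): `is_it_an_index` accepts plain strings with one to five underscores and no slash, so such ids get a short uuid suffix and stay greppable across the BE and Celery logs, while anything else falls back to a plain uuid.

```python
# Example values only; real request ids are generated elsewhere in the codebase.
from kombu.utils.uuid import uuid

args = ("SBT_20240626_0001_sub_0", "http://example.local/file.jpg")  # hypothetical task args
suffix = uuid()[:4]

# "SBT_20240626_0001_sub_0" is a str, has no "/" and 4 underscores, so is_it_an_index() is True
# and the Celery task id becomes e.g. "SBT_20240626_0001_sub_0_a1b2" instead of a bare uuid.
task_id = args[0] + "_" + suffix if isinstance(args, tuple) else uuid()
print(task_id)
```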

View File

@ -7,6 +7,7 @@ from multiprocessing.pool import ThreadPool
from fwd_api.models import SubscriptionRequest, UserProfile
from fwd_api.celery_worker.worker import app
from fwd_api.celery_worker.task_warpper import VerboseTask
from ..constant.common import FolderFileType, image_extensions
from ..exception.exceptions import FileContentInvalidException
from fwd_api.models import SubscriptionRequestFile, FeedbackRequest, Report
@ -23,7 +24,6 @@ from fwd_api.utils.accuracy import predict_result_to_ready
from celery.utils.log import get_task_logger
from fwd import settings
logger = get_task_logger(__name__)
s3_client = S3Util.MinioS3Client(
@ -66,7 +66,7 @@ def process_image_file(file_name: str, file_path, request, user) -> list:
'request_file_id': new_request_file.code
}]
@app.task(name="csv_feedback")
@app.task(base=VerboseTask, name="csv_feedback")
def process_csv_feedback(csv_file_path, feedback_id):
# load file to RAM
status = {}
@ -124,7 +124,7 @@ def process_csv_feedback(csv_file_path, feedback_id):
try:
image.processing_time = time_cost.get(image.doc_type, [0 for _ in range(image.index_in_request)])[image.index_in_request]
except Exception as e:
print(f"[ERROR] image.doc_type: {image.doc_type} - image.index_in_request: {image.index_in_request} - time_cost: {time_cost} - {e}")
logger.error(f"image.doc_type: {image.doc_type} - image.index_in_request: {image.index_in_request} - time_cost: {time_cost} - {e}")
if not validate_feedback_file(_feedback_result, _predict_result):
status[request_id] = "Missalign imei number between feedback and predict"
# continue
@ -138,7 +138,7 @@ def process_csv_feedback(csv_file_path, feedback_id):
_predict_result = {"retailername": None, "sold_to_party": None, "invoice_no": None, "purchase_date": [], "imei_number": [_predict_result["imei_number"][image.index_in_request]]}
_feedback_result = {"retailername": None, "sold_to_party": None, "invoice_no": None, "purchase_date": None, "imei_number": [_feedback_result["imei_number"][image.index_in_request]]} if _feedback_result and len(_feedback_result["imei_number"]) > image.index_in_request else None
except Exception as e:
print (f"[ERROR]: {request_id} - {e}")
logger.error(f"{request_id} - {e}")
image.predict_result = _predict_result
image.feedback_result = _feedback_result
# image.reviewed_result = _reviewed_result
@ -160,10 +160,9 @@ def process_csv_feedback(csv_file_path, feedback_id):
os.remove(file_path)
except Exception as e:
logger.error(f"Unable to set S3: {e}")
print(f"Unable to set S3: {e}")
feedback_rq.save()
@app.task(name='do_pdf')
@app.task(base=VerboseTask, name='do_pdf')
def process_pdf(rq_id, sub_id, p_type, user_id, files):
"""
files: [{
@ -229,7 +228,7 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files):
for sub_rq_id, sub_id, urls, user_id, p_type, metadata in to_queue:
ProcessUtil.send_to_queue2(sub_rq_id, sub_id, urls, user_id, p_type, metadata)
@app.task(name='upload_file_to_s3')
@app.task(base=VerboseTask, name='upload_file_to_s3')
def upload_file_to_s3(local_file_path, s3_key, request_id):
if s3_client.s3_client is not None:
try:
@ -239,12 +238,11 @@ def upload_file_to_s3(local_file_path, s3_key, request_id):
sub_request.save()
except Exception as e:
logger.error(f"Unable to set S3: {e}")
print(f"Unable to set S3: {e}")
return
else:
logger.info(f"S3 is not available, skipping,...")
@app.task(name='upload_feedback_to_s3')
@app.task(base=VerboseTask, name='upload_feedback_to_s3')
def upload_feedback_to_s3(local_file_path, s3_key, feedback_id):
if s3_client.s3_client is not None:
try:
@ -254,12 +252,11 @@ def upload_feedback_to_s3(local_file_path, s3_key, feedback_id):
feed_request.save()
except Exception as e:
logger.error(f"Unable to set S3: {e}")
print(f"Unable to set S3: {e}")
return
else:
logger.info(f"S3 is not available, skipping,...")
@app.task(name='upload_report_to_s3')
@app.task(base=VerboseTask, name='upload_report_to_s3')
def upload_report_to_s3(local_file_path, s3_key, report_id, delay):
if s3_client.s3_client is not None:
try:
@ -271,20 +268,19 @@ def upload_report_to_s3(local_file_path, s3_key, report_id, delay):
report.save()
except Exception as e:
logger.error(f"Unable to set S3: {e}")
print(f"Unable to set S3: {e}")
return
else:
logger.info(f"S3 is not available, skipping,...")
@app.task(name='remove_local_file')
@app.task(base=VerboseTask, name='remove_local_file')
def remove_local_file(local_file_path, request_id):
print(f"[INFO] Removing local file: {local_file_path}, ...")
logger.info(f"Removing local file: {local_file_path}, ...")
try:
os.remove(local_file_path)
except Exception as e:
logger.info(f"Unable to remove local file: {e}")
@app.task(name='upload_obj_to_s3')
@app.task(base=VerboseTask, name='upload_obj_to_s3')
def upload_obj_to_s3(byte_obj, s3_key):
if s3_client.s3_client is not None:
obj = base64.b64decode(byte_obj)

View File

@ -2,6 +2,7 @@ import traceback
from fwd_api.models import SubscriptionRequest, Report, ReportFile
from fwd_api.celery_worker.worker import app
from fwd_api.celery_worker.task_warpper import VerboseTask
from ..utils import s3 as S3Util
from ..utils.accuracy import (update_temp_accuracy, IterAvg,
count_transactions, extract_report_detail_list, calculate_a_request,
@ -37,7 +38,7 @@ def mean_list(l):
return 0
return sum(l)/len(l)
@app.task(name='make_a_report_2')
@app.task(base=VerboseTask, name='make_a_report_2')
def make_a_report_2(report_id, query_set):
report_type = query_set.pop("report_type", "accuracy")
if report_type == "accuracy":
@ -236,11 +237,11 @@ def create_accuracy_report(report_id, **kwargs):
s3_key = save_report_to_S3(report.report_id, local_workbook)
except IndexError as e:
print(e)
logger.error(e)
traceback.print_exc()
print("NotFound request by report id, %d", report_id)
logger.error("NotFound request by report id, %d", report_id)
except Exception as e:
print("[ERROR]: an error occured while processing report: ", report_id)
logger.error("An error occured while processing report: ", report_id)
traceback.print_exc()
return 400
@ -286,10 +287,10 @@ def create_billing_report(report_id, **kwargs):
local_workbook = save_workbook_file(detail_file_name, report, data_workbook)
s3_key = save_report_to_S3(report.report_id, local_workbook)
except IndexError as e:
print(e)
logger.error(e)
traceback.print_exc()
print("NotFound request by report id, %d", report_id)
logger.error("NotFound request by report id, %d", report_id)
except Exception as e:
print("[ERROR]: an error occured while processing report: ", report_id)
logger.error("An error occured while processing report: ", report_id)
traceback.print_exc()
return 400

View File

@ -12,6 +12,10 @@ from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile
from fwd_api.constant.common import ProcessType
from fwd_api.utils.redis import RedisUtils
from fwd_api.utils import process as ProcessUtil
from fwd_api.celery_worker.task_warpper import VerboseTask
from celery.utils.log import get_task_logger
logger = get_task_logger(__name__)
redis_client = RedisUtils()
@ -55,11 +59,10 @@ def aggregate_result(results):
return des_result
def print_id(rq_id):
print(" [x] Received {rq}".format(rq=rq_id))
logger.info(" [x] Received {rq}".format(rq=rq_id))
def to_status(result):
print('X')
if 'status' in result and result['status'] not in [200, 201, 202]:
return 4
return 3
@ -72,7 +75,7 @@ def update_user(rq: SubscriptionRequest):
sub.current_token += ProcessUtil.token_value(int(rq.process_type))
sub.save()
@app.task(name='process_sap_invoice_result')
@app.task(base=VerboseTask, name='process_sap_invoice_result')
def process_invoice_sap_result(rq_id, result):
try:
rq: SubscriptionRequest = \
@ -85,16 +88,16 @@ def process_invoice_sap_result(rq_id, result):
update_user(rq)
except IndexError as e:
print(e)
print("NotFound request by requestId, %d", rq_id)
logger.error(e)
logger.error("NotFound request by requestId, %d", rq_id)
except Exception as e:
print(e)
print("Fail Invoice %d", rq_id)
logger.error(e)
logger.error("Fail Invoice %d", rq_id)
traceback.print_exc()
return "FailInvoice"
@app.task(name='process_fi_invoice_result')
@app.task(base=VerboseTask, name='process_fi_invoice_result')
def process_invoice_fi_result(rq_id, result):
try:
rq: SubscriptionRequest = \
@ -107,15 +110,15 @@ def process_invoice_fi_result(rq_id, result):
update_user(rq)
except IndexError as e:
print(e)
print("NotFound request by requestId, %d", rq_id)
logger.error(e)
logger.error("NotFound request by requestId, %d", rq_id)
except Exception as e:
print(e)
print("Fail Invoice %d", rq_id)
logger.error(e)
logger.error("Fail Invoice %d", rq_id)
traceback.print_exc()
return "FailInvoice"
@app.task(name='process_manulife_invoice_result')
@app.task(base=VerboseTask, name='process_manulife_invoice_result')
def process_invoice_manulife_result(rq_id, result):
try:
rq: SubscriptionRequest = \
@ -128,17 +131,17 @@ def process_invoice_manulife_result(rq_id, result):
update_user(rq)
except IndexError as e:
print(e)
print("NotFound request by requestId, %d", rq_id)
logger.error(e)
logger.error("NotFound request by requestId, %d", rq_id)
except Exception as e:
print(e)
print("Fail Invoice %d", rq_id)
logger.error(e)
logger.error("Fail Invoice %d", rq_id)
traceback.print_exc()
return "FailInvoice"
random_processor_name = None
@app.task(name='process_sbt_invoice_result')
@app.task(base=VerboseTask, name='process_sbt_invoice_result')
def process_invoice_sbt_result(rq_id, result, metadata):
global random_processor_name
if random_processor_name is None:
@ -161,7 +164,6 @@ def process_invoice_sbt_result(rq_id, result, metadata):
rq.status = 200 # stop waiting
results = redis_client.get_all_cache(rq_id)
rq.predict_result = aggregate_result(results)
# print(f"[DEBUG]: rq.predict_result: {rq.predict_result}")
ai_inference_profile = {}
doc_type_string = ""
for idx, result in results.items():
@ -180,13 +182,13 @@ def process_invoice_sbt_result(rq_id, result, metadata):
rq.save()
update_user(rq)
except IndexError as e:
print(e)
print("NotFound request by requestId, %d", rq_id)
logger.error(e)
logger.error("NotFound request by requestId, %d", rq_id)
rq.ai_inference_time = 0
rq.save()
except Exception as e:
print(e)
print("Fail Invoice %d", rq_id)
logger.error(e)
logger.error("Fail Invoice %d", rq_id)
traceback.print_exc()
rq.ai_inference_time = 0
rq.save()

View File

@ -0,0 +1,16 @@
from celery import Task
from celery.utils.log import get_task_logger
logger = get_task_logger(__name__)
class VerboseTask(Task):
abstract = True
def on_failure(self, exc, task_id, args, kwargs, einfo):
# Task failed. What do you want to do?
logger.error('FAILURE: Task raised an exception: {}'.format(exc))
def on_success(self, retval, task_id, args, kwargs):
logger.info(f"SUCCESS: Task: {task_id} succeeded | retval: {retval} | args: {args} | kwargs: {kwargs}")
def before_start(self, task_id, args, kwargs):
logger.info(f"BEFORE_START: Task: {task_id} | args: {args} | kwargs: {kwargs}")

View File

@ -2,7 +2,9 @@ import os
import django
from celery import Celery
from celery.signals import setup_logging # noqa
from kombu import Queue
import copy
from fwd import settings
@ -19,13 +21,15 @@ app: Celery = Celery(
app.config_from_object("django.conf:settings", namespace="CELERY")
app.autodiscover_tasks()
@app.on_after_finalize.connect
def setup_periodic_tasks(sender, **kwargs):
from fwd_api.bg_tasks.clean_local_files import clean_media
sender.add_periodic_task(
10.0, clean_media.s(), expires=120.0
)
@setup_logging.connect
def config_loggers(*args, **kwargs):
from logging.config import dictConfig # noqa
from django.conf import settings # noqa
log_config = copy.deepcopy(settings.LOGGING)
if log_config.get("handlers", {}).get("file", {}).get("filename", None):
log_config["handlers"]["file"]["filename"] = log_config["handlers"]["file"]["filename"].replace(".log", "_celery.log")
dictConfig(log_config)
app.conf.update({
'task_queues':
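
The `config_loggers` hook above only re-points the file handler; a sketch of the resulting filenames, assuming the `LOG_ROOT=/app/log` default from settings:

```python
# Assuming LOG_ROOT=/app/log: Django keeps writing sbt_idp_BE.log,
# while every Celery worker gets its own rotated sbt_idp_BE_celery.log.
filename = "/app/log/sbt_idp_BE.log"
celery_filename = filename.replace(".log", "_celery.log")
assert celery_filename == "/app/log/sbt_idp_BE_celery.log"
```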

View File

@ -9,7 +9,9 @@ from fwd_api.annotation.api import throw_on_failure
from fwd_api.utils import date as DateUtil
from fwd_api.utils.crypto import ctel_cryptor, sds_authenticator, image_authenticator
from fwd_api.exception.exceptions import InvalidException, TokenExpiredException, PermissionDeniedException
import logging
logger = logging.getLogger(__name__)
@throw_on_failure(TokenExpiredException())
def authenticate_by_param_token(request):
@ -27,8 +29,7 @@ def authenticate_by_param_token(request):
# @throw_on_failure(TokenExpiredException())
def authenticate_by_authorization_header(request):
token = request.headers['Authorization']
print(f"[INFO]: recived request at {datetime.datetime.utcnow()}, with path {request.path}")
# print(f"[DEBUG]: token: {token}")
logger.info(f"recived request at {datetime.datetime.utcnow()}, with path {request.path}")
data = sds_authenticator.decode_data(token)
if 'expired_at' not in data or 'status' not in data or 'subscription_id' not in data:
raise PermissionDeniedException()
@ -72,7 +73,7 @@ class AuthFilter(authentication.BaseAuthentication):
def authenticate(self, request):
if request.path in self.white_list_path:
print("App API")
logger.debug("App API")
return None, None
if '/api/ctel/media/' in request.path or '/api/ctel/v2/media/' in request.path:
return authenticate_by_param_token(request)

View File

@ -151,8 +151,8 @@ class Command(BaseCommand):
prepared_data_copy = copy.deepcopy(prepared_data)
s3_client = MinioS3Client(
# endpoint='http://107.120.133.27:9884',
access_key='AKIA3AFPFVWZHTZHB6FW',
secret_key='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',
access_key='secret',
secret_key='secret+HRcfOsbXhx0YSNOLxdW',
bucket_name='ocr-sds'
)
# file = open("modified.txt", "w")

View File

@ -0,0 +1,34 @@
import logging
import uuid
from django.utils.deprecation import MiddlewareMixin
logger = logging.getLogger(__name__)
class LoggingMiddleware(MiddlewareMixin):
def process_request(self, request):
trace_id = request.headers.get('X-Trace-ID', str(uuid.uuid4()))
request.META['X-Trace-ID'] = trace_id
request_body = ""
content_type = request.headers.get("Content-Type", "")
if request.method in ('POST', 'PUT', 'PATCH') and not content_type.startswith("multipart/form-data"):
request_body = request.body.decode(encoding="utf-8", errors="ignore")
logger.info(f"Request: {request.method} {request.path} | Trace ID: {trace_id} | "
f"Body: {request_body} | Headers: {request.headers}")
def process_response(self, request, response):
trace_id = request.META.get('X-Trace-ID', str(uuid.uuid4()))
response_body = response.content.decode("utf-8", errors="ignore") if response.content else ""
logger.info(f"Response: {request.method} {request.path} | Trace ID: {trace_id} | "
f"Status: {response.status_code} | Body: {response_body}")
response['X-Trace-ID'] = trace_id
return response
def process_exception(self, request, exception):
trace_id = request.META.get('X-Trace-ID', str(uuid.uuid4()))
logger.error(f"Exception: {request.method} {request.path} | Trace ID: {trace_id} | "
f"Error: {exception}")

View File

@ -16,6 +16,9 @@ from django.utils import timezone
from fwd import settings
from ..models import SubscriptionRequest, Report, ReportFile
from typing import Union, List, Dict
import logging
logger = logging.getLogger(__name__)
VALID_KEYS = ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"]
OPTIONAL_KEYS = ['invoice_no']
@ -145,7 +148,7 @@ class ReportAccumulateByRequest:
if report_file.doc_type in ["imei", "invoice", "all"]:
doc_type = report_file.doc_type
else:
print(f"[WARM]: Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
logger.warning(f"Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
total["num_imei"] += 1 if doc_type == "imei" else 0
total["num_invoice"] += 1 if doc_type == "invoice" else 0
@ -160,13 +163,13 @@ class ReportAccumulateByRequest:
total["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, []))
if not total["average_processing_time"].get(report_file.doc_type, None):
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
logger.warning(f"Weird doctype: {report_file.doc_type}")
total["average_processing_time"][report_file.doc_type] = IterAvg()
total["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
total["average_processing_time"]["avg"].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
if not total["file_average_processing_time"].get(report_file.doc_type, None):
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
logger.warning(f"Weird doctype: {report_file.doc_type}")
total["file_average_processing_time"][report_file.doc_type] = IterAvg()
total["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0
total["file_average_processing_time"]["avg"].add_avg(image_avg_cost, 1) if image_avg_cost else 0
@ -175,7 +178,7 @@ class ReportAccumulateByRequest:
if report_file.doc_type in ["imei", "invoice", "all"]:
doc_type = report_file.doc_type
else:
print(f"[WARM]: Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
logger.warning(f"Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
total["usage"]["imei"] += 1 if doc_type == "imei" else 0
total["usage"]["invoice"] += 1 if doc_type == "invoice" else 0
total["usage"]["total_images"] += 1
@ -193,7 +196,7 @@ class ReportAccumulateByRequest:
if report_file.doc_type in ["imei", "invoice", "all"]:
doc_type = report_file.doc_type
else:
print(f"[WARM]: Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
logger.warning(f"Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
month["num_imei"] += 1 if doc_type == "imei" else 0
month["num_invoice"] += 1 if doc_type == "invoice" else 0
@ -206,13 +209,13 @@ class ReportAccumulateByRequest:
month["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, []))
if not month["average_processing_time"].get(report_file.doc_type, None):
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
logger.warning(f"Weird doctype: {report_file.doc_type}")
month["average_processing_time"][report_file.doc_type] = IterAvg()
month["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
month["average_processing_time"]["avg"].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
if not month["file_average_processing_time"].get(report_file.doc_type, None):
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
logger.warning(f"Weird doctype: {report_file.doc_type}")
month["file_average_processing_time"][report_file.doc_type] = IterAvg()
month["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0
month["file_average_processing_time"]["avg"].add_avg(image_avg_cost, 1) if image_avg_cost else 0
@ -221,7 +224,7 @@ class ReportAccumulateByRequest:
if report_file.doc_type in ["imei", "invoice", "all"]:
doc_type = report_file.doc_type
else:
print(f"[WARM]: Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
logger.warning(f"Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
month["usage"]["imei"] += 1 if doc_type == "imei" else 0
month["usage"]["invoice"] += 1 if doc_type == "invoice" else 0
month["usage"]["total_images"] += 1
@ -238,7 +241,7 @@ class ReportAccumulateByRequest:
if report_file.doc_type in ["imei", "invoice", "all"]:
doc_type = report_file.doc_type
else:
print(f"[WARM]: Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
logger.warning(f"Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
day_data["num_imei"] += 1 if doc_type == "imei" else 0
day_data["num_invoice"] += 1 if doc_type == "invoice" else 0
day_data["report_files"].append(report_file)
@ -252,13 +255,13 @@ class ReportAccumulateByRequest:
day_data["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, []))
if not day_data["average_processing_time"].get(report_file.doc_type, None):
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
logger.warning(f"Weird doctype: {report_file.doc_type}")
day_data["average_processing_time"][report_file.doc_type] = IterAvg()
day_data["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
day_data["average_processing_time"]['avg'].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
if not day_data["file_average_processing_time"].get(report_file.doc_type, None):
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
logger.warning(f"Weird doctype: {report_file.doc_type}")
day_data["file_average_processing_time"][report_file.doc_type] = IterAvg()
day_data["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0
day_data["file_average_processing_time"]['avg'].add_avg(image_avg_cost, 1) if image_avg_cost else 0
@ -273,7 +276,7 @@ class ReportAccumulateByRequest:
self.data[this_month] = [copy.deepcopy(self.month_format), {}]
self.data[this_month][0]["extraction_date"] = "Subtotal (" + timezone.localtime(request.created_at).strftime("%Y-%m") + ")"
if not self.data[this_month][1].get(this_day, None):
print(f"[INFO] Adding a new day: {this_day} for report: {report.id} ...")
logger.info(f" Adding a new day: {this_day} for report: {report.id} ...")
self.data[this_month][1][this_day] = copy.deepcopy(self.day_format)[0]
self.data[this_month][1][this_day]['extraction_date'] = timezone.localtime(request.created_at).strftime("%Y-%m-%d")
usage = self.count_transactions_within_day(this_day)
@ -289,6 +292,7 @@ class ReportAccumulateByRequest:
_ai_cost = request.ai_inference_time
processing_time_by_averaging_request_cost = (_be_cost + _ai_cost)/_number_of_file if _number_of_file > 0 else 0
for report_file in report_files:
# report_file.time_cost = processing_time_by_averaging_request_cost
_report_file = copy.deepcopy(report_file)
if _report_file.is_bad_image or _report_file.bad_image_reason in settings.ACC_EXCLUDE_RESEASONS:
_report_file.acc = None
@ -844,7 +848,6 @@ def calculate_accuracy(key_name: str, inference: Dict[str, Union[str, List]], ta
def calculate_avg_accuracy(acc, type, keys=[]):
acc_list = []
# print(f"[DEBUG]: type: {type} - acc: {acc}")
for key in keys:
this_acc = acc.get(type, {}).get(key, [])
if len(this_acc) > 0:
@ -985,7 +988,7 @@ def calculate_a_request(report, request):
if request.redemption_id:
_sub = map_subsidiary_short_to_long(request.redemption_id[:2])
else:
print(f"[WARN]: empty redemption_id, check request: {request.request_id}")
logger.warning(f"empty redemption_id, check request: {request.request_id}")
# Little trick to replace purchase date to normalized
if len(att["normalized_data"]["feedback"].get("purchase_date", [])) > 0:
@ -1008,9 +1011,6 @@ def calculate_a_request(report, request):
if att["is_bad_image"] or image.reason in settings.ACC_EXCLUDE_RESEASONS:
if image.reason in settings.ACC_EXCLUDE_RESEASONS:
# if image.reason in settings.ACC_EXCLUDE_RESEASONS:
# print(f"[DEBUG]: {image.reason}")
# TODO: Exclude bad image accuracy from average accuracy
_att["avg_acc"] = None
for t in _att["acc"].keys():
for k in _att["acc"][t].keys():
@ -1073,7 +1073,7 @@ def calculate_a_request(report, request):
request_att["total_images"] += 1
request_att["err"] += _att["err"]
except Exception as e:
print(f"[ERROR]: failed to calculate request: {request.request_id} - request_file: {image.file_name} because of {e}")
logger.error(f"Failed to calculate request: {request.request_id} - request_file: {image.file_name} because of {e}")
continue
return request_att, report_files, atts

View File

@ -23,6 +23,9 @@ import imagesize
import csv
from openpyxl import load_workbook
from openpyxl.styles import Font, Border, Side, PatternFill, NamedStyle, numbers, Alignment
import logging
logger = logging.getLogger(__name__)
s3_client = S3Util.MinioS3Client(
endpoint=settings.S3_ENDPOINT,
@ -96,7 +99,6 @@ def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUES
for f in files:
if not isinstance(f, TemporaryUploadedFile):
# print(f'[DEBUG]: {f.name}')
raise InvalidException(excArgs="files")
extension = f.name.split(".")[-1].lower() in allowed_file_extensions
if not extension or "." not in f.name:
@ -116,7 +118,6 @@ def validate_csv_feedback(files, max_file_num=1, min_file_num=1, file_field="csv
for f in files:
if not isinstance(f, TemporaryUploadedFile):
# print(f'[DEBUG]: {f.name}')
raise InvalidException(excArgs="files")
extension = f.name.split(".")[-1].lower() in ["csv"]
if not extension or "." not in f.name:
@ -131,7 +132,7 @@ def get_file(file_path: str):
try:
return open(file_path, 'rb')
except Exception as e:
print(e)
logger.error(e)
raise GeneralException("System")
@ -226,7 +227,7 @@ def delete_file_with_path(file_path: str) -> bool:
os.remove(file_path)
return True
except Exception as e:
print(e)
logger.error(e)
return False
@ -239,7 +240,7 @@ def save_template_file(file_name: str, rq: OcrTemplate, file: TemporaryUploadedF
os.makedirs(folder_path)
return save_file_with_path(file_name, file, quality, folder_path)
except Exception as e:
print(e)
logger.error(e)
raise ServiceUnavailableException()
def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path: str):
@ -253,7 +254,7 @@ def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, fo
except InvalidDecompressedSizeException as e:
raise e
except Exception as e:
print(e)
logger.error(e)
raise ServiceUnavailableException()
return file_path
@ -306,7 +307,7 @@ def resize_and_save_file(file_name: str, rq: SubscriptionRequest, file: Temporar
except InvalidDecompressedSizeException as e:
raise e
except Exception as e:
print(f"[ERROR]: {e}")
logger.error(f"{e}")
raise ServiceUnavailableException()
def save_to_S3(file_name, rq, local_file_path):
@ -319,21 +320,19 @@ def save_to_S3(file_name, rq, local_file_path):
c_connector.remove_local_file((local_file_path, request_id))
return s3_key
except Exception as e:
print(f"[ERROR]: {e}")
logger.error(f"{e}")
raise ServiceUnavailableException()
def save_feedback_to_S3(file_name, id, local_file_path):
try:
# print(f"[DEBUG]: Uploading feedback to S3 with local path {local_file_path}, id: {id}, file_name: {file_name}")
assert len(local_file_path.split("/")) >= 3, "file_path must have at least feedback_folder and feedback_id"
# s3_key = os.path.join(local_file_path.split("/")[-3], local_file_path.split("/")[-2], file_name)
s3_key = os.path.join("feedback", local_file_path.split("/")[-2], file_name)
# print(f"[DEBUG]: Uploading feedback to S3 with s3_key {s3_key}")
c_connector.upload_feedback_to_s3((local_file_path, s3_key, id))
c_connector.remove_local_file((local_file_path, id))
return s3_key
except Exception as e:
print(f"[ERROR]: {e}")
logger.error(f"{e}")
raise ServiceUnavailableException()
def save_report_to_S3(id, local_file_path, delay=0):
@ -343,7 +342,7 @@ def save_report_to_S3(id, local_file_path, delay=0):
c_connector.remove_local_file((local_file_path, id))
return s3_key
except Exception as e:
print(f"[ERROR]: {e}")
logger.error(f"{e}")
raise ServiceUnavailableException()
def download_from_S3(s3_key, local_file_path):
@ -361,7 +360,7 @@ def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, fo
except InvalidDecompressedSizeException as e:
raise e
except Exception as e:
print(e)
logger.error(e)
raise ServiceUnavailableException()
return file_path
@ -400,8 +399,8 @@ def save_img(file_path: str, file: TemporaryUploadedFile, quality):
elif orientation == 8:
image = image.transpose(Image.ROTATE_90)
except Exception as ex:
print(ex)
print("Rotation Error")
logger.error(ex)
logger.error("Rotation Error")
traceback.print_exc()
image = resize(image, max_w=settings.TARGET_MAX_IMAGE_SIZE[0], max_h=settings.TARGET_MAX_IMAGE_SIZE[1])
@ -449,9 +448,9 @@ def get_value(_dict, keys):
else:
value = value.get(key, {})
except Exception as e:
print(f"[ERROR]: {e}")
print(f"[ERROR]: value: {value}")
print(f"[ERROR]: keys: {keys}")
logger.error(f"{e}")
logger.error(f"value: {value}")
logger.error(f"keys: {keys}")
if value is None:
return "-"

View File

@ -5,7 +5,9 @@ from terminaltables import AsciiTable
from rapidfuzz.distance import Levenshtein
from .wiki_diff import inline_diff
import logging
logger = logging.getLogger(__name__)
def is_type_list(x, type):
@ -381,5 +383,5 @@ def eval_kie(preds_e2e: dict[str, dict[str, str]], gt_e2e: dict[str, dict[str, s
)
table = AsciiTable(table_data)
print(table.table)
logger.debug(table.table)
return results, fail_cases

View File

@ -14,6 +14,9 @@ from .ocr_metrics import eval_ocr_metric
import sys
# sys.path.append(os.path.dirname(__file__))
from sdsvkvu.utils.query.sbt_v2 import get_seller, post_process_seller
import logging
logger = logging.getLogger(__name__)
def read_json(file_path: str):
@ -108,7 +111,7 @@ def convert_predict_from_csv(
n_pages = request['pages']
req_id = request['request_id']
if not isinstance(request['doc_type'], str) or not isinstance(request['predict_result'], str):
print(f"[WARNING] Skipped request id {req_id}")
logger.warning(f"] Skipped request id {req_id}")
continue
doc_type_list = request['doc_type'].split(',')
@ -178,7 +181,7 @@ def init_csv(
for request_id in list_request_id:
gt_path = os.path.join(gt_dir, request_id, request_id+".json")
if not os.path.exists(gt_path):
print(f"[WARNING] Skipped request id {os.path.basename(os.path.dirname(gt_path))}")
logger.warning(f"] Skipped request id {os.path.basename(os.path.dirname(gt_path))}")
continue
gt_data = read_json(gt_path)
json_file_list = glob.glob(os.path.join(pred_dir, request_id, "temp_*.json"))
@ -306,7 +309,7 @@ def pick_sample_to_revise(
img_path = [ff for ff in glob.glob(img_path_folder.replace(".json", ".*")) if ".json" not in ff]
if len(img_path) == 0:
print(f"[WARNING] Skipped request id {request_id}")
logger.warning(f"] Skipped request id {request_id}")
continue
img_path = img_path[0]
# img_path = [ff for ff in glob.glob(json_path.replace(".json", ".*"))][0]
@ -354,41 +357,41 @@ if __name__ == "__main__":
csv_path_end_user = "logs/eval_20240115/OCR_15Jan2024.csv"
# Step 1: Convert a csv file to get user submitted results for each request
print("[INFO] Starting convert csv from customer to json")
logger.info(f" Starting convert csv from customer to json")
os.system(f"sudo chmod -R 777 {save_path}")
convert_groundtruth_from_csv(csv_path=csv_path_end_user, save_dir=save_path)
print("[INFO] Converted")
logger.info(f" Converted")
# # Step 2: Convert a csv file to get predict OCR results for each image
print("[INFO] Starting convert csv from SDSV to json")
logger.info(f" Starting convert csv from SDSV to json")
convert_predict_from_csv(csv_path=csv_path, save_dir=save_path)
print("[INFO] Converted")
logger.info(f" Converted")
# # Step 3: Gen initial csv file and calculate OCR result between submitted results and ocr results
print("[INFO] Starting generate csv to get performance")
logger.info(f" Starting generate csv to get performance")
gt_path = save_path
pred_path = save_path
req_to_red_dict = gen_req_to_red_dict(csv_path_end_user)
init_data = init_csv(gt_dir=gt_path, pred_dir=pred_path, req_to_red=req_to_red_dict)
pd.DataFrame(init_data).to_csv(os.path.join(save_csv, "init1.csv"), index=False)
print("[INFO] Done")
logger.info(f" Done")
# # Step 4: Split requests whose accuracy is less than 1 to revise
# print("[INFO] Starting split data to review")
# logger.info(f" Starting split data to review")
# revised_path = os.path.join(save_csv, "revised")
# # shutil.rmtree(revised_path)
# pick_sample_to_revise(ocr_accuracy=init_data, gt_dir=save_path, save_dir=revised_path)
# print("[INFO] Done")
# logger.info(f" Done")
# # Step 5: Merge revised results to gt folder
# print("[INFO] Merging revised data to ground truth folder")
# logger.info(f" Merging revised data to ground truth folder")
# revised_path = os.path.join(save_csv, "revised")
# revised_path = [f'{revised_path}/empty_results', f'{revised_path}/diff_results']
# merge_revised_sample(revised_path_list=revised_path, save_dir=save_path)
# print("Done")
# # Step 6: Caculate OCR result between ocr results and revised results
# print("[INFO] Exporting OCR report")
# logger.info(f" Exporting OCR report")
# init_csv_path = os.path.join(save_csv, "init1.csv")
# report = export_report(init_csv=init_csv_path)
# error_path = os.path.join(save_csv, "errors")
@ -427,6 +430,6 @@ if __name__ == "__main__":
# report.drop(columns=["file_path", "class_name"]).to_csv(os.path.join(save_csv, f"SBT_report_{time.strftime('%Y%m%d')}.csv"), index=False)
# print("[INFO] Done")
# logger.info(f" Done")

View File

@ -4,6 +4,9 @@ import unidecode
import os
import glob
import pandas as pd
import logging
logger = logging.getLogger(__name__)
VOWELS = 'aeouiy' + 'AEOUIY'
CONSONANTS = 'bcdfghjklmnpqrstvxwz' + 'BCDFGHJKLMNPQRSTVXWZ'
@ -194,7 +197,7 @@ def main(overwrite=False):
df.to_csv(f'{SAVE_PATH}/wiki_diff.csv', sep='\t')
df_ = pd.DataFrame(ddata_specialchars)
df_.to_csv(f'{SAVE_PATH}/wiki_diff_specialchars.csv', sep='\t')
print(TOTAL_WORD)
logger.info(TOTAL_WORD)
if __name__ == '__main__':

View File

@ -25,10 +25,9 @@ from ..exception.exceptions import InvalidException, NotFoundException, \
from ..models import UserProfile, OcrTemplate, OcrTemplateBox, \
Subscription, SubscriptionRequestFile, SubscriptionRequest
from ..celery_worker.client_connector import c_connector
import logging
from celery.utils.log import get_task_logger
logger = get_task_logger(__name__)
logger = logging.getLogger(__name__)
class UserData:
user: UserProfile = None
@ -107,7 +106,6 @@ def validate_ocr_request_and_get(request, subscription):
# validated_data['is_test_request'] = bool(request.data.get('is_test_request', False))
validated_data['is_test_request'] = string_to_boolean(request.data.get('is_test_request', "false"))
# print(f"[DEBUG]: is_test_request: ", validated_data['is_test_request'])
return validated_data
@ -143,7 +141,6 @@ def sbt_validate_ocr_request_and_get(request, subscription):
validated_data['invoice_file'] = invoice_file
validated_data['redemption_ID'] = redemption_ID
validated_data['is_test_request'] = string_to_boolean(request.data.get('is_test_request', "false"))
# print(f"[DEBUG]: is_test_request: ", validated_data['is_test_request'])
subsidiary = request.data.get("subsidiary", None)
valid_subs = list(settings.SUBS.keys())[:-2] # remove "ALL" and "SEAO"
@ -246,7 +243,7 @@ def get_random_string(length):
# choose from all lowercase letter
letters = string.ascii_lowercase
result_str = ''.join(random.choice(letters) for _ in range(length))
print("Random string of length", length, "is:", result_str)
logger.debug("Random string of length", length, "is:", result_str)
return result_str
@ -346,7 +343,7 @@ def send_to_queue2(rq_id, sub_id, file_url, user_id, typez, metadata={}):
elif typez == ProcessType.SBT_INVOICE.value:
c_connector.process_invoice_sbt((rq_id, file_url, metadata))
except Exception as e:
print(e)
logger.error(e)
raise BadGatewayException()
def build_template_matching_data(template):
@ -383,7 +380,7 @@ def send_template_queue(rq_id, file_url, template: OcrTemplate, uid):
c_connector.process_template_matching(
(rq_id, template.subscription.id, folder_name, file_url, template_data, uid))
except Exception as e:
print(e)
logger.error(e)
raise BadGatewayException()
def process_feedback(feedback_id, local_file_path):
@ -451,7 +448,7 @@ def pdf_to_images_urls(doc_path, request: SubscriptionRequest, user, dpi: int =
image = get_first_page_pdf(doc_path, 300)
image = resize(image, max_w=settings.TARGET_MAX_IMAGE_SIZE[0], max_h=settings.TARGET_MAX_IMAGE_SIZE[1])
image.save(saving_path)
print(f"Saving {saving_path}")
logger.debug(f"Saving {saving_path}")
new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=saving_path,
request=request,
file_name=break_file_name,

View File

@ -1,4 +1,7 @@
import boto3
import logging
logger = logging.getLogger(__name__)
class MinioS3Client:
def __init__(self, access_key, secret_key, bucket_name, endpoint=""):
@ -25,7 +28,7 @@ class MinioS3Client:
's3')
except Exception as e:
print(f"[WARM] Unable to create an s3 client, {e}")
logger.warning(f"Unable to create an s3 client, {e}")
self.s3_client = None
def update_object(self, s3_key, content):
@ -38,7 +41,7 @@ class MinioS3Client:
# print(f"Object '{s3_key}' updated in S3 with res: {res}")
return res
except Exception as e:
print(f"Error updating object in S3: {str(e)}")
logger.error(f"Error updating object in S3: {str(e)}")
def upload_file(self, local_file_path, s3_key):
try:
@ -46,7 +49,7 @@ class MinioS3Client:
# print(f"File '{local_file_path}' uploaded to S3 with key '{s3_key}'")
return res
except Exception as e:
print(f"Error uploading file to S3: {str(e)}")
logger.error(f"Error uploading file to S3: {str(e)}")
def download_file(self, s3_key, local_file_path):
try:
@ -54,7 +57,7 @@ class MinioS3Client:
# print(f"File '{s3_key}' downloaded from S3 to '{local_file_path}'")
return res
except Exception as e:
print(f"Error downloading file from S3: {str(e)}")
logger.error(f"Error downloading file from S3: {str(e)}")
def create_url_with_expiration(self, s3_key, expiration_time):
try:
@ -68,7 +71,7 @@ class MinioS3Client:
# print(f"URL for file '{s3_key}' expires in {expiration_time} seconds")
return res
except Exception as e:
print(f"Error generating URL for file '{s3_key}': {str(e)}")
logger.error(f"Error generating URL for file '{s3_key}': {str(e)}")
if __name__=="__main__":

View File

@ -45,6 +45,7 @@ services:
image: sidp/cope2n-be-fi-sbt:latest
environment:
- MEDIA_ROOT=${MEDIA_ROOT}
- LOG_ROOT=${LOG_ROOT}
- DB_ENGINE=${DB_ENGINE}
- DB_SCHEMA=${DB_SCHEMA}
- DB_USER=${DB_USER}
@ -84,13 +85,15 @@ services:
- ctel-sbt
volumes:
- BE_media:${MEDIA_ROOT}
- BE_log:${LOG_ROOT}
- BE_static:/app/static
- /mnt/hdd4T/TannedCung/OCR/Data/SBT_lost_data:/external_data
- ./cope2n-api:/app
working_dir: /app
depends_on:
db-sbt:
condition: service_started
command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
command: sh -c "sudo chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
python manage.py makemigrations &&
python manage.py migrate &&
python manage.py compilemessages &&
@ -105,7 +108,8 @@ services:
- MINIO_ACCESS_KEY=${S3_ACCESS_KEY}
- MINIO_SECRET_KEY=${S3_SECRET_KEY}
volumes:
- ./data/minio_data:/data
# - ./data/minio_data:/data
- /home/dxtan/TannedCung/persistent/minio:/data
ports:
- 9884:9884
- 9885:9885
@ -144,6 +148,7 @@ services:
image: sidp/cope2n-be-fi-sbt:latest
environment:
- MEDIA_ROOT=${MEDIA_ROOT}
- LOG_ROOT=${LOG_ROOT}
- PYTHONPATH=${PYTHONPATH}:/app # For import module
- PYTHONUNBUFFERED=1 # For show print log
- DB_ENGINE=${DB_ENGINE}
@ -177,6 +182,7 @@ services:
condition: service_started
volumes:
- BE_media:${MEDIA_ROOT}
- BE_log:${LOG_ROOT}
- ./cope2n-api:/app
working_dir: /app
@ -189,7 +195,8 @@ services:
mem_reservation: 500m
image: postgres:15.4-alpine
volumes:
- ./data/postgres_data:/var/lib/postgresql/data
# - ./data/postgres_data:/var/lib/postgresql/data
- /home/dxtan/TannedCung/persistent/postgres:/var/lib/postgresql/data
networks:
- ctel-sbt
environment:
@ -255,3 +262,4 @@ volumes:
db_data:
BE_static:
BE_media:
BE_log:

View File

@ -25,6 +25,7 @@ services:
be-ctel-sbt:
environment:
- MEDIA_ROOT=${MEDIA_ROOT}
- LOG_ROOT=${LOG_ROOT}
- DB_ENGINE=${DB_ENGINE}
- DB_SCHEMA=${DB_SCHEMA}
- DB_USER=${DB_USER}
@ -112,6 +113,7 @@ services:
be-celery-sbt:
environment:
- MEDIA_ROOT=${MEDIA_ROOT}
- LOG_ROOT=${LOG_ROOT}
- PYTHONPATH=${PYTHONPATH}:/app # For import module
- PYTHONUNBUFFERED=1 # For show print log
- DB_ENGINE=${DB_ENGINE}