commit f93e9813ca

.env_dev
@@ -1,4 +1,5 @@
MEDIA_ROOT=/app/media
LOG_ROOT=/app/log
# DATABASE django setup
DB_ENGINE=django.db.backends.postgresql_psycopg2
DB_SCHEMA=sbt_prod_20240422_1
.gitignore (vendored)
@@ -45,3 +45,4 @@ cope2n-api/reviewed/retailer.xlsx
scripts/crawl_database.py
redundant_models/
junk.json
cope2n-api/result.json
@@ -13,7 +13,7 @@ docker compose --profile local up -d

```bash
pip install pytest requests deepdiff
IDP_HOST=https://sbt.idp.sdsrv.ai IDP_USERNAME=sbt IDP_PASSWORD=7Eg4AbWIXDnufgn python3 -m pytest --log-cli-level=DEBUG --capture=sys tests
IDP_HOST=http://ocrnlb-stagging-671b98778cba8a08.elb.ap-southeast-1.amazonaws.com IDP_USERNAME=sbt IDP_PASSWORD=7Eg4AbWIXDnufgn python3 -m pytest --log-cli-level=DEBUG --capture=sys tests
```
IDP_HOST=http://ec2-54-169-227-39.ap-southeast-1.compute.amazonaws.com:9881 IDP_USERNAME=sbt IDP_PASSWORD=abc python3 -m pytest --log-cli-level=DEBUG --capture=sys tests
File diff suppressed because one or more lines are too long
@@ -76,6 +76,7 @@ MIDDLEWARE = [
    'corsheaders.middleware.CorsMiddleware',
    "whitenoise.middleware.WhiteNoiseMiddleware",
    "django.middleware.locale.LocaleMiddleware",
    "fwd_api.middleware.logging_request_response_middleware.LoggingMiddleware"
]

LOCALE_PATHS = [
@@ -207,6 +208,7 @@ FILE_UPLOAD_HANDLERS = [
CORS_ORIGIN_ALLOW_ALL = True

MEDIA_ROOT = env.str("MEDIA_ROOT", default=r"/var/www/example.com/media/")
LOG_ROOT = env.str("LOG_ROOT", default="/app/log")
BROKER_URL = env.str("BROKER_URL", default="amqp://test:test@107.120.70.226:5672//")
CELERY_TASK_TRACK_STARTED = True
# CELERY_TASK_TIME_LIMIT = 30 * 60
@@ -256,3 +258,42 @@ CACHES = {
        'BACKEND': 'django.core.cache.backends.dummy.DummyCache',
    }
}

LOGGING = {
    'version': 1,
    'disable_existing_loggers': False,
    'formatters': {
        'verbose': {
            'format': '{levelname} {asctime} {module} {message}',
            'style': '{',
        },
        'simple': {
            'format': '{levelname} {message}',
            'style': '{',
        },
    },
    'handlers': {
        'console': {
            'class': 'logging.StreamHandler',
            'formatter': 'verbose',
        },
        'file': {
            "class": 'logging.handlers.TimedRotatingFileHandler',
            'filename': f'{LOG_ROOT}/sbt_idp_BE.log',  # Specify the path and filename for the log file
            "when": "midnight",
            "interval": 1,
            'backupCount': 10,
            'formatter': 'verbose',
        },
    },
    'loggers': {
        'django': {
            'handlers': ['console', 'file'],
            'level': 'INFO',
        },
        '': {
            'handlers': ['console', 'file'],
            'level': 'INFO',
        }
    },
}
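For reference, a minimal sketch of how application code picks up this configuration (the module and the `process_upload` helper below are hypothetical): any logger obtained with `logging.getLogger(__name__)` propagates to the root ('') logger, so it writes to both the console and the rotating `sbt_idp_BE.log` file.

```python
import logging

logger = logging.getLogger(__name__)  # inherits the console + file handlers configured above

def handle_upload(file_name):
    logger.info(f"received file {file_name}")
    try:
        process_upload(file_name)  # hypothetical helper, for illustration only
    except Exception as e:
        logger.error(f"failed to process {file_name}: {e}")
```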
@ -26,6 +26,9 @@ from ..utils.subsidiary import map_subsidiary_long_to_short, map_subsidiary_shor
|
||||
from ..utils.report import aggregate_overview
|
||||
from fwd_api.utils.accuracy import predict_result_to_ready
|
||||
import copy
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
redis_client = RedisUtils()
|
||||
|
||||
@ -227,8 +230,8 @@ class AccuracyViewSet(viewsets.ViewSet):
|
||||
retailer = rq.predict_result.get("content", {}).get("document", [])[
|
||||
0].get("content", [])[0].get("value", [])
|
||||
except Exception as e:
|
||||
print(f"[ERROR]: {e}")
|
||||
print(f"[ERROR]: {rq}")
|
||||
logger.error(f"{e}")
|
||||
logger.error(f"{rq}")
|
||||
data.append({
|
||||
'RequestID': rq.request_id,
|
||||
'RedemptionID': rq.redemption_id,
|
||||
@ -603,8 +606,8 @@ class AccuracyViewSet(viewsets.ViewSet):
|
||||
subsidiary_overview_reports += this_overview
|
||||
|
||||
except Exception as e:
|
||||
print(
|
||||
f"[WARM]: Unable to retrive data {key} from Redis, skipping...")
|
||||
logger.warn(
|
||||
f"Unable to retrive data {key} from Redis, skipping...")
|
||||
data = aggregate_overview(subsidiary_overview_reports)
|
||||
for item in data:
|
||||
if item.get("subs") == "+":
|
||||
|
@ -21,7 +21,9 @@ from ..utils import file as FileUtils
|
||||
from ..utils import process as ProcessUtil
|
||||
from ..utils.process import UserData
|
||||
from drf_spectacular.utils import extend_schema
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class CtelTemplateViewSet(viewsets.ViewSet):
|
||||
lookup_field = "username"
|
||||
@ -121,7 +123,7 @@ class CtelTemplateViewSet(viewsets.ViewSet):
|
||||
serializer = CreateTemplateRequest(data=request.data)
|
||||
serializer.is_valid()
|
||||
if not serializer.is_valid():
|
||||
print(serializer.errors)
|
||||
logger.error(serializer.errors)
|
||||
raise InvalidException(excArgs=list(serializer.errors.keys()))
|
||||
data = serializer.validated_data
|
||||
|
||||
@ -156,7 +158,7 @@ class CtelTemplateViewSet(viewsets.ViewSet):
|
||||
serializer = UpdateTemplateRequest(data=data)
|
||||
serializer.is_valid()
|
||||
if not serializer.is_valid():
|
||||
print(serializer.errors)
|
||||
logger.error(serializer.errors)
|
||||
raise InvalidException(excArgs=list(serializer.errors.keys()))
|
||||
data = serializer.validated_data
|
||||
|
||||
|
@ -21,6 +21,9 @@ from ..utils.crypto import sds_authenticator, admin_sds_authenticator, SdsAuthen
|
||||
import datetime
|
||||
from ..request.LoginRequest import LoginRequest
|
||||
from ..utils.date import default_zone
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from fwd import settings
|
||||
|
||||
@ -45,7 +48,7 @@ class CtelUserViewSet(viewsets.ViewSet):
|
||||
@action(detail=False, url_path="login", methods=["POST"])
|
||||
def login(self, request):
|
||||
serializer = LoginRequest(data=request.data)
|
||||
print(serializer.is_valid(raise_exception=True))
|
||||
logger.error(serializer.is_valid(raise_exception=True))
|
||||
|
||||
data = serializer.validated_data
|
||||
token_limit = 999999
|
||||
@ -157,7 +160,7 @@ class CtelUserViewSet(viewsets.ViewSet):
|
||||
serializer = UpsertUserRequest(data=request.data)
|
||||
serializer.is_valid()
|
||||
if not serializer.is_valid():
|
||||
print(serializer.errors)
|
||||
logger.error(serializer.errors)
|
||||
raise InvalidException(excArgs=list(serializer.errors.keys()))
|
||||
data = serializer.validated_data
|
||||
|
||||
|
@ -24,6 +24,9 @@ from ..models import SubscriptionRequest, SubscriptionRequestFile, OcrTemplate,
|
||||
from ..response.ReportSerializer import ReportSerializer
|
||||
from ..utils import file as FileUtils
|
||||
from ..utils import process as ProcessUtil
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class CtelViewSet(viewsets.ViewSet):
|
||||
lookup_field = "username"
|
||||
@ -92,8 +95,8 @@ class CtelViewSet(viewsets.ViewSet):
|
||||
elif file_extension in image_extensions:
|
||||
b_url = ProcessUtil.process_image_file(file_name, file_obj, new_request, user, "all", 0)
|
||||
j_time = time.time()
|
||||
print(f"[INFO]: Duration of Pre-processing: {j_time - s_time}s")
|
||||
print(f"[INFO]: b_url: {b_url}")
|
||||
logger.info(f"Duration of Pre-processing: {j_time - s_time}s")
|
||||
logger.info(f"b_url: {b_url}")
|
||||
if p_type in standard_ocr_list:
|
||||
ProcessUtil.send_to_queue2(rq_id + "_sub_0", sub.id, b_url, user.id, p_type)
|
||||
if p_type == ProcessType.TEMPLATE_MATCHING.value:
|
||||
@ -441,7 +444,7 @@ class CtelViewSet(viewsets.ViewSet):
|
||||
content_type = 'application/pdf' if temp.file_name.split(".")[-1] in pdf_extensions else content_type
|
||||
if user.id != user_data['internal_id'] or user.status != EntityStatus.ACTIVE.value:
|
||||
raise PermissionDeniedException()
|
||||
print(temp.file_path)
|
||||
logger.info(temp.file_path)
|
||||
return HttpResponse(FileWrapper(FileUtils.get_file(temp.file_path)), status=status.HTTP_200_OK,
|
||||
headers={'Content-Disposition': 'filename={fn}'.format(fn=temp.file_name)},
|
||||
content_type=content_type)
|
||||
@ -459,7 +462,6 @@ class CtelViewSet(viewsets.ViewSet):
|
||||
|
||||
if user.id != user_data['internal_id'] or user.status != EntityStatus.ACTIVE.value:
|
||||
raise PermissionDeniedException()
|
||||
# print(f"[DEBUG]: rq: {rq}, file_name: {file_name}")
|
||||
file_data = SubscriptionRequestFile.objects.filter(request=rq, file_name=file_name)[0]
|
||||
except IndexError:
|
||||
raise NotFoundException(excArgs='file')
|
||||
@ -527,7 +529,6 @@ class CtelViewSet(viewsets.ViewSet):
|
||||
|
||||
serializer: ReportSerializer = ReportSerializer(data=report_filter, many=True)
|
||||
serializer.is_valid()
|
||||
# print(f"[DEBUG]: result: {serializer.data[0]}")
|
||||
if report_filter[0].status == 400:
|
||||
raise FileContentInvalidException()
|
||||
if report_filter[0].status == 100: # continue, only return when result is fullfilled
|
||||
|
@@ -2,8 +2,25 @@ from celery import Celery

from fwd import settings
from fwd_api.exception.exceptions import GeneralException
from kombu.utils.uuid import uuid
from celery.utils.log import get_task_logger
logger = get_task_logger(__name__)


def is_it_an_index(id):
    ret = True
    if not isinstance(id, str):
        logger.info("NOT A STRING")
        return False
    if "/" in id:
        logger.info("/ in ID")
        return False
    if id.count("_") > 5 or id.count("_") < 1:
        _c = id.count("_")
        logger.info(f"_ HAS {_c}")
        return False
    return ret

class CeleryConnector:
    task_routes = {
        # save result
@@ -110,6 +127,8 @@
    def send_task(self, name=None, args=None, countdown=None):
        if name not in self.task_routes or 'queue' not in self.task_routes[name]:
            raise GeneralException("System")
        return self.app.send_task(name, args, queue=self.task_routes[name]['queue'], expires=300, countdown=countdown)
        task_id = args[0] + "_" + uuid()[:4] if isinstance(args, tuple) and is_it_an_index(args[0]) else uuid()
        logger.info(f"SEND task name: {name} - {task_id} | args: {args} | countdown: {countdown}")
        return self.app.send_task(name, args, queue=self.task_routes[name]['queue'], expires=300, countdown=countdown, task_id=task_id)

c_connector = CeleryConnector()
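A quick sanity check of the routing helper above, with illustrative values only (not taken from the codebase): `is_it_an_index` accepts plain strings containing one to five underscores and no slashes, which is what lets `send_task` prefix the request id onto the generated task id.

```python
is_it_an_index("SBT_20240422_0001")     # True  - string, no "/", 1-5 underscores
is_it_an_index("media/requests/1.png")  # False - contains "/"
is_it_an_index(42)                      # False - not a string
```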
@ -7,6 +7,7 @@ from multiprocessing.pool import ThreadPool
|
||||
|
||||
from fwd_api.models import SubscriptionRequest, UserProfile
|
||||
from fwd_api.celery_worker.worker import app
|
||||
from fwd_api.celery_worker.task_warpper import VerboseTask
|
||||
from ..constant.common import FolderFileType, image_extensions
|
||||
from ..exception.exceptions import FileContentInvalidException
|
||||
from fwd_api.models import SubscriptionRequestFile, FeedbackRequest, Report
|
||||
@ -23,7 +24,6 @@ from fwd_api.utils.accuracy import predict_result_to_ready
|
||||
from celery.utils.log import get_task_logger
|
||||
from fwd import settings
|
||||
|
||||
|
||||
logger = get_task_logger(__name__)
|
||||
|
||||
s3_client = S3Util.MinioS3Client(
|
||||
@ -66,7 +66,7 @@ def process_image_file(file_name: str, file_path, request, user) -> list:
|
||||
'request_file_id': new_request_file.code
|
||||
}]
|
||||
|
||||
@app.task(name="csv_feedback")
|
||||
@app.task(base=VerboseTask, name="csv_feedback")
|
||||
def process_csv_feedback(csv_file_path, feedback_id):
|
||||
# load file to RAM
|
||||
status = {}
|
||||
@ -124,7 +124,7 @@ def process_csv_feedback(csv_file_path, feedback_id):
|
||||
try:
|
||||
image.processing_time = time_cost.get(image.doc_type, [0 for _ in range(image.index_in_request)])[image.index_in_request]
|
||||
except Exception as e:
|
||||
print(f"[ERROR] image.doc_type: {image.doc_type} - image.index_in_request: {image.index_in_request} - time_cost: {time_cost} - {e}")
|
||||
logger.error(f"image.doc_type: {image.doc_type} - image.index_in_request: {image.index_in_request} - time_cost: {time_cost} - {e}")
|
||||
if not validate_feedback_file(_feedback_result, _predict_result):
|
||||
status[request_id] = "Missalign imei number between feedback and predict"
|
||||
# continue
|
||||
@ -138,7 +138,7 @@ def process_csv_feedback(csv_file_path, feedback_id):
|
||||
_predict_result = {"retailername": None, "sold_to_party": None, "invoice_no": None, "purchase_date": [], "imei_number": [_predict_result["imei_number"][image.index_in_request]]}
|
||||
_feedback_result = {"retailername": None, "sold_to_party": None, "invoice_no": None, "purchase_date": None, "imei_number": [_feedback_result["imei_number"][image.index_in_request]]} if _feedback_result and len(_feedback_result["imei_number"]) > image.index_in_request else None
|
||||
except Exception as e:
|
||||
print (f"[ERROR]: {request_id} - {e}")
|
||||
logger.error(f"{request_id} - {e}")
|
||||
image.predict_result = _predict_result
|
||||
image.feedback_result = _feedback_result
|
||||
# image.reviewed_result = _reviewed_result
|
||||
@ -160,10 +160,9 @@ def process_csv_feedback(csv_file_path, feedback_id):
|
||||
os.remove(file_path)
|
||||
except Exception as e:
|
||||
logger.error(f"Unable to set S3: {e}")
|
||||
print(f"Unable to set S3: {e}")
|
||||
feedback_rq.save()
|
||||
|
||||
@app.task(name='do_pdf')
|
||||
@app.task(base=VerboseTask, name='do_pdf')
|
||||
def process_pdf(rq_id, sub_id, p_type, user_id, files):
|
||||
"""
|
||||
files: [{
|
||||
@ -229,7 +228,7 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files):
|
||||
for sub_rq_id, sub_id, urls, user_id, p_type, metadata in to_queue:
|
||||
ProcessUtil.send_to_queue2(sub_rq_id, sub_id, urls, user_id, p_type, metadata)
|
||||
|
||||
@app.task(name='upload_file_to_s3')
|
||||
@app.task(base=VerboseTask, name='upload_file_to_s3')
|
||||
def upload_file_to_s3(local_file_path, s3_key, request_id):
|
||||
if s3_client.s3_client is not None:
|
||||
try:
|
||||
@ -239,12 +238,11 @@ def upload_file_to_s3(local_file_path, s3_key, request_id):
|
||||
sub_request.save()
|
||||
except Exception as e:
|
||||
logger.error(f"Unable to set S3: {e}")
|
||||
print(f"Unable to set S3: {e}")
|
||||
return
|
||||
else:
|
||||
logger.info(f"S3 is not available, skipping,...")
|
||||
|
||||
@app.task(name='upload_feedback_to_s3')
|
||||
@app.task(base=VerboseTask, name='upload_feedback_to_s3')
|
||||
def upload_feedback_to_s3(local_file_path, s3_key, feedback_id):
|
||||
if s3_client.s3_client is not None:
|
||||
try:
|
||||
@ -254,12 +252,11 @@ def upload_feedback_to_s3(local_file_path, s3_key, feedback_id):
|
||||
feed_request.save()
|
||||
except Exception as e:
|
||||
logger.error(f"Unable to set S3: {e}")
|
||||
print(f"Unable to set S3: {e}")
|
||||
return
|
||||
else:
|
||||
logger.info(f"S3 is not available, skipping,...")
|
||||
|
||||
@app.task(name='upload_report_to_s3')
|
||||
@app.task(base=VerboseTask, name='upload_report_to_s3')
|
||||
def upload_report_to_s3(local_file_path, s3_key, report_id, delay):
|
||||
if s3_client.s3_client is not None:
|
||||
try:
|
||||
@ -271,20 +268,19 @@ def upload_report_to_s3(local_file_path, s3_key, report_id, delay):
|
||||
report.save()
|
||||
except Exception as e:
|
||||
logger.error(f"Unable to set S3: {e}")
|
||||
print(f"Unable to set S3: {e}")
|
||||
return
|
||||
else:
|
||||
logger.info(f"S3 is not available, skipping,...")
|
||||
|
||||
@app.task(name='remove_local_file')
|
||||
@app.task(base=VerboseTask, name='remove_local_file')
|
||||
def remove_local_file(local_file_path, request_id):
|
||||
print(f"[INFO] Removing local file: {local_file_path}, ...")
|
||||
logger.info(f"Removing local file: {local_file_path}, ...")
|
||||
try:
|
||||
os.remove(local_file_path)
|
||||
except Exception as e:
|
||||
logger.info(f"Unable to remove local file: {e}")
|
||||
|
||||
@app.task(name='upload_obj_to_s3')
|
||||
@app.task(base=VerboseTask, name='upload_obj_to_s3')
|
||||
def upload_obj_to_s3(byte_obj, s3_key):
|
||||
if s3_client.s3_client is not None:
|
||||
obj = base64.b64decode(byte_obj)
|
||||
|
@ -2,6 +2,7 @@ import traceback
|
||||
|
||||
from fwd_api.models import SubscriptionRequest, Report, ReportFile
|
||||
from fwd_api.celery_worker.worker import app
|
||||
from fwd_api.celery_worker.task_warpper import VerboseTask
|
||||
from ..utils import s3 as S3Util
|
||||
from ..utils.accuracy import (update_temp_accuracy, IterAvg,
|
||||
count_transactions, extract_report_detail_list, calculate_a_request,
|
||||
@ -37,7 +38,7 @@ def mean_list(l):
|
||||
return 0
|
||||
return sum(l)/len(l)
|
||||
|
||||
@app.task(name='make_a_report_2')
|
||||
@app.task(base=VerboseTask, name='make_a_report_2')
|
||||
def make_a_report_2(report_id, query_set):
|
||||
report_type = query_set.pop("report_type", "accuracy")
|
||||
if report_type == "accuracy":
|
||||
@ -236,11 +237,11 @@ def create_accuracy_report(report_id, **kwargs):
|
||||
s3_key = save_report_to_S3(report.report_id, local_workbook)
|
||||
|
||||
except IndexError as e:
|
||||
print(e)
|
||||
logger.error(e)
|
||||
traceback.print_exc()
|
||||
print("NotFound request by report id, %d", report_id)
|
||||
logger.error("NotFound request by report id, %d", report_id)
|
||||
except Exception as e:
|
||||
print("[ERROR]: an error occured while processing report: ", report_id)
|
||||
logger.error("An error occured while processing report: ", report_id)
|
||||
traceback.print_exc()
|
||||
return 400
|
||||
|
||||
@ -286,10 +287,10 @@ def create_billing_report(report_id, **kwargs):
|
||||
local_workbook = save_workbook_file(detail_file_name, report, data_workbook)
|
||||
s3_key = save_report_to_S3(report.report_id, local_workbook)
|
||||
except IndexError as e:
|
||||
print(e)
|
||||
logger.error(e)
|
||||
traceback.print_exc()
|
||||
print("NotFound request by report id, %d", report_id)
|
||||
logger.error("NotFound request by report id, %d", report_id)
|
||||
except Exception as e:
|
||||
print("[ERROR]: an error occured while processing report: ", report_id)
|
||||
logger.error("An error occured while processing report: ", report_id)
|
||||
traceback.print_exc()
|
||||
return 400
|
||||
|
@ -12,6 +12,10 @@ from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile
|
||||
from fwd_api.constant.common import ProcessType
|
||||
from fwd_api.utils.redis import RedisUtils
|
||||
from fwd_api.utils import process as ProcessUtil
|
||||
from fwd_api.celery_worker.task_warpper import VerboseTask
|
||||
from celery.utils.log import get_task_logger
|
||||
|
||||
logger = get_task_logger(__name__)
|
||||
|
||||
redis_client = RedisUtils()
|
||||
|
||||
@ -55,11 +59,10 @@ def aggregate_result(results):
|
||||
return des_result
|
||||
|
||||
def print_id(rq_id):
|
||||
print(" [x] Received {rq}".format(rq=rq_id))
|
||||
logger.info(" [x] Received {rq}".format(rq=rq_id))
|
||||
|
||||
|
||||
def to_status(result):
|
||||
print('X')
|
||||
if 'status' in result and result['status'] not in [200, 201, 202]:
|
||||
return 4
|
||||
return 3
|
||||
@ -72,7 +75,7 @@ def update_user(rq: SubscriptionRequest):
|
||||
sub.current_token += ProcessUtil.token_value(int(rq.process_type))
|
||||
sub.save()
|
||||
|
||||
@app.task(name='process_sap_invoice_result')
|
||||
@app.task(base=VerboseTask, name='process_sap_invoice_result')
|
||||
def process_invoice_sap_result(rq_id, result):
|
||||
try:
|
||||
rq: SubscriptionRequest = \
|
||||
@ -85,16 +88,16 @@ def process_invoice_sap_result(rq_id, result):
|
||||
|
||||
update_user(rq)
|
||||
except IndexError as e:
|
||||
print(e)
|
||||
print("NotFound request by requestId, %d", rq_id)
|
||||
logger.error(e)
|
||||
logger.error("NotFound request by requestId, %d", rq_id)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
print("Fail Invoice %d", rq_id)
|
||||
logger.error(e)
|
||||
logger.error("Fail Invoice %d", rq_id)
|
||||
traceback.print_exc()
|
||||
return "FailInvoice"
|
||||
|
||||
|
||||
@app.task(name='process_fi_invoice_result')
|
||||
@app.task(base=VerboseTask, name='process_fi_invoice_result')
|
||||
def process_invoice_fi_result(rq_id, result):
|
||||
try:
|
||||
rq: SubscriptionRequest = \
|
||||
@ -107,15 +110,15 @@ def process_invoice_fi_result(rq_id, result):
|
||||
|
||||
update_user(rq)
|
||||
except IndexError as e:
|
||||
print(e)
|
||||
print("NotFound request by requestId, %d", rq_id)
|
||||
logger.error(e)
|
||||
logger.error("NotFound request by requestId, %d", rq_id)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
print("Fail Invoice %d", rq_id)
|
||||
logger.error(e)
|
||||
logger.error("Fail Invoice %d", rq_id)
|
||||
traceback.print_exc()
|
||||
return "FailInvoice"
|
||||
|
||||
@app.task(name='process_manulife_invoice_result')
|
||||
@app.task(base=VerboseTask, name='process_manulife_invoice_result')
|
||||
def process_invoice_manulife_result(rq_id, result):
|
||||
try:
|
||||
rq: SubscriptionRequest = \
|
||||
@ -128,17 +131,17 @@ def process_invoice_manulife_result(rq_id, result):
|
||||
|
||||
update_user(rq)
|
||||
except IndexError as e:
|
||||
print(e)
|
||||
print("NotFound request by requestId, %d", rq_id)
|
||||
logger.error(e)
|
||||
logger.error("NotFound request by requestId, %d", rq_id)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
print("Fail Invoice %d", rq_id)
|
||||
logger.error(e)
|
||||
logger.error("Fail Invoice %d", rq_id)
|
||||
traceback.print_exc()
|
||||
return "FailInvoice"
|
||||
|
||||
random_processor_name = None
|
||||
|
||||
@app.task(name='process_sbt_invoice_result')
|
||||
@app.task(base=VerboseTask, name='process_sbt_invoice_result')
|
||||
def process_invoice_sbt_result(rq_id, result, metadata):
|
||||
global random_processor_name
|
||||
if random_processor_name is None:
|
||||
@ -161,7 +164,6 @@ def process_invoice_sbt_result(rq_id, result, metadata):
|
||||
rq.status = 200 # stop waiting
|
||||
results = redis_client.get_all_cache(rq_id)
|
||||
rq.predict_result = aggregate_result(results)
|
||||
# print(f"[DEBUG]: rq.predict_result: {rq.predict_result}")
|
||||
ai_inference_profile = {}
|
||||
doc_type_string = ""
|
||||
for idx, result in results.items():
|
||||
@ -180,13 +182,13 @@ def process_invoice_sbt_result(rq_id, result, metadata):
|
||||
rq.save()
|
||||
update_user(rq)
|
||||
except IndexError as e:
|
||||
print(e)
|
||||
print("NotFound request by requestId, %d", rq_id)
|
||||
logger.error(e)
|
||||
logger.error("NotFound request by requestId, %d", rq_id)
|
||||
rq.ai_inference_time = 0
|
||||
rq.save()
|
||||
except Exception as e:
|
||||
print(e)
|
||||
print("Fail Invoice %d", rq_id)
|
||||
logger.error(e)
|
||||
logger.error("Fail Invoice %d", rq_id)
|
||||
traceback.print_exc()
|
||||
rq.ai_inference_time = 0
|
||||
rq.save()
|
||||
|
cope2n-api/fwd_api/celery_worker/task_warpper.py (new file)
@@ -0,0 +1,16 @@
from celery import Task
from celery.utils.log import get_task_logger
logger = get_task_logger(__name__)

class VerboseTask(Task):
    abstract = True

    def on_failure(self, exc, task_id, args, kwargs, einfo):
        # Task failed. What do you want to do?
        logger.error('FAILURE: Task raised an exception: {}'.format(exc))

    def on_success(self, retval, task_id, args, kwargs):
        logger.info(f"SUCCESS: Task: {task_id} succeeded | retval: {retval} | args: {args} | kwargs: {kwargs}")

    def before_start(self, task_id, args, kwargs):
        logger.info(f"BEFORE_START: Task: {task_id} | args: {args} | kwargs: {kwargs}")
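For context, tasks opt into this base class exactly as the rest of the commit does; a minimal sketch with a hypothetical task name (the `app` instance comes from `fwd_api.celery_worker.worker`):

```python
from fwd_api.celery_worker.worker import app
from fwd_api.celery_worker.task_warpper import VerboseTask

@app.task(base=VerboseTask, name="example_task")  # hypothetical task, for illustration
def example_task(request_id):
    # before_start / on_success / on_failure now log start, result and failures
    return request_id
```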
@@ -2,7 +2,9 @@ import os

import django
from celery import Celery
from celery.signals import setup_logging  # noqa
from kombu import Queue
import copy

from fwd import settings

@@ -19,13 +21,15 @@ app: Celery = Celery(
app.config_from_object("django.conf:settings", namespace="CELERY")
app.autodiscover_tasks()

@app.on_after_finalize.connect
def setup_periodic_tasks(sender, **kwargs):
    from fwd_api.bg_tasks.clean_local_files import clean_media
    sender.add_periodic_task(
        10.0, clean_media.s(), expires=120.0
    )
@setup_logging.connect
def config_loggers(*args, **kwargs):
    from logging.config import dictConfig  # noqa
    from django.conf import settings  # noqa

    log_config = copy.deepcopy(settings.LOGGING)
    if log_config.get("handlers", {}).get("file", {}).get("filename", None):
        log_config["handlers"]["file"]["filename"] = log_config["handlers"]["file"]["filename"].replace(".log", "_celery.log")
    dictConfig(log_config)

app.conf.update({
    'task_queues':
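Connecting `setup_logging` stops Celery from installing its own root logger, so the worker reuses the Django LOGGING dict; only the file handler's name is rewritten to keep API and worker logs separate. A small illustration of that rewrite, assuming LOG_ROOT=/app/log as in .env_dev:

```python
"/app/log/sbt_idp_BE.log".replace(".log", "_celery.log")
# -> "/app/log/sbt_idp_BE_celery.log"
```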
@ -9,7 +9,9 @@ from fwd_api.annotation.api import throw_on_failure
|
||||
from fwd_api.utils import date as DateUtil
|
||||
from fwd_api.utils.crypto import ctel_cryptor, sds_authenticator, image_authenticator
|
||||
from fwd_api.exception.exceptions import InvalidException, TokenExpiredException, PermissionDeniedException
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@throw_on_failure(TokenExpiredException())
|
||||
def authenticate_by_param_token(request):
|
||||
@ -27,8 +29,7 @@ def authenticate_by_param_token(request):
|
||||
# @throw_on_failure(TokenExpiredException())
|
||||
def authenticate_by_authorization_header(request):
|
||||
token = request.headers['Authorization']
|
||||
print(f"[INFO]: recived request at {datetime.datetime.utcnow()}, with path {request.path}")
|
||||
# print(f"[DEBUG]: token: {token}")
|
||||
logger.info(f"recived request at {datetime.datetime.utcnow()}, with path {request.path}")
|
||||
data = sds_authenticator.decode_data(token)
|
||||
if 'expired_at' not in data or 'status' not in data or 'subscription_id' not in data:
|
||||
raise PermissionDeniedException()
|
||||
@ -72,7 +73,7 @@ class AuthFilter(authentication.BaseAuthentication):
|
||||
def authenticate(self, request):
|
||||
|
||||
if request.path in self.white_list_path:
|
||||
print("App API")
|
||||
logger.debug("App API")
|
||||
return None, None
|
||||
if '/api/ctel/media/' in request.path or '/api/ctel/v2/media/' in request.path:
|
||||
return authenticate_by_param_token(request)
|
||||
|
@ -151,8 +151,8 @@ class Command(BaseCommand):
|
||||
prepared_data_copy = copy.deepcopy(prepared_data)
|
||||
s3_client = MinioS3Client(
|
||||
# endpoint='http://107.120.133.27:9884',
|
||||
access_key='AKIA3AFPFVWZHTZHB6FW',
|
||||
secret_key='qYmEkfnO8ltQ7n9GfnF8+HRcfOsbXhx0YSNOLxdW',
|
||||
access_key='secret',
|
||||
secret_key='secret+HRcfOsbXhx0YSNOLxdW',
|
||||
bucket_name='ocr-sds'
|
||||
)
|
||||
# file = open("modified.txt", "w")
|
||||
|
cope2n-api/fwd_api/middleware/__init__.py (new, empty file)

cope2n-api/fwd_api/middleware/logging_request_response_middleware.py (new file; path per the MIDDLEWARE entry added in settings)
@@ -0,0 +1,34 @@
import logging
import uuid

from django.utils.deprecation import MiddlewareMixin

logger = logging.getLogger(__name__)

class LoggingMiddleware(MiddlewareMixin):
    def process_request(self, request):
        trace_id = request.headers.get('X-Trace-ID', str(uuid.uuid4()))
        request.META['X-Trace-ID'] = trace_id

        request_body = ""
        content_type = request.headers.get("Content-Type", "")
        if request.method in ('POST', 'PUT', 'PATCH') and not content_type.startswith("multipart/form-data"):
            request_body = request.body.decode(encoding="utf-8", errors="ignore")

        logger.info(f"Request: {request.method} {request.path} | Trace ID: {trace_id} | "
                    f"Body: {request_body} | Headers: {request.headers}")

    def process_response(self, request, response):
        trace_id = request.META.get('X-Trace-ID', str(uuid.uuid4()))
        response_body = response.content.decode("utf-8") if response.content else ""

        logger.info(f"Response: {request.method} {request.path} | Trace ID: {trace_id} | "
                    f"Status: {response.status_code} | Body: {response_body}")

        response['X-Trace-ID'] = trace_id
        return response

    def process_exception(self, request, exception):
        trace_id = request.META.get('X-Trace-ID', str(uuid.uuid4()))
        logger.error(f"Exception: {request.method} {request.path} | Trace ID: {trace_id} | "
                     f"Error: {exception}")
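Clients can correlate API logs by sending their own trace header; otherwise the middleware generates a UUID and echoes it back on the response. A minimal sketch (endpoint path and token are hypothetical):

```python
import requests

resp = requests.get(
    "https://sbt.idp.sdsrv.ai/api/example",            # hypothetical endpoint
    headers={"Authorization": "<token>", "X-Trace-ID": "trace-0001"},
)
print(resp.headers.get("X-Trace-ID"))  # same ID, usable to grep sbt_idp_BE.log
```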
@ -16,6 +16,9 @@ from django.utils import timezone
|
||||
from fwd import settings
|
||||
from ..models import SubscriptionRequest, Report, ReportFile
|
||||
from typing import Union, List, Dict
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
VALID_KEYS = ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"]
|
||||
OPTIONAL_KEYS = ['invoice_no']
|
||||
@ -145,7 +148,7 @@ class ReportAccumulateByRequest:
|
||||
if report_file.doc_type in ["imei", "invoice", "all"]:
|
||||
doc_type = report_file.doc_type
|
||||
else:
|
||||
print(f"[WARM]: Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
|
||||
logger.warning(f"Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
|
||||
total["num_imei"] += 1 if doc_type == "imei" else 0
|
||||
total["num_invoice"] += 1 if doc_type == "invoice" else 0
|
||||
|
||||
@ -160,13 +163,13 @@ class ReportAccumulateByRequest:
|
||||
total["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, []))
|
||||
|
||||
if not total["average_processing_time"].get(report_file.doc_type, None):
|
||||
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
|
||||
logger.warning(f"Weird doctype: {report_file.doc_type}")
|
||||
total["average_processing_time"][report_file.doc_type] = IterAvg()
|
||||
total["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
|
||||
total["average_processing_time"]["avg"].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
|
||||
|
||||
if not total["file_average_processing_time"].get(report_file.doc_type, None):
|
||||
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
|
||||
logger.warning(f"Weird doctype: {report_file.doc_type}")
|
||||
total["file_average_processing_time"][report_file.doc_type] = IterAvg()
|
||||
total["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0
|
||||
total["file_average_processing_time"]["avg"].add_avg(image_avg_cost, 1) if image_avg_cost else 0
|
||||
@ -175,7 +178,7 @@ class ReportAccumulateByRequest:
|
||||
if report_file.doc_type in ["imei", "invoice", "all"]:
|
||||
doc_type = report_file.doc_type
|
||||
else:
|
||||
print(f"[WARM]: Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
|
||||
logger.warning(f"Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
|
||||
total["usage"]["imei"] += 1 if doc_type == "imei" else 0
|
||||
total["usage"]["invoice"] += 1 if doc_type == "invoice" else 0
|
||||
total["usage"]["total_images"] += 1
|
||||
@ -193,7 +196,7 @@ class ReportAccumulateByRequest:
|
||||
if report_file.doc_type in ["imei", "invoice", "all"]:
|
||||
doc_type = report_file.doc_type
|
||||
else:
|
||||
print(f"[WARM]: Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
|
||||
logger.warning(f"Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
|
||||
month["num_imei"] += 1 if doc_type == "imei" else 0
|
||||
month["num_invoice"] += 1 if doc_type == "invoice" else 0
|
||||
|
||||
@ -206,13 +209,13 @@ class ReportAccumulateByRequest:
|
||||
month["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, []))
|
||||
|
||||
if not month["average_processing_time"].get(report_file.doc_type, None):
|
||||
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
|
||||
logger.warning(f"Weird doctype: {report_file.doc_type}")
|
||||
month["average_processing_time"][report_file.doc_type] = IterAvg()
|
||||
month["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
|
||||
month["average_processing_time"]["avg"].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
|
||||
|
||||
if not month["file_average_processing_time"].get(report_file.doc_type, None):
|
||||
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
|
||||
logger.warning(f"Weird doctype: {report_file.doc_type}")
|
||||
month["file_average_processing_time"][report_file.doc_type] = IterAvg()
|
||||
month["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0
|
||||
month["file_average_processing_time"]["avg"].add_avg(image_avg_cost, 1) if image_avg_cost else 0
|
||||
@ -221,7 +224,7 @@ class ReportAccumulateByRequest:
|
||||
if report_file.doc_type in ["imei", "invoice", "all"]:
|
||||
doc_type = report_file.doc_type
|
||||
else:
|
||||
print(f"[WARM]: Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
|
||||
logger.warning(f"Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
|
||||
month["usage"]["imei"] += 1 if doc_type == "imei" else 0
|
||||
month["usage"]["invoice"] += 1 if doc_type == "invoice" else 0
|
||||
month["usage"]["total_images"] += 1
|
||||
@ -238,7 +241,7 @@ class ReportAccumulateByRequest:
|
||||
if report_file.doc_type in ["imei", "invoice", "all"]:
|
||||
doc_type = report_file.doc_type
|
||||
else:
|
||||
print(f"[WARM]: Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
|
||||
logger.warning(f"Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}")
|
||||
day_data["num_imei"] += 1 if doc_type == "imei" else 0
|
||||
day_data["num_invoice"] += 1 if doc_type == "invoice" else 0
|
||||
day_data["report_files"].append(report_file)
|
||||
@ -252,13 +255,13 @@ class ReportAccumulateByRequest:
|
||||
day_data["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, []))
|
||||
|
||||
if not day_data["average_processing_time"].get(report_file.doc_type, None):
|
||||
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
|
||||
logger.warning(f"Weird doctype: {report_file.doc_type}")
|
||||
day_data["average_processing_time"][report_file.doc_type] = IterAvg()
|
||||
day_data["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
|
||||
day_data["average_processing_time"]['avg'].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
|
||||
|
||||
if not day_data["file_average_processing_time"].get(report_file.doc_type, None):
|
||||
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
|
||||
logger.warning(f"Weird doctype: {report_file.doc_type}")
|
||||
day_data["file_average_processing_time"][report_file.doc_type] = IterAvg()
|
||||
day_data["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0
|
||||
day_data["file_average_processing_time"]['avg'].add_avg(image_avg_cost, 1) if image_avg_cost else 0
|
||||
@ -273,7 +276,7 @@ class ReportAccumulateByRequest:
|
||||
self.data[this_month] = [copy.deepcopy(self.month_format), {}]
|
||||
self.data[this_month][0]["extraction_date"] = "Subtotal (" + timezone.localtime(request.created_at).strftime("%Y-%m") + ")"
|
||||
if not self.data[this_month][1].get(this_day, None):
|
||||
print(f"[INFO] Adding a new day: {this_day} for report: {report.id} ...")
|
||||
logger.info(f" Adding a new day: {this_day} for report: {report.id} ...")
|
||||
self.data[this_month][1][this_day] = copy.deepcopy(self.day_format)[0]
|
||||
self.data[this_month][1][this_day]['extraction_date'] = timezone.localtime(request.created_at).strftime("%Y-%m-%d")
|
||||
usage = self.count_transactions_within_day(this_day)
|
||||
@ -289,6 +292,7 @@ class ReportAccumulateByRequest:
|
||||
_ai_cost = request.ai_inference_time
|
||||
processing_time_by_averaging_request_cost = (_be_cost + _ai_cost)/_number_of_file if _number_of_file > 0 else 0
|
||||
for report_file in report_files:
|
||||
# report_file.time_cost = processing_time_by_averaging_request_cost
|
||||
_report_file = copy.deepcopy(report_file)
|
||||
if _report_file.is_bad_image or _report_file.bad_image_reason in settings.ACC_EXCLUDE_RESEASONS:
|
||||
_report_file.acc = None
|
||||
@ -844,7 +848,6 @@ def calculate_accuracy(key_name: str, inference: Dict[str, Union[str, List]], ta
|
||||
|
||||
def calculate_avg_accuracy(acc, type, keys=[]):
|
||||
acc_list = []
|
||||
# print(f"[DEBUG]: type: {type} - acc: {acc}")
|
||||
for key in keys:
|
||||
this_acc = acc.get(type, {}).get(key, [])
|
||||
if len(this_acc) > 0:
|
||||
@ -985,7 +988,7 @@ def calculate_a_request(report, request):
|
||||
if request.redemption_id:
|
||||
_sub = map_subsidiary_short_to_long(request.redemption_id[:2])
|
||||
else:
|
||||
print(f"[WARN]: empty redemption_id, check request: {request.request_id}")
|
||||
logger.warning(f"empty redemption_id, check request: {request.request_id}")
|
||||
|
||||
# Little trick to replace purchase date to normalized
|
||||
if len(att["normalized_data"]["feedback"].get("purchase_date", [])) > 0:
|
||||
@ -1008,9 +1011,6 @@ def calculate_a_request(report, request):
|
||||
|
||||
if att["is_bad_image"] or image.reason in settings.ACC_EXCLUDE_RESEASONS:
|
||||
if image.reason in settings.ACC_EXCLUDE_RESEASONS:
|
||||
# if image.reason in settings.ACC_EXCLUDE_RESEASONS:
|
||||
# print(f"[DEBUG]: {image.reason}")
|
||||
# TODO: Exclude bad image accuracy from average accuracy
|
||||
_att["avg_acc"] = None
|
||||
for t in _att["acc"].keys():
|
||||
for k in _att["acc"][t].keys():
|
||||
@ -1073,7 +1073,7 @@ def calculate_a_request(report, request):
|
||||
request_att["total_images"] += 1
|
||||
request_att["err"] += _att["err"]
|
||||
except Exception as e:
|
||||
print(f"[ERROR]: failed to calculate request: {request.request_id} - request_file: {image.file_name} because of {e}")
|
||||
logger.error(f"Failed to calculate request: {request.request_id} - request_file: {image.file_name} because of {e}")
|
||||
continue
|
||||
|
||||
return request_att, report_files, atts
|
||||
|
@ -23,6 +23,9 @@ import imagesize
|
||||
import csv
|
||||
from openpyxl import load_workbook
|
||||
from openpyxl.styles import Font, Border, Side, PatternFill, NamedStyle, numbers, Alignment
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
s3_client = S3Util.MinioS3Client(
|
||||
endpoint=settings.S3_ENDPOINT,
|
||||
@ -96,7 +99,6 @@ def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUES
|
||||
|
||||
for f in files:
|
||||
if not isinstance(f, TemporaryUploadedFile):
|
||||
# print(f'[DEBUG]: {f.name}')
|
||||
raise InvalidException(excArgs="files")
|
||||
extension = f.name.split(".")[-1].lower() in allowed_file_extensions
|
||||
if not extension or "." not in f.name:
|
||||
@ -116,7 +118,6 @@ def validate_csv_feedback(files, max_file_num=1, min_file_num=1, file_field="csv
|
||||
|
||||
for f in files:
|
||||
if not isinstance(f, TemporaryUploadedFile):
|
||||
# print(f'[DEBUG]: {f.name}')
|
||||
raise InvalidException(excArgs="files")
|
||||
extension = f.name.split(".")[-1].lower() in ["csv"]
|
||||
if not extension or "." not in f.name:
|
||||
@ -131,7 +132,7 @@ def get_file(file_path: str):
|
||||
try:
|
||||
return open(file_path, 'rb')
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.error(e)
|
||||
raise GeneralException("System")
|
||||
|
||||
|
||||
@ -226,7 +227,7 @@ def delete_file_with_path(file_path: str) -> bool:
|
||||
os.remove(file_path)
|
||||
return True
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.error(e)
|
||||
return False
|
||||
|
||||
|
||||
@ -239,7 +240,7 @@ def save_template_file(file_name: str, rq: OcrTemplate, file: TemporaryUploadedF
|
||||
os.makedirs(folder_path)
|
||||
return save_file_with_path(file_name, file, quality, folder_path)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.error(e)
|
||||
raise ServiceUnavailableException()
|
||||
|
||||
def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path: str):
|
||||
@ -253,7 +254,7 @@ def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, fo
|
||||
except InvalidDecompressedSizeException as e:
|
||||
raise e
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.error(e)
|
||||
raise ServiceUnavailableException()
|
||||
return file_path
|
||||
|
||||
@ -306,7 +307,7 @@ def resize_and_save_file(file_name: str, rq: SubscriptionRequest, file: Temporar
|
||||
except InvalidDecompressedSizeException as e:
|
||||
raise e
|
||||
except Exception as e:
|
||||
print(f"[ERROR]: {e}")
|
||||
logger.error(f"{e}")
|
||||
raise ServiceUnavailableException()
|
||||
|
||||
def save_to_S3(file_name, rq, local_file_path):
|
||||
@ -319,21 +320,19 @@ def save_to_S3(file_name, rq, local_file_path):
|
||||
c_connector.remove_local_file((local_file_path, request_id))
|
||||
return s3_key
|
||||
except Exception as e:
|
||||
print(f"[ERROR]: {e}")
|
||||
logger.error(f"{e}")
|
||||
raise ServiceUnavailableException()
|
||||
|
||||
def save_feedback_to_S3(file_name, id, local_file_path):
|
||||
try:
|
||||
# print(f"[DEBUG]: Uploading feedback to S3 with local path {local_file_path}, id: {id}, file_name: {file_name}")
|
||||
assert len(local_file_path.split("/")) >= 3, "file_path must have at least feedback_folder and feedback_id"
|
||||
# s3_key = os.path.join(local_file_path.split("/")[-3], local_file_path.split("/")[-2], file_name)
|
||||
s3_key = os.path.join("feedback", local_file_path.split("/")[-2], file_name)
|
||||
# print(f"[DEBUG]: Uploading feedback to S3 with s3_key {s3_key}")
|
||||
c_connector.upload_feedback_to_s3((local_file_path, s3_key, id))
|
||||
c_connector.remove_local_file((local_file_path, id))
|
||||
return s3_key
|
||||
except Exception as e:
|
||||
print(f"[ERROR]: {e}")
|
||||
logger.error(f"{e}")
|
||||
raise ServiceUnavailableException()
|
||||
|
||||
def save_report_to_S3(id, local_file_path, delay=0):
|
||||
@ -343,7 +342,7 @@ def save_report_to_S3(id, local_file_path, delay=0):
|
||||
c_connector.remove_local_file((local_file_path, id))
|
||||
return s3_key
|
||||
except Exception as e:
|
||||
print(f"[ERROR]: {e}")
|
||||
logger.error(f"{e}")
|
||||
raise ServiceUnavailableException()
|
||||
|
||||
def download_from_S3(s3_key, local_file_path):
|
||||
@ -361,7 +360,7 @@ def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, fo
|
||||
except InvalidDecompressedSizeException as e:
|
||||
raise e
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.error(e)
|
||||
raise ServiceUnavailableException()
|
||||
return file_path
|
||||
|
||||
@ -400,8 +399,8 @@ def save_img(file_path: str, file: TemporaryUploadedFile, quality):
|
||||
elif orientation == 8:
|
||||
image = image.transpose(Image.ROTATE_90)
|
||||
except Exception as ex:
|
||||
print(ex)
|
||||
print("Rotation Error")
|
||||
logger.error(ex)
|
||||
logger.error("Rotation Error")
|
||||
traceback.print_exc()
|
||||
|
||||
image = resize(image, max_w=settings.TARGET_MAX_IMAGE_SIZE[0], max_h=settings.TARGET_MAX_IMAGE_SIZE[1])
|
||||
@ -449,9 +448,9 @@ def get_value(_dict, keys):
|
||||
else:
|
||||
value = value.get(key, {})
|
||||
except Exception as e:
|
||||
print(f"[ERROR]: {e}")
|
||||
print(f"[ERROR]: value: {value}")
|
||||
print(f"[ERROR]: keys: {keys}")
|
||||
logger.error(f"{e}")
|
||||
logger.error(f"value: {value}")
|
||||
logger.error(f"keys: {keys}")
|
||||
|
||||
if value is None:
|
||||
return "-"
|
||||
|
@ -5,7 +5,9 @@ from terminaltables import AsciiTable
|
||||
from rapidfuzz.distance import Levenshtein
|
||||
|
||||
from .wiki_diff import inline_diff
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def is_type_list(x, type):
|
||||
|
||||
@ -381,5 +383,5 @@ def eval_kie(preds_e2e: dict[str, dict[str, str]], gt_e2e: dict[str, dict[str, s
|
||||
)
|
||||
|
||||
table = AsciiTable(table_data)
|
||||
print(table.table)
|
||||
logger.debug(table.table)
|
||||
return results, fail_cases
|
||||
|
@ -14,6 +14,9 @@ from .ocr_metrics import eval_ocr_metric
|
||||
import sys
|
||||
# sys.path.append(os.path.dirname(__file__))
|
||||
from sdsvkvu.utils.query.sbt_v2 import get_seller, post_process_seller
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def read_json(file_path: str):
|
||||
@ -108,7 +111,7 @@ def convert_predict_from_csv(
|
||||
n_pages = request['pages']
|
||||
req_id = request['request_id']
|
||||
if not isinstance(request['doc_type'], str) or not isinstance(request['predict_result'], str):
|
||||
print(f"[WARNING] Skipped request id {req_id}")
|
||||
logger.warning(f"] Skipped request id {req_id}")
|
||||
continue
|
||||
|
||||
doc_type_list = request['doc_type'].split(',')
|
||||
@ -178,7 +181,7 @@ def init_csv(
|
||||
for request_id in list_request_id:
|
||||
gt_path = os.path.join(gt_dir, request_id, request_id+".json")
|
||||
if not os.path.exists(gt_path):
|
||||
print(f"[WARNING] Skipped request id {os.path.basename(os.path.dirname(gt_path))}")
|
||||
logger.warning(f"] Skipped request id {os.path.basename(os.path.dirname(gt_path))}")
|
||||
continue
|
||||
gt_data = read_json(gt_path)
|
||||
json_file_list = glob.glob(os.path.join(pred_dir, request_id, "temp_*.json"))
|
||||
@ -306,7 +309,7 @@ def pick_sample_to_revise(
|
||||
img_path = [ff for ff in glob.glob(img_path_folder.replace(".json", ".*")) if ".json" not in ff]
|
||||
|
||||
if len(img_path) == 0:
|
||||
print(f"[WARNING] Skipped request id {request_id}")
|
||||
logger.warning(f"] Skipped request id {request_id}")
|
||||
continue
|
||||
img_path = img_path[0]
|
||||
# img_path = [ff for ff in glob.glob(json_path.replace(".json", ".*"))][0]
|
||||
@ -354,41 +357,41 @@ if __name__ == "__main__":
|
||||
csv_path_end_user = "logs/eval_20240115/OCR_15Jan2024.csv"
|
||||
|
||||
# Step 1: Convert a csv file to get user submitted results for each request
|
||||
print("[INFO] Starting convert csv from customer to json")
|
||||
logger.info(f" Starting convert csv from customer to json")
|
||||
os.system(f"sudo chmod -R 777 {save_path}")
|
||||
convert_groundtruth_from_csv(csv_path=csv_path_end_user, save_dir=save_path)
|
||||
print("[INFO] Converted")
|
||||
logger.info(f" Converted")
|
||||
|
||||
# # Step 2: Convert a csv file to get predict OCR results for each image
|
||||
print("[INFO] Starting convert csv from SDSV to json")
|
||||
logger.info(f" Starting convert csv from SDSV to json")
|
||||
convert_predict_from_csv(csv_path=csv_path, save_dir=save_path)
|
||||
print("[INFO] Converted")
|
||||
logger.info(f" Converted")
|
||||
|
||||
# # Step 3: Gen initial csv file and calculate OCR result between submitted results and ocr results
|
||||
print("[INFO] Starting generate csv to get performance")
|
||||
logger.info(f" Starting generate csv to get performance")
|
||||
gt_path = save_path
|
||||
pred_path = save_path
|
||||
req_to_red_dict = gen_req_to_red_dict(csv_path_end_user)
|
||||
init_data = init_csv(gt_dir=gt_path, pred_dir=pred_path, req_to_red=req_to_red_dict)
|
||||
pd.DataFrame(init_data).to_csv(os.path.join(save_csv, "init1.csv"), index=False)
|
||||
print("[INFO] Done")
|
||||
logger.info(f" Done")
|
||||
|
||||
# # Step 4: Split requests whose accuracy is less than 1 to revise
|
||||
# print("[INFO] Starting split data to review")
|
||||
# logger.info(f" Starting split data to review")
|
||||
# revised_path = os.path.join(save_csv, "revised")
|
||||
# # shutil.rmtree(revised_path)
|
||||
# pick_sample_to_revise(ocr_accuracy=init_data, gt_dir=save_path, save_dir=revised_path)
|
||||
# print("[INFO] Done")
|
||||
# logger.info(f" Done")
|
||||
|
||||
# # Step 5: Merge revised results to gt folder
|
||||
# print("[INFO] Merging revised data to ground truth folder")
|
||||
# logger.info(f" Merging revised data to ground truth folder")
|
||||
# revised_path = os.path.join(save_csv, "revised")
|
||||
# revised_path = [f'{revised_path}/empty_results', f'{revised_path}/diff_results']
|
||||
# merge_revised_sample(revised_path_list=revised_path, save_dir=save_path)
|
||||
# print("Done")
|
||||
|
||||
# # Step 6: Caculate OCR result between ocr results and revised results
|
||||
# print("[INFO] Exporting OCR report")
|
||||
# logger.info(f" Exporting OCR report")
|
||||
# init_csv_path = os.path.join(save_csv, "init1.csv")
|
||||
# report = export_report(init_csv=init_csv_path)
|
||||
# error_path = os.path.join(save_csv, "errors")
|
||||
@ -427,6 +430,6 @@ if __name__ == "__main__":
|
||||
|
||||
|
||||
# report.drop(columns=["file_path", "class_name"]).to_csv(os.path.join(save_csv, f"SBT_report_{time.strftime('%Y%m%d')}.csv"), index=False)
|
||||
# print("[INFO] Done")
|
||||
# logger.info(f" Done")
|
||||
|
||||
|
@ -4,6 +4,9 @@ import unidecode
|
||||
import os
|
||||
import glob
|
||||
import pandas as pd
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
VOWELS = 'aeouiy' + 'AEOUIY'
|
||||
CONSONANTS = 'bcdfghjklmnpqrstvxwz' + 'BCDFGHJKLMNPQRSTVXWZ'
|
||||
@ -194,7 +197,7 @@ def main(overwrite=False):
|
||||
df.to_csv(f'{SAVE_PATH}/wiki_diff.csv', sep='\t')
|
||||
df_ = pd.DataFrame(ddata_specialchars)
|
||||
df_.to_csv(f'{SAVE_PATH}/wiki_diff_specialchars.csv', sep='\t')
|
||||
print(TOTAL_WORD)
|
||||
logger.info(TOTAL_WORD)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -25,10 +25,9 @@ from ..exception.exceptions import InvalidException, NotFoundException, \
|
||||
from ..models import UserProfile, OcrTemplate, OcrTemplateBox, \
|
||||
Subscription, SubscriptionRequestFile, SubscriptionRequest
|
||||
from ..celery_worker.client_connector import c_connector
|
||||
import logging
|
||||
|
||||
from celery.utils.log import get_task_logger
|
||||
|
||||
logger = get_task_logger(__name__)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class UserData:
|
||||
user: UserProfile = None
|
||||
@ -107,7 +106,6 @@ def validate_ocr_request_and_get(request, subscription):
|
||||
|
||||
# validated_data['is_test_request'] = bool(request.data.get('is_test_request', False))
|
||||
validated_data['is_test_request'] = string_to_boolean(request.data.get('is_test_request', "false"))
|
||||
# print(f"[DEBUG]: is_test_request: ", validated_data['is_test_request'])
|
||||
|
||||
return validated_data
|
||||
|
||||
@ -143,7 +141,6 @@ def sbt_validate_ocr_request_and_get(request, subscription):
|
||||
validated_data['invoice_file'] = invoice_file
|
||||
validated_data['redemption_ID'] = redemption_ID
|
||||
validated_data['is_test_request'] = string_to_boolean(request.data.get('is_test_request', "false"))
|
||||
# print(f"[DEBUG]: is_test_request: ", validated_data['is_test_request'])
|
||||
|
||||
subsidiary = request.data.get("subsidiary", None)
|
||||
valid_subs = list(settings.SUBS.keys())[:-2] # remove "ALL" and "SEAO"
|
||||
@ -246,7 +243,7 @@ def get_random_string(length):
|
||||
# choose from all lowercase letter
|
||||
letters = string.ascii_lowercase
|
||||
result_str = ''.join(random.choice(letters) for _ in range(length))
|
||||
print("Random string of length", length, "is:", result_str)
|
||||
logger.debug("Random string of length", length, "is:", result_str)
|
||||
return result_str
|
||||
|
||||
|
||||
@ -346,7 +343,7 @@ def send_to_queue2(rq_id, sub_id, file_url, user_id, typez, metadata={}):
|
||||
elif typez == ProcessType.SBT_INVOICE.value:
|
||||
c_connector.process_invoice_sbt((rq_id, file_url, metadata))
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.error(e)
|
||||
raise BadGatewayException()
|
||||
|
||||
def build_template_matching_data(template):
|
||||
@ -383,7 +380,7 @@ def send_template_queue(rq_id, file_url, template: OcrTemplate, uid):
|
||||
c_connector.process_template_matching(
|
||||
(rq_id, template.subscription.id, folder_name, file_url, template_data, uid))
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.error(e)
|
||||
raise BadGatewayException()
|
||||
|
||||
def process_feedback(feedback_id, local_file_path):
|
||||
@ -451,7 +448,7 @@ def pdf_to_images_urls(doc_path, request: SubscriptionRequest, user, dpi: int =
|
||||
image = get_first_page_pdf(doc_path, 300)
|
||||
image = resize(image, max_w=settings.TARGET_MAX_IMAGE_SIZE[0], max_h=settings.TARGET_MAX_IMAGE_SIZE[1])
|
||||
image.save(saving_path)
|
||||
print(f"Saving {saving_path}")
|
||||
logger.debug(f"Saving {saving_path}")
|
||||
new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=saving_path,
|
||||
request=request,
|
||||
file_name=break_file_name,
|
||||
|
@ -1,4 +1,7 @@
|
||||
import boto3
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class MinioS3Client:
|
||||
def __init__(self, access_key, secret_key, bucket_name, endpoint=""):
|
||||
@ -25,7 +28,7 @@ class MinioS3Client:
|
||||
's3')
|
||||
|
||||
except Exception as e:
|
||||
print(f"[WARM] Unable to create an s3 client, {e}")
|
||||
logger.warning(f"Unable to create an s3 client, {e}")
|
||||
self.s3_client = None
|
||||
|
||||
def update_object(self, s3_key, content):
|
||||
@ -38,7 +41,7 @@ class MinioS3Client:
|
||||
# print(f"Object '{s3_key}' updated in S3 with res: {res}")
|
||||
return res
|
||||
except Exception as e:
|
||||
print(f"Error updating object in S3: {str(e)}")
|
||||
logger.error(f"Error updating object in S3: {str(e)}")
|
||||
|
||||
def upload_file(self, local_file_path, s3_key):
|
||||
try:
|
||||
@ -46,7 +49,7 @@ class MinioS3Client:
|
||||
# print(f"File '{local_file_path}' uploaded to S3 with key '{s3_key}'")
|
||||
return res
|
||||
except Exception as e:
|
||||
print(f"Error uploading file to S3: {str(e)}")
|
||||
logger.error(f"Error uploading file to S3: {str(e)}")
|
||||
|
||||
def download_file(self, s3_key, local_file_path):
|
||||
try:
|
||||
@ -54,7 +57,7 @@ class MinioS3Client:
|
||||
# print(f"File '{s3_key}' downloaded from S3 to '{local_file_path}'")
|
||||
return res
|
||||
except Exception as e:
|
||||
print(f"Error downloading file from S3: {str(e)}")
|
||||
logger.error(f"Error downloading file from S3: {str(e)}")
|
||||
|
||||
def create_url_with_expiration(self, s3_key, expiration_time):
|
||||
try:
|
||||
@ -68,7 +71,7 @@ class MinioS3Client:
|
||||
# print(f"URL for file '{s3_key}' expires in {expiration_time} seconds")
|
||||
return res
|
||||
except Exception as e:
|
||||
print(f"Error generating URL for file '{s3_key}': {str(e)}")
|
||||
logger.error(f"Error generating URL for file '{s3_key}': {str(e)}")
|
||||
|
||||
if __name__=="__main__":
|
||||
|
||||
|
@ -45,6 +45,7 @@ services:
|
||||
image: sidp/cope2n-be-fi-sbt:latest
|
||||
environment:
|
||||
- MEDIA_ROOT=${MEDIA_ROOT}
|
||||
- LOG_ROOT=${LOG_ROOT}
|
||||
- DB_ENGINE=${DB_ENGINE}
|
||||
- DB_SCHEMA=${DB_SCHEMA}
|
||||
- DB_USER=${DB_USER}
|
||||
@ -84,13 +85,15 @@ services:
|
||||
- ctel-sbt
|
||||
volumes:
|
||||
- BE_media:${MEDIA_ROOT}
|
||||
- BE_log:${LOG_ROOT}
|
||||
- BE_static:/app/static
|
||||
- /mnt/hdd4T/TannedCung/OCR/Data/SBT_lost_data:/external_data
|
||||
- ./cope2n-api:/app
|
||||
working_dir: /app
|
||||
depends_on:
|
||||
db-sbt:
|
||||
condition: service_started
|
||||
command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
|
||||
command: sh -c "sudo chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
|
||||
python manage.py makemigrations &&
|
||||
python manage.py migrate &&
|
||||
python manage.py compilemessages &&
|
||||
@ -105,7 +108,8 @@ services:
|
||||
- MINIO_ACCESS_KEY=${S3_ACCESS_KEY}
|
||||
- MINIO_SECRET_KEY=${S3_SECRET_KEY}
|
||||
volumes:
|
||||
- ./data/minio_data:/data
|
||||
# - ./data/minio_data:/data
|
||||
- /home/dxtan/TannedCung/persistent/minio:/data
|
||||
ports:
|
||||
- 9884:9884
|
||||
- 9885:9885
|
||||
@ -144,6 +148,7 @@ services:
|
||||
image: sidp/cope2n-be-fi-sbt:latest
|
||||
environment:
|
||||
- MEDIA_ROOT=${MEDIA_ROOT}
|
||||
- LOG_ROOT=${LOG_ROOT}
|
||||
- PYTHONPATH=${PYTHONPATH}:/app # For import module
|
||||
- PYTHONUNBUFFERED=1 # For show print log
|
||||
- DB_ENGINE=${DB_ENGINE}
|
||||
@ -177,6 +182,7 @@ services:
|
||||
condition: service_started
|
||||
volumes:
|
||||
- BE_media:${MEDIA_ROOT}
|
||||
- BE_log:${LOG_ROOT}
|
||||
- ./cope2n-api:/app
|
||||
|
||||
working_dir: /app
|
||||
@ -189,7 +195,8 @@ services:
|
||||
mem_reservation: 500m
|
||||
image: postgres:15.4-alpine
|
||||
volumes:
|
||||
- ./data/postgres_data:/var/lib/postgresql/data
|
||||
# - ./data/postgres_data:/var/lib/postgresql/data
|
||||
- /home/dxtan/TannedCung/persistent/postgres:/var/lib/postgresql/data
|
||||
networks:
|
||||
- ctel-sbt
|
||||
environment:
|
||||
@ -255,3 +262,4 @@ volumes:
|
||||
db_data:
|
||||
BE_static:
|
||||
BE_media:
|
||||
BE_log:
|
@ -25,6 +25,7 @@ services:
|
||||
be-ctel-sbt:
|
||||
environment:
|
||||
- MEDIA_ROOT=${MEDIA_ROOT}
|
||||
- LOG_ROOT=${LOG_ROOT}
|
||||
- DB_ENGINE=${DB_ENGINE}
|
||||
- DB_SCHEMA=${DB_SCHEMA}
|
||||
- DB_USER=${DB_USER}
|
||||
@ -112,6 +113,7 @@ services:
|
||||
be-celery-sbt:
|
||||
environment:
|
||||
- MEDIA_ROOT=${MEDIA_ROOT}
|
||||
- LOG_ROOT=${LOG_ROOT}
|
||||
- PYTHONPATH=${PYTHONPATH}:/app # For import module
|
||||
- PYTHONUNBUFFERED=1 # For show print log
|
||||
- DB_ENGINE=${DB_ENGINE}
|
||||
|