Compare commits


16 Commits

Author SHA1 Message Date
PhanThanhTrung 3dd2440afa update docker-compose dev 2024-10-31 13:08:10 +07:00
PhanThanhTrung 0f955cb039 update 2024-10-31 13:06:44 +07:00
PhanThanhTrung cfc4eaabdf update 2024-10-29 18:18:07 +07:00
PhanThanhTrung 57a0adf8de no no chmod 2024-10-29 17:25:03 +07:00
PhanThanhTrung 2e183360b0 update 2024-10-29 15:04:08 +07:00
PhanThanhTrung 5bcbc257de update 2024-10-29 15:03:31 +07:00
PhanThanhTrung 17a00c3595 update 2024-10-29 14:45:36 +07:00
PhanThanhTrung 90f959223c update 2024-10-29 14:43:48 +07:00
PhanThanhTrung a5bd63df91 update 2024-10-29 14:41:35 +07:00
PhanThanhTrung bb48329fee update 2024-10-29 14:41:10 +07:00
PhanThanhTrung 3a64bbd955 update 2024-10-29 13:48:42 +07:00
PhanThanhTrung 7b336085f3 update 2024-10-29 13:20:30 +07:00
PhanThanhTrung 1fea3c5b74 an alternative version of dockerfile only use for debugging 2024-10-29 12:51:07 +07:00
PhanThanhTrung 8899b9755a update opentelemetry for sbt backend 2024-10-29 12:31:49 +07:00
PhanThanhTrung 43cf82653c circumstance change for debugging 2024-10-29 12:15:24 +07:00
PhanThanhTrung 1a15907fc0 update opentelemetry to ai service sbt 2024-10-29 11:07:30 +07:00
43 changed files with 611 additions and 1161 deletions

View File

@@ -11,26 +11,23 @@ RUN apt-get update && \
 RUN pip install -U openmim==0.3.7 --no-cache-dir
 RUN mim install mmcv-full==1.7.1
 RUN pip install mmcv==1.6.0 -f https://download.openmmlab.com/mmcv/dst/cu116/torch1.13/index.html --no-cache-dir
 RUN ln -s /opt/conda/lib/python3.10/site-packages/torch/lib/libcudnn.so.8 /usr/lib/libcudnn.so && \
     ln -s /opt/conda/lib/libcublas.so /usr/lib/libcublas.so
 # RUN pip install fastdeploy-gpu-python==1.0.7 -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html --no-cache-dir
-RUN wget https://bj.bcebos.com/fastdeploy/release/wheels/fastdeploy_gpu_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl \
-    && pip install fastdeploy_gpu_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl --no-cache-dir \
-    && rm fastdeploy_gpu_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl
+COPY fastdeploy_gpu_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl .
+RUN pip install fastdeploy_gpu_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl --no-cache-dir
+RUN rm fastdeploy_gpu_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl
 RUN python -m pip install 'git+https://github.com/facebookresearch/detectron2.git@a59f05630a8f205756064244bf5beb8661f96180' --no-cache-dir
-COPY ./modules /workspace/modules
-RUN cd /workspace/modules/sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsv_dewarp && pip3 install -v -e . --no-cache-dir
-RUN cd /workspace/modules/sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsvtd && pip3 install -v -e . --no-cache-dir
-RUN cd /workspace/modules/sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsvtr && pip3 install -v -e . --no-cache-dir
-RUN cd /workspace/modules/sdsvkvu/sdsvkvu/externals/sdsvocr && pip3 install -v -e . --no-cache-dir
-RUN cd /workspace/modules/sdsvkvu && pip3 install -v -e . --no-cache-dir
 # Install SDSV packages
 COPY . /workspace/cope2n-ai-fi
+RUN cd /workspace/cope2n-ai-fi/modules/sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsv_dewarp && pip3 install -v -e . --no-cache-dir
+RUN cd /workspace/cope2n-ai-fi/modules/sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsvtd && pip3 install -v -e . --no-cache-dir
+RUN cd /workspace/cope2n-ai-fi/modules/sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsvtr && pip3 install -v -e . --no-cache-dir
+RUN cd /workspace/cope2n-ai-fi/modules/sdsvkvu/sdsvkvu/externals/sdsvocr && pip3 install -v -e . --no-cache-dir
+RUN cd /workspace/cope2n-ai-fi/modules/sdsvkvu && pip3 install -v -e . --no-cache-dir
 RUN cd /workspace/cope2n-ai-fi && pip3 install -r requirements.txt --no-cache-dir
 WORKDIR /workspace
@@ -38,5 +35,4 @@ WORKDIR /workspace
 ENV PYTHONPATH="."
 ENV TZ="Asia/Ho_Chi_Minh"
 CMD [ "sh", "run.sh"]
-# ENTRYPOINT [ "sleep", "infinity" ]

View File

@@ -7,6 +7,7 @@ from pathlib import Path
 import urllib.parse
 import uuid
 from copy import deepcopy
+from opentelemetry import trace
 import sys, os
 cur_dir = str(Path(__file__).parents[2])
 sys.path.append(cur_dir)
@@ -22,6 +23,7 @@ from utils.logging.logging import LOGGER_CONFIG
 logging.config.dictConfig(LOGGER_CONFIG)
 # Get the logger
 logger = logging.getLogger(__name__)
+tracer = trace.get_tracer("sbt_celery_ai")
 logger.info("OCR engine configfs: \n", ocr_cfg)
 logger.info("KVU configfs: \n", kvu_cfg)
@@ -35,7 +37,7 @@ kvu_cfg.pop("option") # pop option
 sbt_engine = load_engine(kvu_cfg)
 kvu_cfg["option"] = option
-
+@tracer.start_as_current_span("sbt_predict")
 def sbt_predict(image_url, engine, metadata={}) -> None:
     req = urllib.request.urlopen(image_url)
     arr = np.asarray(bytearray(req.read()), dtype=np.uint8)
@@ -65,6 +67,7 @@ def sbt_predict(image_url, engine, metadata={}) -> None:
     os.remove(tmp_image_path)
     return outputs
+@tracer.start_as_current_span("predict")
 def predict(page_numb, image_url, metadata={}):
     """
     module predict function

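For reference, tracer.start_as_current_span(...) as used in the hunks above is the standard OpenTelemetry decorator form: the wrapped call runs inside a named span, and uncaught exceptions are recorded on the span before propagating. A minimal self-contained sketch of the same pattern — the console exporter and names here are illustrative stand-ins, not the repo's OTLP/Jaeger setup:

# Minimal sketch of the span-decorator pattern (illustrative names, console exporter).
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(provider)
tracer = trace.get_tracer("sbt_celery_ai")

@tracer.start_as_current_span("sbt_predict")
def sbt_predict(image_url):
    # Body executes inside a span named "sbt_predict"; exceptions are
    # recorded on the span before being re-raised.
    return f"processed {image_url}"

sbt_predict("http://example.com/invoice.jpg")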
View File

@@ -1,71 +1,22 @@
-from celery_worker.worker_fi import app
-from celery_worker.client_connector_fi import CeleryConnector
-from common.process_pdf import compile_output_sbt
-from .task_warpper import VerboseTask
 import logging
 import logging.config
+from opentelemetry import trace
+from celery_worker.client_connector_fi import CeleryConnector
+from celery_worker.worker_fi import app
+from common.process_pdf import compile_output_sbt
 from utils.logging.logging import LOGGER_CONFIG
+from .task_warpper import VerboseTask
 # Load the logging configuration
 logging.config.dictConfig(LOGGER_CONFIG)
 # Get the logger
 logger = logging.getLogger(__name__)
+tracer = trace.get_tracer("sbt_celery_ai")
-@app.task(base=VerboseTask,name="process_fi_invoice")
-def process_invoice(rq_id, list_url):
-    from celery_worker.client_connector_fi import CeleryConnector
-    from common.process_pdf import compile_output_fi
-    c_connector = CeleryConnector()
-    try:
-        result = compile_output_fi(list_url)
-        hoadon = {"status": 200, "content": result, "message": "Success"}
-        logger.info(hoadon)
-        c_connector.process_fi_invoice_result((rq_id, hoadon))
-        return {"rq_id": rq_id}
-    except Exception as e:
-        logger.info(e)
-        hoadon = {"status": 404, "content": {}}
-        c_connector.process_fi_invoice_result((rq_id, hoadon))
-        return {"rq_id": rq_id}
-@app.task(base=VerboseTask,name="process_sap_invoice")
-def process_sap_invoice(rq_id, list_url):
-    from celery_worker.client_connector_fi import CeleryConnector
-    from common.process_pdf import compile_output
-    logger.info(list_url)
-    c_connector = CeleryConnector()
-    try:
-        result = compile_output(list_url)
-        hoadon = {"status": 200, "content": result, "message": "Success"}
-        c_connector.process_sap_invoice_result((rq_id, hoadon))
-        return {"rq_id": rq_id}
-    except Exception as e:
-        logger.info(e)
-        hoadon = {"status": 404, "content": {}}
-        c_connector.process_sap_invoice_result((rq_id, hoadon))
-        return {"rq_id": rq_id}
-@app.task(base=VerboseTask,name="process_manulife_invoice")
-def process_manulife_invoice(rq_id, list_url):
-    from celery_worker.client_connector_fi import CeleryConnector
-    from common.process_pdf import compile_output_manulife
-    # TODO: simply returning 200 and 404 doesn't make any sense
-    c_connector = CeleryConnector()
-    try:
-        result = compile_output_manulife(list_url)
-        hoadon = {"status": 200, "content": result, "message": "Success"}
-        logger.info(hoadon)
-        c_connector.process_manulife_invoice_result((rq_id, hoadon))
-        return {"rq_id": rq_id}
-    except Exception as e:
-        logger.info(e)
-        hoadon = {"status": 404, "content": {}}
-        c_connector.process_manulife_invoice_result((rq_id, hoadon))
-        return {"rq_id": rq_id}
-@app.task(base=VerboseTask,name="process_sbt_invoice")
+@app.task(base=VerboseTask,name="process_sbt_invoice", track_started=True)
 def process_sbt_invoice(rq_id, list_url, metadata):
     # TODO: simply returning 200 and 404 doesn't make any sense
     c_connector = CeleryConnector()

View File

@@ -1,11 +1,47 @@
-from celery import Celery
-from kombu import Queue, Exchange
 import environ
+from celery import Celery
+from celery.signals import worker_process_init
+from kombu import Exchange, Queue
+from opentelemetry import metrics, trace
+from opentelemetry.exporter.otlp.proto.http.metric_exporter import \
+    OTLPMetricExporter
+from opentelemetry.exporter.otlp.proto.http.trace_exporter import \
+    OTLPSpanExporter
+from opentelemetry.instrumentation.celery import CeleryInstrumentor
+from opentelemetry.sdk.metrics import MeterProvider
+from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
+from opentelemetry.sdk.resources import SERVICE_NAME, Resource
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
 env = environ.Env(
     DEBUG=(bool, False)
 )
+debug = env.str("DEBUG", False)
+tracer_endpoint = env.str("tracer_endpoint", "http://jaeger_collector:4318")
+service_name = "sbt_celery_ai"
+@worker_process_init.connect(weak=False)
+def init_celery_tracing(*args, **kwargs):
+    CeleryInstrumentor().instrument()
+    span_exporter = OTLPSpanExporter(endpoint=f"{tracer_endpoint}/v1/traces")
+    processor = BatchSpanProcessor(span_exporter=span_exporter)
+    attributes = {SERVICE_NAME: service_name}
+    resource = Resource(attributes=attributes)
+    trace_provider = TracerProvider(resource=resource)
+    trace_provider.add_span_processor(span_processor=processor)
+    trace.set_tracer_provider(tracer_provider=trace_provider)
+    reader = PeriodicExportingMetricReader(
+        OTLPMetricExporter(endpoint=f"{tracer_endpoint}/v1/metrics"))
+    meter_provider = MeterProvider(resource=resource, metric_readers=[reader])
+    metrics.set_meter_provider(meter_provider=meter_provider)
 app: Celery = Celery(
     "postman",
     broker= env.str("CELERY_BROKER", "amqp://test:test@rabbitmq:5672"),
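A note on the worker_process_init hook above: Celery's prefork pool forks worker children, and BatchSpanProcessor owns a background export thread that does not survive a fork, so the tracer provider has to be built in each child rather than once in the parent. A minimal sketch of the same per-process wiring — the broker URL and console exporter are illustrative stand-ins for the repo's RabbitMQ broker and OTLP/Jaeger endpoint:

# Minimal sketch: per-child-process tracing setup for a prefork Celery worker.
from celery import Celery
from celery.signals import worker_process_init
from opentelemetry import trace
from opentelemetry.instrumentation.celery import CeleryInstrumentor
from opentelemetry.sdk.resources import SERVICE_NAME, Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter

app = Celery("demo", broker="amqp://guest:guest@localhost:5672//")  # illustrative broker

@worker_process_init.connect(weak=False)
def init_tracing(*args, **kwargs):
    # Runs once in every forked worker child, after the fork.
    CeleryInstrumentor().instrument()  # wraps task execution in spans
    resource = Resource(attributes={SERVICE_NAME: "sbt_celery_ai"})
    provider = TracerProvider(resource=resource)
    provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
    trace.set_tracer_provider(provider)

@app.task
def ping():
    return "pong"  # executes inside a span created by the instrumentor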

View File

@@ -1,23 +1,17 @@
-import os
-import json
-import time
-from common import json2xml
-from common.json2xml import convert_key_names, replace_xml_values
-from common.utils_kvu.split_docs import split_docs, merge_sbt_output
-# from api.OCRBase.prediction import predict as ocr_predict
-# from api.Kie_Invoice_AP.prediction_sap import predict
-# from api.Kie_Invoice_AP.prediction_fi import predict_fi
-# from api.manulife.predict_manulife import predict as predict_manulife
-from api.sdsap_sbt.prediction_sbt import predict as predict_sbt
 import logging
 import logging.config
+import os
+import time
+from opentelemetry import trace
+from api.sdsap_sbt.prediction_sbt import predict as predict_sbt
+from common.utils_kvu.split_docs import merge_sbt_output
 from utils.logging.logging import LOGGER_CONFIG
+# Load the logging configuration
 logging.config.dictConfig(LOGGER_CONFIG)
+# Get the logger
 logger = logging.getLogger(__name__)
+tracer = trace.get_tracer("sbt_celery_ai")
 os.environ['PYTHONPATH'] = '/home/thucpd/thucpd/cope2n-ai/cope2n-ai/'
@@ -27,54 +21,6 @@ def check_label_exists(array, target_label):
             return True # Label exists in the array
     return False # Label does not exist in the array
-def compile_output(list_url):
-    """_summary_
-    Args:
-        pdf_extracted (list): list: [{
-            "1": url},{"2": url},
-            ...]
-    Raises:
-        NotImplementedError: _description_
-    Returns:
-        dict: output compiled
-    """
-    results = {
-        "model":{
-            "name":"Invoice",
-            "confidence": 1.0,
-            "type": "finance/invoice",
-            "isValid": True,
-            "shape": "letter",
-        }
-    }
-    compile_outputs = []
-    compiled = []
-    for page in list_url:
-        output_model = predict(page['page_number'], page['file_url'])
-        for field in output_model['fields']:
-            if field['value'] != "" and not check_label_exists(compiled, field['label']):
-                element = {
-                    'label': field['label'],
-                    'value': field['value'],
-                }
-                compiled.append(element)
-            elif field['label'] == 'table' and check_label_exists(compiled, "table"):
-                for index, obj in enumerate(compiled):
-                    if obj['label'] == 'table':
-                        compiled[index]['value'].append(field['value'])
-        compile_output = {
-            'page_index': page['page_number'],
-            'request_file_id': page['request_file_id'],
-            'fields': output_model['fields']
-        }
-        compile_outputs.append(compile_output)
-    results['combine_results'] = compiled
-    results['pages'] = compile_outputs
-    return results
 def update_null_values(kvu_result, next_kvu_result):
     for key, value in kvu_result.items():
         if value is None and next_kvu_result.get(key) is not None:
@@ -86,127 +32,7 @@ def replace_empty_null_values(my_dict):
             my_dict[key] = None
     return my_dict
-def compile_output_fi(list_url):
-    """_summary_
-    Args:
-        pdf_extracted (list): list: [{
-            "1": url},{"2": url},
-            ...]
-    Raises:
-        NotImplementedError: _description_
-    Returns:
-        dict: output compiled
-    """
-    results = {
-        "model":{
-            "name":"Invoice",
-            "confidence": 1.0,
-            "type": "finance/invoice",
-            "isValid": True,
-            "shape": "letter",
-        }
-    }
-    # Loop through the list_url to update kvu_result
-    for i in range(len(list_url) - 1):
-        page = list_url[i]
-        next_page = list_url[i + 1]
-        kvu_result, output_kie = predict_fi(page['page_number'], page['file_url'])
-        next_kvu_result, next_output_kie = predict_fi(next_page['page_number'], next_page['file_url'])
-        update_null_values(kvu_result, next_kvu_result)
-        output_kie = replace_empty_null_values(output_kie)
-        next_output_kie = replace_empty_null_values(next_output_kie)
-        update_null_values(output_kie, next_output_kie)
-    # Handle the last item in the list_url
-    if list_url:
-        page = list_url[-1]
-        kvu_result, output_kie = predict_fi(page['page_number'], page['file_url'])
-    converted_dict = convert_key_names(kvu_result)
-    converted_dict.update(convert_key_names(output_kie))
-    output_fi = replace_xml_values(json2xml.xml_template3, converted_dict)
-    field_fi = {
-        "xml": output_fi,
-    }
-    results['combine_results'] = field_fi
-    # results['combine_results'] = converted_dict
-    # results['combine_results_kie'] = output_kie
-    return results
-def compile_output_ocr_base(list_url):
-    """Compile output of OCRBase
-    Args:
-        list_url (list): List string url of image
-    Returns:
-        dict: dict of output
-    """
-    results = {
-        "model":{
-            "name":"OCRBase",
-            "confidence": 1.0,
-            "type": "ocrbase",
-            "isValid": True,
-            "shape": "letter",
-        }
-    }
-    compile_outputs = []
-    for page in list_url:
-        output_model = ocr_predict(page['page_number'], page['file_url'])
-        compile_output = {
-            'page_index': page['page_number'],
-            'request_file_id': page['request_file_id'],
-            'fields': output_model['fields']
-        }
-        compile_outputs.append(compile_output)
-    results['pages'] = compile_outputs
-    return results
-def compile_output_manulife(list_url):
-    """_summary_
-    Args:
-        pdf_extracted (list): list: [{
-            "1": url},{"2": url},
-            ...]
-    Raises:
-        NotImplementedError: _description_
-    Returns:
-        dict: output compiled
-    """
-    results = {
-        "model":{
-            "name":"Invoice",
-            "confidence": 1.0,
-            "type": "finance/invoice",
-            "isValid": True,
-            "shape": "letter",
-        }
-    }
-    outputs = []
-    for page in list_url:
-        output_model = predict_manulife(page['page_number'], page['file_url']) # gotta be predict_manulife(), for the time being, this function is not avaible, we just leave a dummy function here instead
-        logger.info("output_model", output_model)
-        outputs.append(output_model)
-    logger.info("outputs", outputs)
-    documents = split_docs(outputs)
-    logger.info("documents", documents)
-    results = {
-        "total_pages": len(list_url),
-        "ocr_num_pages": len(list_url),
-        "document": documents
-    }
-    return results
+@tracer.start_as_current_span("compile_output_sbt")
 def compile_output_sbt(list_url, metadata):
     """_summary_
@@ -237,6 +63,7 @@ def compile_output_sbt(list_url, metadata):
     outputs = []
     start = time.time()
     pages_predict_time = []
+
     for page in list_url:
         output_model = predict_sbt(page['page_number'], page['file_url'], metadata)
         pages_predict_time.append(time.time())
@@ -244,7 +71,9 @@ def compile_output_sbt(list_url, metadata):
         output_model['doc_type'] = page['doc_type']
         outputs.append(output_model)
     start_postprocess = time.time()
+
     documents = merge_sbt_output(outputs)
+
     inference_profile["postprocess"] = [start_postprocess, time.time()]
     inference_profile["inference"] = [start, pages_predict_time]
     results = {
@@ -254,16 +83,3 @@ def compile_output_sbt(list_url, metadata):
         "inference_profile": inference_profile
     }
     return results
-def main():
-    """
-    main function
-    """
-    list_url = [{"file_url": "https://www.irs.gov/pub/irs-pdf/fw9.pdf", "page_number": 1, "request_file_id": 1}, ...]
-    results = compile_output(list_url)
-    with open('output.json', 'w', encoding='utf-8') as file:
-        json.dump(results, file, ensure_ascii=False, indent=4)
-if __name__ == "__main__":
-    main()

View File

@@ -2,7 +2,11 @@ import os
 import glob
 import json
 from tqdm import tqdm
+from opentelemetry import trace
+tracer = trace.get_tracer("sbt_celery_ai")
+@tracer.start_as_current_span("longestCommonSubsequence")
 def longestCommonSubsequence(text1: str, text2: str) -> int:
     # https://leetcode.com/problems/longest-common-subsequence/discuss/351689/JavaPython-3-Two-DP-codes-of-O(mn)-and-O(min(m-n))-spaces-w-picture-and-analysis
     dp = [[0] * (len(text2) + 1) for _ in range(len(text1) + 1)]
@@ -12,21 +16,25 @@ def longestCommonSubsequence(text1: str, text2: str) -> int:
                 dp[i][j] if c == d else max(dp[i][j + 1], dp[i + 1][j])
     return dp[-1][-1]
+@tracer.start_as_current_span("write_to_json")
 def write_to_json(file_path, content):
     with open(file_path, mode="w", encoding="utf8") as f:
         json.dump(content, f, ensure_ascii=False)
+@tracer.start_as_current_span("read_json")
 def read_json(file_path):
     with open(file_path, "r") as f:
         return json.load(f)
+@tracer.start_as_current_span("check_label_exists")
 def check_label_exists(array, target_label):
     for obj in array:
         if obj["label"] == target_label:
             return True # Label exists in the array
     return False # Label does not exist in the array
+@tracer.start_as_current_span("merged_kvu_outputs")
 def merged_kvu_outputs(loutputs: list) -> dict:
     compiled = []
     for output_model in loutputs:
@@ -44,6 +52,7 @@ def merged_kvu_outputs(loutputs: list) -> dict:
     return compiled
+@tracer.start_as_current_span("split_docs")
 def split_docs(doc_data: list, threshold: float=0.6) -> list:
     num_pages = len(doc_data)
     outputs = []
@@ -91,8 +100,10 @@ def split_docs(doc_data: list, threshold: float=0.6) -> list:
     return outputs
+@tracer.start_as_current_span("merge_sbt_output")
 def merge_sbt_output(loutputs):
     # TODO: This function is too circumlocutory, need to refactor the whole flow
+    @tracer.start_as_current_span("dict_to_list_of_dict")
     def dict_to_list_of_dict(the_dict):
         output = []
         for k,v in the_dict.items():

View File

@@ -11,4 +11,16 @@ easydict
 imagesize==1.4.1
 pdf2image==1.17.0
 redis==5.0.1
 celery==5.3.6
+opentelemetry-api==1.27.0
+opentelemetry-distro==0.48b0
+opentelemetry-exporter-otlp==1.27.0
+opentelemetry-exporter-otlp-proto-common==1.27.0
+opentelemetry-exporter-otlp-proto-http==1.27.0
+opentelemetry-instrumentation==0.48b0
+opentelemetry-instrumentation-celery==0.48b0
+opentelemetry-proto==1.27.0
+opentelemetry-sdk==1.27.0
+opentelemetry-semantic-conventions==0.48b0
+opentelemetry-util-http==0.48b0

View File

@@ -1,62 +1,35 @@
-FROM python:3.10.10-buster AS builder
-ARG UID=1000
-ARG GID=1000
-ARG USERNAME=container-user
-RUN groupadd --gid ${GID} ${USERNAME} \
-    && useradd --uid ${UID} --gid ${GID} -m ${USERNAME} \
-    && apt-get update \
-    && apt-get install -y sudo bash gettext poppler-utils \
-    && echo ${USERNAME} ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/${USERNAME} \
-    && chmod 0440 /etc/sudoers.d/${USERNAME}
-RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 wget -y
+FROM python:3.10.10-buster
+RUN apt-get update \
+    && apt-get install -y bash gettext poppler-utils ffmpeg libsm6 libxext6 wget
 RUN yes | apt install postgresql gcc musl-dev
 RUN pip install --upgrade pip
 RUN pip install uvicorn gunicorn Celery
 # For intergration with sdskvu
-RUN pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
+COPY torch-1.13.1+cu116-cp310-cp310-linux_x86_64.whl .
+RUN pip install torch-1.13.1+cu116-cp310-cp310-linux_x86_64.whl
 RUN pip install -U openmim==0.3.7 --no-cache-dir
 RUN mim install mmcv-full==1.7.2
-# RUN pip install fastdeploy-gpu-python==1.0.7 -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html --no-cache-dir
-RUN wget https://bj.bcebos.com/fastdeploy/release/wheels/fastdeploy_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl \
-    && pip install fastdeploy_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl --no-cache-dir \
-    && rm fastdeploy_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl
+COPY fastdeploy_gpu_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl .
+RUN pip install fastdeploy_gpu_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl --no-cache-dir
 # End intergration with sdskvu
-USER ${UID}
-ADD --chown=${UID}:${GID} fwd /app
-COPY --chown=${UID}:${GID} requirements.txt /app
-WORKDIR /app
-RUN pip install -r requirements.txt --no-cache-dir
-COPY --chown=${UID}:${GID} . /app
-RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsv_dewarp && pip3 install -v -e . --no-cache-dir
-RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsvtd && pip3 install -v -e . --no-cache-dir
-RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsvtr && pip3 install -v -e . --no-cache-dir
-RUN cd /app/fwd_api/utils/sdsvkvu && pip3 install -v -e . --no-cache-dir
+COPY requirements.txt /app/
+RUN pip install -r /app/requirements.txt --no-cache-dir
+COPY fwd_api/utils/sdsvkvu ./sdsvkvu
+RUN cd sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsv_dewarp && pip3 install -v -e . --no-cache-dir
+RUN cd sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsvtd && pip3 install -v -e . --no-cache-dir
+RUN cd sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsvtr && pip3 install -v -e . --no-cache-dir
+RUN cd sdsvkvu && pip3 install -v -e . --no-cache-dir
 # For intergration with sdskvu
-# RUN python -m pip install paddlepaddle-gpu==2.4.2.post116 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html --no-cache-dir
-RUN wget https://paddle-wheel.bj.bcebos.com/2.4.2/linux/linux-gpu-cuda11.6-cudnn8.4.0-mkl-gcc8.2-avx/paddlepaddle_gpu-2.4.2.post116-cp310-cp310-linux_x86_64.whl \
-    && pip install paddlepaddle_gpu-2.4.2.post116-cp310-cp310-linux_x86_64.whl --no-cache-dir \
-    && rm paddlepaddle_gpu-2.4.2.post116-cp310-cp310-linux_x86_64.whl
-WORKDIR /app
-COPY . /app
+COPY paddlepaddle_gpu-2.4.2.post116-cp310-cp310-linux_x86_64.whl .
+RUN pip install paddlepaddle_gpu-2.4.2.post116-cp310-cp310-linux_x86_64.whl --no-cache-dir
 ENV TZ="Asia/Ho_Chi_Minh"
+CMD ["bash"]
-# FROM cope2n-api-base AS builder
-# ARG UID=1000
-# ARG GID=1000
-# ARG USERNAME=container-user
-# # Create a new user
-# RUN groupadd --gid ${GID} ${USERNAME} \
-#     && useradd --uid ${UID} --gid ${GID} -m ${USERNAME} \
-#     && echo ${USERNAME} ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/${USERNAME} \
-#     && chmod 0440 /etc/sudoers.d/${USERNAME}
-# WORKDIR /app
-# COPY --chown=${UID}:${GID} . /app

View File

@@ -0,0 +1,3 @@
+from .celery import app as celery_app
+
+__all__ = ('celery_app',)

View File

@@ -1,16 +1,52 @@
+import environ
+import copy
 import os
 import django
 from celery import Celery
-from celery.signals import setup_logging  # noqa
+from celery.signals import setup_logging, worker_process_init  # noqa
-from kombu import Queue
+from kombu import Exchange, Queue
-import copy
+from opentelemetry import metrics, trace
+from opentelemetry.exporter.otlp.proto.http.metric_exporter import \
+    OTLPMetricExporter
+from opentelemetry.exporter.otlp.proto.http.trace_exporter import \
+    OTLPSpanExporter
+from opentelemetry.instrumentation.celery import CeleryInstrumentor
+from opentelemetry.instrumentation.django import DjangoInstrumentor
+from opentelemetry.sdk.metrics import MeterProvider
+from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
+from opentelemetry.sdk.resources import SERVICE_NAME, Resource
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
-from fwd import settings
+from django.conf import settings
 os.environ.setdefault("DJANGO_SETTINGS_MODULE", "fwd.settings")
 django.setup()
+env = environ.Env(
+    DEBUG=(bool, False)
+)
+tracer_endpoint = env.str("tracer_endpoint", "http://jaeger_collector:4318")
+service_name = "sbt_celery_backend"
+@worker_process_init.connect(weak=False)
+def init_celery_tracing(*args, **kwargs):
+    CeleryInstrumentor().instrument()
+    span_exporter = OTLPSpanExporter(endpoint=f"{tracer_endpoint}/v1/traces")
+    processor = BatchSpanProcessor(span_exporter=span_exporter)
+    attributes = {SERVICE_NAME: service_name}
+    resource = Resource(attributes=attributes)
+    trace_provider = TracerProvider(resource=resource)
+    trace_provider.add_span_processor(span_processor=processor)
+    trace.set_tracer_provider(tracer_provider=trace_provider)
+    reader = PeriodicExportingMetricReader(OTLPMetricExporter(endpoint=f"{tracer_endpoint}/v1/metrics"))
+    meter_provider = MeterProvider(resource=resource, metric_readers=[reader])
+    metrics.set_meter_provider(meter_provider=meter_provider)
 app: Celery = Celery(
     'postman',
     broker=settings.BROKER_URL,
@@ -24,6 +60,7 @@ app.autodiscover_tasks()
 @setup_logging.connect
 def config_loggers(*args, **kwargs):
     from logging.config import dictConfig  # noqa
     from django.conf import settings  # noqa
     log_config = copy.deepcopy(settings.LOGGING)
@@ -47,7 +84,6 @@ app.conf.update({
         Queue('csv_feedback'),
         Queue('report'),
         Queue('report_2'),
-        Queue('error_responses'),
     ],
     'task_routes': {
         'process_sap_invoice_result': {'queue': 'invoice_sap_rs'},
@@ -66,15 +102,6 @@ app.conf.update({
         'remove_local_file': {'queue': "remove_local_file"},
         'csv_feedback': {'queue': "csv_feedback"},
         'make_a_report': {'queue': "report"},
         'make_a_report_2': {'queue': "report_2"},
-        'send_response_to_sqs': {'queue': 'error_responses'},
     }
 })
-if __name__ == "__main__":
-    argv = [
-        'worker',
-        '--loglevel=INFO',
-        '--pool=solo'  # Window opts
-    ]
-    app.worker_main(argv)

View File

@@ -0,0 +1,34 @@
+from opentelemetry import metrics, trace
+from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import \
+    OTLPSpanExporter
+from opentelemetry.exporter.otlp.proto.http.metric_exporter import \
+    OTLPMetricExporter
+from opentelemetry.instrumentation.django import DjangoInstrumentor
+from opentelemetry.instrumentation.celery import CeleryInstrumentor
+from opentelemetry.sdk.metrics import MeterProvider
+from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
+from opentelemetry.sdk.resources import SERVICE_NAME, Resource
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
+def setup_tracing(tracer_endpoint, service_name):
+    # Instrument Django
+    DjangoInstrumentor().instrument()
+    CeleryInstrumentor().instrument()
+    # Set up a tracer provider
+    span_exporter = OTLPSpanExporter(endpoint=f"{tracer_endpoint}/v1/traces")
+    processor = BatchSpanProcessor(span_exporter=span_exporter)
+    attributes = {SERVICE_NAME: service_name}
+    resource = Resource(attributes=attributes)
+    trace_provider = TracerProvider(resource=resource)
+    trace_provider.add_span_processor(span_processor=processor)
+    trace.set_tracer_provider(tracer_provider=trace_provider)
+    reader = PeriodicExportingMetricReader(
+        OTLPMetricExporter(endpoint=f"{tracer_endpoint}/v1/metrics"))
+    meter_provider = MeterProvider(resource=resource, metric_readers=[reader])
+    metrics.set_meter_provider(meter_provider=meter_provider)

View File

@@ -14,8 +14,7 @@ from pathlib import Path
 import environ
 from django.urls import reverse_lazy
 from fwd_api.middleware.logging_request_response_middleware import TraceIDLogFilter
-from fwd_api.middleware.response_monitor import ResponseMonitorMiddleware
+from .opentelemetry_config import setup_tracing
 # Build paths inside the project like this: BASE_DIR / 'subdir'.
 BASE_DIR = Path(__file__).resolve().parent.parent
@@ -23,6 +22,11 @@ BASE_DIR = Path(__file__).resolve().parent.parent
 env = environ.Env(
     DEBUG=(bool, False)
 )
+TRACER_ENDPOINT = env.str("tracer_endpoint", "http://jaeger_collector:4317")
+SERVICE_NAME = "sbt_django_backend"
+setup_tracing(tracer_endpoint=TRACER_ENDPOINT, service_name=SERVICE_NAME)
 DEBUG = False
 environ.Env.read_env(os.path.join(BASE_DIR, '.env'))
 ALLOWED_HOSTS = env.list("ALLOWED_HOSTS", default=['*'] + ['107.120.{}.{}'.format(i, j) for i in range(256) for j in range(256)])
@@ -50,9 +54,6 @@ S3_SECRET_KEY = env.str("S3_SECRET_KEY", "")
 S3_BUCKET_NAME = env.str("S3_BUCKET_NAME", "ocr-data")
 REDIS_HOST = env.str("REDIS_HOST", "result-cache")
 REDIS_PORT = env.int("REDIS_PORT", 6379)
-AWS_REGION = env.str("AWS_REGION", "")
-AWS_QUEUE_URL = env.str("AWS_QUEUE_URL", "")
 INSTALLED_APPS = [
@@ -81,8 +82,7 @@ MIDDLEWARE = [
     'corsheaders.middleware.CorsMiddleware',
     "whitenoise.middleware.WhiteNoiseMiddleware",
     "django.middleware.locale.LocaleMiddleware",
-    "fwd_api.middleware.logging_request_response_middleware.LoggingMiddleware",
-    'fwd_api.middleware.response_monitor.ResponseMonitorMiddleware',
+    "fwd_api.middleware.logging_request_response_middleware.LoggingMiddleware"
 ]
 LOCALE_PATHS = [
@@ -250,7 +250,6 @@ FIELDS_BY_SUB = {
 BAD_THRESHOLD = 0.75
 NEED_REVIEW = 1.0
-DOC_TYPES = ["imei", "invoice"]
 SUB_FOR_BILLING = ["all", "seao"]
 FIELD = ["imei_number", "purchase_date", "retailername", "sold_to_party", "invoice_no"]

View File

@@ -78,7 +78,6 @@ class AccuracyViewSet(viewsets.ViewSet):
             description='Which subsidiary to be included',
             type=OpenApiTypes.STR,
             enum=list(settings.SUBS.keys()),
-            required=True
         ),
         OpenApiParameter(
             name='request_id',
@@ -113,42 +112,6 @@ class AccuracyViewSet(viewsets.ViewSet):
             type=OpenApiTypes.FLOAT,
             required=False
         ),
-        OpenApiParameter(
-            name='predict_result',
-            location=OpenApiParameter.QUERY,
-            description='Filter by predict result',
-            type=OpenApiTypes.STR,
-            required=False
-        ),
-        OpenApiParameter(
-            name='feedback_result',
-            location=OpenApiParameter.QUERY,
-            description='Filter by feedback result',
-            type=OpenApiTypes.STR,
-            required=False
-        ),
-        OpenApiParameter(
-            name='reviewed_result',
-            location=OpenApiParameter.QUERY,
-            description='Filter by reviewed result',
-            type=OpenApiTypes.STR,
-            required=False
-        ),
-        OpenApiParameter(
-            name='doc_type',
-            location=OpenApiParameter.QUERY,
-            description='Filter by document type',
-            type=OpenApiTypes.STR,
-            enum=list(settings.DOC_TYPES),
-            required=False
-        ),
-        OpenApiParameter(
-            name='bad_reason',
-            location=OpenApiParameter.QUERY,
-            description='Filter by bad reason',
-            type=OpenApiTypes.STR,
-            required=False
-        ),
     ],
     responses=None, tags=['Accuracy']
 )
@@ -166,11 +129,7 @@ class AccuracyViewSet(viewsets.ViewSet):
         include_test = request.GET.get('includes_test', False)
         subsidiary = request.GET.get("subsidiary", "all")
         max_accuracy = float(request.GET.get("max_accuracy", 100))
-        predict_result = request.GET.get('predict_result', None)
-        feedback_result = request.GET.get('feedback_result', None)
-        reviewed_result = request.GET.get('reviewed_result', None)
-        doc_type = request.GET.get('doc_type', None)
-        bad_reason = request.GET.get('bad_reason', None)
+        # subsidiary = map_subsidiary_long_to_short(subsidiary)
         base_query = Q(status=200)
         if start_date_str or end_date_str:
@@ -234,28 +193,10 @@ class AccuracyViewSet(viewsets.ViewSet):
             base_query &= Q(
                 redemption_id__startswith=map_subsidiary_long_to_short(subsidiary))
-        if predict_result:
-            base_query &= Q(predict_result__icontains=predict_result)
-        if feedback_result:
-            base_query &= Q(feedback_result__icontains=feedback_result)
-        if reviewed_result:
-            base_query &= Q(reviewed_result__icontains=reviewed_result)
-        if doc_type:
-            if doc_type.lower() == 'invoice':
-                base_query &= Q(doc_type__regex=r'^invoice(,\s*invoice)*$')
-            elif doc_type.lower() == 'imei':
-                base_query &= Q(doc_type__regex=r'^imei(,\s*imei)*$')
         if isinstance(max_accuracy, float):
             base_query &= Q(raw_accuracy__lt=(
                 max_accuracy/100)) | Q(raw_accuracy__isnull=True)
-        if bad_reason:
-            bad_reason_subquery = SubscriptionRequestFile.objects.filter(
-                reason__icontains=bad_reason
-            ).values_list('request_id', flat=True)
-            base_query &= Q(id__in=bad_reason_subquery)
         subscription_requests = SubscriptionRequest.objects.filter(
             base_query).order_by('created_at')

View File

@@ -1,32 +1,42 @@
+import logging
 import time
 import uuid
-from wsgiref.util import FileWrapper
 from datetime import datetime
+from multiprocessing.pool import ThreadPool
+from typing import List
+from wsgiref.util import FileWrapper
 from django.core.files.uploadedfile import TemporaryUploadedFile
 from django.http import HttpResponse, JsonResponse
 from drf_spectacular.utils import extend_schema
+from fwd import settings
+from opentelemetry import trace
 from rest_framework import status, viewsets
 from rest_framework.decorators import action
-from rest_framework.response import Response
-from typing import List
 from rest_framework.renderers import JSONRenderer
+from rest_framework.response import Response
 from rest_framework_xml.renderers import XMLRenderer
-from multiprocessing.pool import ThreadPool
-from fwd import settings
-from ..celery_worker.client_connector import c_connector
 from ..annotation.api import throw_on_failure
-from ..constant.common import ProcessType, REQUEST_ID, FOLDER_TYPE, EntityStatus, pdf_extensions, allowed_file_extensions, image_extensions, standard_ocr_list
+from ..celery_worker.client_connector import c_connector
-from ..exception.exceptions import RequiredFieldException, InvalidException, NotFoundException, \
-    PermissionDeniedException, LockedEntityException, FileContentInvalidException, ServiceTimeoutException
+from ..constant.common import (FOLDER_TYPE, REQUEST_ID, EntityStatus,
+                               ProcessType, allowed_file_extensions,
+                               image_extensions, pdf_extensions,
+                               standard_ocr_list)
-from ..models import SubscriptionRequest, SubscriptionRequestFile, OcrTemplate, FeedbackRequest
+from ..exception.exceptions import (FileContentInvalidException,
+                                    InvalidException, LockedEntityException,
+                                    NotFoundException,
+                                    PermissionDeniedException,
+                                    RequiredFieldException,
+                                    ServiceTimeoutException)
+from ..models import (FeedbackRequest, OcrTemplate, SubscriptionRequest,
+                      SubscriptionRequestFile)
 from ..response.ReportSerializer import ReportSerializer
 from ..utils import file as FileUtils
 from ..utils import process as ProcessUtil
-import logging
 logger = logging.getLogger(__name__)
+tracer = trace.get_tracer("sbt_django_backend")
 class CtelViewSet(viewsets.ViewSet):
     lookup_field = "username"
@@ -84,7 +94,7 @@ class CtelViewSet(viewsets.ViewSet):
         file_path = FileUtils.resize_and_save_file(file_name, new_request, file_obj, 100)
         S3_path = FileUtils.save_to_S3(file_name, new_request, file_path)
-        files: [{
+        files =[{
             "file_name": file_name,
             "file_path": file_path, # local path to file
             "file_type": ""
@@ -183,6 +193,7 @@ class CtelViewSet(viewsets.ViewSet):
         return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id})
+    @tracer.start_as_current_span("process_sync")
     @extend_schema(request={
         'multipart/form-data': {
             'type': 'object',
@@ -246,19 +257,22 @@ class CtelViewSet(viewsets.ViewSet):
         total_page = len(doc_files_with_type)
         p_type = validated_data['type']
-        new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page,
-                                                               pages_left=total_page,
-                                                               process_type=p_type, status=1, request_id=rq_id,
-                                                               provider_code=provider_code,
-                                                               subscription=sub,
-                                                               redemption_id=validated_data["redemption_ID"],
-                                                               subsidiary=validated_data["subsidiary"],
-                                                               is_test_request=is_test_request)
-        new_request.save()
+        with tracer.start_as_current_span("create_and_save_record_in_db"):
+            new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page,
+                                                                   pages_left=total_page,
+                                                                   process_type=p_type, status=1, request_id=rq_id,
+                                                                   provider_code=provider_code,
+                                                                   subscription=sub,
+                                                                   redemption_id=validated_data["redemption_ID"],
+                                                                   subsidiary=validated_data["subsidiary"],
+                                                                   is_test_request=is_test_request)
+            new_request.save()
         # Run file processing in a pool of 2 threads. TODO: Convert to Celery worker when possible
         compact_files = [None] * len(doc_files_with_type)
         pool = ThreadPool(processes=2)
+        @tracer.start_as_current_span("process_sync.process_file")
         def process_file(data):
             idx, doc_type, doc_file, tmp_file_name = data
             doc_file.seek(0)
@@ -272,50 +286,54 @@ class CtelViewSet(viewsets.ViewSet):
                 "file_path": file_path,
                 "file_type": doc_type
             }
-        for result in pool.map(process_file, doc_files_with_type):
-            compact_files[result["idx"]] = result
+        with tracer.start_as_current_span("process_file_with_multi_thread"):
+            for result in pool.map(process_file, doc_files_with_type):
+                compact_files[result["idx"]] = result
         # Send to AI queue
         c_connector.do_pdf((rq_id, sub.id, p_type, user.id, compact_files))
-        time_limit = 120
-        start_time = time.time()
-        while True:
-            current_time = time.time()
-            waiting_time = current_time - start_time
-            if waiting_time > time_limit:
-                break
-            time.sleep(0.1)
-            report_filter = SubscriptionRequest.objects.filter(request_id=rq_id)
-            if report_filter.count() != 1:
-                raise InvalidException(excArgs='requestId')
-            if user_info.current_sub.id != report_filter[0].subscription.id:
-                raise InvalidException(excArgs="user")
-            if int(report_filter[0].process_type) == ProcessType.FI_INVOICE.value:
-                data = report_filter[0].predict_result
-                xml_as_string = ""
-                if data and 'content' in data and 'combine_results' in data['content'] and 'xml' in data['content']['combine_results']:
-                    xml_as_string = data['content']['combine_results']['xml']
-                xml_as_string = xml_as_string.replace("\n", "").replace("\\", "")
-                return HttpResponse(xml_as_string,content_type="text/xml")
-            serializer: ReportSerializer = ReportSerializer(data=report_filter, many=True)
-            serializer.is_valid()
-            if report_filter[0].status == 400:
-                raise FileContentInvalidException()
-            if report_filter[0].status == 100: # continue, only return when result is fullfilled
-                continue
-            if len(serializer.data) == 0:
-                continue
-            if serializer.data[0].get("data", None) is None:
-                continue
-            if serializer.data[0]["data"].get("status", 200) != 200:
-                continue
-            serializer.data[0]["request_id"] = rq_id
-            return Response(status=status.HTTP_200_OK, data=serializer.data[0])
+        with tracer.start_as_current_span("backend_waiting_for_result"):
+            time_limit = 120
+            start_time = time.time()
+            while True:
+                current_time = time.time()
+                waiting_time = current_time - start_time
+                if waiting_time > time_limit:
+                    break
+                time.sleep(0.1)
+                report_filter = SubscriptionRequest.objects.filter(request_id=rq_id)
+                if report_filter.count() != 1:
+                    raise InvalidException(excArgs='requestId')
+                if user_info.current_sub.id != report_filter[0].subscription.id:
+                    raise InvalidException(excArgs="user")
+                if int(report_filter[0].process_type) == ProcessType.FI_INVOICE.value:
+                    data = report_filter[0].predict_result
+                    xml_as_string = ""
+                    if data and 'content' in data and 'combine_results' in data['content'] and 'xml' in data['content']['combine_results']:
+                        xml_as_string = data['content']['combine_results']['xml']
+                    xml_as_string = xml_as_string.replace("\n", "").replace("\\", "")
+                    return HttpResponse(xml_as_string,content_type="text/xml")
+                serializer: ReportSerializer = ReportSerializer(data=report_filter, many=True)
+                serializer.is_valid()
+                if report_filter[0].status == 400:
+                    raise FileContentInvalidException()
+                if report_filter[0].status == 100: # continue, only return when result is fullfilled
+                    continue
+                if len(serializer.data) == 0:
+                    continue
+                if serializer.data[0].get("data", None) is None:
+                    continue
+                if serializer.data[0]["data"].get("status", 200) != 200:
+                    continue
+                serializer.data[0]["request_id"] = rq_id
+                return Response(status=status.HTTP_200_OK, data=serializer.data[0])
         raise ServiceTimeoutException(excArgs=f"{rq_id}")
     @extend_schema(request={

View File

@@ -1,10 +1,10 @@
 from celery import Celery
+from celery.utils.log import get_task_logger
-from fwd import settings
+from django.conf import settings
 from fwd_api.exception.exceptions import GeneralException
 from fwd_api.middleware.local_storage import get_current_trace_id
 from kombu.utils.uuid import uuid
-from celery.utils.log import get_task_logger
 logger = get_task_logger(__name__)

View File

@@ -1,33 +1,33 @@
-import time
-import uuid
-import os
 import base64
+import copy
+import csv
+import json
+import os
+import time
 import traceback
+import uuid
 from multiprocessing.pool import ThreadPool
-from fwd_api.models import SubscriptionRequest, UserProfile
+from celery.utils.log import get_task_logger
-from fwd_api.celery_worker.worker import app
+from fwd import celery_app as app
-from fwd import settings
 from fwd_api.celery_worker.task_warpper import VerboseTask
+from fwd_api.constant.common import FileCategory
+from fwd_api.middleware.local_storage import get_current_trace_id
+from fwd_api.models import (FeedbackRequest, Report, SubscriptionRequest,
+                            SubscriptionRequestFile, UserProfile)
+from fwd_api.utils.accuracy import predict_result_to_ready
+from opentelemetry import trace
 from ..constant.common import FolderFileType, image_extensions
 from ..exception.exceptions import FileContentInvalidException
-from fwd_api.models import SubscriptionRequestFile, FeedbackRequest, Report
 from ..utils import file as FileUtils
 from ..utils import process as ProcessUtil
 from ..utils import s3 as S3Util
 from ..utils.accuracy import validate_feedback_file
-from fwd_api.constant.common import FileCategory
-from fwd_api.middleware.local_storage import get_current_trace_id
-import csv
-import json
-import copy
-import boto3
-import datetime
-from fwd_api.utils.accuracy import predict_result_to_ready
-from celery.utils.log import get_task_logger
+from fwd import settings
 logger = get_task_logger(__name__)
+tracer = trace.get_tracer("sbt_celery_backend")
 s3_client = S3Util.MinioS3Client(
     endpoint=settings.S3_ENDPOINT,
@@ -35,9 +35,8 @@ s3_client = S3Util.MinioS3Client(
     secret_key=settings.S3_SECRET_KEY,
     bucket_name=settings.S3_BUCKET_NAME
 )
-sqs_client = boto3.client('sqs'+ str(uuid.uuid4()), region_name=settings.AWS_REGION) # keys are stored in the cridental
+@tracer.start_as_current_span("process_pdf_file")
 def process_pdf_file(file_name: str, file_path: str, request, user, doc_type: str, index_in_request: int) -> list:
     try:
         # Origin file
@@ -58,7 +57,7 @@ def process_pdf_file(file_name: str, file_path: str, request, user, doc_type: str, index_in_request: int) -> list:
         request.save()
         return None
-
+@tracer.start_as_current_span("process_image_file")
 def process_image_file(file_name: str, file_path, request, user) -> list:
     new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path,
                                                                         request=request,
@@ -71,7 +70,7 @@ def process_image_file(file_name: str, file_path, request, user) -> list:
         'request_file_id': new_request_file.code
     }]
-@app.task(base=VerboseTask, name="csv_feedback")
+@app.task(base=VerboseTask, name="csv_feedback", track_started=True)
 def process_csv_feedback(csv_file_path, feedback_id):
     # load file to RAM
     status = {}
@@ -167,7 +166,7 @@ def process_csv_feedback(csv_file_path, feedback_id):
             logger.error(f"Unable to set S3: {e}")
     feedback_rq.save()
-@app.task(base=VerboseTask, name='do_pdf')
+@app.task(base=VerboseTask, name='do_pdf', track_started=True)
 def process_pdf(rq_id, sub_id, p_type, user_id, files):
     """
     files: [{
@@ -235,7 +234,7 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files):
     for sub_rq_id, sub_id, urls, user_id, p_type, metadata in to_queue:
         ProcessUtil.send_to_queue2(sub_rq_id, sub_id, urls, user_id, p_type, metadata)
-@app.task(base=VerboseTask, name='upload_file_to_s3')
+@app.task(base=VerboseTask, name='upload_file_to_s3', track_started=False)
 def upload_file_to_s3(local_file_path, s3_key, request_id):
     if s3_client.s3_client is not None:
         try:
@@ -249,7 +248,7 @@ def upload_file_to_s3(local_file_path, s3_key, request_id):
     else:
         logger.info(f"S3 is not available, skipping,...")
-@app.task(base=VerboseTask, name='upload_feedback_to_s3')
+@app.task(base=VerboseTask, name='upload_feedback_to_s3', track_started=True)
 def upload_feedback_to_s3(local_file_path, s3_key, feedback_id):
     if s3_client.s3_client is not None:
         try:
@@ -263,7 +262,7 @@ def upload_feedback_to_s3(local_file_path, s3_key, feedback_id):
     else:
         logger.info(f"S3 is not available, skipping,...")
-@app.task(base=VerboseTask, name='upload_report_to_s3')
+@app.task(base=VerboseTask, name='upload_report_to_s3', track_started=True)
 def upload_report_to_s3(local_file_path, s3_key, report_id, delay):
     if s3_client.s3_client is not None:
         try:
@@ -279,7 +278,7 @@ def upload_report_to_s3(local_file_path, s3_key, report_id, delay):
     else:
         logger.info(f"S3 is not available, skipping,...")
-@app.task(base=VerboseTask, name='remove_local_file')
+@app.task(base=VerboseTask, name='remove_local_file', track_started=False)
 def remove_local_file(local_file_path, request_id):
     logger.info(f"Removing local file: {local_file_path}, ...")
    try:
@@ -287,29 +286,10 @@ def remove_local_file(local_file_path, request_id):
     except Exception as e:
         logger.info(f"Unable to remove local file: {e}")
-@app.task(base=VerboseTask, name='upload_obj_to_s3')
+@app.task(base=VerboseTask, name='upload_obj_to_s3', track_started=True)
 def upload_obj_to_s3(byte_obj, s3_key):
     if s3_client.s3_client is not None:
         obj = base64.b64decode(byte_obj)
         res = s3_client.update_object(s3_key, obj)
     else:
         logger.info(f"S3 is not available, skipping,...")
-@app.task(base=VerboseTask, name='send_response_to_sqs')
-def send_response_to_sqs(response_data, status_code):
-    """Send error responses to SQS for monitoring"""
-    try:
-        message_body = {
-            "status_code": status_code,
-            "timestamp": int(datetime.datetime.now().timestamp()),
-            "message": response_data
-        }
-        sqs_client.send_message(
-            QueueUrl=settings.AWS_QUEUE_URL,
-            MessageBody=json.dumps(message_body)
-        )
-        logger.info(f"Error response sent to SQS: {status_code}")
-    except Exception as e:
-        logger.error(f"Failed to send to SQS: {str(e)}")

View File

@ -1,29 +1,33 @@
import traceback
from fwd_api.models import SubscriptionRequest, Report, ReportFile
from fwd_api.celery_worker.worker import app
from fwd_api.celery_worker.task_warpper import VerboseTask
from ..utils import s3 as S3Util
from ..utils.accuracy import (update_temp_accuracy, IterAvg,
count_transactions, extract_report_detail_list, calculate_a_request,
ReportAccumulateByRequest, create_billing_data)
from ..utils.file import dict2xlsx, save_workbook_file, save_report_to_S3, save_images_to_csv_briefly
from ..utils import time_stuff
from ..utils.redis import RedisUtils
from ..utils.cache import set_cache, get_cache
from django.utils import timezone
from django.db.models import Q
from itertools import chain
import json
import copy import copy
import json
import os import os
import traceback
from itertools import chain
from celery.utils.log import get_task_logger from celery.utils.log import get_task_logger
from django.db.models import Q
from django.utils import timezone
from fwd import celery_app as app
from fwd import settings from fwd import settings
from fwd_api.celery_worker.task_warpper import VerboseTask
from fwd_api.models import Report, SubscriptionRequest
from opentelemetry import trace
from ..utils import s3 as S3Util
from ..utils.accuracy import (IterAvg, ReportAccumulateByRequest,
calculate_a_request, count_transactions,
create_billing_data, extract_report_detail_list,
update_temp_accuracy)
from ..utils.cache import set_cache
from ..utils.file import (dict2xlsx, save_images_to_csv_briefly,
save_report_to_S3, save_workbook_file)
from ..utils.redis import RedisUtils
redis_client = RedisUtils() redis_client = RedisUtils()
logger = get_task_logger(__name__) logger = get_task_logger(__name__)
tracer = trace.get_tracer("sbt_celery_backend")
s3_client = S3Util.MinioS3Client( s3_client = S3Util.MinioS3Client(
endpoint=settings.S3_ENDPOINT, endpoint=settings.S3_ENDPOINT,
@ -32,13 +36,14 @@ s3_client = S3Util.MinioS3Client(
bucket_name=settings.S3_BUCKET_NAME bucket_name=settings.S3_BUCKET_NAME
) )
@tracer.start_as_current_span("mean_list")
def mean_list(l): def mean_list(l):
l = [x for x in l if x is not None] l = [x for x in l if x is not None]
if len(l) == 0: if len(l) == 0:
return 0 return 0
return sum(l)/len(l) return sum(l)/len(l)
@app.task(base=VerboseTask, name='make_a_report_2') @app.task(base=VerboseTask, name='make_a_report_2', track_started=True)
def make_a_report_2(report_id, query_set): def make_a_report_2(report_id, query_set):
report_type = query_set.pop("report_type", "accuracy") report_type = query_set.pop("report_type", "accuracy")
if report_type == "accuracy": if report_type == "accuracy":
@ -48,7 +53,7 @@ def make_a_report_2(report_id, query_set):
else: else:
raise TypeError("Invalid report type") raise TypeError("Invalid report type")
@tracer.start_as_current_span("create_accuracy_report")
def create_accuracy_report(report_id, **kwargs): def create_accuracy_report(report_id, **kwargs):
try: try:
start_date = timezone.datetime.strptime(kwargs["start_date_str"], '%Y-%m-%dT%H:%M:%S%z') start_date = timezone.datetime.strptime(kwargs["start_date_str"], '%Y-%m-%dT%H:%M:%S%z')
@ -245,7 +250,7 @@ def create_accuracy_report(report_id, **kwargs):
traceback.print_exc() traceback.print_exc()
return 400 return 400
@tracer.start_as_current_span("create_billing_report")
def create_billing_report(report_id, **kwargs): def create_billing_report(report_id, **kwargs):
try: try:
start_date = timezone.datetime.strptime( start_date = timezone.datetime.strptime(
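This file now builds one module-level tracer via trace.get_tracer("sbt_celery_backend") and opens spans with start_as_current_span, usable both as a decorator and as a context manager. A self-contained sketch of that pattern; the console exporter is an assumption for illustration, not what this deployment uses:

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import (BatchSpanProcessor,
                                            ConsoleSpanExporter)

# One provider per process; spans are batched and printed to stdout here.
trace.set_tracer_provider(TracerProvider())
trace.get_tracer_provider().add_span_processor(
    BatchSpanProcessor(ConsoleSpanExporter()))

tracer = trace.get_tracer("sbt_celery_backend")

@tracer.start_as_current_span("mean_list")
def mean_list(values):
    values = [v for v in values if v is not None]
    return sum(values) / len(values) if values else 0

with tracer.start_as_current_span("make_a_report_2"):
    print(mean_list([0.9, None, 0.7]))   # nested span; both export on shutdown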
View File
@ -1,24 +1,25 @@
import traceback
import time
import uuid
import logging import logging
import time
import traceback
import uuid
from copy import deepcopy from copy import deepcopy
from fwd_api.celery_worker.worker import app from celery.utils.log import get_task_logger
from fwd_api.models import SubscriptionRequest from fwd import celery_app as app
from fwd_api.celery_worker.task_warpper import VerboseTask
from fwd_api.constant.common import ProcessType
from fwd_api.exception.exceptions import InvalidException from fwd_api.exception.exceptions import InvalidException
from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile
from fwd_api.constant.common import ProcessType
from fwd_api.utils.redis import RedisUtils
from fwd_api.utils import process as ProcessUtil from fwd_api.utils import process as ProcessUtil
from fwd_api.celery_worker.task_warpper import VerboseTask from fwd_api.utils.redis import RedisUtils
from celery.utils.log import get_task_logger from opentelemetry import trace
logger = get_task_logger(__name__) logger = get_task_logger(__name__)
tracer = trace.get_tracer("sbt_celery_backend")
redis_client = RedisUtils() redis_client = RedisUtils()
@tracer.start_as_current_span("aggregate_result")
def aggregate_result(results): def aggregate_result(results):
sorted_results = [None] * len(results) sorted_results = [None] * len(results)
doc_types = [] doc_types = []
@ -58,16 +59,17 @@ def aggregate_result(results):
return des_result return des_result
@tracer.start_as_current_span("print_id")
def print_id(rq_id): def print_id(rq_id):
logger.info(" [x] Received {rq}".format(rq=rq_id)) logger.info(" [x] Received {rq}".format(rq=rq_id))
@tracer.start_as_current_span("to_status")
def to_status(result): def to_status(result):
if 'status' in result and result['status'] not in [200, 201, 202]: if 'status' in result and result['status'] not in [200, 201, 202]:
return 4 return 4
return 3 return 3
@tracer.start_as_current_span("update_user")
def update_user(rq: SubscriptionRequest): def update_user(rq: SubscriptionRequest):
sub = rq.subscription sub = rq.subscription
predict_status = rq.status predict_status = rq.status
@ -75,7 +77,7 @@ def update_user(rq: SubscriptionRequest):
sub.current_token += ProcessUtil.token_value(int(rq.process_type)) sub.current_token += ProcessUtil.token_value(int(rq.process_type))
sub.save() sub.save()
@app.task(base=VerboseTask, name='process_sap_invoice_result') @app.task(base=VerboseTask, name='process_sap_invoice_result', track_started=True)
def process_invoice_sap_result(rq_id, result): def process_invoice_sap_result(rq_id, result):
try: try:
rq: SubscriptionRequest = \ rq: SubscriptionRequest = \
@ -97,7 +99,7 @@ def process_invoice_sap_result(rq_id, result):
return "FailInvoice" return "FailInvoice"
@app.task(base=VerboseTask, name='process_fi_invoice_result') @app.task(base=VerboseTask, name='process_fi_invoice_result', track_started=True)
def process_invoice_fi_result(rq_id, result): def process_invoice_fi_result(rq_id, result):
try: try:
rq: SubscriptionRequest = \ rq: SubscriptionRequest = \
@ -118,7 +120,7 @@ def process_invoice_fi_result(rq_id, result):
traceback.print_exc() traceback.print_exc()
return "FailInvoice" return "FailInvoice"
@app.task(base=VerboseTask, name='process_manulife_invoice_result') @app.task(base=VerboseTask, name='process_manulife_invoice_result', track_started=True)
def process_invoice_manulife_result(rq_id, result): def process_invoice_manulife_result(rq_id, result):
try: try:
rq: SubscriptionRequest = \ rq: SubscriptionRequest = \
@ -141,7 +143,7 @@ def process_invoice_manulife_result(rq_id, result):
random_processor_name = None random_processor_name = None
@app.task(base=VerboseTask, name='process_sbt_invoice_result') @app.task(base=VerboseTask, name='process_sbt_invoice_result', track_started=True)
def process_invoice_sbt_result(rq_id, result, metadata): def process_invoice_sbt_result(rq_id, result, metadata):
global random_processor_name global random_processor_name
if random_processor_name is None: if random_processor_name is None:
@ -194,7 +196,7 @@ def process_invoice_sbt_result(rq_id, result, metadata):
rq.save() rq.save()
return "FailInvoice" return "FailInvoice"
@tracer.start_as_current_span("_update_subscription_rq_file")
def _update_subscription_rq_file(request_id, index_in_request, doc_type, result): def _update_subscription_rq_file(request_id, index_in_request, doc_type, result):
image = SubscriptionRequestFile.objects.filter(request=request_id, index_in_request=index_in_request, doc_type=doc_type).first() image = SubscriptionRequestFile.objects.filter(request=request_id, index_in_request=index_in_request, doc_type=doc_type).first()
retailer_name = None retailer_name = None
@ -234,6 +236,7 @@ def _update_subscription_rq_file(request_id, index_in_request, doc_type, result)
image.predict_result = _predict_result image.predict_result = _predict_result
image.save() image.save()
@tracer.start_as_current_span("__get_actual_predict_result")
def __get_actual_predict_result(result: dict): def __get_actual_predict_result(result: dict):
predicted_res = result.get('content', {}).get('document', []) predicted_res = result.get('content', {}).get('document', [])
if len(predicted_res)==0: if len(predicted_res)==0:
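__get_actual_predict_result guards the payload with chained .get defaults, so a missing "content" or "document" key degrades to "no documents" instead of raising KeyError. The same logic in isolation (the payload shape is inferred from the lines above):

def get_actual_predict_result(result: dict):
    # Malformed payloads and genuinely empty results take the same path.
    predicted = result.get("content", {}).get("document", [])
    return predicted[0] if predicted else {}

assert get_actual_predict_result({}) == {}
assert get_actual_predict_result(
    {"content": {"document": [{"imei_number": ["123"]}]}}
) == {"imei_number": ["123"]}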
View File
@ -1,41 +0,0 @@
from django.core.management.base import BaseCommand
from tqdm import tqdm
from fwd_api.models import SubscriptionRequestFile
from fwd_api.models.SemiAutoCorrection import SemiAutoCorrection
from fwd_api.exception.exceptions import InvalidException
# Mapping dictionary for reasons
REASON_MAP = {
'Invalid image': 'invalid_image',
'Missing information': 'missing_information',
'Too blurry text': 'too_blurry_text',
'Too small text': 'too_small_text',
'Handwritten': 'handwritten',
'Recheck': 'recheck',
}
class Command(BaseCommand):
help = 'Replace the reason field in SubscriptionRequestFile and SemiAutoCorrection based on the provided mapping dictionary'
def handle(self, *args, **options):
# Process SubscriptionRequestFile instances
self.update_reasons(SubscriptionRequestFile, "SubscriptionRequestFile")
# Process SemiAutoCorrection instances
self.update_reasons(SemiAutoCorrection, "SemiAutoCorrection")
self.stdout.write(self.style.SUCCESS('All applicable reasons updated successfully!'))
def update_reasons(self, model, model_name):
instances = model.objects.exclude(reason__isnull=True).exclude(reason='').iterator()
for instance in tqdm(instances, desc=f"Updating reasons in {model_name}"):
try:
original_reason = instance.reason
new_reason = REASON_MAP.get(original_reason)
if new_reason is not None:
instance.reason = new_reason
instance.save()
self.stdout.write(self.style.SUCCESS(f"Updated reason for {model_name} ID {instance.id}: {original_reason} -> {new_reason}"))
except Exception as e:
self.stdout.write(self.style.ERROR(f"Updated reason failed for {model_name} ID {instance.id} due to {e}"))
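The file deleted above is a standard Django management command: handle() walks both models with .iterator() so rows stream from the database instead of loading the whole queryset, and rewrites each reason through REASON_MAP. A stripped-down sketch of that shape; the command name is hypothetical because the diff does not show the file path:

from django.core.management.base import BaseCommand

REASON_MAP = {"Invalid image": "invalid_image", "Recheck": "recheck"}

class Command(BaseCommand):
    help = "Normalize free-text reasons to canonical keys"

    def handle(self, *args, **options):
        # The real command loops over model.objects...iterator() and saves
        # each instance; a plain dict stands in so the sketch runs without
        # a database.
        for old, new in REASON_MAP.items():
            self.stdout.write(self.style.SUCCESS(f"{old} -> {new}"))

# Invoked as: python manage.py <command_module_name>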
View File
@ -1,14 +0,0 @@
from functools import partial
from django.utils.deprecation import MiddlewareMixin
from fwd_api.celery_worker.internal_task import send_response_to_sqs
class ResponseMonitorMiddleware(MiddlewareMixin):
def process_response(self, request, response):
"""Monitor responses and send errors to SQS"""
if response and (400 <= response.status_code < 600):
# Send async to avoid blocking response
send_response_to_sqs.delay(
response.data if hasattr(response, 'data') else str(response.content),
response.status_code
)
return response
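The removed ResponseMonitorMiddleware used Django's process_response hook to fan 4xx/5xx responses out to SQS via .delay(), keeping the hand-off off the request path. The generic shape, with the queue call stubbed out (the settings path is illustrative):

from django.utils.deprecation import MiddlewareMixin

class ErrorWatchMiddleware(MiddlewareMixin):
    def process_response(self, request, response):
        if response is not None and 400 <= response.status_code < 600:
            # The real middleware called send_response_to_sqs.delay(...) here;
            # .delay keeps the hand-off asynchronous so the HTTP response is
            # not held up by a broker round-trip.
            pass
        return response

# Enabled by appending the dotted path to settings.MIDDLEWARE, e.g.
# "fwd_api.middleware.ErrorWatchMiddleware" (path illustrative).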
View File
@ -1,31 +0,0 @@
# Generated by Django 4.1.3 on 2024-11-04 10:47
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0194_alter_semiautocorrection_feedback_accuracy_and_more'),
]
operations = [
migrations.AlterField(
model_name='subscriptionrequest',
name='request_id',
field=models.CharField(db_index=True, max_length=200),
),
migrations.AlterField(
model_name='userprofile',
name='id',
field=models.AutoField(db_index=True, primary_key=True, serialize=False),
),
migrations.AddIndex(
model_name='subscriptionrequestfile',
index=models.Index(fields=['request', 'index_in_request', 'doc_type'], name='fwd_api_sub_request_890e13_idx'),
),
migrations.AddIndex(
model_name='subscriptionrequestfile',
index=models.Index(fields=['request', 'file_name'], name='fwd_api_sub_request_1a42cd_idx'),
),
]
View File
@ -1,26 +0,0 @@
# Generated by Django 4.1.3 on 2024-11-28 07:10
import django.contrib.postgres.indexes
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0195_alter_subscriptionrequest_request_id_and_more'),
]
operations = [
migrations.AddIndex(
model_name='subscriptionrequestfile',
index=django.contrib.postgres.indexes.GinIndex(fields=['predict_result'], name='idx_gin_predict_result'),
),
migrations.AddIndex(
model_name='subscriptionrequestfile',
index=django.contrib.postgres.indexes.GinIndex(fields=['feedback_result'], name='idx_gin_feedback_result'),
),
migrations.AddIndex(
model_name='subscriptionrequestfile',
index=django.contrib.postgres.indexes.GinIndex(fields=['reviewed_result'], name='idx_gin_reviewed_result'),
),
]
View File
@ -1,26 +0,0 @@
# Generated by Django 4.1.3 on 2024-11-28 09:04
import django.contrib.postgres.indexes
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0196_subscriptionrequestfile_idx_gin_predict_result_and_more'),
]
operations = [
migrations.AddIndex(
model_name='subscriptionrequest',
index=django.contrib.postgres.indexes.GinIndex(fields=['predict_result'], name='idx_gin_rq_predict_result'),
),
migrations.AddIndex(
model_name='subscriptionrequest',
index=django.contrib.postgres.indexes.GinIndex(fields=['feedback_result'], name='idx_gin_rq_feedback_result'),
),
migrations.AddIndex(
model_name='subscriptionrequest',
index=django.contrib.postgres.indexes.GinIndex(fields=['reviewed_result'], name='idx_gin_rq_reviewed_result'),
),
]
View File
@ -1,18 +0,0 @@
# Generated by Django 4.1.3 on 2024-11-28 09:40
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0197_subscriptionrequest_idx_gin_rq_predict_result_and_more'),
]
operations = [
migrations.AlterField(
model_name='subscriptionrequest',
name='doc_type',
field=models.CharField(db_index=True, max_length=100),
),
]
View File
@ -1,18 +0,0 @@
# Generated by Django 4.1.3 on 2024-12-04 10:18
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0198_alter_subscriptionrequest_doc_type'),
]
operations = [
migrations.AlterField(
model_name='subscriptionrequestfile',
name='reason',
field=models.TextField(blank=True, db_index=True),
),
]
View File
@ -1,6 +1,5 @@
from django.db import models from django.db import models
from django.utils import timezone from django.utils import timezone
from django.contrib.postgres.indexes import GinIndex
from fwd_api.models.Subscription import Subscription from fwd_api.models.Subscription import Subscription
@ -8,8 +7,8 @@ class SubscriptionRequest(models.Model):
id = models.AutoField(primary_key=True) id = models.AutoField(primary_key=True)
pages: int = models.IntegerField() pages: int = models.IntegerField()
pages_left: int = models.IntegerField(default=1) pages_left: int = models.IntegerField(default=1)
doc_type: str = models.CharField(max_length=100, db_index=True) doc_type: str = models.CharField(max_length=100)
request_id = models.CharField(max_length=200, db_index=True) # Change to request_id request_id = models.CharField(max_length=200) # Change to request_id
redemption_id = models.CharField(max_length=200, null=True) redemption_id = models.CharField(max_length=200, null=True)
process_type = models.CharField(max_length=200) # driver/id/invoice process_type = models.CharField(max_length=200) # driver/id/invoice
provider_code = models.CharField(max_length=200, default="Guest") # Request source FWD/CTel provider_code = models.CharField(max_length=200, default="Guest") # Request source FWD/CTel
@ -40,10 +39,3 @@ class SubscriptionRequest(models.Model):
is_reviewed = models.BooleanField(default=False) is_reviewed = models.BooleanField(default=False)
is_required = models.BooleanField(default=True) is_required = models.BooleanField(default=True)
subsidiary = models.CharField(default="", null=True, max_length=200) subsidiary = models.CharField(default="", null=True, max_length=200)
class Meta:
indexes = [
GinIndex(fields=['predict_result'], name='idx_gin_rq_predict_result'),
GinIndex(fields=['feedback_result'], name='idx_gin_rq_feedback_result'),
GinIndex(fields=['reviewed_result'], name='idx_gin_rq_reviewed_result'),
]
View File
@ -1,5 +1,5 @@
from django.db import models from django.db import models
from django.contrib.postgres.indexes import GinIndex
from fwd_api.constant.common import FileCategory from fwd_api.constant.common import FileCategory
from fwd_api.models import SubscriptionRequest from fwd_api.models import SubscriptionRequest
from fwd_api.models.fields.EncryptedCharField import EncryptedCharField from fwd_api.models.fields.EncryptedCharField import EncryptedCharField
@ -23,7 +23,7 @@ class SubscriptionRequestFile(models.Model):
doc_type = models.CharField(max_length=10, default="") doc_type = models.CharField(max_length=10, default="")
index_in_request = models.IntegerField(default=0) # by doc_type index_in_request = models.IntegerField(default=0) # by doc_type
processing_time = models.FloatField(default=-1) # in milisecond processing_time = models.FloatField(default=-1) # in milisecond
reason = models.TextField(blank=True, db_index=True) reason = models.TextField(blank=True)
counter_measures = models.TextField(blank=True) counter_measures = models.TextField(blank=True)
is_reviewed = models.BooleanField(default=False) is_reviewed = models.BooleanField(default=False)
is_required = models.BooleanField(default=True) is_required = models.BooleanField(default=True)
@ -35,13 +35,3 @@ class SubscriptionRequestFile(models.Model):
feedback_accuracy = models.JSONField(null=True) feedback_accuracy = models.JSONField(null=True)
reviewed_accuracy = models.JSONField(null=True) reviewed_accuracy = models.JSONField(null=True)
class Meta:
indexes = [
models.Index(fields=['request', 'index_in_request', 'doc_type']), # For updating results
models.Index(fields=['request', 'file_name']), # for getting image files by AI
GinIndex(fields=['predict_result'], name='idx_gin_predict_result'),
GinIndex(fields=['feedback_result'], name='idx_gin_feedback_result'),
GinIndex(fields=['reviewed_result'], name='idx_gin_reviewed_result'),
]
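Both request models drop their Meta.indexes here, matching the migrations deleted above. For reference, the pattern being rolled back: GinIndex applies to PostgreSQL JSONFields and is always paired with an AddIndex migration. The model below is illustrative, not from this repo:

from django.contrib.postgres.indexes import GinIndex
from django.db import models

class Doc(models.Model):  # illustrative stand-in for SubscriptionRequestFile
    predict_result = models.JSONField(null=True)

    class Meta:
        app_label = "example"
        indexes = [
            GinIndex(fields=["predict_result"], name="idx_gin_doc_predict"),
        ]

# makemigrations then emits the matching operation:
#   migrations.AddIndex(model_name="doc",
#       index=GinIndex(fields=["predict_result"], name="idx_gin_doc_predict"))
# Note this diff deletes the migration files outright rather than adding
# RemoveIndex operations, which only replays cleanly on databases that
# never applied them.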
View File
@ -6,7 +6,7 @@ from fwd_api.constant.common import EntityStatus
class UserProfile(models.Model): class UserProfile(models.Model):
id = models.AutoField(primary_key=True, db_index=True) id = models.AutoField(primary_key=True)
user_name: str = models.CharField(max_length=200, null=True) user_name: str = models.CharField(max_length=200, null=True)
password: str = models.CharField(max_length=200, null=True) password: str = models.CharField(max_length=200, null=True)
full_name: str = models.CharField(max_length=200, null=True) full_name: str = models.CharField(max_length=200, null=True)
View File
@ -1,31 +1,40 @@
import csv
import io import io
import os
import traceback
import pathlib
import json import json
import logging
from PIL import Image, ExifTags import os
from django.core.files.uploadedfile import TemporaryUploadedFile import pathlib
from django.utils import timezone import traceback
from datetime import datetime from datetime import datetime
import imagesize
from django.core.files.uploadedfile import TemporaryUploadedFile
from django.utils import timezone
from fwd import settings from fwd import settings
from ..utils import s3 as S3Util
from fwd_api.constant.common import allowed_file_extensions from fwd_api.constant.common import allowed_file_extensions
from fwd_api.exception.exceptions import GeneralException, RequiredFieldException, InvalidException, \ from fwd_api.exception.exceptions import (FileFormatInvalidException,
ServiceUnavailableException, FileFormatInvalidException, LimitReachedException, InvalidDecompressedSizeException, RequiredColumnException GeneralException,
from fwd_api.models import SubscriptionRequest, OcrTemplate, FeedbackRequest, SubscriptionRequestFile, Report, ReportFile InvalidDecompressedSizeException,
InvalidException,
LimitReachedException,
RequiredColumnException,
RequiredFieldException,
ServiceUnavailableException)
from fwd_api.models import (FeedbackRequest, OcrTemplate, Report,
SubscriptionRequest, SubscriptionRequestFile)
from fwd_api.utils import process as ProcessUtil from fwd_api.utils import process as ProcessUtil
from fwd_api.utils.crypto import image_authenticator from fwd_api.utils.crypto import image_authenticator
from fwd_api.utils.image import resize from fwd_api.utils.image import resize
from ..celery_worker.client_connector import c_connector
import imagesize
import csv
from openpyxl import load_workbook from openpyxl import load_workbook
from openpyxl.styles import Font, Border, Side, PatternFill, NamedStyle, numbers, Alignment from openpyxl.styles import Alignment, Border, Font, PatternFill, Side
import logging from opentelemetry import trace
from PIL import ExifTags, Image
from ..celery_worker.client_connector import c_connector
from ..utils import s3 as S3Util
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
tracer = trace.get_tracer("sbt_django_backend")
s3_client = S3Util.MinioS3Client( s3_client = S3Util.MinioS3Client(
endpoint=settings.S3_ENDPOINT, endpoint=settings.S3_ENDPOINT,
@ -34,6 +43,7 @@ s3_client = S3Util.MinioS3Client(
bucket_name=settings.S3_BUCKET_NAME bucket_name=settings.S3_BUCKET_NAME
) )
@tracer.start_as_current_span("convert_date_string")
def convert_date_string(date_string): def convert_date_string(date_string):
# Parse the input date string # Parse the input date string
date_format = "%Y-%m-%d %H:%M:%S.%f %z" date_format = "%Y-%m-%d %H:%M:%S.%f %z"
@ -44,6 +54,7 @@ def convert_date_string(date_string):
return formatted_date return formatted_date
@tracer.start_as_current_span("validate_report_list")
def validate_report_list(request): def validate_report_list(request):
start_date_str = request.GET.get('start_date') start_date_str = request.GET.get('start_date')
end_date_str = request.GET.get('end_date') end_date_str = request.GET.get('end_date')
@ -68,6 +79,7 @@ def validate_report_list(request):
raise RequiredFieldException(excArgs="report_id, start_date, end_date") raise RequiredFieldException(excArgs="report_id, start_date, end_date")
return validated_data return validated_data
@tracer.start_as_current_span("validate_feedback_file")
def validate_feedback_file(csv_file_path): def validate_feedback_file(csv_file_path):
required_columns = ['redemptionNumber', 'requestId', 'imeiNumber', 'imeiNumber2', 'Purchase Date', 'retailer', 'Sold to party', 'timetakenmilli'] required_columns = ['redemptionNumber', 'requestId', 'imeiNumber', 'imeiNumber2', 'Purchase Date', 'retailer', 'Sold to party', 'timetakenmilli']
missing_columns = [] missing_columns = []
@ -83,6 +95,7 @@ def validate_feedback_file(csv_file_path):
if missing_columns: if missing_columns:
raise RequiredColumnException(excArgs=str(missing_columns)) raise RequiredColumnException(excArgs=str(missing_columns))
@tracer.start_as_current_span("validate_review")
def validate_review(review, num_imei): def validate_review(review, num_imei):
for field in settings.FIELD: for field in settings.FIELD:
if not field in review.keys(): if not field in review.keys():
@ -90,6 +103,7 @@ def validate_review(review, num_imei):
if not isinstance(review["imei_number"], list) or len(review["imei_number"]) != num_imei: if not isinstance(review["imei_number"], list) or len(review["imei_number"]) != num_imei:
raise InvalidException(excArgs=f'imei_number') raise InvalidException(excArgs=f'imei_number')
@tracer.start_as_current_span("validate_list_file")
def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUEST, min_file_num=1, file_field="files"): def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUEST, min_file_num=1, file_field="files"):
total_file_size = 0 total_file_size = 0
if len(files) < min_file_num: if len(files) < min_file_num:
@ -109,6 +123,7 @@ def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUES
if total_file_size > settings.MAX_UPLOAD_FILE_SIZE_OF_A_REQUEST: if total_file_size > settings.MAX_UPLOAD_FILE_SIZE_OF_A_REQUEST:
raise LimitReachedException(excArgs=('Total size of all files', str(settings.MAX_UPLOAD_SIZE_OF_A_FILE / 1024 / 1024), 'MB')) raise LimitReachedException(excArgs=('Total size of all files', str(settings.MAX_UPLOAD_SIZE_OF_A_FILE / 1024 / 1024), 'MB'))
@tracer.start_as_current_span("validate_csv_feedback")
def validate_csv_feedback(files, max_file_num=1, min_file_num=1, file_field="csv files"): def validate_csv_feedback(files, max_file_num=1, min_file_num=1, file_field="csv files"):
total_file_size = 0 total_file_size = 0
if len(files) < min_file_num: if len(files) < min_file_num:
@ -128,6 +143,7 @@ def validate_csv_feedback(files, max_file_num=1, min_file_num=1, file_field="csv
if total_file_size > settings.MAX_UPLOAD_FILE_SIZE_OF_A_REQUEST: if total_file_size > settings.MAX_UPLOAD_FILE_SIZE_OF_A_REQUEST:
raise LimitReachedException(excArgs=('Total size of all files', str(settings.MAX_UPLOAD_SIZE_OF_A_FILE / 1024 / 1024), 'MB')) raise LimitReachedException(excArgs=('Total size of all files', str(settings.MAX_UPLOAD_SIZE_OF_A_FILE / 1024 / 1024), 'MB'))
# @tracer.start_as_current_span("get_file")
def get_file(file_path: str): def get_file(file_path: str):
try: try:
return open(file_path, 'rb') return open(file_path, 'rb')
@ -136,6 +152,7 @@ def get_file(file_path: str):
raise GeneralException("System") raise GeneralException("System")
@tracer.start_as_current_span("get_template_folder_path")
def get_template_folder_path(tem: OcrTemplate): def get_template_folder_path(tem: OcrTemplate):
tem_id = str(tem.id) tem_id = str(tem.id)
sub_id = str(tem.subscription.id) sub_id = str(tem.subscription.id)
@ -156,6 +173,7 @@ def get_folder_path(rq: SubscriptionRequest):
return os.path.join(settings.MEDIA_ROOT, 'users', user_id, "subscriptions", sub_id, 'requests', p_type, request_id) return os.path.join(settings.MEDIA_ROOT, 'users', user_id, "subscriptions", sub_id, 'requests', p_type, request_id)
@tracer.start_as_current_span("save_byte_file")
def save_byte_file(file_name: str, rq: SubscriptionRequest, file_bytes): def save_byte_file(file_name: str, rq: SubscriptionRequest, file_bytes):
folder_path = get_folder_path(rq) folder_path = get_folder_path(rq)
is_exist = os.path.exists(folder_path) is_exist = os.path.exists(folder_path)
@ -168,6 +186,7 @@ def save_byte_file(file_name: str, rq: SubscriptionRequest, file_bytes):
return file_path return file_path
@tracer.start_as_current_span("save_file")
def save_file(file_name: str, rq: SubscriptionRequest, file: TemporaryUploadedFile): def save_file(file_name: str, rq: SubscriptionRequest, file: TemporaryUploadedFile):
folder_path = get_folder_path(rq) folder_path = get_folder_path(rq)
is_exist = os.path.exists(folder_path) is_exist = os.path.exists(folder_path)
@ -183,6 +202,7 @@ def save_file(file_name: str, rq: SubscriptionRequest, file: TemporaryUploadedFi
return file_path return file_path
@tracer.start_as_current_span("save_json_file")
def save_json_file(file_name: str, rq: SubscriptionRequest, data: dict): def save_json_file(file_name: str, rq: SubscriptionRequest, data: dict):
folder_path = get_folder_path(rq) folder_path = get_folder_path(rq)
is_exist = os.path.exists(folder_path) is_exist = os.path.exists(folder_path)
@ -194,6 +214,7 @@ def save_json_file(file_name: str, rq: SubscriptionRequest, data: dict):
json.dump(data, json_file) json.dump(data, json_file)
return file_path return file_path
@tracer.start_as_current_span("save_feedback_file")
def save_feedback_file(file_name: str, rq: FeedbackRequest, uploaded_file: dict): def save_feedback_file(file_name: str, rq: FeedbackRequest, uploaded_file: dict):
user_id = str(rq.subscription.user.id) user_id = str(rq.subscription.user.id)
feedback_id = str(rq.id) feedback_id = str(rq.id)
@ -209,6 +230,7 @@ def save_feedback_file(file_name: str, rq: FeedbackRequest, uploaded_file: dict)
csvfile.write(file_contents) csvfile.write(file_contents)
return file_path return file_path
@tracer.start_as_current_span("save_workbook_file")
def save_workbook_file(file_name: str, rp: Report, workbook, prefix=""): def save_workbook_file(file_name: str, rp: Report, workbook, prefix=""):
report_id = str(rp.report_id) report_id = str(rp.report_id)
@ -222,6 +244,7 @@ def save_workbook_file(file_name: str, rp: Report, workbook, prefix=""):
workbook.save(file_path) workbook.save(file_path)
return file_path return file_path
@tracer.start_as_current_span("delete_file_with_path")
def delete_file_with_path(file_path: str) -> bool: def delete_file_with_path(file_path: str) -> bool:
try: try:
os.remove(file_path) os.remove(file_path)
@ -231,6 +254,7 @@ def delete_file_with_path(file_path: str) -> bool:
return False return False
@tracer.start_as_current_span("save_template_file")
def save_template_file(file_name: str, rq: OcrTemplate, file: TemporaryUploadedFile, quality): def save_template_file(file_name: str, rq: OcrTemplate, file: TemporaryUploadedFile, quality):
try: try:
folder_path = get_template_folder_path(rq) folder_path = get_template_folder_path(rq)
@ -258,6 +282,7 @@ def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, fo
raise ServiceUnavailableException() raise ServiceUnavailableException()
return file_path return file_path
@tracer.start_as_current_span("save_images_to_csv_briefly")
def save_images_to_csv_briefly(id, image_filenames): def save_images_to_csv_briefly(id, image_filenames):
# columns = ["request_id", "file_name", "predict_result", "feedback_result", "reviewed_result", "feedback_accuracy", "reviewed_accuracy"] # columns = ["request_id", "file_name", "predict_result", "feedback_result", "reviewed_result", "feedback_accuracy", "reviewed_accuracy"]
columns = ["request_id", "file_name", "predict_result", "feedback_result", "reviewed_result", "feedback_accuracy", "reviewed_accuracy"] columns = ["request_id", "file_name", "predict_result", "feedback_result", "reviewed_result", "feedback_accuracy", "reviewed_accuracy"]
@ -290,6 +315,7 @@ def save_images_to_csv_briefly(id, image_filenames):
# save to S3 # save to S3
save_report_to_S3(id, file_path) save_report_to_S3(id, file_path)
@tracer.start_as_current_span("resize_and_save_file")
def resize_and_save_file(file_name: str, rq: SubscriptionRequest, file: TemporaryUploadedFile, quality: int): def resize_and_save_file(file_name: str, rq: SubscriptionRequest, file: TemporaryUploadedFile, quality: int):
try: try:
folder_path = get_folder_path(rq) folder_path = get_folder_path(rq)
@ -310,6 +336,7 @@ def resize_and_save_file(file_name: str, rq: SubscriptionRequest, file: Temporar
logger.error(f"{e}") logger.error(f"{e}")
raise ServiceUnavailableException() raise ServiceUnavailableException()
@tracer.start_as_current_span("save_to_S3")
def save_to_S3(file_name, rq, local_file_path): def save_to_S3(file_name, rq, local_file_path):
try: try:
file_path = get_folder_path(rq) file_path = get_folder_path(rq)
@ -323,6 +350,7 @@ def save_to_S3(file_name, rq, local_file_path):
logger.error(f"{e}") logger.error(f"{e}")
raise ServiceUnavailableException() raise ServiceUnavailableException()
@tracer.start_as_current_span("save_feedback_to_S3")
def save_feedback_to_S3(file_name, id, local_file_path): def save_feedback_to_S3(file_name, id, local_file_path):
try: try:
assert len(local_file_path.split("/")) >= 3, "file_path must have at least feedback_folder and feedback_id" assert len(local_file_path.split("/")) >= 3, "file_path must have at least feedback_folder and feedback_id"
@ -335,6 +363,7 @@ def save_feedback_to_S3(file_name, id, local_file_path):
logger.error(f"{e}") logger.error(f"{e}")
raise ServiceUnavailableException() raise ServiceUnavailableException()
@tracer.start_as_current_span("save_report_to_S3")
def save_report_to_S3(id, local_file_path, delay=0): def save_report_to_S3(id, local_file_path, delay=0):
try: try:
s3_key = os.path.join("report", local_file_path.split("/")[-2], local_file_path.split("/")[-1]) s3_key = os.path.join("report", local_file_path.split("/")[-2], local_file_path.split("/")[-1])
@ -345,9 +374,11 @@ def save_report_to_S3(id, local_file_path, delay=0):
logger.error(f"{e}") logger.error(f"{e}")
raise ServiceUnavailableException() raise ServiceUnavailableException()
@tracer.start_as_current_span("download_from_S3")
def download_from_S3(s3_key, local_file_path): def download_from_S3(s3_key, local_file_path):
s3_client.download_file(s3_key, local_file_path) s3_client.download_file(s3_key, local_file_path)
@tracer.start_as_current_span("save_file_with_path")
def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path): def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path):
try: try:
file_path = os.path.join(folder_path, file_name) file_path = os.path.join(folder_path, file_name)
@ -364,13 +395,14 @@ def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, fo
raise ServiceUnavailableException() raise ServiceUnavailableException()
return file_path return file_path
@tracer.start_as_current_span("save_pdf")
def save_pdf(file_path: str, file: TemporaryUploadedFile): def save_pdf(file_path: str, file: TemporaryUploadedFile):
f = open(file_path, 'wb+') f = open(file_path, 'wb+')
for chunk in file.chunks(): for chunk in file.chunks():
f.write(chunk) f.write(chunk)
f.close() f.close()
@tracer.start_as_current_span("save_img")
def save_img(file_path: str, file: TemporaryUploadedFile, quality): def save_img(file_path: str, file: TemporaryUploadedFile, quality):
with open(file.temporary_file_path(), "rb") as fs: with open(file.temporary_file_path(), "rb") as fs:
input_file = io.BytesIO(fs.read()) input_file = io.BytesIO(fs.read())
@ -407,6 +439,7 @@ def save_img(file_path: str, file: TemporaryUploadedFile, quality):
image = image.convert('RGB') image = image.convert('RGB')
image.save(file_path, optimize=True, quality=quality) image.save(file_path, optimize=True, quality=quality)
@tracer.start_as_current_span("build_media_url")
def build_media_url(folder: str, uid: str, file_name: str = None) -> str: def build_media_url(folder: str, uid: str, file_name: str = None) -> str:
token = image_authenticator.generate_img_token() token = image_authenticator.generate_img_token()
if not file_name: if not file_name:
@ -420,6 +453,7 @@ def build_media_url(folder: str, uid: str, file_name: str = None) -> str:
token=token) token=token)
# @tracer.start_as_current_span("build_url")
def build_url(folder: str, data_id: str, user_id: int, file_name: str = None) -> str: def build_url(folder: str, data_id: str, user_id: int, file_name: str = None) -> str:
token = image_authenticator.generate_img_token(user_id) token = image_authenticator.generate_img_token(user_id)
if not file_name: if not file_name:
@ -431,13 +465,17 @@ def build_url(folder: str, data_id: str, user_id: int, file_name: str = None) ->
file_name=file_name, file_name=file_name,
base_url=settings.BASE_URL, base_url=settings.BASE_URL,
token=token) token=token)
@tracer.start_as_current_span("build_media_url_v2")
def build_media_url_v2(media_id: str, user_id: int, sub_id: int, u_sync_id: str) -> str: def build_media_url_v2(media_id: str, user_id: int, sub_id: int, u_sync_id: str) -> str:
token = image_authenticator.generate_img_token_v2(user_id, sub_id, u_sync_id) token = image_authenticator.generate_img_token_v2(user_id, sub_id, u_sync_id)
return f'{settings.BASE_URL}/api/ctel/v2/media/request/{media_id}/?token={token}' return f'{settings.BASE_URL}/api/ctel/v2/media/request/{media_id}/?token={token}'
@tracer.start_as_current_span("build_S3_url")
def build_S3_url(s3_key, exp_time): def build_S3_url(s3_key, exp_time):
return s3_client.create_url_with_expiration(s3_key, exp_time) return s3_client.create_url_with_expiration(s3_key, exp_time)
@tracer.start_as_current_span("get_value")
def get_value(_dict, keys): def get_value(_dict, keys):
keys = keys.split('.') keys = keys.split('.')
value = _dict value = _dict
@ -459,6 +497,7 @@ def get_value(_dict, keys):
return value return value
@tracer.start_as_current_span("dict2xlsx")
def dict2xlsx(input: json, _type='report'): def dict2xlsx(input: json, _type='report'):
if _type == 'report': if _type == 'report':
wb = dump_excel_report(input=input) wb = dump_excel_report(input=input)
@ -468,6 +507,7 @@ def dict2xlsx(input: json, _type='report'):
wb = dump_excel_billing_report(input=input) wb = dump_excel_billing_report(input=input)
return wb return wb
@tracer.start_as_current_span("dump_excel_report")
def dump_excel_report(input: json): def dump_excel_report(input: json):
red = "FF0000" red = "FF0000"
black = "000000" black = "000000"
@ -537,6 +577,7 @@ def dump_excel_report(input: json):
start_index += 1 start_index += 1
return wb return wb
@tracer.start_as_current_span("dump_excel_report_detail")
def dump_excel_report_detail(input: json): def dump_excel_report_detail(input: json):
red = "FF0000" red = "FF0000"
black = "000000" black = "000000"
@ -597,6 +638,7 @@ def dump_excel_report_detail(input: json):
start_index += 1 start_index += 1
return wb return wb
@tracer.start_as_current_span("dump_excel_billing_report")
def dump_excel_billing_report(input: json): def dump_excel_billing_report(input: json):
black = "000000" black = "000000"
font_black = Font(name="Calibri", size=11, color=black) font_black = Font(name="Calibri", size=11, color=black)
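The dump_excel_* builders above all follow one openpyxl recipe: create a workbook, style a header row with Font, then append data rows. Reduced to a runnable sketch with invented column names:

from openpyxl import Workbook
from openpyxl.styles import Font

def dict_to_workbook(rows):
    wb = Workbook()
    ws = wb.active
    ws.append(["request_id", "accuracy"])          # header row
    for cell in ws[1]:                             # style the header cells
        cell.font = Font(name="Calibri", size=11, bold=True)
    for row in rows:
        ws.append([row.get("request_id", ""), row.get("accuracy", 0)])
    return wb

wb = dict_to_workbook([{"request_id": "rq-1", "accuracy": 0.98}])
wb.save("report_sketch.xlsx")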
View File
@ -26,13 +26,13 @@ from ..models import UserProfile, OcrTemplate, OcrTemplateBox, \
Subscription, SubscriptionRequestFile, SubscriptionRequest Subscription, SubscriptionRequestFile, SubscriptionRequest
from ..celery_worker.client_connector import c_connector from ..celery_worker.client_connector import c_connector
import logging import logging
from opentelemetry import trace
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
tracer = trace.get_tracer("sbt_django_backend")
class UserData: class UserData:
user: UserProfile = None user: UserProfile = None
current_sub: Subscription = None current_sub: Subscription = None
def __init__(self, request): def __init__(self, request):
user_data = validate_user_request_and_get(request) user_data = validate_user_request_and_get(request)
users = UserProfile.objects.filter(sync_id=user_data['id']) users = UserProfile.objects.filter(sync_id=user_data['id'])
@ -64,11 +64,11 @@ class UserData:
self.user = user self.user = user
self.current_sub = sub self.current_sub = sub
@tracer.start_as_current_span("get_user")
def get_user(request) -> UserData: def get_user(request) -> UserData:
return UserData(request) return UserData(request)
@tracer.start_as_current_span("validate_user_request_and_get")
def validate_user_request_and_get(request): def validate_user_request_and_get(request):
if not hasattr(request, 'user_data'): if not hasattr(request, 'user_data'):
raise NotFoundException(excArgs='user') raise NotFoundException(excArgs='user')
@ -79,7 +79,7 @@ def validate_user_request_and_get(request):
raise NotFoundException(excArgs='subscription') raise NotFoundException(excArgs='subscription')
return data return data
@tracer.start_as_current_span("validate_ocr_request_and_get")
def validate_ocr_request_and_get(request, subscription): def validate_ocr_request_and_get(request, subscription):
validated_data = {} validated_data = {}
if "processType" not in request.data or request.data['processType'] is None \ if "processType" not in request.data or request.data['processType'] is None \
@ -109,6 +109,7 @@ def validate_ocr_request_and_get(request, subscription):
return validated_data return validated_data
@tracer.start_as_current_span("sbt_validate_ocr_request_and_get")
def sbt_validate_ocr_request_and_get(request, subscription): def sbt_validate_ocr_request_and_get(request, subscription):
validated_data = {} validated_data = {}
# if "processType" not in request.data or request.data['processType'] is None \ # if "processType" not in request.data or request.data['processType'] is None \
@ -154,6 +155,7 @@ def sbt_validate_ocr_request_and_get(request, subscription):
return validated_data return validated_data
@tracer.start_as_current_span("string_to_boolean")
def string_to_boolean(value): def string_to_boolean(value):
true_strings = ['true', 'yes', '1', 'on'] true_strings = ['true', 'yes', '1', 'on']
false_strings = ['false', 'no', '0', 'off'] false_strings = ['false', 'no', '0', 'off']
@ -165,6 +167,7 @@ def string_to_boolean(value):
else: else:
return False return False
@tracer.start_as_current_span("sbt_validate_feedback")
def sbt_validate_feedback(request): def sbt_validate_feedback(request):
validated_data = {} validated_data = {}
@ -194,6 +197,7 @@ def sbt_validate_feedback(request):
return validated_data return validated_data
@tracer.start_as_current_span("count_pages_in_pdf")
def count_pages_in_pdf(pdf_file): def count_pages_in_pdf(pdf_file):
count = 0 count = 0
fh, temp_filename = tempfile.mkstemp() # make a tmp file fh, temp_filename = tempfile.mkstemp() # make a tmp file
@ -207,6 +211,7 @@ def count_pages_in_pdf(pdf_file):
return count return count
@tracer.start_as_current_span("count_pages_in_pdf_list")
def count_pages_in_pdf_list(list_file): def count_pages_in_pdf_list(list_file):
total_page = 0 total_page = 0
@ -216,6 +221,7 @@ def count_pages_in_pdf_list(list_file):
return total_page return total_page
# @tracer.start_as_current_span("map_process_type_to_folder_name")
def map_process_type_to_folder_name(p_type): def map_process_type_to_folder_name(p_type):
if p_type == ProcessType.ID_CARD.value: if p_type == ProcessType.ID_CARD.value:
return 'id_card' return 'id_card'
@ -239,6 +245,7 @@ def map_process_type_to_folder_name(p_type):
raise InvalidException(excArgs='processType') raise InvalidException(excArgs='processType')
@tracer.start_as_current_span("get_random_string")
def get_random_string(length): def get_random_string(length):
# choose from all lowercase letter # choose from all lowercase letter
letters = string.ascii_lowercase letters = string.ascii_lowercase
@ -247,6 +254,7 @@ def get_random_string(length):
return result_str return result_str
@tracer.start_as_current_span("is_int")
def is_int(text) -> bool: def is_int(text) -> bool:
try: try:
# converting to integer # converting to integer
@ -256,6 +264,7 @@ def is_int(text) -> bool:
return False return False
@tracer.start_as_current_span("validate_box")
def validate_box(list_box, max_number_of_box, max_number_of_item_in_a_box, number_of_box=None): def validate_box(list_box, max_number_of_box, max_number_of_item_in_a_box, number_of_box=None):
if len(list_box) > max_number_of_box: if len(list_box) > max_number_of_box:
raise NumberOfBoxLimitReachedException(excArgs=LIST_BOX_MESSAGE) raise NumberOfBoxLimitReachedException(excArgs=LIST_BOX_MESSAGE)
@ -268,6 +277,7 @@ def validate_box(list_box, max_number_of_box, max_number_of_item_in_a_box, numbe
raise InvalidException(excArgs="box coordinates") raise InvalidException(excArgs="box coordinates")
@tracer.start_as_current_span("to_box_list")
def to_box_list(str_list): def to_box_list(str_list):
ls = [] ls = []
if not str_list: if not str_list:
@ -280,6 +290,7 @@ def to_box_list(str_list):
return ls return ls
@tracer.start_as_current_span("validate_json_response_and_return")
def validate_json_response_and_return(res): def validate_json_response_and_return(res):
if res.status_code != status.HTTP_200_OK: if res.status_code != status.HTTP_200_OK:
raise ServiceUnavailableException() raise ServiceUnavailableException()
@ -290,6 +301,7 @@ def validate_json_response_and_return(res):
return res_data return res_data
@tracer.start_as_current_span("is_duplicate_in_list")
def is_duplicate_in_list(str_list): def is_duplicate_in_list(str_list):
unique_set: set = set({}) unique_set: set = set({})
for label in str_list: for label in str_list:
@ -300,16 +312,19 @@ def is_duplicate_in_list(str_list):
return False return False
@tracer.start_as_current_span("validate_duplicate")
def validate_duplicate(list_box): def validate_duplicate(list_box):
if is_duplicate_in_list(list_box): if is_duplicate_in_list(list_box):
raise DuplicateEntityException(excArgs="box_label") raise DuplicateEntityException(excArgs="box_label")
@tracer.start_as_current_span("validate_vn_and_space")
def validate_vn_and_space(txt: str): def validate_vn_and_space(txt: str):
if not pattern.fullmatch(txt.upper()): if not pattern.fullmatch(txt.upper()):
raise InvalidException(excArgs=NAME_MESSAGE) raise InvalidException(excArgs=NAME_MESSAGE)
@tracer.start_as_current_span("save_template_boxs")
def save_template_boxs(data, template): def save_template_boxs(data, template):
saving_list = [] saving_list = []
for d_box in data['data_boxs']: for d_box in data['data_boxs']:
@ -329,6 +344,7 @@ def token_value(token_type):
return 5 return 5
return 1 # Basic OCR return 1 # Basic OCR
@tracer.start_as_current_span("send_to_queue2")
def send_to_queue2(rq_id, sub_id, file_url, user_id, typez, metadata={}): def send_to_queue2(rq_id, sub_id, file_url, user_id, typez, metadata={}):
try: try:
if typez == ProcessType.ID_CARD.value: if typez == ProcessType.ID_CARD.value:
@ -346,6 +362,7 @@ def send_to_queue2(rq_id, sub_id, file_url, user_id, typez, metadata={}):
logger.error(e) logger.error(e)
raise BadGatewayException() raise BadGatewayException()
@tracer.start_as_current_span("build_template_matching_data")
def build_template_matching_data(template): def build_template_matching_data(template):
temp_dict = { temp_dict = {
@ -372,6 +389,7 @@ def build_template_matching_data(template):
return temp_dict return temp_dict
@tracer.start_as_current_span("send_template_queue")
def send_template_queue(rq_id, file_url, template: OcrTemplate, uid): def send_template_queue(rq_id, file_url, template: OcrTemplate, uid):
try: try:
@ -383,9 +401,11 @@ def send_template_queue(rq_id, file_url, template: OcrTemplate, uid):
logger.error(e) logger.error(e)
raise BadGatewayException() raise BadGatewayException()
@tracer.start_as_current_span("process_feedback")
def process_feedback(feedback_id, local_file_path): def process_feedback(feedback_id, local_file_path):
c_connector.csv_feedback((local_file_path, feedback_id)) c_connector.csv_feedback((local_file_path, feedback_id))
@tracer.start_as_current_span("process_pdf_file")
def process_pdf_file(file_name: str, file_obj: TemporaryUploadedFile, request: SubscriptionRequest, user, doc_type: str, index_in_request: int) -> list: def process_pdf_file(file_name: str, file_obj: TemporaryUploadedFile, request: SubscriptionRequest, user, doc_type: str, index_in_request: int) -> list:
doc: fitz.Document = fitz.open(stream=file_obj.file.read()) doc: fitz.Document = fitz.open(stream=file_obj.file.read())
if doc.page_count > settings.MAX_PAGES_OF_PDF_FILE: if doc.page_count > settings.MAX_PAGES_OF_PDF_FILE:
@ -406,6 +426,7 @@ def process_pdf_file(file_name: str, file_obj: TemporaryUploadedFile, request: S
# Sub-file # Sub-file
return pdf_to_images_urls(doc, request, user) return pdf_to_images_urls(doc, request, user)
@tracer.start_as_current_span("process_image_file")
def process_image_file(file_name: str, file_obj: TemporaryUploadedFile, request: SubscriptionRequest, user, doc_type: str, index_in_request: int) -> list: def process_image_file(file_name: str, file_obj: TemporaryUploadedFile, request: SubscriptionRequest, user, doc_type: str, index_in_request: int) -> list:
if file_obj.size > settings.SIZE_TO_COMPRESS: if file_obj.size > settings.SIZE_TO_COMPRESS:
quality = 95 quality = 95
@ -425,6 +446,7 @@ def process_image_file(file_name: str, file_obj: TemporaryUploadedFile, request:
'request_file_id': new_request_file.code 'request_file_id': new_request_file.code
}] }]
# @tracer.start_as_current_span("process_image_local_file")
def process_image_local_file(file_name: str, file_path: str, request: SubscriptionRequest, user, doc_type: str, index_in_request: int) -> list: def process_image_local_file(file_name: str, file_path: str, request: SubscriptionRequest, user, doc_type: str, index_in_request: int) -> list:
new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path, new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path,
request=request, request=request,
@ -439,6 +461,7 @@ def process_image_local_file(file_name: str, file_path: str, request: Subscripti
'request_file_id': new_request_file.code 'request_file_id': new_request_file.code
}] }]
@tracer.start_as_current_span("pdf_to_images_urls")
def pdf_to_images_urls(doc_path, request: SubscriptionRequest, user, dpi: int = 300) -> list: def pdf_to_images_urls(doc_path, request: SubscriptionRequest, user, dpi: int = 300) -> list:
pdf_extracted = [] pdf_extracted = []
saving_path = FileUtils.get_folder_path(request) saving_path = FileUtils.get_folder_path(request)
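count_pages_in_pdf and pdf_to_images_urls both ride on PyMuPDF (fitz): open the document, read page_count, rasterize page by page at a chosen dpi. A compact sketch; the dpi keyword needs a reasonably recent PyMuPDF, and the output naming is invented:

import fitz  # PyMuPDF

def pdf_to_pngs(pdf_path: str, dpi: int = 300) -> list:
    doc = fitz.open(pdf_path)
    out = []
    for i, page in enumerate(doc):            # a Document iterates its pages
        pix = page.get_pixmap(dpi=dpi)        # rasterize at the requested dpi
        name = f"page_{i}.png"
        pix.save(name)
        out.append(name)
    return out

# pdf_to_pngs("sample.pdf") returns one PNG per page;
# len() of the result equals doc.page_count.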
View File
@ -59,4 +59,17 @@ openpyxl==3.1.2
# For sdsvkvu compatibility # For sdsvkvu compatibility
# torch==1.13.1+cu116 # torch==1.13.1+cu116
# torchvision==0.14.1+cu116 # torchvision==0.14.1+cu116
# --extra-index-url https://download.pytorch.org/whl/cu116 # --extra-index-url https://download.pytorch.org/whl/cu116
opentelemetry-api==1.27.0
opentelemetry-distro==0.48b0
opentelemetry-exporter-otlp==1.27.0
opentelemetry-exporter-otlp-proto-common==1.27.0
opentelemetry-exporter-otlp-proto-http==1.27.0
opentelemetry-instrumentation==0.48b0
opentelemetry-instrumentation-celery==0.48b0
opentelemetry-instrumentation-django==0.48b0
opentelemetry-proto==1.27.0
opentelemetry-sdk==1.27.0
opentelemetry-semantic-conventions==0.48b0
opentelemetry-util-http==0.48b0
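These pins pull in the Django and Celery auto-instrumentation packages alongside the OpenTelemetry SDK. The diff shows only the dependencies, so the wiring below is an assumption about typical usage: Celery has to be instrumented once per worker process, Django once at startup.

from celery.signals import worker_process_init
from opentelemetry.instrumentation.celery import CeleryInstrumentor
from opentelemetry.instrumentation.django import DjangoInstrumentor

@worker_process_init.connect(weak=False)
def init_celery_tracing(*args, **kwargs):
    # Runs in each forked worker, where per-process instrumentation belongs.
    CeleryInstrumentor().instrument()

def init_django_tracing():
    DjangoInstrumentor().instrument()  # call once, e.g. from wsgi.py/manage.py

The opentelemetry-distro pin also ships the opentelemetry-instrument launcher, a zero-code alternative to instrumenting in code.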
View File
@ -83,9 +83,9 @@
"prettier-plugin-organize-imports": "^3.2.1", "prettier-plugin-organize-imports": "^3.2.1",
"rollup-plugin-visualizer": "^5.9.0", "rollup-plugin-visualizer": "^5.9.0",
"sass": "^1.57.1", "sass": "^1.57.1",
"typescript": "^5.6.3", "typescript": "^4.9.4",
"vite": "^4.0.3", "vite": "^4.0.3",
"vite-plugin-svgr": "^2.4.0", "vite-plugin-svgr": "^2.4.0",
"vite-tsconfig-paths": "^4.0.3" "vite-tsconfig-paths": "^4.0.3"
} }
} }
View File
@ -1,4 +1,4 @@
import { AppstoreOutlined, BarChartOutlined, RotateRightOutlined } from '@ant-design/icons'; import { AppstoreOutlined, BarChartOutlined, RotateRightOutlined, FileSearchOutlined } from '@ant-design/icons';
import { t } from '@lingui/macro'; import { t } from '@lingui/macro';
import { Menu, MenuProps } from 'antd'; import { Menu, MenuProps } from 'antd';
import React from 'react'; import React from 'react';
View File
@ -4,6 +4,7 @@ export function GlobalSpin() {
return ( return (
<Spin <Spin
size='large' size='large'
tip='Loading ...'
style={{ style={{
position: 'fixed', position: 'fixed',
top: '50%', top: '50%',
View File
@ -40,7 +40,6 @@
"Remove this image from the evaluation report": "Remove this image from the evaluation report", "Remove this image from the evaluation report": "Remove this image from the evaluation report",
"Report Details": "Report Details", "Report Details": "Report Details",
"Report Filters": "Report Filters", "Report Filters": "Report Filters",
"Review Filters": "Review Filters",
"Report Type": "Report Type", "Report Type": "Report Type",
"Reports": "Reports", "Reports": "Reports",
"Retry": "Retry", "Retry": "Retry",
@ -71,9 +70,5 @@
"You are only allowed to upload {0} file.": "You are only allowed to upload {0} file.", "You are only allowed to upload {0} file.": "You are only allowed to upload {0} file.",
"You have unsaved changes!": "You have unsaved changes!", "You have unsaved changes!": "You have unsaved changes!",
"Your current password has expired. Please change your password to continue.": "Your current password has expired. Please change your password to continue.", "Your current password has expired. Please change your password to continue.": "Your current password has expired. Please change your password to continue.",
"max_accuracy": "Max accuracy", "max_accuracy": "Max accuracy"
"docType": "Only type",
"Feedback Result": "Feedback Result",
"Predict Result": "Predict Result",
"Reviewed Result": "Reviewed Result"
} }
View File
@ -40,7 +40,6 @@
"Remove this image from the evaluation report": "", "Remove this image from the evaluation report": "",
"Report Details": "", "Report Details": "",
"Report Filters": "", "Report Filters": "",
"Review Filters": "",
"Report Type": "", "Report Type": "",
"Reports": "", "Reports": "",
"Retry": "Thử lại", "Retry": "Thử lại",
@ -71,9 +70,5 @@
"You are only allowed to upload {0} file.": "Bạn chỉ được phép tải lên {0}.", "You are only allowed to upload {0} file.": "Bạn chỉ được phép tải lên {0}.",
"You have unsaved changes!": "Bạn có những thay đổi chưa được lưu!", "You have unsaved changes!": "Bạn có những thay đổi chưa được lưu!",
"Your current password has expired. Please change your password to continue.": "", "Your current password has expired. Please change your password to continue.": "",
"max_accuracy": "Độ chính xác tối đa", "max_accuracy": "Độ chính xác tối đa"
"DocType": "Kiểu tài liệu",
"Feedback Result": "",
"Predict Result": "",
"Reviewed Result": ""
} }
View File
@ -1,69 +0,0 @@
import React from 'react';
import { Button, Descriptions, Input } from 'antd';
import type { DescriptionsProps } from 'antd';
import { CopyOutlined } from '@ant-design/icons';
import { FEEDBACK_RESULT, PREDICTED_RESULT, REVIEWED_RESULT } from './const';
const DocumentCompareInfo = ({ key, data, selectedFileDataSource, updateRevisedByFeedback, handleUpdateFileInField, shouldRevised, disabledInput }) => {
const items: DescriptionsProps['items'] = [
{
key: selectedFileDataSource[data]?.[FEEDBACK_RESULT] || '1',
label: 'Feedback',
children: selectedFileDataSource[data]?.[FEEDBACK_RESULT],
labelStyle: { color: '#333', padding: '4px 16px' },
contentStyle: { padding: '4px 16px' },
span: 3
},
{
key: selectedFileDataSource[data]?.[PREDICTED_RESULT] || '2',
label: 'Predicted',
children: selectedFileDataSource[data]?.[PREDICTED_RESULT],
labelStyle: { color: '#333', padding: '4px 16px' },
contentStyle: { padding: '4px 16px' },
span: 3
},
{
key: selectedFileDataSource[data]?.[REVIEWED_RESULT] || '3',
label: 'Revised',
children: <Input
style={{ background: shouldRevised ? 'yellow' : '', padding: '0' }}
value={selectedFileDataSource[data]?.[REVIEWED_RESULT]}
size='small'
onChange={(e) =>
handleUpdateFileInField(data, e.target.value)
}
variant="borderless"
disabled={disabledInput === undefined || disabledInput === 0}
/>,
labelStyle: { color: '#333', padding: '4px 16px' },
contentStyle: { padding: '4px 16px' },
span: 3
},
];
return (
<div style={{ margin: '0 0 8px' }} className='file-input-group' key={key}>
<div
style={{
display: 'flex',
justifyContent: 'space-between',
alignItems: 'center',
margin: '0 0 4px',
}}
>
<p style={{ fontWeight: 'bold', margin: 0 }}>{data}</p>
<Button
shape='round'
type='primary'
ghost
icon={<CopyOutlined />}
size='small'
onClick={() => updateRevisedByFeedback(data)}
disabled={disabledInput === undefined || disabledInput === 0}
/>
</div>
<Descriptions bordered items={items} layout="horizontal" size='small' contentStyle={{ height: '13px' }} />
</div>
)
}
export default DocumentCompareInfo;
View File
@ -1,39 +0,0 @@
import React from 'react';
import { Descriptions } from 'antd';
import type { DescriptionsProps } from 'antd';
const DocumentHeadInfo = ({ currentRequest }) => {
const items: DescriptionsProps['items'] = [
{
key: '1',
label: 'Request ID',
children: currentRequest?.RequestID,
span: 2,
labelStyle: { color: '#333', width: '200px' }
},
{
key: '2',
label: 'Raw accuracy',
children: currentRequest?.raw_accuracy,
labelStyle: { color: '#333', width: '200px' }
},
{
key: '3',
label: 'Redemption ID',
children: currentRequest?.RedemptionID,
span: 2,
labelStyle: { color: '#333', width: '200px' }
},
{
key: '4',
label: 'Processing time',
children: currentRequest?.['Server Processing Time (ms)'],
labelStyle: { color: '#333', width: '200px' }
}
];
return (
<Descriptions bordered items={items} size="small" />
)
}
export default DocumentHeadInfo;
View File
@ -2,16 +2,10 @@ import { baseURL } from 'request/api';
import { RecentRequest } from './const'; import { RecentRequest } from './const';
export const fetchAllRequests = async ( export const fetchAllRequests = async (
filterDateRange: any[], filterDateRange,
filterSubsidiaries: string, filterSubsidiaries,
filterReviewState: string, filterReviewState,
filterIncludeTests: string, filterIncludeTests,
filterDoctype: string,
filterFeedbackResult: string,
filterPredictResult: string,
filterReviewedResult: string,
filterBadReason: string,
filterOtherReason: string,
page = 1, page = 1,
page_size = 20, page_size = 20,
max_accuracy = 100, max_accuracy = 100,
@ -31,25 +25,6 @@ export const fetchAllRequests = async (
if (filterIncludeTests) { if (filterIncludeTests) {
filterStr += `includes_test=${filterIncludeTests}&`; filterStr += `includes_test=${filterIncludeTests}&`;
} }
// add 4 more field
if (filterDoctype) {
filterStr += `doc_type=${filterDoctype}&`;
}
if (filterFeedbackResult) {
filterStr += `feedback_result=${filterFeedbackResult}&`;
}
if (filterPredictResult) {
filterStr += `predict_result=${filterPredictResult}&`;
}
if (filterReviewedResult) {
filterStr += `reviewed_result=${filterReviewedResult}&`;
}
if (filterBadReason === 'other' && filterOtherReason.trim()) {
filterStr += `bad_reason=${filterOtherReason}&`;
} else if(filterBadReason !== 'other') {
filterStr += `bad_reason=${filterBadReason}&`;
}
//
if (startDate && endDate) { if (startDate && endDate) {
filterStr += `start_date=${startDate}&end_date=${endDate}&`; filterStr += `start_date=${startDate}&end_date=${endDate}&`;
} }
@ -79,7 +54,7 @@ export const confirmRequest = async (requestID: any) => {
     body: JSON.stringify({ request_file_results: [] }),
   })
 };
 export const updateRevisedDataByFile = async (
@ -102,7 +77,7 @@ export const updateRevisedDataByFile = async (
   )
 };
-export const fetchRequest = async (id: string) => {
+export const fetchRequest = async (id) => {
   const token = localStorage.getItem('sbt-token') || '';
   const response = await fetch(`${baseURL}/ctel/request/${id}/`, {
     method: 'GET',
@ -112,7 +87,7 @@ export const fetchRequest = async (id: string) => {
   });
   return await (
     await response.json()
-  ).subscription_requests?.[0];
+  ).subscription_requests[0];
 };
 export const addRecentRequest = (
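Dropping the optional chain on subscription_requests changes the failure mode: when a response carries no subscription_requests, the old expression evaluates to undefined while the new one throws. A minimal illustration, with an assumed payload shape:

    // Sketch: assumed response shape, not the actual API contract.
    const payload: { subscription_requests?: Array<{ RequestID: string }> } = {};
    const safe = payload.subscription_requests?.[0]; // undefined, no throw
    // const unsafe = payload.subscription_requests[0]; // TypeError at runtime in plain JS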


@ -48,12 +48,6 @@ export const SUBSIDIARIES = [
   { value: 'SEIN', label: 'SEIN' },
 ];
-export const DOCTYPE = [
-  { value: null, label: '--' },
-  { value: 'imei', label: 'imei' },
-  { value: 'invoice', label: 'invoice' },
-];
 export const SOURCE_KEYS = [
   'retailername',
   'sold_to_party',

@ -3,6 +3,7 @@ import {
   ArrowRightOutlined,
   CheckCircleOutlined,
   ClockCircleFilled,
+  CopyOutlined,
   FullscreenExitOutlined,
   FullscreenOutlined,
 } from '@ant-design/icons';
@ -40,7 +41,6 @@ import {
 } from './api';
 import {
   counter_measure_map,
-  DOCTYPE,
   FEEDBACK_ACCURACY,
   FEEDBACK_RESULT,
   PREDICTED_RESULT,
@ -54,8 +54,6 @@ import {
 import FileCard from './FileCard';
 import RecentRequest from './RecentRequest';
 import './style.css';
-import DocumentHeadInfo from './DocumentHeadInfo';
-import DocumentCompareInfo from './DocumentCompareInfo';
 const ReviewPage = () => {
   const [loading, setLoading] = useState(false);
@ -73,13 +71,6 @@ const ReviewPage = () => {
   const [filterAccuracy, setFilterAccuracy] = useState(100);
   const [filterReviewState, setFilterReviewState] = useState('all');
   const [filterIncludeTests, setFilterIncludesTests] = useState('true');
-  const [filterDoctype, setFilterDoctype] = useState(null);
-  const [filterFeedbackResult, setFilterFeedbackResult] = useState('');
-  const [filterPredictResult, setFilterPredictResult] = useState('');
-  const [filterReviewedResult, setFilterReviewedResult] = useState('');
-  const [filterBadReason, setFilterBadReason] = useState('other');
-  const [filterOtherReason, setFilterOtherReason] = useState('');
   // const [requests, setRequests] = useState([]);
   const [currentRequest, setCurrentRequest] = useState(null);
   const [currentRequestIndex, setCurrentRequestIndex] = useState(1);
@ -118,12 +109,6 @@ const ReviewPage = () => {
   filterSubsidiaries,
   filterReviewState,
   filterIncludeTests,
-  filterDoctype,
-  filterFeedbackResult,
-  filterPredictResult,
-  filterReviewedResult,
-  filterBadReason,
-  filterOtherReason,
   1,
   1,
   filterAccuracy,
@ -131,7 +116,7 @@ const ReviewPage = () => {
   setTotalPages(data?.page?.total_requests);
   setHasNextRequest(1 < data?.page?.total_requests);
   const firstRequest = fetchRequest(
-    data?.subscription_requests?.[0]?.RequestID,
+    data?.subscription_requests[0].RequestID,
   );
   firstRequest.then(async (data) => {
     if (data) setCurrentRequest(data);
@ -178,7 +163,7 @@ const ReviewPage = () => {
 const setAndLoadSelectedFile = async (requestData, index) => {
   setSelectedFileId(index);
-  if (!requestData?.['Files'][index]) {
+  if (!requestData['Files'][index]) {
     setSelectedFileData('FAILED_TO_LOAD_FILE');
     setImageLoading(false);
     return;
@ -233,12 +218,6 @@ const ReviewPage = () => {
   filterSubsidiaries,
   filterReviewState,
   filterIncludeTests,
-  filterDoctype,
-  filterFeedbackResult,
-  filterPredictResult,
-  filterReviewedResult,
-  filterBadReason,
-  filterOtherReason,
   requestIndex,
   1,
   filterAccuracy,
@ -294,12 +273,6 @@ const ReviewPage = () => {
   filterSubsidiaries,
   filterReviewState,
   filterIncludeTests,
-  filterDoctype,
-  filterFeedbackResult,
-  filterPredictResult,
-  filterReviewedResult,
-  filterBadReason,
-  filterOtherReason,
   1,
   1,
   filterAccuracy,
@ -426,18 +399,18 @@ const ReviewPage = () => {
 style={
   fullscreen
     ? {
         position: 'fixed',
        top: 0,
         left: 0,
         width: '100%',
         height: '100%',
         backgroundColor: '#fff',
         zIndex: 1000,
       }
     : {
         height: '100%',
         position: 'relative',
       }
 }
 >
 <div
@ -501,10 +474,42 @@ const ReviewPage = () => {
 >
   {fullscreen ? <FullscreenExitOutlined /> : <FullscreenOutlined />}
 </Button>
+{totalRequests && (
+  <div
+    style={{
+      flexGrow: 1,
+      display: 'grid',
+      gridTemplateColumns: '1fr 1fr 1fr',
+      marginLeft: '16px',
+    }}
+  >
+    <div style={{ gridColumn: 'span 2 / span 2' }}>
+      <b>Request ID: &nbsp;</b>
+      {currentRequest?.RequestID}
+    </div>{' '}
+    <div>
+      <b>Created at: &nbsp;</b>
+      {currentRequest?.created_at}
+    </div>{' '}
+    <div>
+      <b>Request time: &nbsp;</b>
+      {currentRequest?.['Client Request Time (ms)']}
+    </div>{' '}
+    <div style={{ gridColumn: 'span 2 / span 2' }}>
+      <b>Redemption ID: &nbsp;</b>
+      {currentRequest?.RedemptionID}
+    </div>{' '}
+    <div>
+      <b>Raw accuracy: &nbsp;</b>
+      {currentRequest?.raw_accuracy}
+    </div>{' '}
+    <div style={{ gridColumn: 'span 2 / span 2' }}>
+      <b>Processing time: &nbsp;</b>
+      {currentRequest?.['Server Processing Time (ms)']}
+    </div>{' '}
+  </div>
+)}
 </div>
-{totalRequests && (
-  <DocumentHeadInfo currentRequest={currentRequest} />
-)}
 {totalRequests > 0 && (
   <div
     style={{
@ -556,17 +561,17 @@ const ReviewPage = () => {
   overflow: 'auto',
 }}
 >
-  {selectedFileData && <Viewer
+  <Viewer
     plugins={[defaultLayoutPluginInstance]}
     fileUrl={selectedFileData}
     onDocumentLoad={() => setImageLoading(false)}
-  />}
+  />
 </div>
 ) : (
 <div
   style={{
     flexGrow: 1,
-    overflowY: 'auto',
+    overflow: 'auto',
     display: 'flex',
     justifyContent: 'center',
     alignItems: 'center',
@ -734,10 +739,11 @@ const ReviewPage = () => {
 >
 <Input
   size='middle'
-  value={`Sub: ${filterSubsidiaries}, Date:${filterDateRange[0]
-    ? filterDateRange[0] + ' to ' + filterDateRange[1]
-    : 'All'
-  }, Reviewed: ${filterReviewState}, Tests: ${filterIncludeTests}`}
+  value={`Sub: ${filterSubsidiaries}, Date:${
+    filterDateRange[0]
+      ? filterDateRange[0] + ' to ' + filterDateRange[1]
+      : 'All'
+  }, Reviewed: ${filterReviewState}, Tests: ${filterIncludeTests}`}
   readOnly
 />
 <Button
@ -758,21 +764,54 @@ const ReviewPage = () => {
 let shouldRevised = false;
 try {
   if (
-    selectedFileDataSource[data]?.[FEEDBACK_ACCURACY]?.length > 0
+    selectedFileDataSource[data]?.[FEEDBACK_ACCURACY].length > 0
   ) {
     shouldRevised =
       selectedFileDataSource[data][FEEDBACK_ACCURACY][0] < 1;
   }
-} catch (error) { }
+} catch (error) {}
 return (
-  <DocumentCompareInfo key={data}
-    data={data}
-    selectedFileDataSource={selectedFileDataSource}
-    updateRevisedByFeedback={updateRevisedByFeedback}
-    handleUpdateFileInField={handleUpdateFileInField}
-    shouldRevised={shouldRevised}
-    disabledInput={currentRequest?.Files?.length}
-  />
+  <div style={{ margin: '0 0 8px' }} className='file-input-group'>
+    <div
+      style={{
+        display: 'flex',
+        justifyContent: 'space-between',
+        alignItems: 'center',
+        margin: '0 0 4px',
+      }}
+    >
+      <p style={{ fontWeight: 'bold', margin: 0 }}>{data}</p>
+      <Button
+        shape='round'
+        type='primary'
+        ghost
+        icon={<CopyOutlined />}
+        size='small'
+        onClick={() => updateRevisedByFeedback(data)}
+      />
+    </div>
+    <Input
+      addonBefore='Feedback'
+      size='small'
+      readOnly
+      value={selectedFileDataSource[data]?.[FEEDBACK_RESULT]}
+    />
+    <Input
+      addonBefore='Predicted'
+      readOnly
+      size='small'
+      value={selectedFileDataSource[data]?.[PREDICTED_RESULT]}
+    />
+    <Input
+      addonBefore='Revised'
+      style={{ background: shouldRevised ? 'yellow' : '' }}
+      size='small'
+      value={selectedFileDataSource[data]?.[REVIEWED_RESULT]}
+      onChange={(e) =>
+        handleUpdateFileInField(data, e.target.value)
+      }
+    />
+  </div>
 );
 })}
 <b>{t`Bad image reason:`}</b>
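The try/catch around shouldRevised exists only to swallow lookup failures, and the right-hand side reintroduces one by dropping the ?. before .length. A sketch of the same check without try/catch, under the data shape the diff implies (the FEEDBACK_ACCURACY string here is a placeholder; the real constant lives in const.ts):

    // Sketch only: a first feedback-accuracy entry below 1 marks the field for revision.
    const FEEDBACK_ACCURACY = 'Feedback Accuracy'; // placeholder key
    const selectedFileDataSource: Record<string, Record<string, number[]>> = {
      retailername: { [FEEDBACK_ACCURACY]: [0.5] },
    };
    const data = 'retailername';
    const accuracy = selectedFileDataSource[data]?.[FEEDBACK_ACCURACY];
    const shouldRevised =
      Array.isArray(accuracy) && accuracy.length > 0 && accuracy[0] < 1; // true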
@ -819,7 +858,7 @@ const ReviewPage = () => {
   options={SOLUTION_BAD_QUALITY}
   onChange={setSolution}
   value={solution}
   // defaultValue={solution}
 />
 {solution === 'other' && (
 <Input
@ -836,7 +875,7 @@ const ReviewPage = () => {
 <div style={{ display: 'flex', justifyContent: 'flex-end' }}>
 <Button
   type='primary'
-  // color='success'
+  color='success'
   size='middle'
   style={{
     height: '36px',
@ -849,7 +888,7 @@ const ReviewPage = () => {
 </div>
 </div>
 <Modal
-  title={t`Review Filters`}
+  title={t`Report Filters`}
   open={isModalOpen}
   width={700}
   onOk={() => {
@ -864,75 +903,31 @@ const ReviewPage = () => {
   style={{
     marginTop: 30,
   }}
-  layout="vertical"
 >
-  <div
-    style={{
-      display: 'flex',
-      justifyContent: 'space-between',
-      marginLeft: 0,
-      padding: 0,
-    }}
-  >
-    <Form.Item
-      name='dateRange'
-      label={t`Date (GMT+8)`}
-      rules={[
-        {
-          required: true,
-          message: 'Please select a date range',
-        },
-      ]}
-      style={{
-        flex: 1,
-      }}
-    >
-      <DatePicker.RangePicker
-        onChange={(date, dateString) => {
-          setFilterDateRange(dateString);
-        }}
-        style={{ width: 300 }}
-      />
-    </Form.Item>
-    <div style={{
-      flex: 1,
-    }}>
-      <Form.Item
-        name='bad_reason'
-        label={t`Bad image reason`}
-      >
-        <Select
-          placeholder='Select a reason'
-          style={{ width: 300 }}
-          options={REASON_BAD_QUALITY}
-          value={filterBadReason}
-          defaultValue={filterBadReason}
-          onChange={setFilterBadReason}
-        />
-      </Form.Item>
-      {filterBadReason === 'other' && (
-        <Form.Item
-          name='other_reason'
-          style={{
-            flex: 1,
-          }}
-        >
-          <Input
-            placeholder='Other reason'
-            value={filterOtherReason}
-            style={{ width: 300 }}
-            onChange={(e) => {
-              setFilterOtherReason(e.target.value);
-            }}
-          />
-        </Form.Item>
-      )}
-    </div>
-  </div>
+  <Form.Item
+    name='dateRange'
+    label={t`Date (GMT+8)`}
+    rules={[
+      {
+        required: true,
+        message: 'Please select a date range',
+      },
+    ]}
+    style={{
+      marginBottom: 24,
+    }}
+  >
+    <DatePicker.RangePicker
+      onChange={(date, dateString) => {
+        setFilterDateRange(dateString);
+      }}
+      style={{ width: 200 }}
+    />
+  </Form.Item>
   <div
     style={{
+      marginTop: 10,
       display: 'flex',
       justifyContent: 'space-between',
       marginLeft: 0,
@ -948,13 +943,10 @@ const ReviewPage = () => {
       message: 'Please select a subsidiary',
     },
   ]}
-  style={{
-    flex: 1,
-  }}
 >
   <Select
     placeholder='Select a subsidiary'
-    style={{ width: 300 }}
+    style={{ width: 200 }}
     options={SUBSIDIARIES}
     value={filterSubsidiaries}
     defaultValue={filterSubsidiaries}
@ -970,12 +962,8 @@ const ReviewPage = () => {
       message: 'Please select max accuracy',
     },
   ]}
-  style={{
-    flex: 1,
-  }}
 >
   <InputNumber
-    style={{ width: 300 }}
     min={1}
     max={100}
     defaultValue={filterAccuracy}
@ -985,7 +973,7 @@ const ReviewPage = () => {
 </div>
 <div
   style={{
+    marginTop: 10,
     display: 'flex',
     justifyContent: 'space-between',
     marginLeft: 0,
@ -1001,12 +989,9 @@ const ReviewPage = () => {
       message: 'Please select review status',
     },
   ]}
-  style={{
-    flex: 1,
-  }}
 >
   <Select
-    style={{ width: 300 }}
+    style={{ width: 200 }}
     options={[
       { label: 'All', value: 'all' },
       { label: 'Reviewed', value: 'reviewed' },
@ -1026,10 +1011,10 @@ const ReviewPage = () => {
       message: 'Please select test status',
     },
   ]}
-  style={{ flex: 1 }}
+  style={{ marginLeft: 16 }}
 >
   <Select
-    style={{ width: 300 }}
+    style={{ width: 200 }}
     options={[
       { label: 'Include tests', value: 'true' },
       { label: 'Exclude tests', value: 'false' },
@ -1040,82 +1025,6 @@ const ReviewPage = () => {
   />
 </Form.Item>
 </div>
-{/* add 4 more filter fields */}
-<div
-  style={{
-    display: 'flex',
-    justifyContent: 'space-between',
-    marginLeft: 0,
-    padding: 0,
-  }}
->
-  <Form.Item
-    name='doc_type'
-    label={t`Only type`}
-    style={{
-      flex: 1,
-    }}
-  >
-    <Select
-      placeholder='Select a document type'
-      style={{ width: 300 }}
-      options={DOCTYPE}
-      value={filterDoctype}
-      defaultValue={filterDoctype}
-      onChange={setFilterDoctype}
-    />
-  </Form.Item>
-  <Form.Item
-    name='feedback_result'
-    label={t`Feedback includes`}
-    style={{
-      flex: 1,
-    }}
-  >
-    <Input
-      style={{ width: 300 }}
-      defaultValue={filterFeedbackResult}
-      onChange={(e) => setFilterFeedbackResult(e.target.value)}
-    />
-  </Form.Item>
-</div>
-<div
-  style={{
-    display: 'flex',
-    justifyContent: 'space-between',
-    marginLeft: 0,
-    padding: 0,
-  }}
->
-  <Form.Item
-    name='predict_result'
-    label={t`Predict includes`}
-    style={{
-      flex: 1,
-    }}
-  >
-    <Input
-      style={{ width: 300 }}
-      defaultValue={filterPredictResult}
-      onChange={(e) => setFilterPredictResult(e.target.value)}
-    />
-  </Form.Item>
-  <Form.Item
-    name='reviewed_result'
-    label={t`Review includes`}
-    style={{
-      flex: 1,
-    }}
-  >
-    <Input
-      style={{ width: 300 }}
-      defaultValue={filterReviewedResult}
-      onChange={(e) => setFilterReviewedResult(e.target.value)}
-    />
-  </Form.Item>
-</div>
 </Form>
 </Modal>
 <Modal
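Several Form.Item fields in this form set both value and defaultValue on their child while also carrying a name; an antd Form.Item with a name injects value/onChange into its child itself, so the documented pattern is to leave the child uncontrolled and seed the form through initialValues. A minimal sketch (FilterForm and the `subsidiary` field name are illustrative; Form, Select, and SUBSIDIARIES are assumed to be the imports already in this file):

    // Sketch: the Form owns the field state instead of the manually controlled child.
    const FilterForm = ({ filterSubsidiaries, setFilterSubsidiaries }) => (
      <Form initialValues={{ subsidiary: filterSubsidiaries }}>
        <Form.Item name='subsidiary' label='Only subsidiary'>
          <Select
            style={{ width: 200 }}
            options={SUBSIDIARIES}
            onChange={setFilterSubsidiaries} // still fires; Form.Item wraps it
          />
        </Form.Item>
      </Form>
    );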


@ -11,7 +11,7 @@ const environment = process.env.NODE_ENV;
 const AXIOS_TIMEOUT_MS = 30 * 60 * 1000; // This config satisfies long-lived upload file requests
 const EXPIRED_PASSWORD_SIGNAL = 'expired_password';
-export const baseURL = environment === 'development' ? 'http://107.120.133.27:19001/api' : '/api';
+export const baseURL = environment === 'development' ? 'http://107.120.133.27:9881/api' : '/api';
 // export const baseURL = '/api';
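The development host and port are hard-coded and have changed across these commits (19001, then 9881). One alternative is to read the dev endpoint from a build-time variable; REACT_APP_API_BASE below is hypothetical, not something defined in this repository:

    // Sketch only: REACT_APP_API_BASE is an assumed env var, with the current
    // literal kept as the fallback.
    export const baseURL =
      environment === 'development'
        ? process.env.REACT_APP_API_BASE ?? 'http://107.120.133.27:9881/api'
        : '/api';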


@ -30,14 +30,10 @@ services:
       reservations:
         devices:
           - driver: nvidia
-            count: 1
+            count: all
             capabilities: [gpu]
-    # command: bash -c "tail -f > /dev/null"
     command: bash run.sh
-    # deploy:
-    #   mode: replicated
-    #   replicas: 1
-# Back-end services
   be-ctel-sbt:
     build:
       context: cope2n-api
@ -93,12 +89,11 @@ services:
     depends_on:
       db-sbt:
         condition: service_started
-    command: sh -c "sudo chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
+    command: sh -c "python manage.py collectstatic --no-input &&
       python manage.py makemigrations &&
       python manage.py migrate &&
       python manage.py compilemessages &&
       gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod
-    # command: "sleep infinity"
   minio:
     image: minio/minio
@ -186,7 +181,7 @@ services:
       - ./cope2n-api:/app
     working_dir: /app
-    command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5"
+    command: sh -c "celery -A fwd worker -l INFO -c 5"
     # command: bash -c "tail -f > /dev/null"
 # Back-end persistent
@ -274,4 +269,4 @@ volumes:
   db_data:
   BE_static:
   BE_media:
   BE_log: