import logging
import logging.config
import os
import time

from opentelemetry import trace

from api.sdsap_sbt.prediction_sbt import predict as predict_sbt
from common.utils_kvu.split_docs import merge_sbt_output
from utils.logging.logging import LOGGER_CONFIG

logging.config.dictConfig(LOGGER_CONFIG)
logger = logging.getLogger(__name__)
# get_tracer() requires the instrumenting module name as its first argument;
# calling it with no arguments raises TypeError at import time.
tracer = trace.get_tracer(__name__)

# NOTE(review): hard-coded, machine-specific path. It is set after the imports
# above have already run, so it presumably only matters for child processes —
# confirm whether it is still needed.
os.environ['PYTHONPATH'] = '/home/thucpd/thucpd/cope2n-ai/cope2n-ai/'


def check_label_exists(array, target_label):
    """Return True if any item in ``array`` has ``item["label"] == target_label``.

    Args:
        array: Iterable of mappings, each providing a ``"label"`` key.
        target_label: Label value to look for.

    Returns:
        bool: True when a matching label is found, False otherwise
        (including when ``array`` is empty).

    Raises:
        KeyError: If an item inspected before a match has no ``"label"`` key.
    """
    return any(obj["label"] == target_label for obj in array)


def update_null_values(kvu_result, next_kvu_result):
    """Fill ``None`` values of ``kvu_result`` from ``next_kvu_result`` in place.

    For every key whose value in ``kvu_result`` is ``None``, the value is
    replaced by ``next_kvu_result[key]`` when that one is present and not
    ``None``. Keys that only exist in ``next_kvu_result`` are not added.

    Args:
        kvu_result (dict): Dictionary to update; mutated in place.
        next_kvu_result (dict): Dictionary supplying fallback values.

    Returns:
        None
    """
    # Only values of existing keys are reassigned, so the dict size never
    # changes while it is being iterated.
    for key, value in kvu_result.items():
        if value is not None:
            continue
        fallback = next_kvu_result.get(key)
        if fallback is not None:
            kvu_result[key] = fallback


def replace_empty_null_values(my_dict):
    """Replace every empty-string value in ``my_dict`` with ``None`` in place.

    Args:
        my_dict (dict): Dictionary to normalise; mutated in place.

    Returns:
        dict: The same ``my_dict`` object, for convenience.
    """
    # Only values of existing keys are reassigned, which is safe during
    # iteration because the set of keys is unchanged.
    for key, value in my_dict.items():
        if value == '':
            my_dict[key] = None
    return my_dict


@tracer.start_as_current_span("compile_output_sbt")
def compile_output_sbt(list_url, metadata):
    """Run SBT prediction on every page and merge the results into one output.

    Args:
        list_url (list): One dict per page. Each dict must provide
            ``"page_number"`` and ``"file_url"`` and may provide
            ``"doc_type"``, which is copied onto that page's model output.
        metadata: Passed unchanged to ``predict_sbt`` for every page.

    Returns:
        dict: A dictionary with the keys

        * ``"total_pages"`` / ``"ocr_num_pages"``: ``len(list_url)``.
        * ``"document"``: the value returned by ``merge_sbt_output`` for the
          per-page model outputs.
        * ``"inference_profile"``: timing data as ``time.time()`` timestamps;
          ``"inference"`` is ``[start, [finish time of each page, ...]]`` and
          ``"postprocess"`` is ``[start, end]`` of the merge step.
    """
    page_outputs = []
    page_finish_times = []

    inference_start = time.time()
    for page in list_url:
        page_output = predict_sbt(page['page_number'], page['file_url'], metadata)
        # Timestamp is taken right after prediction so it reflects model time
        # only, not the bookkeeping below.
        page_finish_times.append(time.time())
        if "doc_type" in page:
            page_output['doc_type'] = page['doc_type']
        page_outputs.append(page_output)

    postprocess_start = time.time()
    documents = merge_sbt_output(page_outputs)
    postprocess_end = time.time()

    inference_profile = {
        "postprocess": [postprocess_start, postprocess_end],
        "inference": [inference_start, page_finish_times],
    }

    page_count = len(list_url)
    return {
        "total_pages": page_count,
        "ocr_num_pages": page_count,
        "document": documents,
        "inference_profile": inference_profile,
    }