sbt-idp/cope2n-ai-fi/api/manulife/predict_manulife.py

105 lines
3.0 KiB
Python
Raw Permalink Normal View History

2023-11-30 11:22:16 +00:00
import cv2
import urllib
import random
import numpy as np
from pathlib import Path
import sys, os
cur_dir = str(Path(__file__).parents[2])
sys.path.append(cur_dir)
from modules.sdsvkvu import load_engine, process_img
from modules.ocr_engine import OcrEngine
from configs.manulife import device, ocr_cfg, kvu_cfg
2024-07-05 13:14:47 +00:00
import logging
import logging.config
from utils.logging.logging import LOGGER_CONFIG
# Apply the logging configuration dict imported from utils.logging.logging.
# This runs at import time, before any logger in this module emits a record.
logging.config.dictConfig(LOGGER_CONFIG)
# Module-level logger, named after this module, used by every function below.
logger = logging.getLogger(__name__)
2023-11-30 11:22:16 +00:00
def load_ocr_engine(opt) -> OcrEngine:
    """Build an ``OcrEngine`` from a keyword-option mapping.

    Args:
        opt: mapping unpacked as keyword arguments into ``OcrEngine``.

    Returns:
        OcrEngine: the freshly constructed engine.
    """
    logger.info("[INFO] Loading engine...")
    ocr = OcrEngine(**opt)
    logger.info("[INFO] Engine loaded")
    return ocr
2024-07-05 13:14:47 +00:00
# Log both configurations. The "%s" placeholder is required: without it the
# extra positional argument is never rendered into the message, so the
# configuration would not appear in the log at all.
logger.info("OCR engine configfs: \n%s", ocr_cfg)
logger.info("KVU configfs: \n%s", kvu_cfg)

# Build the OCR engine once at import time and hand it to the KVU engine
# through its configuration.
ocr_engine = load_ocr_engine(ocr_cfg)
kvu_cfg['ocr_engine'] = ocr_engine
# "option" is not passed to load_engine; it is removed from the config and
# kept at module level, where manulife_predict() forwards it to process_img().
option = kvu_cfg.pop("option")
manulife_engine = load_engine(kvu_cfg)
def manulife_predict(image_url, engine) -> dict:
    """Download an image, store it on disk and run the KVU engine on it.

    Args:
        image_url (str): URL of the image; it is opened with
            ``urllib.request.urlopen``, so it must carry a scheme
            (``http://``, ``https://``, ``file://`` ...).
        engine: KVU engine forwarded unchanged to ``process_img``.

    Returns:
        dict: whatever ``process_img`` returns. ``predict`` reads it as a
        mapping of field name to value, including ``"title"`` and
        ``"class_doc"`` -- NOTE(review): confirm against ``process_img``.

    Raises:
        ValueError: if the downloaded bytes cannot be decoded as an image.
        OSError: if the decoded image cannot be written to ``save_dir``.
    """
    # A bare `import urllib` does not guarantee that the `request` submodule
    # is loaded, so import it explicitly where it is used.
    import urllib.request

    # Close the response as soon as the payload has been read.
    with urllib.request.urlopen(image_url) as response:
        arr = np.asarray(bytearray(response.read()), dtype=np.uint8)

    # Flag -1 keeps the image as stored (no forced colour conversion).
    img = cv2.imdecode(arr, -1)
    if img is None:
        raise ValueError(f"Could not decode an image from {image_url!r}")

    save_dir = "./tmp_results"
    # cv2.imwrite does not create missing directories; it just returns False.
    os.makedirs(save_dir, exist_ok=True)
    # NOTE(review): the file name is fixed, so concurrent calls overwrite each
    # other's input. Kept as-is because process_img may derive its output
    # names from it -- confirm before switching to a unique name.
    image_path = os.path.join(save_dir, "abc.jpg")
    if not cv2.imwrite(image_path, img):
        # Without this check a stale abc.jpg from an earlier call could be
        # processed instead of the requested image.
        raise OSError(f"Could not write image to {image_path!r}")

    outputs = process_img(img_path=image_path,
                          save_dir=save_dir,
                          engine=engine,
                          export_all=False,
                          option=option)
    return outputs
def predict(page_numb, image_url):
    """Run the Manulife KVU engine on one page image and format the result.

    Args:
        page_numb: page number of the image; echoed back as "page_number"
            and as the "page" of every field.
        image_url (str): image url, forwarded to ``manulife_predict``.

    Returns:
        dict: output of model, for example:
            {
                "document_type": "invoice",
                "document_class": "unknown",
                "page_number": 0,
                "fields": [
                    {
                        "label": "Invoice Number",
                        "value": "INV-12345",
                        "box": [0, 0, 0, 0],
                        "confidence": 0.98,
                        "page": 0
                    },
                    ...
                ]
            }
    """
    kvu_result = manulife_predict(image_url, engine=manulife_engine)

    # A missing key is treated like a None value: both become "unknown".
    title = kvu_result.get('title')
    class_doc = kvu_result.get('class_doc')

    output_dict = {
        "document_type": title if title is not None else "unknown",
        "document_class": class_doc if class_doc is not None else "unknown",
        "page_number": page_numb,
        # Every remaining key of the KVU result becomes one field entry.
        "fields": [
            {
                "label": key,
                "value": value,
                # The box is a fixed placeholder, not a real location.
                "box": [0, 0, 0, 0],
                # NOTE(review): confidence is a random placeholder in
                # [0.9, 1.0], not a score produced by the model.
                "confidence": random.uniform(0.9, 1.0),
                "page": page_numb,
            }
            for key, value in kvu_result.items()
            if key not in ("title", "class_doc")
        ],
    }
    logger.info(output_dict)
    return output_dict
if __name__ == "__main__":
    # Manual smoke test on a local sample image. manulife_predict() fetches
    # its input with urllib.request.urlopen, which rejects a bare filesystem
    # path ("unknown url type"), so the path is converted to a file:// URL.
    image_url = Path("/root/thucpd/20230322144639VUzu_16794962527791962785161104697882.jpg").as_uri()
    output = predict(0, image_url)
    logger.info(output)