import logging
import logging.config
import sys
from pathlib import Path
from typing import TYPE_CHECKING, List, Tuple, Union

import numpy as np

# NOTE: the OCR engine used to be imported directly from the ocr-engine
# checkout:
#   sys.path.append('/home/thucpd/thucpd/PV2-2023/common/AnyKey_Value/ocr-engine')
#   from src.ocr import OcrEngine
# It is now obtained from the already-initialised ``serve_model`` module.
# TODO: replace this hard-coded, machine-specific path with proper
# packaging/configuration.
sys.path.append('/home/thucpd/thucpd/git/PV2-2023/kie-invoice/components/prediction')
import serve_model  # noqa: E402  (must follow the sys.path tweak above)

from utils.logging.logging import LOGGER_CONFIG  # noqa: E402

if TYPE_CHECKING:
    # Only needed for static analysis; never imported at runtime.
    # NOTE(review): module path taken from the legacy import above - confirm
    # it resolves in the type-checker's environment.
    from src.ocr import OcrEngine

# Load the logging configuration
logging.config.dictConfig(LOGGER_CONFIG)

# Get the logger
logger = logging.getLogger(__name__)


def load_ocr_engine() -> "OcrEngine":
    """Return the OCR engine instance exposed by ``serve_model``.

    The return annotation is a quoted forward reference because ``OcrEngine``
    is not imported at runtime; an unquoted name would raise ``NameError``
    when this module is imported.
    """
    logger.info("[INFO] Loading engine...")
    engine = serve_model.engine
    logger.info("[INFO] Engine loaded")
    return engine


def process_img(img: Union[str, np.ndarray], save_dir_or_path: str,
                engine: "OcrEngine", export_img: bool) -> None:
    """Run OCR on ``img`` and write the result to a file.

    Args:
        img: Image path, or an image already loaded as a numpy array.
        save_dir_or_path: Either an existing directory (the output file is
            then named ``<image stem>.txt`` inside it) or the full path of
            the output file.
        engine: Callable OCR engine; ``engine(img)`` must return a page
            object providing ``write_to_file`` and ``save_img``.
        export_img: When true, also save an image next to the text output,
            using the same path with a ``.jpg`` extension.

    Raises:
        ValueError: If ``img`` is a numpy array and ``save_dir_or_path`` is a
            directory, since an array has no file name to derive the output
            name from.
    """
    save_target = Path(save_dir_or_path)
    target_is_dir = save_target.is_dir()

    if isinstance(img, np.ndarray) and target_is_dir:
        raise ValueError("numpy array input require a save path, not a save dir")

    page = engine(img)

    if target_is_dir:
        save_path = str(save_target.joinpath(Path(img).stem + ".txt"))
    else:
        save_path = str(save_target)
    page.write_to_file('word', save_path)

    if export_img:
        # Swap the extension rather than str.replace(".txt", ".jpg"): replace
        # leaves the path unchanged when it has no ".txt" (the image would
        # then overwrite the text output) and would also rewrite ".txt"
        # occurring inside directory names.
        img_path = str(Path(save_path).with_suffix(".jpg"))
        page.save_img(img_path, is_vnese=True)


def read_img(img: Union[str, np.ndarray], engine: "OcrEngine") -> str:
    """Run OCR on ``img`` and return the text of ``page.llines`` joined by spaces.

    Args:
        img: Image path, or an image already loaded as a numpy array.
        engine: Callable OCR engine; ``engine(img)`` must return a page
            object exposing ``llines``, each item having a ``text`` attribute.

    Returns:
        The ``text`` of every item in ``page.llines``, separated by single
        spaces (an empty string when there are none).
    """
    page = engine(img)
    return ' '.join(line.text for line in page.llines)