40 lines
1.6 KiB
Python
Executable File
40 lines
1.6 KiB
Python
Executable File
import numpy as np
|
|
from pathlib import Path
|
|
from typing import Union, Tuple, List
|
|
import sys
|
|
# sys.path.append('/home/thucpd/thucpd/PV2-2023/common/AnyKey_Value/ocr-engine')
|
|
# from src.ocr import OcrEngine
|
|
sys.path.append('/home/thucpd/thucpd/git/PV2-2023/kie-invoice/components/prediction') # TODO: ??????
|
|
import serve_model
|
|
import logging
|
|
import logging.config
|
|
from utils.logging.logging import LOGGER_CONFIG
|
|
# Load the logging configuration
|
|
logging.config.dictConfig(LOGGER_CONFIG)
|
|
# Get the logger
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# def load_ocr_engine() -> OcrEngine:
|
|
def load_ocr_engine() -> "OcrEngine":
    """Return the OCR engine object exposed by ``serve_model``.

    The return annotation is a string forward reference because ``OcrEngine``
    is not imported in this module; a bare name would raise ``NameError``
    as soon as the function is defined.

    Returns:
        The ``serve_model.engine`` attribute, unchanged.
    """
    logger.info("[INFO] Loading engine...")
    # Reuse the engine held by serve_model rather than constructing an
    # OcrEngine here.
    engine = serve_model.engine
    logger.info("[INFO] Engine loaded")
    return engine
|
|
|
|
def process_img(
    img: Union[str, np.ndarray],
    save_dir_or_path: str,
    engine: "OcrEngine",
    export_img: bool,
) -> None:
    """Run the OCR engine on one image and write the result to a text file.

    Args:
        img: Image path, or an image already loaded as a numpy array.
        save_dir_or_path: Either an existing directory, in which case the
            output is named ``<image stem>.txt`` inside it, or the output
            file path itself.
        engine: Callable invoked as ``engine(img)``; the returned page object
            must provide ``write_to_file`` and ``save_img``.
        export_img: When true, ``page.save_img`` is also called with the
            output path re-suffixed to ``.jpg``.

    Raises:
        ValueError: If ``img`` is a numpy array and ``save_dir_or_path`` is
            an existing directory, because an array has no file name from
            which to derive the output name.
    """
    target = Path(save_dir_or_path)
    # Query the filesystem once; the answer drives both the validation and
    # the output-path choice below.
    target_is_dir = target.is_dir()

    if target_is_dir and isinstance(img, np.ndarray):
        raise ValueError("numpy array input require a save path, not a save dir")

    page = engine(img)

    if target_is_dir:
        # img is a path-like string here (arrays were rejected above).
        save_path = str(target.joinpath(Path(img).stem + ".txt"))
    else:
        save_path = str(target)
    page.write_to_file('word', save_path)

    if export_img:
        # Swap only the final suffix. A plain str.replace(".txt", ".jpg")
        # would also rewrite ".txt" inside directory names and would leave
        # the path untouched (overwriting the text output) when the file
        # does not end in ".txt".
        img_path = str(Path(save_path).with_suffix(".jpg"))
        # NOTE(review): is_vnese presumably selects Vietnamese rendering;
        # confirm against the page class.
        page.save_img(img_path, is_vnese=True)
|
|
|
|
def read_img(img: Union[str, np.ndarray], engine: "OcrEngine") -> str:
    """Run the OCR engine on an image and return its text as one string.

    Args:
        img: Image path, or an image already loaded as a numpy array; it is
            passed to ``engine`` unchanged.
        engine: Callable invoked as ``engine(img)``; the returned page object
            must expose ``llines``, an iterable of items with a ``text``
            attribute.

    Returns:
        The ``text`` of every item in ``page.llines``, joined by single
        spaces (an empty string when there are none).
    """
    page = engine(img)
    return ' '.join(line.text for line in page.llines)