# sbt-idp/cope2n-ai-fi/common/AnyKey_Value/utils/run_ocr.py
# 2023-11-30 18:22:16 +07:00
#
# 33 lines
# 1.3 KiB
# Python
# Executable File

import numpy as np
from pathlib import Path
from typing import Union, Tuple, List
import sys
# sys.path.append('/home/thucpd/thucpd/PV2-2023/common/AnyKey_Value/ocr-engine')
# from src.ocr import OcrEngine
sys.path.append('/home/thucpd/thucpd/git/PV2-2023/kie-invoice/components/prediction') # TODO: ??????
import serve_model
# def load_ocr_engine() -> OcrEngine:
def load_ocr_engine() -> "OcrEngine":
    """Return the OCR engine instance exposed by ``serve_model``.

    The engine is not constructed here; the function hands back the
    ``engine`` attribute of the already-imported ``serve_model`` module and
    prints progress messages around the lookup.

    The return annotation is a quoted forward reference because the
    ``OcrEngine`` import is commented out in this module; an unquoted
    annotation would be evaluated at definition time and raise ``NameError``
    on import.

    Returns:
        The object bound to ``serve_model.engine``.
    """
    print("[INFO] Loading engine...")
    engine = serve_model.engine
    print("[INFO] Engine loaded")
    return engine
def process_img(img: Union[str, np.ndarray], save_dir_or_path: str, engine: "OcrEngine", export_img: bool) -> None:
    """Run OCR on one image and write the result to disk.

    Args:
        img: Path to an image file, or an image already loaded as a numpy
            array.
        save_dir_or_path: Either an existing directory (the output file is
            then named after the input image's stem with a ``.txt``
            extension) or the full path of the output file.
        engine: Callable OCR engine; ``engine(img)`` must return a page
            object providing ``write_to_file`` and ``save_img``.
        export_img: When true, also save an image next to the text output,
            using the same path with a ``.jpg`` suffix.

    Raises:
        ValueError: If ``img`` is a numpy array and ``save_dir_or_path`` is
            a directory, since no file name can be derived from an array.
    """
    # ``engine`` is annotated with a quoted name because ``OcrEngine`` is not
    # imported in this module; an unquoted annotation would fail at import.
    target = Path(save_dir_or_path)
    target_is_dir = target.is_dir()
    if isinstance(img, np.ndarray) and target_is_dir:
        raise ValueError("numpy array input require a save path, not a save dir")

    page = engine(img)

    if target_is_dir:
        save_path = str(target / (Path(img).stem + ".txt"))
    else:
        save_path = str(target)
    page.write_to_file('word', save_path)

    if export_img:
        # Swap only the final suffix. A plain str.replace(".txt", ".jpg")
        # leaves the path unchanged when it does not contain ".txt" (so the
        # image would overwrite the text output) and would also rewrite
        # ".txt" appearing in a directory name.
        img_path = str(Path(save_path).with_suffix(".jpg"))
        page.save_img(img_path, is_vnese=True)
def read_img(img: Union[str, np.ndarray], engine: "OcrEngine") -> str:
    """Run OCR on an image and return the recognized text as one string.

    Args:
        img: Path to an image file, or an image loaded as a numpy array;
            passed to ``engine`` unchanged.
        engine: Callable OCR engine; ``engine(img)`` must return a page
            object whose ``llines`` attribute is an iterable of items with a
            ``text`` attribute.

    Returns:
        The ``text`` of every item in ``page.llines``, joined by single
        spaces (an empty string when there are none).
    """
    # ``engine`` is annotated with a quoted name because ``OcrEngine`` is not
    # imported in this module; an unquoted annotation would fail at import.
    page = engine(img)
    return ' '.join(line.text for line in page.llines)