sbt-idp/cope2n-ai-fi/modules/ocr_engine/src/utils.py

370 lines
14 KiB
Python
Raw Normal View History

2023-11-30 11:22:16 +00:00
from PIL import ImageFont, ImageDraw, Image, ImageOps
# import matplotlib.pyplot as plt
import numpy as np
import cv2
import os
import time
from typing import Generator, Union, List, overload, Tuple, Callable
import glob
import math
from pathlib import Path
from pdf2image import convert_from_path
# from deskew import determine_skew
# from jdeskew.estimator import get_angle
# from jdeskew.utility import rotate as jrotate
def post_process_recog(text: str) -> str:
    """Post-process a recognized text string.

    Replaces every occurrence of the search pattern with a single space and
    returns the result.

    NOTE(review): the search pattern is the empty string as it appears in this
    file. With an empty pattern ``str.replace`` inserts a space before every
    character and at the end (``"ab"`` -> ``" a b "``). Presumably a special
    marker character was intended here -- confirm against the recognizer's
    output format.
    """
    return text.replace("", " ")
def find_maximum_without_outliers(lst: List[int], threshold: float = 1.) -> int:
    '''
    Return the maximum of ``lst`` after discarding outliers.

    Outliers are detected with the interquartile range (IQR): a value is kept
    only when it lies inside ``[q1 - threshold * iqr, q3 + threshold * iqr]``,
    where ``q1``/``q3`` are the 25th/75th percentiles and ``iqr = q3 - q1``.

    args:
        lst: non-empty sequence of numbers
        threshold: multiplier applied to the IQR when building the bounds
    return
        the largest element of ``lst`` that is not an outlier
    raises
        ValueError: if ``lst`` is empty, or if no value lies inside the bounds
            (possible when ``threshold`` is very small or negative)
    '''
    # len() rather than truthiness so array-like inputs are handled as well.
    if len(lst) == 0:
        raise ValueError("Cannot find the maximum of an empty list")
    # Calculate the lower and upper boundaries for outliers
    q1, q3 = np.percentile(lst, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - threshold * iqr
    upper_bound = q3 + threshold * iqr
    # Filter out outlier values; the comprehension keeps the original element type
    non_outliers = [x for x in lst if lower_bound <= x <= upper_bound]
    if not non_outliers:
        raise ValueError(
            "No value lies within the outlier bounds [{}, {}]".format(lower_bound, upper_bound))
    # Find the maximum value among non-outliers
    return max(non_outliers)
class Timer:
    """Context manager that times its ``with`` body and prints the duration.

    After the block exits, ``start_time``, ``end_time`` and ``elapsed_time``
    (all ``time.perf_counter()`` based, in seconds) are available on the
    instance.
    """

    def __init__(self, name: str) -> None:
        # Label used in the printed report.
        self.name = name

    def __enter__(self):
        """Record the start timestamp and return the timer itself."""
        self.start_time = time.perf_counter()
        return self

    def __exit__(self, func: Callable, *args):
        """Record the end timestamp, compute the elapsed time and print it.

        The arguments are the exception details supplied by the ``with``
        statement; they are ignored. Nothing is returned, so an exception
        raised inside the block keeps propagating.
        """
        finished_at = time.perf_counter()
        self.end_time = finished_at
        self.elapsed_time = self.end_time - self.start_time
        print(f"[INFO]: {self.name} took : {self.elapsed_time:.6f} seconds")
# def rotate(
# image: np.ndarray, angle: float, background: Union[int, Tuple[int, int, int]]
# ) -> np.ndarray:
# old_width, old_height = image.shape[:2]
# angle_radian = math.radians(angle)
# width = abs(np.sin(angle_radian) * old_height) + abs(np.cos(angle_radian) * old_width)
# height = abs(np.sin(angle_radian) * old_width) + abs(np.cos(angle_radian) * old_height)
# image_center = tuple(np.array(image.shape[1::-1]) / 2)
# rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
# rot_mat[1, 2] += (width - old_width) / 2
# rot_mat[0, 2] += (height - old_height) / 2
# return cv2.warpAffine(image, rot_mat, (int(round(height)), int(round(width))), borderValue=background)
# def rotate_bbox(bbox: list, angle: float) -> list:
# # Compute the center point of the bounding box
# cx = bbox[0] + bbox[2] / 2
# cy = bbox[1] + bbox[3] / 2
# # Define the scale factor for the rotated bounding box
# scale = 1.0 # following the deskew and jdeskew function
# angle_radian = math.radians(angle)
# # Obtain the rotation matrix using cv2.getRotationMatrix2D()
# M = cv2.getRotationMatrix2D((cx, cy), angle_radian, scale)
# # Apply the rotation matrix to the four corners of the bounding box
# corners = np.array([[bbox[0], bbox[1]],
# [bbox[0] + bbox[2], bbox[1]],
# [bbox[0] + bbox[2], bbox[1] + bbox[3]],
# [bbox[0], bbox[1] + bbox[3]]], dtype=np.float32)
# rotated_corners = cv2.transform(np.array([corners]), M)[0]
# # Compute the bounding box of the rotated corners
# x = int(np.min(rotated_corners[:, 0]))
# y = int(np.min(rotated_corners[:, 1]))
# w = int(np.max(rotated_corners[:, 0]) - np.min(rotated_corners[:, 0]))
# h = int(np.max(rotated_corners[:, 1]) - np.min(rotated_corners[:, 1]))
# rotated_bbox = [x, y, w, h]
# return rotated_bbox
# def rotate_bbox(bbox: List[int], angle: float, old_shape: Tuple[int, int]) -> List[int]:
# # https://medium.com/@pokomaru/image-and-bounding-box-rotation-using-opencv-python-2def6c39453
# bbox_ = [bbox[0], bbox[1], bbox[2], bbox[1], bbox[2], bbox[3], bbox[0], bbox[3]]
# h, w = old_shape
# cx, cy = (int(w / 2), int(h / 2))
# bbox_tuple = [
# (bbox_[0], bbox_[1]),
# (bbox_[2], bbox_[3]),
# (bbox_[4], bbox_[5]),
# (bbox_[6], bbox_[7]),
# ] # put x and y coordinates in tuples, we will iterate through the tuples and perform rotation
# rotated_bbox = []
# for i, coord in enumerate(bbox_tuple):
# M = cv2.getRotationMatrix2D((cx, cy), angle, 1.0)
# cos, sin = abs(M[0, 0]), abs(M[0, 1])
# newW = int((h * sin) + (w * cos))
# newH = int((h * cos) + (w * sin))
# M[0, 2] += (newW / 2) - cx
# M[1, 2] += (newH / 2) - cy
# v = [coord[0], coord[1], 1]
# adjusted_coord = np.dot(M, v)
# rotated_bbox.insert(i, (adjusted_coord[0], adjusted_coord[1]))
# result = [int(x) for t in rotated_bbox for x in t]
# return [result[i] for i in [0, 1, 2, -1]] # reformat to xyxy
# def deskew(image: np.ndarray) -> Tuple[np.ndarray, float]:
# grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# angle = 0.
# try:
# angle = determine_skew(grayscale)
# except Exception:
# pass
# rotated = rotate(image, angle, (0, 0, 0)) if angle else image
# return rotated, angle
# def jdeskew(image: np.ndarray) -> Tuple[np.ndarray, float]:
# angle = 0.
# try:
# angle = get_angle(image)
# except Exception:
# pass
# # TODO: change resize = True and scale the bounding box
# rotated = jrotate(image, angle, resize=False) if angle else image
# return rotated, angle
# def deskew()
class ImageReader:
    """
    accept anything, return numpy array image

    Supported inputs are a file path, a directory path, a PDF path, a numpy
    array, a PIL image, or a list mixing paths / arrays / PIL images. Images
    loaded through PIL are converted to RGB before becoming numpy arrays.
    """
    # Lower-case file suffixes accepted by validate_img_path.
    supported_ext = [".png", ".jpg", ".jpeg", ".pdf", ".gif"]

    @staticmethod
    def validate_img_path(img_path: str) -> None:
        """Check that ``img_path`` is an existing file with a supported suffix.

        Raises:
            FileNotFoundError: the path does not exist.
            IsADirectoryError: the path is a directory.
            NotImplementedError: the (case-insensitive) suffix is not in
                ``supported_ext``.
        """
        if not os.path.exists(img_path):
            raise FileNotFoundError(img_path)
        if os.path.isdir(img_path):
            raise IsADirectoryError(img_path)
        if Path(img_path).suffix.lower() not in ImageReader.supported_ext:
            raise NotImplementedError("Not supported extension at {}".format(img_path))

    @overload
    @staticmethod
    def read(img: Union[str, np.ndarray, Image.Image]) -> np.ndarray: ...

    @overload
    @staticmethod
    def read(img: List[Union[str, np.ndarray, Image.Image]]) -> List[np.ndarray]: ...

    @overload
    @staticmethod
    def read(img: str) -> List[np.ndarray]: ...  # for pdf or directory

    @staticmethod
    def read(img):
        """Dispatch ``img`` to the matching reader.

        A list goes to ``from_list``, a directory path to ``from_dir``, a path
        ending in ``.pdf`` (any letter case) to ``from_pdf``; everything else
        is read as a single image by ``_read``.
        """
        if isinstance(img, list):
            return ImageReader.from_list(img)
        if isinstance(img, str):
            if os.path.isdir(img):
                return ImageReader.from_dir(img)
            # Case-insensitive, consistent with validate_img_path: "X.PDF"
            # must not fall through to the PIL image reader.
            if img.lower().endswith(".pdf"):
                return ImageReader.from_pdf(img)
        return ImageReader._read(img)

    @staticmethod
    def from_dir(dir_path: str) -> List[np.ndarray]:
        """Read every readable image directly inside ``dir_path``.

        Raises:
            NotADirectoryError: ``dir_path`` is not a directory.
        """
        if not os.path.isdir(dir_path):
            raise NotADirectoryError(dir_path)
        image_files = glob.glob(os.path.join(dir_path, "*"))
        return ImageReader.from_list(image_files)

    @staticmethod
    def from_str(img_path: str) -> np.ndarray:
        """Validate ``img_path`` and load it as an RGB numpy array."""
        ImageReader.validate_img_path(img_path)
        # The pixel data is copied into the numpy array inside the ``with``
        # block, so the underlying file handle can be released right away.
        with Image.open(img_path) as img_pil:
            return ImageReader.from_PIL(img_pil)

    @staticmethod
    def from_np(img_array: np.ndarray) -> np.ndarray:
        """Return ``img_array`` unchanged (no copy, no colour conversion)."""
        return img_array

    @staticmethod
    def from_PIL(img_pil: Image.Image, transpose=True) -> np.ndarray:
        """Convert a PIL image to an RGB numpy array.

        When ``transpose`` is true the image is first passed through
        ``ImageOps.exif_transpose``.
        """
        # if img_pil.is_animated:
        #     raise NotImplementedError("Only static images are supported, animated image found")
        if transpose:
            img_pil = ImageOps.exif_transpose(img_pil)
        if img_pil.mode != "RGB":
            img_pil = img_pil.convert("RGB")
        return np.array(img_pil)

    @staticmethod
    def from_list(img_list: List[Union[str, np.ndarray, Image.Image]]) -> List[np.ndarray]:
        """Read every item of ``img_list``, skipping the ones that cannot be read.

        Missing paths, directories, unsupported extensions and files that
        cannot be opened as an image are reported on stdout and skipped, so
        the result may be shorter than ``img_list``.
        """
        limgs = list()
        for img_path in img_list:
            try:
                # String paths are validated by from_str (reached via _read).
                limgs.append(ImageReader._read(img_path))
            except (OSError, NotImplementedError) as e:
                # OSError covers FileNotFoundError, IsADirectoryError and
                # PIL's UnidentifiedImageError, so one unreadable file does
                # not abort the whole batch.
                print("[ERROR]: ", e)
                print("[INFO]: Skipping image {}".format(img_path))
        return limgs

    @staticmethod
    def from_pdf(pdf_path: str, start_page: int = 0, end_page: Union[int, None] = 0) -> List[np.ndarray]:
        """Convert the pages ``start_page..end_page`` (0-based, inclusive) of a PDF.

        ``end_page`` is clamped to the last page; ``None`` means "up to the
        last page".

        NOTE(review): with the default ``end_page=0`` only the first page is
        returned -- confirm this is what callers of ``read()`` expect for
        multi-page PDFs.
        """
        pdf_file = convert_from_path(pdf_path)
        if end_page is not None:
            # +1 turns the inclusive page index into an exclusive slice bound.
            end_page = min(len(pdf_file), end_page + 1)
        return [np.array(pdf_page) for pdf_page in pdf_file[start_page:end_page]]

    @staticmethod
    def _read(img: Union[str, np.ndarray, Image.Image]) -> np.ndarray:
        """Read a single image given as a path, PIL image or numpy array.

        Raises:
            ValueError: ``img`` is none of the supported types.
        """
        if isinstance(img, str):
            return ImageReader.from_str(img)
        if isinstance(img, Image.Image):
            return ImageReader.from_PIL(img)
        if isinstance(img, np.ndarray):
            return ImageReader.from_np(img)
        raise ValueError("Invalid img argument type: {}".format(type(img)))
def get_name(file_path, ext: bool = True):
    """Return the last component of ``file_path``.

    When ``ext`` is false the final extension is stripped from the name.
    """
    base_name = os.path.basename(file_path)
    if ext:
        return base_name
    stem, _suffix = os.path.splitext(base_name)
    return stem
def construct_file_path(dir, file_path, ext=''):
    '''
    Build a path inside ``dir`` that reuses the file name of ``file_path``.

    args:
        dir: /path/to/dir
        file_path /example_path/to/file.txt
        ext = '.json'
    return
        /path/to/dir/file.json
        (with the default ext='' the original name is kept: /path/to/dir/file.txt)
    '''
    if ext == '':
        # Keep the original file name, extension included.
        return os.path.join(dir, get_name(file_path, True))
    # Drop the original extension and append the requested one.
    return os.path.join(dir, get_name(file_path, False)) + ext
def chunks(lst: list, n: int) -> Generator:
    """
    Lazily split lst into consecutive slices of length n.
    The last slice holds the remainder and may be shorter.
    https://stackoverflow.com/questions/312443/how-do-i-split-a-list-into-equally-sized-chunks
    """
    starts = range(0, len(lst), n)
    yield from (lst[start:start + n] for start in starts)
def read_ocr_result_from_txt(file_path: str) -> Tuple[list, list]:
    '''
    Parse an OCR result file with one ``x1<TAB>y1<TAB>x2<TAB>y2<TAB>text``
    record per line.

    Empty lines are ignored. Records whose text is empty or a single space
    are dropped (both the box and the word).

    return list of bounding boxes, list of words
        boxes are (x1, y1, x2, y2) tuples of int, aligned with words
    raises
        ValueError: a non-empty line has fewer than five tab-separated fields
            or a coordinate that is not an integer
    '''
    # Explicit UTF-8 so non-ASCII text does not depend on the platform's
    # default encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.read().splitlines()
    boxes, words = [], []
    for line in lines:
        if line == "":
            continue
        # maxsplit=4: tab characters inside the text field stay part of the text
        # instead of breaking the 5-way unpacking.
        x1, y1, x2, y2, text = line.split("\t", 4)
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        if text and text != " ":
            words.append(text)
            boxes.append((x1, y1, x2, y2))
    return boxes, words
def get_xyxywh_base_on_format(bbox, format):
    """Expand ``bbox`` into ``(x1, y1, x2, y2, w, h)``.

    ``format`` is "xywh" (left, top, width, height) or "xyxy" (left, top,
    right, bottom); the missing pair is derived from the given one.

    Raises NotImplementedError for any other ``format``.
    """
    if format == "xywh":
        left, top = bbox[0], bbox[1]
        width, height = bbox[2], bbox[3]
        return (left, top, left + width, top + height, width, height)
    if format == "xyxy":
        left, top, right, bottom = bbox
        return (left, top, right, bottom, right - left, bottom - top)
    raise NotImplementedError("Invalid format {}".format(format))
def get_dynamic_params_for_bbox_of_label(text, x1, y1, w, h, img_h, img_w, font, font_scale_offset=1):
    """Derive the drawing parameters for a label placed above a bounding box.

    The font scale depends on the image aspect (``img_h / (img_w + img_h)``,
    multiplied by ``font_scale_offset``) and on the box aspect
    (``w / (w + h)``). The text size is measured with ``cv2.getTextSize``.

    Returns ``(font_scale, thickness, text_height, box_coords)`` where
    ``box_coords`` is a pair of corner points for the label background,
    anchored at ``(x1, y1 - thickness)``.
    """
    font_scale_factor = img_h / (img_w + img_h) * font_scale_offset
    # adjust font scale by width height
    font_scale = w / (w + h) * font_scale_factor
    thickness = int(font_scale_factor) + 1

    text_size = cv2.getTextSize(text, font, fontScale=font_scale, thickness=thickness)[0]
    text_width, text_height = text_size

    anchor_x = x1
    anchor_y = y1 - thickness
    first_corner = (anchor_x, anchor_y + 1)
    second_corner = (anchor_x + text_width - 2, anchor_y - text_height - 2)
    return (font_scale, thickness, text_height, (first_corner, second_corner))
def visualize_bbox_and_label(
        img, bboxes, texts, bbox_color=(200, 180, 60),
        text_color=(0, 0, 0),
        format="xyxy", is_vnese=False, draw_text=True):
    """Draw each bounding box, and optionally its label, onto ``img``.

    args:
        img: image to draw on; the cv2 path reads ``img.shape``, the PIL path
            accepts a numpy array (converted with ``Image.fromarray``) or an
            object exposing ``.size``
        bboxes: boxes in the layout named by ``format``
        texts: one label per box, indexed like ``bboxes``
        bbox_color: colour of the box outline and of the label background
        text_color: colour of the label text
        format: "xyxy" or "xywh", forwarded to get_xyxywh_base_on_format
        is_vnese: draw with PIL and the TrueType font "fonts/arial.ttf"
            instead of cv2
        draw_text: when false only the box outlines are drawn
    return
        a numpy array when a numpy array was passed and ``is_vnese`` is set,
        otherwise ``img`` itself
    """
    input_type = type(img)
    hershey_font = cv2.FONT_HERSHEY_SIMPLEX  # text metrics always come from cv2
    if is_vnese:
        if input_type is np.ndarray:
            img = Image.fromarray(img)
        canvas = ImageDraw.Draw(img)
        img_w, img_h = img.size
        ttf_path = "fonts/arial.ttf"
    else:
        img_h, img_w = img.shape[0], img.shape[1]

    for idx, bbox in enumerate(bboxes):
        label = texts[idx]
        x1, y1, x2, y2, w, h = get_xyxywh_base_on_format(bbox, format)
        font_scale, thickness, text_height, box_coords = get_dynamic_params_for_bbox_of_label(
            label, x1, y1, w, h, img_h, img_w, font=hershey_font)
        label_corner_a, label_corner_b = box_coords[0], box_coords[1]

        if is_vnese:
            # The font is loaded before anything is drawn for this box.
            pil_font = ImageFont.truetype(ttf_path, size=text_height)
            canvas.rectangle((x1, y1, x2, y2), outline=bbox_color, width=thickness)
            if draw_text:
                # Background first, then the text on top of it.
                canvas.rectangle((label_corner_a, label_corner_b), fill=bbox_color, width=thickness)
                # Pil use different coordinate => y = y+thickness = y-thickness + 2*thickness
                canvas.text((label_corner_a[0], label_corner_b[1]), label,
                            font=pil_font, fill=text_color, width=thickness)
        else:
            cv2.rectangle(img, (x1, y1), (x2, y2), color=bbox_color, thickness=thickness)
            if draw_text:
                # Filled background first, then the text on top of it.
                cv2.rectangle(img, label_corner_a, label_corner_b, color=bbox_color, thickness=cv2.FILLED)
                cv2.putText(img, label, label_corner_a, fontFace=hershey_font,
                            fontScale=font_scale, color=text_color, thickness=thickness)

    if input_type is np.ndarray and is_vnese:
        return np.array(img)
    return img