import glob
import math
import os
import time
from pathlib import Path
from typing import Callable, Generator, List, Optional, Tuple, Union, overload

import cv2
# import matplotlib.pyplot as plt
import numpy as np
from PIL import ImageFont, ImageDraw, Image, ImageOps
from pdf2image import convert_from_path

# from deskew import determine_skew
# from jdeskew.estimator import get_angle
# from jdeskew.utility import rotate as jrotate


def post_process_recog(text: str) -> str:
    """Return *text* with every "✪" placeholder replaced by a single space."""
    return text.replace("✪", " ")


def find_maximum_without_outliers(lst: List[int], threshold: float = 1.):
    """Return the largest value of *lst* that is not an IQR outlier.

    The inlier range is ``[q1 - threshold * iqr, q3 + threshold * iqr]``
    where ``q1``/``q3`` are the 25th/75th percentiles of *lst* and
    ``iqr = q3 - q1``. Values outside that range are ignored.

    Args:
        lst: values to inspect.
        threshold: multiplier applied to the IQR when building the range.

    Returns:
        The maximum element of *lst* that lies inside the inlier range.

    Raises:
        ValueError: if *lst* is empty, or if no element falls inside the
            inlier range (raised by ``max`` on an empty sequence).
    """
    if len(lst) == 0:
        raise ValueError("find_maximum_without_outliers() got an empty list")

    # Calculate the lower and upper boundaries for outliers
    q1 = np.percentile(lst, 25)
    q3 = np.percentile(lst, 75)
    iqr = q3 - q1
    lower_bound = q1 - threshold * iqr
    upper_bound = q3 + threshold * iqr

    # Find the maximum value among the non-outlier values
    return max(x for x in lst if lower_bound <= x <= upper_bound)


class Timer:
    """Context manager that prints how long its ``with`` body took.

    After the block exits, ``start_time``, ``end_time`` and ``elapsed_time``
    (all ``time.perf_counter`` based, in seconds) are available on the
    instance.
    """

    def __init__(self, name: str) -> None:
        self.name = name

    def __enter__(self):
        self.start_time = time.perf_counter()
        return self

    def __exit__(self, exc_type, *exc_info) -> None:
        # Returns None, so an exception raised inside the block propagates.
        self.end_time = time.perf_counter()
        self.elapsed_time = self.end_time - self.start_time
        print(f"[INFO]: {self.name} took : {self.elapsed_time:.6f} seconds")


# ---------------------------------------------------------------------------
# Disabled rotation / deskew helpers (kept commented out, as in the original).
# ---------------------------------------------------------------------------
# def rotate(
#         image: np.ndarray, angle: float, background: Union[int, Tuple[int, int, int]]
# ) -> np.ndarray:
#     old_width, old_height = image.shape[:2]
#     angle_radian = math.radians(angle)
#     width = abs(np.sin(angle_radian) * old_height) + abs(np.cos(angle_radian) * old_width)
#     height = abs(np.sin(angle_radian) * old_width) + abs(np.cos(angle_radian) * old_height)
#     image_center = tuple(np.array(image.shape[1::-1]) / 2)
#     rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
#     rot_mat[1, 2] += (width - old_width) / 2
#     rot_mat[0, 2] += (height - old_height) / 2
#     return cv2.warpAffine(image, rot_mat, (int(round(height)), int(round(width))), borderValue=background)


# def rotate_bbox(bbox: list, angle: float) -> list:
#     # Compute the center point of the bounding box
#     cx = bbox[0] + bbox[2] / 2
#     cy = bbox[1] + bbox[3] / 2
#     # Define the scale factor for the rotated bounding box
#     scale = 1.0  # following the deskew and jdeskew function
#     angle_radian = math.radians(angle)
#     # Obtain the rotation matrix using cv2.getRotationMatrix2D()
#     M = cv2.getRotationMatrix2D((cx, cy), angle_radian, scale)
#     # Apply the rotation matrix to the four corners of the bounding box
#     corners = np.array([[bbox[0], bbox[1]],
#                         [bbox[0] + bbox[2], bbox[1]],
#                         [bbox[0] + bbox[2], bbox[1] + bbox[3]],
#                         [bbox[0], bbox[1] + bbox[3]]], dtype=np.float32)
#     rotated_corners = cv2.transform(np.array([corners]), M)[0]
#     # Compute the bounding box of the rotated corners
#     x = int(np.min(rotated_corners[:, 0]))
#     y = int(np.min(rotated_corners[:, 1]))
#     w = int(np.max(rotated_corners[:, 0]) - np.min(rotated_corners[:, 0]))
#     h = int(np.max(rotated_corners[:, 1]) - np.min(rotated_corners[:, 1]))
#     rotated_bbox = [x, y, w, h]
#     return rotated_bbox


# def rotate_bbox(bbox: List[int], angle: float, old_shape: Tuple[int, int]) -> List[int]:
#     # https://medium.com/@pokomaru/image-and-bounding-box-rotation-using-opencv-python-2def6c39453
#     bbox_ = [bbox[0], bbox[1], bbox[2], bbox[1], bbox[2], bbox[3], bbox[0], bbox[3]]
#     h, w = old_shape
#     cx, cy = (int(w / 2), int(h / 2))
#     bbox_tuple = [
#         (bbox_[0], bbox_[1]),
#         (bbox_[2], bbox_[3]),
#         (bbox_[4], bbox_[5]),
#         (bbox_[6], bbox_[7]),
#     ]  # put x and y coordinates in tuples, we will iterate through the tuples and perform rotation
#     rotated_bbox = []
#     for i, coord in enumerate(bbox_tuple):
#         M = cv2.getRotationMatrix2D((cx, cy), angle, 1.0)
#         cos, sin = abs(M[0, 0]), abs(M[0, 1])
#         newW = int((h * sin) + (w * cos))
#         newH = int((h * cos) + (w * sin))
#         M[0, 2] += (newW / 2) - cx
#         M[1, 2] += (newH / 2) - cy
#         v = [coord[0], coord[1], 1]
#         adjusted_coord = np.dot(M, v)
#         rotated_bbox.insert(i, (adjusted_coord[0], adjusted_coord[1]))
#     result = [int(x) for t in rotated_bbox for x in t]
#     return [result[i] for i in [0, 1, 2, -1]]  # reformat to xyxy


# def deskew(image: np.ndarray) -> Tuple[np.ndarray, float]:
#     grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#     angle = 0.
#     try:
#         angle = determine_skew(grayscale)
#     except Exception:
#         pass
#     rotated = rotate(image, angle, (0, 0, 0)) if angle else image
#     return rotated, angle


# def jdeskew(image: np.ndarray) -> Tuple[np.ndarray, float]:
#     angle = 0.
#     try:
#         angle = get_angle(image)
#     except Exception:
#         pass
#     # TODO: change resize = True and scale the bounding box
#     rotated = jrotate(image, angle, resize=False) if angle else image
#     return rotated, angle

# def deskew()


class ImageReader:
    """
    accept anything, return numpy array image

    Accepted inputs are file paths, directories, PDF paths, PIL images,
    numpy arrays, or lists mixing those. Images decoded from disk or from
    PIL objects are returned as RGB arrays; numpy inputs are passed through
    unchanged.
    """
    supported_ext = [".png", ".jpg", ".jpeg", ".pdf", ".gif"]

    @staticmethod
    def validate_img_path(img_path: str) -> None:
        """Check that *img_path* is an existing file with a supported suffix.

        Raises:
            FileNotFoundError: the path does not exist.
            IsADirectoryError: the path is a directory.
            NotImplementedError: the (lower-cased) suffix is not listed in
                ``supported_ext``.
        """
        if not os.path.exists(img_path):
            raise FileNotFoundError(img_path)
        if os.path.isdir(img_path):
            raise IsADirectoryError(img_path)
        if Path(img_path).suffix.lower() not in ImageReader.supported_ext:
            raise NotImplementedError("Not supported extension at {}".format(img_path))

    @staticmethod
    def _is_pdf(img_path: str) -> bool:
        """Return True when *img_path* ends with ``.pdf`` (case-insensitive)."""
        return img_path.lower().endswith(".pdf")

    @overload
    @staticmethod
    def read(img: Union[str, np.ndarray, Image.Image]) -> np.ndarray: ...

    @overload
    @staticmethod
    def read(img: List[Union[str, np.ndarray, Image.Image]]) -> List[np.ndarray]: ...

    @overload
    @staticmethod
    def read(img: str) -> List[np.ndarray]: ...  # for pdf or directory

    @staticmethod
    def read(img):
        """Dispatch *img* to the matching reader.

        A list, a directory path or a PDF path yields a list of arrays;
        anything else yields a single array.
        """
        if isinstance(img, list):
            return ImageReader.from_list(img)
        if isinstance(img, str):
            if os.path.isdir(img):
                return ImageReader.from_dir(img)
            # Case-insensitive so "scan.PDF" is not handed to PIL, which
            # cannot decode PDF files.
            if ImageReader._is_pdf(img):
                return ImageReader.from_pdf(img)
        return ImageReader._read(img)

    @staticmethod
    def from_dir(dir_path: str) -> List[np.ndarray]:
        """Read every direct entry of *dir_path* (non-recursive ``*`` glob).

        Entries that fail validation are skipped by ``from_list``.

        Raises:
            NotADirectoryError: *dir_path* is not a directory.
        """
        if not os.path.isdir(dir_path):
            raise NotADirectoryError(dir_path)
        image_files = glob.glob(os.path.join(dir_path, "*"))
        return ImageReader.from_list(image_files)

    @staticmethod
    def from_str(img_path: str) -> np.ndarray:
        """Validate *img_path*, decode it with PIL and return an RGB array."""
        ImageReader.validate_img_path(img_path)
        # The context manager closes the underlying file handle; the pixel
        # data is fully loaded inside from_PIL before the block exits.
        with Image.open(img_path) as img_pil:
            return ImageReader.from_PIL(img_pil)

    @staticmethod
    def from_np(img_array: np.ndarray) -> np.ndarray:
        """Return *img_array* unchanged (no copy, no colour conversion)."""
        return img_array

    @staticmethod
    def from_PIL(img_pil: Image.Image, transpose=True) -> np.ndarray:
        """Convert a PIL image to an RGB numpy array.

        Args:
            img_pil: source image.
            transpose: when true, apply the EXIF orientation first via
                ``ImageOps.exif_transpose``.
        """
        # if img_pil.is_animated:
        #     raise NotImplementedError("Only static images are supported, animated image found")
        if transpose:
            img_pil = ImageOps.exif_transpose(img_pil)
        if img_pil.mode != "RGB":
            img_pil = img_pil.convert("RGB")
        return np.array(img_pil)

    @staticmethod
    def from_list(img_list: List[Union[str, np.ndarray, Image.Image]]) -> List[np.ndarray]:
        """Read every item of *img_list*, skipping paths that fail validation.

        Missing files, directories and unsupported extensions are reported
        on stdout and skipped. PDF paths are expanded through ``from_pdf``
        (with its default page range), so the result can be longer than
        *img_list*.
        """
        limgs = []
        for item in img_list:
            try:
                if isinstance(item, str):
                    ImageReader.validate_img_path(item)
                    if ImageReader._is_pdf(item):
                        # PIL cannot open PDFs; route them to pdf2image.
                        limgs.extend(ImageReader.from_pdf(item))
                        continue
                limgs.append(ImageReader._read(item))
            except (FileNotFoundError, NotImplementedError, IsADirectoryError) as e:
                print("[ERROR]: ", e)
                print("[INFO]: Skipping image {}".format(item))
        return limgs

    @staticmethod
    def from_pdf(pdf_path: str, start_page: int = 0, end_page: Optional[int] = 0) -> List[np.ndarray]:
        """Render *pdf_path* and return pages ``start_page..end_page`` as arrays.

        Args:
            pdf_path: path of the PDF file.
            start_page: 0-based index of the first page to return.
            end_page: 0-based index of the last page to return (inclusive,
                clamped to the page count). ``None`` means "to the end".

        NOTE(review): with the defaults (0, 0) only the first page is
        returned, which is also what ``read("file.pdf")`` yields. Confirm
        whether the default was meant to be ``None`` (all pages).
        """
        pdf_pages = convert_from_path(pdf_path)
        if end_page is not None:
            end_page = min(len(pdf_pages), end_page + 1)
        return [np.array(pdf_page) for pdf_page in pdf_pages[start_page:end_page]]

    @staticmethod
    def _read(img: Union[str, np.ndarray, Image.Image]) -> np.ndarray:
        """Read a single path / PIL image / numpy array.

        Raises:
            ValueError: *img* is none of the supported types.
        """
        if isinstance(img, str):
            return ImageReader.from_str(img)
        if isinstance(img, Image.Image):
            return ImageReader.from_PIL(img)
        if isinstance(img, np.ndarray):
            return ImageReader.from_np(img)
        raise ValueError("Invalid img argument type: {}".format(type(img)))


def get_name(file_path, ext: bool = True):
    """Return the base name of *file_path*, without its extension if *ext* is false."""
    base_name = os.path.basename(file_path)
    if ext:
        return base_name
    return os.path.splitext(base_name)[0]


def construct_file_path(dir, file_path, ext=''):
    '''
    Build a path inside *dir* that reuses the file name of *file_path*.

    args:
        dir: /path/to/dir
        file_path /example_path/to/file.txt
        ext = '.json'
    return
        /path/to/dir/file.json
        (with ext == '' the original extension is kept: /path/to/dir/file.txt)
    '''
    if ext == '':
        return os.path.join(dir, get_name(file_path, True))
    return os.path.join(dir, get_name(file_path, False)) + ext


def chunks(lst: list, n: int) -> Generator:
    """
    Yield successive n-sized chunks from lst (the last one may be shorter).
    https://stackoverflow.com/questions/312443/how-do-i-split-a-list-into-equally-sized-chunks
    """
    for start in range(0, len(lst), n):
        yield lst[start:start + n]


def read_ocr_result_from_txt(file_path: str) -> Tuple[list, list]:
    '''
    Parse a tab-separated OCR result file with lines ``x1\\ty1\\tx2\\ty2\\ttext``.

    Empty lines are skipped, and so are entries whose text is empty or a
    single space.

    return list of bounding boxes, list of words

    Raises:
        ValueError: a non-empty line has fewer than five tab-separated
            fields, or a coordinate is not an integer.
    '''
    with open(file_path, 'r', encoding="utf-8") as f:
        lines = f.read().splitlines()

    boxes, words = [], []
    for line in lines:
        if line == "":
            continue
        # maxsplit=4 keeps any tab characters that belong to the text itself.
        x1, y1, x2, y2, text = line.split("\t", 4)
        if text and text != " ":
            words.append(text)
            boxes.append((int(x1), int(y1), int(x2), int(y2)))
        else:
            # Still validate the coordinates, as the original parser did.
            int(x1), int(y1), int(x2), int(y2)
    return boxes, words


def get_xyxywh_base_on_format(bbox, format):
    """Expand *bbox* into ``(x1, y1, x2, y2, w, h)``.

    Args:
        bbox: four numbers, interpreted according to *format*.
        format: ``"xywh"`` (top-left corner + size) or ``"xyxy"`` (two corners).

    Raises:
        NotImplementedError: *format* is neither ``"xywh"`` nor ``"xyxy"``.
    """
    if format == "xywh":
        x1, y1, w, h = bbox[0], bbox[1], bbox[2], bbox[3]
        x2, y2 = x1 + w, y1 + h
    elif format == "xyxy":
        x1, y1, x2, y2 = bbox
        w, h = x2 - x1, y2 - y1
    else:
        raise NotImplementedError("Invalid format {}".format(format))
    return (x1, y1, x2, y2, w, h)


def get_dynamic_params_for_bbox_of_label(text, x1, y1, w, h, img_h, img_w, font, font_scale_offset=1):
    """Derive text-drawing parameters for a label placed above a bounding box.

    Args:
        text: label to measure.
        x1, y1: top-left corner of the bounding box.
        w, h: size of the bounding box.
        img_h, img_w: size of the image being drawn on.
        font: OpenCV font face passed to ``cv2.getTextSize``.
        font_scale_offset: multiplier applied to the image-based scale factor.

    Returns:
        ``(font_scale, thickness, text_height, box_coords)`` where
        ``box_coords`` is a pair of points: the bottom-left and the
        top-right corner of the label background.
    """
    font_scale_factor = img_h / (img_w + img_h) * font_scale_offset
    # adjust font scale by width height; a zero-size box gets scale 0
    # instead of raising ZeroDivisionError
    box_ratio = w / (w + h) if (w + h) else 0.0
    font_scale = box_ratio * font_scale_factor
    thickness = int(font_scale_factor) + 1
    (text_width, text_height) = cv2.getTextSize(text, font, fontScale=font_scale, thickness=thickness)[0]
    text_offset_x = x1
    text_offset_y = y1 - thickness
    box_coords = ((text_offset_x, text_offset_y + 1),
                  (text_offset_x + text_width - 2, text_offset_y - text_height - 2))
    return (font_scale, thickness, text_height, box_coords)


def visualize_bbox_and_label(
        img, bboxes, texts, bbox_color=(200, 180, 60), text_color=(0, 0, 0),
        format="xyxy", is_vnese=False, draw_text=True, font_path="fonts/arial.ttf"):
    """Draw bounding boxes and (optionally) their labels on *img*.

    Args:
        img: numpy array, or a PIL image when *is_vnese* is true. The
            OpenCV path (``is_vnese=False``) reads ``img.shape`` and draws
            in place on the given array.
        bboxes: boxes in the layout named by *format*.
        texts: one label per box, indexed like *bboxes*.
        bbox_color: colour of the box outline and of the label background.
        text_color: colour of the label text.
        format: ``"xyxy"`` or ``"xywh"``.
        is_vnese: draw with PIL and a TrueType font (needed for text the
            built-in OpenCV font cannot render) instead of OpenCV.
        draw_text: when false only the boxes are drawn.
        font_path: TrueType font file used by the PIL path.

    Returns:
        The annotated image: a numpy array when a numpy array was given,
        otherwise the PIL image that was drawn on.
    """
    input_is_array = isinstance(img, np.ndarray)
    font_cv2 = cv2.FONT_HERSHEY_SIMPLEX
    draw = None
    fonts_by_size = {}  # avoid re-loading the TrueType file for every box

    if is_vnese:
        if input_is_array:
            img = Image.fromarray(img)
        draw = ImageDraw.Draw(img)
        img_w, img_h = img.size
    else:
        img_h, img_w = img.shape[0], img.shape[1]

    for i, bbox in enumerate(bboxes):
        text = texts[i]
        # text = "{}: {:.0f}%".format(LABELS[classIDs[i]], confidences[i]*100)
        x1, y1, x2, y2, w, h = get_xyxywh_base_on_format(bbox, format)
        font_scale, thickness, text_height, box_coords = get_dynamic_params_for_bbox_of_label(
            text, x1, y1, w, h, img_h, img_w, font=font_cv2)

        if is_vnese:
            # draw a bounding box rectangle and label on the img
            draw.rectangle((x1, y1, x2, y2), outline=bbox_color, width=thickness)
            if draw_text:
                (ax, ay), (bx, by) = box_coords
                # box_coords goes bottom-left -> top-right; PIL expects
                # x0 <= x1 and y0 <= y1, so normalise the corners first.
                label_box = (min(ax, bx), min(ay, by), max(ax, bx), max(ay, by))
                draw.rectangle(label_box, fill=bbox_color, width=thickness)
                font_size = max(int(text_height), 1)  # PIL needs a positive size
                font_pil = fonts_by_size.get(font_size)
                if font_pil is None:
                    font_pil = ImageFont.truetype(font_path, size=font_size)
                    fonts_by_size[font_size] = font_pil
                # Pil use different coordinate: text is anchored at its
                # top-left corner, OpenCV at its bottom-left corner.
                # text have to put in front of rec_text
                draw.text((ax, by), text, font=font_pil, fill=text_color)
        else:
            # draw a bounding box rectangle and label on the img
            cv2.rectangle(img, (x1, y1), (x2, y2), color=bbox_color, thickness=thickness)
            if draw_text:
                cv2.rectangle(img, box_coords[0], box_coords[1], color=bbox_color, thickness=cv2.FILLED)
                # text have to put in front of rec_text
                cv2.putText(img, text, box_coords[0], fontFace=font_cv2, fontScale=font_scale,
                            color=text_color, thickness=thickness)

    return np.array(img) if input_is_array and is_vnese else img