# sbt-idp/cope2n-ai-fi/common/utils/utils.py

import os
import json
import glob
import random
import cv2


def read_txt(file):
    """Read a UTF-8 text file and return its lines as a list of strings.

    Leading and trailing whitespace (including the line terminator) is
    stripped from every line; blank lines are kept as empty strings.
    """
    with open(file, mode="r", encoding="utf8") as handle:
        return list(map(str.strip, handle))


def write_txt(file, data):
    """Write the strings in ``data`` to ``file`` (UTF-8), one per line.

    A newline is appended after every item, including the last one. Any
    existing file content is overwritten.
    """
    with open(file, mode="w", encoding="utf8") as handle:
        handle.writelines(entry + "\n" for entry in data)


def write_json(file, data):
    """Serialise ``data`` as JSON into ``file`` (UTF-8).

    Non-ASCII characters are written verbatim instead of being escaped,
    and dictionary keys are emitted in sorted order.
    """
    dump_options = {"ensure_ascii": False, "sort_keys": True}
    with open(file, mode="w", encoding="utf8") as out:
        json.dump(data, out, **dump_options)


def read_json(file):
    """Parse the UTF-8 JSON document stored at ``file`` and return the result.

    NOTE: this module defines ``read_json`` a second time further down; the
    later definition is the one bound to the name once the module is loaded.
    """
    with open(file, mode="r", encoding="utf8") as src:
        return json.load(src)


def get_colors(kie_labels):
    """Return one pseudo-random colour per entry of ``kie_labels``.

    Each colour is a 3-tuple of integers in ``[0, 255]``. The module-level
    ``random`` generator is re-seeded with a fixed value on every call, so
    the same number of labels always yields the same colours. Be aware that
    this also resets the global RNG state for every other user of ``random``.
    """
    random.seed(1997)
    palette_size = len(kie_labels)
    return [
        (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
        for _ in range(palette_size)
    ]


def normalize_box(box, width, height):
    """Rescale a box to integer coordinates on a 0-1000 grid.

    ``box[0]`` and ``box[2]`` are divided by ``width``; ``box[1]`` and
    ``box[3]`` are divided by ``height``. Each ratio is multiplied by 1000
    and truncated with ``int``.

    The sanity check is lenient: it only requires the largest coordinate to
    fit within *either* ``width`` or ``height``, so results above 1000 are
    possible. It is an ``assert`` and therefore raises ``AssertionError``
    (and is skipped entirely when Python runs with ``-O``).
    """
    largest = max(box)
    assert (
        largest <= width or largest <= height
    ), "box must smaller than width, height; max box = {}, width = {}, height = {}".format(
        largest, width, height
    )

    scaled = []
    # x coordinates scale by width, y coordinates by height.
    for index, extent in enumerate((width, height, width, height)):
        scaled.append(int(1000 * (box[index] / extent)))
    return scaled


def unnormalize_box(bbox, width, height):
    """Map a box from the 0-1000 grid back to ``width`` x ``height`` space.

    ``bbox[0]`` and ``bbox[2]`` are scaled by ``width``; ``bbox[1]`` and
    ``bbox[3]`` by ``height``. The result is a list of four values that are
    not rounded or truncated.
    """
    extents = (width, height, width, height)
    return [extent * (bbox[i] / 1000) for i, extent in enumerate(extents)]


def load_image_paths_and_labels(data_dir):
    r"""Collect the image paths in ``data_dir`` and their matching label paths.

    Every entry directly inside ``data_dir`` whose extension is not ``.txt``
    is treated as an image. Its label path is the same path with the
    extension replaced by ``.txt``. Label paths are derived from the image
    paths; they are not checked for existence.

    Args:
        data_dir: directory to scan (non-recursive).

    Returns:
        ``(img_paths, label_paths)``: two index-aligned lists of equal length,
        in the order returned by ``glob.glob``.

    @todo   Add OCR paths here
    """

    img_paths = [
        path
        for path in glob.glob(data_dir + "/*")
        # Compare the extension only: a substring test on the whole path would
        # drop every file whenever a directory name happens to contain ".txt".
        if os.path.splitext(path)[1] != ".txt"
    ]
    label_paths = [os.path.splitext(path)[0] + ".txt" for path in img_paths]

    return img_paths, label_paths


import cv2


def read_image_file(img_path):
    """Load the image at ``img_path`` with ``cv2.imread`` and return the result as-is."""
    return cv2.imread(img_path)


def normalize_bbox(x1, y1, x2, y2, w, h):
    """Clamp a box to ``[0, w]`` x ``[0, h]`` and convert it to integers.

    x coordinates are limited to the range ``0..w`` and y coordinates to
    ``0..h``; each value is then truncated to ``int``.

    Returns:
        ``(x1, y1, x2, y2)`` as a tuple of ints.
    """

    def _clip(value, upper):
        # Clamp into [0, upper], then truncate toward zero.
        return int(float(min(max(0, value), upper)))

    left, right = _clip(x1, w), _clip(x2, w)
    top, bottom = _clip(y1, h), _clip(y2, h)
    return (left, top, right, bottom)


def extend_crop_img(
    left, top, right, bottom, margin_l=0, margin_t=0.03, margin_r=0.02, margin_b=0.05
):
    """Grow a box outward by fractional margins and return the new edges.

    Each margin is a fraction of the box extent along its axis. The edges are
    updated one after another, so the bottom margin is computed from the
    height *after* the top edge has moved, and the right margin from the
    width *after* the left edge has moved.

    Returns:
        ``(left, top, right, bottom)`` of the extended box; the values are not
        clamped or rounded.
    """
    new_top = top - (bottom - top) * margin_t
    # Uses the already-extended top edge.
    new_bottom = bottom + (bottom - new_top) * margin_b
    new_left = left - (right - left) * margin_l
    # Uses the already-extended left edge.
    new_right = right + (right - new_left) * margin_r
    return new_left, new_top, new_right, new_bottom


def get_crop_img_and_bbox(img, bbox, extend: bool):
    """Crop ``img`` to ``bbox`` and return the crop with the box actually used.

    Args:
        img : numpy array img
        bbox : should be xyxy format; either 4 values ``(left, top, right,
            bottom)`` or 5 values with a trailing confidence that is ignored
        extend : when true, the box is first enlarged with
            ``extend_crop_img`` using its default margins

    Returns:
        ``(crop_img, (left, top, right, bottom))`` where the box has been
        clamped to the image bounds and converted to ints by
        ``normalize_bbox``. ``crop_img`` is ``img[top:bottom, left:right]``.

    Raises:
        ValueError: if ``bbox`` does not have 4 or 5 values.
        AssertionError: if the clamped box has no positive width or height.
    """
    if len(bbox) == 5:
        left, top, right, bottom, _conf = bbox
    elif len(bbox) == 4:
        left, top, right, bottom = bbox
    else:
        # Previously this fell through and failed later on an unbound local.
        raise ValueError(
            "bbox must have 4 (xyxy) or 5 (xyxy + confidence) values, got {}".format(
                len(bbox)
            )
        )
    if extend:
        left, top, right, bottom = extend_crop_img(left, top, right, bottom)
    left, top, right, bottom = normalize_bbox(
        left, top, right, bottom, img.shape[1], img.shape[0]
    )
    # Check each side separately: a product-of-sides test lets a doubly
    # inverted box (right < left AND bottom < top) through. Raised explicitly
    # (same exception type as before) so the check survives ``python -O``.
    if bottom <= top or right <= left:
        raise AssertionError("bbox is invalid")
    crop_img = img[top:bottom, left:right]
    return crop_img, (left, top, right, bottom)


import json
import os


def load_kie_labels_yolo(label_path):
    """Parse a tab-separated KIE label file.

    Each non-blank line must hold six tab-separated fields:
    ``x1  y1  x2  y2  text  kie``, where the four coordinates are integers.
    Lines whose text field is exactly a single space are skipped, as are
    blank lines.

    Args:
        label_path: path to the label file (read as UTF-8).

    Returns:
        ``(words, boxes, labels)``: three index-aligned lists holding the text,
        the ``(x1, y1, x2, y2)`` int tuple and the kie label of each kept line.

    Raises:
        ValueError: if a non-blank line does not have exactly six fields or a
            coordinate is not an integer.
    """
    # Read explicitly as UTF-8 so non-ASCII text does not depend on the
    # platform's default encoding.
    with open(label_path, 'r', encoding='utf8') as f:
        lines = f.read().splitlines()
    words, boxes, labels = [], [], []
    for line in lines:
        if not line.strip():
            # Tolerate blank / trailing empty lines instead of failing to unpack.
            continue
        x1, y1, x2, y2, text, kie = line.split("\t")
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        if text != " ":
            words.append(text)
            boxes.append((x1, y1, x2, y2))
            labels.append(kie)
    return words, boxes, labels


def create_empty_kie_dict():
    """Return a dict mapping every label in ``cfg.KIE_LABELS`` to a new empty list.

    The label equal to ``cfg.IGNORE_KIE_LABEL`` is left out.

    NOTE(review): ``cfg`` is not imported in the visible part of this module;
    confirm it is provided before this function is called.
    """
    return {
        label: []
        for label in cfg.KIE_LABELS
        if label != cfg.IGNORE_KIE_LABEL
    }


def write_to_json_(file_path, content):
    """Dump ``content`` as JSON to ``file_path`` (UTF-8).

    Non-ASCII characters are written verbatim; key order follows the input.
    """
    with open(file_path, 'w', encoding='utf8') as out:
        json.dump(obj=content, fp=out, ensure_ascii=False)


def read_json(file_path):
    """Load and return the JSON document stored at ``file_path``.

    The file is decoded as UTF-8, matching the writers in this module, which
    emit UTF-8 with ``ensure_ascii=False``. Relying on the platform default
    encoding would mis-decode or reject non-ASCII content on some systems.
    """
    with open(file_path, 'r', encoding='utf8') as f:
        return json.load(f)


def get_name(file_path, ext: bool = True):
    """Return the final component of ``file_path``.

    When ``ext`` is false, the last extension (as split by
    ``os.path.splitext``) is removed from the name.
    """
    base = os.path.basename(file_path)
    if ext:
        return base
    stem, _suffix = os.path.splitext(base)
    return stem


def construct_file_path(dir, file_path, ext=''):
    '''
    Build a path inside ``dir`` from the file name of ``file_path``.

    args:
        dir: /path/to/dir
        file_path /example_path/to/file.txt
        ext = '.json'
    return
        /path/to/dir/file.json

    When ``ext`` is the empty string the original file name, including its
    extension, is kept; otherwise the last extension is replaced by ``ext``.
    '''
    if ext == '':
        return os.path.join(dir, get_name(file_path, True))
    return os.path.join(dir, get_name(file_path, False)) + ext


def write_to_txt_(file_path, content):
    """Write the string ``content`` to ``file_path``, replacing any existing file.

    The file is encoded as UTF-8, consistent with ``write_txt`` / ``read_txt``
    in this module. The platform default encoding can fail on, or mis-encode,
    non-ASCII text on some systems.
    """
    with open(file_path, 'w', encoding='utf8') as f:
        f.write(content)