from config import config as cfg import json import glob from sklearn.model_selection import train_test_split import os import pandas as pd def load_kie_labels_yolo(label_path): with open(label_path, "r") as f: lines = f.read().splitlines() words, boxes, labels = [], [], [] for line in lines: x1, y1, x2, y2, text, kie = line.split("\t") x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) if text != " ": words.append(text) boxes.append((x1, y1, x2, y2)) labels.append(kie) return words, boxes, labels def create_empty_kie_dict(): return { cfg.KIE_LABELS[i]: [] for i in range(len(cfg.KIE_LABELS)) if cfg.KIE_LABELS[i] != cfg.IGNORE_KIE_LABEL } def write_to_json_(file_path, content): with open(file_path, mode="w", encoding="utf8") as f: json.dump(content, f, ensure_ascii=False) def load_train_val_id_cards(train_root, label_path): train_labels = glob.glob(os.path.join(label_path, "*.txt")) img_names = [ os.path.basename(train_label).replace(".txt", ".jpg") for train_label in train_labels ] train_paths = [os.path.join(train_root, img_name) for img_name in img_names] train_df = pd.DataFrame.from_dict( {"image_path": train_paths, "label": train_labels} ) train, test = train_test_split(train_df, test_size=0.2, random_state=cfg.SEED) return train, test def read_json(file_path): with open(file_path, "r") as f: return json.load(f) def get_name(file_path, ext: bool = True): file_path_ = os.path.basename(file_path) return file_path_ if ext else os.path.splitext(file_path_)[0] def construct_file_path(dir, file_path, ext=""): """ args: dir: /path/to/dir file_path /example_path/to/file.txt ext = '.json' return /path/to/dir/file.json """ return ( os.path.join(dir, get_name(file_path, True)) if ext == "" else os.path.join(dir, get_name(file_path, False)) + ext ) def write_to_txt_(file_path, content): with open(file_path, "w") as f: f.write(content)