sbt-idp/cope2n-ai-fi/common/utils/layoutLM_utils.py

from config import config as cfg
import json
import glob
from sklearn.model_selection import train_test_split
import os
import pandas as pd


def load_kie_labels_yolo(label_path):
    """Parse a tab-separated KIE label file into words, boxes and labels.

    Every non-blank line must contain exactly six tab-separated fields in
    the order ``x1, y1, x2, y2, text, kie``; the four coordinates are
    converted to ``int``. Entries whose text is exactly a single space are
    dropped from all three outputs.

    Args:
        label_path: Path of the label file. It is decoded as UTF-8.

    Returns:
        A ``(words, boxes, labels)`` tuple of three parallel lists: the text
        strings, the ``(x1, y1, x2, y2)`` integer tuples and the KIE label
        strings.

    Raises:
        ValueError: If a non-blank line does not split into six fields or a
            coordinate cannot be converted to ``int``.
    """
    # Explicit encoding: without it the platform default is used and
    # non-ASCII text decodes differently (or fails) from machine to machine.
    with open(label_path, "r", encoding="utf8") as f:
        lines = f.read().splitlines()
    words, boxes, labels = [], [], []
    for line in lines:
        # A blank line carries no annotation; skipping it avoids a crash in
        # the six-way unpacking below.
        if not line.strip():
            continue
        x1, y1, x2, y2, text, kie = line.split("\t")
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        if text != " ":
            words.append(text)
            boxes.append((x1, y1, x2, y2))
            labels.append(kie)
    return words, boxes, labels


def create_empty_kie_dict():
    """Return a dict mapping each KIE label to a new empty list.

    Labels are read from ``cfg.KIE_LABELS``; any label equal to
    ``cfg.IGNORE_KIE_LABEL`` is left out of the result.
    """
    empty_by_label = {}
    for idx in range(len(cfg.KIE_LABELS)):
        label = cfg.KIE_LABELS[idx]
        if label == cfg.IGNORE_KIE_LABEL:
            continue
        empty_by_label[label] = []
    return empty_by_label


def write_to_json_(file_path, content):
    """Serialize ``content`` as JSON into ``file_path``.

    The file is created or overwritten and written as UTF-8. Non-ASCII
    characters are written as-is instead of being escaped.
    """
    with open(file_path, "w", encoding="utf8") as out_file:
        json.dump(obj=content, fp=out_file, ensure_ascii=False)


def load_train_val_id_cards(train_root, label_path):
    """Pair label files with image paths and split them into two sets.

    Every ``*.txt`` file directly inside ``label_path`` is paired with an
    image path under ``train_root`` that has the same base name and a
    ``.jpg`` extension. The image files are not checked for existence.

    Args:
        train_root: Directory used to build the image paths.
        label_path: Directory that is searched for ``*.txt`` label files.

    Returns:
        A ``(train, test)`` pair as returned by ``train_test_split`` with
        ``test_size=0.2`` and ``random_state=cfg.SEED``. Both parts have the
        columns ``image_path`` and ``label``.
    """
    # glob yields files in an arbitrary, filesystem-dependent order; sorting
    # makes the seeded split pick the same files on every machine.
    train_labels = sorted(glob.glob(os.path.join(label_path, "*.txt")))
    # splitext swaps only the trailing extension, whereas str.replace would
    # rewrite every ".txt" occurrence inside the file name.
    img_names = [
        os.path.splitext(os.path.basename(train_label))[0] + ".jpg"
        for train_label in train_labels
    ]
    train_paths = [os.path.join(train_root, img_name) for img_name in img_names]
    train_df = pd.DataFrame.from_dict(
        {"image_path": train_paths, "label": train_labels}
    )
    train, test = train_test_split(train_df, test_size=0.2, random_state=cfg.SEED)
    return train, test


def read_json(file_path):
    """Load and return the JSON document stored at ``file_path``.

    The file is decoded as UTF-8, which is the encoding ``write_to_json_``
    uses when it writes unescaped non-ASCII characters. Relying on the
    platform default could fail or mis-decode such files.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load``.
    """
    with open(file_path, "r", encoding="utf8") as f:
        return json.load(f)


def get_name(file_path, ext: bool = True):
    """Return the last component of ``file_path``.

    Args:
        file_path: Path whose final component is wanted.
        ext: When truthy the name is returned with its extension; otherwise
            the last extension is removed.

    Returns:
        The file name, with or without its extension.
    """
    base_name = os.path.basename(file_path)
    if ext:
        return base_name
    stem, _suffix = os.path.splitext(base_name)
    return stem


def construct_file_path(dir, file_path, ext=""):
    """Build a path inside ``dir`` that reuses the file name of ``file_path``.

    Args:
        dir: Destination directory, e.g. ``/path/to/dir``.
        file_path: Path that supplies the file name, e.g.
            ``/example_path/to/file.txt``.
        ext: Extension to use instead of the original one, including the
            leading dot, e.g. ``'.json'``. The default empty string keeps
            the original file name unchanged.

    Returns:
        The joined path, e.g. ``/path/to/dir/file.json`` for the example
        values above, or ``/path/to/dir/file.txt`` when ``ext`` is empty.
    """
    if ext == "":
        # No replacement requested: keep the name together with its extension.
        return os.path.join(dir, get_name(file_path, True))
    stem = get_name(file_path, False)
    return os.path.join(dir, stem) + ext


def write_to_txt_(file_path, content):
    """Write the string ``content`` to ``file_path``, replacing the file.

    The file is written as UTF-8, the same encoding ``write_to_json_`` uses,
    so non-ASCII text does not depend on the platform default encoding.

    Args:
        file_path: Path of the text file to create or overwrite.
        content: Text to write.
    """
    with open(file_path, "w", encoding="utf8") as f:
        f.write(content)
Add everything 2023-11-30 11:22:16 +00:00			`from config import config as cfg`
			`import json`
			`import glob`
			`from sklearn.model_selection import train_test_split`
			`import os`
			`import pandas as pd`


			`def load_kie_labels_yolo(label_path):`
			`with open(label_path, "r") as f:`
			`lines = f.read().splitlines()`
			`words, boxes, labels = [], [], []`
			`for line in lines:`
			`x1, y1, x2, y2, text, kie = line.split("\t")`
			`x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)`
			`if text != " ":`
			`words.append(text)`
			`boxes.append((x1, y1, x2, y2))`
			`labels.append(kie)`
			`return words, boxes, labels`


			`def create_empty_kie_dict():`
			`return {`
			`cfg.KIE_LABELS[i]: []`
			`for i in range(len(cfg.KIE_LABELS))`
			`if cfg.KIE_LABELS[i] != cfg.IGNORE_KIE_LABEL`
			`}`


			`def write_to_json_(file_path, content):`
			`with open(file_path, mode="w", encoding="utf8") as f:`
			`json.dump(content, f, ensure_ascii=False)`


			`def load_train_val_id_cards(train_root, label_path):`
			`train_labels = glob.glob(os.path.join(label_path, "*.txt"))`
			`img_names = [`
			`os.path.basename(train_label).replace(".txt", ".jpg")`
			`for train_label in train_labels`
			`]`
			`train_paths = [os.path.join(train_root, img_name) for img_name in img_names]`
			`train_df = pd.DataFrame.from_dict(`
			`{"image_path": train_paths, "label": train_labels}`
			`)`
			`train, test = train_test_split(train_df, test_size=0.2, random_state=cfg.SEED)`
			`return train, test`


			`def read_json(file_path):`
			`with open(file_path, "r") as f:`
			`return json.load(f)`


			`def get_name(file_path, ext: bool = True):`
			`file_path_ = os.path.basename(file_path)`
			`return file_path_ if ext else os.path.splitext(file_path_)[0]`


			`def construct_file_path(dir, file_path, ext=""):`
			`"""`
			`args:`
			`dir: /path/to/dir`
			`file_path /example_path/to/file.txt`
			`ext = '.json'`
			`return`
			`/path/to/dir/file.json`
			`"""`
			`return (`
			`os.path.join(dir, get_name(file_path, True))`
			`if ext == ""`
			`else os.path.join(dir, get_name(file_path, False)) + ext`
			`)`


			`def write_to_txt_(file_path, content):`
			`with open(file_path, "w") as f:`
			`f.write(content)`