# %%
"""Convert word-level KIE labels (YOLO-style .txt files) into per-field JSON files.

For every ``*.txt`` label file in the label directory, the words are grouped by
their KIE label, merged into text lines, and written out as one JSON file per
input file.
"""
import glob
import os
import sys

import pandas as pd  # NOTE(review): appears unused in this file; kept in case cells rely on it

# Add the current working directory to sys.path (expected to be the Fiintrade/
# project root); required so the project imports below resolve when running main().
# TODO: replace this sys.path manipulation with a proper package install.
sys.path.append(".")
from srcc.tools.utils import (
    load_kie_labels_yolo,
    create_empty_kie_dict,
    write_to_json_,
    load_train_val_id_cards,
)
from OCRBase.config import config as cfg

# TODO: machine-specific absolute path; make it configurable or relative.
sys.path.append("/home/sds/hoangmd/TokenClassification")
# The wildcard imports are kept as-is: Word, words_to_lines and
# throw_overlapping_words are not imported by name anywhere in this file, so they
# presumably come from one of these two modules (verify before narrowing).
from src.experiments.word_formation import *
from process_label import *

KIE_LABEL_DIR = "data/label/207/kie"
# TODO: machine-specific absolute path; make it configurable or relative.
KIE_LABEL_LINE_PATH = "/home/sds/hungbnt/KIE_pretrained/data/label/207/json"


# %%
def create_kie_dict(list_words):
    """Group words by KIE label and merge each group into multi-line text.

    Args:
        list_words: Word objects exposing ``kie_label`` and ``text``. The text of
            every word that is kept is stripped in place.

    Returns:
        The dict produced by ``create_empty_kie_dict()`` with each value replaced
        by the stripped text of that label's lines, joined with ``"\\n "``.
        Words whose label is not a key of that dict are ignored.
    """
    # Assumes create_empty_kie_dict() maps every known label to an empty list
    # (the values are appended to below) - confirm against its definition.
    kie_dict = create_empty_kie_dict()
    list_words = throw_overlapping_words(list_words)
    for word in list_words:
        if word.kie_label in kie_dict:
            word.text = word.text.strip()
            kie_dict[word.kie_label].append(word)
    for kie_label in kie_dict:
        # words_to_lines returns a pair; only the first element (the lines) is used.
        list_lines, _ = words_to_lines(kie_dict[kie_label])
        kie_dict[kie_label] = "\n ".join([line.text.strip() for line in list_lines])
    return kie_dict


# %%
def main(label_dir=None, output_dir=None):
    """Convert every ``*.txt`` label file in ``label_dir`` to a JSON file.

    Args:
        label_dir: Directory containing the word-level label files. Defaults to
            the module constant ``KIE_LABEL_DIR`` (looked up at call time).
        output_dir: Directory receiving one ``<name>.json`` per input file.
            Defaults to the module constant ``KIE_LABEL_LINE_PATH`` (looked up
            at call time). Created if it does not exist.
    """
    if label_dir is None:
        label_dir = KIE_LABEL_DIR
    if output_dir is None:
        output_dir = KIE_LABEL_LINE_PATH

    os.makedirs(output_dir, exist_ok=True)

    for label_path in glob.glob(os.path.join(label_dir, "*.txt")):
        words, bboxes, kie_labels = load_kie_labels_yolo(label_path)
        # Index explicitly (rather than zip) so a length mismatch between the
        # three parallel lists still raises instead of being silently truncated.
        list_words = [
            Word(text=words[i], bndbox=bboxes[i], kie_label=kie_label)
            for i, kie_label in enumerate(kie_labels)
        ]
        kie_dict = create_kie_dict(list_words)

        # Swap only the final extension; str.replace would also rewrite any
        # ".txt" occurring earlier in the file name.
        stem, _ = os.path.splitext(os.path.basename(label_path))
        kie_path = os.path.join(output_dir, stem + ".json")
        write_to_json_(kie_path, kie_dict)


# %%
if __name__ == "__main__":
    main()