68 lines
1.8 KiB
Python
Executable File
68 lines
1.8 KiB
Python
Executable File
# %%
|
|
# from pathlib import Path # add Fiintrade path to import config, required to run main()
|
|
import sys
|
|
|
|
# TODO: Document why the sys.path modification below is needed, or remove it.
|
|
sys.path.append(".") # add Fiintrade/ to path
|
|
|
|
|
|
from srcc.tools.utils import (
|
|
load_kie_labels_yolo,
|
|
create_empty_kie_dict,
|
|
write_to_json_,
|
|
load_train_val_id_cards,
|
|
)
|
|
import glob
|
|
from OCRBase.config import config as cfg
|
|
import os
|
|
import pandas as pd
|
|
|
|
sys.path.append("/home/sds/hoangmd/TokenClassification") # TODO: Why there are bunch of absolute path here
|
|
from src.experiments.word_formation import *
|
|
from process_label import *
|
|
|
|
# Directory that main() scans for word-level KIE label files (*.txt).
KIE_LABEL_DIR = "data/label/207/kie"
# Directory into which main() writes one JSON file per label file.
# TODO: This is an absolute, machine-specific path; make it relative or configurable.
KIE_LABEL_LINE_PATH = "/home/sds/hungbnt/KIE_pretrained/data/label/207/json"
|
|
|
|
# %%
|
|
|
|
|
|
def create_kie_dict(list_words):
    """Group words by KIE label and collapse each group into one text block.

    Starts from the mapping returned by ``create_empty_kie_dict()``, filters the
    input through ``throw_overlapping_words`` and appends every remaining word
    whose ``kie_label`` is a key of that mapping.  Word texts are stripped in
    place, so the objects passed in are mutated.  Each group is then turned into
    lines with ``words_to_lines`` and the stripped line texts are joined with
    ``"\n "``.

    Args:
        list_words: Word objects exposing ``kie_label`` and ``text`` attributes.

    Returns:
        The mapping from ``create_empty_kie_dict()`` with every value replaced
        by the joined text of its lines.
    """
    grouped = create_empty_kie_dict()

    for item in throw_overlapping_words(list_words):
        label = item.kie_label
        if label in grouped:
            grouped[label].append(item)
        item.text = item.text.strip()

    # Only existing keys are reassigned, so the mapping keeps its size while
    # it is being iterated.
    for label, members in grouped.items():
        lines, _ignored = words_to_lines(members)
        grouped[label] = "\n ".join(entry.text.strip() for entry in lines)

    return grouped
|
|
|
|
|
|
# %%
|
|
|
|
|
|
def main():
    """Convert every word-level KIE label file into a per-field JSON file.

    For each ``*.txt`` file found in ``KIE_LABEL_DIR``, the labels are read with
    ``load_kie_labels_yolo``, every (text, bounding box, label) triple is wrapped
    in a ``Word``, the words are grouped with ``create_kie_dict`` and the result
    is written with ``write_to_json_`` into ``KIE_LABEL_LINE_PATH`` under the
    same base name with a ``.json`` extension.
    """
    for label_path in glob.glob(f"{KIE_LABEL_DIR}/*.txt"):
        words, bboxes, kie_labels = load_kie_labels_yolo(label_path)
        list_words = [
            Word(text=words[i], bndbox=bboxes[i], kie_label=kie_label)
            for i, kie_label in enumerate(kie_labels)
        ]

        kie_dict = create_kie_dict(list_words)

        # Swap only the trailing extension.  str.replace(".txt", ".json") would
        # also rewrite any ".txt" occurring earlier in the file name and would
        # miss an extension written in a different case.
        stem, _ext = os.path.splitext(os.path.basename(label_path))
        kie_path = os.path.join(KIE_LABEL_LINE_PATH, stem + ".json")
        write_to_json_(kie_path, kie_dict)
|
|
|
|
|
|
# %%
|
|
|
|
|
|
# Run the conversion only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|