sbt-idp/cope2n-ai-fi/common/post_processing_id.py

52 lines
1.6 KiB
Python
Raw Permalink Normal View History

2023-11-30 11:22:16 +00:00
from common.utils.word_formation import words_to_lines
from Kie_AHung_ID.prediction import KIE_LABELS, IGNORE_KIE_LABEL
from common.post_processing_datetime import DatetimeCorrector
def merge_bbox(list_bbox):
    """Return the smallest box enclosing every box in ``list_bbox``.

    Each box is read by index: positions 0 and 1 are minimised, positions
    2 and 3 are maximised, giving ``[left, top, right, bot]``.

    A falsy ``list_bbox`` (for example an empty list or ``None``) is
    returned unchanged.
    """
    if not list_bbox:
        return list_bbox
    # Coordinates are evaluated left, top, right, bottom, in that order.
    return [
        min(box[0] for box in list_bbox),
        min(box[1] for box in list_bbox),
        max(box[2] for box in list_bbox),
        max(box[3] for box in list_bbox),
    ]
def create_result_kie_dict():
    """Build a new dict mapping each KIE label to its own empty dict.

    Labels are taken from ``KIE_LABELS`` in order; any label equal to
    ``IGNORE_KIE_LABEL`` is left out.
    """
    return {label: {} for label in KIE_LABELS if label != IGNORE_KIE_LABEL}
def create_empty_kie_dict():
    """Build a new dict mapping each KIE label to its own empty list.

    Labels are taken from ``KIE_LABELS`` in order; any label equal to
    ``IGNORE_KIE_LABEL`` is left out.
    """
    return {label: [] for label in KIE_LABELS if label != IGNORE_KIE_LABEL}
def create_kie_dict(list_words):
    """Group words by KIE label and summarise each label as text and bbox.

    Args:
        list_words: Iterable of word objects exposing ``kie_label`` and a
            writable ``text`` attribute. Each word's ``text`` is stripped
            in place.

    Returns:
        The dict produced by ``create_result_kie_dict`` with, for every
        label, a ``"text"`` entry (the stripped texts of the label's lines
        joined by a newline followed by a space, passed through
        ``DatetimeCorrector.correct`` for the ``"date"`` label) and a
        ``"bbox"`` entry (``merge_bbox`` over the lines' ``boundingbox``).
    """
    # Bucket the words under their label; unknown/ignored labels are skipped.
    words_by_label = create_empty_kie_dict()
    for word in list_words:
        bucket = words_by_label.get(word.kie_label)
        if bucket is not None:
            bucket.append(word)
        # NOTE(review): assumed to run for every word, including words whose
        # label is not kept -- confirm against the original indentation.
        word.text = word.text.strip()

    # Rebuild lines per label, then derive the joined text and merged box.
    result_dict = create_result_kie_dict()
    for kie_label, entry in result_dict.items():
        list_lines, _ = words_to_lines(words_by_label[kie_label])
        stripped_texts = [line.text.strip() for line in list_lines]
        text = "\n ".join(stripped_texts)
        if kie_label == "date":
            text = DatetimeCorrector.correct(text)
        entry["text"] = text
        entry["bbox"] = merge_bbox([line.boundingbox for line in list_lines])
    return result_dict