52 lines
1.6 KiB
Python
52 lines
1.6 KiB
Python
|
from common.utils.word_formation import words_to_lines
|
||
|
from Kie_AHung_ID.prediction import KIE_LABELS, IGNORE_KIE_LABEL
|
||
|
from common.post_processing_datetime import DatetimeCorrector
|
||
|
|
||
|
|
||
|
def merge_bbox(list_bbox):
    """Return the smallest box that encloses every box in ``list_bbox``.

    Each box is read by index as ``[left, top, right, bottom]``; only the
    first four items of a box are used.

    Args:
        list_bbox: Sequence of indexable boxes.

    Returns:
        A new list ``[left, top, right, bottom]`` covering all boxes, or
        ``list_bbox`` itself, unchanged, when it is empty or otherwise falsy.
    """
    if not list_bbox:
        # Nothing to merge: hand the (empty) input back as-is.
        return list_bbox
    left = min(box[0] for box in list_bbox)
    top = min(box[1] for box in list_bbox)
    right = max(box[2] for box in list_bbox)
    bot = max(box[3] for box in list_bbox)
    return [left, top, right, bot]
|
||
|
|
||
|
|
||
|
def create_result_kie_dict():
    """Build the result skeleton: one empty dict per KIE label.

    Returns:
        A dict keyed by every entry of ``KIE_LABELS`` except
        ``IGNORE_KIE_LABEL``; each value is a fresh empty dict.
    """
    return {label: {} for label in KIE_LABELS if label != IGNORE_KIE_LABEL}
|
||
|
|
||
|
|
||
|
def create_empty_kie_dict():
    """Build the grouping skeleton: one empty list per KIE label.

    Returns:
        A dict keyed by every entry of ``KIE_LABELS`` except
        ``IGNORE_KIE_LABEL``; each value is a fresh empty list.
    """
    return {label: [] for label in KIE_LABELS if label != IGNORE_KIE_LABEL}
|
||
|
|
||
|
|
||
|
def create_kie_dict(list_words):
    """Group words by KIE label and build one text/bbox entry per label.

    Words whose ``kie_label`` is not a key of the dict returned by
    ``create_empty_kie_dict()`` are left out of the result. For every
    remaining label the words are passed to ``words_to_lines``, the stripped
    line texts are joined with ``"\\n "``, and the line bounding boxes are
    merged with ``merge_bbox``. The text of the ``"date"`` label is
    additionally passed through ``DatetimeCorrector.correct``.

    Side effect: ``word.text`` is stripped in place on the input words.

    Args:
        list_words: Iterable of word objects exposing ``kie_label`` and
            ``text`` attributes.

    Returns:
        A dict mapping each label to ``{"text": ..., "bbox": ...}``.
    """
    kie_dict = create_empty_kie_dict()
    # Append each word to the list of its respective label.
    for word in list_words:
        if word.kie_label in kie_dict:
            kie_dict[word.kie_label].append(word)
        # NOTE(review): indentation was lost in the source this was recovered
        # from; stripping is applied to every word here. Confirm it was not
        # meant to run only for words with a kept label.
        word.text = word.text.strip()

    # Construct lines from the grouped words for each label.
    result_dict = create_result_kie_dict()
    for kie_label, entry in result_dict.items():
        list_lines, _ = words_to_lines(kie_dict[kie_label])
        text = "\n ".join(line.text.strip() for line in list_lines)
        if kie_label == "date":
            text = DatetimeCorrector.correct(text)
        entry["text"] = text
        entry["bbox"] = merge_bbox([line.boundingbox for line in list_lines])
    return result_dict
|