from common.utils.word_formation import words_to_lines
from Kie_AHung.prediction import KIE_LABELS, IGNORE_KIE_LABEL
from common.post_processing_datetime import DatetimeCorrector


def merge_bbox(list_bbox):
    """Return the smallest box enclosing every box in *list_bbox*.

    Each box is indexed as ``[left, top, right, bot]``.

    Args:
        list_bbox: Sequence of boxes to merge.

    Returns:
        ``[left, top, right, bot]`` of the enclosing box, or *list_bbox*
        itself (unchanged) when it is empty.
    """
    if not list_bbox:
        return list_bbox
    left = min(bbox[0] for bbox in list_bbox)
    top = min(bbox[1] for bbox in list_bbox)
    right = max(bbox[2] for bbox in list_bbox)
    bot = max(bbox[3] for bbox in list_bbox)
    return [left, top, right, bot]


def create_result_kie_dict():
    """Return a dict mapping every non-ignored KIE label to a fresh ``{}``."""
    return {label: {} for label in KIE_LABELS if label != IGNORE_KIE_LABEL}


def create_empty_kie_dict():
    """Return a dict mapping every non-ignored KIE label to a fresh ``[]``."""
    return {label: [] for label in KIE_LABELS if label != IGNORE_KIE_LABEL}


def create_kie_dict(list_words):
    """Group words by KIE label and summarise each label as text + bbox.

    Words whose ``kie_label`` is a non-ignored entry of ``KIE_LABELS`` are
    collected per label, assembled into lines with ``words_to_lines``, and
    reduced to the joined line text and the box enclosing all lines.

    Note:
        ``word.text`` is stripped in place, so the objects in *list_words*
        are mutated.

    Args:
        list_words: Iterable of word objects exposing ``kie_label`` and
            ``text`` attributes.

    Returns:
        Dict keyed by KIE label; each value is
        ``{"text": <str>, "bbox": <merged box or empty list>}``.
    """
    kie_dict = create_empty_kie_dict()

    # Bucket each word under its label; labels not in kie_dict are dropped.
    for word in list_words:
        if word.kie_label in kie_dict:
            kie_dict[word.kie_label].append(word)
        # NOTE(review): applied to every word, including dropped ones --
        # confirm this matches the intended nesting.
        word.text = word.text.strip()

    # Build lines from the words of each label, then summarise them.
    result_dict = create_result_kie_dict()
    for kie_label, entry in result_dict.items():
        list_lines, _ = words_to_lines(kie_dict[kie_label])
        text = "\n ".join(line.text.strip() for line in list_lines)
        if kie_label == "date":
            # Date text goes through the project's datetime corrector.
            text = DatetimeCorrector.correct(text)

        entry["text"] = text
        entry["bbox"] = merge_bbox([line.boundingbox for line in list_lines])

    return result_dict