75 lines
2.5 KiB
Python
75 lines
2.5 KiB
Python
|
from sdsvkvu.utils.post_processing import split_key_value_by_colon, remove_bullet_points_and_punctuation
|
||
|
|
||
|
|
||
|
def normalize_kvu_output(raw_outputs: dict) -> dict:
    """Return a copy of ``raw_outputs`` with its keys and values cleaned.

    Cleaning is delegated to ``remove_bullet_points_and_punctuation``
    (imported from ``sdsvkvu.utils.post_processing``; presumably it strips
    bullet markers and punctuation -- confirm against that helper).

    * The entry stored under the literal key ``"table"`` is treated as an
      iterable of row dicts: every column name is cleaned, and every cell
      that is neither ``None`` nor zero-length is cleaned as well.
    * Every other key is cleaned itself.  A list value has each element
      cleaned; any other value is cleaned only when it is neither ``None``
      nor zero-length.

    Args:
        raw_outputs: Mapping of field name to its extracted value.

    Returns:
        A new dict; ``raw_outputs`` itself is not modified.
    """

    def _clean_if_present(text):
        # None and zero-length values are passed through untouched.
        if text is None or len(text) == 0:
            return text
        return remove_bullet_points_and_punctuation(text)

    normalized = {}
    for field, content in raw_outputs.items():
        if field != "table":
            cleaned_field = remove_bullet_points_and_punctuation(field)
            if isinstance(content, list):
                # NOTE(review): list elements are cleaned unconditionally,
                # without the None/empty guard applied to scalar values.
                content = [
                    remove_bullet_points_and_punctuation(entry)
                    for entry in content
                ]
            else:
                content = _clean_if_present(content)
            normalized[cleaned_field] = content
            continue

        # The "table" key is matched before any cleaning and stored as-is.
        rows = []
        for row in content:
            cleaned_row = {}
            for header, cell in row.items():
                cleaned_header = remove_bullet_points_and_punctuation(header)
                cleaned_row[cleaned_header] = _clean_if_present(cell)
            rows.append(cleaned_row)
        normalized[field] = rows
    return normalized
|
||
|
|
||
|
|
||
|
def export_kvu_for_all(raw_outputs: dict) -> dict:
    """Flatten raw KVU results into a single normalized mapping.

    The sections of ``raw_outputs`` are read in this order, each by direct
    key lookup (so a missing section raises ``KeyError``):

    * ``"title"``   -- the ``"text"`` of the first entry, or ``None`` when
      the section is empty.
    * ``"single"``  -- dicts of ``key -> {"text": ...}``; each key/text pair
      goes through ``split_key_value_by_colon`` and the first two elements
      of its result become the output key and value.
    * ``"key"``     -- entries with a ``"text"`` field and no value; handled
      like ``"single"`` with ``None`` passed as the value.
    * ``"triplet"`` -- dicts of ``key -> list of {"text": ...}``; stored as
      a list of the texts.
    * ``"table"``   -- rows of cells carrying ``"header"`` and ``"text"``;
      each row becomes a ``header -> text`` dict.

    Later sections overwrite earlier ones on identical output keys.  The
    assembled dict is passed through ``normalize_kvu_output`` before being
    returned.

    Args:
        raw_outputs: Raw results holding the five sections listed above.

    Returns:
        The normalized flat mapping, with the rows under ``"table"``.
    """
    exported = {}

    # Title
    titles = raw_outputs["title"]
    if len(titles) > 0:
        exported["title"] = titles[0]["text"]
    else:
        exported["title"] = None

    # Pairs of key-value
    for pair in raw_outputs["single"]:
        for label, payload in pair.items():
            parts = split_key_value_by_colon(label, payload["text"])
            exported[parts[0]] = parts[1]

    # Only key fields
    for lone_key in raw_outputs["key"]:
        parts = split_key_value_by_colon(lone_key["text"], None)
        exported[parts[0]] = parts[1]

    # Triplet data
    for triplet in raw_outputs["triplet"]:
        for label, members in triplet.items():
            texts = []
            for member in members:
                texts.append(member["text"])
            exported[label] = texts

    # Table data
    rows = []
    for raw_row in raw_outputs["table"]:
        record = {}
        for cell in raw_row:
            record[cell["header"]] = cell["text"]
        rows.append(record)
    exported["table"] = rows

    return normalize_kvu_output(exported)
|
||
|
|
||
|
|
||
|
def merged_kvu_for_all_for_multi_pages(loutputs: list) -> dict:
    """Combine per-page output dicts into one dict.

    Non-table entries are copied page by page, so when the same key occurs
    on several pages the value from the last such page wins.  The value
    found under ``"table"`` on each page is collected, in page order, into
    a list stored under ``"table"`` in the result; pages without a
    ``"table"`` key contribute nothing to that list.

    Args:
        loutputs: Per-page output dicts, in page order.

    Returns:
        The merged dict.  It always contains a ``"table"`` key (an empty
        list when no page had one), inserted after all other keys.
    """
    page_tables = []
    combined = {}
    for page in loutputs:
        for field, content in page.items():
            if field != "table":
                combined[field] = content
                continue
            # Each page's table is kept as its own element, not flattened.
            page_tables.append(content)
    combined["table"] = page_tables
    return combined
|