50 lines
1.6 KiB
Python
50 lines
1.6 KiB
Python
|
import os
|
||
|
|
||
|
import json
|
||
|
from pathlib import Path
|
||
|
|
||
|
def read_json(json_path):
    """Load and return the JSON document stored at ``json_path``.

    The file is decoded explicitly as UTF-8 instead of relying on the
    platform/locale default encoding, so files containing non-ASCII text
    are read the same way on every machine.

    Args:
        json_path: Path of the JSON file to read (anything ``open`` accepts).

    Returns:
        The Python object produced by ``json.load`` for the file content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        return json.load(f)
|
||
|
|
||
|
def write_json(json_path, data):
    """Serialize ``data`` as JSON into the file at ``json_path``.

    Non-ASCII characters are written verbatim (``ensure_ascii=False``), so
    the file is opened explicitly as UTF-8: with the locale default encoding
    the write could fail or produce a file that is not valid UTF-8 JSON.
    An existing file at ``json_path`` is overwritten.

    Args:
        json_path: Destination path (anything ``open`` accepts).
        data: JSON-serializable object to write.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If ``data`` contains objects ``json`` cannot serialize.
    """
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False)
|
||
|
|
||
|
|
||
|
def clean_json(json_in, json_out, valid_names):
    """Write a copy of ``json_in`` that keeps only the allowed top-level keys.

    The input document is read with ``read_json``; it must be a JSON object,
    because its entries are iterated with ``.items()``. Every entry whose key
    is in ``valid_names`` is copied unchanged (original order preserved) and
    the result is written with ``write_json``.

    Args:
        json_in: Path of the JSON file to filter.
        json_out: Path of the filtered JSON file to write (overwritten).
        valid_names: Iterable of hashable key names to keep.
    """
    # Materialise the names once: membership tests become O(1) instead of a
    # linear scan per key, and a one-shot iterable (e.g. a generator) is not
    # exhausted by the first lookup.
    allowed = set(valid_names)
    data = read_json(json_in)
    out_data = {
        name_key: items
        for name_key, items in data.items()
        if name_key in allowed
    }
    write_json(json_out, out_data)
|
||
|
|
||
|
# Previous configuration, kept for reference:
# DIRNAMES = ['SL_HCM', 'SL_HN_batch_1', 'SL_HN_batch_2', 'Invoices_SAVINA']
# ROOT_DIR = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/PDF/multi_page"

# Active configuration: sub-directories of ROOT_DIR whose entries define
# which keys are kept in the JSON file.
DIRNAMES = ['test_sbt_v2']
ROOT_DIR = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed"

for dirname in DIRNAMES:
    # NOTE(review): the input/output paths do not depend on ``dirname``, so
    # with several DIRNAMES each pass would rewrite the same output file.
    json_path = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/test_end2end/sbt_validation_e2e.json"
    json_out_path = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/test_end2end/sbt_validation_e2e_v2.json"

    # Allowed keys are the stems (file name without its last suffix) of every
    # entry found directly inside ROOT_DIR/dirname.
    valid_names = [entry.stem for entry in Path(ROOT_DIR, dirname).glob("*")]
    print(valid_names)

    clean_json(json_path, json_out_path, valid_names)
|
||
|
|
||
|
|
||
|
# def combine_json(json_paths, json_out):
#     datas = [read_json(json_path) for json_path in json_paths]
#     out_data = {}
#     for data in datas:
#         out_data.update(data)
#     write_json(json_out, out_data)
#
#
# json_paths = [Path(ROOT_DIR) / (dirname + "_out.json") for dirname in DIRNAMES]
# json_out = ROOT_DIR + "/test_e2e_multi_pages.json"
# combine_json(json_paths, json_out)
|
||
|
|