Remove unused files

Viet Anh Nguyen 2023-12-14 13:49:42 +07:00
parent 6873ffce05
commit eb2c08195d
119 changed files with 0 additions and 16408 deletions

View File

@ -1,13 +0,0 @@
*.pyc
__pycache__
.cache
/microsoft
weights/
workdirs/
wandb
sdsvkie/tools/sample_cvat
notebooks/workdirs
external/
notebooks/visualize
*.egg-info
./external/sdsv_dewarp

View File

@ -1,75 +0,0 @@
<p align="center">
<h1 align="center">SDSVKIE</h1>
</p>
***Features***
- Extract information from documents: VAT invoices, receipts
- Languages: Vietnamese (VI) and English (EN)
***What's new***
### Ver 1.0.1
- Improved postprocessing for receipts
- Support for multi-page PDF files
- Latest weights: /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_9_lr5e_6_no_scheduler/best
- Latest config: /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_9_lr5e_6_no_scheduler/config.yaml
## I. Setup
***Dependencies***
- Python: 3.8
- Torch: 1.10.2
- CUDA: 11.6
- transformers: 4.28.1
```
pip install -v -e .
```
## II. Inference
```
from sdsvkie import Predictor
import cv2
predictor = Predictor(
cfg="./workdirs/training/sdsap_receipt/exp_3/config.yaml",
weights="./workdirs/training/sdsap_receipt/exp_3/best",
device="cpu",
)
img = cv2.imread("./demos/4 Sep OPC to Home.jpg")
out = predictor(img)
output = out['end2end_results']
```
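The exact keys in `end2end_results` depend on the field set the model was trained with, so the snippet below is only a sketch that assumes it is a flat `{field_name: value}` dictionary:
```
# Sketch only: assumes out['end2end_results'] maps field names to extracted values.
for field, value in output.items():
    print(f"{field}: {value}")
```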
## III. Training
- Prepare the dataset. The dataset directory is organized as follows, with each image paired with a label file of the same name (a quick layout check is sketched after the training command below):
```
└── base_dataset
    ├── train
    │   ├── sub_dir_1
    │   │   ├── img1.jpg
    │   │   ├── img1.txt
    │   │   └── ...
    │   └── sub_dir_2
    │       ├── img2.jpg
    │       └── img2.txt
    └── test
        ├── imgn.jpg
        └── imgn.txt
```
- Edit and run scripts:
```
sh ./scripts/train.sh
```
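As a sanity check of the layout above, a small script can verify that every image in the dataset tree has a matching `.txt` label. This is only a sketch, not part of the original tooling; the `./base_dataset` root path is an assumption and should be adjusted:
```
# Sanity-check sketch: confirm each image in the dataset tree has a matching .txt label.
from pathlib import Path

root = Path("./base_dataset")  # assumed dataset root
for img in root.rglob("*"):
    if img.suffix.lower() in {".jpg", ".jpeg", ".png"}:
        if not img.with_suffix(".txt").exists():
            print(f"Missing label for {img}")
```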
# TODO
- [ ] Add more fields: sub_total, tips, seller_address, item list
- [x] Support multiple pages
- [x] Review KIE results for invoices (vnpt_exp_4_model)
- [x] Fix unnormalize-box error in some cases
- [x] Create 200 multi-page invoices
- [ ] Finalize the multi-page test set
- [ ] Evaluate results

Binary file not shown.

Before

Width:  |  Height:  |  Size: 222 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 331 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 513 KiB

View File

@ -1,73 +0,0 @@
350 31 403 58 dịch
329 143 386 166 ngoài
104 298 174 321 CƯỚC
323 244 375 268 ĐẾN
739 67 810 91 (Price)
610 67 663 91 Unit)
1109 26 1174 55 Thuế
181 143 231 166 hãng
102 244 145 268 PHÍ
1021 27 1076 55 thuế
25 67 73 92 (No)
151 350 240 374 CHỨNG
893 25 949 54 Tiền
938 68 1033 92 (Amount)
150 244 198 268 XẾP
247 244 316 267 CẢNG
967 301 1080 323 68.730.120
998 354 1080 376 760.000
1394 31 1473 55 Thành
102 349 146 375 PHÍ
1477 25 1527 56 tiền
784 30 823 59 giá
241 68 374 92 (Description)
293 31 345 58 hóa,
228 32 288 58 hàng
1383 66 1447 92 (Total
247 350 333 374 TỪ-D/O
103 142 145 162 Thu
1196 82 1254 112 Tiền
203 244 242 268 DỠ
781 353 863 376 760.000
181 298 240 321 BIỂN
979 248 1080 270 6.342.336
174 31 222 54 Tên
533 66 579 92 (Qty
1254 354 1324 375 40.000
1486 353 1567 375 800.000
1467 247 1568 270 6.342.336
750 301 864 323 68.730.120
273 144 324 162 nước
956 32 1015 54 chưa
726 32 778 54 Đơn
1455 300 1568 323 68.730.120
148 142 177 166 hộ
1180 30 1321 59 GTGT✪(VAT)
1258 81 1316 111 thuế
531 30 568 54 SL
104 195 285 215 BL146201088385
604 31 663 53 ĐVT
763 248 863 270 6.342.336
22 31 76 54 STT
1451 68 1535 92 amount)
407 36 440 59 vụ
235 143 268 163 tàu
260 302 304 321 O.F.
1120 87 1149 112 %
583 66 605 88 &
573 30 598 54 &
591 301 624 322 BL
591 353 625 376 BL
1115 306 1154 322 XXX
41 301 57 322 2
1115 360 1154 375 XXX
1114 253 1154 269 XXX
42 354 56 375 3
591 247 625 270 BL
44 248 55 268 1
1310 301 1326 322 0
574 301 586 322 1
573 354 586 375 1
573 248 586 269 1
244 303 256 322 -
1310 247 1326 270 0

Binary file not shown.

Before

Width:  |  Height:  |  Size: 246 KiB

View File

@ -1,198 +0,0 @@
749 41 785 60 suất
746 18 789 37 Thuế
746 70 788 86 (Rate)
192 35 226 52 dịch
812 30 850 49 Tiền
341 70 382 86 (Unit)
15 104 63 120 CƯỚC
632 29 670 48 Tiền
673 33 714 52 hàng
110 34 150 52 hàng
673 674 727 689 855.000
841 552 894 567 169.742
433 34 479 52 lượng
660 552 727 567 3.227.035
345 46 377 60 tính
841 228 894 243 169.742
154 34 188 51 hóa,
973 227 1040 243 3.396.777
66 103 102 120 TÀU
841 715 894 729 111.509
660 227 727 243 3.227.035
673 512 727 527 855.000
673 187 727 202 855.000
660 714 727 730 2.119.950
474 895 510 912 chiếu
118 58 207 75 (Description)
605 899 639 914 nhận
388 895 422 913 kiểm
973 552 1040 567 3.396.777
841 58 899 74 Amount)
75 34 107 49 Tên
988 29 1020 49 tiền
847 188 894 201 44.973
535 227 602 243 3.227.035
926 58 968 74 (Total
986 187 1039 202 899.973
334 23 368 37 Đơn
852 30 890 49 thuế
652 755 727 770 23.507.890
965 268 1040 283 38.111.990
973 390 1039 405 3.396.777
548 187 603 202 855.000
410 58 479 75 (Quantity)
547 59 589 74 Price)
841 390 894 405 169.742
652 593 727 608 38.111.990
535 552 602 568 3.227.035
847 512 894 526 44.973
965 106 1040 121 38.111.990
932 34 985 49 Thành
537 898 563 915 lập,
342 107 380 120 45GP
15 147 43 161 Thu
47 185 106 202 CHỨNG
673 349 727 364 855.000
847 674 894 689 44.973
548 674 603 689 855.000
567 900 601 915 giao,
555 33 580 52 giá
660 389 727 405 3.227.035
15 185 43 201 PHÍ
985 674 1039 689 899.973
652 268 727 284 38.111.990
351 895 386 915 (Cần
671 898 704 915 đơn)
675 59 731 74 amount)
970 59 1026 74 amount)
45 147 65 164 hộ
985 512 1039 527 899.973
973 714 1040 730 2.231.459
448 895 472 913 đối
535 715 603 730 2.119.950
652 106 727 121 38.111.990
965 593 1040 608 38.111.990
548 512 603 527 855.000
15 266 63 282 CƯỚC
508 58 544 74 (Unit
46 509 106 526 CHỨNG
408 30 429 49 Số
965 430 1040 446 38.111.990
46 225 79 242 XÉP
105 147 249 161 SNKO010220804769
15 428 62 444 CƯỚC
527 106 602 121 38.111.990
985 349 1039 364 899.973
15 590 63 607 CƯỚC
642 898 669 913 hóa
527 268 603 284 38.111.990
65 589 101 607 TÀU
15 752 63 769 CƯỚC
46 671 106 688 CHỨNG
15 225 43 242 PHÍ
341 228 380 242 45GP
14 633 43 648 Thu
847 350 894 364 44.973
547 349 603 365 855.000
65 427 101 445 TÀU
513 898 536 913 khi
14 471 43 485 Thu
673 837 727 851 855.000
15 309 43 323 Thu
14 508 44 526 PHÍ
965 755 1040 770 23.507.890
67 148 101 161 SNK
14 670 43 688 PHÍ
527 593 603 608 38.111.990
527 756 602 770 23.507.890
65 751 101 769 TÀU
44 470 65 489 hộ
517 34 551 48 Đơn
45 548 78 566 XÉP
14 549 43 567 PHÍ
65 265 102 283 TÀU
535 389 602 406 3.227.035
14 386 44 404 PHÍ
14 832 43 850 PHÍ
46 347 106 364 CHỨNG
44 633 65 651 hộ
548 837 602 850 855.000
46 833 106 850 CHỨNG
44 308 65 326 hộ
45 386 79 404 XÃP
14 346 43 364 PHÍ
14 795 43 810 Thu
526 430 602 446 38.111.990
802 58 840 74 (VAT
45 711 79 729 XÉP
848 837 893 850 44.973
80 225 107 242 DỠ
230 37 251 52 vụ
754 109 781 120 XXX
652 430 727 446 38.111.990
80 549 107 567 DỠ
350 188 372 202 BL
109 184 136 202 TỪ
616 58 672 73 (Pre-tax
44 795 65 813 hộ
986 837 1039 851 899.973
341 756 380 770 22GP
80 387 107 404 DỠ
424 900 445 912 tra
14 711 43 729 PHÍ
104 796 249 810 SNKO010220805559
104 309 248 323 SNKO010220805023
341 268 380 283 45GP
66 634 101 647 SNK
80 711 107 729 DỠ
372 22 389 40 vị
108 670 136 689 TỪ
104 633 248 648 SNKO010220805118
754 556 781 567 XXX
754 759 781 769 XXX
754 597 781 607 XXX
66 472 101 485 SNK
66 309 101 323 SNK
755 719 780 729 XXX
108 832 136 851 TỪ
341 431 380 445 45GP
754 515 781 526 XXX
754 840 781 850 XXX
341 715 380 729 22GP
108 345 136 364 TỪ
754 678 781 688 XXX
66 796 101 810 SNK
341 390 380 405 45GP
104 471 249 486 SNKO010220805117
341 553 380 567 45GP
108 508 136 527 TỪ
755 232 780 242 XXX
754 435 781 445 XXX
341 593 380 608 45GP
754 394 781 405 XXX
755 272 781 283 XXX
754 191 780 202 XXX
349 836 373 851 BL
754 353 780 364 XXX
349 349 372 364 BL
440 106 448 120 1
887 106 896 121 1
440 553 448 567 1
440 755 448 769 1
441 715 448 729 1
888 756 896 769 /
441 228 448 242 1
440 593 448 608 1
440 269 449 283 1
440 836 448 850 1
440 349 449 364 1
888 269 896 283 1
440 390 448 404 1
440 188 448 202 1
440 431 448 445 1
888 431 896 445 /
350 512 372 526 BL
440 674 448 688 1
888 594 896 607 /
349 675 372 689 BL
440 512 448 526 1

Binary file not shown.

Before

Width:  |  Height:  |  Size: 159 KiB

View File

@ -1,76 +0,0 @@
162 92 206 111 hàng
263 152 303 175 gồm
133 124 188 142 chứng
132 26 176 46 hàng
224 26 261 45 dịch
267 124 311 142 nhập
928 23 974 43 Thuế
218 124 261 142 hàng
93 53 202 73 Descriptions
343 155 378 175 phụ
1110 23 1151 43 thuế
1067 23 1107 43 Tiền
181 26 219 45 hóa,
98 156 148 172 Cước
216 152 258 174 biển
26 54 56 70 No.
1185 26 1240 42 Thành
437 53 469 73 Qty
270 87 304 107 đến
92 26 127 43 Tên
1245 23 1279 44 tiền
979 23 1018 43 suất
640 26 669 47 giá
152 156 185 174 vận
98 124 129 140 Phí
98 92 129 109 Phí
1075 92 1144 109 166.904
813 92 900 109 3.171.168
866 23 906 43 thuế
97 178 127 198 phí
1223 54 1267 71 Total
26 26 52 43 Stt
308 155 340 171 các
684 54 728 70 Price
1246 92 1333 109 3.338.072
769 23 809 43 Tiền
1236 167 1333 184 36.393.480
673 26 728 46 (VND)
803 167 899 183 36.393.480
1085 124 1144 140 39.474
830 124 900 140 750.000
636 92 724 109 3.171.168
625 168 723 183 36.393.480
1082 54 1151 71 Amount
652 124 723 140 750.000
1284 26 1339 46 (VND)
1263 124 1333 140 789.474
487 92 542 108 CONT
837 54 906 71 Amount
132 92 158 108 dỡ
210 92 233 111 tại
643 54 679 70 Unit
813 27 862 43 trước
237 91 266 107 nơi
1271 54 1339 71 Amount
598 27 636 43 Đơn
482 54 519 70 Unit
190 155 212 172 tải
1039 54 1079 70 VAT
265 29 288 45 vụ
480 26 522 43 ĐVT
442 25 468 43 SL
975 54 1016 70 Rate
193 124 214 140 từ
485 123 517 141 B/L
485 166 510 184 Lô
931 53 972 71 VAT
31 167 43 183 3
31 124 44 140 2
1000 94 1014 110 X
32 92 42 108 1
1001 127 1014 142 X
1004 166 1014 185 /
1138 167 1147 184 /
451 92 461 108 1
451 124 461 140 1

Binary file not shown.

Before

Width:  |  Height:  |  Size: 201 KiB

View File

@ -1,82 +0,0 @@
537 569 588 597 Tổng
233 161 291 182 chứng
593 573 641 597 cộng
290 35 332 57 dịch
467 78 524 99 (Unit)
577 35 632 57 lượng
472 50 511 68 tính
1124 18 1175 45 Tổng
190 161 227 183 nộp
647 573 715 596 (Total):
1012 160 1088 179 720.000
114 160 156 182 dịch
133 613 180 641 bằng
792 63 877 84 (Amount)
193 36 239 57 hàng
986 52 1029 76 Tiền
1138 78 1201 99 (Total)
797 159 892 180 9.000.000
322 156 364 179 xuất
1118 50 1170 68 thanh
244 35 285 56 hóa,
87 575 184 596 (Exchange
50 613 88 637 tiền
844 30 883 55 tiền
399 159 440 179 C/O
463 22 504 41 Đơn
1139 160 1235 179 9.720.000
703 64 758 84 price)
1140 119 1201 139 9=6+8
781 35 840 54 Thành
1178 17 1216 42 tiền
914 14 964 38 Thuế
915 160 949 180 8%
189 576 239 596 rate):
198 64 318 84 (Description)
91 613 129 637 viết
77 160 109 179 Phí
52 573 83 597 giá
256 619 323 640 words):
1032 53 1074 76 thuế
20 613 47 637 Số
1175 51 1217 68 toán
804 119 863 139 6=4x5
710 34 741 59 giá
664 35 707 54 Đơn
651 63 700 83 (Unit
546 30 573 54 Số
546 63 639 84 (Quantity)
151 36 188 54 Tên
368 159 396 179 xứ
184 618 221 637 chữ
482 619 531 638 trăm
20 573 48 597 Tỷ
1018 85 1092 105 Amount)
969 17 1082 41 GTGT(VAT)
634 618 692 642 nghìn
916 68 947 89 TS
696 613 754 644 đồng.
575 619 629 638 mươi
782 572 877 594 9.000.000
329 619 379 638 Chín
384 619 433 642 triệu
966 84 1015 104 (VAT
225 619 252 640 (In
296 160 318 179 từ
989 573 1065 592 720.000
17 63 66 84 (No.)
161 165 186 182 vụ
537 618 571 638 hai
336 39 362 57 vụ
507 21 529 45 vị
1119 572 1215 594 9.720.000
438 619 477 642 bảy
487 118 501 139 3
15 35 60 54 STT
1021 119 1037 140 8
925 119 940 139 7
585 118 600 139 4
252 119 266 139 2
697 117 712 140 5
37 160 47 178 1
37 120 47 138 1

Binary file not shown.

Before

Width:  |  Height:  |  Size: 269 KiB

View File

@ -1,68 +0,0 @@
624 54 678 75 (Unit)
1049 23 1121 45 THÀNH
119 129 157 145 thuê
667 22 717 44 TÍNH
120 203 157 220 thuê
308 22 360 49 HÓA
185 161 217 183 đầu
185 198 217 220 đầu
749 54 838 76 (Quantity)
119 166 157 182 thuê
937 54 989 75 Price)
85 202 115 220 Phí
28 53 75 75 (No.)
185 124 217 146 đầu
774 25 850 48 LƯỢNG
1067 54 1153 75 (Amount)
270 54 382 76 (Description)
85 166 115 183 Phí
221 202 252 220 kéo
1067 202 1199 221 360.000.000.00
84 128 115 146 Phí
738 22 770 45 SỐ
245 23 304 45 HÀNG
221 165 252 183 kéo
221 128 252 146 kéo
1125 23 1173 45 TIỀN
423 421 505 449 STOP
1068 165 1198 185 138.461.550,00
586 26 634 45 ĐƠN
892 202 1012 221 72.000.000.00
1077 129 1198 148 47.076.927,00
39 95 65 116 (1)
885 54 933 74 (Unit
33 26 73 46 STT
199 22 242 45 TÊN
891 128 1012 148 47.076.927,00
891 165 1013 184 69.230.775.00
318 420 391 449 ONE
637 26 662 48 VỊ
697 128 726 145 XE
639 94 664 115 (3)
364 27 421 49 DỊCH
160 169 182 183 xe
698 165 726 182 XE
160 132 182 146 xe
925 95 950 116 (5)
782 95 807 116 (4)
160 207 182 220 xe
46 165 58 183 2
892 26 940 45 ĐƠN
315 94 339 116 (2)
46 202 58 220 3
943 22 984 46 GIÁ
841 166 853 183 2
47 129 57 146 1
691 421 724 449 M
842 203 853 220 5
424 26 456 50 VỤ
843 129 852 146 1
580 420 680 451 VICES,
733 421 787 449 ULTI
698 203 727 220 XE
399 425 414 447 -
1078 95 1159 115 )=(4)X(5)
1061 96 1084 115 (6)
796 420 854 449 BEN
515 420 572 449 SER
862 421 936 449 EFITS

Binary file not shown.

Before

Width:  |  Height:  |  Size: 245 KiB

View File

@ -1,76 +0,0 @@
590 592 641 614 Cộng
141 131 193 153 ngoài
234 131 286 153 tháng
374 23 417 44 dịch
759 22 818 44 lượng
272 22 321 44 hàng
327 23 369 43 hóa,
731 47 815 67 (Quantity)
646 586 682 610 tiền
686 591 732 614 hàng
589 711 640 739 Tổng
620 47 671 66 (Unit)
282 47 393 68 (Description)
117 588 184 613 chuyển
590 628 639 652 Thuế
291 130 367 150 08/2022
792 593 867 613 amount):
590 670 634 693 Tiền
638 670 680 693 thuế
35 46 79 67 (No.)
643 628 682 652 suất
728 17 755 40 Số
104 130 136 149 Phí
198 131 230 154 giờ
940 48 992 66 Price)
71 591 112 613 lòng
228 22 267 41 Tên
949 21 982 44 giá
687 633 748 651 GTGT
644 716 691 738 cộng
34 21 79 40 STT
1189 17 1229 41 tiền
589 23 632 40 Đơn
750 675 807 696 (VAT):
838 717 900 737 (Grand
694 711 731 735 tiền
661 22 701 40 tính
684 674 745 693 GTGT
1120 22 1184 40 Thành
737 593 788 612 (Total
98 616 164 636 transfer
735 716 787 734 thanh
31 616 93 635 (Please
902 22 945 40 Đơn
753 634 799 654 (VAT
319 591 377 613 chúng
792 715 833 734 toán
891 47 936 66 (Unit
1135 47 1212 67 (Amount)
801 634 849 654 rate):
30 591 66 609 Vui
905 717 961 736 Total):
1187 131 1290 149 57.753.850
257 591 315 610 khoản
636 22 657 44 vị
224 616 266 633 bank
189 591 224 610 vào
339 616 383 636 No.):
421 27 448 46 vụ
410 586 435 611 số
1201 674 1292 693 4.620.308
228 591 253 610 tài
1189 591 1292 610 57.753.850
1257 632 1292 652 8%
381 590 408 611 tôi
1189 715 1292 734 62.374.158
1140 94 1207 111 6=4x5
269 619 336 633 account
190 621 220 633 our
168 619 186 633 to
331 93 344 111 2
767 93 780 110 4
639 93 651 111 3
935 93 948 110 5
52 130 62 149 1
53 94 62 110 1

Binary file not shown.

Before

Width:  |  Height:  |  Size: 448 KiB

View File

@ -1,150 +0,0 @@
677 742 742 776 Tổng
1413 28 1476 63 Tổng
590 104 660 129 (Unit)
814 748 900 775 (Total):
1431 104 1509 129 (Total)
748 747 807 776 cộng
72 797 118 827 tiền
174 797 233 832 bằng
727 50 796 78 lượng
199 202 284 235 chuyển
369 50 421 77 dịch
596 68 645 92 tính
292 207 409 235 (trucking)
1480 28 1528 58 tiền
1150 24 1213 53 Thuế
305 515 358 542 định
884 86 954 111 price)
427 360 547 388 clearance)
1405 69 1471 91 thanh
1240 71 1294 101 Tiền
123 797 170 827 viết
247 51 305 78 hàng
1069 805 1141 833 nghìn
688 43 722 73 Số
995 85 1102 111 (Amount)
584 33 636 58 Đơn
148 516 199 542 dịch
1061 43 1109 75 tiền
242 516 300 543 giám
148 259 207 286 nâng
271 258 327 285 (Lift
1274 515 1368 539 160.000
1477 69 1529 91 toán
311 50 362 77 hóa,
982 50 1056 73 Thành
993 309 1120 334 13.309.081
148 464 205 491 giám
118 749 238 776 (Exchange
73 747 112 777 giá
253 86 404 111 (Description)
148 207 192 234 vận
148 412 199 439 dịch
820 85 880 110 (Unit
1413 799 1484 835 đồng.
1249 206 1368 230 4.055.959
1252 258 1367 281 1.973.455
1206 805 1269 828 trăm
101 206 143 230 Phí
689 85 805 111 (Quantity)
333 259 374 286 on)
304 309 351 337 off)
329 805 412 830 words):
832 805 893 828 trăm
990 257 1123 282 24.668.174
837 50 889 73 Đơn
101 258 143 281 Phí
149 361 199 384 khai
1273 463 1368 489 576.000
243 310 299 337 (Lift
33 797 67 827 Số
1432 515 1551 540 2.160.000
102 515 142 539 Phí
238 803 284 827 chữ
244 750 308 775 rate):
1005 514 1124 540 2.000.000
103 563 156 585 SEA
1434 463 1551 488 7.776.000
195 51 241 74 Tên
101 361 142 384 Phí
1298 72 1350 101 thuế
974 746 1117 771 113.180.694
1281 112 1372 137 Amount)
386 418 443 440 sung
1418 258 1550 281 26.641.629
991 206 1122 231 50.699.472
1432 156 1509 179 9=6+8
1434 412 1551 436 7.560.000
101 412 143 436 Phí
894 49 933 79 giá
1011 155 1085 179 6=4x5
314 362 420 388 (customs
102 309 143 333 Phí
1007 412 1123 437 7.000.000
34 747 69 777 Tỷ
1152 205 1194 231 8%
1419 206 1548 230 54.755.431
101 463 142 488 Phí
1152 256 1194 282 8%
1253 309 1367 333 1.064.725
1153 309 1194 334 8%
715 805 777 832 triệu
1005 360 1123 385 8.303.967
206 361 244 385 hải
1273 412 1369 436 560.000
1273 361 1368 384 664.317
1153 361 1194 385 8%
349 407 381 437 bổ
594 806 661 828 mươi
1276 805 1333 828 năm
1153 464 1193 488 8%
214 262 265 281 cont
242 412 280 437 hải
1220 29 1360 57 GTGT(VAT)
940 806 1006 828 mươi
477 805 539 828 trăm
1153 515 1194 539 8%
1422 309 1550 332 14.373.806
1006 463 1123 489 7.200.000
899 805 933 828 ba
1153 91 1192 117 TS
250 367 307 388 quan
1013 804 1061 828 lăm
1434 361 1551 384 8.968.284
639 32 667 62 vị
1340 806 1407 829 mươi
1149 805 1200 831 một
289 804 323 831 (In
1395 746 1540 771 122.235.150
1231 747 1350 771 9.054.456
784 805 826 828 hai
1153 413 1194 437 8%
545 804 588 829 hai
148 310 179 337 hạ
286 419 343 440 quan
667 805 709 829 hai
101 595 262 624 >07/08/2022)
1217 111 1277 135 (VAT
185 314 237 333 cont
204 521 236 543 vụ
419 805 471 832 Một
28 85 88 110 (No.)
211 464 269 491 định
165 561 447 589 INBOUND-(01/08/2022
427 55 459 78 vụ
615 155 633 180 3
737 154 757 179 4
31 49 86 74 STT
206 418 237 440 vụ
50 463 68 488 6
1285 154 1304 180 8
49 309 66 334 3
1165 155 1182 180 7
49 257 67 281 2
49 360 68 385 4
50 412 67 437 5
51 515 67 539 7
878 154 896 180 5
52 156 65 178 1
52 206 64 229 1
321 155 339 179 2

View File

@ -1,948 +0,0 @@
# Ultralytics YOLO 🚀, GPL-3.0 license
"""
Model validation metrics
"""
import math
import warnings
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from sklearn.metrics import confusion_matrix
# boxes
def box_area(box):
# box = xyxy(4,n)
return (box[2] - box[0]) * (box[3] - box[1])
def bbox_ioa(box1, box2, eps=1e-7):
"""Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2
box1: np.array of shape(nx4)
box2: np.array of shape(mx4)
returns: np.array of shape(nxm)
"""
# Get the coordinates of bounding boxes
b1_x1, b1_y1, b1_x2, b1_y2 = box1.T
b2_x1, b2_y1, b2_x2, b2_y2 = box2.T
# Intersection area
inter_area = (
np.minimum(b1_x2[:, None], b2_x2) - np.maximum(b1_x1[:, None], b2_x1)
).clip(0) * (
np.minimum(b1_y2[:, None], b2_y2) - np.maximum(b1_y1[:, None], b2_y1)
).clip(
0
)
# box2 area
box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps
# Intersection over box2 area
return inter_area / box2_area
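# Usage sketch (illustrative values, not from the original file):
# bbox_ioa(np.array([[0, 0, 10, 10]]), np.array([[5, 5, 15, 15]])) -> array([[0.25]]),
# i.e. the 25-pixel intersection divided by the 100-pixel area of box2.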
def box_iou(box1, box2, eps=1e-7):
# https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
"""
Return intersection-over-union (Jaccard index) of boxes.
Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
Arguments:
box1 (Tensor[N, 4])
box2 (Tensor[M, 4])
Returns:
iou (Tensor[N, M]): the NxM matrix containing the pairwise
IoU values for every element in boxes1 and boxes2
"""
# inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
(a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2)
inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)
# IoU = inter / (area1 + area2 - inter)
return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps)
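# Usage sketch (illustrative): for box1 of shape (N, 4) and box2 of shape (M, 4) in xyxy
# format, box_iou returns an (N, M) IoU matrix, e.g.
# box_iou(torch.tensor([[0., 0., 10., 10.]]), torch.tensor([[5., 5., 15., 15.]]))
# gives tensor([[0.1429]]) (intersection 25 over union 175).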
def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
# Returns Intersection over Union (IoU) of box1(1,4) to box2(n,4)
# Get the coordinates of bounding boxes
if xywh: # transform from xywh to xyxy
(x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, -1), box2.chunk(4, -1)
w1_, h1_, w2_, h2_ = w1 / 2, h1 / 2, w2 / 2, h2 / 2
b1_x1, b1_x2, b1_y1, b1_y2 = x1 - w1_, x1 + w1_, y1 - h1_, y1 + h1_
b2_x1, b2_x2, b2_y1, b2_y2 = x2 - w2_, x2 + w2_, y2 - h2_, y2 + h2_
else: # x1, y1, x2, y2 = box1
b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, -1)
b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, -1)
w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
# Intersection area
inter = (b1_x2.minimum(b2_x2) - b1_x1.maximum(b2_x1)).clamp(0) * (
b1_y2.minimum(b2_y2) - b1_y1.maximum(b2_y1)
).clamp(0)
# Union Area
union = w1 * h1 + w2 * h2 - inter + eps
# IoU
iou = inter / union
if CIoU or DIoU or GIoU:
cw = b1_x2.maximum(b2_x2) - b1_x1.minimum(
b2_x1
) # convex (smallest enclosing box) width
ch = b1_y2.maximum(b2_y2) - b1_y1.minimum(b2_y1) # convex height
if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
c2 = cw**2 + ch**2 + eps # convex diagonal squared
rho2 = (
(b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2
+ (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2
) / 4 # center dist ** 2
if (
CIoU
): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
v = (4 / math.pi**2) * (
torch.atan(w2 / h2) - torch.atan(w1 / h1)
).pow(2)
with torch.no_grad():
alpha = v / (v - iou + (1 + eps))
return iou - (rho2 / c2 + v * alpha) # CIoU
return iou - rho2 / c2 # DIoU
c_area = cw * ch + eps # convex area
return (
iou - (c_area - union) / c_area
) # GIoU https://arxiv.org/pdf/1902.09630.pdf
return iou # IoU
def mask_iou(mask1, mask2, eps=1e-7):
"""
mask1: [N, n] m1 means number of predicted objects
mask2: [M, n] m2 means number of gt objects
Note: n means image_w x image_h
return: masks iou, [N, M]
"""
intersection = torch.matmul(mask1, mask2.t()).clamp(0)
union = (
mask1.sum(1)[:, None] + mask2.sum(1)[None]
) - intersection # (area1 + area2) - intersection
return intersection / (union + eps)
def masks_iou(mask1, mask2, eps=1e-7):
"""
mask1: [N, n] m1 means number of predicted objects
mask2: [N, n] m2 means number of gt objects
Note: n means image_w x image_h
return: masks iou, (N, )
"""
intersection = (mask1 * mask2).sum(1).clamp(0) # (N, )
union = (mask1.sum(1) + mask2.sum(1))[
None
] - intersection # (area1 + area2) - intersection
return intersection / (union + eps)
def smooth_BCE(
eps=0.1,
): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
# return positive, negative label smoothing BCE targets
return 1.0 - 0.5 * eps, 0.5 * eps
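# Worked example: smooth_BCE(eps=0.1) returns (0.95, 0.05), i.e. positive targets are
# softened from 1.0 to 0.95 and negative targets are raised from 0.0 to 0.05.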
# losses
class FocalLoss(nn.Module):
# Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
super().__init__()
self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss()
self.gamma = gamma
self.alpha = alpha
self.reduction = loss_fcn.reduction
self.loss_fcn.reduction = "none" # required to apply FL to each element
def forward(self, pred, true):
loss = self.loss_fcn(pred, true)
# p_t = torch.exp(-loss)
# loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability
# TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
pred_prob = torch.sigmoid(pred) # prob from logits
p_t = true * pred_prob + (1 - true) * (1 - pred_prob)
alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
modulating_factor = (1.0 - p_t) ** self.gamma
loss *= alpha_factor * modulating_factor
if self.reduction == "mean":
return loss.mean()
elif self.reduction == "sum":
return loss.sum()
else: # 'none'
return loss
class ConfusionMatrix:
# Updated version of https://github.com/kaanakan/object_detection_confusion_matrix
def __init__(self, nc, conf=0.25, iou_thres=0.45):
self.matrix = np.zeros((nc + 1, nc + 1))
self.nc = nc # number of classes
self.conf = conf
self.iou_thres = iou_thres
def process_batch(self, detections, labels):
"""
Return intersection-over-union (Jaccard index) of boxes.
Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
Arguments:
detections (Array[N, 6]), x1, y1, x2, y2, conf, class
labels (Array[M, 5]), class, x1, y1, x2, y2
Returns:
None, updates confusion matrix accordingly
"""
if detections is None:
gt_classes = labels.int()
for gc in gt_classes:
self.matrix[self.nc, gc] += 1 # background FN
return
detections = detections[detections[:, 4] > self.conf]
gt_classes = labels[:, 0].int()
detection_classes = detections[:, 5].int()
iou = box_iou(labels[:, 1:], detections[:, :4])
x = torch.where(iou > self.iou_thres)
if x[0].shape[0]:
matches = (
torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1)
.cpu()
.numpy()
)
if x[0].shape[0] > 1:
matches = matches[matches[:, 2].argsort()[::-1]]
matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
matches = matches[matches[:, 2].argsort()[::-1]]
matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
else:
matches = np.zeros((0, 3))
n = matches.shape[0] > 0
m0, m1, _ = matches.transpose().astype(int)
for i, gc in enumerate(gt_classes):
j = m0 == i
if n and sum(j) == 1:
self.matrix[detection_classes[m1[j]], gc] += 1 # correct
else:
self.matrix[self.nc, gc] += 1 # true background
if n:
for i, dc in enumerate(detection_classes):
if not any(m1 == i):
self.matrix[dc, self.nc] += 1 # predicted background
def matrix(self):
return self.matrix
def tp_fp(self):
tp = self.matrix.diagonal() # true positives
fp = self.matrix.sum(1) - tp # false positives
# fn = self.matrix.sum(0) - tp # false negatives (missed detections)
return tp[:-1], fp[:-1] # remove background class
# @TryExcept("WARNING ⚠️ ConfusionMatrix plot failure")
def plot(self, normalize=True, save_dir="", names=()):
import seaborn as sn
array = self.matrix / (
(self.matrix.sum(0).reshape(1, -1) + 1e-9) if normalize else 1
) # normalize columns
array[array < 0.005] = np.nan # don't annotate (would appear as 0.00)
fig, ax = plt.subplots(1, 1, figsize=(12, 9), tight_layout=True)
nc, nn = self.nc, len(names) # number of classes, names
sn.set(font_scale=1.0 if nc < 50 else 0.8) # for label size
labels = (0 < nn < 99) and (nn == nc) # apply names to ticklabels
ticklabels = (names + ["background"]) if labels else "auto"
with warnings.catch_warnings():
warnings.simplefilter(
"ignore"
) # suppress empty matrix RuntimeWarning: All-NaN slice encountered
sn.heatmap(
array,
ax=ax,
annot=nc < 30,
annot_kws={"size": 8},
cmap="Blues",
fmt=".2f",
square=True,
vmin=0.0,
xticklabels=ticklabels,
yticklabels=ticklabels,
).set_facecolor((1, 1, 1))
ax.set_xlabel("True")
ax.set_ylabel("Predicted")
ax.set_title("Confusion Matrix")
fig.savefig(Path(save_dir) / "confusion_matrix.png", dpi=250)
plt.close(fig)
def print(self):
for i in range(self.nc + 1):
print(" ".join(map(str, self.matrix[i])))
def smooth(y, f=0.05):
# Box filter of fraction f
nf = round(len(y) * f * 2) // 2 + 1 # number of filter elements (must be odd)
p = np.ones(nf // 2) # ones padding
yp = np.concatenate((p * y[0], y, p * y[-1]), 0) # y padded
return np.convolve(yp, np.ones(nf) / nf, mode="valid") # y-smoothed
def plot_pr_curve(px, py, ap, save_dir=Path("pr_curve.png"), names=()):
# Precision-recall curve
fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
py = np.stack(py, axis=1)
if 0 < len(names) < 21: # display per-class legend if < 21 classes
for i, y in enumerate(py.T):
ax.plot(
px, y, linewidth=1, label=f"{names[i]} {ap[i, 0]:.3f}"
) # plot(recall, precision)
else:
ax.plot(px, py, linewidth=1, color="grey") # plot(recall, precision)
ax.plot(
px,
py.mean(1),
linewidth=3,
color="blue",
label="all classes %.3f mAP@0.5" % ap[:, 0].mean(),
)
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
ax.set_title("Precision-Recall Curve")
fig.savefig(save_dir, dpi=250)
plt.close(fig)
def plot_mc_curve(
px,
py,
save_dir=Path("mc_curve.png"),
names=(),
xlabel="Confidence",
ylabel="Metric",
):
# Metric-confidence curve
fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
if 0 < len(names) < 21: # display per-class legend if < 21 classes
for i, y in enumerate(py):
ax.plot(px, y, linewidth=1, label=f"{names[i]}") # plot(confidence, metric)
else:
ax.plot(px, py.T, linewidth=1, color="grey") # plot(confidence, metric)
y = smooth(py.mean(0), 0.05)
ax.plot(
px,
y,
linewidth=3,
color="blue",
label=f"all classes {y.max():.2f} at {px[y.argmax()]:.3f}",
)
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
ax.set_title(f"{ylabel}-Confidence Curve")
fig.savefig(save_dir, dpi=250)
plt.close(fig)
def compute_ap(recall, precision):
"""Compute the average precision, given the recall and precision curves
# Arguments
recall: The recall curve (list)
precision: The precision curve (list)
# Returns
Average precision, precision curve, recall curve
"""
# Append sentinel values to beginning and end
mrec = np.concatenate(([0.0], recall, [1.0]))
mpre = np.concatenate(([1.0], precision, [0.0]))
# Compute the precision envelope
mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))
# Integrate area under curve
method = "interp" # methods: 'continuous', 'interp'
if method == "interp":
x = np.linspace(0, 1, 101) # 101-point interp (COCO)
ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate
else: # 'continuous'
i = np.where(mrec[1:] != mrec[:-1])[0] # points where x-axis (recall) changes
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve
return ap, mpre, mrec
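# Worked example (illustrative values): for recall=[0.0, 0.5, 1.0] and
# precision=[1.0, 0.8, 0.6], the precision envelope stays non-increasing and the
# 101-point interpolated integral gives ap ≈ 0.8.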
def ap_per_class(
tp,
conf,
pred_cls,
target_cls,
plot=False,
save_dir=Path(),
names=(),
eps=1e-16,
prefix="",
):
"""Compute the average precision, given the recall and precision curves.
Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
# Arguments
tp: True positives (nparray, nx1 or nx10).
conf: Objectness value from 0-1 (nparray).
pred_cls: Predicted object classes (nparray).
target_cls: True object classes (nparray).
plot: Plot precision-recall curve at mAP@0.5
save_dir: Plot save directory
# Returns
The average precision as computed in py-faster-rcnn.
"""
# Sort by objectness
i = np.argsort(-conf)
tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
# Find unique classes
unique_classes, nt = np.unique(target_cls, return_counts=True)
nc = unique_classes.shape[0] # number of classes, number of detections
# Create Precision-Recall curve and compute AP for each class
px, py = np.linspace(0, 1, 1000), [] # for plotting
ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
for ci, c in enumerate(unique_classes):
i = pred_cls == c
n_l = nt[ci] # number of labels
n_p = i.sum() # number of predictions
if n_p == 0 or n_l == 0:
continue
# Accumulate FPs and TPs
fpc = (1 - tp[i]).cumsum(0)
tpc = tp[i].cumsum(0)
# Recall
recall = tpc / (n_l + eps) # recall curve
r[ci] = np.interp(
-px, -conf[i], recall[:, 0], left=0
) # negative x, xp because xp decreases
# Precision
precision = tpc / (tpc + fpc) # precision curve
p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score
# AP from recall-precision curve
for j in range(tp.shape[1]):
ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
if plot and j == 0:
py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5
# from IPython import embed; embed()
# Compute F1 (harmonic mean of precision and recall)
f1 = 2 * p * r / (p + r + eps)
names = [
v for k, v in names.items() if k in unique_classes
] # list: only classes that have data
names = dict(enumerate(names)) # to dict
if plot:
plot_pr_curve(px, py, ap, save_dir / f"{prefix}PR_curve.png", names)
plot_mc_curve(px, f1, save_dir / f"{prefix}F1_curve.png", names, ylabel="F1")
plot_mc_curve(
px, p, save_dir / f"{prefix}P_curve.png", names, ylabel="Precision"
)
plot_mc_curve(px, r, save_dir / f"{prefix}R_curve.png", names, ylabel="Recall")
i = smooth(f1.mean(0), 0.1).argmax() # max F1 index
p, r, f1 = p[:, i], r[:, i], f1[:, i]
tp = (r * nt).round() # true positives
fp = (tp / (p + eps) - tp).round() # false positives
return tp, fp, p, r, f1, ap, unique_classes.astype(int)
class Metric:
def __init__(self) -> None:
self.p = [] # (nc, )
self.r = [] # (nc, )
self.f1 = [] # (nc, )
self.all_ap = [] # (nc, 10)
self.ap_class_index = [] # (nc, )
self.nc = 0
@property
def ap50(self):
"""AP@0.5 of all classes.
Return:
(nc, ) or [].
"""
return self.all_ap[:, 0] if len(self.all_ap) else []
@property
def ap(self):
"""AP@0.5:0.95
Return:
(nc, ) or [].
"""
return self.all_ap.mean(1) if len(self.all_ap) else []
@property
def mp(self):
"""mean precision of all classes.
Return:
float.
"""
return self.p.mean() if len(self.p) else 0.0
@property
def mr(self):
"""mean recall of all classes.
Return:
float.
"""
return self.r.mean() if len(self.r) else 0.0
@property
def map50(self):
"""Mean AP@0.5 of all classes.
Return:
float.
"""
return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0
@property
def map75(self):
"""Mean AP@0.75 of all classes.
Return:
float.
"""
return self.all_ap[:, 5].mean() if len(self.all_ap) else 0.0
@property
def map(self):
"""Mean AP@0.5:0.95 of all classes.
Return:
float.
"""
return self.all_ap.mean() if len(self.all_ap) else 0.0
def mean_results(self):
"""Mean of results, return mp, mr, map50, map"""
return [self.mp, self.mr, self.map50, self.map]
def class_result(self, i):
"""class-aware result, return p[i], r[i], ap50[i], ap[i]"""
return self.p[i], self.r[i], self.ap50[i], self.ap[i]
@property
def maps(self):
"""mAP of each class"""
maps = np.zeros(self.nc) + self.map
for i, c in enumerate(self.ap_class_index):
maps[c] = self.ap[i]
return maps
def fitness(self):
# Model fitness as a weighted combination of metrics
w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
return (np.array(self.mean_results()) * w).sum()
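# Worked example (illustrative numbers): with mp=0.9, mr=0.8, map50=0.7, map=0.6,
# fitness = 0.1 * 0.7 + 0.9 * 0.6 = 0.61, so mAP@0.5:0.95 dominates the score.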
def update(self, results):
"""
Args:
results: tuple(p, r, f1, all_ap, ap_class_index)
"""
self.p, self.r, self.f1, self.all_ap, self.ap_class_index = results
class DetMetrics:
def __init__(self, save_dir=Path("."), plot=False, names=()) -> None:
self.save_dir = save_dir
self.plot = plot
self.names = names
self.box = Metric()
self.speed = {
"preprocess": 0.0,
"inference": 0.0,
"loss": 0.0,
"postprocess": 0.0,
}
self.probs = {}
self.tp, self.fp = [], []
def process(self, tp, conf, pred_cls, target_cls):
if len(conf) > 0:
for cls_id in range(len(self.names)):
conf_with_cls = conf[np.where(pred_cls == cls_id)]
if len(conf_with_cls) > 0:
highest_prob = conf_with_cls.max()
self.probs[self.names[cls_id]] = [highest_prob]
results = ap_per_class(
tp,
conf,
pred_cls,
target_cls,
plot=self.plot,
save_dir=self.save_dir,
names=self.names,
)
self.tp, self.fp = results[:2]
results = results[2:]
self.box.nc = len(self.names)
self.box.update(results)
@property
def keys(self):
return [
"metrics/precision(B)",
"metrics/recall(B)",
"metrics/mAP50(B)",
"metrics/mAP50-95(B)",
]
def mean_results(self):
return self.box.mean_results()
def class_result(self, i):
return self.box.class_result(i)
@property
def maps(self):
return self.box.maps
@property
def fitness(self):
return self.box.fitness()
@property
def ap_class_index(self):
return self.box.ap_class_index
@property
def results_dict(self):
return dict(zip(self.keys + ["fitness"], self.mean_results() + [self.fitness]))
def custom_result(self, instances_info=None, iou_match_dict: dict = None, prob_of_classes: dict = None):
"""Build a per-class detection report (counts, missed/false detections, average IoU).
Args:
instances_info (np.ndarray, optional): number of ground-truth instances per class. Defaults to None.
iou_match_dict (dict, optional): matched IoU values keyed by class name. Defaults to None.
prob_of_classes (dict, optional): highest confidence score observed per class name. Defaults to None.
Returns:
dict: per-class, average, and total statistics.
"""
instances_info = instances_info.tolist() if isinstance(instances_info, np.ndarray) else instances_info
total_instances = sum(instances_info)
total_instances = total_instances.item() if not isinstance(total_instances, int) else total_instances
custom_res = {}
for ci, c in self.names.items():
iou_match = iou_match_dict[c]
average_iou = np.mean(iou_match).item() if len(iou_match) > 0 else 0
_tp = self.tp[ci] if len(self.tp) == len(self.names) else 0
_fp = self.fp[ci] if len(self.fp) == len(self.names) else 0
_miss = instances_info[ci] - _tp
custom_res.update(
{
self.names[ci]: {
"actual": instances_info[ci],
"correct": int(_tp),
"missed_detection": int(_miss),
"false_detection": int(_fp),
"average_iou": round(average_iou, 4),
"average_dice": 0
}
}
)
custom_res.update({
"probability": prob_of_classes,
"average": {
"actual": int(total_instances // len(self.names)),
"correct": int(self.tp.sum() // len(self.names)) if len(self.tp) > 0 else 0,
"missed_detection": 0,
"false_detection": int(self.fp.sum() // len(self.names)) if len(self.fp) > 0 else 0,
"average_iou": round(np.mean([custom_res[cls_name]['average_iou'] for cls_name in self.names.values()]).item(), 4),
"average_dice": 0,
},
"total": {
"actual": total_instances,
"correct": int(self.tp.sum()) if len(self.tp) > 0 else 0 ,
"missed_detection": 0,
"false_detection": int(self.fp.sum()) if len(self.fp) > 0 else 0,
},
})
custom_res["total"]["missed_detection"] = (
custom_res["total"]["actual"] - custom_res["total"]["correct"]
)
custom_res["average"]["missed_detection"] = custom_res["total"][
"missed_detection"
] // len(self.names)
return custom_res
class SegmentMetrics:
def __init__(self, save_dir=Path("."), plot=False, names=()) -> None:
self.save_dir = save_dir
self.plot = plot
self.names = names
self.box = Metric()
self.seg = Metric()
self.speed = {
"preprocess": 0.0,
"inference": 0.0,
"loss": 0.0,
"postprocess": 0.0,
}
self.tp = []
self.fp = []
self.probs = {}
def process(self, tp_m, tp_b, conf, pred_cls, target_cls):
if len(conf) > 0:
for cls_id in range(len(self.names)):
conf_with_cls = conf[np.where(pred_cls == cls_id)]
if len(conf_with_cls) > 0:
highest_prob = conf_with_cls.max()
self.probs[self.names[cls_id]] = [highest_prob]
res_mask = ap_per_class(
tp_m,
conf,
pred_cls,
target_cls,
plot=self.plot,
save_dir=self.save_dir,
names=self.names,
prefix="Mask",
)
tp, fp, results_mask = res_mask[0], res_mask[1], res_mask[2:]
self.seg.nc = len(self.names)
self.seg.update(results_mask)
self.tp = tp
self.fp = fp
# print(self.tp, self.fp)
results_box = ap_per_class(
tp_b,
conf,
pred_cls,
target_cls,
plot=self.plot,
save_dir=self.save_dir,
names=self.names,
prefix="Box",
)[2:]
self.box.nc = len(self.names)
self.box.update(results_box)
@property
def keys(self):
return [
"metrics/precision(B)",
"metrics/recall(B)",
"metrics/mAP50(B)",
"metrics/mAP50-95(B)",
"metrics/precision(M)",
"metrics/recall(M)",
"metrics/mAP50(M)",
"metrics/mAP50-95(M)",
]
def mean_results(self):
return self.box.mean_results() + self.seg.mean_results()
def class_result(self, i):
return self.box.class_result(i) + self.seg.class_result(i)
def custom_result(self, instances_info=None, iou_match_dict: dict = None, prob_of_classes: dict = None):
"""Build a per-class segmentation report (counts, missed/false detections, average mask IoU).
Args:
instances_info (np.ndarray, optional): number of ground-truth instances per class. Defaults to None.
iou_match_dict (dict, optional): matched IoU values keyed by class name. Defaults to None.
prob_of_classes (dict, optional): highest confidence score observed per class name. Defaults to None.
Returns:
dict: per-class, average, and total statistics.
"""
instances_info = instances_info.tolist() if isinstance(instances_info, np.ndarray) else instances_info
total_instances = sum(instances_info)
total_instances = total_instances.item() if not isinstance(total_instances, int) else total_instances
custom_res = {}
for ci, c in self.names.items():
iou_match = iou_match_dict[c]
average_iou = np.mean(iou_match).item() if len(iou_match) > 0 else 0
_tp = self.tp[ci] if len(self.tp) == len(self.names) else 0
_fp = self.fp[ci] if len(self.fp) == len(self.names) else 0
_miss = instances_info[ci] - _tp
custom_res.update(
{
self.names[ci]: {
"actual": instances_info[ci],
"correct": int(_tp),
"missed_detection": int(_miss),
"false_detection": int(_fp),
"average_iou": round(average_iou, 4),
"average_dice": 0
}
}
)
custom_res.update({
"probability": prob_of_classes,
"average": {
"actual": int(total_instances // len(self.names)),
"correct": int(self.tp.sum() // len(self.names)) if len(self.tp) > 0 else 0,
"missed_detection": 0,
"false_detection": int(self.fp.sum() // len(self.names)) if len(self.fp) > 0 else 0,
"average_iou": round(np.mean([custom_res[cls_name]['average_iou'] for cls_name in self.names.values()]).item(), 4),
"average_dice": 0,
},
"total": {
"actual": total_instances,
"correct": int(self.tp.sum()) if len(self.tp) > 0 else 0 ,
"missed_detection": 0,
"false_detection": int(self.fp.sum()) if len(self.fp) > 0 else 0,
},
})
custom_res["total"]["missed_detection"] = (
custom_res["total"]["actual"] - custom_res["total"]["correct"]
)
custom_res["average"]["missed_detection"] = custom_res["total"][
"missed_detection"
] // len(self.names)
return custom_res
@property
def maps(self):
return self.box.maps + self.seg.maps
@property
def fitness(self):
return self.seg.fitness() + self.box.fitness()
@property
def ap_class_index(self):
# boxes and masks have the same ap_class_index
return self.box.ap_class_index
@property
def results_dict(self):
return dict(zip(self.keys + ["fitness"], self.mean_results() + [self.fitness]))
class ClassifyMetrics:
def __init__(self) -> None:
self.top1 = 0
self.top5 = 0
self.speed = {
"preprocess": 0.0,
"inference": 0.0,
"loss": 0.0,
"postprocess": 0.0,
}
def process(self, targets, pred):
# target classes and predicted classes
pred, targets = torch.cat(pred), torch.cat(targets)
correct = (targets[:, None] == pred).float()
acc = torch.stack(
(correct[:, 0], correct.max(1).values), dim=1
) # (top1, top5) accuracy
self.top1, self.top5 = acc.mean(0).tolist()
def custom_result(self ,targets, pred):
# from IPython import embed; embed()
metric_per_class = {}
pred, targets = torch.cat(pred), torch.cat(targets)
pred = pred.cpu().numpy()[:, 0]
targets = targets.cpu().numpy()
conf_mat = confusion_matrix(targets, pred)
"""{
"actual":
"correct":
"recall":
"precision":
}
"""
k = conf_mat.shape[0]
for cls_id in range(k):
tp = conf_mat[cls_id, cls_id]
fn = np.sum(conf_mat[cls_id, :]) - tp  # ground truth of this class predicted as another class (missed)
fp = np.sum(conf_mat[:, cls_id]) - tp  # other classes predicted as this class (false detections)
tn = np.sum(conf_mat) - tp - fp - fn
res_per_class = {
"actual": tp + fn,
"correct": tp,
"miss_detection": fn,
"false_detection": fp,
"recall": 0 if tp + fn == 0 else tp / (tp + fn),
"precision": 0 if tp + fp == 0 else tp / (tp + fp)
}
metric_per_class[cls_id] = res_per_class
return metric_per_class
@property
def fitness(self):
return self.top5
@property
def results_dict(self):
return dict(zip(self.keys + ["fitness"], [self.top1, self.top5, self.fitness]))
@property
def keys(self):
return ["metrics/accuracy_top1", "metrics/accuracy_top5"]
if __name__ == "__main__":
det_metric = DetMetrics()

View File

@ -1,275 +0,0 @@
import json
import logging
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
import requests
import tqdm
from sdsvkie.utils.eval_kie import eval_kie
from sdsvkie.utils.io_file import read_json, write_json
logging.basicConfig(
level=logging.INFO,
# format=""
)
logger = logging.getLogger()
HEADERS = {
'accept': 'application/json',
'Authorization': 'Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE2OTA2ODk4ODcsInVzZXJuYW1lIjoiYWRtaW4ifQ.Oybpc9tBsN35vCn3jzekkABDQKJT6yO1aBBJ4rMNln0'
}
URL = "http://107.120.133.27:8082/predict/image"
def run(
data_dir: str,
url: str,
gt_path: str,
field_pred_file: str,
samples: Union[int, None] = None,
):
files = get_files(data_dir, recursive=False, limit=samples)
preds = predict(url, files)
## process for table
# table_eval_result = {}
# table_preds = get_table_preds(preds)
# table_eval_result =
# process for seller, buyer, ...
field_eval_result = {}
# field_preds = get_field_preds_from_api(api_preds=preds)
field_preds = get_field_preds_from_file(pred_file=field_pred_file)
classes = get_classes(preds=field_preds)
if len(classes) == 0:
raise Exception("Can not get the classes list")
field_eval_result = eval(
gt=gt_path,
pred=field_preds,
classes=classes,
classes_ignore=['other', 'table']
)
print(field_eval_result)
## combine result
combine_result = {}
# combine_result = combine_result(table_res=table_eval_result, field_res=field_eval_result)
print_result(
data_path=data_dir,
num_samples=len(list(field_preds.keys())),
target_level=0.05,
result=1.0, # edit here
)
return combine_result
def print_result(
data_path: str,
num_samples:int,
target_level: float,
result: float,
metric: str = "NLD",
avg_time: float = 1.6363
):
print(f"Path of validation dataset: {data_path}\n"
+ f"Number of validation dataset: {num_samples}\n"
+ f"Evaluation metric: {metric}\n"
+ f"Target level: {target_level}\n"
+ f"Archieved level: {result}\n"
+ f"Average time: {avg_time}\n"
+ f"Verification result: {'PASS' if result > target_level else 'FAILED'}"
)
def get_field_preds_from_api(api_preds: str) -> dict:
field_preds = get_fields_preds(api_preds)
field_preds = combine_to_single_file(field_preds)
return field_preds
def get_field_preds_from_file(pred_file: str) -> dict:
"""
Get predictions from json file
"""
field_preds = read_json(pred_file)
return field_preds
def get_fields_preds(preds: List[Dict]):
preds = [
{item['file_path']: format_output_api(item['pred_data'])}
for item in preds
]
return preds
def combine_result(table_res: Dict, field_res: Dict):
return {}
def _str2dict(text: str) -> Dict:
try:
data = json.loads(text)
except Exception as err:
logger.error(f"{err} - data: {text}")
data = {}
return data
def predict_one_file(url: str, file: Union[str, Path]) -> Dict:
"""
Output format:
{
file_path: path of file
pred_data: parsed JSON response returned by the API
}
"""
if isinstance(file, str):
file = Path(file)
payload = {}
filename = file.name
files = [
(
'file',
(
filename,
open(str(file), 'rb'),
'application/pdf'
)
)
]
# logger.info(f"Files: {file}")
response = requests.request(
"POST", url, headers=HEADERS, data=payload, files=files)
response_dict = _str2dict(response.text)
return {
"file_path": str(file),
"pred_data": response_dict
}
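# Usage sketch (hypothetical file name): predict_one_file(URL, "sample_invoice.pdf")
# returns {"file_path": "sample_invoice.pdf", "pred_data": <parsed JSON response>}.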
def predict(url: str, files: List[Union[str, Path]]) -> List[Dict]:
"""
List of {'file_path', 'pred_data'}
"""
preds = []
for idx, file in tqdm.tqdm(enumerate(files)):
try:
pred = predict_one_file(url, file)
preds.append(pred)
except Exception as err:
logger.error(f"Error at file {file}: {err}")
return preds
def get_files(data_dir: str, recursive: bool = False, limit: Union[int, None] = None) -> List[Union[Path, str]]:
if recursive:
files = Path(data_dir).rglob("*")
else:
files = Path(data_dir).glob("*")
if limit:
files = list(files)[:limit]
return files
def _stem_filename(filename: str) -> str:
"""
Stem a file path: x/y.txt -> y
"""
return Path(filename).stem
def format_output_api(output_api: Dict, skip_fields=['table']) -> Dict:
if "pages" not in output_api:
return {}
pages = output_api['pages']
result = {}
for page in pages:
fields = page['fields']
for field_item in fields:
field_label, field_value = field_item['label'], field_item['value']
if field_label in result or field_label in skip_fields:
continue
result[field_label] = field_value
return result
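# Usage sketch (made-up payload):
# format_output_api({"pages": [{"fields": [{"label": "seller", "value": "ABC Co."}]}]})
# -> {"seller": "ABC Co."}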
def combine_to_single_file(preds: List[Dict]) -> Dict:
if len(preds) == 0:
return {}
combined_data = {
_stem_filename(item["filename"]): item["pred_data"]
for item in preds
}
return combined_data
def eval(
gt: Union[str, Dict],
pred: Union[str, Dict],
classes: List[str],
classes_ignore: List[str] = []
) -> Dict:
eval_res = eval_kie(
gt_e2e_path=gt,
pred_e2e_path=pred,
kie_labels=classes,
skip_labels=classes_ignore
)
return eval_res
def get_classes(preds: Dict) -> List[str]:
classes = []
for k, v in preds.items():
if v:
classes = list(v.keys())
break
return classes
def test():
import requests
url = "http://107.120.133.27:8082/predict/image"
payload = {}
files = [
('file', ('(1 of 19)_HOADON_1C23TYY_50.pdf', open(
'/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/PV2/final/all_in/(1 of 19)_HOADON_1C23TYY_50.pdf', 'rb'), 'application/pdf'))
]
headers = {
'accept': 'application/json',
'Authorization': 'Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE2OTA2ODk4ODcsInVzZXJuYW1lIjoiYWRtaW4ifQ.Oybpc9tBsN35vCn3jzekkABDQKJT6yO1aBBJ4rMNln0'
}
response = requests.request(
"POST", url, headers=headers, data=payload, files=files)
print(response.text)
# print(json.loa ds(response.text))
if __name__ == "__main__":
limit = 5
run(
data_dir="/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/PV2/final/all_in",
url=URL,
gt_path="/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/PV2/final/all_in.json",
field_pred_file="/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/invoice_all_in_final_e2e_21072023_5.json",
samples=limit
)
# test()

View File

@ -1,15 +0,0 @@
[Removed file: a saved copy of the Google Search homepage (minified HTML, CSS, and JavaScript); contents omitted as they carry no project-relevant information.]
d}else k=d}a=(d=k)?d.createScriptURL(a):a;a=new n(a,l);b.src=a instanceof n&&a.constructor===n?a.g:"type_error:TrustedResourceUrl";var h,q;(h=(a=null==(q=(h=(b.ownerDocument&&b.ownerDocument.defaultView||window).document).querySelector)?void 0:q.call(h,"script[nonce]"))?a.nonce||a.getAttribute("nonce")||"":"")&&b.setAttribute("nonce",h);document.body.appendChild(b);google.psa=!0;google.lx=f};google.bx||google.lx()};google.xjsu=u;setTimeout(function(){0<amd?google.caft(function(){return p()},amd):p()},0);})();window._ = window._ || {};window._DumpException = _._DumpException = function(e){throw e;};window._s = window._s || {};_s._DumpException = _._DumpException;window._qs = window._qs || {};_qs._DumpException = _._DumpException;function _F_installCss(c){}
(function(){google.jl={blt:'none',chnk:0,dw:false,dwu:true,emtn:0,end:0,ico:false,ikb:0,ine:false,injs:'none',injt:0,injth:0,injv2:false,lls:'default',pdt:0,rep:0,snet:true,strt:0,ubm:false,uwp:true};})();(function(){var pmc='{\x22d\x22:{},\x22sb_he\x22:{\x22agen\x22:true,\x22cgen\x22:true,\x22client\x22:\x22heirloom-hp\x22,\x22dh\x22:true,\x22ds\x22:\x22\x22,\x22fl\x22:true,\x22host\x22:\x22google.com\x22,\x22jsonp\x22:true,\x22msgs\x22:{\x22cibl\x22:\x22Xóa tìm ki&#7871;m\x22,\x22dym\x22:\x22Có ph&#7843;i b&#7841;n mu&#7889;n tìm:\x22,\x22lcky\x22:\x22Xem trang &#273;&#7847;u tiên tìm &#273;&#432;&#7907;c\x22,\x22lml\x22:\x22Tìm hi&#7875;u thêm\x22,\x22psrc\x22:\x22&#272;ã xóa tìm ki&#7871;m này kh&#7887;i \\u003Ca href\x3d\\\x22/history\\\x22\\u003EL&#7883;ch s&#7917; Web\\u003C/a\\u003E c&#7911;a b&#7841;n\x22,\x22psrl\x22:\x22Xóa\x22,\x22sbit\x22:\x22Tìm ki&#7871;m b&#7857;ng hình &#7843;nh\x22,\x22srch\x22:\x22Tìm trên Google\x22},\x22ovr\x22:{},\x22pq\x22:\x22\x22,\x22rfs\x22:[],\x22sbas\x22:\x220 3px 8px 0 rgba(0,0,0,0.2),0 0 0 1px rgba(0,0,0,0.08)\x22,\x22stok\x22:\x22eeOJZ_zWwu1haY9vT9tVq7xv54E\x22}}';google.pmc=JSON.parse(pmc);})();</script> </body></html>

File diff suppressed because one or more lines are too long

View File

@ -1,84 +0,0 @@
# from sdsvkie.utils.io_file import read_json
import json
import Levenshtein
from pathlib import Path
import shutil
import re
from unidecode import unidecode
# from sdsvkie.utils.io_file import read_json

def normalize(text):
    text = text.lower()
    text = unidecode(text)
    text = re.sub(r'[^a-zA-Z0-9\s]+', '', text)
    return text

def is_match(src, str_new, thr=0.7):
    src = normalize(src)
    str_new = normalize(str_new)
    distance = Levenshtein.ratio(src, str_new)
    if distance > thr:
        return True
    else:
        return False

def get_store_name(gt_store, store_list):
    for store in store_list:
        if is_match(store, gt_store, thr=0.6):
            return store.lower()
    if len(gt_store) == 0:
        return "other_non_title"
    else:
        return "other_have_title_{}".format(gt_store)

def read_json(json_path):
    with open(json_path, "r", encoding="utf8") as f:
        data = json.load(f)
    return data

json_path = "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/test_ss_receipt.json"
pred_data = read_json(json_path)
store_names = [normalize(item['Store_name_value']) for k, item in pred_data.items()]
# store_names = list(set(store_names))
from collections import Counter
my_counter = Counter(store_names)
list_tuples = my_counter.most_common()
print(list_tuples)
stores = [x[0] for x in list_tuples]
print(stores)
store_names = stores[1:]

img_dir = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/SS_Receipt/Images_splitted/All"
out_dir = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/SS_Receipt/Done"
out_dir = Path(out_dir)
for img_name, item in pred_data.items():
    store_name = item['Store_name_value']
    store_category = get_store_name(store_name, store_list=store_names)
    store_category = store_category.replace(" ", "_")
    print(store_category)
    out_dir_by_store = out_dir / store_category
    if not out_dir_by_store.exists():
        out_dir_by_store.mkdir(parents=True, exist_ok=True)
    img_full_name = Path(img_name).with_suffix(".jpg")
    img_full_path = Path(img_dir) / img_full_name
    txt_full_path = img_full_path.with_suffix(".txt")
    if not img_full_path.exists():
        print(str(img_full_path))
        continue
    else:
        shutil.copy(str(img_full_path), out_dir_by_store)
        shutil.copy(str(txt_full_path), out_dir_by_store)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 490 KiB

File diff suppressed because one or more lines are too long

View File

@ -1 +0,0 @@
rsync -r --exclude='workdirs/' --exclude='notebooks/' --exclude='weights/' --exclude='wandb/' --exclude='microsoft/' /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie user@107.120.133.42:/mnt/data/kie

View File

@ -1,194 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os \n",
"import glob \n",
"from tqdm import tqdm \n",
"import cv2 \n",
"import shutil\n",
"from pathlib import Path"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"DATA_DIR = \"/mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/raw/IMGS\"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def copy_only_first_page(data_dir, out_dir, skip_types=['Receipt_taxi','Receipt_food_Cam', 'Receipt_food_Scan']):\n",
" paths = sorted(glob.glob(data_dir + \"/*/*\"))\n",
" print(\"Total paths: \", len(paths))\n",
" out_dir = Path(out_dir)\n",
" for path in paths:\n",
" type_doc = Path(path).parent.name\n",
" out_dir_full = out_dir / type_doc\n",
" if not out_dir_full.exists():\n",
" out_dir_full.mkdir(parents=True)\n",
" if type_doc in skip_types:\n",
" shutil.copy(path, str(out_dir_full))\n",
" else:\n",
" if \"_1.jpg\" in path:\n",
" shutil.copy(path, out_dir_full)\n",
" prefix_name = \"_\".join(path.split(\"_\")[:-1]) + \"_1.jpg\"\n",
" print(prefix_name)\n",
" if Path(prefix_name).exists():\n",
" continue\n",
" else:\n",
" shutil.copy(path, out_dir_full)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"classes = [\n",
" # id invoice\n",
" 'No_key', # số hóa đơn\n",
" 'No_value', \n",
" 'Form_key', # mẫu số hóa đơn\n",
" 'Form_value', \n",
" 'Serial_key', # số kí hiệu hoá đơn\n",
" 'Serial_value', \n",
" 'Date_value', \n",
"\n",
" # seller info\n",
" 'Seller_company_name_key', \n",
" 'Seller_company_name_value', \n",
" 'Seller_tax_code_key', \n",
" 'Seller_tax_code_value', \n",
" 'Seller_address_value',\n",
" 'Seller_address_key', \n",
" 'Seller_tel_key',\n",
" 'Seller_tel_value', \n",
" \n",
" # buyer info\n",
" 'Buyer_personal_name_key',\n",
" 'Buyer_personal_name_value', \n",
" 'Buyer_company_name_key', \n",
" 'Buyer_company_name_value', \n",
" 'Buyer_tax_code_key', \n",
" 'Buyer_tax_code_value', \n",
" 'Buyer_address_key', \n",
" 'Buyer_address_value', \n",
" 'Buyer_address_key',\n",
" 'Buyer_address_value',\n",
"\n",
" # money info\n",
" 'Tax_amount_key', \n",
" 'Tax_amount_value', \n",
" 'Total_key', \n",
" 'Total_value', \n",
" 'Total_in_words_key', \n",
" 'Total_in_words_value',\n",
" \n",
" 'Other', \n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"classes = [x.lower() for x in classes]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['no_key',\n",
" 'no_value',\n",
" 'form_key',\n",
" 'form_value',\n",
" 'serial_key',\n",
" 'serial_value',\n",
" 'date_value',\n",
" 'seller_company_name_key',\n",
" 'seller_company_name_value',\n",
" 'seller_tax_code_key',\n",
" 'seller_tax_code_value',\n",
" 'seller_address_value',\n",
" 'seller_address_key',\n",
" 'seller_tel_key',\n",
" 'seller_tel_value',\n",
" 'buyer_personal_name_key',\n",
" 'buyer_personal_name_value',\n",
" 'buyer_company_name_key',\n",
" 'buyer_company_name_value',\n",
" 'buyer_tax_code_key',\n",
" 'buyer_tax_code_value',\n",
" 'buyer_address_key',\n",
" 'buyer_address_value',\n",
" 'buyer_address_key',\n",
" 'buyer_address_value',\n",
" 'tax_amount_key',\n",
" 'tax_amount_value',\n",
" 'total_key',\n",
" 'total_value',\n",
" 'total_in_words_key',\n",
" 'total_in_words_value',\n",
" 'other']"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"classes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "py38_hoanglv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

View File

@ -1,15 +0,0 @@
protobuf>=3.19.6,<4
opencv-python>=4.4.0
torch>=1.4
torchvision
transformers>=4.25.1
datasets>=2.5.2
Pillow==9.5.0
wandb
easydict==1.10
terminaltables==3.1.10
tqdm
rapidfuzz==2.13.7
PyMuPDF==1.20.2
sentencepiece
underthesea

View File

@ -1,49 +0,0 @@
%PDF-1.3
%“Œ‹ž ReportLab Generated PDF document http://www.reportlab.com
1 0 obj
<<
/F1 2 0 R
>>
endobj
2 0 obj
<<
/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
>>
endobj
3 0 obj
<<
/PageMode /UseNone /Pages 5 0 R /Type /Catalog
>>
endobj
4 0 obj
<<
/Author (anonymous) /CreationDate (D:20230522134603-07'00') /Creator (ReportLab PDF Library - www.reportlab.com) /Keywords () /ModDate (D:20230522134603-07'00') /Producer (ReportLab PDF Library - www.reportlab.com)
/Subject (unspecified) /Title (untitled) /Trapped /False
>>
endobj
5 0 obj
<<
/Count 0 /Kids [ ] /Type /Pages
>>
endobj
xref
0 6
0000000000 65535 f
0000000073 00000 n
0000000104 00000 n
0000000211 00000 n
0000000279 00000 n
0000000575 00000 n
trailer
<<
/ID
[<4d2762f6f45f96a78f66af9b0251b167><4d2762f6f45f96a78f66af9b0251b167>]
% ReportLab generated PDF document -- digest (http://www.reportlab.com)
/Info 4 0 R
/Root 3 0 R
/Size 6
>>
startxref
629
%%EOF

View File

@ -1,10 +0,0 @@
import os
from pathlib import Path
import shutil

data_dir = "/mnt/hdd2T/AICR/Projects/2023/FI_Invoices/Data"
data_dir = Path(data_dir)
for path in data_dir.glob("*/*.pdf"):
    if path.with_suffix(".xml").exists():
        shutil.copy(str(path), "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/SS_Invoice/multi_page_vat/batch_2")

View File

@ -1,117 +0,0 @@
from pathlib import Path
import shutil
import json
def write_txt(txt, data, mode="w"):
    with open(txt, mode, encoding="utf8") as f:
        for line in data:
            f.write(line + "\n")

def read_txt(txt):
    with open(txt, "r", encoding="utf8") as f:
        data = [line.strip() for line in f]
    return data

def get_no(items):
    no = "xxxx"
    for item in items:
        if "No_value" in item:
            tmp = item.split("\t")
            no = tmp[-2]
    return no

def write_json(json_path, data):
    with open(json_path, "w", encoding="utf8") as f:
        json.dump(data, f, ensure_ascii=False, sort_keys=True)

def read_json(json_path):
    with open(json_path, "r", encoding="utf8") as f:
        data = json.load(f)
    return data

def check(txt_dir):
    log_dict = {}
    txt_dir = Path(txt_dir)
    txt_paths = txt_dir.rglob("*.txt")
    for txt_path in txt_paths:
        items = read_txt(str(txt_path))
        no_doc = get_no(items)
        if no_doc not in log_dict:
            log_dict[no_doc] = [str(txt_path.with_suffix(".jpg"))]
        else:
            log_dict[no_doc].append(str(txt_path.with_suffix(".jpg")))
    not_dups = []
    for no, paths in log_dict.items():
        if len(paths) == 1:
            not_dups.append(no)
    # if "xxxx" in log_dict.keys():
    #     log_dict.pop("xxxx")
    for _ in not_dups:
        log_dict.pop(_)
    print(log_dict.keys())
    return log_dict

# print(check("/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/train/SS_Invoice"))

def get_leak_test(data_dir):
    test_keys = []
    data_dir = Path(data_dir)
    test_paths = data_dir.rglob("test_*")
    # print(list(test_paths))
    for path in test_paths:
        img_name = path.stem
        img_name = img_name.replace("test_", "")
        test_keys.append(img_name)
    # write_txt("leak.txt", test_keys)
    return test_keys

def create_new_test(ori_dir, out_dir, test_keys):
    ori_dir = Path(ori_dir)
    out_dir = Path(out_dir)
    if not out_dir.exists():
        out_dir.mkdir(parents=True, exist_ok=True)
    img_paths = ori_dir.rglob("*.jpg")
    for img_path in img_paths:
        img_key = img_path.stem
        if img_key in test_keys:
            continue
        txt_path = img_path.with_suffix(".txt")
        shutil.copy(str(img_path), str(out_dir))
        shutil.copy(str(txt_path), str(out_dir))

def create_new_e2e_test(ori_json, out_json, test_keys):
    ori_data = read_json(ori_json)
    out_dict = {}
    for k, v in ori_data.items():
        if k in test_keys:
            continue
        out_dict[k] = v
    write_json(out_json, out_dict)

test_keys = get_leak_test("/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/EXCESS")
# create_new_test(
#     ori_dir="/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/test_ss",
#     out_dir="/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/test_ss_rm_leak",
#     test_keys=test_keys
# )
create_new_e2e_test(
    ori_json="/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/test_ss_e2e.json",
    out_json="/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/test_ss_e2e_rm_leak.json",
    test_keys=test_keys
)

View File

@ -1,39 +0,0 @@
import argparse
from pathlib import Path
from tqdm import tqdm
from sdsvkie.utils.io_file import read_json, write_json

def clean_json(in_json, out_json, pdf_dir):
    data_src = read_json(in_json)
    pdf_dir = Path(pdf_dir)
    pdf_paths = pdf_dir.glob("*.pdf")
    pdf_keys = set([pdf_path.stem for pdf_path in pdf_paths])
    data_tgt = {}
    for src_pdf_key in data_src.keys():
        if src_pdf_key in pdf_keys:
            data_tgt[src_pdf_key] = data_src[src_pdf_key]
    write_json(out_json, data_tgt, sort_keys=False)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(prog="Clean e2e JSON")
    parser.add_argument("--input", type=str, required=True, help="input e2e JSON file")
    parser.add_argument("--out", type=str, required=False, help="output JSON file")
    parser.add_argument("--dir", type=str, required=True, help="directory of PDF files to keep")
    args = parser.parse_args()
    clean_json(
        in_json=args.input,
        out_json=args.out,
        pdf_dir=args.dir
    )

View File

@ -1,49 +0,0 @@
import os
import json
from pathlib import Path

def read_json(json_path):
    with open(json_path, 'r') as f:
        data = json.load(f)
    return data

def write_json(json_path, data):
    with open(json_path, 'w') as f:
        json.dump(data, f, ensure_ascii=False)

def clean_json(json_in, json_out, valid_names):
    out_data = {}
    data = read_json(json_in)
    for name_key, items in data.items():
        if name_key in valid_names:
            out_data[name_key] = items
    write_json(json_out, out_data)

# DIRNAMES = ['SL_HCM', 'SL_HN_batch_1', 'SL_HN_batch_2', 'Invoices_SAVINA']
# ROOT_DIR = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/PDF/multi_page"
DIRNAMES = ['test_sbt_v2']
ROOT_DIR = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed"

for dirname in DIRNAMES:
    json_path = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/test_end2end/sbt_validation_e2e.json"
    json_out_path = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/test_end2end/sbt_validation_e2e_v2.json"
    valid_names = [p.stem for p in (Path(ROOT_DIR) / dirname).glob("*")]
    print(valid_names)
    clean_json(json_path, json_out_path, valid_names)

# def combine_json(json_paths, json_out):
#     datas = [read_json(json_path) for json_path in json_paths]
#     out_data = {}
#     for data in datas:
#         out_data.update(data)
#     write_json(json_out, out_data)
# json_paths = [Path(ROOT_DIR) / (dirname + "_out.json") for dirname in DIRNAMES]
# json_out = ROOT_DIR + "/test_e2e_multi_pages.json"
# combine_json(json_paths, json_out)

View File

@ -1,23 +0,0 @@
import os
import shutil
import glob
from pathlib import Path

if __name__ == "__main__":
    src_dir = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/train/SL_HCM_batch_2_multi_pages"
    tgt_dir = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/intermediate/key_information_extraction/"
    num_files = 100
    files = glob.glob(os.path.join(src_dir, "*.jpg"))
    count = 0
    for file in files:
        src_path = os.path.join(src_dir, file)
        # glob already returns full paths, so join only the basename to land inside tgt_dir
        tgt_path = os.path.join(tgt_dir, os.path.basename(file))
        if os.path.isfile(src_path):
            shutil.copy(src_path, tgt_path)
            count += 1
        if count == num_files:
            break
    print(f"Copied {count} files from {src_dir} to {tgt_dir}")

View File

@ -1,25 +0,0 @@
import os
import shutil
from pathlib import Path

SRC_DIR = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/train/sbt/batch_1"
TEST_DIR = "/mnt/ssd1T/tuanlv/06.KVUCombineStage/datasets/invoices-receipts/SS_invoices/SBT/validation_data/images"
OUT_DIR = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/test_sbt"

# Get a list of all the files in the test directory
test_files = [Path(f).name for f in os.listdir(TEST_DIR) if ".txt" not in f]

# Create the output directory if it doesn't exist
os.makedirs(OUT_DIR, exist_ok=True)

# Move the matching files from the source directory to the output directory
for filename in os.listdir(SRC_DIR):
    if Path(filename).name in test_files:
        src_path = os.path.join(SRC_DIR, filename)
        # out_path = os.path.join(OUT_DIR, filename)
        shutil.move(src_path, OUT_DIR)
        # move the corresponding .txt label
        src_txt_path = Path(os.path.join(SRC_DIR, filename)).with_suffix(".txt")
        shutil.move(str(src_txt_path), OUT_DIR)

View File

@ -1,314 +0,0 @@
import shutil
import xml.etree.ElementTree as ET
from datetime import datetime
# from sdsvkie.utils.io_file import read_json, write_json
import json
import csv
import ast
from pathlib import Path
import tqdm
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def get_xml_from_csv(csv_file):
    data = {}
    with open(csv_file, 'r') as file:
        csv_reader = csv.DictReader(file)
        for row in csv_reader:
            # print(row)
            pdf_path = row['file_path']
            pdf_key = Path(pdf_path).stem
            xml_paths = ast.literal_eval(row['xml_path'])
            data[pdf_key] = xml_paths
    return data

def get_xml_from_dirs(dir_path, pdf_keys):
    dir_path = Path(dir_path)
    xml_paths = dir_path.rglob("*.xml")
    xml_paths = [str(path) for path in xml_paths]
    xml_infos = {}
    for pdf_key in pdf_keys:
        xml_infos[pdf_key] = xml_paths
    return xml_infos

def write_json(json_path, data, sort_keys=True):
    with open(json_path, "w", encoding="utf8") as f:
        json.dump(data, f, ensure_ascii=False, sort_keys=sort_keys)

def read_json(json_path):
    with open(json_path, "r", encoding="utf8") as f:
        data = json.load(f)
    return data

def convert_date(date_str: str, ori_pattern: str = '%Y-%m-%d', tgt_pattern: str = '%d/%m/%Y'):
    date_obj = datetime.strptime(date_str, ori_pattern)
    # convert back to a string in DD/MM/YYYY format
    new_date_str = date_obj.strftime(tgt_pattern)
    return new_date_str

def extract(xml_in, field_mapping):
    with open(xml_in, "r") as f:
        xml_string = f.read()
    # parse the XML string
    root = ET.fromstring(xml_string)
    # extract the SHDon and NLap elements
    output = {}
    for key in field_mapping:
        pattern = f".//{field_mapping[key]}"
        value = root.find(pattern)
        value = "" if value is None else value.text
        if key == "date_value" and value != "":
            value = convert_date(value)
        if key in ["tax_amount_value", "total_value"] and value != "":
            value = str(int(float(value)))
        output[key] = value
    return output

field_mapping = {
    "no_value": "SHDon",
    "form_value": "KHMSHDon",
    "serial_value": "XXXXXXX",
    "date_value": "NLap",  # 2023-06-05 -> YY-MM-DD
    "seller_company_name_value": "NBan/Ten",
    "seller_address_value": "NBan/DChi",
    "seller_tel_value": "XXXXXXXXX",
    "seller_tax_code_value": "NBan/MST",
    "buyer_personal_name_value": "NMua/HVTNMHang",
    "buyer_company_name_value": "NMua/Ten",
    "buyer_address_value": "NMua/DChi",
    "buyer_tax_code_value": "NMua/MST",
    "buyer_tel_value": "NMua/SDT",
    "tax_amount_value": "TThue",
    "total_value": "TgTTTBSo",
    "total_in_words_value": "TgTTTBChu"
}
## fields need care: serial_value, seller_tel_value, buyer_tel_value

def get_xml_list_info(xml_dir):
    xml_dir = Path(xml_dir)
    xml_files = xml_dir.glob("*/*.xml")
    xml_info = {}
    for xml_file in xml_files:
        pdf_key = xml_file.stem
        xml_info[pdf_key] = str(xml_file)
    return xml_info

def process(json_in, json_out, xml_dir):
    assert Path(json_in).exists() == True
    assert Path(xml_dir).exists() == True
    data_in = read_json(json_in)
    data_out = {}
    if data_in is None or not data_in:
        logger.error("empty file")
        return
    xml_info = get_xml_list_info(xml_dir)
    for pdf_key in tqdm.tqdm(data_in.keys()):
        xml_path = xml_info[pdf_key] if pdf_key in xml_info else None
        if xml_path is None:
            continue
        else:
            output = extract(xml_path, field_mapping)
            data_out[pdf_key] = output
    write_json(json_out, data_out, sort_keys=False)

def get_xml_list_info_v2(xml_dir):
    xml_dir = Path(xml_dir)
    xml_files = xml_dir.glob("*/*.xml")
    xml_info = {}
    for xml_file in xml_files:
        pdf_key = xml_file.stem
        if pdf_key in xml_info:
            xml_info[pdf_key].append(str(xml_file))
        else:
            xml_info[pdf_key] = [str(xml_file)]
    return xml_info

def extract_v2(xml_paths, preds, field_mapping, pdf_key=None):
    xml_path = None
    if len(xml_paths) == 1:
        xml_path = xml_paths[0]
    else:
        # find best xml
        for xml_in in xml_paths:
            try:
                with open(xml_in, "r", encoding='utf8') as f:
                    xml_string = f.read()
                root = ET.fromstring(xml_string, parser=ET.XMLParser(encoding='iso-8859-5'))
            except Exception as err:
                print("Error exception (check) ", err, xml_in)
                continue
            key_checks = ["no_value"]
            is_exists_xml = False
            for key_check in key_checks:
                pattern = f".//{field_mapping[key_check]}"
                value = root.find(pattern)
                value = "" if value is None else value.text
                if value == preds[key_check]:
                    is_exists_xml = True
            if is_exists_xml:
                xml_path = xml_in
    if xml_path is None:
        print("Not found best xml for ", pdf_key, xml_paths)
        return None, None
    try:
        with open(xml_path, "r") as f:
            xml_string = f.read()
        # parse the XML string
        root = ET.fromstring(xml_string)
    except Exception as err:
        print("Error exception: ", err, xml_path)
        return None, None
    # extract the SHDon and NLap elements
    output = {}
    for key in field_mapping:
        pattern = f".//{field_mapping[key]}"
        value = root.find(pattern)
        value = "" if value is None else value.text
        if key == "date_value" and value != "":
            value = convert_date(value)
        if key in ["tax_amount_value", "total_value"] and value != "":
            value = str(int(float(value)))
        output[key] = value
    return output, xml_path

def process_v2(json_in, json_out, csv_file, xml_dir, xml_out_dir, pdf_xml_json):
    assert Path(json_in).exists() == True
    assert Path(xml_dir).exists() == True
    # make dir
    if not Path(xml_out_dir).exists():
        Path(xml_out_dir).mkdir(parents=True, exist_ok=True)
    data_in = read_json(json_in)
    data_out = {}
    if data_in is None or not data_in:
        logger.error("empty file")
        return
    # xml_info = get_xml_list_info_v2(xml_dir)
    # xml_info = get_xml_from_csv(csv_file=csv_file)
    xml_info = get_xml_from_dirs(dir_path=csv_file, pdf_keys=list(data_in.keys()))
    print("Num xml: ", len(xml_info))
    succes = 0
    pdf_xml_info = {}
    set_xml_paths = set()
    for pdf_key in tqdm.tqdm(data_in.keys()):
        xml_paths = xml_info[pdf_key] if pdf_key in xml_info else None
        # print(xml_paths)
        preds = data_in[pdf_key]
        if xml_paths is None or len(xml_paths) == 0:
            print("Not exist xml because xml_paths is None or len xml_paths = 0", pdf_key)
            continue
        else:
            output, xml_path = extract_v2(xml_paths, preds, field_mapping, pdf_key=pdf_key)
            if output is not None:
                pdf_xml_info[pdf_key] = xml_path
                shutil.copy(xml_path, xml_out_dir)
                # if Path(xml_path).stem in set_xml_paths:
                #     print(pdf_key, xml_path)
                set_xml_paths.add(Path(xml_path).stem)
                succes += 1
                data_out[pdf_key] = output
    print("Succes: ", succes)
    print(len(set_xml_paths))
    write_json(pdf_xml_json, pdf_xml_info, sort_keys=False)
    write_json(json_out, data_out, sort_keys=False)

def combine_xml(json_src, json_refer):
    data_src = read_json(json_src)
    data_refer = read_json(json_refer)
    for pdf_key in data_src.keys():
        for field_key in data_src[pdf_key]:
            if data_src[pdf_key][field_key] == "":
                data_src[pdf_key][field_key] = data_refer[pdf_key][field_key]
    write_json(json_src, data=data_src, sort_keys=False)

def create_data_from_json(in_dir, out_dir, json_path):
    in_dir = Path(in_dir)
    out_dir = Path(out_dir)
    if not out_dir.exists():
        out_dir.mkdir(parents=True, exist_ok=True)
    data = read_json(json_path)
    for pdf_key in data.keys():
        pdf_path = in_dir / (pdf_key + ".pdf")
        shutil.copy(str(pdf_path), str(out_dir))

if __name__ == "__main__":
    # json_in = "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/Invoice_v2_multi_page.json"
    # json_out = "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/Invoice_v2_multi_page_from_xml.json"
    # xml_dir = "/mnt/hdd2T/AICR/Projects/2023/FI_Invoices/Test"
    # process(json_in=json_in, json_out=json_out, xml_dir=xml_dir)

    # json_in = "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/Invoice_v2_multi_page_from_xml.json"
    # json_refer = "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/Invoice_v2_multi_page.json"
    # json_in = "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/Invoice_v2_one_page_e2e_from_xml.json"
    # json_refer = "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/Invoice_v2_one_page_e2e.json"
    # combine_xml(json_src=json_in, json_refer=json_refer)

    ## One page
    # json_in = "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/Invoice_v2_one_page_e2e.json"
    # json_out = "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/Invoice_v2_one_page_e2e_from_xml.json"

    ## Multi page
    json_in = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/PV2/Invoice_v1_multi_page.json"
    json_out = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/PV2/Invoice_v1_multi_page_from_xml.json"
    # csv_file = "/mnt/ssd1T/tuanlv/02.KeyValueUnderstanding/inferences/e2e_outputs/FI_June_data.csv"
    csv_file = "/mnt/hdd2T/AICR/Projects/2023/FI_Invoices/Data"
    pdf_xml_json = "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/Invoice_v1_multi_page_metadata.json"
    xml_dir = "/mnt/hdd2T/AICR/Projects/2023/FI_Invoices/Test"
    xml_out_dir = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/Invoice_v1_multi_page_xml"
    process_v2(json_in=json_in, json_out=json_out, csv_file=csv_file, xml_dir=xml_dir, xml_out_dir=xml_out_dir, pdf_xml_json=pdf_xml_json)

    # in_dir = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/Invoice_v2_multi_page"
    # out_dir = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/Invoice_v2_multi_page_clean"
    # json_path = "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/Invoice_v2_multi_page_from_xml.json"
    # in_dir = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/Invoice_v2_one_page"
    # out_dir = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/Invoice_v2_one_page_clean"
    # json_path = "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/Invoice_v2_one_page_e2e_from_xml.json"
    # create_data_from_json(in_dir, out_dir, json_path)

View File

@ -1,42 +0,0 @@
import json

json_path = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/SS_Invoice/multi_page_vat/SL_HCM.json"
with open(json_path, 'r', encoding='utf8') as f:
    data = json.load(f)
print(data[list(data.keys())[0]].keys())

keys = [
    'serial_value',
    'no_value',
    'form_value',
    'date',
    'seller_company_name_value',
    'seller_address_value',
    'seller_mobile_value',
    'seller_tax_code_value',
    'buyer_name_value',
    'buyer_company_name_value',
    'buyer_address_value',
    'buyer_mobile_value',
    'buyer_tax_code_value',
    'VAT_amount_value',
    'total_in_words_value',
    'total_value'
]

new_data = {}
for file_name, items in data.items():
    new_items = {}
    for k in keys:
        new_items[k] = items[k]
    new_data[file_name] = new_items

with open(json_path, 'w', encoding='utf8') as f:
    json.dump(new_data, f, ensure_ascii=False)

View File

@ -1,31 +0,0 @@
import os
import shutil
from pathlib import Path

folder1 = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/PV2/Invoice_v2_multi_page"
folder2 = "/mnt/hdd2T/AICR/Projects/2023/FI_Invoices/Invoice_v2_multi_page"
out_dir = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/PV2/Invoice_v2_multi_page_2"
out_dir = Path(out_dir)
if not out_dir.exists():
    out_dir.mkdir(parents=True, exist_ok=True)

# Get list of files in both folders
files1 = [f for f in os.listdir(folder1) if os.path.isfile(os.path.join(folder1, f))]
files2 = [f for f in os.listdir(folder2) if os.path.isfile(os.path.join(folder2, f))]

# Get list of file names in both folders
names1 = [os.path.splitext(f)[0] for f in files1]
names2 = [os.path.splitext(f)[0] for f in files2]

# Find names that appear in only one of the two folders (symmetric difference)
duplicates = set(names1) ^ set(names2)
print(len(duplicates))

# Print the mismatched file names and copy the PDFs from folder2
for d in duplicates:
    print(f"Duplicate file name found: {d}")
    pdf_path = Path(folder2) / (d + ".pdf")
    shutil.copy(str(pdf_path), str(out_dir))

View File

@ -1,106 +0,0 @@
200
Invoice_ho_1007_000
210
invoice_126
inv_SDV_016
invoice_108
invoice_215
invoice_135
inv_SDV_004
292
164
242
inv_SDV_240
207
invoice_0525_000
invoice_1279_000
306
d2.sc_1261_000
invoice_90
304
s1.sc_1258_000
ce_1h_0967_000
invoice_1392_000
193
invoice_109
281
354
invoice_1059_000
es_10_1043_000
257
invoice_65
invoice_1252_006
331
scan__1319_000
230
20210_1314_000
328
o1_aa_1093_000
342
invoice_149
invoice_1304_000
c2_em_0081_000
Invoice_En_1074_000
invoice_89
Invoice_Sh_0712_000
invoice_202
hotel_0209_000
invoice_0872_000
invoice_72
InvoiceofP_0648_000
invoice_133
C1_Invoice_0968_000
invoice_0803_000
invoice_50
invoice_208
253
inv_SDV_215
360
invoice_1393_000
scan__0953_000
invoice_22
O1_Invoice_1348_000
inv_SDV_231
252
273
156
330
invoice_0457_001
invoice_0180_001
invoice_182
326
14
301
334
01gtk_0199_000
343
Invoice201_0930_000
invoice_1
344
inv_SDV_021
invoice_170
E2.Invoice_0561_000
Invoice_Sh_0262_000
1.1Invoice_1431_000
invoice_0112_000
invoice_195
314
2021._0035_000
invoice_0013_000
invoice_1204_000
2021._0868_000
scan__0520_000
255
invoice_200
C3_Invoice_1359_000
invoice_49
invoice_1095_000
hq_20_0003_000
invoice_180
invoice_184
340
invoice_0447_000
invoice_6
invoice_190
invoice_105
invoice_0673_000

View File

@ -1,24 +0,0 @@
import os
import shutil
from pathlib import Path

SRC_DIR = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/train/sbt/batch_1"
TEST_DIR = "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_9_lr5e_6_no_scheduler/sbt_txt"

# Get a list of all the files in the test directory
test_files = sorted([Path(f).stem for f in os.listdir(TEST_DIR) if ".txt" in f])
print(len(test_files))

# Count the files in the source directory that have no matching test file
i = 0
src_files = sorted(os.listdir(SRC_DIR))
print(len(src_files))
for filename in src_files:
    # print(Path(filename).stem)
    if Path(filename).stem not in test_files:
        print(Path(filename).stem)
        i += 1
print(i)

View File

@ -1,15 +0,0 @@
import os

def rename_files(folder_path):
    # Get a list of all the files in the folder
    files = os.listdir(folder_path)
    # Iterate over the files and rename them
    for i, filename in enumerate(files):
        # Construct the new filename
        new_filename = filename.replace(" ", "_")
        # Rename the file
        os.rename(os.path.join(folder_path, filename), os.path.join(folder_path, new_filename))

rename_files(
    folder_path="/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/intermediate/sbt/images",
)

View File

@ -1,10 +0,0 @@
python rename_labels.py \
--in_dir /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed \
--out_dir /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed \
--doc_type receipt
python rename_labels.py \
--in_dir /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed \
--out_dir /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed \
--doc_type invoice

View File

@ -1,191 +0,0 @@
import argparse
from pathlib import Path
from tqdm import tqdm
import json

INVOICE_MAPPING = {
    'no_key': 'No_key',  # invoice number
    'no_value': 'No_value',
    'form_key': 'Form_key',  # invoice template number
    'form_value': 'Form_value',
    'serial_key': 'Serial_key',  # invoice serial number
    'serial_value': 'Serial_value',
    'date': 'Date_value',
    # seller info
    'seller_company_name_key': 'Seller_company_name_key',
    'seller_company_name_value': 'Seller_company_name_value',
    'seller_tax_code_key': 'Seller_tax_code_key',
    'seller_tax_code_value': 'Seller_tax_code_value',
    'seller_address_value': 'Seller_address_value',
    'seller_address_key': 'Seller_address_key',
    'seller_mobile_key': 'Seller_tel_key',
    'seller_mobile_value': 'Seller_tel_value',
    # buyer info
    'buyer_name_key': 'Buyer_personal_name_key',
    'buyer_name_value': 'Buyer_personal_name_value',
    'buyer_company_name_value': 'Buyer_company_name_value',
    'buyer_company_name_key': 'Buyer_company_name_key',
    'buyer_tax_code_key': 'Buyer_tax_code_key',
    'buyer_tax_code_value': 'Buyer_tax_code_value',
    'buyer_address_key': 'Buyer_address_key',
    'buyer_address_value': 'Buyer_address_value',
    'buyer_mobile_key': 'Buyer_tel_key',
    'buyer_mobile_value': 'Buyer_tel_value',
    # money info
    'VAT_amount_key': 'Tax_amount_key',
    'VAT_amount_value': 'Tax_amount_value',
    'total_key': 'Total_key',
    'total_value': 'Total_value',
    'total_in_words_key': 'Total_in_words_key',
    'total_in_words_value': 'Total_in_words_value',
    'other': 'Other',
}

RECEIPT_MAPPING = {
    "Store_name_value": "seller_company_name_value",
    "Seller_company_name_value": "seller_company_name_value",
    "id": "no_value",
    "No_value": "no_value",
    "Date_value": "date_value",
    "Total_key": "total_key",
    "Total_value": "total_value",
    "Others": "other",
    "others": "other",
    "Other": "other",
}

def write_txt(txt, data, mode="w"):
    with open(txt, mode, encoding="utf8") as f:
        for line in data:
            f.write(line + "\n")

def read_txt(txt):
    with open(txt, "r", encoding="utf8") as f:
        data = [line.strip() for line in f]
    return data

def edit_file(in_txt, out_txt, mapping):
    data = read_txt(in_txt)
    new_items = []
    not_exits_label = False
    not_edit = True
    for item in data:
        splited_item = item.split("\t")
        label = splited_item[-1]
        if label in mapping.keys():
            new_label = mapping[label]
            splited_item[-1] = new_label
            not_edit = False
        else:
            # print(label, "not in ", mapping.keys())
            not_exits_label = True
            splited_item[-1] = label.lower()
        splited_item[-1] = splited_item[-1].lower()
        new_item = "\t".join(splited_item)
        new_items.append(new_item)
    if not_exits_label:
        print("Not exists label: ", in_txt)
    if not not_edit:
        print("Not edit: ", in_txt)
    write_txt(out_txt, new_items)

def rename_labels(data_dir, out_dir, doc_type):
    data_dir = Path(data_dir)
    out_dir = Path(out_dir)
    if not out_dir.exists():
        out_dir.mkdir(parents=True, exist_ok=True)
    if doc_type == "receipt":
        mapping = RECEIPT_MAPPING
    elif doc_type == 'invoice':
        mapping = INVOICE_MAPPING
    else:
        raise NotImplementedError()
    txt_paths = data_dir.rglob("*.txt")
    for txt_path in tqdm(txt_paths):
        txt_dir = str(Path(str(txt_path).replace(str(data_dir), "")).parent)  # a/b/c/x.txt -> c/x.txt -> c
        if txt_dir[0] == "/":
            txt_dir = txt_dir[1:]
        out_sub_dir = out_dir / Path(txt_dir)
        if not out_sub_dir.exists():
            out_sub_dir.mkdir(parents=True, exist_ok=True)
        out_txt = out_sub_dir / txt_path.name
        # if "failure" in str(out_txt):
        #     # print(out_txt)
        #     print(out_sub_dir)
        #     print(out_txt)
        # print(out_txt)
        edit_file(str(txt_path), out_txt=out_txt, mapping=mapping)

def write_json(json_path, data):
    with open(json_path, "w", encoding="utf8") as f:
        json.dump(data, f, ensure_ascii=False, sort_keys=True)

def read_json(json_path):
    with open(json_path, "r", encoding="utf8") as f:
        data = json.load(f)
    return data

def rename_label_in_json(json_in, json_out, doc_type):
    if doc_type == "invoice":
        mapping = INVOICE_MAPPING
    else:
        mapping = RECEIPT_MAPPING
    ori_data = read_json(json_in)
    new_data = {}
    for img_key, field_item in ori_data.items():
        new_field_item = {}
        for field_key, field_value in field_item.items():
            if field_key in mapping:
                new_field_key = mapping[field_key]
            else:
                new_field_key = field_key
            new_field_key = new_field_key.lower()
            new_field_item[new_field_key] = field_value
        new_data[img_key] = new_field_item
    write_json(json_out, new_data)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(prog="Rename labels")
    parser.add_argument("--in_dir", type=str, required=True, help="dataset directory")
    parser.add_argument("--out_dir", type=str, required=False, help="output")
    parser.add_argument("--doc_type", type=str, required=True, help="document type: receipt / invoice")
    args = parser.parse_args()
    rename_labels(
        data_dir=args.in_dir,
        out_dir=args.out_dir,
        doc_type=args.doc_type
    )
    # rename_label_in_json(
    #     json_in=args.in_dir,
    #     json_out=args.out_dir,
    #     doc_type=args.doc_type
    # )

View File

@ -1,83 +0,0 @@
import argparse
from sdsvkie.utils import read_json, yaml_load, write_json
# from sdsvkie.utils.post_processing.invoice_post_processing import *
# from sdsvkie.utils.post_processing.common_post_processing import normalize_number
from tqdm import tqdm

INVOICE_KEYS = [
    'no_key',
    'no_value',
    'form_key',
    'form_value',
    'serial_key',
    'serial_value',
    'date_value',
    'seller_company_name_key',
    'seller_company_name_value',
    'seller_address_value',
    'seller_address_key',
    'seller_tel_key',
    'seller_tel_value',
    'seller_tax_code_key',
    'seller_tax_code_value',
    'buyer_personal_name_key',
    'buyer_personal_name_value',
    'buyer_company_name_value',
    'buyer_company_name_key',
    'buyer_address_key',
    'buyer_address_value',
    'buyer_tax_code_key',
    'buyer_tax_code_value',
    'buyer_tel_key',
    'buyer_tel_value',
    'tax_amount_key',
    'tax_amount_value',
    'total_key',
    'total_value',
    'total_in_words_key',
    'total_in_words_value',
    'other'
]

def sort_invoice(data):
    sorted_data = {}
    for img_key, img_data in tqdm(data.items()):
        sorted_img_data = {}
        for field_key in INVOICE_KEYS:
            if "_key" in field_key or "other" in field_key:
                continue
            sorted_img_data[field_key] = img_data.get(field_key, "")
        sorted_data[img_key] = sorted_img_data
    return sorted_data

def sort_receipt(data):
    return data

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", type=str, help="e2e label file path")
    parser.add_argument("--out", type=str, help='postprocess e2e label')
    parser.add_argument("--doc_type", default="invoice")
    args = parser.parse_args()
    data = read_json(args.input)
    if args.doc_type == "invoice":
        sorted_data = sort_invoice(data)
    else:
        sorted_data = sort_receipt(data)
    write_json(args.out, sorted_data, sort_keys=False)

View File

@ -1,34 +0,0 @@
import os
import shutil

def split_folder_into_batches(input_folder, output_folder, n):
    # Get the list of image files in the input folder
    image_files = [f for f in os.listdir(input_folder) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
    # Sort the list of image files
    image_files.sort()
    print("len: ", len(image_files))
    # Calculate the number of images per batch
    batch_size = len(image_files) // n
    # Create the output directories
    for i in range(n):
        batch_dir = os.path.join(output_folder, f"batch_{i+1}")
        os.makedirs(batch_dir, exist_ok=True)
    # Split the images into batches
    for i, image_file in enumerate(image_files):
        # print(i, image_file)
        batch_index = i // batch_size
        batch_dir = os.path.join(output_folder, f"batch_{batch_index+1}")
        if not os.path.exists(batch_dir):
            os.makedirs(batch_dir, exist_ok=True)
        # print(batch_dir)
        # Find the corresponding label file
        image_name, image_ext = os.path.splitext(image_file)
        label_file = f"{image_name}.txt"
        label_path = os.path.join(input_folder, label_file)
        # Copy the image and label files into the appropriate batch directory
        print(label_path, os.path.join(input_folder, image_file), batch_dir)
        shutil.copy(os.path.join(input_folder, image_file), batch_dir)
        shutil.copy(label_path, batch_dir)

# Example usage:
split_folder_into_batches("/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/pseudo_ocr/invoice_receipt_sbt", "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/pseudo_ocr/invoice_receipt_sbt_split", 3)

View File

@ -1,31 +0,0 @@
# import xml.etree.ElementTree as ET
# from reportlab.pdfgen import canvas
# from reportlab.lib.pagesizes import letter
# # Load XML file
# tree = ET.parse('/mnt/hdd2T/AICR/Projects/2023/Xml_SAVINA/2023-04-20_0101803564_0300741922_1C23TYY_714.xml')
# root = tree.getroot()
# # Create PDF canvas
# pdf_canvas = canvas.Canvas('./2023-04-20_0101803564_0300741922_1C23TYY_714.pdf', pagesize=letter)
# # Iterate over XML elements and draw on PDF canvas
# for element in root.iter():
#     if element.tag == 'paragraph':
#         pdf_canvas.drawString(int(element.get('x')), int(element.get('y')), element.text)
#     elif element.tag == 'image':
#         pdf_canvas.drawImage(element.get('src'), int(element.get('x')), int(element.get('y')), int(element.get('width')), int(element.get('height')))
# # Save PDF file
# pdf_canvas.save()

import pyxml2pdf

# The pyxml2pdf call below was left unfinished in the original script (the
# constructor name is missing), so it is kept commented out; it does not run as-is.
# xml2pdf = pyxml2pdf.("/mnt/hdd2T/AICR/Projects/2023/Xml_SAVINA/2023-04-20_0101803564_0300741922_1C23TYY_714.xml")
# Save the output PDF file
# xml2pdf.save("my_pdf_file.pdf")

View File

@ -1,256 +0,0 @@
python tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/labeling/Pseudo/batch_1/Good/batch_1_taxi_sub_1_raw.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/labeling/Pseudo/batch_1/Good/batch_1_taxi_sub_1_pseudo.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/labeling/Pseudo_OCR/Batch_1_Good/Taxi_sub_1
python tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/labeling/Pseudo/batch_1/Good/batch_1_taxi_sub_2_raw.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/labeling/Pseudo/batch_1/Good/batch_1_taxi_sub_2_pseudo.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/labeling/Pseudo/batch_1/Good/Taxi_sub_2
python rename_labels.py \
--in_dir /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed \
--out_dir /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed \
--doc_type receipt
python rename_labels.py \
--in_dir /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed \
--out_dir /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed \
--doc_type receipt
# text detect
python sdsvkie/tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/label_ocr/all/batch_1/batch_1_taxi_sub_1_raw.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/label_ocr/all/batch_1/batch_1_taxi_sub_1_pseudo.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/label_ocr/all/batch_1/taxi_sub_1
python sdsvkie/tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/label_ocr/all/batch_1/batch_1_food.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/label_ocr/all/batch_1/batch_1_food_pseudo.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/label_ocr/all/batch_1/food
python sdsvkie/tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SS_Invoice/labeling/train_vnpt_raw.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SS_Invoice/labeling/train_vnpt_pseudo.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SS_Invoice/train_with_vnpt
##########
python tools/cvat.py --task update_txt_from_xml \
--txt_in /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/labeling/Pseudo_OCR/Batch_1_Good/Taxi_sub_1 \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/labeling/Pseudo/batch_1/Good/batch_1_taxi_sub_1_done.xml \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/processed/batch_1/Good/Taxi_sub_1 \
--line_to_word \
--other_class Others
python tools/cvat.py --task update_txt_from_xml \
--txt_in /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/labeling/Pseudo/batch_1/Good/Taxi_sub_2 \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/labeling/Pseudo/batch_1/Good/batch_1_taxi_sub_2_done.xml \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/processed/batch_1/Good/Taxi_sub_2 \
--other_class Others
python tools/cvat.py --task update_txt_from_xml \
--txt_in /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/labeling/Pseudo/batch_1/Good/Food \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/labeling/Pseudo/batch_1/Good/batch_1_food_done.xml \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/processed/batch_1/Good/Food \
--other_class Others
python tools/cvat.py --task update_txt_from_xml \
--txt_in /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/labeling/Pseudo/batch_2/Good/Food \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/labeling/Pseudo/batch_2/batch_2_food_done.xml \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/processed/batch_2/Good/Food \
--other_class Others
# WILD RECEIPT
python sdsvkie/tools/cvat.py --task update_txt_from_xml \
--txt_in /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/WildReceipt/re_labeling/batches/batch_1 \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/WildReceipt/re_labeling/batches/batch_1 \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/WildReceipt/re_labeling/wild_batch_1_done.xml \
--other_class Others
# SS RECEIPT
python sdsvkie/tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/SS_Receipt/Images_splitted/ss_receipt_batch_1_raw.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/SS_Receipt/Images_splitted/ss_receipt_batch_1_pseudo.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/SS_Receipt/Images_splitted_txt/batch_1 \
--other_class Others
python sdsvkie/tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/MC-OCR/raw/Images_aligned/train_splitted_txt/mc_ocr_batch_3_raw.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/MC-OCR/raw/Images_aligned/train_splitted_txt/mc_ocr_batch_3_pseudo.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/MC-OCR/raw/Images_aligned/train_splitted_v2/batch_3 \
--other_class Others \
--resever_parent_dir
python sdsvkie/tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/SS_Receipt/ss_receipt_all_raw.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/SS_Receipt/ss_receipt_all_pseudo.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/SS_Receipt/Done \
--other_class Others \
--resever_parent_dir
python sdsvkie/tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/intermediate/pseudo_label/mcocr_raw.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/intermediate/pseudo_label/mcocr_pseudo.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/train/mc_ocr \
--other_class Others \
--resever_parent_dir
python sdsvkie/tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/intermediate/pseudo_label/sdsap_receipt_raw.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/intermediate/pseudo_label/sdsap_receipt_pseudo.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/train/sdsap_receipt \
--other_class Others \
--resever_parent_dir
python sdsvkie/tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/intermediate/pseudo_label/ss_receipt_raw.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/intermediate/pseudo_label/ss_receipt_pseudo.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/train/ss_receipt \
--other_class Others \
--resever_parent_dir
python sdsvkie/tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/intermediate/pseudo_label/wildreceipt_raw.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/intermediate/pseudo_label/wildreceipt_pseudo.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/train/wildreceipt \
--other_class Others \
--resever_parent_dir
# INVOICE
python sdsvkie/tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/Labeling/invoice_sl_hn_raw.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/Labeling/invoice_sl_hn_pseudo.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/SL_HN_Invoice_txt \
--other_class other
python sdsvkie/tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/Labeling/invoice_sl_hcm_tmp_done.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/Labeling/invoice_sl_hcm_tmp_done_2.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/invoice_add_sl_hcm_finetuning/SL_HCM_Invoice_wg_txt_need_review \
--other_class other
python sdsvkie/tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/Labeling/invoice_ss_raw.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/Labeling/invoice_ss_wg_pseudo.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/intermediate/parse_wg/SS_Invoice \
--other_class other
python sdsvkie/tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/intermediate/multi_pages/labels/ss_hcm_batch_2_multi_pages_raw.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/intermediate/multi_pages/labels/ss_hcm_batch_2_multi_pages_pseudo.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/SL_HCM_batch_2_first_last_page_txt \
--other_class other
python sdsvkie/tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/intermediate/sbt/sbt_craw_raw.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/intermediate/sbt/sbt_craw_pseudo.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_4/Crawled_invoices_SBT \
--other_class other
python sdsvkie/tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_6/sbt_test_raw.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_6/sbt_test_pseudo.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/test_sbt_wg \
--other_class other
python sdsvkie/tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/viettinbank_pocr_raw.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/viettinbank_pocr_pseudo.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/invoice_viettinbank_poc_txt \
--other_class other
# UPDATE TXT
python sdsvkie/tools/cvat.py --task update_txt_from_xml \
--txt_in /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/MC-OCR/raw/Images_aligned/train_splitted_txt/batch_1 \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/MC-OCR/raw/Images_aligned/train_splitted/batch_1 \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/MC-OCR/raw/Images_aligned/train_splitted_txt/mc_ocr_batch_1_done.xml \
--other_class Others
python sdsvkie/tools/cvat.py --task update_txt_from_xml \
--txt_in /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/MC-OCR/raw/Images_aligned/train_splitted_txt/batch_2 \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/MC-OCR/raw/Images_aligned/train_splitted/batch_2 \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/MC-OCR/raw/Images_aligned/train_splitted_txt/mc_ocr_batch_2_done.xml \
--other_class Others
python sdsvkie/tools/cvat.py --task update_txt_from_xml \
--txt_in /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/MC-OCR/raw/Images_aligned/train_splitted_v2/batch_3 \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/MC-OCR/raw/Images_aligned/train_splitted_v2/batch_3_2 \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/MC-OCR/raw/Images_aligned/train_splitted_txt/mc_ocr_batch_3_done.xml \
--other_class Others \
--resever_parent_dir
python sdsvkie/tools/cvat.py --task update_txt_from_xml \
--txt_in /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/SS_Receipt/Done \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/SS_Receipt/Done \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/SS_Receipt/ss_receipt_by_store_done.xml \
--other_class Others \
--resever_parent_dir
python sdsvkie/tools/cvat.py --task update_txt_from_xml \
--txt_in /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/SS_Receipt/Images_splitted_txt/batch_1 \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/SS_Receipt/Images_splitted/batch_1 \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/SS_Receipt/Images_splitted/ss_receipt_batch_1_done.xml \
--other_class Others
python sdsvkie/tools/cvat.py --task update_txt_from_xml \
--txt_in /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/SS_Receipt/Images_splitted_txt/batch_2 \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/SS_Receipt/Images_splitted/batch_2 \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/SS_Receipt/Images_splitted/ss_receipt_batch_2_done.xml \
--other_class Others
python sdsvkie/tools/cvat.py --task update_txt_from_xml \
--txt_in /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/SL_HN_Invoice \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/SL_HN_Invoice \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/Labeling/invoice_sl_hn_done.xml \
--other_class other \
--line_to_word
python sdsvkie/tools/cvat.py --task update_txt_from_xml \
--txt_in /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/train/SS_Invoice \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/train/SS_Invoice \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/Labeling/ss_invoice_done.xml \
--other_class other \
--line_to_word
python sdsvkie/tools/cvat.py --task update_txt_from_xml \
--txt_in /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/intermediate/multi_pages/SL_HCM_batch_2_first_last_page \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/intermediate/multi_pages/SL_HCM_batch_2_first_last_page \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/intermediate/multi_pages/labels/sl_hcm_batch_2_multi_pages_done.xml \
--other_class other \
--line_to_word
python sdsvkie/tools/cvat.py --task update_txt_from_xml \
--txt_in /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_4/Crawled_invoices_SBT_no_wg_txt \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_4/Crawled_invoices_SBT_no_wg_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/intermediate/sbt/sbt_craw_done.xml \
--other_class other \
--line_to_word
python sdsvkie/tools/cvat.py --task update_txt_from_xml \
--txt_in /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/test_sbt \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/test_sbt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_6/sbt_test_done.xml \
--other_class other \
--line_to_word
python sdsvkie/tools/cvat.py --task update_txt_from_xml \
--txt_in /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/invoice_viettinbank_poc_txt \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/invoice_viettinbank_poc_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/viettinbank_poc_done.xml \
--other_class other \
--line_to_word

View File

@ -1,96 +0,0 @@
#SDSAP
python sdsvkie/utils/eval_kie.py \
--gt /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/done/test_end2end/food_e2e.json \
--pred /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/test_end2end_food_pred_v1.json \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/config.yaml
python sdsvkie/utils/eval_kie.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/config.yaml \
--gt /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/done/test_end2end/all_e2e.json \
--pred /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/test_all_end2end_w0.1_h0.3_thr5.json \
--log_failure_case /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/test_all_end2end_w0.1_h0.3_thr5.json
python sdsvkie/utils/eval_kie.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/config.yaml \
--gt /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/done/test_end2end/taxi_e2e.json \
--pred /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/test_taxi_end2end_text_det_20230425.json \
--log_failure_case /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/test_taxi_end2end_text_det_20230425_fail.json
python sdsvkie/utils/eval_kie.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/config.yaml \
--gt /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/done/test_end2end/taxi_e2e.json \
--pred /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/test_taxi_end2end_w0.2_h0.2_thr5.json \
--log_failure_case /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/test_taxi_end2end_w0.2_h0.2_thr5_fail.json
# 10/5/2023
python sdsvkie/utils/eval_kie.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_8_lr9e_6/config.yaml \
--gt /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/test_end2end/all_e2e.json \
--pred /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_8_lr9e_6/receipt_e2e.json \
--log_failure_case /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_8_lr9e_6/receipt_e2e_fail.json
python sdsvkie/utils/eval_kie.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_9_lr5e_6_no_scheduler/config.yaml \
--gt /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/test_end2end/all_e2e.json \
--pred /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_9_lr5e_6_no_scheduler/receipt_e2e_best.json \
--log_failure_case /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_9_lr5e_6_no_scheduler/receipt_e2e_best_fail.json
# =============================== INVOICE ====================================================================
python sdsvkie/utils/eval_kie.py \
--gt /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SS_Invoice/test_end2end/test_e2e.json \
--pred /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/e2e/test_vnpt_epx_4_best_not_ocr_merge_use_label.json \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/config.yaml \
--log_failure_case /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/failure_of_test_vnpt_epx_4_best_not_ocr_merge_use_label.json
python sdsvkie/utils/eval_kie.py \
--gt /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SS_Invoice/test_end2end/test_e2e.json \
--pred /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/pred_test_end2end.json \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/config.yaml \
--log_failure_case /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/vnpt_epx_falures_v2.json
python sdsvkie/utils/eval_kie.py \
--gt /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/test_ss_e2e_rm_leak.json \
--pred /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/test_ss_rm_leak.json \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/config.yaml \
--log_failure_case /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/test_ss_e2e_rm_leak_fail.json
python sdsvkie/utils/eval_kie.py \
--gt /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/PDF/multi_page/test_e2e_multi_pages.json \
--pred /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/test_e2e_multi_page.json \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/config.yaml \
--log_failure_case /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/test_e2e_multi_page_fail.json
python sdsvkie/utils/eval_kie.py \
--gt /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/test_ss_e2e_rm_leak.json \
--pred /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/26052023/test_ss_rm_leak.json \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/26052023/config.yaml \
--log_failure_case /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/26052023/test_ss_rm_leak_fail.json
python sdsvkie/utils/eval_kie.py \
--gt /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/PDF/multi_page/test_e2e_multi_pages.json \
--pred /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/26052023/test_e2e_multi_page.json \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/26052023/config.yaml \
--log_failure_case /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/26052023/test_e2e_multi_page_fail.json
python sdsvkie/utils/eval_kie.py \
--gt /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/test_end2end/sbt_validation_e2e.json \
--pred /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_6/sbt_validation_e2e_ep50.json \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_6/config.yaml \
--log_failure_case /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_6/sbt_validation_e2e_ep50_fail.json

View File

@ -1,199 +0,0 @@
python sdsvkie/tools/infer.py --cfg workdirs/invoice/exp1/config.yaml --inference_weights workdirs/invoice/exp1/best --device cuda --img ../TokenClassification_invoice/DATA/test/01067_0452_000.jpg
python sdsvkie/tools/infer.py --cfg workdirs/invoice/exp1/config.yaml --inference_weights workdirs/invoice/exp1/best --device "cuda:1" \
--img ../craw_data/output/synth_template_4/one_line --txt_out workdirs/visualize/vnpt_one_line_txt --kie_wordgroup_out
python sdsvkie/tools/infer.py --cfg workdirs/invoice/exp1/config.yaml --inference_weights workdirs/invoice/exp1/best --device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/craw_data/output/synth_vnpt_r20/one_line --txt_out workdirs/visualize/vnpt_one_line_r20_txt --kie_wordgroup_out
python sdsvkie/tools/infer.py --cfg workdirs/invoice/exp_add_vnpt_template/config.yaml --inference_weights workdirs/invoice/exp_add_vnpt_r2/best --device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/craw_data/output/synth_vnpt_r2_2/one_line \
--vis_out workdirs/visualize/vnpt_r2_phase_2 \
--txt_out workdirs/visualize/vnpt_r2_phase_2_txt --kie_wordgroup_out
python sdsvkie/tools/infer.py --cfg workdirs/invoice/exp_add_vnpt_template/config.yaml --inference_weights workdirs/invoice/exp_add_vnpt_r2/best --device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/craw_data/output/synth_vnpt_r2_2/one_line \
--vis_out workdirs/visualize/vnpt_r2_phase_2 \
--txt_out workdirs/visualize/vnpt_r2_phase_2_txt --kie_wordgroup_out
python sdsvkie/tools/infer.py --cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoiceadd_vnpt_final/config.yaml --inference_weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoiceadd_vnpt_final/epoch_60 --device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/test \
--vis_out workdirs/visualize/test_sorted
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoiceadd_vnpt_final/config.yaml \
--inference_weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoiceadd_vnpt_final/epoch_60 \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/test \
--vis_out workdirs/visualize/test_sorted
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoiceadd_vnpt_final/config.yaml \
--inference_weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoiceadd_vnpt_final/epoch_60 \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/test \
# test e2e
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/exp_wild_1/config.yaml \
--inference_weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/exp_wild_1/best \
--device "cuda:0" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/intermediate/labeling/phase_1/batch_1 \
--vis_out workdirs/visualize/SDSAP_Invoice_exp_wild_1
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/config.yaml \
--inference_weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/test \
--e2e workdirs/e2e/test_sampling_sortword_exp4_best.json
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/config.yaml \
--inference_weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/last \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Dataset/common_tools/JPG/one_page/vat \
--e2e workdirs/e2e/test_invoice_vnpt_exp4_sdsv_invoice.json \
--vis workdirs/e2e/test_invoice_vnpt_exp4_sdsv_invoice_visualize
# sdsAP
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_1/config.yaml \
--inference_weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_1/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/processed/batch_1/Good/Taxi_sub_2 \
--vis_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/infer/visualize/exp_1_batch_1_taxi_sub_2
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_2/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_2/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/processed/batch_2/Noise \
--vis_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/infer/visualize/exp_2_batch_2_noise
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/processed/batch_2/Good/Taxi \
--vis_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/infer/visualize/exp_3_batch_2_good_taxi
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/done/test_end2end/Taxi \
--vis_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/infer/visualize/exp_3_batch_2_good_taxi
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/label_ocr/all/batch_1/food \
--vis_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/infer/visualize/exp_3_batch_1_food
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/done/test_end2end/Taxi \
--vis_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/infer/visualize/exp_3_test_end2end_taxi_2
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_9_lr5e_6_no_scheduler/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_9_lr5e_6_no_scheduler/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/test_end2end/All \
--vis_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_9_lr5e_6_no_scheduler/receipt_e2e_infer_best
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_4/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_4/last \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/test_sbt \
--vis_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_4/test_sbt_infer
# MCOCR
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/MC-OCR/raw/Images_aligned/train_splitted/batch_1 \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/MC-OCR/raw/Images_aligned/train_splitted_txt/batch_1
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/SS_Receipt/Images_splitted/batch_1 \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/SS_Receipt/Images_splitted_txt/batch_1
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/02062023/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/02062023/best \
--device "cuda:0" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/sl_hcm_hn_savina \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/02062023/multi_pages_sl_hcm_hn_savina_wg_txt \
--kie_wordgroup_out
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_4/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_4/last \
--device "cuda:0" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/raw/Crawled_invoices_SBT \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_4/Crawled_invoices_SBT \
--kie_wordgroup_out
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_4/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_4/last \
--device "cuda:0" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/raw/Crawled_invoices_SBT \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_4/Crawled_invoices_SBT_no_wg_txt
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_6/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_6/epoch_50 \
--device "cuda:0" \
--img /mnt/ssd1T/tuanlv/06.KVUCombineStage/datasets/invoices-receipts/SS_invoices/SBT/validation_data/valid_images \
--vis_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_6/sbt_validation_e2e_ep50_old_textdet_infer
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/PDF/Viettinbank_POC/POC_OCR/invoice_JPG \
--vis_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/visualize/vietinbank
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/intermediate/1000416613 \
--vis_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/visualize/intermediate_1000416613

View File

@ -1,210 +0,0 @@
# SDSAP_Receipt
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/done/test_end2end/All \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/test_all_end2end.json \
--vis /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/test_all_end2end
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/MC-OCR/raw/Images/train \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/train_mcocr_end2end.json \
--vis /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/train_mcocr_end2end
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/best \
--text_det /mnt/ssd500/datnt/mmdetection/logs/textdet-fwd-table-receipt-20230425/best_bbox_mAP_epoch_15_lite.pth \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/done/test_end2end/Taxi \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/test_taxi_end2end_text_det_20230425.json \
--vis /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/test_taxi_end2end_text_det_20230425
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/done/v1/train \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/test_end2end.json
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/done/test_end2end/Taxi \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/test_taxi_end2end_w0.2_h0.2_thr5.json \
--vis /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/test_taxi_end2end_w0.2_h0.2_thr5
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/test_end2end/All \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/test_all_end2end_w0.1_h0.3_thr5_v2.json \
--vis /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/test_taxi_end2end_w0.1_h0.3_thr5_v2
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_8_lr9e_6/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_8_lr9e_6/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/test_end2end/All \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_8_lr9e_6/receipt_e2e.json \
--vis /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_8_lr9e_6/receipt_e2e
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_9_lr5e_6_no_scheduler/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_9_lr5e_6_no_scheduler/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/test_end2end/All \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_9_lr5e_6_no_scheduler/receipt_e2e_best.json \
--vis /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_9_lr5e_6_no_scheduler/receipt_e2e_best
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/best \
--device "cuda:0" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/Webcash/testing \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/pred_webcash_testing.json \
--vis /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_3/pred_webcash_testing
#invoice
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/last \
--device "cuda:0" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SS_Invoice/test \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/invoice_end2end_last.json
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/last \
--device "cuda:0" \
--img /mnt/ssd1T/hoanglv/Dataset/common_tools/Split_by_pages/multi_page/vat \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/pred_vat_multi_page_v2.json
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/last \
--device "cuda:0" \
--img /mnt/ssd1T/hoanglv/Dataset/common_tools/Split_by_pages/multi_page/vat \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/pred_vat_multi_page_v2.json
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/last \
--device "cuda:0" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SS_Invoice/multi_page_vat/SL_HCM \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SS_Invoice/multi_page_vat/SL_HCM.json
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/last \
--device "cuda:0" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/test_ss_rm_leak \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/test_ss_rm_leak.json
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/last \
--device "cuda:0" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/test_ss \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/test_ss.json
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/last \
--device "cuda:0" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/SL_HCM_Invoice \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/SL_HCM_Invoice.json \
--vis /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/SL_HCM_Invoice
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/26052023/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/26052023/best \
--device "cuda:0" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/test_ss_rm_leak \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/26052023/test_ss_rm_leak.json \
--vis /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/26052023/test_ss_rm_leak
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/26052023/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/26052023/best \
--device "cuda:0" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2 \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/26052023/test_ss_sl_hcm_batch_2_multi_page_jpg.json \
--vis /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/26052023/test_ss_sl_hcm_batch_2_multi_page_jpg
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/26052023/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/26052023/best \
--device "cuda:0" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/PDF/multi_page/SL_HN_batch_2 \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/26052023/multi_page_SL_HN_batch_2.json
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/26052023/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/26052023/best \
--device "cuda:0" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/PDF/multi_page/test_e2e_multi_page \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/26052023/test_e2e_multi_page.json
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/02062023/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/02062023/best \
--device "cuda:0" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/PDF/multi_page/SL_HN_batch_2 \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/02062023/multi_page_SL_HN_batch_2.json
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/best \
--device "cuda:0" \
--img /mnt/hdd2T/AICR/Projects/2023/Vietinbank_POC/Invoice_JPG/ \
--e2e /mnt/hdd2T/AICR/Projects/2023/Vietinbank_POC/Invoice_KIE_Results/result.json
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/best \
--device "cuda:0" \
--img /mnt/hdd2T/AICR/Projects/2023/FI_Invoices/Invoice_v2_multi_page \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/Invoice_v2_multi_page.json
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/19072023/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/19072023/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/PDF/Viettinbank_POC/POC_OCR/invoice_JPG/ \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/19072023/vietinbank_poc_infer.json \
--vis workdirs/invoice/19072023/visualize/vietinbank_infer
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/best \
--device "cuda:0" \
--img /mnt/hdd2T/AICR/Projects/2023/FI_Invoices/Test/1000416613/1000416613_0102310385_26062023163233062_001.pdf \
--e2e /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/1000416613_0102310385_26062023163233062_001.json

View File

@ -1,226 +0,0 @@
[
{
"name": "total_in_words_key",
"id": 92,
"color": "#33ddff",
"type": "any",
"attributes": []
},
{
"name": "no_value",
"id": 93,
"color": "#fa3253",
"type": "any",
"attributes": []
},
{
"name": "form_key",
"id": 94,
"color": "#34d1b7",
"type": "any",
"attributes": []
},
{
"name": "no_key",
"id": 95,
"color": "#ff007c",
"type": "any",
"attributes": []
},
{
"name": "form_value",
"id": 96,
"color": "#ddff33",
"type": "any",
"attributes": []
},
{
"name": "serial_key",
"id": 97,
"color": "#24b353",
"type": "any",
"attributes": []
},
{
"name": "serial_value",
"id": 98,
"color": "#b83df5",
"type": "any",
"attributes": []
},
{
"name": "date_value",
"id": 99,
"color": "#66ff66",
"type": "any",
"attributes": []
},
{
"name": "seller_company_name_key",
"id": 100,
"color": "#32b7fa",
"type": "any",
"attributes": []
},
{
"name": "seller_company_name_value",
"id": 101,
"color": "#ffcc33",
"type": "any",
"attributes": []
},
{
"name": "seller_tax_code_key",
"id": 102,
"color": "#83e070",
"type": "any",
"attributes": []
},
{
"name": "seller_tax_code_value",
"id": 103,
"color": "#fafa37",
"type": "any",
"attributes": []
},
{
"name": "seller_address_value",
"id": 104,
"color": "#5986b3",
"type": "any",
"attributes": []
},
{
"name": "seller_address_key",
"id": 105,
"color": "#8c78f0",
"type": "any",
"attributes": []
},
{
"name": "seller_tel_key",
"id": 106,
"color": "#ff6a4d",
"type": "any",
"attributes": []
},
{
"name": "seller_tel_value",
"id": 107,
"color": "#f078f0",
"type": "any",
"attributes": []
},
{
"name": "buyer_personal_name_key",
"id": 108,
"color": "#2a7dd1",
"type": "any",
"attributes": []
},
{
"name": "buyer_personal_name_value",
"id": 109,
"color": "#83e070",
"type": "any",
"attributes": []
},
{
"name": "buyer_company_name_value",
"id": 110,
"color": "#5986b3",
"type": "any",
"attributes": []
},
{
"name": "buyer_company_name_key",
"id": 111,
"color": "#8c78f0",
"type": "any",
"attributes": []
},
{
"name": "buyer_tax_code_key",
"id": 112,
"color": "#ff6a4d",
"type": "any",
"attributes": []
},
{
"name": "buyer_tax_code_value",
"id": 113,
"color": "#f078f0",
"type": "any",
"attributes": []
},
{
"name": "buyer_address_key",
"id": 114,
"color": "#2a7dd1",
"type": "any",
"attributes": []
},
{
"name": "buyer_address_value",
"id": 115,
"color": "#b25050",
"type": "any",
"attributes": []
},
{
"name": "buyer_tel_key",
"id": 116,
"color": "#cc3366",
"type": "any",
"attributes": []
},
{
"name": "buyer_tel_value",
"id": 117,
"color": "#cc9933",
"type": "any",
"attributes": []
},
{
"name": "tax_amount_key",
"id": 118,
"color": "#aaf0d1",
"type": "any",
"attributes": []
},
{
"name": "tax_amount_value",
"id": 119,
"color": "#ff00cc",
"type": "any",
"attributes": []
},
{
"name": "total_key",
"id": 120,
"color": "#3df53d",
"type": "any",
"attributes": []
},
{
"name": "total_value",
"id": 121,
"color": "#fa32b7",
"type": "any",
"attributes": []
},
{
"name": "total_in_words_value",
"id": 122,
"color": "#3d3df5",
"type": "any",
"attributes": []
},
{
"name": "other",
"id": 123,
"color": "#733380",
"type": "any",
"attributes": []
}
]
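
A minimal sketch (file name and snippet are hypothetical, not part of the repo) for reading the CVAT label spec above and pulling out the class names, e.g. to compare them against the `classes` lists in the training configs further down:

```python
# Hypothetical snippet: read the CVAT label spec above and list its class names.
import json

with open("invoice_cvat_labels.json", "r", encoding="utf-8") as f:  # assumed file name
    labels = json.load(f)

class_names = [label["name"] for label in labels]
print(len(class_names), "classes")  # 32 entries in the spec above (ids 92-123)
print(class_names[:5])
```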

View File

@ -1,10 +0,0 @@
python sdsvkie/tools/postprocess_e2e_label.py \
--cfg workdirs/invoice/vnpt_exp_4/config.yaml \
--input /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SS_Invoice/test_end2end/test_e2e.json \
--out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SS_Invoice/test_end2end/test_e2e_post.json
python sdsvkie/tools/postprocess_e2e_label.py \
--cfg workdirs/invoice/vnpt_exp_4/config.yaml \
--input /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/invoice_exp4/test_end2end_post.json \
--out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/invoice_exp4/test_end2end_post.json

View File

@ -1,36 +0,0 @@
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/exp_wild_1/config.yaml \
--inference_weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/exp_wild_1/best \
--device "cuda:0" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/processed/batch_2/Good/Food \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/labeling/Pseudo/batch_2/Good/Food
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_2/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/training/sdsap_receipt/exp_2/best \
--device "cuda:0" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/processed/batch_1/Good/Taxi_sub_2 \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/labeling/Pseudo/batch_1/Good/Taxi_sub_2
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/intermediate/multi_pages/SL_HCM_batch_2_first_last_page \
--vis_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/SL_HCM_batch_2_first_last_page_vis \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/SL_HCM_batch_2_first_last_page_txt \
--kie_wordgroup_out
python sdsvkie/tools/infer.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/best \
--device "cuda:1" \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/invoice_viettinbank_poc \
--vis_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/invoice_viettinbank_poc_vis \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/invoice_viettinbank_poc_txt \
--kie_wordgroup_out

View File

@ -1,19 +0,0 @@
python sdsvkie/tools/run_ocr.py \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/raw/IMGS \
--out_dir /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/SDSAP_Invoice/visualize_ocr \
--device "cuda:1" \
--reserve_parent_dir
# python sdsvkie/tools/run_ocr.py \
# --img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/raw/IMGS \
# --out_dir /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/SDSAP_Invoice/visualize_ocr \
# --device "cuda:1" \
# --reserve_parent_dir
python sdsvkie/tools/run_ocr.py \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/done/test_end2end/Taxi \
--device "cuda:1" \
--out_dir /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/label_ocr/test_v2/taxi \
--text_det "/mnt/ssd500/datnt/mmdetection/logs/textdet-fwd-table-receipt-20230425/best_bbox_mAP_epoch_15_lite.pth"

View File

@ -1,35 +0,0 @@
python sdsvkie/tools/run_ocr.py \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/WildReceipt/pseudo_ocr \
--out_dir ./workdirs/visualize/WildReceipt \
--device "cuda:0" \
--out_txt /mnt/ssd1T/hoanglv/Projects/KIE/DATA/WildReceipt/pseudo_ocr
python sdsvkie/tools/run_ocr.py \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/processed/batch_1/Good/Taxi_sub_1 \
--device "cuda:0" \
--out_txt /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/labeling/Pseudo_OCR/Batch_1_Good/Taxi_sub_1
# pseudo labels for editing text detection boxes
python sdsvkie/tools/run_ocr.py \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/label_ocr/all/batch_1/taxi_sub_1 \
--device "cuda:0" \
--out_txt /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/label_ocr/all/batch_1/taxi_sub_1
python sdsvkie/tools/run_ocr.py \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/label_ocr/all/batch_1/taxi_sub_1 \
--device "cuda:0" \
--out_txt /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/label_ocr/all/batch_1/taxi_sub_1
python sdsvkie/tools/run_ocr.py \
--img /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/label_ocr/all/batch_1/food \
--device "cuda:0" \
--out_txt /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/label_ocr/all/batch_1/food
python sdsvkie/tools/run_ocr.py \
--img /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/demos/invoice \
--device "cuda:0" \
--out_txt /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/demos/invoice

View File

@ -1,15 +0,0 @@
python sdsvkie/utils/pdf2image.py \
--pdf_dir /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/raw/batch_1/PDF \
--out_dir /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/raw/batch_1/IMGS_dpi_300 \
--reserve_parent_dir
python sdsvkie/utils/pdf2image.py \
--pdf_dir /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/PDF/multi_page/Invoices_SL_HCM \
--out_dir /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/Invoices_SL_HCM
python sdsvkie/utils/pdf2image.py \
--pdf_dir /mnt/hdd2T/AICR/Projects/2023/Invoice_SDSV/SDSV_Invoice_2023/All \
--out_dir /mnt/hdd2T/AICR/Projects/2023/Invoice_SDSV/SDSV_Invoice_2023/JPG
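
For reference, a rough stand-alone equivalent of these calls using the `pdf2image` package directly; paths are placeholders and the repo's own `sdsvkie/utils/pdf2image.py` may behave differently:

```python
# Rough stand-alone equivalent using the pdf2image package (not the repo script).
from pathlib import Path
from pdf2image import convert_from_path  # requires poppler installed

pdf_dir = Path("PDF")            # placeholder input directory
out_dir = Path("IMGS_dpi_300")   # placeholder output directory
out_dir.mkdir(parents=True, exist_ok=True)

for pdf_path in pdf_dir.glob("*.pdf"):
    pages = convert_from_path(str(pdf_path), dpi=300)
    for idx, page in enumerate(pages):
        page.save(out_dir / f"{pdf_path.stem}_{idx:03d}.jpg", "JPEG")
```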

View File

@ -1,25 +0,0 @@
python sdsvkie/utils/split_data.py \
--path /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/processed/batch_1/Good/Food \
--out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/done/v2/ \
--test_ratio 0.05
python sdsvkie/utils/split_data.py \
--path /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/processed/batch_1/Good/Taxi_sub_1 \
--out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/done/v2/ \
--test_ratio 0.05
python sdsvkie/utils/split_data.py \
--path /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/processed/batch_1/Good/Taxi_sub_2 \
--out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/done/v2/ \
--test_ratio 0.05
python sdsvkie/utils/split_data.py \
--path /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/processed/batch_2/Good/Food \
--out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/done/v2/ \
--test_ratio 0.05
python sdsvkie/utils/split_data.py \
--path /mnt/ssd1T/hoanglv/Projects/KIE/DATA/WildReceipt/re_labeling/batches/batch_1 \
--out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/done/v2/ \
--test_ratio 0.0

View File

@ -1,7 +0,0 @@
python sdsvkie/tools/train.py --cfg sdsvkie/cfg/wildreciept.yaml --device cuda:0 --save_dir workdirs/invoice/exp_wild_1
python sdsvkie/tools/train.py --cfg sdsvkie/cfg/sdsap_receipt_scheduler_linear.yaml --device cuda:0 --save_dir workdirs/sdsap_receipt/exp_4_scheduler_linear
python sdsvkie/tools/train.py --cfg sdsvkie/cfg/sdsap_receipt.yaml --device cuda:1 --save_dir workdirs/sdsap_receipt/exp_5

View File

@ -1,5 +0,0 @@
import os
import sys
sys.path.append(os.path.dirname(__file__))  # add the package directory (appending the file itself has no effect)
from .engine.predictor import Predictor

View File

@ -1,33 +0,0 @@
from types import SimpleNamespace
from sdsvkie.utils.io_file import yaml_load
from pathlib import Path
from copy import copy, deepcopy
def load_cfg(cfg, args=None):
    """
    Convert a configuration object to a dictionary, whether it is a file path, a string, or a SimpleNamespace object.

    Inputs:
        cfg (str) or (Path) or (SimpleNamespace): Configuration object to be converted to a dictionary.
    Returns:
        cfg (dict): Configuration object in dictionary format.
    """
    if isinstance(cfg, (str, Path)):
        cfg = yaml_load(cfg)  # load dict
    elif isinstance(cfg, SimpleNamespace):
        cfg = vars(cfg)  # convert to dict

    if args is not None:
        _args = deepcopy(args)
        for k, v in args.items():
            # drop empty values and the config-path argument itself before merging
            if v is None or k == "cfg":
                _args.pop(k)
            # the CLI flag is "weights", but the config stores it as "inference_weights"
            if v is not None and k == "weights":
                _args['inference_weights'] = v
                _args.pop(k)
        cfg.update(_args)
    return cfg
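
A small usage sketch (paths and the import location are assumed for illustration) showing how CLI arguments are merged into a YAML config by `load_cfg`:

```python
# Hypothetical usage of load_cfg; the import path is assumed.
from argparse import Namespace

from sdsvkie.utils.load_cfg import load_cfg  # assumed module location

args = vars(Namespace(cfg="workdirs/invoice/exp1/config.yaml",
                      weights="workdirs/invoice/exp1/best",
                      device="cuda:0",
                      img=None))
cfg = load_cfg(args["cfg"], args)
# None values and the "cfg" entry are dropped; "weights" becomes "inference_weights".
print(cfg["device"], cfg["inference_weights"])
```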

View File

@ -1,39 +0,0 @@
no_key
no_value
form_key
form_value
serial_key
serial_value
date_key
date_value
subtotal_key
subtotal_value
tax_rate_key
tax_rate_value
tax_amount_key
tax_amount_value
tips_key
tips_value
total_key
total_value
total_in_words_key
total_in_words_value
seller_company_name_key
seller_company_name_value
seller_address_key
seller_address_value
seller_tel_key
seller_tel_value
seller_tax_code_key
seller_tax_code_value
buyer_company_name_key
buyer_company_name_value
buyer_personal_name_key
buyer_personal_name_value
buyer_tax_code_key
buyer_tax_code_value
buyer_address_key
buyer_address_value
buyer_tel_key
buyer_tel_value
other

View File

@ -1,79 +0,0 @@
debug: False
v3: False
# common
device: 'cpu' # 'cpu' / 'cuda:0' / 'cuda:1' / 'cuda'
#dataset
train_dir: /mnt/ssd1T/hoanglv/Projects/KIE/DATA/train_with_vnpt
val_dir: /mnt/ssd1T/hoanglv/Projects/KIE/DATA/test
slice_interval: 75
postprocess_type: invoice_postprocess
classes: [
# id invoice
'no_key', # invoice number
'no_value',
'form_key', # invoice form/template number
'form_value',
'serial_key', # invoice serial number
'serial_value',
'date',
# seller info
'seller_company_name_key',
'seller_company_name_value',
'seller_tax_code_key',
'seller_tax_code_value',
'seller_address_value',
'seller_address_key',
'seller_mobile_key',
'seller_mobile_value',
# buyer info
'buyer_name_key',
'buyer_name_value',
'buyer_company_name_value',
'buyer_company_name_key',
'buyer_tax_code_key',
'buyer_tax_code_value',
'buyer_address_key',
'buyer_address_value',
'buyer_mobile_key',
'buyer_mobile_value',
# money info
'VAT_amount_key',
'VAT_amount_value',
'total_key',
'total_value',
'total_in_words_key',
'total_in_words_value',
'other',
]
sampling: true # sampling window - handles long documents (more than 512 tokens)
#model
img_size: 224 # fixed
max_seq_length: 512 # fixed
max_num_words: 150
tokenizer_weights: "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/microsoft/microsoft/layoutxlm-base" # fixed
weights: "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/microsoft/microsoft/layoutxlm-base"
# opt + scheduler
batch_size: 8
epochs: 100
lr: 5.0e-6
shuffle: True
num_workers: 4
scheduler: False # False or "linear" / "cosine" / "cosine_with_restarts" / "polynomial" / "constant" / "constant_with_warmup" / "inverse_sqrt"
save_dir: workdirs/invoice/exp1
save_weight_interval: 10
eval_delay: 0
wandb: null
# inference
inference_weights: null
text_det: yolox-s-general-text-pretrain-20221226
text_reg: satrn-lite-general-pretrain-20230106

View File

@ -1,74 +0,0 @@
debug: False
v3: False
# common
device: 'cpu' # 'cpu' / 'cuda:0' / 'cuda:1' / 'cuda'
#dataset
train_dir: /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/train
val_dir: /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/test_ss
slice_interval: 75
postprocess_type: invoice_postprocess
classes: [
'no_key',
'no_value',
'form_key',
'form_value',
'serial_key',
'serial_value',
'date_value',
'seller_company_name_key',
'seller_company_name_value',
'seller_tax_code_key',
'seller_tax_code_value',
'seller_address_value',
'seller_address_key',
'seller_tel_key',
'seller_tel_value',
'buyer_personal_name_key',
'buyer_personal_name_value',
'buyer_company_name_value',
'buyer_company_name_key',
'buyer_tax_code_key',
'buyer_tax_code_value',
'buyer_address_key',
'buyer_address_value',
'buyer_tel_key',
'buyer_tel_value',
'tax_amount_key',
'tax_amount_value',
'total_key',
'total_value',
'total_in_words_key',
'total_in_words_value',
'other'
]
sampling: true # sampling window - handles long documents (more than 512 tokens)
#model
img_size: 224 # fixed
max_seq_length: 512 # fixed
max_num_words: 145
tokenizer_weights: "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/microsoft/microsoft/layoutxlm-base" # fixed
weights: "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/microsoft/microsoft/layoutxlm-base"
# opt + scheduler
batch_size: 8
epochs: 100
lr: 5.0e-6
shuffle: True
num_workers: 4
scheduler: "linear" # False or "linear" / "cosine" / "cosine_with_restarts" / "polynomial" / "constant" / "constant_with_warmup" / "inverse_sqrt"
save_dir: workdirs/invoice/19072023
save_weight_interval: 10
eval_delay: 50
wandb: null
# inference
inference_weights: null
text_det: yolox-s-general-text-pretrain-20221226
text_reg: satrn-lite-general-pretrain-20230106

View File

@ -1,74 +0,0 @@
debug: False
v3: False
# common
device: 'cpu' # 'cpu' / 'cuda:0' / 'cuda:1' / 'cuda'
#dataset
train_dir: /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/train
val_dir: /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/test_ss
slice_interval: 75
postprocess_type: invoice_postprocess
classes: [
'no_key',
'no_value',
'form_key',
'form_value',
'serial_key',
'serial_value',
'date_value',
'seller_company_name_key',
'seller_company_name_value',
'seller_tax_code_key',
'seller_tax_code_value',
'seller_address_value',
'seller_address_key',
'seller_tel_key',
'seller_tel_value',
'buyer_personal_name_key',
'buyer_personal_name_value',
'buyer_company_name_value',
'buyer_company_name_key',
'buyer_tax_code_key',
'buyer_tax_code_value',
'buyer_address_key',
'buyer_address_value',
'buyer_tel_key',
'buyer_tel_value',
'tax_amount_key',
'tax_amount_value',
'total_key',
'total_value',
'total_in_words_key',
'total_in_words_value',
'other'
]
sampling: true # sampling window - handles long documents (more than 512 tokens)
#model
img_size: 224 # fixed
max_seq_length: 512 # fixed
max_num_words: 145
tokenizer_weights: "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/microsoft/microsoft/layoutxlm-base" # fixed
weights: "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/microsoft/microsoft/layoutxlm-base"
# opt + scheduler
batch_size: 8
epochs: 100
lr: 5.0e-6
shuffle: True
num_workers: 4
scheduler: "cosine" # False or "linear" / "cosine" / "cosine_with_restarts" / "polynomial" / "constant" / "constant_with_warmup" / "inverse_sqrt"
save_dir: workdirs/invoice/26052023
save_weight_interval: 10
eval_delay: 50
wandb: null
# inference
inference_weights: null
text_det: yolox-s-general-text-pretrain-20221226
text_reg: satrn-lite-general-pretrain-20230106

View File

@ -1,74 +0,0 @@
debug: False
v3: False
# common
device: 'cpu' # 'cpu' / 'cuda:0' / 'cuda:1' / 'cuda'
#dataset
train_dir: /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/train
val_dir: /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/test_ss
slice_interval: 75
postprocess_type: invoice_postprocess
classes: [
'no_key',
'no_value',
'form_key',
'form_value',
'serial_key',
'serial_value',
'date_value',
'seller_company_name_key',
'seller_company_name_value',
'seller_tax_code_key',
'seller_tax_code_value',
'seller_address_value',
'seller_address_key',
'seller_tel_key',
'seller_tel_value',
'buyer_personal_name_key',
'buyer_personal_name_value',
'buyer_company_name_value',
'buyer_company_name_key',
'buyer_tax_code_key',
'buyer_tax_code_value',
'buyer_address_key',
'buyer_address_value',
'buyer_tel_key',
'buyer_tel_value',
'tax_amount_key',
'tax_amount_value',
'total_key',
'total_value',
'total_in_words_key',
'total_in_words_value',
'other'
]
sampling: true # sampling window - handles long documents (more than 512 tokens)
#model
img_size: 224 # fixed
max_seq_length: 512 # fixed
max_num_words: 150
tokenizer_weights: "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/microsoft/microsoft/layoutxlm-base" # fixed
weights: "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/last"
# opt + scheduler
batch_size: 8
epochs: 100
lr: 1.0e-5
shuffle: True
num_workers: 4
scheduler: "cosine" # False or "linear" / "cosine" / "cosine_with_restarts" / "polynomial" / "constant" / "constant_with_warmup" / "inverse_sqrt"
save_dir: workdirs/invoice/add_sl_hcm
save_weight_interval: 1
eval_delay: 1
wandb: null
# inference
inference_weights: null
text_det: yolox-s-general-text-pretrain-20221226
text_reg: satrn-lite-general-pretrain-20230106

View File

@ -1,47 +0,0 @@
debug: False
v3: False
# common
device: 'cpu' # 'cpu' / 'cuda:0' / 'cuda:1' / 'cuda'
#dataset
train_dir: /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/train
val_dir: /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/test
slice_interval: 75
postprocess_type: receipt_postprocess
classes: [
"seller_company_name_value",
"no_value",
"date_value",
"total_key",
"total_value",
"other"
]
sampling: true # sampling window - handles long documents (more than 512 tokens)
#model
img_size: 224 # fixed
max_seq_length: 512 # fixed
max_num_words: 150
tokenizer_weights: "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/microsoft/microsoft/layoutxlm-base" # fixed
weights: "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/microsoft/microsoft/layoutxlm-base"
# opt + scheduler
batch_size: 8
epochs: 100
lr: 5.0e-6
scheduler: False
shuffle: True
num_workers: 4
save_dir: workdirs/receipt/13062023
save_weight_interval: 10
eval_delay: 0
wandb: null
# inference
inference_weights: null
text_det: yolox-s-general-text-pretrain-20221226
text_reg: satrn-lite-general-pretrain-20230106

View File

@ -1,47 +0,0 @@
debug: False
v3: False
# common
device: 'cpu' # 'cpu' / 'cuda:0' / 'cuda:1' / 'cuda'
#dataset
train_dir: /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/train
val_dir: /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/processed/test
slice_interval: 75
postprocess_type: receipt_postprocess
classes: [
"seller_company_name_value",
"no_value",
"date_value",
"total_key",
"total_value",
"other"
]
sampling: true # sampling window - handles long documents (more than 512 tokens)
#model
img_size: 224 # fixed
max_seq_length: 512 # fixed
max_num_words: 150
tokenizer_weights: "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/microsoft/microsoft/layoutxlm-base" # fixed
weights: "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/receipt/13062023_4/last"
# opt + scheduler
batch_size: 8
epochs: 30
lr: 3.0e-6
scheduler: False
shuffle: True
num_workers: 4
save_dir: workdirs/receipt/13062023
save_weight_interval: 10
eval_delay: 0
wandb: null
# inference
inference_weights: null
text_det: yolox-s-general-text-pretrain-20221226
text_reg: satrn-lite-general-pretrain-20230106

View File

@ -1,47 +0,0 @@
debug: False
v3: False
# common
device: 'cpu' # 'cpu' / 'cuda:0' / 'cuda:1' / 'cuda'
#dataset
train_dir: /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/train
val_dir: /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/test
slice_interval: 75
postprocess_type: receipt_postprocess
classes: [
"Seller_company_name_value",
"ID_value",
"Date_value",
"Total_key",
"Total_value",
"Other"
]
sampling: true # sampling window - handles long documents (more than 512 tokens)
#model
img_size: 224 # fixed
max_seq_length: 512 # fixed
max_num_words: 150
tokenizer_weights: "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/microsoft/microsoft/layoutxlm-base" # fixed
weights: "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/microsoft/microsoft/layoutxlm-base"
# opt + scheduler
batch_size: 8
epochs: 100
lr: 9.0e-6
scheduler: "linear"
shuffle: True
num_workers: 4
save_dir: workdirs/invoice/exp1
save_weight_interval: 10
eval_delay: 50
wandb: null
# inference
inference_weights: null
text_det: yolox-s-general-text-pretrain-20221226
text_reg: satrn-lite-general-pretrain-20230106

View File

@ -1,47 +0,0 @@
debug: False
v3: False
# common
device: 'cpu' # 'cpu' / 'cuda:0' / 'cuda:1' / 'cuda'
#dataset
train_dir: /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/train
val_dir: /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Receipt/test
slice_interval: 75
postprocess_type: receipt_postprocess
classes: [
"Store_name_value",
"id",
"Date_value",
"Total_key",
"Total_value",
"Others"
]
sampling: true # sampling window - handles long documents (more than 512 tokens)
#model
img_size: 224 # fixed
max_seq_length: 512 # fixed
max_num_words: 150
tokenizer_weights: "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/microsoft/microsoft/layoutxlm-base" # fixed
weights: "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/microsoft/microsoft/layoutxlm-base"
# opt + scheduler
batch_size: 8
epochs: 100
lr: 9.0e-6
scheduler: "linear"
shuffle: True
num_workers: 4
save_dir: workdirs/invoice/exp1
save_weight_interval: 10
eval_delay: 50
wandb: null
# inference
inference_weights: null
text_det: yolox-s-general-text-pretrain-20221226
text_reg: satrn-lite-general-pretrain-20230106

View File

@ -1,47 +0,0 @@
debug: False
v3: False
scheduler: False
# common
device: 'cpu' # 'cpu' / 'cuda:0' / 'cuda:1' / 'cuda'
#dataset
train_dir: /mnt/ssd1T/hoanglv/Projects/KIE/DATA/WildReceipt/v1/train
val_dir: /mnt/ssd1T/hoanglv/Projects/KIE/DATA/WildReceipt/v1/test
slice_interval: 75
postprocess_type: receipt_postprocess
classes: [
"Store_name_value",
"Date_value",
"Total_key",
"Total_value",
"Others"
]
sampling: True # sampling window - handles long documents (more than 512 tokens)
#model
img_size: 224 # fixed
max_seq_length: 512 # fixed
max_num_words: 150
# tokenizer_weights: "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/microsoft/layoutlmv3-base" # fixed
# weights: "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/microsoft/layoutlmv3-base"
tokenizer_weights: "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/microsoft/microsoft/layoutxlm-base" # fixed
weights: "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/microsoft/microsoft/layoutxlm-base"
# opt + scheduler
batch_size: 8
epochs: 100
lr: 5.0e-6
shuffle: True
num_workers: 4
save_dir: workdirs/invoice/exp1
save_weight_interval: 10
eval_delay: 0
wandb: null
# inference
inference_weights: null
text_det: yolox-s-general-text-pretrain-20221226
text_reg: satrn-lite-general-pretrain-20230106

View File

@ -1,314 +0,0 @@
import logging
import os
import pickle
import random
from copy import deepcopy
from pathlib import Path
import cv2
import pandas as pd
import torch
from datasets import (Array2D, Array3D, ClassLabel, Dataset, Features,
Sequence, Value, concatenate_datasets)
from easydict import EasyDict
from PIL import Image
from tqdm import tqdm
from sdsvkie.utils.io_file import read_txt
from sdsvkie.utils import normalize_box, visualize_kie
from sdsvkie.utils.augmentation import perturbate_character, sampling_data
from sdsvkie.utils.word_formation import sliding_windows, sort_words
import glob
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
IMG_EXTENSION = [".jpg", ".jpeg", ".png"]
class BaseDataset:
def __init__(self, cfg):
self.cfg = cfg
self.feature_format = Features(
{
"image": Array3D(
dtype="int64", shape=(3, self.cfg.img_size, self.cfg.img_size)
),
"input_ids": Sequence(feature=Value(dtype="int64")),
"attention_mask": Sequence(Value(dtype="int64")),
"bbox": Array2D(dtype="int64", shape=(self.cfg.max_seq_length, 4)),
"labels": Sequence(ClassLabel(names=self.cfg.classes)),
}
) if not self.cfg.v3 else \
Features(
{
"pixel_values": Array3D(
dtype="float32", shape=(3, self.cfg.img_size, self.cfg.img_size)
),
"input_ids": Sequence(feature=Value(dtype="int64")),
"attention_mask": Sequence(Value(dtype="int64")),
"bbox": Array2D(dtype="int64", shape=(self.cfg.max_seq_length, 4)),
"labels": Sequence(ClassLabel(names=self.cfg.classes)),
}
)
logger.info("Feature format: {}".format(self.feature_format.keys()))
def _build_df(self, data_dir):
"""Build dataframe from data directory
Args:
data_dir (str): structure data folder
- data_dir
- img1.jpg
- img1.txt
- ...
"""
data_dir = Path(data_dir)
# img_paths = glob.glob("*") + glob.glob("*/*")
img_paths = [
path for path in list(data_dir.rglob("*"))
if ".txt" not in str(path) and path.with_suffix(".txt").exists() and path.suffix.lower() in IMG_EXTENSION
]
label_paths = [str(path.with_suffix(".txt")) for path in img_paths]
img_paths = [str(path) for path in img_paths]
assert len(label_paths) == len(img_paths)
# remove empty txt
ids = [id for id in range(len(label_paths)) if len(read_txt(label_paths[id])) > 0]
label_paths = [label_paths[id] for id in ids]
img_paths = [img_paths[id] for id in ids]
dataframe = pd.DataFrame.from_dict(
{"image_path": img_paths, "label": label_paths}
)
return dataframe
def build_dataloader_from_dataset(
self,
dataset,
processor,
device,
batch_size,
shuffle=True,
num_workers=4,
cache_file="./cache.pkl",
use_sampling=False,
):
if not os.path.exists(cache_file):
self._build_cache(
dataset,
processor,
cache_file=cache_file,
max_seq_length=self.cfg.max_seq_length,
)
cache = self._load_cache(cache_file)
dataset = dataset.map(
self._prepare_data,
fn_kwargs={"cache": cache, "sampling": use_sampling},
remove_columns=dataset.column_names,
features=self.feature_format,
batched=False,
batch_size=self.cfg.batch_size,
)
dataset.set_format(type="torch", device=device)
dataloader = torch.utils.data.DataLoader(
dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers
)
return dataloader
def build_dataloader_from_dir(
self,
data_dir,
processor,
device,
batch_size,
shuffle=True,
num_workers=4,
cache_file="./cache.pkl",
use_sampling=False,
):
dataset = self._build_dataset(data_dir)
dataloader = self.build_dataloader_from_dataset(
dataset,
processor,
device,
batch_size,
shuffle,
num_workers,
cache_file,
use_sampling,
)
return dataloader
def _build_dataset(self, data_dir):
df = self._build_df(data_dir)
dataset = Dataset.from_pandas(df)
logger.info(f"Load example for {data_dir}")
dataset = dataset.map(lambda example: self._load_example_info(example))
return dataset
def _build_cache(self, dataset, processor, max_seq_length, cache_file=""):
logger.info(f"Caching {cache_file}...")
cache = {}
for examples in tqdm(dataset):
encoding_inputs = self._cache_feature(examples, processor, max_seq_length)
cache[examples["image_path"]] = encoding_inputs
with open(cache_file, "wb") as f:
pickle.dump(cache, f)
def _load_cache(self, cache_file):
with open(cache_file, "rb") as f:
cache = pickle.load(f)
return cache
def _prepare_data(self, example, cache, sampling):
encoded_inputs_windows = cache[example["image_path"]]
if len(encoded_inputs_windows) == 0:
raise Exception("Empty encoded_inputs_windows")
if sampling:
if random.random() < 0.6:
encoded_inputs = encoded_inputs_windows[-1]
else:
encoded_inputs = random.choice(encoded_inputs_windows)
else:
encoded_inputs = encoded_inputs_windows[-1]
for k, v in encoded_inputs.items():
if k in ["image", "pixel_values"]:
encoded_inputs[k] = encoded_inputs[k][0]
return encoded_inputs
def _cache_feature(self, example, processor, max_seq_length=512):
"""Sampling (optional) + apply LayoutLMProcessor
Args:
examples (dict): dict {'image_path', 'words', 'bbox', 'word_labels'}
max_seq_length (int, optional): _description_. Defaults to 512.
Returns:
list[dict]: list encoding inputs
"""
image = Image.open(example["image_path"]).convert("RGB")
batch_words = example["words"]
batch_boxes = example["bbox"]
batch_labels = example["word_labels"]
window_size = self.cfg.max_num_words
slice_interval = self.cfg.slice_interval
word_windows = sliding_windows(batch_words, window_size, slice_interval)
box_windows = sliding_windows(batch_boxes, window_size, slice_interval)
label_windows = sliding_windows(batch_labels, window_size, slice_interval)
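# assuming sliding_windows steps by slice_interval: with window_size=150 and slice_interval=75, a 300-word page gives windows [0:150], [75:225], [150:300]; the full-page encoding is appended last below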
encoded_inputs = []
for words, boxes, labels in zip(word_windows, box_windows, label_windows):
# Process examples
encoded_input = processor(
image,
padding="max_length",
truncation=True,
text=words,
boxes=boxes,
word_labels=labels,
max_length=max_seq_length,
)
encoded_inputs.append(encoded_input)
# full page
encoded_input = processor(
image,
padding="max_length",
truncation=True,
text=batch_words,
boxes=batch_boxes,
word_labels=batch_labels,
max_length=max_seq_length,
)
encoded_inputs.append(encoded_input)
return encoded_inputs
def _load_example_info(self, example, aug_prob=0.0):
"""_summary_
Args:
example (_type_): _description_
aug_prob (float, optional): _description_. Defaults to 0.0.
"""
image_path = example["image_path"]
label_path = example["label"]
assert os.path.exists(image_path)
assert os.path.exists(label_path)
# try:
image = cv2.imread(image_path)
h, w, _ = image.shape
with open(label_path) as f:
lines = [line.replace("\n", "").replace("\r", "") for line in f.readlines()]
words, boxes, labels = [], [], []
# print(label_path)
for i, line in enumerate(lines):
x1, y1, x2, y2, text, label = line.split("\t")
box = [int(x1), int(y1), int(x2), int(y2)]
if text != " ":
words.append(text)
boxes.append(box)
labels.append(label)
if aug_prob > 0:
p_words = perturbate_character(words, aug_prob)
logging.info("{} - {}".format(len(p_words), len(words)))
# custom for sort boxes
items = {
'boxes': boxes,
'texts': words,
'labels': labels
}
# boxes, words, labels = sort_words(boxes, words, labels)
sorted_items = sort_words(items)
boxes, words, labels = sorted_items['boxes'], sorted_items['texts'], sorted_items['labels']
# print(image_path)
# print(image_path)
labels = [self.cfg.classes.index(label) for label in labels]
if self.cfg.debug:
visualize_kie(
img=image,
boxes=boxes,
pred_labels=labels,
outdir="wordirs/debug_{}".format(
"val" if "train" not in image_path else "train"
),
image_name=os.path.basename(image_path),
)
boxes = [normalize_box(box, width=w, height=h) for box in boxes]
example["words"] = words
example["bbox"] = boxes # TODO: Check this
example["word_labels"] = labels
# except Exception as err:
# logger.info(f"Exception: {err} at image path: {example['image_path']}")
# example["words"] = []
# example["bbox"] = [] # TODO: Check this
# example["word_labels"] = []
return example

View File

@ -1 +0,0 @@
from .predictor import Predictor

View File

@ -1,457 +0,0 @@
import logging
import time
import cv2
import numpy as np
from sdsvkie.utils.word_formation import merge_boxes
import torch
from easydict import EasyDict
from PIL import Image
from sdsvkie.cfg import load_cfg
# from sdsvkie.models.layoutlm import LayoutLM
from sdsvkie.models.layoutlmv2 import LayoutLMv2
from sdsvkie.models.ocr import OCREngine
from sdsvkie.utils import invoice_postprocess # invoice
from sdsvkie.utils import receipt_postprocess # receipt
from sdsvkie.utils import POSTPROCESS_FUNC
from sdsvkie.utils import (Word, construct_word_groups_to_kie_label,
normalize_box, sliding_windows, sort_words,
unnormalize_box, words_to_lines,
pdf_to_image)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class Predictor:
def __init__(self, cfg, **kwargs) -> None:
"""
Args:
cfg (dict / str): config
**kwargs: device=..., weights=..., text_det=..., text_reg=...
"""
if isinstance(cfg, str):
cfg = load_cfg(cfg, kwargs)
self.cfg = EasyDict(cfg)
print(self.cfg)
self.model = None
self.processor = None
self.ocr_engine = None
self.classes = self.cfg["classes"]
self.max_num_words = self.cfg.max_num_words
def _init_predictor(self, model=None, processor=None, ocr_engine=None):
if self.cfg.device != "cpu" and not torch.cuda.is_available():
logger.info("Can not found cuda, training with CPU!!!")
self.cfg.device = "cpu"
if self.cfg["inference_weights"] is None:
logger.info(
"Not yet set value for inference weights, use weights instead!!!"
)
if model is None:
self.model = LayoutLMv2._load_model(self.cfg)
if processor is None:
self.processor = LayoutLMv2._load_processor(self.cfg)
if ocr_engine is None:
self.ocr_engine = OCREngine(
text_det=self.cfg["text_det"],
text_recog=self.cfg["text_reg"],
device=self.cfg["device"],
)
def __call__(self, input , ocr_output=None, return_raw=False):
"""Inference KIE -
Pipeline: Img -> OCR -> box + text (word-level) -> sort by x, y-axis -> LayoutLM -> Word formation -> result
Args:
input (np.ndarray | list[np.ndarray] | str): BGR image(s) (cv2) or a PDF file path
Returns:
(dict): {
'kie_raw_output':
'kie_post_output':
'end2end_results': (dict) : {kie_label : value}
}
"""
if self.model is None:
self._init_predictor()
#check single or multi images
if isinstance(input, np.ndarray):
final_out= self.predict_single_image(input, ocr_output=ocr_output, return_raw=return_raw)
elif isinstance(input, list):
items = [self.predict_single_image(im, ocr_output=ocr_output, return_raw=return_raw) for im in input]
final_out = self.aggregate_outputs(items)
else: #pdf
import time
t1 = time.time()
imgs = pdf_to_image(input)
print("1. pdf2img: ", round(time.time() - t1, 4))
t2 = time.time()
items = [self.predict_single_image(im, ocr_output=ocr_output, return_raw=return_raw) for im in imgs]
final_out = self.aggregate_outputs(items)
print("2. kie: ", round(time.time() - t2, 4))
print(f"3. full pipeline for {len(imgs)} pages: {round(time.time() - t1, 4)}")
return final_out
def predict_single_image(self, img, ocr_output=None, return_raw=False):
if ocr_output is None:
ocr_output = self.ocr_engine(img, extend_ratio=[0.1, 0.3], ratio_thr=5) # solve long box
# ocr_output = self.ocr_engine(img)
kie_input = self.prepare_inputs(img, ocr_output)
kie_output = self.predict(kie_input)
kie_post_output = self.postprocessing(kie_output)
formated_output = self.format_output(kie_post_output)
output = {
"kie_raw_output": kie_output if return_raw else None, # raw output from layoutlm model
"kie_post_output": kie_post_output if return_raw else None, # wordgroup
"end2end_results": formated_output, # field_key + field_value
}
return output
def predict(self, inputs: dict):
"""predict
Args:
inputs (dict): format
{
'image": PIL RGB,
'boxes': list[],
'texts'
}
Returns:
list[Word]: list of Word object
"""
window_size = self.cfg.max_num_words
slice_interval = self.cfg.slice_interval
image, batch_boxes, batch_words = (
inputs["img"],
inputs["boxes"],
inputs["texts"],
)
results = []
non_norm_boxes = inputs['non_norm_boxes']
if len(batch_boxes) == 0:
logger.info("Not found any words in image!!! Continue...")
return results
text_windows = sliding_windows(batch_words, window_size, slice_interval)
box_windows = sliding_windows(batch_boxes, window_size, slice_interval)
# print([len(t) for t in text_windows])
out_boxes_windows = []
# out_labels_windows = []
out_logits_windows = []
for i in range(len(text_windows)):
words = text_windows[i] # len: MAX_N_WORDS
boxes = box_windows[i]
# Preprocess
dummy_word_labels = [0] * len(words)
encoding = self.processor(
image,
text=words,
boxes=boxes,
word_labels=dummy_word_labels,
return_tensors="pt",
padding="max_length",
truncation=True,
max_length=self.cfg.max_seq_length,
)
label_ = encoding.pop('labels')
# Run model
for k, v in encoding.items():
encoding[k] = v.to(self.cfg.device)
with torch.no_grad():
output = self.model(**encoding)
logits = output.logits.squeeze() # seq_len * classes
predictions = output.logits.argmax(-1).squeeze().tolist()
token_boxes = encoding.bbox.squeeze().tolist()
# Postprocess
# is_subword = (encoding["labels"] == -100).detach().cpu()[0]
is_subword = (label_ == -100).detach().cpu()[0]
logit_predictions = logits[torch.logical_not(is_subword), :]
true_boxes = torch.Tensor(
[
unnormalize_box(box, image.size[0], image.size[1])
for idx, box in enumerate(token_boxes)
if not is_subword[idx]
]
)
# print("logit shape: ", logit_predictions.shape)
# print("box shape: ", true_boxes.shape)
# true_boxes = torch.Tensor(
# [
# unnormalize_box(box, image.size[0], image.size[1])
# for idx, box in enumerate(boxes)
# ]
# )
out_boxes_windows.append(true_boxes)
out_logits_windows.append(logit_predictions)
# merge output by average logits between overlap window
merged_out_boxes = out_boxes_windows[0]
merged_out_logit = out_logits_windows[0]
overlap = window_size - slice_interval
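# e.g. window_size=150, slice_interval=75 -> overlap=75: the logits of the last 75 words of the previous window are averaged with those of the first 75 words of the current window before concatenation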
for i in range(1, len(out_boxes_windows)):
if overlap != 0:
prev_overlap_logits = merged_out_logit[-overlap:, :]
curr_overlap_logits = out_logits_windows[i][:overlap, :]
avg_overlap_logits = (
prev_overlap_logits + curr_overlap_logits
) / 2
curr_logits = torch.cat(
[avg_overlap_logits, out_logits_windows[i][overlap:, :]], dim=0
)
merged_out_logit = torch.cat(
[merged_out_logit[:-overlap, :], curr_logits], dim=0
)
else:
merged_out_logit = torch.cat(
[merged_out_logit, out_logits_windows[i]], dim=0
)
merged_out_boxes = torch.cat(
[merged_out_boxes, out_boxes_windows[i][overlap:, :]], dim=0
)
# print(f"merged_out_logit={len(merged_out_logit)} - merged_out_boxes={len(merged_out_boxes)}")
# from IPython import embed; embed()
assert len(merged_out_logit) == len(
merged_out_boxes
), f"{len(merged_out_logit)} # {len(merged_out_boxes)}"
predictions = merged_out_logit.argmax(-1).squeeze().tolist()
if not isinstance(predictions, list):
predictions = [predictions]
assert len(predictions) == len(batch_words), f"{len(predictions)} # {len(batch_words)}"
for word_index, word in enumerate(batch_words):
# bndbox = [int(coord) for coord in merged_out_boxes[word_index]]
bndbox = non_norm_boxes[word_index]
kie_label = self.cfg.classes[predictions[word_index]]
results.append(
Word(
text=word,
bndbox=bndbox,
kie_label=kie_label,
conf_cls=inputs["recog_confs"][word_index]
)
)
return results
def postprocessing(self, words):
"""Post processing for output of kie
- Merge wordgroup list for each field
Args:
items (dict): _description_
"""
list_lines, _ = words_to_lines(words)
list_word_group = []
for line in list_lines:
for word_group in line.list_word_groups:
word_group.update_kie_label()
word_group.update_conf()
list_word_group.append(word_group)
kie_dict = construct_word_groups_to_kie_label(list_word_group)
#receipt postprocess
if 'postprocess_type' in self.cfg and self.cfg.postprocess_type == "receipt_postprocess":
kie_dict = receipt_postprocess(kie_dict, words)
else: #invoice_postprocess
# kie_dict = invoice_postprocess(kie_dict)
kie_dict = self._postprocess_kie_wordgroups(kie_dict, doc_type=self.cfg.postprocess_type)
return kie_dict
def format_output(self, kie_dict):
"""
Args:
kie_dict (dict) : format
{
'field_name': list[Wordgroup]
}
Returns:
(dict): format
{
'field_name': {
'box': list,
'value': str,
'conf': float
}
}
"""
end2end_results = {}
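# classes whose name contains "key" (e.g. Total_key) mark label text rather than values and are dropped from the end-to-end output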
filtered_dict = {k:v for k,v in kie_dict.items() if "key" not in k}
for field_name, wg_list in filtered_dict.items():
wg_list = [wg_list] if not isinstance(wg_list, list) else wg_list
if len(wg_list) == 0:
text, conf, box = "", 0.0, []
else:
text = " ".join([wg.text for wg in wg_list]).strip().replace("", " ")
conf = sum(wg.conf for wg in wg_list) / len(wg_list)
box = merge_boxes([wg.boundingbox for wg in wg_list])
end2end_results[field_name] = {
"box": box,
"value": text,
"conf": conf
}
# add empty values for missing fields
for class_name in self.classes:
if "key" not in class_name and class_name not in end2end_results and class_name.lower() not in ['other', 'others']:
end2end_results[class_name] = {
"box": [],
"value": "",
"conf": 0.0
}
end2end_results = self._postprocess_recognized_text(end2end_results, doc_type=self.cfg.postprocess_type)
# sort by keys
end2end_results = dict(sorted(end2end_results.items()))
return end2end_results
def _postprocess_kie_wordgroups(self, result, doc_type, metadata=None):
"""post process for wordgroup outputs
Args:
result (dict): {'field_name': list[Wordgroup]}
doc_type (str): invoice / receipt
metadata (dict, optional): extra context passed to each postprocess function. Defaults to None.
Returns:
dict: result with the per-field wordgroup postprocess functions applied
"""
for field_name in result.keys():
if field_name not in POSTPROCESS_FUNC[doc_type]:
continue
postprocess_func = POSTPROCESS_FUNC[doc_type][field_name].get("wordgroup", None)
if postprocess_func is None:
continue
result[field_name] = postprocess_func(result[field_name], metadata={"field_name": field_name, "wg_res": result})
return result
def _postprocess_recognized_text(self, result, doc_type, metadata=None):
for field_name in result.keys():
if field_name not in POSTPROCESS_FUNC[doc_type]:
continue
postprocess_func = POSTPROCESS_FUNC[doc_type][field_name].get("text", None)
if postprocess_func is None:
continue
result[field_name]["value"] = postprocess_func(result[field_name]['value'], metadata)
return result
def prepare_inputs(self, img, ocr_output):
"""Prepare input for KIE model
Args:
img (np.ndarray): BGR image
ocr_output (dict): format
{
"img_path": img_path,
"img": image,
"boxes": boxes,
"texts": words,
"kie_labels": word_labels
}
"""
assert "boxes" in ocr_output, "boxes not exists in ocr_output"
assert "texts" in ocr_output, "texts not exists in ocr_output"
# cv2 to PIL (RGB)
img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
w, h = img.size
texts = ocr_output["texts"]
boxes = ocr_output["boxes"]
recog_confs = ocr_output['recog_confs']
det_confs = ocr_output['det_confs']
texts = [text.replace(" ", "") for text in texts] # layoutlm will throw an error if the input has space characters
word_items = {
'boxes': boxes,
'texts': texts,
'det_confs': det_confs,
'recog_confs': recog_confs
}
sorted_word_items = sort_words(word_items)
(boxes, texts, det_confs, recog_confs) = (
sorted_word_items['boxes'],
sorted_word_items['texts'],
sorted_word_items['det_confs'],
sorted_word_items['recog_confs']
)
non_norm_boxes = sorted_word_items['boxes']
boxes = [normalize_box(box, width=w, height=h) for box in boxes]
out_item = {
"img": img,
"boxes": np.array(boxes),
"texts": texts,
"det_confs": det_confs,
"recog_confs": recog_confs,
"non_norm_boxes": non_norm_boxes
}
return out_item
def aggregate_outputs(self, outs):
f"""Postprocess the outputs of the muliple pages
Args:
outs (_type_): _description_
Returns:
output: (dict): final output
"""
combine_out = {
'kie_raw_output': [],
'kie_post_output': [],
'end2end_result_each_page': [],
'end2end_results': None
}
for idx, out in enumerate(outs):
combine_out['kie_raw_output'].append(out['kie_raw_output'])
combine_out['kie_post_output'].append(out['kie_post_output'])
combine_out['end2end_result_each_page'].append(out['end2end_results'])
#merge end2end result
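# later pages only fill fields whose value is still empty (or "0"); the first non-empty value in page order wins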
end2end_results = combine_out['end2end_result_each_page'][0]
for page_id, end2end_results_page in enumerate(combine_out['end2end_result_each_page'][1:]):
for field_key, field_value in end2end_results_page.items():
if "value" in end2end_results[field_key] \
and (end2end_results[field_key]['value'] == "" or end2end_results[field_key]['value'] == "0"):
end2end_results[field_key] = field_value
combine_out['end2end_results'] = end2end_results
return combine_out

View File

@ -1,260 +0,0 @@
import logging
import os
import time
import torch
import wandb
from easydict import EasyDict
from sklearn.metrics import precision_recall_fscore_support
from terminaltables import AsciiTable
from torch.optim import AdamW
from torch.optim.lr_scheduler import LinearLR
from tqdm import tqdm
from sdsvkie.datasets.base_dataset import BaseDataset
from sdsvkie.models.layoutlmv2 import LayoutLMv2
from sdsvkie.models.layoutlm import LayoutLM
from sdsvkie.utils import yaml_save, get_info_env, get_logger
from transformers import get_scheduler
# logging = logging.getlogging(__name__)
# logging.basicConfig(level=logging.INFO)
class Trainer:
def __init__(self, cfg: dict):
self.cfg = EasyDict(cfg)
self.model = None
self.processor = None
self._init_trainer()
def _init_trainer(self):
if self.cfg.v3:
os.environ["TOKENIZERS_PARALLELISM"] = "false"
if not os.path.exists(self.cfg.save_dir):
os.makedirs(self.cfg.save_dir, exist_ok=True)
timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
log_file = "{}/{}.log".format(self.cfg.save_dir, str(timestamp))
self.logger = get_logger(log_file=log_file, log_level='INFO')
self.logger.info(self.cfg)
# self.logger.info(get_info_env())
if not torch.cuda.is_available():
self.logger.info("Can not found cuda, training with CPU!!!")
self.cfg.device = "cpu"
self.model = LayoutLMv2._load_model(self.cfg)
self.processor = LayoutLMv2._load_processor(self.cfg)
self.model.to(self.cfg.device)
if self.cfg.wandb:
wandb.init(
project=self.cfg.wandb,
)
def _build_dataloader(self, data_dir, cache_file, use_sampling=False):
base_dataset = BaseDataset(self.cfg)
dataloader = base_dataset.build_dataloader_from_dir(
data_dir=data_dir,
processor=self.processor,
device="cpu",
batch_size=self.cfg.batch_size,
num_workers=self.cfg.num_workers,
shuffle=False,
cache_file=cache_file,
use_sampling=use_sampling,
)
return dataloader
def val(self, val_dir=None):
val_dir = val_dir if val_dir is not None else self.cfg.val_dir
val_cache_file = os.path.splitext(val_dir)[0] + ".pkl"
val_dataloader = self._build_dataloader(
val_dir if val_dir is not None else self.cfg.val_dir,
cache_file=val_cache_file
)
acc = self.val_on_dataloader(val_dataloader)
return acc
def train(self, train_dir=None, val_dir=None):
self.logger.info("Building train dataloader...")
base_dataset = BaseDataset(self.cfg)
train_dir = train_dir if train_dir is not None else self.cfg.train_dir
val_dir = val_dir if val_dir is not None else self.cfg.val_dir
train_cache_file = os.path.splitext(train_dir)[0] + ".pkl"
val_cache_file = os.path.splitext(val_dir)[0] + ".pkl"
if self.cfg.sampling:
train_dataset = base_dataset._build_dataset(data_dir=train_dir)
train_dataloader = base_dataset.build_dataloader_from_dataset(
train_dataset,
batch_size=self.cfg.batch_size,
processor=self.processor,
device="cpu",
shuffle=True,
num_workers=self.cfg.num_workers,
cache_file=train_cache_file,
use_sampling=True,
)
else:
train_dataloader = self._build_dataloader(
train_dir if train_dir is not None else self.cfg.train_dir,
cache_file=train_cache_file,
use_sampling=False,
)
self.logger.info("Building valid dataloader...")
val_dataloader = self._build_dataloader(
val_dir if val_dir is not None else self.cfg.val_dir,
cache_file= val_cache_file,
use_sampling=False,
)
self.logger.info(
f"Info dataset: train = {len(train_dataloader)}, test = {len(val_dataloader)}"
)
optimizer = AdamW(self.model.parameters(), lr=self.cfg.lr)
if self.cfg.scheduler:
# scheduler = torch.optim.lr_scheduler.OneCycleLR(
# optimizer,
# max_lr=self.cfg.lr,
# steps_per_epoch=len(train_dataloader),
# epochs=self.cfg.epochs,
# anneal_strategy='cos',
# pct_start=0.1,
# div_factor=25, #init lr = max_lr / div_factor
# final_div_factor=1e4, # min lr = init_lr / final_dev_factor
# )
num_training_steps = self.cfg.epochs * len(train_dataloader)
scheduler = get_scheduler(
name=self.cfg.scheduler,
optimizer=optimizer,
num_warmup_steps=0,
num_training_steps=num_training_steps,
)
if self.cfg.wandb:
wandb.config = dict(self.cfg)
best_acc = 0.0
best_epoch = 0
yaml_save(os.path.join(self.cfg.save_dir, "config.yaml"), dict(self.cfg))
for epoch in range(self.cfg.epochs):
# sampling slice window
if (
self.cfg.sampling and epoch != 0 and epoch % self.cfg.sampling == 0
): # sampling each cfg.sampling epochs
train_dataloader = base_dataset.build_dataloader_from_dataset(
train_dataset,
batch_size=self.cfg.batch_size,
processor=self.processor,
device="cpu",
shuffle=True,
num_workers=self.cfg.num_workers,
cache_file=train_cache_file,
use_sampling=True,
)
self.model.train()
self.logger.info(f"Epoch: {epoch}:")
running_loss = 0.0
for batch in tqdm(train_dataloader):
# forward pass
batch = self._to_device(batch, self.cfg.device)
outputs = self.model(**batch)
loss = outputs.loss
running_loss += loss.item()
# backward pass to get the gradients
loss.backward()
# update
optimizer.step()
if self.cfg.scheduler:
scheduler.step()
optimizer.zero_grad()
loss_avg = running_loss / len(train_dataloader)
self.logger.info(f"Epoch[{epoch}/{self.cfg.epochs}] - lr: {round(scheduler.get_last_lr()[0], 9) if self.cfg.scheduler else self.cfg.lr} - loss: {loss_avg}")
if self.cfg.wandb:
wandb.log({"train_loss": loss_avg})
# valid
if epoch >= self.cfg.eval_delay:
acc = self.val_on_dataloader(val_dataloader)
if acc > best_acc:
self.model.save_pretrained(os.path.join(self.cfg.save_dir, "best"))
self.logger.info(f"Update best acc, prev best acc = {best_acc}, current best acc = {acc}")
best_acc = acc
best_epoch = epoch
if epoch % self.cfg.save_weight_interval == 0:
self.model.save_pretrained(
os.path.join(self.cfg.save_dir, "epoch_{}".format(epoch))
)
self.model.save_pretrained(os.path.join(self.cfg.save_dir, "last"))
self.logger.info(f"Best accuracy = {best_acc} at epoch {best_epoch}")
def _to_device(self, batch, device):
batch = {k: v.to(device) for k, v in batch.items()}
return batch
def val_on_dataloader(self, dataloader):
self.model.eval()
total, correct = 0, 0
preds, truths = [], []
running_loss = 0.0
for batch in tqdm(dataloader):
with torch.no_grad():
batch = self._to_device(batch, self.cfg.device)
outputs = self.model(**batch)
loss = outputs.loss
running_loss += loss.item()
predictions = outputs.logits.argmax(dim=2)
valid_samples = batch["labels"] != -100
predictions = predictions[valid_samples]
batch_labels = batch["labels"][valid_samples]
preds.extend(predictions.detach().cpu().numpy().tolist())
truths.extend(batch_labels.detach().cpu().numpy().tolist())
correct += (predictions == batch_labels).float().sum()
total += predictions.numel()
loss_avg = running_loss / len(dataloader)
p, r, f1, support = precision_recall_fscore_support(truths, preds)
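# per-class precision/recall/F1; f1_avg below is the unweighted (macro) mean over the returned classes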
# self.logger.info("shapeeee", p.shape)
table_data = [["Class", "P", "R", "F1", "#samples"]]
for c in range(len(self.cfg.classes)):
if c < p.shape[0]:
table_data.append([self.cfg.classes[c], p[c], r[c], f1[c], support[c]])
continue
f1_avg = sum(f1) / len(f1)
table = AsciiTable(table_data)
self.logger.info(table.table)
self.logger.info(
"Validation F1: {} - #samples: {} - #corrects: {}".format(
f1_avg, total, correct
)
)
if self.cfg.wandb:
wandb.log({"val_loss": loss_avg, "f1": f1_avg})
return f1_avg

View File

@ -1,671 +0,0 @@
import argparse
import glob
import json
import os
import xml.etree.ElementTree as ET
import numpy as np
import tqdm
from sdsvkie.utils.word_formation import Box, check_iou
from pathlib import Path
def read_txt(txt):
with open(txt, 'r', encoding='utf8') as f:
data = [line.strip() for line in f]
return data
def write_txt(txt, data):
with open(txt, 'w', encoding='utf8') as f:
for line in data:
f.write(line + "\n")
"""
general json format:
python cvat.py --task pseudo --xml sample_cvat/annotations.xml --xml_out sample_cvat/annotations_out.xml --pseudo_path sample_cvat/pseudo.json
"""
"""
config for fwd
{
"id": "OCR005_2",
"name": "Số Hợp đồng",
"type": "text",
"value": "33252663",
"page_num": 0,
"box": [192, 168, 220, 250]
},
"""
# CONFIG = {
# "label": "field",
# #edit here
# "attribute_names": ["type", "value", "id", "name"] # name of attribute in cvat label: text / checkbox
# }
CONFIG = {
"label": "word",
# "attribute_names": ["text", "kie_label"] # name of attribute in cvat label: text / checkbox
"attribute_names": []
}
class CVAT:
def __init__(self):
pass
def create_xml_from_json(self, json_path, xml_in, xml_out):
"""
json data format:
{
"img_1.jpg": [
{
"box": [x1, y1, x2, y2]
"label": str # (not required)
"attrib1": str,
"attrib2": str,
},
{
"box": [x1, y1, x2, y2]
"label": str # (not required)
"attrib1": str,
"attrib2": str,
}
...
],
"img_2.jpg": [...]
}
"""
data = self.read_json(json_path)
tree = ET.parse(xml_in)
root = tree.getroot()
for img_item in root.iter("image"):
img_path = img_item.attrib['name']
img_data = data[img_path]
for item in img_data:
et = ET.Element('box')
#default values
et.attrib['occluded']="0"
et.attrib['source']="manual"
et.attrib['z_order'] = "0"
#overwrite values
if 'label' in item:
et.attrib['label'] = item['label']
else:
et.attrib['label'] = CONFIG['label']
xmin, ymin, xmax, ymax = item['box']
(
et.attrib['xtl'], et.attrib['ytl'],
et.attrib['xbr'], et.attrib['ybr']
) = (
str(xmin), str(ymin),
str(xmax), str(ymax)
)
for att_name in CONFIG['attribute_names']:
if att_name not in item:
continue
att_et = ET.Element('attribute')
att_et.attrib['name'] = att_name
att_et.text = item[att_name]
et.append(att_et)
img_item.append(et)
tree.write(xml_out, encoding='utf8')
def get_data_from_txt_dir(self, txt_dir, resever_parent_dir=False):
if resever_parent_dir:
txt_paths = glob.glob(txt_dir + "/*/*.txt")
else:
txt_paths = glob.glob(txt_dir + "/*.txt")
data = {}
for txt_path in txt_paths:
if resever_parent_dir:
txt_name = "/".join(txt_path.split("/")[-2:])
else:
txt_name = os.path.basename(txt_path)
txt_data = read_txt(txt_path)
format_data = []
for line in txt_data:
items = line.split("\t")
# assert len(items) == 6, "error get len = {} - {}".format(len(items), items)
box = [int(float(x)) for x in items[:4]]
text = items[4]
if len(items) == 6:
kie_label = items[5]
else:
kie_label = "word"
format_data.append(
{
'box': box,
'text': text,
'label': kie_label
}
)
data[txt_name] = format_data
return data
def get_data_from_txt_path(self, txt_path):
txt_data = read_txt(txt_path)
format_data = []
for line in txt_data:
items = line.split("\t")
assert len(items) == 6, "expected 6 tab-separated fields, got {}: {}".format(len(items), items)
box = [int(float(x)) for x in items[:4]]
text = items[4]
kie_label = items[5]
format_data.append(
{
'box': box,
'text': text,
'label': kie_label
}
)
return format_data
def format_data_invoice(self, data):
new_data = {}
for txt_name, value in data.items():
items = []
for item in value:
text = item['text']
if "____kie_wordgroup" in text:
new_item = {
'box': item['box'],
'label': item['label']
}
else:
new_item = {
"box": item['box'],
'text': "xxxxxx",
'kie_label': item['label'],
'label': "word"
}
items.append(new_item)
new_data[txt_name] = items
return new_data
def create_xml_from_txt(self, txt_dir, xml_in, xml_out, skip_labels=[], resever_parent_dir=False):
"""
"""
data = self.get_data_from_txt_dir(txt_dir, resever_parent_dir)
print(list(data.keys()))
# for invoice
if len(skip_labels) > 0 and "word" in skip_labels:
data = self.format_data_invoice(data)
tree = ET.parse(xml_in)
root = tree.getroot()
count = 0
for img_item in tqdm.tqdm(root.iter("image")):
count += 1
img_path = img_item.attrib['name']
txt_name = os.path.splitext(img_path)[0] + ".txt"
img_data = data.get(txt_name, [])
# from IPython import embed; embed()
# break
if len(img_data) > 0:
# img_item.clear()
for child in img_item:
img_item.remove(child)
for item in img_data:
et = ET.Element('box')
#default values
et.attrib['occluded']="0"
et.attrib['source']="manual"
et.attrib['z_order'] = "0"
#overwrite values
if 'label' in item:
if item['label'] in skip_labels:
continue
et.attrib['label'] = item['label']
else:
et.attrib['label'] = CONFIG['label']
xmin, ymin, xmax, ymax = item['box']
(
et.attrib['xtl'], et.attrib['ytl'],
et.attrib['xbr'], et.attrib['ybr']
) = (
str(xmin), str(ymin),
str(xmax), str(ymax)
)
for att_name in CONFIG['attribute_names']:
if att_name not in item:
continue
att_et = ET.Element('attribute')
att_et.attrib['name'] = att_name
att_et.text = item[att_name]
et.append(att_et)
img_item.append(et)
print("Num imgs: ", count)
tree.write(xml_out, encoding='utf8')
def get_data_from_xml(self, xml, skip_labels=[]):
""" parse xml to dict
Args:
xml (str): cvat anno xml path
Returns:
(dict): {
'img_1.jpg': [
'kie_label': [xyxy]
...
],
'img_2.jpg': ...
}
"""
anno_data = open(xml, encoding='utf8')
tree = ET.parse(anno_data)
root = tree.getroot()
data = {}
for obj in tqdm.tqdm(root.iter("image")):
img_path = obj.attrib['name']
img_data = []
for box in obj.iter("box"):
box_label = box.attrib['label']
if box_label in skip_labels:
continue
#get coordinate
xmin, ymin, xmax, ymax = box.attrib['xtl'], box.attrib['ytl'], box.attrib['xbr'], box.attrib['ybr']
xmin, ymin, xmax, ymax = int(float(xmin)), int(float(ymin)), int(float(xmax)), int(float(ymax))
item = {
box_label: [xmin, ymin, xmax, ymax]
}
img_data.append(item)
data[img_path] = img_data
return data
@staticmethod
def write_json(json_path, data):
with open(json_path, 'w', encoding='utf8') as f:
json.dump(data, f, ensure_ascii=False)
def read_json(self, json_path):
with open(json_path, 'r', encoding='utf8') as f:
data = json.load(f)
return data
def update_label_kie(self, txt_dir, json_path, out_dir):
if not os.path.exists(out_dir):
os.makedirs(out_dir, exist_ok=True)
data = self.read_json(json_path)
txt_paths = glob.glob(txt_dir + "/*.txt")
for txt_path in tqdm.tqdm(txt_paths):
ori_data = read_txt(txt_path)
boxes = []
img_name = os.path.splitext(os.path.basename(txt_path))[0]
img_name = "_".join(img_name.split("_")[:-1]) + "_1" + ".jpg"
# img_name = os.path.splitext(os.path.basename(txt_path))[0] + ".jpg"
new_img_data = data[img_name]
for line in ori_data:
xmin, ymin, xmax, ymax, text, kie_label = line.strip().split("\t")
if "____kie_wordgroup" in text:
continue
xmin, ymin, xmax, ymax = int(xmin), int(ymin), int(xmax), int(ymax)
# box_word = [xmin, ymin, xmax, ymax]
new_kie_label = "other"
for label_info in new_img_data:
for label, box_wordgroup in label_info.items():
# print(label, box_wordgroup)
box_word = Box(
xmin, ymin, xmax, ymax
)
box_wordgroup = Box(
box_wordgroup[0], box_wordgroup[1], box_wordgroup[2],box_wordgroup[3]
)
if check_iou(box1=box_word, box2=box_wordgroup, threshold=0.85):
new_kie_label = label
break
if new_kie_label != "other":
break
new_box = Box(
xmin=xmin,
ymin=ymin,
xmax=xmax,
ymax=ymax,
label=text,
kie_label=new_kie_label
)
boxes.append(new_box)
boxes = sorted(boxes, key=lambda box: [box.ymin, box.xmin])  # top-to-bottom, then left-to-right
new_data = [
"\t".join([str(box.xmin), str(box.ymin), str(box.xmax), str(box.ymax), box.label, box.kie_label])
for box in boxes
]
write_txt(os.path.join(out_dir, os.path.basename(txt_path)), new_data)
def _check_iou(self, box1, box2, threshold=0.9):
"""_summary_
Args:
box1 (_type_): word box
box2 (_type_): line box
threshold (float, optional): _description_. Defaults to 0.9.
Returns:
_type_: _description_
"""
area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
xmin_intersect = max(box1[0], box2[0])
ymin_intersect = max(box1[1], box2[1])
xmax_intersect = min(box1[2], box2[2])
ymax_intersect = min(box1[3], box2[3])
if xmax_intersect < xmin_intersect or ymax_intersect < ymin_intersect:
area_intersect = 0
else:
area_intersect = (xmax_intersect - xmin_intersect) * (
ymax_intersect - ymin_intersect
)
# union = area1 + area2 - area_intersect
iou = area_intersect / area1
return iou
# if iou > threshold:
# return True
# return False
def _update_label_for_word(self, box, line_items, threshold=0.75, other_class='others'):
have_label = False
max_iou = -1
for line_item in line_items:
# 465 901 664 940
curr_iou = self._check_iou(box, line_item['box'], threshold=threshold)
if curr_iou > threshold and curr_iou > max_iou:
max_iou = curr_iou
kie_label = line_item['label']
have_label = True
# if box[0] == 465 and box[-1] == 940:
# print(box, curr_iou, kie_label, line_item)
# break
if not have_label:
kie_label = other_class
return kie_label
def update_label_kie_from_xml(self, txt_dir, xml, out_dir, skip_labels = [], line_to_word=False, other_class="others", resever_parent_dir=False):
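# Two modes: with line_to_word=False a word keeps its label only if its box coordinates exactly match an annotated box; with line_to_word=True each word box inherits the label of the annotated line box covering more than 75% of its area, otherwise other_class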
if not os.path.exists(out_dir):
os.makedirs(out_dir, exist_ok=True)
#read xml
xml_data = {}
anno_data = open(xml, encoding='utf8')
tree = ET.parse(anno_data)
root = tree.getroot()
# data = {}
for obj in tqdm.tqdm(root.iter("image")):
img_path = obj.attrib['name']
# img_data = []
if not line_to_word:
img_data = {}
for box in obj.iter("box"):
box_label = box.attrib['label']
if box_label in skip_labels:
continue
#get coordinate
xmin, ymin, xmax, ymax = box.attrib['xtl'], box.attrib['ytl'], box.attrib['xbr'], box.attrib['ybr']
box_int = int(float(xmin)), int(float(ymin)), int(float(xmax)), int(float(ymax))
box_key = ",".join([str(x) for x in box_int])
img_data[box_key] = box_label
else:
img_data = []
for box in obj.iter("box"):
box_label = box.attrib['label']
if box_label in skip_labels:
continue
#get coordinate
xmin, ymin, xmax, ymax = box.attrib['xtl'], box.attrib['ytl'], box.attrib['xbr'], box.attrib['ybr']
box_int = int(float(xmin)), int(float(ymin)), int(float(xmax)), int(float(ymax))
box_key = ",".join([str(x) for x in box_int])
img_data.append(
{
'box': box_int,
'label': box_label
}
)
xml_data[os.path.splitext(img_path)[0]] = img_data
# print(xml_data)
if resever_parent_dir:
txt_paths = glob.glob(txt_dir + "/*/*.txt")
else:
txt_paths = glob.glob(txt_dir + "/*.txt")
updated_imgs = []
for txt_path in tqdm.tqdm(txt_paths):
is_update = False
# print(txt_path)
ori_data = read_txt(txt_path)
# print(ori_data)
img_new_data = []
if resever_parent_dir:
img_key = str(Path(txt_path).with_suffix('').relative_to(Path(txt_path).parent.parent)) # a/xyz
else:
img_key = os.path.splitext(os.path.basename(txt_path))[0] # xyz
if img_key not in xml_data:
print(txt_path)
continue
img_annoted_data = xml_data[img_key]
# print(img_key, img_annoted_data)
if not line_to_word:
for line in ori_data:
xmin, ymin, xmax, ymax, text, kie_label = line.strip().split("\t")
if "____kie_wordgroup" in text:
continue
box_int = int(xmin), int(ymin), int(xmax), int(ymax)
box_key = ",".join([str(x) for x in box_int])
if box_key in img_annoted_data:
if kie_label != img_annoted_data[box_key]:
is_update = True
# print(kie_label, img_annoted_data[box_key])
kie_label = img_annoted_data[box_key]
else:
kie_label = other_class
img_new_data.append("\t".join([xmin, ymin, xmax, ymax, text, kie_label]))
else:
# print("ori_data: ", ori_data)
for line in ori_data:
# print(line)
items = line.strip().split("\t")
if len(items) == 5:
xmin, ymin, xmax, ymax, text = items
label = None  # no existing label in the txt line
else:
xmin, ymin, xmax, ymax, text, label = items
box_int = int(xmin), int(ymin), int(xmax), int(ymax)
kie_label = self._update_label_for_word(box_int, img_annoted_data, threshold=0.75, other_class=other_class)
if label != kie_label:
print(kie_label, label)
is_update = True
img_new_data.append("\t".join([xmin, ymin, xmax, ymax, text, kie_label]))
if resever_parent_dir:
out_sub_dir = Path(out_dir) / Path(img_key).parts[-2]
if not out_sub_dir.exists():
out_sub_dir.mkdir(parents=True)
# else:
# out_sub_dir = out_dir
# out_sub_dir = str(out_sub_dir)
write_txt(os.path.join(out_dir, img_key + ".txt"), img_new_data)
if is_update:
updated_imgs.append(txt_path)
else:
print("No update: ", txt_path)
print("updated_imgs: ", list(set(updated_imgs)))
print("num updated_imgs: ", len(list(set(updated_imgs))))
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--xml", type=str, default='annotations.xml')
parser.add_argument("--pseudo_path", type=str, default="pseudolabel.json")
parser.add_argument("--xml_out", type=str, default="annotations_out.xml")
parser.add_argument("--task", type=str, default='pseudo', help='pseudo / parse_data / update_txt / update_txt_from_xml')
parser.add_argument("--txt_in", type=str, default='txt_dir_in')
parser.add_argument("--txt_out", type=str, default='txt_dir_out')
parser.add_argument("--line_to_word", action='store_true')
parser.add_argument("--other_class", type=str, default='other')
parser.add_argument("--resever_parent_dir", action="store_true")
args = parser.parse_args()
cvat = CVAT()
if args.task == 'parse_data':
data = cvat.get_data_from_xml(
xml=args.xml
)
CVAT.write_json(args.xml_out, data)
elif args.task == 'pseudo_from_json':
cvat.create_xml_from_json(
xml_in=args.xml,
xml_out=args.xml_out,
json_path=args.pseudo_path
)
elif args.task == 'pseudo_from_txt':
cvat.create_xml_from_txt(
xml_in=args.xml,
xml_out=args.xml_out,
txt_dir=args.pseudo_path,
# skip_labels=['word']
# skip_labels=[args.other_class],
resever_parent_dir=args.resever_parent_dir
)
elif args.task == 'update_txt':
cvat.update_label_kie(
txt_dir=args.txt_in,
json_path=args.pseudo_path,
out_dir=args.txt_out
)
elif args.task == 'update_txt_from_xml':
cvat.update_label_kie_from_xml(
txt_dir=args.txt_in,
xml=args.xml,
out_dir=args.txt_out,
skip_labels = ['word'],
line_to_word=args.line_to_word,
other_class=args.other_class,
resever_parent_dir=args.resever_parent_dir
)
else:
raise NotImplementedError(f"{args.task} not yet implemented")
"""
python tools/cvat.py --task update_txt --txt_in /mnt/ssd1T/hoanglv/Projects/KIE/craw_data/output/synth_vnpt_r20/one_line_filtered --txt_out /mnt/ssd1T/hoanglv/Projects/KIE/craw_data/output/synth_vnpt_r20/one_line_filtered --pseudo_path ../workdirs/data/vnpt_oneline/annotations.json
python tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/visualize/xml/vnpt_r2/annotations.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/visualize/xml/vnpt_r2/annotations_out.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/visualize/vnpt_r2_txt
python tools/cvat.py --task pseudo_from_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/WildReceipt/re_labeling/wild_batch_1_raw.xml \
--xml_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/WildReceipt/re_labeling/wild_batch_1_pseudo.xml \
--pseudo_path /mnt/ssd1T/hoanglv/Projects/KIE/DATA/WildReceipt/re_labeling/batches/batch_1
python tools/cvat.py --task update_txt_from_xml \
--txt_in /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/visualize/vnpt_r2_txt \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/visualize/vnpt_r2.xml \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/visualize/vnpt_r2_done_txt
python tools/cvat.py --task update_txt_from_xml \
--txt_in /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/labeling/Pseudo/batch_2/Good/Food \
--xml /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/labeling/Pseudo/batch_2/batch_2_food_done.xml \
--txt_out /mnt/ssd1T/hoanglv/Projects/KIE/DATA/SDSAP_Invoice/processed/batch_2/Good/Food \
--other_class Others
"""

View File

@ -1,137 +0,0 @@
import cv2
import glob
import os
import fitz
import json
import numpy as np
import pandas as pd
from time import time
import yaml
from tqdm.auto import tqdm
import xml.etree.ElementTree as ET
from sklearn.model_selection import StratifiedKFold
# from textdetection.src.serve_model import Predictor as TextDetector
FOLDER = "/mnt/ssd1T/tuanlv/06.KVUCombineStage/preprocess/data/invoices-receipts/SBT/nttmai_renamed/"
TXT_DIR = "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/pseudo_ocr/invoice_receipt_sbt"
OUT_FOLDER = "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/pseudo_ocr/sbt_batches"
ANN_OUT = "processed_textdet_batch%d.json"
N_BATCHES = 3
os.makedirs(OUT_FOLDER, exist_ok=True)
if __name__ == "__main__":
all_files = []
doc_types = []
for file_name in tqdm(sorted(os.listdir(FOLDER))):
try:
file_name_no_ext = file_name.split('.')[0]
all_files.append(file_name)
doc_type = "1"
doc_types.append(doc_type)
file_path = os.path.join(FOLDER, file_name)
except Exception as ex:
print('image:', file_name, '. Error:', ex)
df = pd.DataFrame({'file_name': all_files, 'doc_type': doc_types})
df = df[df.doc_type.isin(['1'])].reset_index(drop=True)
kfold = StratifiedKFold(n_splits=N_BATCHES)
fold_num = 0
for train_inds, val_inds in kfold.split(df, df['doc_type']):
df.loc[val_inds, 'fold'] = fold_num
fold_num+=1
df.to_csv(f'{OUT_FOLDER}/tmp.csv', index=False)
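# NOTE: TextDetector (import commented out above), the `setting` dict and pdf2np_fitz are assumed to come from the external textdetection package / a project-level config; this script is not runnable on its own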
text_detector = TextDetector(setting['text_detection']['setting'], "textdetection")
for batch in range(N_BATCHES):
print(f"================== Batch {batch} ================")
fold_df = df.loc[df.fold == batch]
# tree = ET.parse(XML_IN)
# root = tree.getroot()
# for image in root.findall('image'):
# root.remove(image)
with open('/home/sds/namnt/FWD_Data/coco_template.json', 'r') as f:
coco_annotations = json.load(f)
count = 2
img_id = 1
ann_id = 1
all_images = []
all_annotations = []
for row_num, row in tqdm(fold_df.iterrows(), total=len(fold_df)):
# try:
file_name = row['file_name']
file_name_no_ext = file_name.split('.')[0]
doc_type = row['doc_type']
file_path = os.path.join(FOLDER, file_name)
images = pdf2np_fitz(file_path, _type='fname')
images, batch_boxes = text_detector(images)
for page_num, (img, boxes) in enumerate(zip(images, batch_boxes)):
os.makedirs(os.path.join(OUT_FOLDER, f"batch{batch}"), exist_ok=True)
out_img_path = os.path.join(OUT_FOLDER, f"batch{batch}", f"batch{batch}_{img_id:04d}_{file_name_no_ext}_{page_num}.jpg")
cv2.imwrite(out_img_path, img[:,:,::-1])
H, W = img.shape[:2]
c_img = {
"id": int(img_id),
"width": W,
"height": H,
"file_name": os.path.join(f"batch{batch}", f"batch{batch}_{img_id:04d}_{file_name_no_ext}_{page_num}.jpg"),
"license": 0,
"flickr_url": "",
"coco_url": "",
"date_captured": 0
}
all_images.append(c_img)
for box in boxes:
x1,y1,x2,y2 = box
w, h = x2-x1, y2-y1
c_ann = {
"id": int(ann_id),
"image_id": int(img_id),
"category_id": 1,
"segmentation": [],
"area": w*h,
"bbox": [x1,y1,w,h],
"iscrowd": 0,
"attributes": {
"occluded": False,
"rotation": 0.0
}
}
all_annotations.append(c_ann)
ann_id += 1
img_id += 1
# if count == 1:
# break
# else:
# count -= 1
# except Exception as ex:
# print('image:', file_name, '. Error:', ex)
coco_annotations['categories'] = [{
"id": 1,
"name": "text",
"supercategory": ""
}]
coco_annotations['images'] = all_images
coco_annotations['annotations'] = all_annotations
with open(os.path.join(OUT_FOLDER, ANN_OUT%(batch)), 'w') as f:
json.dump(coco_annotations, f)
# break

View File

@ -1,144 +0,0 @@
"""
Use for eval, debug
"""
import argparse
import os
from copy import copy
from glob import glob
from pathlib import Path
import cv2
import tqdm
from sdsvkie.cfg import load_cfg
from sdsvkie.engine import Predictor
from sdsvkie.utils import visualize_kie
from sdsvkie.utils.io_file import load_ocr_output, write_json, write_txt
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--cfg", type=str, default="sdsvkie/cfg/default.yaml")
parser.add_argument(
"--img", type=str, default="img.jpg", help="image path or directory"
)
# optional
parser.add_argument("--weights", type=str, default=None, required=False)
parser.add_argument("--device", type=str, default=None, required=False)
parser.add_argument(
"--text_det", type=str, default=None, required=False, help="image path or directory"
)
parser.add_argument(
"--text_reg", type=str, default=None, required=False, help="image path or directory"
)
parser.add_argument(
"--vis_out", type=str, default=None, required=False, help="visualize output directory"
)
parser.add_argument("--txt_out", type=str, required=False, default=None)
parser.add_argument("--kie_wordgroup_out", required=False, action="store_true")
parser.add_argument("--e2e", type=str, required=False, default=None)
parser.add_argument("--not_use_ocr", required=False, action="store_true")
parser.add_argument("--parse_e2e", action='store_true', help="Parse end2end result from word label")
args = parser.parse_args()
# print(cfg)
predictor = Predictor(**vars(args))
# cfg = load_cfg(args.cfg, vars(args))
# predictor = Predictor(cfg)
if args.txt_out:
if not os.path.exists(args.txt_out):
os.makedirs(args.txt_out, exist_ok=True)
if args.e2e:
outdir_e2e = os.path.dirname(args.e2e)
if not os.path.exists(outdir_e2e):
os.makedirs(outdir_e2e, exist_ok=True)
if os.path.isdir(args.img):
img_paths = glob(args.img + "/*")
print("Infence image dir, total imgs: {}".format(len(img_paths)))
else:
img_paths = [args.img]
out_dict = {}
for img_path in tqdm.tqdm(img_paths):
img = cv2.imread(img_path)
if img is None:
print("img is None: ", img_path)
continue
if args.not_use_ocr:
txt_path = str(Path(img_path).with_suffix(".txt"))
ocr_output = load_ocr_output(txt_path)
# print(len(ocr_output["boxes"]))
out = predictor(img, ocr_output=ocr_output, return_raw=True)
else:
out = predictor(img, return_raw=True)
# visualize
if args.vis_out:
# from IPython import embed; embed()
out_kie = out["kie_raw_output"]
visualize_kie(
img,
boxes=[word.boundingbox for word in out_kie],
pred_labels=[word.kie_label for word in out_kie],
image_name=os.path.basename(img_path),
outdir=args.vis_out,
skip_classes=["other---"]
)
if args.txt_out:
txt_out_path = os.path.join(
args.txt_out, os.path.splitext(os.path.basename(img_path))[0] + ".txt"
)
out_kie = out["kie_raw_output"]
boxes = [word.boundingbox for word in out_kie]
pred_labels = [word.kie_label for word in out_kie]
texts = [word.text for word in out_kie]
data = []
if args.kie_wordgroup_out:
output = out["kie_post_output"]
# print(output)
# from IPython import embed; embed()
wordgroup_all_list = []
for kie_label, wordgroup_list in output.items():
if isinstance(wordgroup_list, list):
wordgroup_all_list.extend(wordgroup_list)
else:
wordgroup_all_list.append(wordgroup_list)
boxes = [word.boundingbox for word in wordgroup_all_list]
pred_labels = [word.kie_label for word in wordgroup_all_list]
texts = [word.text + "____kie_wordgroup" for word in wordgroup_all_list]
for box, text, kie_label in zip(boxes, texts, pred_labels):
item = "\t".join([str(int(x)) for x in box])
item = "\t".join([item, text, kie_label])
data.append(item)
else:
for box, text, kie_label in zip(boxes, texts, pred_labels):
item = "\t".join([str(int(x)) for x in box])
item = "\t".join([item, text, kie_label])
data.append(item)
write_txt(txt_out_path, data)
if args.e2e:
img_id = os.path.splitext(os.path.basename(img_path))[0]
out_dict[img_id] = out['end2end_results']
if args.e2e:
write_json(os.path.join(args.e2e), out_dict)

View File

@ -1,122 +0,0 @@
"""
Use for deploy
"""
import argparse
import os
from glob import glob
from pathlib import Path
import cv2
import tqdm
from sdsvkie.cfg import load_cfg
from sdsvkie.engine.predictor import Predictor
from sdsvkie.utils.io_file import write_json
from sdsvkie.utils import visualize_kie, IMG_EXT, PDF_EXT
import random
"""
python sdsvkie/tools/infer_e2e.py \
--cfg /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/config.yaml \
--weights /mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/06062023/best \
--device "cuda:0" \
--img /mnt/hdd2T/AICR/Projects/2023/Vietinbank_POC/Invoice_JPG/ \
--e2e /mnt/hdd2T/AICR/Projects/2023/Vietinbank_POC/Invoice_KIE_Results/result.json
"""
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--cfg", type=str, default="sdsvkie/cfg/default.yaml")
parser.add_argument("--img", type=str, default="img.jpg", help="image path or directory")
parser.add_argument("--weights", type=str, default=None)
parser.add_argument("--text_det", type=str, default=None)
parser.add_argument("--device", type=str, default=None)
parser.add_argument("--e2e", type=str, default=None)
parser.add_argument("--vis", type=str, default=None)
args = parser.parse_args()
# predictor = Predictor(cfg=cfg)
predictor = Predictor(**vars(args))
if args.e2e:
outdir_e2e = os.path.dirname(args.e2e)
if not os.path.exists(outdir_e2e):
os.makedirs(outdir_e2e, exist_ok=True)
if os.path.isdir(args.img):
img_paths = sorted(glob(args.img + "/*"))
print("Infence image dir, total imgs: {}".format(len(img_paths)))
else:
img_paths = [args.img]
out_dict = {}
if "Others" in predictor.classes:
colors = {
"Store_name_value": (30,97,235),
"id": (28,175,6),
"Date_value": (241,26,242),
"Total_value": (255,0,0),
}
else:
colors = [
(
random.randint(0, 255),
random.randint(0, 255),
random.randint(0, 255),
)
for _ in range(len(predictor.classes))
]
import time
for img_path in tqdm.tqdm(img_paths):
print(img_path)
if Path(img_path).suffix.lower() in IMG_EXT:
img = cv2.imread(img_path)
if img is None:
print("img is None: ", img_path)
continue
elif Path(img_path).suffix.lower() in PDF_EXT:
img = img_path #pdf
else:
continue
# try:
out = predictor(img)
# except Exception as err:
# print(err, img_path)
# continue
out_api = out['end2end_results']
if not args.e2e:
print(out_api)
else:
img_id = os.path.splitext(os.path.basename(img_path))[0]
out_dict[img_id] = {
field_name: field_item['value'] for field_name, field_item in out_api.items()
}
if args.vis:
visualize_kie(
img,
boxes=[field_item['box'] for field_name, field_item in out_api.items() if len(field_item['box']) > 0],
pred_labels=[field_name for field_name, field_item in out_api.items() if len(field_item['box']) > 0],
image_name=os.path.basename(img_path),
outdir=args.vis,
colors=colors,
texts = [field_item['value'] for field_name, field_item in out_api.items() if len(field_item['box']) > 0]
)
if args.e2e:
write_json(os.path.join(args.e2e), out_dict)

View File

@ -1,72 +0,0 @@
"""
Use for eval, debug
"""
import argparse
import os
from glob import glob
from pathlib import Path
from sdsvkie.utils.word_formation import Word
import tqdm
from sdsvkie.engine import Predictor
from sdsvkie.utils.io_file import load_ocr_output, write_txt
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--cfg", type=str, default="sdsvkie/cfg/default.yaml")
parser.add_argument(
"--src", type=str, default="img.jpg / img_dir", help="image path or directory"
)
parser.add_argument("--tgt", type=str, required=True, default=None)
args = parser.parse_args()
predictor = Predictor(args.cfg)
if not os.path.exists(args.tgt):
os.makedirs(args.tgt, exist_ok=True)
txt_paths = glob(args.src + "/*.txt")
print("Total txt: {}".format(len(txt_paths)))
for txt_path in tqdm.tqdm(txt_paths):
ocr_output = load_ocr_output(txt_path)
boxes, texts, labels = ocr_output['boxes'], ocr_output['texts'], ocr_output['labels']
words = []
for box, text, label in zip(boxes, texts, labels):
words.append(
Word(
text=text,
bndbox=box,
kie_label=label,
conf_cls=0.0
)
)
kie_output = predictor.postprocessing(words)
txt_out_path = str(Path(args.tgt) / Path(txt_path).name)
data = []
# print(output)
wordgroup_all_list = []
for kie_label, wordgroup_list in kie_output.items():
wordgroup_all_list.extend(wordgroup_list)
boxes = [word.boundingbox for word in wordgroup_all_list]
pred_labels = [word.kie_label for word in wordgroup_all_list]
texts = [word.text + "____kie_wordgroup" for word in wordgroup_all_list]
for box, text, kie_label in zip(boxes, texts, pred_labels):
item = "\t".join([str(int(x)) for x in box])
item = "\t".join([item, text, kie_label])
data.append(item)
write_txt(txt_out_path, data)

View File

@ -1,72 +0,0 @@
import argparse
from sdsvkie.utils import read_json, yaml_load, write_json
from sdsvkie.utils.post_processing.invoice_post_processing import *
from sdsvkie.utils.post_processing.common_post_processing import normalize_number
def postprocess_invoice(invoice_data):
if 'date' in invoice_data:
invoice_data['date'] = post_processing_datetime(invoice_data['date'])
#### normalize number
number_fields = ['total_value', 'VAT_amount_value']
for number_field in number_fields:
if number_field not in invoice_data:
continue
invoice_data[number_field] = normalize_number(invoice_data[number_field])
if 'buyer_tax_code_value' in invoice_data:
invoice_data['buyer_tax_code_value'] = normalize_number(invoice_data['buyer_tax_code_value'], rerserve_minus=True)
if 'seller_tax_code_value' in invoice_data:
invoice_data['seller_tax_code_value'] = normalize_number(invoice_data['seller_tax_code_value'], rerserve_minus=True)
if "seller_mobile_value" in invoice_data:
invoice_data['seller_mobile_value'] = normalize_number(invoice_data['seller_mobile_value'], rerserve_minus=False, reserve_plus=True)
for field_name in invoice_data.keys():
field_value = invoice_data[field_name]
field_value = field_value.replace("", " ")
field_value = field_value.replace("\t", " ")
field_value = re.sub(r"\s+", " ", field_value)
invoice_data[field_name] = field_value
return invoice_data
def format_e2e_data(input_json, output_json, cfg):
cfg = yaml_load(cfg)
classes = cfg['classes']
value_classes = [cls_name for cls_name in classes if "_key" not in cls_name and "other" not in cls_name]
print(value_classes)
in_data = read_json(input_json)
out_data = {}
for img_id, img_data in in_data.items():
new_img_data = postprocess_invoice(img_data)
for cls_value in value_classes:
if cls_value not in new_img_data:
new_img_data[cls_value] = ""
out_data[img_id] = new_img_data
write_json(data=out_data, json_path=output_json)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--cfg", type=str)
parser.add_argument("--input", type=str, help="e2e label file path")
parser.add_argument("--out", type=str, help='postprocess e2e label')
args = parser.parse_args()
format_e2e_data(args.input, args.out, args.cfg)

View File

@ -1,84 +0,0 @@
import argparse
import glob
from pathlib import Path
import cv2
from tqdm import tqdm
from sdsvkie.models.ocr import OCREngine
from sdsvkie.utils.visualize import visualize_ocr
from sdsvkie.utils.io_file import write_txt
from sdsvkie.utils.word_formation import sort_words
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--img", type=str)
parser.add_argument("--out_dir", type=str, default=None)
parser.add_argument("--device", type=str, default='cpu')
parser.add_argument("--reserve_parent_dir", action='store_true')
parser.add_argument("--only_text", action='store_true')
parser.add_argument("--out_txt", type=str, default=None)
parser.add_argument("--text_det", default="yolox-s-general-text-pretrain-20221226")
parser.add_argument("--text_recog", default="satrn-lite-general-pretrain-20230106")
args = parser.parse_args()
ocr_engine = OCREngine(text_det=args.text_det, text_recog=args.text_recog, device=args.device)
if args.reserve_parent_dir:
paths = glob.glob(args.img + "/*/*")
else:
paths = glob.glob(args.img + "/*")
for path in tqdm(paths):
img = cv2.imread(path)
if img is None:
print(path)
continue
ocr_output = ocr_engine(img, extend_ratio=[0.1, 0.3], ratio_thr=5)
if args.out_dir:
if args.reserve_parent_dir:
out_dir_img = Path(args.out_dir) / Path(path).parent.name
else:
out_dir_img = Path(args.out_dir)
if not out_dir_img.exists():
out_dir_img.mkdir(parents=True)
visualize_ocr(
img = img,
boxes=ocr_output['boxes'],
texts=ocr_output['texts'],
image_name=str(Path(path).name),
outdir=str(out_dir_img)
)
if args.out_txt:
if args.reserve_parent_dir:
out_dir_txt = Path(args.out_txt) / Path(path).parent.name
else:
out_dir_txt = Path(args.out_txt)
if not out_dir_txt.exists():
out_dir_txt.mkdir(parents=True)
out_txt_path = out_dir_txt / Path(path).with_suffix(".txt").name
data = []
if args.only_text:
out = sort_words(ocr_output)
text = " ".join(out['texts'])
data.append(text)
else:
for box, text in zip(ocr_output['boxes'], ocr_output['texts']):
item = "\t".join([str(int(x)) for x in box])
item = "\t".join([item, text])
data.append(item)
write_txt(str(out_txt_path), data)

View File

@ -1,27 +0,0 @@
import argparse
from sdsvkie.cfg import load_cfg
from sdsvkie.engine.trainer import Trainer
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--cfg", type=str, default="sdsvkie/cfg/default.yaml")
parser.add_argument("--device", type=str, default="cuda")
parser.add_argument("--save_dir", type=str, default="./workdirs/exp")
parser.add_argument("--wandb", action="store_true")
args = parser.parse_args()
# cfg = cfg2dict(args.cfg)
# cfg['device'] = args.device
# cfg['save_dir'] = args.save_dir
cfg = load_cfg(args.cfg, vars(args))
print(cfg)
if args.wandb:
cfg['wandb'] = "invoice"
trainer = Trainer(cfg)
trainer.train()

View File

@ -1,20 +0,0 @@
import argparse
from sdsvkie.cfg import load_cfg
from sdsvkie.engine.trainer import Trainer
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--cfg", type=str, default="sdsvkie/cfg/default.yaml")
parser.add_argument("--device", type=str, default="cuda")
parser.add_argument("--weights", type=str, default=None)
args = parser.parse_args()
cfg = load_cfg(args.cfg, vars(args))
trainer = Trainer(cfg)
metric = trainer.val()
print(metric)

View File

@ -1,17 +0,0 @@
from .transform import normalize_box, unnormalize_box
from .augmentation import perturbate_character, sampling_data
from .io_file import yaml_load, yaml_save, read_json, write_json
from .visualize import visualize_kie, visualize_ocr
from .word_formation import (
sliding_windows, Word_group, words_to_lines,
sort_words, Word, merge_boxes, merge_wordgroups,
distance_of_boxes, y_distance
)
from .post_processing import *
from .logger import get_logger
from .common import get_info_env
from .convert_pdf2image import pdf_to_image
IMG_EXT = ['.jpg', ".png", ".jpeg"]
PDF_EXT = [".pdf"]

View File

@ -1,112 +0,0 @@
import math
import random
VN_list_char = "aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ0123456789!#$%&()*+,-./:;<=>?@[\]^_`{|}~"
def perturbate_character(words: list, ratio=0.01):
# Algorithm
# Step 1: count the total number of characters over all words and randomly sample the character
# positions to perturb; positions are indices into the implicit concatenation of all words and are
# kept sorted ascending. E.g. words = ["abc", "lkdhf", "lfhdlsa", "akdjhf"] => total_char = 21,
# pertubation_positions = [7, 13, 15] (positions inside the concatenated string "abclkdhflfhdlsaakdjhf").
# Step 2: walk through the words while tracking each word's start/end position in the concatenated
# string; whenever the current word covers the next sampled position, apply one random edit at that
# character (insert a random char to its left or right, replace it, or delete it), then advance to
# the next sampled position.
total_char = sum(len(i) for i in words)
pertubation_positions = sorted(
random.sample(range(total_char), int(ratio * total_char))
)
# logging.info(pertubation_positions)
pos = 0
start_pos = 0
j = 0
for i, word in enumerate(words):
if j == len(pertubation_positions):
break
start_pos = pos
pos += len(word)
# logging.info(start_pos,pos)
while pos > pertubation_positions[j]:
x = random.randint(0, 3)
fixing_pos = pertubation_positions[j] - start_pos
if x == 0: # append random char to the left
word = (
word[:fixing_pos]
+ VN_list_char[random.randint(0, len(VN_list_char) - 1)]
+ word[fixing_pos:]
)
if x == 1: # append random char to the right
word = (
word[: fixing_pos + 1]
+ VN_list_char[random.randint(0, len(VN_list_char) - 1)]
+ word[fixing_pos + 1 :]
)
if x == 2: # adjust to another random char at current position
word = (
word[:fixing_pos]
+ VN_list_char[random.randint(0, len(VN_list_char) - 1)]
+ word[fixing_pos + 1 :]
)
if x == 3 and len(word) > 1: # delete char at current position
word = word[:fixing_pos] + word[fixing_pos + 1 :]
j += 1
# logging.info(words[i], word)
words[i] = word
if j == len(pertubation_positions):
break
return words
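# Hedged usage sketch (not part of the original file): perturb roughly 10% of the characters
# of a small word list; the list length stays the same, individual words may gain, lose, or
# change one character. The seed is only there to make the demo reproducible.
def _demo_perturbate_character():
    random.seed(0)
    words = ["CONG", "TY", "TNHH", "THUONG", "MAI"]
    noisy = perturbate_character(list(words), ratio=0.1)
    print(words)
    print(noisy)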
def sampling_data(words, boxes, labels, max_num_words=150, slice_interval=50):
# num_boxes = len(words)
# if num_boxes > max_num_words:
# slide = max_num_words // 2
# num_batches = math.ceil(num_boxes / slide)
# idx_batches = [i for i in range(num_batches)]
# idx_batch = random.choice(idx_batches)
# start_idx = slide * idx_batch
# words = words[start_idx:]
# normalized_word_boxes = normalized_word_boxes[start_idx:]
# word_labels = normalized_word_boxes[start_idx:]
total_word = len(words)
window_size = max_num_words
text_windows = [
words[i : i + window_size] for i in range(0, total_word, slice_interval)
]
box_windows = [
boxes[i : i + window_size] for i in range(0, total_word, slice_interval)
]
label_windows = [
labels[i : i + window_size] for i in range(0, total_word, slice_interval)
]
# assert all(
# [
# len(_words) == len(boxes)
# for _words, boxes in zip(words, normalized_word_boxes)
# ]
# )
# assert all(
# [
# len(_words) == len(_word_labels)
# for _words, _word_labels in zip(words, word_labels)
# ]
# )
sampling_idx = random.choice([i for i in range(len(text_windows))])
sampling_words, sampling_boxes, sampling_labels = (
text_windows[sampling_idx],
box_windows[sampling_idx],
label_windows[sampling_idx],
)
return sampling_idx, sampling_words, sampling_boxes, sampling_labels
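# Hedged usage sketch (not part of the original file): sample one sliding window of at most
# 4 words (stride 2) from a toy sequence; boxes and labels stay aligned with the words.
def _demo_sampling_data():
    words = ["hoa", "don", "gia", "tri", "gia", "tang"]
    boxes = [[i, 0, i + 1, 1] for i in range(len(words))]
    labels = ["other"] * len(words)
    idx, win_words, win_boxes, win_labels = sampling_data(
        words, boxes, labels, max_num_words=4, slice_interval=2
    )
    print(idx, win_words, win_boxes, win_labels)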

View File

@ -1,8 +0,0 @@
from torch.utils import collect_env
def get_info_env():
return collect_env.get_pretty_env_info()
def count_parameters(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad)
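# Hedged usage sketch (not part of the original file): count trainable parameters of a tiny
# torch module; a Linear(10, 2) layer has 10 * 2 weights + 2 biases = 22 parameters.
def _demo_count_parameters():
    import torch.nn as nn
    layer = nn.Linear(10, 2)
    print(count_parameters(layer))  # -> 22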

View File

@ -1,166 +0,0 @@
import argparse
import glob
import os
import shutil
from pathlib import Path
import fitz # PyMuPDF, imported as fitz for backward compatibility reasons
import numpy as np
from tqdm import tqdm
import cv2
from pdf2image import convert_from_path
def convert_pdf2image(filename, out_dir, dpi=300, reserve_parent_dir=False, is_get_first_page=False):
"""Convert and save to disk
Args:
filename (_type_): _description_
out_dir (_type_): _description_
dpi (int, optional): _description_. Defaults to 300.
reserve_parent_dir (bool, optional): _description_. Defaults to False.
"""
out_dir = Path(out_dir)
filename = Path(filename)
filename_str = str(filename)
if reserve_parent_dir:
parent_dir = filename.parent.name
out_dir = out_dir / parent_dir
if not out_dir.exists():
out_dir.mkdir(parents=True)
if ".txt" in str(filename).lower():
return
if ".pdf" not in str(filename).lower():
shutil.copy(filename, out_dir)
return
# doc = fitz.open(filename_str) # open document
# # zoom = 2 # zoom factor, standard: 72 dpi
# # dpi = 300
# zoom = dpi // 72
# magnify = fitz.Matrix(zoom, zoom)
# for idx, page in enumerate(doc):
# pix = page.get_pixmap(matrix=magnify) # render page to an image
# outpath = out_dir / Path(os.path.splitext(os.path.basename(filename))[0] + "_" + str(idx+1) + ".jpg")
# pix.pil_save(outpath)
try:
imgs = pdf_to_image(pdf=filename_str, is_get_first_page=is_get_first_page, dpi=dpi)
except:
print("Use v2: ", filename_str)
imgs = pdf_to_image_v2(pdf=filename_str, is_get_first_page=is_get_first_page, dpi=dpi)
print("Len img: ", len(imgs))
for idx, img in enumerate(imgs):
outpath = str(out_dir / Path(os.path.splitext(os.path.basename(filename))[0] + "_" + str(idx+1) + ".jpg"))
cv2.imwrite(img=img, filename=outpath)
def pdf_to_image_v2(pdf, dpi=300, is_get_first_page=False, max_page=1000):
"""_summary_
Args:
pdf (_type_): _description_
dpi (int, optional): _description_. Defaults to 300.
is_get_first_page (bool, optional): _description_. Defaults to False.
max_page (int, optional): _description_. Defaults to 1000.
Raises:
NotImplementedError: _description_
Returns:
_type_: _description_
"""
if isinstance(pdf, str):
if not os.path.exists(pdf):
print(f"Not found pdf path at {pdf}")
return []
imgs = convert_from_path(pdf, dpi=dpi) # PILLOW
else:
raise NotImplementedError(f"Not yet implement for {type(pdf)} type !!!")
# zoom = dpi // 72
# magnify = fitz.Matrix(zoom, zoom)
cv_imgs = []
for idx, img in enumerate(imgs):
img = img.convert("RGB")
cv_img = np.array(img)
cv_img = cv_img[:, :, ::-1].copy()
cv_imgs.append(cv_img)
if is_get_first_page or idx >= max_page:
break
return cv_imgs
def pdf_to_image(pdf, dpi=300, is_get_first_page=False, max_page=1000):
"""_summary_
Args:
pdf (_type_): _description_
dpi (int, optional): _description_. Defaults to 300.
is_get_first_page (bool, optional): _description_. Defaults to False.
max_page (int, optional): _description_. Defaults to 1000.
Raises:
NotImplementedError: _description_
Returns:
_type_: _description_
"""
if isinstance(pdf, str):
if not os.path.exists(pdf):
print(f"Not found pdf path at {pdf}")
return []
doc = fitz.open(pdf) # open document
elif isinstance(pdf, bytes):
doc = fitz.open(stream=pdf, filetype='pdf')
else:
raise NotImplementedError(f"Not yet implement for {type(pdf)} type !!!")
zoom = dpi // 72
magnify = fitz.Matrix(zoom, zoom)
imgs = []
for idx, page in enumerate(doc):
pix = page.get_pixmap(matrix=magnify) # render page to an image
im = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.h, pix.w, pix.n)
im = np.ascontiguousarray(im[..., [2, 1, 0]]) # rgb to bgr
imgs.append(im)
if is_get_first_page or idx >= max_page:
break
return imgs
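# Hedged usage sketch (not part of the original file): render every page of a local PDF to a
# BGR image and save them next to the script; "sample.pdf" is a hypothetical path.
def _demo_pdf_to_image():
    pages = pdf_to_image("sample.pdf", dpi=200)
    for page_idx, page_img in enumerate(pages):
        cv2.imwrite("sample_page_{}.jpg".format(page_idx + 1), page_img)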
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--pdf_dir", type=str)
parser.add_argument("--out_dir", type=str)
parser.add_argument("--reserve_parent_dir", action='store_true')
args = parser.parse_args()
paths = glob.glob(args.pdf_dir + "/*") \
+ glob.glob(args.pdf_dir + "/*/*") \
+ glob.glob(args.pdf_dir + "/*/*/*")
# + glob.glob(args.pdf_dir + "/*")
print(f"Total pdf paths in {args.pdf_dir}: {len(paths)} ")
error_pdfs = []
for path in tqdm(paths):
path = str(path)
try:
convert_pdf2image(path, args.out_dir, reserve_parent_dir=args.reserve_parent_dir)
except Exception as err:
print(err, path)
error_pdfs.append(path)
continue
print("Total error pdfs: ", len(error_pdfs))
print(error_pdfs)

View File

@ -1,90 +0,0 @@
from typing import List
import random
def gen_random_color():
red = random.randint(0, 255)
green = random.randint(0, 255)
blue = random.randint(0, 255)
# combine the values into a hexadecimal color code
color_code = "#{:02x}{:02x}{:02x}".format(red, green, blue)
return color_code
def gen_raw_label(labels: List[str]):
"""gen raw label for cvat tool
{
"name": "no_key",
"color": "#33ddff",
"type": "any",
"attributes": []
},
Args:
labels (List[str]): _description_
"""
raw_label = []
for label in labels:
item = {
"name": label,
"color": gen_random_color(),
"type": "any",
"attributes": []
}
raw_label.append(item)
return raw_label
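# Hedged usage sketch (not part of the original file): build the CVAT label config for two
# labels and dump it as JSON text; the colors are random on every run.
def _demo_gen_raw_label():
    import json
    print(json.dumps(gen_raw_label(["no_key", "no_value"]), indent=2))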
if __name__ == "__main__":
labels = [
# id invoice
'no_key', # số hóa đơn
'no_value',
'form_key', # mẫu số hóa đơn
'form_value',
'serial_key', # số kí hiệu hoá đơn
'serial_value',
'date',
# seller info
'seller_company_name_key',
'seller_company_name_value',
'seller_tax_code_key',
'seller_tax_code_value',
'seller_address_value',
'seller_address_key',
'seller_mobile_key',
'seller_mobile_value',
# seller_bank_no and seller_bank_name are not yet supported
# 'seller_name_key',
# 'seller_name_value',
# 'seller_company_name_value', -> seller_name_value
# buyer info
'buyer_name_key',
'buyer_name_value',
'buyer_company_name_value',
'buyer_company_name_key',
'buyer_tax_code_key',
'buyer_tax_code_value',
'buyer_address_key',
'buyer_address_value',
'buyer_mobile_key',
'buyer_mobile_value',
# money info
'VAT_amount_key',
'VAT_amount_value',
'total_key',
'total_value',
'total_in_words_key',
'total_in_words_value',
'other',
]
raw_label = gen_raw_label(labels)
print(raw_label)

View File

@ -1,87 +0,0 @@
import os
import glob
import json
import shutil
def read_txt(txt):
with open(txt, 'r', encoding='utf8') as f:
data = [line.strip() for line in f]
return data
def write_txt(txt, data):
with open(txt, 'w', encoding='utf8') as f:
for line in data:
f.write(line + "\n")
def write_json(json_path, data):
with open(json_path, 'w', encoding='utf8') as f:
json.dump(data, f, ensure_ascii=False)
def read_json(json_path):
with open(json_path, 'r', encoding='utf8') as f:
data = json.load(f)
return data
def create_template_info(data_dir, json_out):
outputs ={}
txt_paths = sorted(glob.glob(data_dir + "/*.txt"))
for txt_path in txt_paths:
txt_name = os.path.basename(txt_path)
txt_data = read_txt(txt_path)
wordgroups = [item for item in txt_data if "____kie_wordgroup seller_company_name_value" in item]
num_line_company = len(wordgroups)
outputs[txt_name] = num_line_company
write_json(json_out, outputs)
def filter_data(template_file, data_file, img_dir, txt_dir, output):
template_data = read_json(template_file)
data = read_json(data_file)
new_data = []
for txt_name, num_wordgroup_line in template_data.items():
id = txt_name.split("_type99_")[0]
for txt_name_target, num_wordgroup_target_line in data.items():
id_target = txt_name_target.split("_type99_")[0]
# print(id, id_target, id_target != id)
if id_target != id:
# print(id_target, id)
continue
if num_wordgroup_line != num_wordgroup_target_line:
continue
new_data.append(txt_name_target)
new_data = sorted(list(set(new_data)))
print(new_data[:5])
# return new_data
if not os.path.exists(output):
os.makedirs(output, exist_ok=True)
for txt_name in new_data:
img_path = os.path.join(img_dir, txt_name.replace(".txt", ".jpg"))
shutil.copy(img_path, output)
shutil.copy(os.path.join(txt_dir, txt_name), output)
if __name__ == "__main__":
target_dir = "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/visualize/vnpt_one_line_r20_txt"
template_dir = "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/visualize/vnpt_one_line_txt"
output_dir = "/mnt/ssd1T/hoanglv/Projects/KIE/craw_data/output/synth_vnpt_r20/one_line_filtered"
img_target_dir = "/mnt/ssd1T/hoanglv/Projects/KIE/craw_data/output/synth_vnpt_r20/one_line"
out_template_json = "vnpt_template.json"
out_target_json = "vnpt_r20.json"
create_template_info(template_dir, out_template_json)
create_template_info(target_dir, out_target_json)
new_data = filter_data(out_template_json, out_target_json,img_target_dir, target_dir, output_dir)
print("Total after filter: ", len(new_data))

File diff suppressed because one or more lines are too long

View File

@ -1,48 +0,0 @@
{
"invoice_vnpt_id10_type99_1.txt": 1,
"invoice_vnpt_id11_type99_1.txt": 1,
"invoice_vnpt_id12_type99_1.txt": 1,
"invoice_vnpt_id13_type99_1.txt": 2,
"invoice_vnpt_id14_type99_1.txt": 1,
"invoice_vnpt_id15_type99_1.txt": 0,
"invoice_vnpt_id16_type99_1.txt": 1,
"invoice_vnpt_id17_type99_1.txt": 1,
"invoice_vnpt_id18_type99_1.txt": 1,
"invoice_vnpt_id19_type99_1.txt": 1,
"invoice_vnpt_id20_type99_1.txt": 2,
"invoice_vnpt_id21_type99_1.txt": 1,
"invoice_vnpt_id22_type99_1.txt": 1,
"invoice_vnpt_id23_type99_1.txt": 1,
"invoice_vnpt_id24_type99_1.txt": 1,
"invoice_vnpt_id25_type99_1.txt": 2,
"invoice_vnpt_id26_type99_1.txt": 1,
"invoice_vnpt_id27_type99_1.txt": 4,
"invoice_vnpt_id28_type99_1.txt": 0,
"invoice_vnpt_id29_type99_1.txt": 1,
"invoice_vnpt_id30_type99_1.txt": 3,
"invoice_vnpt_id31_type99_1.txt": 1,
"invoice_vnpt_id32_type99_1.txt": 1,
"invoice_vnpt_id33_type99_1.txt": 1,
"invoice_vnpt_id34_type99_1.txt": 3,
"invoice_vnpt_id35_type99_1.txt": 1,
"invoice_vnpt_id36_type99_1.txt": 2,
"invoice_vnpt_id37_type99_1.txt": 1,
"invoice_vnpt_id39_type99_1.txt": 1,
"invoice_vnpt_id40_type99_1.txt": 1,
"invoice_vnpt_id43_type99_1.txt": 1,
"invoice_vnpt_id44_type99_1.txt": 1,
"invoice_vnpt_id45_type99_1.txt": 0,
"invoice_vnpt_id46_type99_1.txt": 2,
"invoice_vnpt_id47_type99_1.txt": 1,
"invoice_vnpt_id48_type99_1.txt": 1,
"invoice_vnpt_id49_type99_1.txt": 1,
"invoice_vnpt_id50_type99_1.txt": 1,
"invoice_vnpt_id52_type99_1.txt": 1,
"invoice_vnpt_id53_type99_1.txt": 1,
"invoice_vnpt_id54_type99_1.txt": 0,
"invoice_vnpt_id55_type99_1.txt": 0,
"invoice_vnpt_id56_type99_1.txt": 2,
"invoice_vnpt_id57_type99_1.txt": 1,
"invoice_vnpt_id8_type99_1.txt": 1,
"invoice_vnpt_id9_type99_1.txt": 1
}

View File

@ -1,213 +0,0 @@
[
{
"name": "word",
"color": "#83e070",
"type": "any",
"attributes": [
{
"name": "text",
"input_type": "text",
"mutable": false,
"values": ["x"]
},
{
"name": "kie_label",
"input_type": "text",
"mutable": false,
"values": ["x"]
}
]
},
{
"name": "no_key",
"color": "#cf04f1",
"type": "any",
"attributes": []
},
{
"name": "no_value",
"color": "#0a01ce",
"type": "any",
"attributes": []
},
{
"name": "form_key",
"color": "#bfe920",
"type": "any",
"attributes": []
},
{
"name": "form_value",
"color": "#ac3436",
"type": "any",
"attributes": []
},
{
"name": "serial_key",
"color": "#706724",
"type": "any",
"attributes": []
},
{
"name": "serial_value",
"color": "#7a9b4b",
"type": "any",
"attributes": []
},
{
"name": "date",
"color": "#23f0e9",
"type": "any",
"attributes": []
},
{
"name": "seller_company_name_key",
"color": "#f47ccc",
"type": "any",
"attributes": []
},
{
"name": "seller_company_name_value",
"color": "#9c9c73",
"type": "any",
"attributes": []
},
{
"name": "seller_tax_code_key",
"color": "#afa0fa",
"type": "any",
"attributes": []
},
{
"name": "seller_tax_code_value",
"color": "#6e7352",
"type": "any",
"attributes": []
},
{
"name": "seller_address_value",
"color": "#121512",
"type": "any",
"attributes": []
},
{
"name": "seller_address_key",
"color": "#188735",
"type": "any",
"attributes": []
},
{
"name": "seller_mobile_key",
"color": "#7387fd",
"type": "any",
"attributes": []
},
{
"name": "seller_mobile_value",
"color": "#325bf1",
"type": "any",
"attributes": []
},
{
"name": "buyer_name_key",
"color": "#a5b431",
"type": "any",
"attributes": []
},
{
"name": "buyer_name_value",
"color": "#e63dcc",
"type": "any",
"attributes": []
},
{
"name": "buyer_company_name_value",
"color": "#e9bf0b",
"type": "any",
"attributes": []
},
{
"name": "buyer_company_name_key",
"color": "#a8d921",
"type": "any",
"attributes": []
},
{
"name": "buyer_tax_code_key",
"color": "#1d8f4f",
"type": "any",
"attributes": []
},
{
"name": "buyer_tax_code_value",
"color": "#e638c6",
"type": "any",
"attributes": []
},
{
"name": "buyer_address_key",
"color": "#74afe5",
"type": "any",
"attributes": []
},
{
"name": "buyer_address_value",
"color": "#1518dc",
"type": "any",
"attributes": []
},
{
"name": "buyer_mobile_key",
"color": "#13b1cd",
"type": "any",
"attributes": []
},
{
"name": "buyer_mobile_value",
"color": "#c49d59",
"type": "any",
"attributes": []
},
{
"name": "VAT_amount_key",
"color": "#69c945",
"type": "any",
"attributes": []
},
{
"name": "VAT_amount_value",
"color": "#77c3be",
"type": "any",
"attributes": []
},
{
"name": "total_key",
"color": "#d1353a",
"type": "any",
"attributes": []
},
{
"name": "total_value",
"color": "#246976",
"type": "any",
"attributes": []
},
{
"name": "total_in_words_key",
"color": "#45a8b5",
"type": "any",
"attributes": []
},
{
"name": "total_in_words_value",
"color": "#d800df",
"type": "any",
"attributes": []
},
{
"name": "other",
"color": "#ba0fbd",
"type": "any",
"attributes": []
}
]

View File

@ -1,479 +0,0 @@
import argparse
import re
from difflib import SequenceMatcher
from rapidfuzz.distance import Levenshtein
from terminaltables import AsciiTable
from sdsvkie.cfg import load_cfg
from sdsvkie.utils.io_file import read_json, write_json
from pathlib import Path
def is_type_list(x, type):
if not isinstance(x, list):
return False
return all(isinstance(item, type) for item in x)
def cal_true_positive_char(pred, gt):
"""Calculate correct character number in prediction.
Args:
pred (str): Prediction text.
gt (str): Ground truth text.
Returns:
true_positive_char_num (int): The true positive number.
"""
all_opt = SequenceMatcher(None, pred, gt)
true_positive_char_num = 0
for opt, _, _, s2, e2 in all_opt.get_opcodes():
if opt == "equal":
true_positive_char_num += e2 - s2
else:
pass
return true_positive_char_num
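# Hedged usage sketch (not part of the original file): "hello" vs "hallo" share the
# characters "h" and "llo", so 4 characters count as true positives.
def _demo_cal_true_positive_char():
    print(cal_true_positive_char("hello", "hallo"))  # -> 4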
def post_processing(text, lowercase=False):
"""
- Remove special characters and extra spaces
"""
text = re.sub(
r"[^aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ0123456789 ]",
" ",
text,
)
text = re.sub(r"\s\s+", " ", text)
text = text.strip()
if lowercase:
text = text.lower()
return text
def count_matches(pred_texts, gt_texts, use_ignore=True):
"""Count the various match number for metric calculation.
Args:
pred_texts (list[str]): Predicted text string.
gt_texts (list[str]): Ground truth text string.
Returns:
match_res: (dict[str: int]): Match number used for
metric calculation.
"""
match_res = {
"gt_char_num": 0,
"pred_char_num": 0,
"true_positive_char_num": 0,
"gt_word_num": 0,
"match_word_num": 0,
"match_word_ignore_case": 0,
"match_word_ignore_case_symbol": 0,
"match_kie": 0,
"match_kie_ignore_case": 0,
}
# comp = re.compile('[^A-Z^a-z^0-9^\u4e00-\u9fa5]')
# comp = re.compile('[]')
norm_ed_sum = 0.0
gt_texts_for_ned_word = []
pred_texts_for_ned_word = []
for pred_text, gt_text in zip(pred_texts, gt_texts):
if gt_text == pred_text:
match_res["match_word_num"] += 1
match_res["match_kie"] += 1
gt_text_lower = gt_text.lower()
pred_text_lower = pred_text.lower()
if gt_text_lower == pred_text_lower:
match_res["match_word_ignore_case"] += 1
# gt_text_lower_ignore = comp.sub('', gt_text_lower)
# pred_text_lower_ignore = comp.sub('', pred_text_lower)
if use_ignore:
gt_text_lower_ignore = post_processing(gt_text_lower)
pred_text_lower_ignore = post_processing(pred_text_lower)
else:
gt_text_lower_ignore = gt_text_lower
pred_text_lower_ignore = pred_text_lower
if gt_text_lower_ignore == pred_text_lower_ignore:
match_res["match_kie_ignore_case"] += 1
gt_texts_for_ned_word.append(gt_text_lower_ignore.split(" "))
pred_texts_for_ned_word.append(pred_text_lower_ignore.split(" "))
match_res["gt_word_num"] += 1
norm_ed = Levenshtein.normalized_distance(
pred_text_lower_ignore, gt_text_lower_ignore
)
# if norm_ed > 0.1:
# print(gt_text_lower_ignore, pred_text_lower_ignore, sep='\n')
# print("-"*20)
norm_ed_sum += norm_ed
# number to calculate char level recall & precision
match_res["gt_char_num"] += len(gt_text_lower_ignore)
match_res["pred_char_num"] += len(pred_text_lower_ignore)
true_positive_char_num = cal_true_positive_char(
pred_text_lower_ignore, gt_text_lower_ignore
)
match_res["true_positive_char_num"] += true_positive_char_num
normalized_edit_distance = norm_ed_sum / max(1, len(gt_texts))
match_res["ned"] = normalized_edit_distance
# NED for word-level
norm_ed_word_sum = 0.0
# print(pred_texts_for_ned_word[0])
unique_words = list(
set(
[x for line in pred_texts_for_ned_word for x in line]
+ [x for line in gt_texts_for_ned_word for x in line]
)
)
preds = [
[unique_words.index(w) for w in pred_text_for_ned_word]
for pred_text_for_ned_word in pred_texts_for_ned_word
]
truths = [
[unique_words.index(w) for w in gt_text_for_ned_word]
for gt_text_for_ned_word in gt_texts_for_ned_word
]
for pred_text, gt_text in zip(preds, truths):
norm_ed_word = Levenshtein.normalized_distance(pred_text, gt_text)
# if norm_ed_word < 0.2:
# print(pred_text, gt_text)
norm_ed_word_sum += norm_ed_word
normalized_edit_distance_word = norm_ed_word_sum / max(1, len(gt_texts))
match_res["ned_word"] = normalized_edit_distance_word
return match_res
def eval_ocr_metric(pred_texts, gt_texts, metric="acc"):
"""Evaluate the text recognition performance with metric: word accuracy and
1-N.E.D. See https://rrc.cvc.uab.es/?ch=14&com=tasks for details.
Args:
pred_texts (list[str]): Text strings of prediction.
gt_texts (list[str]): Text strings of ground truth.
metric (str | list[str]): Metric(s) to be evaluated. Options are:
- 'word_acc': Accuracy at word level.
- 'word_acc_ignore_case': Accuracy at word level, ignoring letter
case.
- 'word_acc_ignore_case_symbol': Accuracy at word level, ignoring
letter case and symbol. (Default metric for academic evaluation)
- 'char_recall': Recall at character level, ignoring
letter case and symbol.
- 'char_precision': Precision at character level, ignoring
letter case and symbol.
- 'one_minus_ned': 1 - normalized_edit_distance
In particular, if ``metric == 'acc'``, results on all metrics above
will be reported.
Returns:
dict{str: float}: Result dict for text recognition, keys could be some
of the following: ['word_acc', 'word_acc_ignore_case',
'word_acc_ignore_case_symbol', 'char_recall', 'char_precision',
'1-N.E.D'].
"""
assert isinstance(pred_texts, list)
assert isinstance(gt_texts, list)
assert len(pred_texts) == len(gt_texts)
assert isinstance(metric, str) or is_type_list(metric, str)
if metric == "acc" or metric == ["acc"]:
metric = [
"word_acc",
"word_acc_ignore_case",
"word_acc_ignore_case_symbol",
"char_recall",
"char_precision",
"one_minus_ned",
]
metric = set([metric]) if isinstance(metric, str) else set(metric)
# supported_metrics = set([
# 'word_acc', 'word_acc_ignore_case', 'word_acc_ignore_case_symbol',
# 'char_recall', 'char_precision', 'one_minus_ned', 'one_minust_ned_word'
# ])
# assert metric.issubset(supported_metrics)
match_res = count_matches(pred_texts, gt_texts)
eps = 1e-8
eval_res = {}
if "char_recall" in metric:
char_recall = (
1.0 * match_res["true_positive_char_num"] / (eps + match_res["gt_char_num"])
)
eval_res["char_recall"] = char_recall
if "char_precision" in metric:
char_precision = (
1.0
* match_res["true_positive_char_num"]
/ (eps + match_res["pred_char_num"])
)
eval_res["char_precision"] = char_precision
if "word_acc" in metric:
word_acc = 1.0 * match_res["match_word_num"] / (eps + match_res["gt_word_num"])
eval_res["word_acc"] = word_acc
if "word_acc_ignore_case" in metric:
word_acc_ignore_case = (
1.0 * match_res["match_word_ignore_case"] / (eps + match_res["gt_word_num"])
)
eval_res["word_acc_ignore_case"] = word_acc_ignore_case
if "word_acc_ignore_case_symbol" in metric:
word_acc_ignore_case_symbol = (
1.0
* match_res["match_word_ignore_case_symbol"]
/ (eps + match_res["gt_word_num"])
)
eval_res["word_acc_ignore_case_symbol"] = word_acc_ignore_case_symbol
if "one_minus_ned" in metric:
eval_res["1-N.E.D"] = 1.0 - match_res["ned"]
if "one_minus_ned_word" in metric:
eval_res["1-N.E.D_word"] = 1.0 - match_res["ned_word"]
if "line_acc_ignore_case_symbol" in metric:
line_acc_ignore_case_symbol = (
1.0 * match_res["match_kie_ignore_case"] / (eps + match_res["gt_word_num"])
)
eval_res["line_acc_ignore_case_symbol"] = line_acc_ignore_case_symbol
if "line_acc" in metric:
word_acc_ignore_case_symbol = (
1.0 * match_res["match_kie"] / (eps + match_res["gt_word_num"])
)
eval_res["line_acc"] = word_acc_ignore_case_symbol
for key, value in eval_res.items():
eval_res[key] = float("{:.4f}".format(value))
return eval_res
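# Hedged usage sketch (not part of the original file): compare two predicted field strings
# against their ground truths with the default "acc" metric bundle; the strings are made up.
def _demo_eval_ocr_metric():
    preds = ["CONG TY TNHH ABC", "1000000"]
    gts = ["CONG TY TNHH ABC", "1.000.000"]
    print(eval_ocr_metric(preds, gts, metric="acc"))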
def eval_kie(pred_e2e_path, gt_e2e_path, kie_labels=[], skip_labels=[], log_failure_case=None, norm_failcase=False):
# assert ".json" in pred_e2e_path and ".json" in gt_e2e_path, "only support json type"
f = None
if log_failure_case:
log_failure_case = Path(log_failure_case)
log_failure_case_dir = log_failure_case.parent
if not log_failure_case_dir.exists():
log_failure_case_dir.mkdir(parents=True)
if isinstance(gt_e2e_path, str):
gt_e2e = read_json(gt_e2e_path)
else:
gt_e2e = gt_e2e_path
if isinstance(pred_e2e_path, str):
preds_e2e = read_json(pred_e2e_path)
else:
preds_e2e = pred_e2e_path
KIE_LABELS_WITH_ONLY_VALUES = [
class_name
for class_name in kie_labels
if "_key" not in class_name
and "other" not in class_name
and class_name not in skip_labels
]
pred_texts_dict = {label: [] for label in KIE_LABELS_WITH_ONLY_VALUES}
gt_texts_dict = {label: [] for label in KIE_LABELS_WITH_ONLY_VALUES}
results = {label: 1 for label in KIE_LABELS_WITH_ONLY_VALUES}
# print(KIE_LABELS_WITH_ONLY_VALUES)
fail_cases = {}
for img_id in preds_e2e.keys():
fail_cases[img_id] = {}
pred_items = preds_e2e[img_id]
gt_items = gt_e2e[img_id]
if not pred_items:
pred_items = {
class_name: "" for class_name in KIE_LABELS_WITH_ONLY_VALUES
}
for class_name, text_gt in gt_items.items():
if class_name in skip_labels:
continue
# if class_name == 'seller_name_value':
# print(gt_items)
if class_name not in pred_items:
text_pred = ""
else:
text_pred = pred_items[class_name]
if norm_failcase:
_text_pred = post_processing(text_pred, lowercase=True)
_text_gt = post_processing(text_gt, lowercase=True)
else:
_text_pred = text_pred
_text_gt = text_gt
if _text_pred != _text_gt:
fail_cases[img_id][class_name] = {
'pred': _text_pred,
'gt': _text_gt
}
pred_texts_dict[class_name].append(text_pred)
gt_texts_dict[class_name].append(text_gt)
if log_failure_case:
write_json(log_failure_case, fail_cases)
for class_name in KIE_LABELS_WITH_ONLY_VALUES:
pred_texts = pred_texts_dict[class_name]
gt_texts = gt_texts_dict[class_name]
result = eval_ocr_metric(
pred_texts,
gt_texts,
metric=[
"one_minus_ned",
"line_acc_ignore_case_symbol",
"line_acc",
"one_minus_ned_word",
],
)
results[class_name] = {
"1-ned": result["1-N.E.D"],
"1-ned-word": result["1-N.E.D_word"],
"line_acc": result["line_acc"],
"line_acc_ignore_case_symbol": result["line_acc_ignore_case_symbol"],
"samples": len(pred_texts),
}
# average results
sum_1_ned = sum(
[
results[class_name]["1-ned"] * results[class_name]["samples"]
for class_name in KIE_LABELS_WITH_ONLY_VALUES
]
)
sum_1_ned_word = sum(
[
results[class_name]["1-ned-word"] * results[class_name]["samples"]
for class_name in KIE_LABELS_WITH_ONLY_VALUES
]
)
sum_line_acc = sum(
[
results[class_name]["line_acc"] * results[class_name]["samples"]
for class_name in KIE_LABELS_WITH_ONLY_VALUES
]
)
sum_line_acc_ignore_case_symbol = sum(
[
results[class_name]["line_acc_ignore_case_symbol"]
* results[class_name]["samples"]
for class_name in KIE_LABELS_WITH_ONLY_VALUES
]
)
total_samples = sum(
[results[class_name]["samples"] for class_name in KIE_LABELS_WITH_ONLY_VALUES]
)
results["avg_all"] = {
"1-ned": round(sum_1_ned / total_samples, 4),
"1-ned-word": round(sum_1_ned_word / total_samples, 4),
"line_acc": round(sum_line_acc / total_samples, 4),
"line_acc_ignore_case_symbol": round(
sum_line_acc_ignore_case_symbol / total_samples, 4
),
"samples": total_samples,
}
table_data = [
[
"class_name",
"1-NED",
"1-N.E.D_word",
"line-acc",
"line_acc_ignore_case_symbol",
"#samples",
]
]
for class_name in results.keys():
# if c < p.shape[0]:
table_data.append(
[
class_name,
results[class_name]["1-ned"],
results[class_name]["1-ned-word"],
results[class_name]["line_acc"],
results[class_name]["line_acc_ignore_case_symbol"],
results[class_name]["samples"],
]
)
table = AsciiTable(table_data)
print(table.table)
return results
if __name__ == "__main__":
# gt_e2e = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/test_end2end/test_e2e.json"
# # pred_e2e = "/mnt/ssd1T/hoanglv/Projects/KIE/TokenClassification_invoice/workdirs/runs/pred_e2e.json"
# # pred_e2e = "/mnt/ssd1T/hoanglv/Projects/KIE/TokenClassification_invoice/workdirs/runs/infer/layoutxlm-base-31-03-2023-maxwords150_samplingv2/pred_e2e.json"
# # pred_e2e = "/mnt/ssd1T/hoanglv/Projects/KIE/TokenClassification_invoice/workdirs/runs/infer/kie_e2e_pred_17-10-2022-maxwords150_samplingv2_rm_dup_boxes/pred_e2e.json"
# # pred_e2e = "/mnt/ssd1T/hoanglv/Projects/KIE/TokenClassification_invoice/workdirs/runs/infer/kie_e2e_pred_17-10-2022-maxwords150_samplingv2/pred_e2e.json"
# # pred_e2e = "/home/sds/hoanglv/Projects/TokenClassification_invoice/runs/infer/kie_e2e_pred_14-10-2022_2/pred_e2e.json"
# pred_e2e = "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/e2e/test_17_10_2022_last_use_ocr_merge_use_label.json"
parser = argparse.ArgumentParser()
parser.add_argument("--cfg", type=str)
parser.add_argument("--pred", type=str, help="predict json file path")
parser.add_argument("--gt", type=str, help="ground truth json file path")
parser.add_argument("--log_failure_case", type=str, default=None, help="log_failure_case path")
parser.add_argument("--norm_failcase", action='store_true')
args = parser.parse_args()
cfg = load_cfg(args.cfg)
kie_labels = cfg['classes']
# res = eval_kie(pred_e2e, gt_e2e, skip_labels=["buyer_mobile_value"])
# print(res)
result = eval_kie(
pred_e2e_path=args.pred,
gt_e2e_path=args.gt,
kie_labels=kie_labels,
skip_labels=["Others", "other"],
log_failure_case=args.log_failure_case,
norm_failcase=args.norm_failcase
)
print("Path of validation dataset: /mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/PV2/invoice_kie_validation")
print("Number of validation dataset: ", result[list(result.keys())[0]]['samples'])
print("Evaluation metric: NLD")
print("Target level: ")
print("Archieved level: ")
print("Verification result: PASS")
""""
"""

View File

@ -1,102 +0,0 @@
import json
import re
from pathlib import Path
import yaml
def yaml_load(file="data.yaml", append_filename=False):
"""
Load YAML data from a file.
Args:
file (str, optional): File name. Default is 'data.yaml'.
append_filename (bool): Add the YAML filename to the YAML dictionary. Default is False.
Returns:
dict: YAML data and file name.
"""
with open(file, errors="ignore", encoding="utf-8") as f:
s = f.read() # string
# Remove special characters
if not s.isprintable():
s = re.sub(
r"[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]+",
"",
s,
)
# Add YAML filename to dict and return
return (
{**yaml.safe_load(s), "yaml_file": str(file)}
if append_filename
else yaml.safe_load(s)
)
def yaml_save(file="data.yaml", data=None):
"""
Save YAML data to a file.
Args:
file (str, optional): File name. Default is 'data.yaml'.
data (dict, optional): Data to save in YAML format. Default is None.
Returns:
None: Data is saved to the specified file.
"""
file = Path(file)
if not file.parent.exists():
# Create parent directories if they don't exist
file.parent.mkdir(parents=True, exist_ok=True)
with open(file, "w") as f:
# Dump data to file in YAML format, converting Path objects to strings
yaml.safe_dump(
{k: str(v) if isinstance(v, Path) else v for k, v in data.items()},
f,
sort_keys=False,
allow_unicode=True,
)
def write_txt(txt, data, mode="w"):
with open(txt, mode, encoding="utf8") as f:
for line in data:
f.write(line + "\n")
def read_txt(txt):
with open(txt, "r", encoding="utf8") as f:
data = [line.strip() for line in f]
return data
def write_json(json_path, data, sort_keys=True):
with open(json_path, "w", encoding="utf8") as f:
json.dump(data, f, ensure_ascii=False, sort_keys=sort_keys)
def read_json(json_path):
with open(json_path, "r", encoding="utf8") as f:
data = json.load(f)
return data
def load_ocr_output(txt_path):
with open(txt_path, "r", encoding="utf8") as f:
lines = [line.replace("\n", "").replace("\r", "") for line in f.readlines()]
words, boxes, labels = [], [], []
for i, line in enumerate(lines):
if len(line.split("\t")) == 6:
x1, y1, x2, y2, text, label = line.split("\t")
else:
x1, y1, x2, y2, text = line.split("\t")
label = None
x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
box = [x1, y1, x2, y2]
if text != " ":
words.append(text)
boxes.append(box)
labels.append(label)
return {"boxes": boxes, "texts": words, 'labels': labels}

View File

@ -1,48 +0,0 @@
import os
import sys
import logging
import functools
logger_initialized = {}
@functools.lru_cache()
def get_logger(name='root', log_file=None, log_level=logging.DEBUG):
"""Initialize and get a logger by name.
If the logger has not been initialized, this method will initialize the
logger by adding one or two handlers, otherwise the initialized logger will
be directly returned. During initialization, a StreamHandler will always be
added. If `log_file` is specified a FileHandler will also be added.
Args:
name (str): Logger name.
log_file (str | None): The log filename. If specified, a FileHandler
will be added to the logger.
log_level (int): The logger level. Note that only the process of
rank 0 is affected, and other processes will set the level to
"Error" thus be silent most of the time.
Returns:
logging.Logger: The expected logger.
"""
logger = logging.getLogger(name)
if name in logger_initialized:
return logger
for logger_name in logger_initialized:
if name.startswith(logger_name):
return logger
formatter = logging.Formatter(
'[%(asctime)s] %(name)s %(levelname)s: %(message)s',
datefmt="%Y/%m/%d %H:%M:%S")
stream_handler = logging.StreamHandler(stream=sys.stdout)
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
if log_file is not None:
log_file_folder = os.path.split(log_file)[0]
os.makedirs(log_file_folder, exist_ok=True)
file_handler = logging.FileHandler(log_file, 'a', encoding='utf8')
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
logger.setLevel(log_level)
logger_initialized[name] = True
return logger
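# Hedged usage sketch (not part of the original file): the log file path is hypothetical;
# calling get_logger again with the same name returns the already-initialized logger.
def _demo_get_logger():
    logger = get_logger("sdsvkie_demo", log_file="./workdirs/demo/run.log")
    logger.info("hello from the demo logger")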

View File

@ -1,4 +0,0 @@
from .common_post_processing import *
from .invoice_post_processing import *
from .receipt_post_processing import *
from .hardcoded_postprocess_funcs import *

View File

@ -1,129 +0,0 @@
import re
from datetime import datetime
from sdsvkie.utils import Word_group
YEAR_START = 2000
def construct_word_groups_to_kie_label(list_word_groups: list):
kie_dict = dict()
for wg in list_word_groups:
if wg.kie_label.lower() in ['other', 'others']:
continue
if wg.kie_label not in kie_dict:
kie_dict[wg.kie_label] = [wg]
else:
kie_dict[wg.kie_label].append(wg)
return kie_dict
def near(word_group1: Word_group, word_group2: Word_group):
min_height = min(
word_group1.boundingbox[3] - word_group1.boundingbox[1],
word_group2.boundingbox[3] - word_group2.boundingbox[1],
)
overlap = min(word_group1.boundingbox[3], word_group2.boundingbox[3]) - max(
word_group1.boundingbox[1], word_group2.boundingbox[1]
)
if overlap > 0:
return True
if abs(overlap / min_height) < 1.5:
print("near enough", abs(overlap / min_height), overlap, min_height)
return True
return False
def normalize_number(text_str: str, reserve_dot=False, reserve_plus=False, reserve_minus=False):
"""
Normalize a string of numbers by removing non-numeric characters
"""
assert isinstance(text_str, str), "input must be str"
reserver_chars = ""
if reserve_dot:
reserver_chars += ".,"
if reserve_plus:
reserver_chars += "+"
if reserve_minus:
reserver_chars += "-"
regex_fomula = "[^0-9{}]".format(reserver_chars)
normalized_text_str = re.sub(r"{}".format(regex_fomula), "", text_str)
return normalized_text_str
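# Hedged usage sketch (not part of the original file): strip everything except digits,
# optionally keeping separators, the minus sign, or the plus sign (as the tax-code and
# phone fields do in the invoice postprocessing).
def _demo_normalize_number():
    print(normalize_number("Tong cong: 1.000.000 d"))                  # -> "1000000"
    print(normalize_number("1.000.000", reserve_dot=True))             # -> "1.000.000"
    print(normalize_number("MST: 0102345678-001", reserve_minus=True)) # -> "0102345678-001"
    print(normalize_number("(+84) 912 345 678", reserve_plus=True))    # -> "+84912345678"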
def normalize_number_wordgroup(word_group, reserve_dot=False, reserve_plus=False, reserve_minus=False):
word_group.text = normalize_number(word_group.text, reserve_dot=reserve_dot, reserve_plus=reserve_plus, reserve_minus=reserve_minus)
return word_group
def tax_code_processing(tax_code_raw: str):
"""
"""
if len(tax_code_raw.replace(' ', '')) != 13 or (len(tax_code_raw.replace(' ', '')) != 14 and "-" not in tax_code_raw): # to remove the first/last number dupicated
tax_code_raw = tax_code_raw.split(' ')
tax_code_raw = sorted(tax_code_raw, key=lambda x: len(x), reverse=True)[0]
return tax_code_raw.replace(' ', '')
def normalize_tax_wordgroup(word_group, reserve_dot=False, reserve_plus=False, reserve_minus=False):
print("before: ", word_group.text)
word_group.text = tax_code_processing(word_group.text)
print("after: ", word_group.text)
word_group.text = normalize_number(word_group.text, reserve_dot=reserve_dot, reserve_plus=reserve_plus, reserve_minus=reserve_minus)
return word_group
def _date_format(date_string):
"""Format date string according format dd/MM/yyyy"""
date_string = (
date_string.replace("ngay ", "")
.replace(" thang ", "/")
.replace(" nam ", "/")
)
day, month, year = date_string.split("/")
day = day.rjust(2, "0")
month = month.rjust(2, "0")
year = f"20{year}" if len(year) == 2 else year
# Check valid year
try:
_ = datetime(year=int(year), month=int(month), day=int(day))
if int(year) > YEAR_START:
return "/".join([day, month, year])
except:
print("Date is invalid", date_string)
return None
def get_date(list_date):
"""Regex get date"""
list_date = [
_date_format(date.group(0))
for date in list_date
# if _date_format(date.group(0)) is not None
]
list_date = [
date for date in list_date if date is not None
]
return list_date
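# Hedged usage sketch (not part of the original file): get_date expects regex match objects;
# the pattern below is only an illustrative dd/MM/yyyy-style regex, not the one used elsewhere.
def _demo_get_date():
    matches = list(re.finditer(r"\d{1,2}/\d{1,2}/\d{2,4}", "Ngay ky: 5/9/2023"))
    print(get_date(matches))  # -> ["05/09/2023"]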
def merge_multi_page_results(result_pages):
"""Merge the result of the multiple pages
Args:
results (list[dict]): list of result dict of each page
"""
if len(result_pages) == 0:
return {}
result = result_pages[0]
for result_page in result_pages[1:]:
for field_name, field_value in result_page.items():
if field_name not in result:
result[field_name] = field_value
return result
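# Hedged usage sketch (not part of the original file): fields found on the first page win;
# later pages only contribute fields that are still missing.
def _demo_merge_multi_page_results():
    page_1 = {"no_value": "0001234", "total_value": ""}
    page_2 = {"total_value": "1000000", "date": "05/09/2023"}
    print(merge_multi_page_results([page_1, page_2]))
    # -> {"no_value": "0001234", "total_value": "", "date": "05/09/2023"}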

Some files were not shown because too many files have changed in this diff