2818 lines
185 KiB
Plaintext
2818 lines
185 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from PIL import Image, ImageDraw, ImageFont\n",
|
||
|
"import os \n",
|
||
|
"import random\n",
|
||
|
"import cv2\n",
|
||
|
"import numpy as np \n",
|
||
|
"from pathlib import Path"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"def visualize_kie(\n",
|
||
|
" img,\n",
|
||
|
" boxes, \n",
|
||
|
" pred_labels, \n",
|
||
|
" skip_classes=['other'], \n",
|
||
|
" image_name=\"out_kie.jpg\", \n",
|
||
|
" outdir=\"workdirs/visualize\"\n",
|
||
|
" ):\n",
|
||
|
" \"\"\"Visualize kie output \n",
|
||
|
"\n",
|
||
|
" Args:\n",
|
||
|
" img (PIL.Image | np.ndarray): input image; a numpy array is treated as BGR and converted to an RGB PIL image\n",
|
||
|
" boxes (list[xyxy]): list of xyxy boxes\n",
|
||
|
" pred_labels (list[str]): predicted label of each box, aligned by index with boxes\n",
|
||
|
" skip_classes (list[str], optional): list of skip labels. Defaults to ['other'].\n",
|
||
|
" image_name (str, optional): file name of the saved image inside outdir. Defaults to \"out_kie.jpg\".\n",
|
||
|
" outdir (str, optional): output directory, created if it does not exist. Defaults to \"workdirs/visualize\".\n",
|
||
|
" \"\"\"\n",
|
||
|
" if not os.path.exists(outdir):\n",
|
||
|
" os.makedirs(outdir, exist_ok=True)\n",
|
||
|
"\n",
|
||
|
" image = img.copy()\n",
|
||
|
" if isinstance(image, np.ndarray):\n",
|
||
|
" image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))\n",
|
||
|
" \n",
|
||
|
" set_labels = sorted(list(set(pred_labels)))\n",
|
||
|
" colors = [\n",
|
||
|
" (\n",
|
||
|
" random.randint(0, 255),\n",
|
||
|
" random.randint(0, 255),\n",
|
||
|
" random.randint(0, 255),\n",
|
||
|
" ) for _ in range(len(set_labels))\n",
|
||
|
" ]\n",
|
||
|
"\n",
|
||
|
" text_checkin = [0] * len(set_labels)\n",
|
||
|
"\n",
|
||
|
" draw = ImageDraw.Draw(image)\n",
|
||
|
" font = ImageFont.load_default()\n",
|
||
|
" for i, pred_label in enumerate(pred_labels):\n",
|
||
|
" if pred_label in skip_classes:\n",
|
||
|
" continue\n",
|
||
|
" pred_label_idx = set_labels.index(pred_label)\n",
|
||
|
" box = boxes[i]\n",
|
||
|
" color = colors[pred_label_idx]\n",
|
||
|
" draw.rectangle(box,fill=None, outline=color)\n",
|
||
|
"\n",
|
||
|
" # draw the text label only once per class, on the first box of that class\n",
|
||
|
" if text_checkin[set_labels.index(pred_label)] == 0:\n",
|
||
|
" draw.text((int(box[0])+10, int(box[1])-10),fill=color, text=pred_label, font = font)\n",
|
||
|
" text_checkin[pred_label_idx] = 1\n",
|
||
|
" image.save(os.path.join(outdir, image_name))\n",
|
||
|
"\n",
|
||
|
"def visualize_ocr(\n",
|
||
|
" img,\n",
|
||
|
" boxes, \n",
|
||
|
" texts,\n",
|
||
|
" image_name=\"out_ocr.jpg\", \n",
|
||
|
" outdir=\"workdirs/visualize\"\n",
|
||
|
" ):\n",
|
||
|
" \"\"\"Visualize OCR output \n",
|
||
|
"\n",
|
||
|
" Args:\n",
|
||
|
" img (PIL.Image | np.ndarray): input image; a numpy array is treated as BGR and converted to an RGB PIL image\n",
|
||
|
" boxes (list[xyxy]): list of xyxy boxes\n",
|
||
|
" texts (list[str]): OCR text of each box, aligned by index with boxes (the text itself is currently not drawn)\n",
|
||
|
" image_name (str, optional): file name of the saved image inside outdir. Defaults to \"out_ocr.jpg\".\n",
|
||
|
" outdir (str, optional): output directory, created if it does not exist. Defaults to \"workdirs/visualize\".\n",
|
||
|
" \"\"\"\n",
|
||
|
" if not os.path.exists(outdir):\n",
|
||
|
" os.makedirs(outdir, exist_ok=True)\n",
|
||
|
"\n",
|
||
|
" image = img.copy()\n",
|
||
|
" if isinstance(image, np.ndarray):\n",
|
||
|
" image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))\n",
|
||
|
" color = (\n",
|
||
|
" random.randint(0, 255),\n",
|
||
|
" random.randint(0, 255),\n",
|
||
|
" random.randint(0, 255),\n",
|
||
|
" )\n",
|
||
|
"\n",
|
||
|
" \n",
|
||
|
"\n",
|
||
|
" draw = ImageDraw.Draw(image)\n",
|
||
|
" font = ImageFont.load_default()\n",
|
||
|
" for i, text in enumerate(texts):\n",
|
||
|
" box = boxes[i]\n",
|
||
|
" draw.rectangle(box,fill=None, outline=color) \n",
|
||
|
" # draw.text((int(box[0])+10, int(box[1])-10),fill=color, text=text, font = font)\n",
|
||
|
" image.save(os.path.join(outdir, image_name))\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import glob\n",
|
||
|
"def read_txt(txt):\n",
|
||
|
" with open(txt, 'r', encoding='utf8') as f:\n",
|
||
|
" data = [line.strip() for line in f]\n",
|
||
|
" return data \n",
|
||
|
"\n",
|
||
|
"def get_data_from_txt_dir(txt_dir):\n",
|
||
|
" txt_paths = glob.glob(txt_dir + \"/*.txt\")\n",
|
||
|
" data = {}\n",
|
||
|
" for txt_path in txt_paths:\n",
|
||
|
" txt_name = os.path.basename(txt_path)\n",
|
||
|
" \n",
|
||
|
" \n",
|
||
|
" txt_data = read_txt(txt_path)\n",
|
||
|
" format_data = []\n",
|
||
|
" for line in txt_data:\n",
|
||
|
" items = line.split(\"\\t\")\n",
|
||
|
" assert len(items) == 6, \"error get len = {} - {} at {}\".format(len(items), items, txt_path)\n",
|
||
|
" box = [int(float(x)) for x in items[:4]]\n",
|
||
|
" text = items[4]\n",
|
||
|
" kie_label = items[5]\n",
|
||
|
" format_data.append(\n",
|
||
|
" {\n",
|
||
|
" 'box': box,\n",
|
||
|
" 'text': text, \n",
|
||
|
" 'label': kie_label\n",
|
||
|
" }\n",
|
||
|
" )\n",
|
||
|
" data[txt_name] = format_data\n",
|
||
|
"\n",
|
||
|
" return data"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 6,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import glob \n",
|
||
|
"import os \n",
|
||
|
"import cv2\n",
|
||
|
"\n",
|
||
|
"img_dir = \"/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/invoice_viettinbank_poc\"\n",
|
||
|
"txt_dir = \"/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/invoice_viettinbank_poc\"\n",
|
||
|
"vis_dir = \"/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/notebooks/visualize/invoice_viettinbank_poc\"\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 7,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"56\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"\n",
|
||
|
"from pathlib import Path\n",
|
||
|
"img_paths = [p for p in glob.glob(f\"{img_dir}/*\") if \".txt\" not in p]\n",
|
||
|
"print(len(img_paths))\n",
|
||
|
"all_data = get_data_from_txt_dir(txt_dir)\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"for img_path in img_paths:\n",
|
||
|
" img = cv2.imread(img_path)\n",
|
||
|
" if img is None:\n",
|
||
|
" print(\"Image is none: \", img_path)\n",
|
||
|
" txt_file = Path(img_path).with_suffix(\".txt\").name\n",
|
||
|
" img_data = all_data[str(Path(img_path).with_suffix(\".txt\").name)]\n",
|
||
|
" # print(img_data)\n",
|
||
|
" boxes = [item['box'] for item in img_data]\n",
|
||
|
" texts = [item['text'] for item in img_data]\n",
|
||
|
" labels = [item['label'] for item in img_data]\n",
|
||
|
" \n",
|
||
|
" \n",
|
||
|
" visualize_kie(\n",
|
||
|
" img,\n",
|
||
|
" boxes, \n",
|
||
|
" labels, \n",
|
||
|
" skip_classes=['Others', 'other'], \n",
|
||
|
" image_name=os.path.basename(img_path), \n",
|
||
|
" outdir=vis_dir\n",
|
||
|
" )\n",
|
||
|
" # break"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 21,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"def _check_iou(box1, box2, threshold=0.9):\n",
|
||
|
" \"\"\"Return the fraction of box1's area covered by box2 (intersection / area of box1, not a symmetric IoU).\n",
|
||
|
"\n",
|
||
|
" Args:\n",
|
||
|
" box1 (_type_): word box\n",
|
||
|
" box2 (_type_): line box \n",
|
||
|
" threshold (float, optional): currently unused by the function body. Defaults to 0.9.\n",
|
||
|
"\n",
|
||
|
" Returns:\n",
|
||
|
" float: intersection area divided by the area of box1\n",
|
||
|
" \"\"\"\n",
|
||
|
" area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])\n",
|
||
|
" area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])\n",
|
||
|
" xmin_intersect = max(box1[0], box2[0])\n",
|
||
|
" ymin_intersect = max(box1[1], box2[1])\n",
|
||
|
" xmax_intersect = min(box1[2], box2[2])\n",
|
||
|
" ymax_intersect = min(box1[3], box2[3])\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
" if xmax_intersect < xmin_intersect or ymax_intersect < ymin_intersect:\n",
|
||
|
" area_intersect = 0\n",
|
||
|
" else:\n",
|
||
|
" area_intersect = (xmax_intersect - xmin_intersect) * (\n",
|
||
|
" ymax_intersect - ymin_intersect\n",
|
||
|
" )\n",
|
||
|
" # union = area1 + area2 - area_intersect\n",
|
||
|
" iou = area_intersect / area1\n",
|
||
|
" return iou "
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 23,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"1.0"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 23,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"_check_iou(box1=(465, 901, 664, 940), box2=(465, 901, 735, 940))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"ename": "",
|
||
|
"evalue": "",
|
||
|
"output_type": "error",
|
||
|
"traceback": [
|
||
|
"\u001b[1;31mRunning cells with 'py38_hoanglv' requires the ipykernel package.\n",
|
||
|
"\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n",
|
||
|
"\u001b[1;31mCommand: 'conda install -n py38_hoanglv ipykernel --update-deps --force-reinstall'"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"from sdsvkie.utils.io_file import read_json\n",
|
||
|
"\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"pred_data = read_json(\"/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_5/test_mcocr_not_label_ep21.json\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 37,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"['minimart anan', '', 'vincommerce', 'nha sach gdtc cam pha', 'cua hang nam oanh', 'pho mo', 'saigon coop coop food hn the kpark', 'sctc co tho 104 tran phu cam pha', 'the coffee house', 'payoo', 'sieu thi bach hoa tong hop', 'sctc co thu 104 tran phu cam pha', 'thuc coffee', 'i vincommerce', 'f vincommerce', 'phuc anh minimart', 'vietcombank', 'guitar cafe', 'sieu thi minh loan', 'nha hang aozora sushi', 'thanh xuan', 'legend welcome to trung nguyen legend', 'coffee milano coffee vietnam', 'tiem da thanh xuan', 'gia re moi ngay', 'tiem da', 'oanh cua hang nam', 'phuc anh minimart noi ha lam', 'payoo uimmart', 'phuc coffee tea phuc long', 'anan minimart', 'c h a m', 'minimart', 'saigon coop coop food hn van khe', 'hniqlo vincom', 'ma co so opmart thue xa lo ha noi', 'ch thoi trang adidas', 'mart coopmart xa lo ha noi', 'vm hni cc nang huong', 'pharmacity', 'solanin l cong 1 phan', 'kai tea v kai tea', 'minimx', 'lamason10k', 'tien do thanh xuan', 'gia lam', 'sctc co thu 104 tran phu so cam pha', 'sieu thi my pham thong he shop', 'bidv', 'cafe cj building', 'go cool mart', 'saigon coop food hn', 'coopmart hau giang', 'feel coffee', 'coop saigon kpark food hn the', 'nha hang ut giang nguyen chanh', 'lahatafe just good coffee', 'satra group satrafoods nguyen van qua 2', 'tien no thanh xuan', '0 trung tam thuong mai aeon ha dong', 'vm thang long', 'cong ty co phan bot phap van cau gia', 'bieu thi bach hoa tong hop', 'nghcanh', 'big c long bien', 'books n coffee', 'coopmart', 'coop hoc quoc', 'vitimex quang ninh vincom cam pha', 'co opmart nguyen dinh chiel', 'cuitar cafe', 'vinmart', 'bieu thi thu cung pet yeu', 'pha nha sach gdtc cam pha', 'sieu thi minh loan phan', 'ly', 'flomesweethome ban hang', 'big c an phu', 'tiem tra', 'nha sachgdte cam pha', 'coopmart nguyen dinh chieu', 'biku thi bach hoa tong hoa', 'cp c o 0 p co opmart nguyen dinh chieu', 'nha nai 0110 am pha do 21d', 'ngoc com ga bao', 'jet mart', 'sushi nha hang aozora', 'bidv ', 'coffee milano coffee 
vietnam ', 'milano coffee milano coffee vietnam 4', 'coop smile', 'coor p mari', 'i thuc coffee', 'farnilymart', 'tran phu cam sctc co thu 104 pha', 'milano coffee milano coffee vietnam de', 'mart coopmart hau giang', 'satra group satrafoods 2b duong binh loi', 'ga dong quan', 'minimart anal', 'cty ld tnhh kfc viet nam', 'minimart phuc anh ha', 'velutino cong ty cp dau tu velutino viet nam', 'bun ho hu nn o xuan', 'biep thi bach hoa tono hoi', 'nha thuoc benh vien da khoa thanh pho vinh', 'phu noi', 'cua hang nam oanh tp ha', 'uniqlo vincom pham ngoc thach', 'vcb loi', 'saigon coop coop food hn the k park', 'hau giang', 'saigon coop food hn the kpark coop', 'welcome to trung nguyen legend', 'big c di an cty tnhh eb binh duong', 'milano coffee milang coffee vietnam ', 'bach hoa xanh', 'familymart', 'nha son gdty cam pha', 'dim sum corner trung hoa', 'minimart phuc anh ha noi lam', 'saigon coop pa coop food hn the kpark', 'cua hang tu chon anh giang', 'oma b a k e 4 e', 'hai ly', 'thghu tuan', 'covb food hn the', 'packer brinition 12341', 'viet tttm cho sui', 'tttm van hosieu thi seika mart', 'cho sui phu thi gia lam', 'circle k vietnam', 'cty cp sach tbth quang ninh', 'vinh', 'parts ga teaus 19', 'lahatafe', 'charles', 'coop food hn the', 'the coffee house thuy', 'saigon coop coop food hi the kpark', 'mooserroo vietham the moose roo smokehouse', 'tien da thanh xuan', 'saigo coop food', 'coopsmic', 'thu thanh xuan', 'phuong oc hai phong', 'kai tea kai tea', 'talala', 'ittm', 'vincom skylake', 'vision vincommerce', 'marumart']\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"store_names = [normalize(item['Store_name_value']) for k, item in pred_data.items()]\n",
|
||
|
"# store_names = list(set(store_names))\n",
|
||
|
"from collections import Counter\n",
|
||
|
"my_counter = Counter(store_names)\n",
|
||
|
"list_tuples = my_counter.most_common()\n",
|
||
|
"stores = [x[0] for x in list_tuples]\n",
|
||
|
"print(stores)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 40,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"[('minimart anan', 318),\n",
|
||
|
" ('', 296),\n",
|
||
|
" ('vincommerce', 177),\n",
|
||
|
" ('nha sach gdtc cam pha', 101),\n",
|
||
|
" ('cua hang nam oanh', 64),\n",
|
||
|
" ('pho mo', 41),\n",
|
||
|
" ('saigon coop coop food hn the kpark', 33),\n",
|
||
|
" ('sctc co tho 104 tran phu cam pha', 23),\n",
|
||
|
" ('the coffee house', 23),\n",
|
||
|
" ('payoo', 20),\n",
|
||
|
" ('sieu thi bach hoa tong hop', 18),\n",
|
||
|
" ('sctc co thu 104 tran phu cam pha', 16),\n",
|
||
|
" ('thuc coffee', 13),\n",
|
||
|
" ('i vincommerce', 10),\n",
|
||
|
" ('f vincommerce', 9),\n",
|
||
|
" ('phuc anh minimart', 9),\n",
|
||
|
" ('vietcombank', 9),\n",
|
||
|
" ('guitar cafe', 8),\n",
|
||
|
" ('sieu thi minh loan', 5),\n",
|
||
|
" ('nha hang aozora sushi', 5),\n",
|
||
|
" ('thanh xuan', 3),\n",
|
||
|
" ('legend welcome to trung nguyen legend', 3),\n",
|
||
|
" ('coffee milano coffee vietnam', 2),\n",
|
||
|
" ('tiem da thanh xuan', 2),\n",
|
||
|
" ('gia re moi ngay', 2),\n",
|
||
|
" ('tiem da', 2),\n",
|
||
|
" ('oanh cua hang nam', 2),\n",
|
||
|
" ('phuc anh minimart noi ha lam', 2),\n",
|
||
|
" ('payoo uimmart', 2),\n",
|
||
|
" ('phuc coffee tea phuc long', 2),\n",
|
||
|
" ('anan minimart', 2),\n",
|
||
|
" ('c h a m', 2),\n",
|
||
|
" ('minimart', 2),\n",
|
||
|
" ('saigon coop coop food hn van khe', 2),\n",
|
||
|
" ('hniqlo vincom', 1),\n",
|
||
|
" ('ma co so opmart thue xa lo ha noi', 1),\n",
|
||
|
" ('ch thoi trang adidas', 1),\n",
|
||
|
" ('mart coopmart xa lo ha noi', 1),\n",
|
||
|
" ('vm hni cc nang huong', 1),\n",
|
||
|
" ('pharmacity', 1),\n",
|
||
|
" ('solanin l cong 1 phan', 1),\n",
|
||
|
" ('kai tea v kai tea', 1),\n",
|
||
|
" ('minimx', 1),\n",
|
||
|
" ('lamason10k', 1),\n",
|
||
|
" ('tien do thanh xuan', 1),\n",
|
||
|
" ('gia lam', 1),\n",
|
||
|
" ('sctc co thu 104 tran phu so cam pha', 1),\n",
|
||
|
" ('sieu thi my pham thong he shop', 1),\n",
|
||
|
" ('bidv', 1),\n",
|
||
|
" ('cafe cj building', 1),\n",
|
||
|
" ('go cool mart', 1),\n",
|
||
|
" ('saigon coop food hn', 1),\n",
|
||
|
" ('coopmart hau giang', 1),\n",
|
||
|
" ('feel coffee', 1),\n",
|
||
|
" ('coop saigon kpark food hn the', 1),\n",
|
||
|
" ('nha hang ut giang nguyen chanh', 1),\n",
|
||
|
" ('lahatafe just good coffee', 1),\n",
|
||
|
" ('satra group satrafoods nguyen van qua 2', 1),\n",
|
||
|
" ('tien no thanh xuan', 1),\n",
|
||
|
" ('0 trung tam thuong mai aeon ha dong', 1),\n",
|
||
|
" ('vm thang long', 1),\n",
|
||
|
" ('cong ty co phan bot phap van cau gia', 1),\n",
|
||
|
" ('bieu thi bach hoa tong hop', 1),\n",
|
||
|
" ('nghcanh', 1),\n",
|
||
|
" ('big c long bien', 1),\n",
|
||
|
" ('books n coffee', 1),\n",
|
||
|
" ('coopmart', 1),\n",
|
||
|
" ('coop hoc quoc', 1),\n",
|
||
|
" ('vitimex quang ninh vincom cam pha', 1),\n",
|
||
|
" ('co opmart nguyen dinh chiel', 1),\n",
|
||
|
" ('cuitar cafe', 1),\n",
|
||
|
" ('vinmart', 1),\n",
|
||
|
" ('bieu thi thu cung pet yeu', 1),\n",
|
||
|
" ('pha nha sach gdtc cam pha', 1),\n",
|
||
|
" ('sieu thi minh loan phan', 1),\n",
|
||
|
" ('ly', 1),\n",
|
||
|
" ('flomesweethome ban hang', 1),\n",
|
||
|
" ('big c an phu', 1),\n",
|
||
|
" ('tiem tra', 1),\n",
|
||
|
" ('nha sachgdte cam pha', 1),\n",
|
||
|
" ('coopmart nguyen dinh chieu', 1),\n",
|
||
|
" ('biku thi bach hoa tong hoa', 1),\n",
|
||
|
" ('cp c o 0 p co opmart nguyen dinh chieu', 1),\n",
|
||
|
" ('nha nai 0110 am pha do 21d', 1),\n",
|
||
|
" ('ngoc com ga bao', 1),\n",
|
||
|
" ('jet mart', 1),\n",
|
||
|
" ('sushi nha hang aozora', 1),\n",
|
||
|
" ('bidv ', 1),\n",
|
||
|
" ('coffee milano coffee vietnam ', 1),\n",
|
||
|
" ('milano coffee milano coffee vietnam 4', 1),\n",
|
||
|
" ('coop smile', 1),\n",
|
||
|
" ('coor p mari', 1),\n",
|
||
|
" ('i thuc coffee', 1),\n",
|
||
|
" ('farnilymart', 1),\n",
|
||
|
" ('tran phu cam sctc co thu 104 pha', 1),\n",
|
||
|
" ('milano coffee milano coffee vietnam de', 1),\n",
|
||
|
" ('mart coopmart hau giang', 1),\n",
|
||
|
" ('satra group satrafoods 2b duong binh loi', 1),\n",
|
||
|
" ('ga dong quan', 1),\n",
|
||
|
" ('minimart anal', 1),\n",
|
||
|
" ('cty ld tnhh kfc viet nam', 1),\n",
|
||
|
" ('minimart phuc anh ha', 1),\n",
|
||
|
" ('velutino cong ty cp dau tu velutino viet nam', 1),\n",
|
||
|
" ('bun ho hu nn o xuan', 1),\n",
|
||
|
" ('biep thi bach hoa tono hoi', 1),\n",
|
||
|
" ('nha thuoc benh vien da khoa thanh pho vinh', 1),\n",
|
||
|
" ('phu noi', 1),\n",
|
||
|
" ('cua hang nam oanh tp ha', 1),\n",
|
||
|
" ('uniqlo vincom pham ngoc thach', 1),\n",
|
||
|
" ('vcb loi', 1),\n",
|
||
|
" ('saigon coop coop food hn the k park', 1),\n",
|
||
|
" ('hau giang', 1),\n",
|
||
|
" ('saigon coop food hn the kpark coop', 1),\n",
|
||
|
" ('welcome to trung nguyen legend', 1),\n",
|
||
|
" ('big c di an cty tnhh eb binh duong', 1),\n",
|
||
|
" ('milano coffee milang coffee vietnam ', 1),\n",
|
||
|
" ('bach hoa xanh', 1),\n",
|
||
|
" ('familymart', 1),\n",
|
||
|
" ('nha son gdty cam pha', 1),\n",
|
||
|
" ('dim sum corner trung hoa', 1),\n",
|
||
|
" ('minimart phuc anh ha noi lam', 1),\n",
|
||
|
" ('saigon coop pa coop food hn the kpark', 1),\n",
|
||
|
" ('cua hang tu chon anh giang', 1),\n",
|
||
|
" ('oma b a k e 4 e', 1),\n",
|
||
|
" ('hai ly', 1),\n",
|
||
|
" ('thghu tuan', 1),\n",
|
||
|
" ('covb food hn the', 1),\n",
|
||
|
" ('packer brinition 12341', 1),\n",
|
||
|
" ('viet tttm cho sui', 1),\n",
|
||
|
" ('tttm van hosieu thi seika mart', 1),\n",
|
||
|
" ('cho sui phu thi gia lam', 1),\n",
|
||
|
" ('circle k vietnam', 1),\n",
|
||
|
" ('cty cp sach tbth quang ninh', 1),\n",
|
||
|
" ('vinh', 1),\n",
|
||
|
" ('parts ga teaus 19', 1),\n",
|
||
|
" ('lahatafe', 1),\n",
|
||
|
" ('charles', 1),\n",
|
||
|
" ('coop food hn the', 1),\n",
|
||
|
" ('the coffee house thuy', 1),\n",
|
||
|
" ('saigon coop coop food hi the kpark', 1),\n",
|
||
|
" ('mooserroo vietham the moose roo smokehouse', 1),\n",
|
||
|
" ('tien da thanh xuan', 1),\n",
|
||
|
" ('saigo coop food', 1),\n",
|
||
|
" ('coopsmic', 1),\n",
|
||
|
" ('thu thanh xuan', 1),\n",
|
||
|
" ('phuong oc hai phong', 1),\n",
|
||
|
" ('kai tea kai tea', 1),\n",
|
||
|
" ('talala', 1),\n",
|
||
|
" ('ittm', 1),\n",
|
||
|
" ('vincom skylake', 1),\n",
|
||
|
" ('vision vincommerce', 1),\n",
|
||
|
" ('marumart', 1)]"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 40,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"my_counter.most_common()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 38,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"152"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 38,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"len(stores)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 45,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"img_dir = \"/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_5/test_mcocr_not_label_ep21\"\n",
|
||
|
"\n",
|
||
|
"store_names = [\n",
|
||
|
" 'minimart anan',\n",
|
||
|
" 'vincommerce', \n",
|
||
|
" 'nha sach gdtc cam pha',\n",
|
||
|
" 'cua hang nam oanh', \n",
|
||
|
" 'pho mo', \n",
|
||
|
" 'saigon coop coop food hn the kpark', \n",
|
||
|
" 'sctc co tho 104 tran phu cam pha', \n",
|
||
|
" 'the coffee house', \n",
|
||
|
" 'payoo', \n",
|
||
|
" 'sieu thi bach hoa tong hop', \n",
|
||
|
" 'sctc co thu 104 tran phu cam pha', \n",
|
||
|
" 'thuc coffee', \n",
|
||
|
" 'vincommerce', \n",
|
||
|
" 'phuc anh minimart', \n",
|
||
|
" 'vietcombank', \n",
|
||
|
" 'guitar cafe', \n",
|
||
|
" 'sieu thi minh loan'\n",
|
||
|
" ]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 47,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"other/have_title/HNIQLO_Vincom\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"the_coffee_house\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/Ma_Co_SO_opMart_thue:_XÃ_LO_HA_NOI\n",
|
||
|
"the_coffee_house\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"vincommerce\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/have_title/CH_THỜI_TRANG_ADIDAS\n",
|
||
|
"the_coffee_house\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"payoo\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/mart_Co.opMart_XÃ_LO_HA_NOI\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"the_coffee_house\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"the_coffee_house\n",
|
||
|
"other/have_title/COFFEE_Milano_Coffee_Vietnam\n",
|
||
|
"minimart_anan\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/VM+_HNI_CC_Nàng_Hương\n",
|
||
|
"pho_mo\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/have_title/Tiêm_đã_Thanh_Xuân\n",
|
||
|
"other/have_title/GIÁ_RẺ_MOI_NGÀY\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/Nhà_Hàng_Aozora_Sushi\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"guitar_cafe\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/Pharmacity\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"other/have_title/Solanin_l_Công_1)_Phân\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"pho_mo\n",
|
||
|
"minimart_anan\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"phuc_anh_minimart\n",
|
||
|
"other/non_title\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"the_coffee_house\n",
|
||
|
"other/have_title/Tiêm_đã\n",
|
||
|
"pho_mo\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"minimart_anan\n",
|
||
|
"payoo\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"pho_mo\n",
|
||
|
"vincommerce\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"other/have_title/Nhà_Hàng_Aozora_Sushi\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/have_title/KAI_TEA_V_KAI_TEA\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/MINIMX\n",
|
||
|
"pho_mo\n",
|
||
|
"other/have_title/Thanh_Xuân\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/have_title/LAMASON10K\n",
|
||
|
"other/have_title/Tiền_đo_Thanh_Xuân\n",
|
||
|
"other/non_title\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"vincommerce\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"pho_mo\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"vincommerce\n",
|
||
|
"other/have_title/Gia_Lam\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"other/non_title\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"the_coffee_house\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"other/have_title/BIDV&\n",
|
||
|
"minimart_anan\n",
|
||
|
"the_coffee_house\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"the_coffee_house\n",
|
||
|
"vietcombank\n",
|
||
|
"other/non_title\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/Cafe_CJ_Building\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"payoo\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"pho_mo\n",
|
||
|
"vincommerce\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/have_title/GO_CO.OL_mart\n",
|
||
|
"the_coffee_house\n",
|
||
|
"vincommerce\n",
|
||
|
"pho_mo\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"pho_mo\n",
|
||
|
"other/non_title\n",
|
||
|
"the_coffee_house\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"the_coffee_house\n",
|
||
|
"minimart_anan\n",
|
||
|
"phuc_anh_minimart\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/Co.opMart_HAU_GIANG\n",
|
||
|
"other/non_title\n",
|
||
|
"phuc_anh_minimart\n",
|
||
|
"the_coffee_house\n",
|
||
|
"minimart_anan\n",
|
||
|
"payoo\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"pho_mo\n",
|
||
|
"vincommerce\n",
|
||
|
"thuc_coffee\n",
|
||
|
"pho_mo\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"phuc_anh_minimart\n",
|
||
|
"vincommerce\n",
|
||
|
"other/have_title/Payoo_UIMMART\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"minimart_anan\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/Nhà_Hàng_Út_Giang_Nguyễn_Chánh\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"payoo\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"pho_mo\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/PHUC_Coffee_Tea_PHUC_LONG\n",
|
||
|
"the_coffee_house\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"phuc_anh_minimart\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"pho_mo\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/Tiêm_đã\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/LahaTafe_JUST_GOOD_COFFEE\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"vietcombank\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/Legend_Welcome_to_Trung_Nguyen_Legend\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/C_H_A_M\n",
|
||
|
"other/have_title/Satra_Group_Satrafoods_Nguyễn_Văn_Quá_2\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"vietcombank\n",
|
||
|
"payoo\n",
|
||
|
"pho_mo\n",
|
||
|
"vietcombank\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/have_title/Tiền_No_Thanh_Xuân\n",
|
||
|
"other/have_title/0_TRUNG_TÂM_THƯƠNG_MẠI_AEON_-_HÀ_ĐÔNG\n",
|
||
|
"other/non_title\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"vincommerce\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"other/have_title/PHUC_Coffee_Tea_PHUC_LONG\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"other/have_title/VM_Thăng_Long\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"other/have_title/CÔNG_TY_cổ_PHẦN_BOT_PHÁP_VÂN_-_CẦU_GIA\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"the_coffee_house\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"pho_mo\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"the_coffee_house\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"pho_mo\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"minimart_anan\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/have_title/Nhà_Hàng_Aozora_Sushi\n",
|
||
|
"minimart_anan\n",
|
||
|
"the_coffee_house\n",
|
||
|
"vincommerce\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/have_title/Ng.H.Cảnh\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/BIG_C_LONG_BIEN\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/BOOKS_n_COFFEE\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"pho_mo\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"pho_mo\n",
|
||
|
"other/have_title/Co.opMart\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"other/have_title/co.op_HOC_QUOC\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"the_coffee_house\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/Vitimex_Quảng_Ninh_VinCom_Cẩm_Phà\n",
|
||
|
"other/have_title/Co_.opMart_NGUYỄN_DINH_CHIEL\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"guitar_cafe\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/VinMart\n",
|
||
|
"guitar_cafe\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"the_coffee_house\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"other/have_title/Biêu_thị_thủ_cung_PET_YÊU\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"the_coffee_house\n",
|
||
|
"minimart_anan\n",
|
||
|
"payoo\n",
|
||
|
"minimart_anan\n",
|
||
|
"pho_mo\n",
|
||
|
"minimart_anan\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"the_coffee_house\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/Ly\n",
|
||
|
"vincommerce\n",
|
||
|
"other/have_title/FlomeSweetHome_-_Bán_hàng\n",
|
||
|
"other/have_title/BIG_C_AN_PHU\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/have_title/Tiêm_trả\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"other/have_title/Tiếm_đã_Thanh_Xuân\n",
|
||
|
"other/have_title/Legend_Welcome_to_Trung_Nguyen_Legend\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"other/have_title/Legend_Welcome_to_Trung_Nguyen_Legend\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"pho_mo\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/have_title/GIÁ_RE_MỔI_NGÀY\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"payoo\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"pho_mo\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"vietcombank\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/Co.opMart_NGUYỄN_DINH_CHIEU\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"the_coffee_house\n",
|
||
|
"minimart_anan\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"pho_mo\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"other/have_title/CP_C_O_0_P_Co_opMart_NGUYỄN_DINH_CHIEU\n",
|
||
|
"other/have_title/NHÃ_NAI_-_01).10_._AM_PHA_DO_21D\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/NGỌC_CƠM_GÀ_BẢO\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/JET_MART\n",
|
||
|
"other/non_title\n",
|
||
|
"payoo\n",
|
||
|
"pho_mo\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"the_coffee_house\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"pho_mo\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/Sushi_Nhà_Hàng_Aozora\n",
|
||
|
"vincommerce\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/BIDV_,\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/COFFEE_Milano_Coffee_Vietnam_####\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"payoo\n",
|
||
|
"vincommerce\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/non_title\n",
|
||
|
"phuc_anh_minimart\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/MILANO_COFFEE_Milano_Coffee_Vietnam_4\n",
|
||
|
"other/have_title/co.op_smile\n",
|
||
|
"other/have_title/CO.Or_P_mari\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"vincommerce\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"vincommerce\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"payoo\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"the_coffee_house\n",
|
||
|
"vincommerce\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"other/have_title/FarnilyMart\n",
|
||
|
"other/have_title/TRẦN_PHÚ_-_CẨM_SCTC_CÔ_THỦ_104_PHẢ\n",
|
||
|
"phuc_anh_minimart\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/MILANO_COFFEE_Milano_Coffee_Vietnam_đẻ\n",
|
||
|
"other/have_title/mart_Co.opMart_HAU_GIANG\n",
|
||
|
"minimart_anan\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"other/have_title/Satra_Group_Satrafoods_2B_Đường_Bình_Lợi\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/Ga_Dong_Quan\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"vincommerce\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"other/non_title\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"minimart_anan\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"other/have_title/Cty_LD_TNHH_KFC_Viet_Nam\n",
|
||
|
"vincommerce\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"pho_mo\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"pho_mo\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/VELUTINO_CÔNG_TY_CP_ĐẦU_TƯ_VELUTINO_VIỆT_NAM\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"the_coffee_house\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/Bún_HÒ_Hu_NN_O_Xuân\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"the_coffee_house\n",
|
||
|
"other/non_title\n",
|
||
|
"vietcombank\n",
|
||
|
"pho_mo\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/NHÀ_THUỐC_BỆNH_VIỆN_ĐA_KHOA_THÀNH_PHỐ_VINH\n",
|
||
|
"minimart_anan\n",
|
||
|
"phuc_anh_minimart\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/non_title\n",
|
||
|
"guitar_cafe\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"pho_mo\n",
|
||
|
"vietcombank\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/Thanh_Xuân\n",
|
||
|
"pho_mo\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"guitar_cafe\n",
|
||
|
"pho_mo\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/have_title/UNIQLO_Vincom_Phạm_Ngọc_Thạch\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/VCB_-_Lợi\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"pho_mo\n",
|
||
|
"pho_mo\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"other/have_title/HAU_GIANG\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/Welcome_to_Trung_Nguyen_Legend\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"other/have_title/BIG_C_DI_AN_CTY_TNHH_EB_BINH_DUONG\n",
|
||
|
"minimart_anan\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/MILANO_COFFEE_Milang_Coffee_Vietnam_####\n",
|
||
|
"other/non_title\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/have_title/BÁCH_HÓA_XANH\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"guitar_cafe\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"payoo\n",
|
||
|
"other/have_title/FamilyMart\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/have_title/Payoo_UIMMART\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"the_coffee_house\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"payoo\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/DIM_SUM_CORNER_TRUNG_HÒA\n",
|
||
|
"vincommerce\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"pho_mo\n",
|
||
|
"minimart_anan\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"pho_mo\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"minimart_anan\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"the_coffee_house\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"pho_mo\n",
|
||
|
"vietcombank\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/oma_B_a_k_e_4_é\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/Hải_Ly\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"the_coffee_house\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/have_title/THGHU_TUẤN\n",
|
||
|
"other/non_title\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/Covb_Food_HN_The\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"payoo\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/PACKER_BRINITION_12341\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"phuc_anh_minimart\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/Nhà_Hàng_Aozora_Sushi\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/Việt_TTTM_Cho_Sui\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"payoo\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"minimart_anan\n",
|
||
|
"payoo\n",
|
||
|
"other/have_title/TTTM_VAN_HO-SIEU_THI_SEIKA_MART\n",
|
||
|
"other/have_title/Chợ_Sủi_Phú_Thị_Gia_Lâm\n",
|
||
|
"guitar_cafe\n",
|
||
|
"other/non_title\n",
|
||
|
"the_coffee_house\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"pho_mo\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/CIRCLE_K_VIETNAM\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"other/non_title\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/have_title/CTY_CP_SÁCH_&_TBTH_QUẢNG_NINH\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"other/have_title/VINH\n",
|
||
|
"other/have_title/PARTS_GA_TEAUS_1.9\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/LahaTafe\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"other/have_title/charles\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"other/have_title/Nhà_Hàng_Aozora_Sushi\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"other/non_title\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/C_H_A_M\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"pho_mo\n",
|
||
|
"other/non_title\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"the_coffee_house\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"the_coffee_house\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"the_coffee_house\n",
|
||
|
"other/have_title/Thanh_Xuân\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"vincommerce\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"other/have_title/MOOSERROO_VIETHAM_THE_MOOSE_&_ROO_SMOKEHOUSE\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"the_coffee_house\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/Tiền_đã_Thanh_Xuân\n",
|
||
|
"pho_mo\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"pho_mo\n",
|
||
|
"payoo\n",
|
||
|
"phuc_anh_minimart\n",
|
||
|
"payoo\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"guitar_cafe\n",
|
||
|
"minimart_anan\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"other/non_title\n",
|
||
|
"sieu_thi_bach_hoa_tong_hop\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"payoo\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/have_title/coopsmic\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"vincommerce\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"other/have_title/thủ_Thanh_Xuân\n",
|
||
|
"vietcombank\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"phuc_anh_minimart\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/Phương_Ốc_Hải_Phòng\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"the_coffee_house\n",
|
||
|
"other/have_title/KAI_TEA_KAI_TEA\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"vincommerce\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"sctc_co_tho_104_tran_phu__cam_pha\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"the_coffee_house\n",
|
||
|
"saigon_coop_coop_food_hn_the_kpark\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"pho_mo\n",
|
||
|
"minimart_anan\n",
|
||
|
"pho_mo\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"other/have_title/TALALA\n",
|
||
|
"the_coffee_house\n",
|
||
|
"other/have_title/ITTM\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"guitar_cafe\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/have_title/VINCOM_SKYLAKE\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"nha_sach_gdtc_cam_pha\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"other/have_title/COFFEE_Milano_Coffee_Vietnam\n",
|
||
|
"other/non_title\n",
|
||
|
"the_coffee_house\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/have_title/MARUMART\n",
|
||
|
"other/non_title\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"cua_hang_nam_oanh\n",
|
||
|
"minimart_anan\n",
|
||
|
"other/non_title\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"pho_mo\n",
|
||
|
"vincommerce\n",
|
||
|
"other/non_title\n",
|
||
|
"minimart_anan\n",
|
||
|
"payoo\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"import Levenshtein\n",
|
||
|
"from pathlib import Path\n",
|
||
|
"import shutil\n",
|
||
|
"import re \n",
|
||
|
"from unidecode import unidecode\n",
|
||
|
"\n",
|
||
|
"def normalize(text):\n",
|
||
|
" text = text.lower()\n",
|
||
|
" text = unidecode(text)\n",
|
||
|
" text = re.sub(r'[^a-zA-Z0-9\\s]+', '', text)\n",
|
||
|
" return text \n",
|
||
|
" \n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"def is_match(src, str_new, thr=0.7):\n",
|
||
|
" src = normalize(src)\n",
|
||
|
" str_new = normalize(str_new)\n",
|
||
|
" distance = Levenshtein.ratio(src, str_new)\n",
|
||
|
" if distance > thr:\n",
|
||
|
" return True \n",
|
||
|
" else:\n",
|
||
|
" return False\n",
|
||
|
"\n",
|
||
|
"def get_store_name(gt_store, store_list):\n",
|
||
|
" for store in store_list:\n",
|
||
|
" if is_match(store, gt_store, thr=0.6):\n",
|
||
|
" return store.lower()\n",
|
||
|
" \n",
|
||
|
" if len(gt_store) == 0:\n",
|
||
|
" return \"other/non_title\"\n",
|
||
|
" else:\n",
|
||
|
" return \"other/have_title/{}\".format(gt_store)\n",
|
||
|
" \n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"out_dir = \"/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_5/test_mcocr_not_label_ep21_by_store_name\"\n",
|
||
|
"out_dir = Path(out_dir)\n",
|
||
|
"for img_name, item in pred_data.items():\n",
|
||
|
" store_name = item['Store_name_value']\n",
|
||
|
" store_category = get_store_name(store_name, store_list=store_names)\n",
|
||
|
" store_category = store_category.replace(\" \", \"_\")\n",
|
||
|
" print(store_category)\n",
|
||
|
" out_dir_by_store = out_dir / store_category\n",
|
||
|
" if not out_dir_by_store.exists():\n",
|
||
|
" out_dir_by_store.mkdir(parents=True, exist_ok=True)\n",
|
||
|
"\n",
|
||
|
" img_full_name = Path(img_name).with_suffix(\".jpg\")\n",
|
||
|
" img_full_path = Path(img_dir) / img_full_name\n",
|
||
|
" if not img_full_path.exists():\n",
|
||
|
" print(str(img_full_path))\n",
|
||
|
" continue\n",
|
||
|
" else:\n",
|
||
|
" shutil.copy(str(img_full_path), out_dir_by_store)\n",
|
||
|
"\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 50,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import glob\n",
|
||
|
"import os \n",
|
||
|
"import shutil\n",
|
||
|
"non_title_dir = \"/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/sdsap_receipt/exp_5/test_mcocr_not_label_ep21_by_store_name/other/non_title\"\n",
|
||
|
"src_dir = \"/mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/MC-OCR/raw/Images_aligned/train_splitted/not_labeling\"\n",
|
||
|
"\n",
|
||
|
"out_dir = \"/mnt/ssd1T/hoanglv/Projects/KIE/DATA/OTHER_DATA/MC-OCR/raw/Images_aligned/train_splitted/no_detect_seller\"\n",
|
||
|
"\n",
|
||
|
"non_seller_img_paths = glob.glob(non_title_dir + \"/*.jpg\")\n",
|
||
|
"if not os.path.exists(out_dir):\n",
|
||
|
" os.makedirs(out_dir, exist_ok=True)\n",
|
||
|
"for img_path in non_seller_img_paths:\n",
|
||
|
" shutil.copy(os.path.join(src_dir, os.path.basename(img_path)), out_dir)\n",
|
||
|
"\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"ename": "",
|
||
|
"evalue": "",
|
||
|
"output_type": "error",
|
||
|
"traceback": [
|
||
|
"\u001b[1;31mFailed to start the Kernel 'py38_hoanglv (Python 3.8.16)'. \n",
|
||
|
"\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details. Cannot find module './351.node.js'\n",
|
||
|
"\u001b[1;31mRequire stack:\n",
|
||
|
"\u001b[1;31m- /home/sds/.vscode-server/extensions/ms-toolsai.jupyter-2023.3.1201040234/out/extension.node.js\n",
|
||
|
"\u001b[1;31m- /home/sds/.vscode-server/bin/252e5463d60e63238250799aef7375787f68b4ee/out/vs/loader.js\n",
|
||
|
"\u001b[1;31m- /home/sds/.vscode-server/bin/252e5463d60e63238250799aef7375787f68b4ee/out/bootstrap-amd.js\n",
|
||
|
"\u001b[1;31m- /home/sds/.vscode-server/bin/252e5463d60e63238250799aef7375787f68b4ee/out/bootstrap-fork.js"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"import json \n",
|
||
|
"with open(\"/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/invoice/vnpt_exp_4/pred_vat_multi_page.json\", 'r', encoding='utf8') as f:\n",
|
||
|
" data = json.load(f)\n",
|
||
|
" \n",
|
||
|
"data"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import shutil\n",
|
||
|
"import glob \n",
|
||
|
"from pathlib import Path"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"pdf_paths = glob.glob(\"/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/*.jpg\")\n",
|
||
|
"len(pdf_paths)\n",
|
||
|
"out_dir = \"/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/intermediate/multi_pages/SL_HCM_batch_2_first_last_page_2\"\n",
|
||
|
"if not Path(out_dir).exists():\n",
|
||
|
" Path(out_dir).mkdir(parents=True, exist_ok=True)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 10,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"['/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684662_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684662_2.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684662_3.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684671_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684671_2.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684676_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684676_2.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684846_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684846_2.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684855_1.jpg']\n",
|
||
|
"EAS03684662 EAS03684662\n",
|
||
|
"EAS03684662 EAS03684662\n",
|
||
|
"EAS03684662 EAS03684662\n",
|
||
|
"EAS03684662 EAS03684671\n",
|
||
|
"EAS03684671 EAS03684671\n",
|
||
|
"EAS03684671 EAS03684676\n",
|
||
|
"EAS03684676 EAS03684676\n",
|
||
|
"EAS03684676 EAS03684846\n",
|
||
|
"EAS03684846 EAS03684846\n",
|
||
|
"EAS03684846 EAS03684855\n",
|
||
|
"EAS03684855 EAS03684855\n",
|
||
|
"EAS03684855 EAS03684856\n",
|
||
|
"EAS03684856 EAS03684856\n",
|
||
|
"EAS03684856 EAS03684871\n",
|
||
|
"EAS03684871 EAS03684871\n",
|
||
|
"EAS03684871 EAS03684883\n",
|
||
|
"EAS03684883 EAS03684883\n",
|
||
|
"EAS03684883 EAS03685924\n",
|
||
|
"EAS03685924 EAS03685924\n",
|
||
|
"EAS03685924 EAS03709262\n",
|
||
|
"EAS03709262 EAS03709262\n",
|
||
|
"EAS03709262 EAS03742094\n",
|
||
|
"EAS03742094 EAS03742094\n",
|
||
|
"EAS03742094 EAS03742094\n",
|
||
|
"EAS03742094 EAS03742094\n",
|
||
|
"EAS03742094 EAS03742094\n",
|
||
|
"EAS03742094 EAS03743342\n",
|
||
|
"EAS03743342 EAS03743342\n",
|
||
|
"EAS03743342 EAS03743342\n",
|
||
|
"EAS03743342 EAS03743342\n",
|
||
|
"EAS03743342 EAS03743343\n",
|
||
|
"EAS03743343 EAS03743343\n",
|
||
|
"EAS03743343 EAS03743343\n",
|
||
|
"EAS03743343 EAS03743343\n",
|
||
|
"EAS03743343 EAS03743355\n",
|
||
|
"EAS03743355 EAS03743355\n",
|
||
|
"EAS03743355 EAS03743355\n",
|
||
|
"EAS03743355 EAS03743355\n",
|
||
|
"EAS03743355 EAS03743498\n",
|
||
|
"EAS03743498 EAS03743498\n",
|
||
|
"EAS03743498 EAS03743498\n",
|
||
|
"EAS03743498 EAS03743498\n",
|
||
|
"EAS03743498 EAS03750789\n",
|
||
|
"EAS03750789 EAS03750789\n",
|
||
|
"EAS03750789 EAS03784255\n",
|
||
|
"EAS03784255 EAS03784255\n",
|
||
|
"EAS03784255 EAS03794195\n",
|
||
|
"EAS03794195 EAS03794195\n",
|
||
|
"EAS03794195 EAS03794195\n",
|
||
|
"EAS03794195 EAS03794195\n",
|
||
|
"EAS03794195 EAS03794195\n",
|
||
|
"EAS03794195 EAS03794195\n",
|
||
|
"EAS03794195 EAS03794195\n",
|
||
|
"EAS03794195 EAS03794195\n",
|
||
|
"EAS03794195 EAS03794195\n",
|
||
|
"EAS03794195 EAS03794195\n",
|
||
|
"EAS03794195 EAS03794195\n",
|
||
|
"EAS03794195 EAS03794195\n",
|
||
|
"EAS03794195 EAS03794195\n",
|
||
|
"EAS03794195 EAS03794272\n",
|
||
|
"EAS03794272 EAS03794272\n",
|
||
|
"EAS03794272 EAS03794272\n",
|
||
|
"EAS03794272 EAS03794272\n",
|
||
|
"EAS03794272 EAS03794272\n",
|
||
|
"EAS03794272 EAS03794272\n",
|
||
|
"EAS03794272 EAS03794272\n",
|
||
|
"EAS03794272 EAS03794272\n",
|
||
|
"EAS03794272 EAS03794272\n",
|
||
|
"EAS03794272 EAS03794275\n",
|
||
|
"EAS03794275 EAS03794275\n",
|
||
|
"EAS03794275 EAS03796731\n",
|
||
|
"EAS03796731 EAS03796731\n",
|
||
|
"EAS03796731 EAS03796731\n",
|
||
|
"EAS03796731 EAS03796731\n",
|
||
|
"EAS03796731 EAS03796731\n",
|
||
|
"EAS03796731 EAS03796731\n",
|
||
|
"EAS03796731 EAS03796731\n",
|
||
|
"EAS03796731 EAS03796731\n",
|
||
|
"EAS03796731 EAS03796731\n",
|
||
|
"EAS03796731 EAS03796731\n",
|
||
|
"EAS03796731 EAS03796731\n",
|
||
|
"EAS03796731 EAS03796733\n",
|
||
|
"EAS03796733 EAS03796733\n",
|
||
|
"EAS03796733 EAS03796733\n",
|
||
|
"EAS03796733 EAS03796733\n",
|
||
|
"EAS03796733 EAS03796733\n",
|
||
|
"EAS03796733 EAS03796733\n",
|
||
|
"EAS03796733 EAS03796733\n",
|
||
|
"EAS03796733 EAS03796733\n",
|
||
|
"EAS03796733 EAS03796733\n",
|
||
|
"EAS03796733 EAS03796733\n",
|
||
|
"EAS03796733 EAS03796733\n",
|
||
|
"EAS03796733 EAS03796733\n",
|
||
|
"EAS03796733 EAS03796733\n",
|
||
|
"EAS03796733 EAS03796733\n",
|
||
|
"EAS03796733 EAS03796733\n",
|
||
|
"EAS03796733 EAS03796733\n",
|
||
|
"EAS03796733 EAS03796733\n",
|
||
|
"EAS03796733 EAS03797799\n",
|
||
|
"EAS03797799 EAS03797799\n",
|
||
|
"EAS03797799 EAS03797799\n",
|
||
|
"EAS03797799 EAS03797799\n",
|
||
|
"EAS03797799 EAS03797799\n",
|
||
|
"EAS03797799 EAS03797799\n",
|
||
|
"EAS03797799 EAS03797799\n",
|
||
|
"EAS03797799 EAS03797799\n",
|
||
|
"EAS03797799 EAS03797811\n",
|
||
|
"EAS03797811 EAS03797811\n",
|
||
|
"EAS03797811 EAS03797811\n",
|
||
|
"EAS03797811 EAS03797811\n",
|
||
|
"EAS03797811 EAS03797811\n",
|
||
|
"EAS03797811 EAS03797811\n",
|
||
|
"EAS03797811 EAS03797811\n",
|
||
|
"EAS03797811 EAS03797811\n",
|
||
|
"EAS03797811 EAS03797811\n",
|
||
|
"EAS03797811 EAS03797811\n",
|
||
|
"EAS03797811 EAS03797811\n",
|
||
|
"EAS03797811 EAS03797811\n",
|
||
|
"EAS03797811 EAS03797811\n",
|
||
|
"EAS03797811 EAS03797812\n",
|
||
|
"EAS03797812 EAS03797812\n",
|
||
|
"EAS03797812 EAS03797812\n",
|
||
|
"EAS03797812 EAS03797812\n",
|
||
|
"EAS03797812 EAS03797812\n",
|
||
|
"EAS03797812 EAS03797812\n",
|
||
|
"EAS03797812 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797829\n",
|
||
|
"EAS03797829 EAS03797830\n",
|
||
|
"EAS03797830 EAS03797830\n",
|
||
|
"EAS03797830 EAS03797830\n",
|
||
|
"EAS03797830 EAS03797830\n",
|
||
|
"EAS03797830 EAS03797830\n",
|
||
|
"EAS03797830 EAS03797830\n",
|
||
|
"EAS03797830 EAS03797830\n",
|
||
|
"EAS03797830 EAS03797830\n",
|
||
|
"EAS03797830 EAS03797830\n",
|
||
|
"EAS03797830 EAS03797830\n",
|
||
|
"EAS03797830 EAS03797830\n",
|
||
|
"EAS03797830 EAS03797830\n",
|
||
|
"EAS03797830 EAS03797830\n",
|
||
|
"EAS03797830 EAS03797830\n",
|
||
|
"EAS03797830 EAS03797830\n",
|
||
|
"EAS03797830 EAS03798008\n",
|
||
|
"EAS03798008 EAS03798008\n",
|
||
|
"EAS03798008 EAS03798008\n",
|
||
|
"EAS03798008 EAS03799021\n",
|
||
|
"EAS03799021 EAS03799021\n",
|
||
|
"EAS03799021 EAS03799021\n",
|
||
|
"EAS03799021 EAS03799021\n",
|
||
|
"EAS03799021 EAS03799021\n",
|
||
|
"EAS03799021 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801790\n",
|
||
|
"EAS03801790 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03801821\n",
|
||
|
"EAS03801821 EAS03802414\n",
|
||
|
"EAS03802414 EAS03802414\n",
|
||
|
"EAS03802414 EAS03802414\n",
|
||
|
"EAS03802414 EAS03802414\n",
|
||
|
"EAS03802414 EAS03802424\n",
|
||
|
"EAS03802424 EAS03802424\n",
|
||
|
"EAS03802424 EAS03802445\n",
|
||
|
"EAS03802445 EAS03802445\n",
|
||
|
"EAS03802445 EAS03802445\n",
|
||
|
"EAS03802445 EAS03802445\n",
|
||
|
"EAS03802445 EAS03802966\n",
|
||
|
"EAS03802966 EAS03802966\n",
|
||
|
"EAS03802966 EAS03803045\n",
|
||
|
"EAS03803045 EAS03803045\n",
|
||
|
"EAS03803045 EAS03803045\n",
|
||
|
"EAS03803045 EAS03803045\n",
|
||
|
"EAS03803045 EAS03803045\n",
|
||
|
"EAS03803045 EAS03803045\n",
|
||
|
"EAS03803045 EAS03803045\n",
|
||
|
"EAS03803045 EAS03803045\n",
|
||
|
"EAS03803045 EAS03805606\n",
|
||
|
"EAS03805606 EAS03805606\n",
|
||
|
"EAS03805606 EAS03805606\n",
|
||
|
"EAS03805606 EAS03805923\n",
|
||
|
"EAS03805923 EAS03805923\n",
|
||
|
"EAS03805923 EAS03805967\n",
|
||
|
"EAS03805967 EAS03805967\n",
|
||
|
"EAS03805967 EAS03805967\n",
|
||
|
"EAS03805967 EAS03807505\n",
|
||
|
"EAS03807505 EAS03807505\n",
|
||
|
"EAS03807505 EAS03807922\n",
|
||
|
"EAS03807922 EAS03807922\n",
|
||
|
"EAS03807922 EAS03807922\n",
|
||
|
"EAS03807922 EAS03807922\n",
|
||
|
"EAS03807922 EAS03807922\n",
|
||
|
"EAS03807922 EAS03807922\n",
|
||
|
"EAS03807922 EAS03807922\n",
|
||
|
"EAS03807922 EAS03807922\n",
|
||
|
"EAS03807922 EAS03807922\n",
|
||
|
"EAS03807922 EAS03807922\n",
|
||
|
"EAS03807922 EAS03807922\n",
|
||
|
"EAS03807922 EAS03807922\n",
|
||
|
"EAS03807922 EAS03807922\n",
|
||
|
"EAS03807922 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03807924\n",
|
||
|
"EAS03807924 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808205\n",
|
||
|
"EAS03808205 EAS03808208\n",
|
||
|
"EAS03808208 EAS03808208\n",
|
||
|
"EAS03808208 EAS03808208\n",
|
||
|
"EAS03808208 EAS03808208\n",
|
||
|
"EAS03808208 EAS03808208\n",
|
||
|
"EAS03808208 EAS03808208\n",
|
||
|
"EAS03808208 EAS03808208\n",
|
||
|
"EAS03808208 EAS03808208\n",
|
||
|
"EAS03808208 EAS03808208\n",
|
||
|
"EAS03808208 EAS03808208\n",
|
||
|
"EAS03808208 EAS03808208\n",
|
||
|
"EAS03808208 EAS03808208\n",
|
||
|
"EAS03808208 EAS03808208\n",
|
||
|
"EAS03808208 EAS03808208\n",
|
||
|
"EAS03808208 EAS03808208\n",
|
||
|
"EAS03808208 EAS03808299\n",
|
||
|
"EAS03808299 EAS03808299\n",
|
||
|
"EAS03808299 EAS03808299\n",
|
||
|
"EAS03808299 EAS03808299\n",
|
||
|
"EAS03808299 EAS03808299\n",
|
||
|
"EAS03808299 EAS03808299\n",
|
||
|
"EAS03808299 EAS03808302\n",
|
||
|
"EAS03808302 EAS03808302\n",
|
||
|
"EAS03808302 EAS03809583\n",
|
||
|
"EAS03809583 EAS03809583\n",
|
||
|
"EAS03809583 EAS03809656\n",
|
||
|
"EAS03809656 EAS03809656\n",
|
||
|
"EAS03809656 EAS03809701\n",
|
||
|
"EAS03809701 EAS03809701\n",
|
||
|
"EAS03809701 EAS03809701\n",
|
||
|
"EAS03809701 EAS03809701\n",
|
||
|
"EAS03809701 EAS03809701\n",
|
||
|
"EAS03809701 EAS03810209\n",
|
||
|
"EAS03810209 EAS03810209\n",
|
||
|
"EAS03810209 EAS03813592\n",
|
||
|
"EAS03813592 EAS03813592\n",
|
||
|
"EAS03813592 EAS03813592\n",
|
||
|
"EAS03813592 EAS03813592\n",
|
||
|
"EAS03813592 EAS03813592\n",
|
||
|
"EAS03813592 EAS03813594\n",
|
||
|
"EAS03813594 EAS03813594\n",
|
||
|
"EAS03813594 EAS03813594\n",
|
||
|
"EAS03813594 EAS03813594\n",
|
||
|
"EAS03813594 EAS03813594\n",
|
||
|
"EAS03813594 EAS03813595\n",
|
||
|
"EAS03813595 EAS03813595\n",
|
||
|
"EAS03813595 EAS03813608\n",
|
||
|
"EAS03813608 EAS03813608\n",
|
||
|
"EAS03813608 EAS03813646\n",
|
||
|
"EAS03813646 EAS03813646\n",
|
||
|
"EAS03813646 EAS03813647\n",
|
||
|
"EAS03813647 EAS03813647\n",
|
||
|
"EAS03813647 EAS03813647\n",
|
||
|
"EAS03813647 EAS03815264\n",
|
||
|
"EAS03815264 EAS03815264\n",
|
||
|
"EAS03815264 EAS03816169\n",
|
||
|
"EAS03816169 EAS03816169\n",
|
||
|
"EAS03816169 EAS03816170\n",
|
||
|
"EAS03816170 EAS03816170\n",
|
||
|
"EAS03816170 EAS03816171\n",
|
||
|
"EAS03816171 EAS03816171\n",
|
||
|
"EAS03816171 EAS03816171\n",
|
||
|
"EAS03816171 EAS03816179\n",
|
||
|
"EAS03816179 EAS03816179\n",
|
||
|
"EAS03816179 EAS03816183\n",
|
||
|
"EAS03816183 EAS03816183\n",
|
||
|
"EAS03816183 EAS03816196\n",
|
||
|
"EAS03816196 EAS03816196\n",
|
||
|
"EAS03816196 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818115\n",
|
||
|
"EAS03818115 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818116\n",
|
||
|
"EAS03818116 EAS03818130\n",
|
||
|
"EAS03818130 EAS03818130\n",
|
||
|
"EAS03818130 EAS03818130\n",
|
||
|
"EAS03818130 EAS03818142\n",
|
||
|
"EAS03818142 EAS03818142\n",
|
||
|
"EAS03818142 EAS03818142\n",
|
||
|
"EAS03818142 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818146\n",
|
||
|
"EAS03818146 EAS03818147\n",
|
||
|
"EAS03818147 EAS03818147\n",
|
||
|
"EAS03818147 EAS03818147\n",
|
||
|
"EAS03818147 EAS03818147\n",
|
||
|
"EAS03818147 EAS03818147\n",
|
||
|
"EAS03818147 EAS03818147\n",
|
||
|
"EAS03818147 EAS03818147\n",
|
||
|
"EAS03818147 EAS03818147\n",
|
||
|
"EAS03818147 EAS03818147\n",
|
||
|
"EAS03818147 EAS03819215\n",
|
||
|
"EAS03819215 EAS03819215\n",
|
||
|
"EAS03819215 EAS03819330\n",
|
||
|
"EAS03819330 EAS03819330\n",
|
||
|
"EAS03819330 EAS03819330\n",
|
||
|
"EAS03819330 EAS03819330\n",
|
||
|
"EAS03819330 EAS03819335\n",
|
||
|
"EAS03819335 EAS03819335\n",
|
||
|
"EAS03819335 EAS03819335\n",
|
||
|
"EAS03819335 EAS03819335\n",
|
||
|
"EAS03819335 EAS03819347\n",
|
||
|
"EAS03819347 EAS03819347\n",
|
||
|
"EAS03819347 EAS03819347\n",
|
||
|
"EAS03819347 EAS03819347\n",
|
||
|
"EAS03819347 EAS03819347\n",
|
||
|
"EAS03819347 EAS03819349\n",
|
||
|
"EAS03819349 EAS03819349\n",
|
||
|
"EAS03819349 EAS03819357\n",
|
||
|
"EAS03819357 EAS03819357\n",
|
||
|
"EAS03819357 EAS03819357\n",
|
||
|
"EAS03819357 EAS03819364\n",
|
||
|
"EAS03819364 EAS03819364\n",
|
||
|
"EAS03819364 EAS03819374\n",
|
||
|
"EAS03819374 EAS03819374\n",
|
||
|
"EAS03819374 EAS03819374\n",
|
||
|
"EAS03819374 EAS03819374\n",
|
||
|
"EAS03819374 EAS03819374\n",
|
||
|
"EAS03819374 EAS03819374\n",
|
||
|
"EAS03819374 EAS03819374\n",
|
||
|
"EAS03819374 EAS03819374\n",
|
||
|
"EAS03819374 EAS03819374\n",
|
||
|
"EAS03819374 EAS03819374\n",
|
||
|
"EAS03819374 EAS03819376\n",
|
||
|
"EAS03819376 EAS03819376\n",
|
||
|
"EAS03819376 EAS03819376\n",
|
||
|
"EAS03819376 EAS03819376\n",
|
||
|
"EAS03819376 EAS03819376\n",
|
||
|
"EAS03819376 EAS03819376\n",
|
||
|
"EAS03819376 EAS03819376\n",
|
||
|
"EAS03819376 EAS03819376\n",
|
||
|
"EAS03819376 EAS03819376\n",
|
||
|
"EAS03819376 EAS03819376\n",
|
||
|
"EAS03819376 EAS03819376\n",
|
||
|
"EAS03819376 EAS03819376\n",
|
||
|
"EAS03819376 EAS03819376\n",
|
||
|
"EAS03819376 EAS03819542\n",
|
||
|
"EAS03819542 EAS03819542\n",
|
||
|
"EAS03819542 EAS03819542\n",
|
||
|
"EAS03819542 EAS03819542\n",
|
||
|
"EAS03819542 EAS03819542\n",
|
||
|
"EAS03819542 EAS03819542\n",
|
||
|
"EAS03819542 EAS03819542\n",
|
||
|
"EAS03819542 EAS03819542\n",
|
||
|
"EAS03819542 EAS03819542\n",
|
||
|
"EAS03819542 EAS03819542\n",
|
||
|
"EAS03819542 EAS03820646\n",
|
||
|
"EAS03820646 EAS03820646\n",
|
||
|
"EAS03820646 EAS03820650\n",
|
||
|
"EAS03820650 EAS03820650\n",
|
||
|
"EAS03820650 EAS03820650\n",
|
||
|
"EAS03820650 EAS03820888\n",
|
||
|
"EAS03820888 EAS03820888\n",
|
||
|
"EAS03820888 EAS03820895\n",
|
||
|
"EAS03820895 EAS03820895\n",
|
||
|
"EAS03820895 EAS03820895\n",
|
||
|
"EAS03820895 EAS03820898\n",
|
||
|
"EAS03820898 EAS03820898\n",
|
||
|
"EAS03820898 EAS03820901\n",
|
||
|
"EAS03820901 EAS03820901\n",
|
||
|
"EAS03820901 EAS03820901\n",
|
||
|
"EAS03820901 EAS03820922\n",
|
||
|
"EAS03820922 EAS03820922\n",
|
||
|
"EAS03820922 EAS03820923\n",
|
||
|
"EAS03820923 EAS03820923\n",
|
||
|
"EAS03820923 EAS03826135\n",
|
||
|
"EAS03826135 EAS03826135\n",
|
||
|
"EAS03826135 EAS03828418\n",
|
||
|
"EAS03828418 EAS03828418\n",
|
||
|
"EAS03828418 EAS03828418\n",
|
||
|
"EAS03828418 EAS03828421\n",
|
||
|
"EAS03828421 EAS03828421\n",
|
||
|
"EAS03828421 EAS03833384\n",
|
||
|
"EAS03833384 EAS03833384\n",
|
||
|
"EAS03833384 EAS03834106\n",
|
||
|
"EAS03834106 EAS03834106\n",
|
||
|
"EAS03834106 EAS03834139\n",
|
||
|
"EAS03834139 EAS03834139\n",
|
||
|
"EAS03834139 EAS03834423\n",
|
||
|
"EAS03834423 EAS03834423\n",
|
||
|
"EAS03834423 EAS03834787\n",
|
||
|
"EAS03834787 EAS03834787\n",
|
||
|
"EAS03834787 EAS03834787\n",
|
||
|
"EAS03834787 EAS03834787\n",
|
||
|
"EAS03834787 EAS03834787\n",
|
||
|
"EAS03834787 EAS03834798\n",
|
||
|
"EAS03834798 EAS03834798\n",
|
||
|
"EAS03834798 EAS03834800\n",
|
||
|
"EAS03834800 EAS03834800\n",
|
||
|
"EAS03834800 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834825\n",
|
||
|
"EAS03834825 EAS03834834\n",
|
||
|
"EAS03834834 EAS03834834\n",
|
||
|
"EAS03834834 EAS03834840\n",
|
||
|
"EAS03834840 EAS03834840\n",
|
||
|
"EAS03834840 EAS03834840\n",
|
||
|
"['/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684662_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684671_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684662_3.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684676_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684671_2.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684846_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684676_2.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684855_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684846_2.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684856_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684855_2.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684871_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684856_2.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684883_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684871_2.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03685924_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03684883_2.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03709262_1.jpg', 
'/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03685924_2.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03742094_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03709262_2.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03743342_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03742094_5.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03743343_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03743342_4.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03743355_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03743343_4.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03743498_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03743355_4.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03750789_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03743498_4.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03784255_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03750789_2.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03794195_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03784255_2.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03794272_1.jpg', 
'/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
"# Keep the first and the last page image of every document.\n",
"# File stems are split on '_': the first piece is used as the document key and\n",
"# the last piece as the page number.\n",
"pdf_paths = sorted(pdf_paths)\n",
"print(pdf_paths[:10])\n",
"\n",
"# Group the pages of each document together with their numeric page number.\n",
"# The string sort above places page 10 before page 2, so it cannot be relied on\n",
"# to find the last page of a document.\n",
"pages_by_key = {}\n",
"for pdf_path in pdf_paths:\n",
"    stem_parts = Path(pdf_path).stem.split(\"_\")\n",
"    pdf_key, pdf_page = stem_parts[0], int(stem_parts[-1])\n",
"    pages_by_key.setdefault(pdf_key, []).append((pdf_page, pdf_path))\n",
"\n",
"valid_pdf_paths = []\n",
"for pages in pages_by_key.values():\n",
"    # Tuples sort by page number first, so index 0 is the lowest page that is\n",
"    # present and index -1 the highest one.\n",
"    pages.sort()\n",
"    first_path, last_path = pages[0][1], pages[-1][1]\n",
"    valid_pdf_paths.append(first_path)\n",
"    # A single-page document has one path for both roles; list it only once.\n",
"    if last_path != first_path:\n",
"        valid_pdf_paths.append(last_path)\n",
"print(valid_pdf_paths)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 6,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"{'EAS03808205': ['/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_1.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_2.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_3.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_4.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_5.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_6.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_7.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_8.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_9.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_10.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_11.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_12.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_13.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_14.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_15.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_16.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_17.jpg', 
'/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_18.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_19.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_20.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_21.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_22.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_23.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_24.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_25.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_26.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_27.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_28.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_29.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_30.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_31.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_32.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_33.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_34.jpg', 
'/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_35.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/raw/JPG/multi_pages/SL_HCM_batch_2/EAS03808205_36.jpg', '/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
"# Map each document key (stem text before the first '_') to its page paths.\n",
"data_dict = {}\n",
"for pdf_path in pdf_paths:\n",
"    pdf_key = str(Path(pdf_path).stem).split(\"_\")[0]\n",
"    data_dict.setdefault(pdf_key, []).append(pdf_path)\n",
"\n",
"# Order the pages of every document by the integer after the last '_'.\n",
"for page_paths in data_dict.values():\n",
"    page_paths.sort(key=lambda page_path: int(Path(page_path).stem.split(\"_\")[-1]))\n",
"\n",
"print(data_dict)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 7,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
"# Copy the first and the last page of every document into out_dir.\n",
"# Create the directory first: when the destination does not exist, shutil.copy\n",
"# treats it as a file name and every copy would overwrite that one file.\n",
"os.makedirs(out_dir, exist_ok=True)\n",
"for pdf_list in data_dict.values():\n",
"    first_page, last_page = pdf_list[0], pdf_list[-1]\n",
"    shutil.copy(first_page, out_dir)\n",
"    # For a single-page document both entries are the same file; copy it once.\n",
"    if last_page != first_page:\n",
"        shutil.copy(last_page, out_dir)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"attachments": {},
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 13,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
"# Copy the selected page images into out_dir.\n",
"# Create the directory first: when the destination does not exist, shutil.copy\n",
"# treats it as a file name instead of copying into a folder.\n",
"os.makedirs(out_dir, exist_ok=True)\n",
"# dict.fromkeys drops repeated paths while keeping their order, so no file is\n",
"# copied more than once.\n",
"for pdf_path in dict.fromkeys(valid_pdf_paths):\n",
"    shutil.copy(pdf_path, out_dir)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"attachments": {},
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": []
|
||
|
},
|
||
|
{
|
||
|
"attachments": {},
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "py38_hoanglv",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.8.16"
|
||
|
},
|
||
|
"orig_nbformat": 4
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 2
|
||
|
}
|