# File-viewer metadata captured with this listing: 68 lines, 2.2 KiB, Python, executable file.
# Location of this configuration module on disk.
# NOTE(review): the name looks like a typo for CONFIG_PATH; it is kept
# unchanged because other modules may reference it under this spelling.
CONFIF_PATH = __file__

# Dataset directories (absolute, machine-specific paths).
TRAIN_DIR = "/home/sds/hoanglv/Projects/TokenClassification_invoice/DATA/train"
TEST_DIR = "/home/sds/hoanglv/Projects/TokenClassification_invoice/DATA/test"

# Tokenizer and model identifiers. TOKENIZER_DIR is a relative path; the
# other two are "microsoft/layoutxlm-base" identifiers.
TOKENIZER_DIR = "Kie_Hoanglv/model/layoutxlm-base-tokenizer"
TOKENIZER_NAME = "microsoft/layoutxlm-base"
MODEL_WEIGHT = "microsoft/layoutxlm-base"
# Pretrained model hyperparameters.
MAX_SEQ_LENGTH = 512
IMG_SIZE = 224  # default

# Character set: Vietnamese letters (both cases, with diacritics), ASCII
# digits and a selection of ASCII punctuation.
# The backslash is written as "\\" so the literal contains exactly one
# backslash without relying on the invalid escape sequence "\]", which newer
# Python versions warn about. The runtime value is unchanged.
VN_list_char = "aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ0123456789!#$%&()*+,-./:;<=>?@[\\]^_`{|}~"

# Training settings.
DEVICE = "cuda:0"
SAVE_DIR = "runs/layoutxlm-base-17-10-2022-maxwords150_samplingv2"
BATCH_SIZE = 8
NUM_WORKER = 0
EPOCHS = 100
SAVE_INTERVAL = 1000
LR_RATE = 5e-6  # ori: 5e-5

# Inference settings.
MAX_N_WORDS = 150
TRAINED_DIR = "Kie_Hoanglv/model/layoutxlm-base-17-10-2022-maxwords150_samplingv2/last"
PRED_DIR = "/home/sds/hoanglv/Projects/TokenClassification_invoice/runs/infer/kie_e2e_pred_17-10-2022-maxwords150_samplingv2_rm_dup_boxes_test"
# Sub-directory of PRED_DIR; derived so the two paths cannot drift apart.
VISUALIZE_DIR = PRED_DIR + "/visualize"

# Label names, grouped by invoice section.
# NOTE(review): element order is preserved exactly as found (a few *_value
# entries precede their *_key). List position may be used as a class index
# elsewhere, so confirm before reordering.
KIE_LABELS = [
    # id invoice
    "no_key",
    "no_value",
    "form_key",
    "form_value",
    "serial_key",
    "serial_value",
    "date",
    # seller info
    "seller_company_name_key",
    "seller_company_name_value",
    "seller_tax_code_key",
    "seller_tax_code_value",
    "seller_address_value",
    "seller_address_key",
    "seller_mobile_key",
    "seller_mobile_value",
    # buyer info
    "buyer_name_key",
    "buyer_name_value",
    "buyer_company_name_value",
    "buyer_company_name_key",
    "buyer_tax_code_key",
    "buyer_tax_code_value",
    "buyer_address_key",
    "buyer_address_value",
    "buyer_mobile_key",
    "buyer_mobile_value",
    # money info
    "VAT_amount_key",
    "VAT_amount_value",
    "total_key",
    "total_value",
    "total_in_words_key",
    "total_in_words_value",
    "other",
]

# Label names listed for skipping; the single entry is "buyer_mobile_value".
# NOTE(review): presumably excluded from evaluation metrics; confirm against the evaluation code.
SKIP_LABEL_EVAL = ["buyer_mobile_value"]