# sbt-idp/cope2n-ai-fi/configs/config_invoice/layoutxlm_base_invoice.py
# 2023-11-30 18:22:16 +07:00

# 68 lines
# 2.2 KiB
# Python
# Executable File

# Paths and pretrained identifiers.
# NOTE: the spelling "CONFIF_PATH" is kept on purpose; it is a public name of
# this module and other code may import it under this exact spelling.
CONFIF_PATH = __file__  # path of this config module itself

# Train / test data directories (machine-specific absolute paths).
TRAIN_DIR = (
    "/home/sds/hoanglv/Projects/TokenClassification_invoice"
    "/DATA/train"
)
TEST_DIR = (
    "/home/sds/hoanglv/Projects/TokenClassification_invoice"
    "/DATA/test"
)

# Tokenizer directory (relative path) plus tokenizer / model identifiers.
# Presumably the two "microsoft/..." values are Hugging Face hub ids -- TODO confirm.
TOKENIZER_DIR = "Kie_Hoanglv/model/layoutxlm-base-tokenizer"
TOKENIZER_NAME = "microsoft/layoutxlm-base"
MODEL_WEIGHT = "microsoft/layoutxlm-base"
# Pretrained model hyperparameters.
MAX_SEQ_LENGTH = 512
IMG_SIZE = 224  # default

# Character inventory: Latin letters with Vietnamese diacritics (lower/upper
# pairs), digits and ASCII punctuation. The list contains a single literal
# backslash between "[" and "]"; it is written as "\\" because "\]" is an
# invalid escape sequence that newer Python versions warn about. The runtime
# value of the string is unchanged.
VN_list_char = "aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ0123456789!#$%&()*+,-./:;<=>?@[\\]^_`{|}~"

# Training run settings.
DEVICE = "cuda:0"
SAVE_DIR = "runs/layoutxlm-base-17-10-2022-maxwords150_samplingv2"
BATCH_SIZE = 8
NUM_WORKER = 0  # presumably the data-loader worker count -- TODO confirm
EPOCHS = 100
SAVE_INTERVAL = 1000  # unit (steps vs. epochs) is not visible here -- TODO confirm
LR_RATE = 5e-6  # ori: 5e-5
# Inference settings.
MAX_N_WORDS = 150

# Relative path whose last component is "last".
TRAINED_DIR = (
    "Kie_Hoanglv/model/"
    "layoutxlm-base-17-10-2022-maxwords150_samplingv2/last"
)

# Prediction directory (machine-specific absolute path); the visualization
# directory is its "visualize" sub-directory.
PRED_DIR = (
    "/home/sds/hoanglv/Projects/TokenClassification_invoice/runs/infer/"
    "kie_e2e_pred_17-10-2022-maxwords150_samplingv2_rm_dup_boxes_test"
)
VISUALIZE_DIR = f"{PRED_DIR}/visualize"
# Label names for key information extraction on invoices.
# The order of entries is preserved exactly as given; presumably the list
# index is used as the class id, so entries must not be reordered -- TODO
# confirm against the training / inference code. Note that a few pairs list
# the "*_value" entry before the "*_key" entry, and "date" has no key/value
# split.
KIE_LABELS = [
    # Invoice identification
    "no_key",
    "no_value",
    "form_key",
    "form_value",
    "serial_key",
    "serial_value",
    "date",
    # Seller information
    "seller_company_name_key",
    "seller_company_name_value",
    "seller_tax_code_key",
    "seller_tax_code_value",
    "seller_address_value",
    "seller_address_key",
    "seller_mobile_key",
    "seller_mobile_value",
    # Buyer information
    "buyer_name_key",
    "buyer_name_value",
    "buyer_company_name_value",
    "buyer_company_name_key",
    "buyer_tax_code_key",
    "buyer_tax_code_value",
    "buyer_address_key",
    "buyer_address_value",
    "buyer_mobile_key",
    "buyer_mobile_value",
    # Monetary information
    "VAT_amount_key",
    "VAT_amount_value",
    "total_key",
    "total_value",
    "total_in_words_key",
    "total_in_words_value",
    # Catch-all label
    "other",
]

# Labels skipped during evaluation (going by the name; the consumer of this
# list is not visible in this file).
SKIP_LABEL_EVAL = [
    "buyer_mobile_value",
]