"""Settings for LayoutXLM token classification on invoices.

Everything here is a plain module-level constant. The absolute paths below
point into one specific user's home directory and have to be adjusted when
the project is run on another machine.
"""

# Path of this settings module. The misspelled name is kept because other
# modules may import it; CONFIG_PATH is the correctly spelled alias.
CONFIF_PATH = __file__
CONFIG_PATH = CONFIF_PATH

# Dataset locations.
TRAIN_DIR = "/home/sds/hoanglv/Projects/TokenClassification_invoice/DATA/train"
TEST_DIR = "/home/sds/hoanglv/Projects/TokenClassification_invoice/DATA/test"

# Tokenizer and pretrained weights.
TOKENIZER_DIR = "Kie_Hoanglv/model/layoutxlm-base-tokenizer"
TOKENIZER_NAME = "microsoft/layoutxlm-base"
MODEL_WEIGHT = "microsoft/layoutxlm-base"

# pretrained model hyperparameter
MAX_SEQ_LENGTH = 512
IMG_SIZE = 224  # default

# Vietnamese letters (both cases, with diacritics), digits and ASCII
# punctuation. Written as a raw string so that the backslash before "]" is a
# literal backslash instead of the invalid escape sequence "\]"; the
# resulting value is unchanged.
VN_list_char = r"aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ0123456789!#$%&()*+,-./:;<=>?@[\]^_`{|}~"

# Training run settings.
DEVICE = "cuda:0"
SAVE_DIR = "runs/layoutxlm-base-17-10-2022-maxwords150_samplingv2"
BATCH_SIZE = 8
NUM_WORKER = 0
EPOCHS = 100
SAVE_INTERVAL = 1000  # NOTE(review): unit (steps vs. epochs) is not visible here - confirm
LR_RATE = 5e-6  # ori: 5e-5

# infer
MAX_N_WORDS = 150
TRAINED_DIR = "Kie_Hoanglv/model/layoutxlm-base-17-10-2022-maxwords150_samplingv2/last"
PRED_DIR = "/home/sds/hoanglv/Projects/TokenClassification_invoice/runs/infer/kie_e2e_pred_17-10-2022-maxwords150_samplingv2_rm_dup_boxes_test"
VISUALIZE_DIR = PRED_DIR + "/visualize"

# Label names. The order is kept exactly as originally written, including
# the spots where "_value" precedes "_key", because list positions may be
# used as class ids by consumers (presumably; verify before reordering).
KIE_LABELS = [
    # id invoice
    "no_key",
    "no_value",
    "form_key",
    "form_value",
    "serial_key",
    "serial_value",
    "date",
    # seller info
    "seller_company_name_key",
    "seller_company_name_value",
    "seller_tax_code_key",
    "seller_tax_code_value",
    "seller_address_value",
    "seller_address_key",
    "seller_mobile_key",
    "seller_mobile_value",
    # buyer info
    "buyer_name_key",
    "buyer_name_value",
    "buyer_company_name_value",
    "buyer_company_name_key",
    "buyer_tax_code_key",
    "buyer_tax_code_value",
    "buyer_address_key",
    "buyer_address_value",
    "buyer_mobile_key",
    "buyer_mobile_value",
    # money info
    "VAT_amount_key",
    "VAT_amount_value",
    "total_key",
    "total_value",
    "total_in_words_key",
    "total_in_words_value",
    "other",
]

# Labels listed here are meant to be skipped during evaluation, going by the
# name; the consuming code is not visible from this file.
SKIP_LABEL_EVAL = ["buyer_mobile_value"]