# sbt-idp/cope2n-ai-fi/modules/ocr_engine/settings.yml

# Device string shared by the sections below through the `*device` alias.
device: &device cuda:0
# Text detection default size is 1280x1280; [] = original size.
# TODO: fix the deskew code to resize the image only for detecting the angle;
# we want to feed the original-size image to the text detection pipeline so
# that the bounding boxes are mapped back to the original size.
max_img_size: [1920, 1920]
# Order: left, top, right, bottom
extend_bbox: [0, 0.0, 0.0, 0.0]
# 1 means batch_mode = False
batch_size: 1
# Detector settings: weight files plus the optional auto-rotation weights.
detector:
  # Alternative weights, kept for reference:
  # version: /mnt/hdd2T/datnt/datnt_from_ssd1T/mmdetection/wild_receipt_finetune_weights_c_lite.pth
  version: /workspace/cope2n-ai-fi/weights/models/sdsap_sbt/ocr_engine/sdsvtd/epoch_100_params.pth
  auto_rotate: true
  # NOTE(review): presumably only read when auto_rotate is true - confirm in the consumer.
  rotator_version: /workspace/cope2n-ai-fi/weights/models/sdsap_sbt/ocr_engine/sdsvtd/best_bbox_mAP_epoch_30_lite.pth
  device: *device

# Recognizer settings.
recognizer:
  # Alternative weights, kept for reference:
  # version: satrn-lite-general-pretrain-20230106
  version: /workspace/cope2n-ai-fi/weights/models/sdsvtr/hub/jxqhbem4to.pth
  # default = 12
  max_seq_len_overwrite: 24
  return_confident: true
  device: *device
# Note on `extend_bbox` (top of file): extending the bbox avoids losing accent marks in Vietnamese; if the OCR is used for English only, disable it.

# Deskew settings: each sub-model is given as a config file plus a weight path.
deskew:
  enable: true
  # Text detector used by the deskew step.
  text_detector:
    config: /workspace/cope2n-ai-fi/modules/ocr_engine/externals/sdsv_dewarp/config/det.yaml
    weight: /workspace/cope2n-ai-fi/weights/models/sdsap_sbt/ocr_engine/sdsv_dewarp/ch_PP-OCRv3_det_infer
  # NOTE(review): presumably a text-direction classifier - confirm in sdsv_dewarp.
  text_cls:
    config: /workspace/cope2n-ai-fi/modules/ocr_engine/externals/sdsv_dewarp/config/cls.yaml
    weight: /workspace/cope2n-ai-fi/weights/models/sdsap_sbt/ocr_engine/sdsv_dewarp/ch_ppocr_mobile_v2.0_cls_infer
  device: *device


# Parameters for the words-to-lines step.
# NOTE(review): exact semantics and units of each threshold are defined by the
# consuming code, which is not visible in this file - confirm before tuning.
words_to_lines:
  gradient: 0.6
  # presumably a horizontal distance in pixels - TODO confirm
  max_x_dist: 20
  max_running_y_shift_degree: 10  # degrees
  y_overlap_threshold: 0.5
  word_formation_mode: line