sbt-idp/cope2n-ai-fi/modules/TemplateMatching/textrecognition/configs/satrn_big.py

1116 lines
33 KiB
Python
Raw Normal View History

2023-11-30 11:22:16 +00:00
# ---------------------------------------------------------------------------
# Runtime, logging and input-size settings.
# NOTE(review): key names follow the OpenMMLab config layout; their exact
# meaning depends on the framework version in use -- confirm before editing.
# ---------------------------------------------------------------------------
checkpoint_config = {"interval": 1}
log_config = {"interval": 50, "hooks": [{"type": "TextLoggerHook"}]}
dist_params = {"backend": "nccl"}
log_level = "INFO"

# No initial weights are named; `resume_from` points at a checkpoint file
# inside the logs directory.
load_from = None
resume_from = "logs/satrn_big_2022-10-31/last.pth"

workflow = [("train", 1)]
opencv_num_threads = 0
mp_start_method = "fork"

# Input size and per-channel normalisation statistics. The pipelines in this
# file repeat these numbers literally instead of referencing the names.
img_h = 32
img_w = 128
img_norm_cfg = {
    "mean": [0.485, 0.456, 0.406],
    "std": [0.229, 0.224, 0.225],
}
# Training-time transform chain: load, resize to a fixed 32x128, a series of
# augmentation steps, tensor conversion, normalisation, then collection.
# NOTE(review): `p` is presumably the probability of applying each
# augmentation -- confirm against the transform implementations.
train_pipeline = [
    {"type": "LoadImageFromFile"},
    {
        "type": "ResizeOCR",
        "height": 32,
        "min_width": 128,
        "max_width": 128,
        "keep_aspect_ratio": False,
        "width_downsample_ratio": 0.25,
    },
    # --- augmentation steps ---
    {"type": "ShearOCR", "p": 0.5, "shear_limit": 45},
    {
        "type": "ColorJitterOCR",
        "p": 0.5,
        "brightness": 0.25,
        "contrast": 0.25,
        "saturation": 0.25,
        "hue": 0.25,
    },
    {"type": "GaussianNoiseOCR", "p": 0.5},
    {"type": "GaussianBlurOCR", "blur": (3, 5), "p": 0.5},
    {"type": "BlackBoxAttackOCR", "p": 0.5, "box_size": 12},
    {"type": "DotAttackOCR", "p": 0.5, "dot_size": (1, 3), "dot_space": (5, 8)},
    {"type": "LineAttackOCR", "p": 0.5, "line_size": (1, 3), "line_space": (5, 8)},
    {"type": "InvertOCR", "p": 0.2},
    # --- tensor conversion and packaging ---
    {"type": "ToTensorOCR"},
    {
        "type": "NormalizeOCR",
        "mean": [0.485, 0.456, 0.406],
        "std": [0.229, 0.224, 0.225],
    },
    {
        "type": "Collect",
        "keys": ["img"],
        # "text" is collected here; the test-time pipeline does not list it.
        "meta_keys": [
            "filename",
            "ori_shape",
            "img_shape",
            "text",
            "valid_ratio",
            "resize_shape",
        ],
    },
]
# Evaluation-time transform chain: load the image, then hand it to
# MultiRotateAugOCR configured with rotations of 0, 90 and 270 degrees and a
# nested resize -> tensor -> normalise -> collect chain. No augmentation
# steps are listed here.
test_pipeline = [
    {"type": "LoadImageFromFile"},
    {
        "type": "MultiRotateAugOCR",
        "rotate_degrees": [0, 90, 270],
        "transforms": [
            {
                "type": "ResizeOCR",
                "height": 32,
                "min_width": 128,
                "max_width": 128,
                "keep_aspect_ratio": False,
                "width_downsample_ratio": 0.25,
            },
            {"type": "ToTensorOCR"},
            {
                "type": "NormalizeOCR",
                "mean": [0.485, 0.456, 0.406],
                "std": [0.229, 0.224, 0.225],
            },
            {
                "type": "Collect",
                "keys": ["img"],
                "meta_keys": [
                    "filename",
                    "ori_shape",
                    "img_shape",
                    "valid_ratio",
                    "resize_shape",
                    "img_norm_cfg",
                    "ori_filename",
                ],
            },
        ],
    },
]
# Shared dataset defaults: the dataset class name, the root folders for images
# and annotation files, and the loader / dataset templates. The concrete
# dataset entries in this file spell these values out again in full.
dataset_type = "OCRDataset"
img_path_prefix = "data/Recognition/Real/"
dataset_list = "data/AnnFiles/current-dirs/2022-10-19/"

# Annotation loader template: a line-based parser that takes column 0 as
# "filename" and column 1 as "text", with a single space as the separator.
default_loader = {
    "type": "AnnFileLoader",
    "repeat": 1,
    "parser": {
        "type": "LineStrParser",
        "keys": ["filename", "text"],
        "keys_idx": [0, 1],
        "separator": " ",
    },
}

# Dataset template with the image prefix, annotation file and pipeline unset.
default_dataset = {
    "type": "OCRDataset",
    "img_prefix": None,
    "ann_file": None,
    "loader": {
        "type": "AnnFileLoader",
        "repeat": 1,
        "parser": {
            "type": "LineStrParser",
            "keys": ["filename", "text"],
            "keys_idx": [0, 1],
            "separator": " ",
        },
    },
    "pipeline": None,
    "test_mode": False,
}
# Named dataset descriptions, one per data source. All entries share the same
# loader / parser settings and differ only in image directory and annotation
# file, so one template is expanded for each (directory, annotation) row and
# the results are unpacked into the module-level names in order.
#
# NOTE(review): `img_prefix` is a one-element tuple everywhere, and `ann_file`
# is a one-element tuple for every entry except `handwriten_train`, where it
# is a plain string. Both forms are preserved exactly as found -- confirm
# which one the dataset/loader implementation expects.
(
    handwriten_train,
    printed_train,
    handwriten_val,
    printed_val,
    synthetic,
    blank_space,
    captcha_train,
    captcha_val,
    kie_train,
    kie_val,
    gplx_train,
    gplx_val,
    vietocr,
) = (
    {
        "type": "OCRDataset",
        "img_prefix": (image_dir,),
        "ann_file": annotation,
        "loader": {
            "type": "AnnFileLoader",
            "repeat": 1,
            "parser": {
                "type": "LineStrParser",
                "keys": ["filename", "text"],
                "keys_idx": [0, 1],
                "separator": " ",
            },
        },
        "pipeline": None,
        "test_mode": False,
    }
    for image_dir, annotation in (
        (
            "data/Recognition/Real/Train/Handwritten_Train/",
            "data/AnnFiles/current-dirs/2022-10-19/text_recognition__Train_Handwritten_Train.txt",
        ),
        (
            "data/Recognition/Real/Train/Printed_Train/",
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition__Train_Printed_Train.txt",),
        ),
        (
            "data/Recognition/Real/Val/Handwritten_Val/",
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition__Val_Handwritten_Val.txt",),
        ),
        (
            "data/Recognition/Real/Val/Printed_Val/",
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition__Val_Printed_Val.txt",),
        ),
        (
            "data/Recognition/Synthetic/Using/",
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_Synthetic_Using.txt",),
        ),
        (
            "data/Recognition/Blank/Train/",
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_Blank_Train.txt",),
        ),
        (
            "data/Recognition/Real/Train/Captcha_Train/DONE/",
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_Captcha_Train_DONE.txt",),
        ),
        (
            "data/Recognition/Real/Val/Captcha_Val/DONE/",
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_Captcha_Val_DONE.txt",),
        ),
        (
            "data/Recognition/Real/Train/KIE_Train/KIE/",
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_KIE_Train_KIE.txt",),
        ),
        (
            "data/Recognition/Real/Val/KIE_Val/KIE/",
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_KIE_Val_KIE.txt",),
        ),
        (
            "data/Recognition/Real/Train/GPLX_Train/train/",
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_GPLX_Train_train.txt",),
        ),
        (
            "data/Recognition/Real/Val/GPLX_Val/val/",
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_GPLX_Val_val.txt",),
        ),
        (
            "data/Recognition/Real/Train/VietOCR_Train/Data/",
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_VietOCR_Train_Data.txt",),
        ),
    )
)
# Datasets that make up the training set, in order: handwritten, printed,
# synthetic, blank, captcha, KIE, GPLX and VietOCR. Each entry is the same
# template filled in with an image directory and an annotation reference.
# NOTE(review): the handwritten entry carries a plain-string `ann_file` while
# the others carry one-element tuples; preserved as found -- confirm intent.
train_list = [
    {
        "type": "OCRDataset",
        "img_prefix": (image_dir,),
        "ann_file": annotation,
        "loader": {
            "type": "AnnFileLoader",
            "repeat": 1,
            "parser": {
                "type": "LineStrParser",
                "keys": ["filename", "text"],
                "keys_idx": [0, 1],
                "separator": " ",
            },
        },
        "pipeline": None,
        "test_mode": False,
    }
    for image_dir, annotation in (
        (
            "data/Recognition/Real/Train/Handwritten_Train/",
            "data/AnnFiles/current-dirs/2022-10-19/text_recognition__Train_Handwritten_Train.txt",
        ),
        (
            "data/Recognition/Real/Train/Printed_Train/",
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition__Train_Printed_Train.txt",),
        ),
        (
            "data/Recognition/Synthetic/Using/",
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_Synthetic_Using.txt",),
        ),
        (
            "data/Recognition/Blank/Train/",
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_Blank_Train.txt",),
        ),
        (
            "data/Recognition/Real/Train/Captcha_Train/DONE/",
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_Captcha_Train_DONE.txt",),
        ),
        (
            "data/Recognition/Real/Train/KIE_Train/KIE/",
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_KIE_Train_KIE.txt",),
        ),
        (
            "data/Recognition/Real/Train/GPLX_Train/train/",
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_GPLX_Train_train.txt",),
        ),
        (
            "data/Recognition/Real/Train/VietOCR_Train/Data/",
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_VietOCR_Train_Data.txt",),
        ),
    )
]
# Datasets that make up the validation set, in order: handwritten, printed,
# captcha, KIE and GPLX. Each entry is the common dataset template filled in
# with an image directory and an annotation file.
# NOTE(review): `test_mode` is False on these validation entries, exactly as
# found -- confirm that this is intended.
val_list = [
    {
        "type": "OCRDataset",
        "img_prefix": (image_dir,),
        "ann_file": (annotation,),
        "loader": {
            "type": "AnnFileLoader",
            "repeat": 1,
            "parser": {
                "type": "LineStrParser",
                "keys": ["filename", "text"],
                "keys_idx": [0, 1],
                "separator": " ",
            },
        },
        "pipeline": None,
        "test_mode": False,
    }
    for image_dir, annotation in (
        (
            "data/Recognition/Real/Val/Handwritten_Val/",
            "data/AnnFiles/current-dirs/2022-10-19/text_recognition__Val_Handwritten_Val.txt",
        ),
        (
            "data/Recognition/Real/Val/Printed_Val/",
            "data/AnnFiles/current-dirs/2022-10-19/text_recognition__Val_Printed_Val.txt",
        ),
        (
            "data/Recognition/Real/Val/Captcha_Val/DONE/",
            "data/AnnFiles/current-dirs/2022-10-19/text_recognition_Captcha_Val_DONE.txt",
        ),
        (
            "data/Recognition/Real/Val/KIE_Val/KIE/",
            "data/AnnFiles/current-dirs/2022-10-19/text_recognition_KIE_Val_KIE.txt",
        ),
        (
            "data/Recognition/Real/Val/GPLX_Val/val/",
            "data/AnnFiles/current-dirs/2022-10-19/text_recognition_GPLX_Val_val.txt",
        ),
    )
]
# Datasets that make up the test set: the handwritten and printed validation
# folders (the same directories and annotation files as the first two
# validation entries).
# NOTE(review): `test_mode` is False here as well, exactly as found.
test_list = [
    {
        "type": "OCRDataset",
        "img_prefix": (image_dir,),
        "ann_file": (annotation,),
        "loader": {
            "type": "AnnFileLoader",
            "repeat": 1,
            "parser": {
                "type": "LineStrParser",
                "keys": ["filename", "text"],
                "keys_idx": [0, 1],
                "separator": " ",
            },
        },
        "pipeline": None,
        "test_mode": False,
    }
    for image_dir, annotation in (
        (
            "data/Recognition/Real/Val/Handwritten_Val/",
            "data/AnnFiles/current-dirs/2022-10-19/text_recognition__Val_Handwritten_Val.txt",
        ),
        (
            "data/Recognition/Real/Val/Printed_Val/",
            "data/AnnFiles/current-dirs/2022-10-19/text_recognition__Val_Printed_Val.txt",
        ),
    )
]
# Mixed-precision setting and the label convertor shared with the model below.
fp16 = {"loss_scale": "dynamic"}
label_convertor = {
    "type": "AttnConvertor",
    "dict_type": "DICT224",
    "with_unknown": False,
}

# Recogniser definition: a ResNetABI backbone feeding a 12-layer SatrnEncoder
# and a 12-layer NRTRDecoder. Encoder and decoder use the same attention
# sizes (8 heads, d_model 256, d_inner 1024, d_k = d_v = 32).
model = {
    "type": "SATRN",
    "backbone": {
        "type": "ResNetABI",
        "in_channels": 3,
        "stem_channels": 16,
        "base_channels": 16,
    },
    "encoder": {
        "type": "SatrnEncoder",
        "n_layers": 12,
        "n_head": 8,
        "d_k": 32,
        "d_v": 32,
        "d_model": 256,
        "n_position": 100,
        "d_inner": 1024,
        "dropout": 0.1,
    },
    "decoder": {
        "type": "NRTRDecoder",
        "n_layers": 12,
        "d_embedding": 256,
        "n_head": 8,
        "d_model": 256,
        "d_inner": 1024,
        "d_k": 32,
        "d_v": 32,
    },
    "loss": {"type": "TFLoss"},
    # Same settings as the module-level `label_convertor`, written out again.
    "label_convertor": {
        "type": "AttnConvertor",
        "dict_type": "DICT224",
        "with_unknown": False,
    },
    "max_seq_len": 25,
}
# Optimisation schedule: Adam at 1e-3 with no gradient clipping and a
# polynomial learning-rate policy (power 0.9, floor 1e-6, by_epoch=False),
# over 15 epochs in total.
optimizer = {"type": "Adam", "lr": 0.001}
optimizer_config = {"grad_clip": None}
lr_config = {
    "policy": "poly",
    "power": 0.9,
    "min_lr": 1e-06,
    "by_epoch": False,
}
total_epochs = 15

# Extra training hook: an exponential-momentum EMA hook.
# NOTE(review): the meaning of `total_iter`, `momentum` and `priority` is
# defined by the hook implementation, which is not visible here.
custom_hooks = [
    {
        "type": "ExpMomentumEMAHook",
        "total_iter": 20000,
        "resume_from": None,
        "momentum": 0.0001,
        "priority": 49,
    },
]
# Data-loading configuration: batch sizes, worker count and the train / val /
# test splits. Every split is a UniformConcatDataset whose member datasets
# are the same OCRDataset template filled in with an image directory and an
# annotation reference, followed by the transform pipeline for that split.
#
# NOTE(review): `img_prefix` is always a one-element tuple. `ann_file` is a
# one-element tuple everywhere except the handwritten training entry, where
# it is a plain string. `test_mode` is False on the val and test entries too.
# All of this is preserved exactly as found -- confirm against the dataset
# implementation.
data = {
    "samples_per_gpu": 160,
    "workers_per_gpu": 16,
    "val_dataloader": {"samples_per_gpu": 400},
    "test_dataloader": {"samples_per_gpu": 400},
    "train": {
        "type": "UniformConcatDataset",
        "datasets": [
            {
                "type": "OCRDataset",
                "img_prefix": (image_dir,),
                "ann_file": annotation,
                "loader": {
                    "type": "AnnFileLoader",
                    "repeat": 1,
                    "parser": {
                        "type": "LineStrParser",
                        "keys": ["filename", "text"],
                        "keys_idx": [0, 1],
                        "separator": " ",
                    },
                },
                "pipeline": None,
                "test_mode": False,
            }
            for image_dir, annotation in (
                (
                    "data/Recognition/Real/Train/Handwritten_Train/",
                    "data/AnnFiles/current-dirs/2022-10-19/text_recognition__Train_Handwritten_Train.txt",
                ),
                (
                    "data/Recognition/Real/Train/Printed_Train/",
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition__Train_Printed_Train.txt",),
                ),
                (
                    "data/Recognition/Synthetic/Using/",
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_Synthetic_Using.txt",),
                ),
                (
                    "data/Recognition/Blank/Train/",
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_Blank_Train.txt",),
                ),
                (
                    "data/Recognition/Real/Train/Captcha_Train/DONE/",
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_Captcha_Train_DONE.txt",),
                ),
                (
                    "data/Recognition/Real/Train/KIE_Train/KIE/",
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_KIE_Train_KIE.txt",),
                ),
                (
                    "data/Recognition/Real/Train/GPLX_Train/train/",
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_GPLX_Train_train.txt",),
                ),
                (
                    "data/Recognition/Real/Train/VietOCR_Train/Data/",
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_VietOCR_Train_Data.txt",),
                ),
            )
        ],
        # Training transforms: resize, augmentation steps, tensor conversion,
        # normalisation, collection.
        "pipeline": [
            {"type": "LoadImageFromFile"},
            {
                "type": "ResizeOCR",
                "height": 32,
                "min_width": 128,
                "max_width": 128,
                "keep_aspect_ratio": False,
                "width_downsample_ratio": 0.25,
            },
            {"type": "ShearOCR", "p": 0.5, "shear_limit": 45},
            {
                "type": "ColorJitterOCR",
                "p": 0.5,
                "brightness": 0.25,
                "contrast": 0.25,
                "saturation": 0.25,
                "hue": 0.25,
            },
            {"type": "GaussianNoiseOCR", "p": 0.5},
            {"type": "GaussianBlurOCR", "blur": (3, 5), "p": 0.5},
            {"type": "BlackBoxAttackOCR", "p": 0.5, "box_size": 12},
            {"type": "DotAttackOCR", "p": 0.5, "dot_size": (1, 3), "dot_space": (5, 8)},
            {"type": "LineAttackOCR", "p": 0.5, "line_size": (1, 3), "line_space": (5, 8)},
            {"type": "InvertOCR", "p": 0.2},
            {"type": "ToTensorOCR"},
            {
                "type": "NormalizeOCR",
                "mean": [0.485, 0.456, 0.406],
                "std": [0.229, 0.224, 0.225],
            },
            {
                "type": "Collect",
                "keys": ["img"],
                "meta_keys": [
                    "filename",
                    "ori_shape",
                    "img_shape",
                    "text",
                    "valid_ratio",
                    "resize_shape",
                ],
            },
        ],
    },
    "val": {
        "type": "UniformConcatDataset",
        "datasets": [
            {
                "type": "OCRDataset",
                "img_prefix": (image_dir,),
                "ann_file": (annotation,),
                "loader": {
                    "type": "AnnFileLoader",
                    "repeat": 1,
                    "parser": {
                        "type": "LineStrParser",
                        "keys": ["filename", "text"],
                        "keys_idx": [0, 1],
                        "separator": " ",
                    },
                },
                "pipeline": None,
                "test_mode": False,
            }
            for image_dir, annotation in (
                (
                    "data/Recognition/Real/Val/Handwritten_Val/",
                    "data/AnnFiles/current-dirs/2022-10-19/text_recognition__Val_Handwritten_Val.txt",
                ),
                (
                    "data/Recognition/Real/Val/Printed_Val/",
                    "data/AnnFiles/current-dirs/2022-10-19/text_recognition__Val_Printed_Val.txt",
                ),
                (
                    "data/Recognition/Real/Val/Captcha_Val/DONE/",
                    "data/AnnFiles/current-dirs/2022-10-19/text_recognition_Captcha_Val_DONE.txt",
                ),
                (
                    "data/Recognition/Real/Val/KIE_Val/KIE/",
                    "data/AnnFiles/current-dirs/2022-10-19/text_recognition_KIE_Val_KIE.txt",
                ),
                (
                    "data/Recognition/Real/Val/GPLX_Val/val/",
                    "data/AnnFiles/current-dirs/2022-10-19/text_recognition_GPLX_Val_val.txt",
                ),
            )
        ],
        # Evaluation transforms: rotations of 0/90/270 degrees around a plain
        # resize -> tensor -> normalise -> collect chain.
        "pipeline": [
            {"type": "LoadImageFromFile"},
            {
                "type": "MultiRotateAugOCR",
                "rotate_degrees": [0, 90, 270],
                "transforms": [
                    {
                        "type": "ResizeOCR",
                        "height": 32,
                        "min_width": 128,
                        "max_width": 128,
                        "keep_aspect_ratio": False,
                        "width_downsample_ratio": 0.25,
                    },
                    {"type": "ToTensorOCR"},
                    {
                        "type": "NormalizeOCR",
                        "mean": [0.485, 0.456, 0.406],
                        "std": [0.229, 0.224, 0.225],
                    },
                    {
                        "type": "Collect",
                        "keys": ["img"],
                        "meta_keys": [
                            "filename",
                            "ori_shape",
                            "img_shape",
                            "valid_ratio",
                            "resize_shape",
                            "img_norm_cfg",
                            "ori_filename",
                        ],
                    },
                ],
            },
        ],
    },
    "test": {
        "type": "UniformConcatDataset",
        "datasets": [
            {
                "type": "OCRDataset",
                "img_prefix": (image_dir,),
                "ann_file": (annotation,),
                "loader": {
                    "type": "AnnFileLoader",
                    "repeat": 1,
                    "parser": {
                        "type": "LineStrParser",
                        "keys": ["filename", "text"],
                        "keys_idx": [0, 1],
                        "separator": " ",
                    },
                },
                "pipeline": None,
                "test_mode": False,
            }
            for image_dir, annotation in (
                (
                    "data/Recognition/Real/Val/Handwritten_Val/",
                    "data/AnnFiles/current-dirs/2022-10-19/text_recognition__Val_Handwritten_Val.txt",
                ),
                (
                    "data/Recognition/Real/Val/Printed_Val/",
                    "data/AnnFiles/current-dirs/2022-10-19/text_recognition__Val_Printed_Val.txt",
                ),
            )
        ],
        # Same transform chain as the validation split, written out again.
        "pipeline": [
            {"type": "LoadImageFromFile"},
            {
                "type": "MultiRotateAugOCR",
                "rotate_degrees": [0, 90, 270],
                "transforms": [
                    {
                        "type": "ResizeOCR",
                        "height": 32,
                        "min_width": 128,
                        "max_width": 128,
                        "keep_aspect_ratio": False,
                        "width_downsample_ratio": 0.25,
                    },
                    {"type": "ToTensorOCR"},
                    {
                        "type": "NormalizeOCR",
                        "mean": [0.485, 0.456, 0.406],
                        "std": [0.229, 0.224, 0.225],
                    },
                    {
                        "type": "Collect",
                        "keys": ["img"],
                        "meta_keys": [
                            "filename",
                            "ori_shape",
                            "img_shape",
                            "valid_ratio",
                            "resize_shape",
                            "img_norm_cfg",
                            "ori_filename",
                        ],
                    },
                ],
            },
        ],
    },
}
# Evaluation schedule and metric, output directory, and the GPU ids to use.
# `work_dir` is the directory that holds the checkpoint named by `resume_from`.
evaluation = {"interval": 1, "metric": "acc"}
work_dir = "logs/satrn_big_2022-10-31/"
gpu_ids = [0]