# Listing metadata: Python, executable file, 1116 lines, 33 KiB.
# ---------------------------------------------------------------------------
# Runtime settings: checkpointing, logging, distributed backend, resume point.
# ---------------------------------------------------------------------------

# Checkpoint saving interval (presumably counted in epochs -- confirm with the runner).
checkpoint_config = dict(interval=1)

# Text logger fires every 50 units (presumably iterations -- confirm).
log_config = dict(interval=50, hooks=[dict(type="TextLoggerHook")])

dist_params = dict(backend="nccl")
log_level = "INFO"

# No weights are loaded via `load_from`; the run instead resumes from the
# checkpoint path below.
load_from = None
resume_from = "logs/satrn_big_2022-10-31/last.pth"

workflow = [("train", 1)]

opencv_num_threads = 0
mp_start_method = "fork"
# Input image geometry; the same numbers appear as height / min_width /
# max_width in the ResizeOCR steps of the pipelines in this file.
img_h = 32
img_w = 128

# Per-channel normalisation statistics; the NormalizeOCR steps in this file
# repeat these values literally.
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# Training pipeline: load -> fixed 32x128 resize -> random augmentations
# (each with its own probability `p`) -> tensor conversion -> normalisation
# -> collect the image plus the listed meta keys (including the "text" label).
train_pipeline = [
    dict(type="LoadImageFromFile"),
    dict(
        type="ResizeOCR",
        height=32,
        min_width=128,
        max_width=128,  # min_width == max_width, so the width is fixed
        keep_aspect_ratio=False,
        width_downsample_ratio=0.25,
    ),
    dict(type="ShearOCR", p=0.5, shear_limit=45),
    dict(
        type="ColorJitterOCR",
        p=0.5,
        brightness=0.25,
        contrast=0.25,
        saturation=0.25,
        hue=0.25,
    ),
    dict(type="GaussianNoiseOCR", p=0.5),
    dict(type="GaussianBlurOCR", blur=(3, 5), p=0.5),
    dict(type="BlackBoxAttackOCR", p=0.5, box_size=12),
    dict(type="DotAttackOCR", p=0.5, dot_size=(1, 3), dot_space=(5, 8)),
    dict(type="LineAttackOCR", p=0.5, line_size=(1, 3), line_space=(5, 8)),
    dict(type="InvertOCR", p=0.2),
    dict(type="ToTensorOCR"),
    dict(type="NormalizeOCR", mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    dict(
        type="Collect",
        keys=["img"],
        meta_keys=[
            "filename",
            "ori_shape",
            "img_shape",
            "text",
            "valid_ratio",
            "resize_shape",
        ],
    ),
]
# Evaluation pipeline: load, then run the nested transforms under
# MultiRotateAugOCR for the listed rotations (0, 90, 270). No random
# augmentation is configured here, and "text" is not among the collected
# meta keys.
test_pipeline = [
    dict(type="LoadImageFromFile"),
    dict(
        type="MultiRotateAugOCR",
        rotate_degrees=[0, 90, 270],
        transforms=[
            dict(
                type="ResizeOCR",
                height=32,
                min_width=128,
                max_width=128,
                keep_aspect_ratio=False,
                width_downsample_ratio=0.25,
            ),
            dict(type="ToTensorOCR"),
            dict(
                type="NormalizeOCR",
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            dict(
                type="Collect",
                keys=["img"],
                meta_keys=[
                    "filename",
                    "ori_shape",
                    "img_shape",
                    "valid_ratio",
                    "resize_shape",
                    "img_norm_cfg",
                    "ori_filename",
                ],
            ),
        ],
    ),
]
# Dataset type name and the root directories for images / annotation lists.
dataset_type = "OCRDataset"
img_path_prefix = "data/Recognition/Real/"
dataset_list = "data/AnnFiles/current-dirs/2022-10-19/"

# Shared annotation loader settings: the parser config maps column 0 to
# "filename" and column 1 to "text", with a single space as the separator.
default_loader = dict(
    type="AnnFileLoader",
    repeat=1,
    parser=dict(
        type="LineStrParser",
        keys=["filename", "text"],
        keys_idx=[0, 1],
        separator=" ",
    ),
)
# Template dataset entry: same loader settings as the concrete datasets in
# this file, with img_prefix / ann_file left unset (None).
default_dataset = dict(
    type="OCRDataset",
    img_prefix=None,
    ann_file=None,
    loader=dict(
        type="AnnFileLoader",
        repeat=1,
        parser=dict(
            type="LineStrParser",
            keys=["filename", "text"],
            keys_idx=[0, 1],
            separator=" ",
        ),
    ),
    pipeline=None,
    test_mode=False,
)
# Handwritten training split.
# img_prefix is a 1-tuple (note the trailing comma), not a plain string.
# NOTE(review): ann_file is a plain str here, whereas every sibling dataset
# in this file wraps it in a 1-tuple -- confirm which form is intended.
handwriten_train = dict(
    type="OCRDataset",
    img_prefix=("data/Recognition/Real/Train/Handwritten_Train/",),
    ann_file="data/AnnFiles/current-dirs/2022-10-19/text_recognition__Train_Handwritten_Train.txt",
    loader=dict(
        type="AnnFileLoader",
        repeat=1,
        parser=dict(
            type="LineStrParser",
            keys=["filename", "text"],
            keys_idx=[0, 1],
            separator=" ",
        ),
    ),
    pipeline=None,
    test_mode=False,
)
# Printed-text training split.
# img_prefix and ann_file are both 1-tuples (note the trailing commas).
printed_train = dict(
    type="OCRDataset",
    img_prefix=("data/Recognition/Real/Train/Printed_Train/",),
    ann_file=(
        "data/AnnFiles/current-dirs/2022-10-19/text_recognition__Train_Printed_Train.txt",
    ),
    loader=dict(
        type="AnnFileLoader",
        repeat=1,
        parser=dict(
            type="LineStrParser",
            keys=["filename", "text"],
            keys_idx=[0, 1],
            separator=" ",
        ),
    ),
    pipeline=None,
    test_mode=False,
)
# Handwritten validation split.
# img_prefix and ann_file are both 1-tuples (note the trailing commas).
# NOTE(review): test_mode is False even though this is a validation split --
# confirm this is intended.
handwriten_val = dict(
    type="OCRDataset",
    img_prefix=("data/Recognition/Real/Val/Handwritten_Val/",),
    ann_file=(
        "data/AnnFiles/current-dirs/2022-10-19/text_recognition__Val_Handwritten_Val.txt",
    ),
    loader=dict(
        type="AnnFileLoader",
        repeat=1,
        parser=dict(
            type="LineStrParser",
            keys=["filename", "text"],
            keys_idx=[0, 1],
            separator=" ",
        ),
    ),
    pipeline=None,
    test_mode=False,
)
# Printed-text validation split.
# img_prefix and ann_file are both 1-tuples (note the trailing commas).
# NOTE(review): test_mode is False even though this is a validation split --
# confirm this is intended.
printed_val = dict(
    type="OCRDataset",
    img_prefix=("data/Recognition/Real/Val/Printed_Val/",),
    ann_file=(
        "data/AnnFiles/current-dirs/2022-10-19/text_recognition__Val_Printed_Val.txt",
    ),
    loader=dict(
        type="AnnFileLoader",
        repeat=1,
        parser=dict(
            type="LineStrParser",
            keys=["filename", "text"],
            keys_idx=[0, 1],
            separator=" ",
        ),
    ),
    pipeline=None,
    test_mode=False,
)
# Synthetic data set.
# img_prefix and ann_file are both 1-tuples (note the trailing commas).
synthetic = dict(
    type="OCRDataset",
    img_prefix=("data/Recognition/Synthetic/Using/",),
    ann_file=(
        "data/AnnFiles/current-dirs/2022-10-19/text_recognition_Synthetic_Using.txt",
    ),
    loader=dict(
        type="AnnFileLoader",
        repeat=1,
        parser=dict(
            type="LineStrParser",
            keys=["filename", "text"],
            keys_idx=[0, 1],
            separator=" ",
        ),
    ),
    pipeline=None,
    test_mode=False,
)
# "Blank" training set (images under data/Recognition/Blank/Train/).
# img_prefix and ann_file are both 1-tuples (note the trailing commas).
blank_space = dict(
    type="OCRDataset",
    img_prefix=("data/Recognition/Blank/Train/",),
    ann_file=(
        "data/AnnFiles/current-dirs/2022-10-19/text_recognition_Blank_Train.txt",
    ),
    loader=dict(
        type="AnnFileLoader",
        repeat=1,
        parser=dict(
            type="LineStrParser",
            keys=["filename", "text"],
            keys_idx=[0, 1],
            separator=" ",
        ),
    ),
    pipeline=None,
    test_mode=False,
)
# Captcha training split.
# img_prefix and ann_file are both 1-tuples (note the trailing commas).
captcha_train = dict(
    type="OCRDataset",
    img_prefix=("data/Recognition/Real/Train/Captcha_Train/DONE/",),
    ann_file=(
        "data/AnnFiles/current-dirs/2022-10-19/text_recognition_Captcha_Train_DONE.txt",
    ),
    loader=dict(
        type="AnnFileLoader",
        repeat=1,
        parser=dict(
            type="LineStrParser",
            keys=["filename", "text"],
            keys_idx=[0, 1],
            separator=" ",
        ),
    ),
    pipeline=None,
    test_mode=False,
)
# Captcha validation split.
# img_prefix and ann_file are both 1-tuples (note the trailing commas).
# NOTE(review): test_mode is False even though this is a validation split --
# confirm this is intended.
captcha_val = dict(
    type="OCRDataset",
    img_prefix=("data/Recognition/Real/Val/Captcha_Val/DONE/",),
    ann_file=(
        "data/AnnFiles/current-dirs/2022-10-19/text_recognition_Captcha_Val_DONE.txt",
    ),
    loader=dict(
        type="AnnFileLoader",
        repeat=1,
        parser=dict(
            type="LineStrParser",
            keys=["filename", "text"],
            keys_idx=[0, 1],
            separator=" ",
        ),
    ),
    pipeline=None,
    test_mode=False,
)
# KIE training split.
# img_prefix and ann_file are both 1-tuples (note the trailing commas).
kie_train = dict(
    type="OCRDataset",
    img_prefix=("data/Recognition/Real/Train/KIE_Train/KIE/",),
    ann_file=(
        "data/AnnFiles/current-dirs/2022-10-19/text_recognition_KIE_Train_KIE.txt",
    ),
    loader=dict(
        type="AnnFileLoader",
        repeat=1,
        parser=dict(
            type="LineStrParser",
            keys=["filename", "text"],
            keys_idx=[0, 1],
            separator=" ",
        ),
    ),
    pipeline=None,
    test_mode=False,
)
# KIE validation split.
# img_prefix and ann_file are both 1-tuples (note the trailing commas).
# NOTE(review): test_mode is False even though this is a validation split --
# confirm this is intended.
kie_val = dict(
    type="OCRDataset",
    img_prefix=("data/Recognition/Real/Val/KIE_Val/KIE/",),
    ann_file=(
        "data/AnnFiles/current-dirs/2022-10-19/text_recognition_KIE_Val_KIE.txt",
    ),
    loader=dict(
        type="AnnFileLoader",
        repeat=1,
        parser=dict(
            type="LineStrParser",
            keys=["filename", "text"],
            keys_idx=[0, 1],
            separator=" ",
        ),
    ),
    pipeline=None,
    test_mode=False,
)
# GPLX training split.
# img_prefix and ann_file are both 1-tuples (note the trailing commas).
gplx_train = dict(
    type="OCRDataset",
    img_prefix=("data/Recognition/Real/Train/GPLX_Train/train/",),
    ann_file=(
        "data/AnnFiles/current-dirs/2022-10-19/text_recognition_GPLX_Train_train.txt",
    ),
    loader=dict(
        type="AnnFileLoader",
        repeat=1,
        parser=dict(
            type="LineStrParser",
            keys=["filename", "text"],
            keys_idx=[0, 1],
            separator=" ",
        ),
    ),
    pipeline=None,
    test_mode=False,
)
# GPLX validation split.
# img_prefix and ann_file are both 1-tuples (note the trailing commas).
# NOTE(review): test_mode is False even though this is a validation split --
# confirm this is intended.
gplx_val = dict(
    type="OCRDataset",
    img_prefix=("data/Recognition/Real/Val/GPLX_Val/val/",),
    ann_file=(
        "data/AnnFiles/current-dirs/2022-10-19/text_recognition_GPLX_Val_val.txt",
    ),
    loader=dict(
        type="AnnFileLoader",
        repeat=1,
        parser=dict(
            type="LineStrParser",
            keys=["filename", "text"],
            keys_idx=[0, 1],
            separator=" ",
        ),
    ),
    pipeline=None,
    test_mode=False,
)
# VietOCR training data.
# img_prefix and ann_file are both 1-tuples (note the trailing commas).
vietocr = dict(
    type="OCRDataset",
    img_prefix=("data/Recognition/Real/Train/VietOCR_Train/Data/",),
    ann_file=(
        "data/AnnFiles/current-dirs/2022-10-19/text_recognition_VietOCR_Train_Data.txt",
    ),
    loader=dict(
        type="AnnFileLoader",
        repeat=1,
        parser=dict(
            type="LineStrParser",
            keys=["filename", "text"],
            keys_idx=[0, 1],
            separator=" ",
        ),
    ),
    pipeline=None,
    test_mode=False,
)
# Training datasets. Every entry uses the same loader / pipeline / test_mode
# settings and differs only in (img_prefix, ann_file), so the entries are
# generated from the pairs below. Each entry gets its own fresh dicts, and the
# comprehension variables do not leak into the module namespace.
train_list = [
    dict(
        type="OCRDataset",
        img_prefix=prefix,
        ann_file=ann,
        loader=dict(
            type="AnnFileLoader",
            repeat=1,
            parser=dict(
                type="LineStrParser",
                keys=["filename", "text"],
                keys_idx=[0, 1],
                separator=" ",
            ),
        ),
        pipeline=None,
        test_mode=False,
    )
    for prefix, ann in (
        (
            ("data/Recognition/Real/Train/Handwritten_Train/",),
            # NOTE(review): plain str here, 1-tuple for every other entry --
            # confirm which form is intended.
            "data/AnnFiles/current-dirs/2022-10-19/text_recognition__Train_Handwritten_Train.txt",
        ),
        (
            ("data/Recognition/Real/Train/Printed_Train/",),
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition__Train_Printed_Train.txt",),
        ),
        (
            ("data/Recognition/Synthetic/Using/",),
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_Synthetic_Using.txt",),
        ),
        (
            ("data/Recognition/Blank/Train/",),
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_Blank_Train.txt",),
        ),
        (
            ("data/Recognition/Real/Train/Captcha_Train/DONE/",),
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_Captcha_Train_DONE.txt",),
        ),
        (
            ("data/Recognition/Real/Train/KIE_Train/KIE/",),
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_KIE_Train_KIE.txt",),
        ),
        (
            ("data/Recognition/Real/Train/GPLX_Train/train/",),
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_GPLX_Train_train.txt",),
        ),
        (
            ("data/Recognition/Real/Train/VietOCR_Train/Data/",),
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_VietOCR_Train_Data.txt",),
        ),
    )
]
# Validation datasets. Every entry uses the same loader / pipeline /
# test_mode settings and differs only in (img_prefix, ann_file), so the
# entries are generated from the pairs below. Each entry gets its own fresh
# dicts, and the comprehension variables do not leak into the module namespace.
# NOTE(review): test_mode is False for these validation splits -- confirm
# this is intended.
val_list = [
    dict(
        type="OCRDataset",
        img_prefix=prefix,
        ann_file=ann,
        loader=dict(
            type="AnnFileLoader",
            repeat=1,
            parser=dict(
                type="LineStrParser",
                keys=["filename", "text"],
                keys_idx=[0, 1],
                separator=" ",
            ),
        ),
        pipeline=None,
        test_mode=False,
    )
    for prefix, ann in (
        (
            ("data/Recognition/Real/Val/Handwritten_Val/",),
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition__Val_Handwritten_Val.txt",),
        ),
        (
            ("data/Recognition/Real/Val/Printed_Val/",),
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition__Val_Printed_Val.txt",),
        ),
        (
            ("data/Recognition/Real/Val/Captcha_Val/DONE/",),
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_Captcha_Val_DONE.txt",),
        ),
        (
            ("data/Recognition/Real/Val/KIE_Val/KIE/",),
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_KIE_Val_KIE.txt",),
        ),
        (
            ("data/Recognition/Real/Val/GPLX_Val/val/",),
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_GPLX_Val_val.txt",),
        ),
    )
]
# Test datasets: the handwritten and printed validation splits. Both entries
# use the same loader / pipeline / test_mode settings and differ only in
# (img_prefix, ann_file), so they are generated from the pairs below. Each
# entry gets its own fresh dicts, and the comprehension variables do not leak
# into the module namespace.
# NOTE(review): test_mode is False for these test entries -- confirm this is
# intended.
test_list = [
    dict(
        type="OCRDataset",
        img_prefix=prefix,
        ann_file=ann,
        loader=dict(
            type="AnnFileLoader",
            repeat=1,
            parser=dict(
                type="LineStrParser",
                keys=["filename", "text"],
                keys_idx=[0, 1],
                separator=" ",
            ),
        ),
        pipeline=None,
        test_mode=False,
    )
    for prefix, ann in (
        (
            ("data/Recognition/Real/Val/Handwritten_Val/",),
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition__Val_Handwritten_Val.txt",),
        ),
        (
            ("data/Recognition/Real/Val/Printed_Val/",),
            ("data/AnnFiles/current-dirs/2022-10-19/text_recognition__Val_Printed_Val.txt",),
        ),
    )
]
# Mixed-precision setting with dynamic loss scaling.
fp16 = dict(loss_scale="dynamic")

# Label convertor settings; the same values are repeated inside `model`.
label_convertor = dict(type="AttnConvertor", dict_type="DICT224", with_unknown=False)
# SATRN recogniser: ResNetABI backbone, 12-layer SatrnEncoder, 12-layer
# NRTRDecoder. In both encoder and decoder, n_head * d_k = 8 * 32 = 256,
# which equals d_model.
model = dict(
    type="SATRN",
    backbone=dict(type="ResNetABI", in_channels=3, stem_channels=16, base_channels=16),
    encoder=dict(
        type="SatrnEncoder",
        n_layers=12,
        n_head=8,
        d_k=32,
        d_v=32,
        d_model=256,
        n_position=100,
        d_inner=1024,
        dropout=0.1,
    ),
    decoder=dict(
        type="NRTRDecoder",
        n_layers=12,
        d_embedding=256,
        n_head=8,
        d_model=256,
        d_inner=1024,
        d_k=32,
        d_v=32,
    ),
    loss=dict(type="TFLoss"),
    label_convertor=dict(type="AttnConvertor", dict_type="DICT224", with_unknown=False),
    max_seq_len=25,
)
# Optimisation schedule: Adam at lr 1e-3 without gradient clipping, and a
# polynomial LR policy that is not stepped per epoch (by_epoch=False).
optimizer = dict(type="Adam", lr=0.001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy="poly", power=0.9, min_lr=1e-06, by_epoch=False)
total_epochs = 15

# EMA hook settings.
# NOTE(review): the hook's resume_from is None while the run-level
# `resume_from` in this file points at a checkpoint -- confirm whether the
# hook should be given the same path when resuming.
custom_hooks = [
    dict(
        type="ExpMomentumEMAHook",
        total_iter=20000,
        resume_from=None,
        momentum=0.0001,
        priority=49,
    )
]
# Data loading configuration: batch sizes, worker count, and the
# train / val / test UniformConcatDataset definitions.
#
# Within each split every member dataset uses the same loader / pipeline /
# test_mode settings and differs only in (img_prefix, ann_file), so the
# members are generated from (img_prefix, ann_file) pairs. Each member gets
# its own fresh dicts, and the comprehension variables do not leak into the
# module namespace. Member datasets carry pipeline=None; the pipeline is
# given once on the enclosing UniformConcatDataset.
data = dict(
    samples_per_gpu=160,
    workers_per_gpu=16,
    val_dataloader=dict(samples_per_gpu=400),
    test_dataloader=dict(samples_per_gpu=400),
    train=dict(
        type="UniformConcatDataset",
        datasets=[
            dict(
                type="OCRDataset",
                img_prefix=prefix,
                ann_file=ann,
                loader=dict(
                    type="AnnFileLoader",
                    repeat=1,
                    parser=dict(
                        type="LineStrParser",
                        keys=["filename", "text"],
                        keys_idx=[0, 1],
                        separator=" ",
                    ),
                ),
                pipeline=None,
                test_mode=False,
            )
            for prefix, ann in (
                (
                    ("data/Recognition/Real/Train/Handwritten_Train/",),
                    # NOTE(review): plain str here, 1-tuple for every other
                    # entry -- confirm which form is intended.
                    "data/AnnFiles/current-dirs/2022-10-19/text_recognition__Train_Handwritten_Train.txt",
                ),
                (
                    ("data/Recognition/Real/Train/Printed_Train/",),
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition__Train_Printed_Train.txt",),
                ),
                (
                    ("data/Recognition/Synthetic/Using/",),
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_Synthetic_Using.txt",),
                ),
                (
                    ("data/Recognition/Blank/Train/",),
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_Blank_Train.txt",),
                ),
                (
                    ("data/Recognition/Real/Train/Captcha_Train/DONE/",),
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_Captcha_Train_DONE.txt",),
                ),
                (
                    ("data/Recognition/Real/Train/KIE_Train/KIE/",),
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_KIE_Train_KIE.txt",),
                ),
                (
                    ("data/Recognition/Real/Train/GPLX_Train/train/",),
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_GPLX_Train_train.txt",),
                ),
                (
                    ("data/Recognition/Real/Train/VietOCR_Train/Data/",),
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_VietOCR_Train_Data.txt",),
                ),
            )
        ],
        # Training pipeline: resize, random augmentations, tensor conversion,
        # normalisation, then collect the image and meta keys (incl. "text").
        pipeline=[
            dict(type="LoadImageFromFile"),
            dict(
                type="ResizeOCR",
                height=32,
                min_width=128,
                max_width=128,
                keep_aspect_ratio=False,
                width_downsample_ratio=0.25,
            ),
            dict(type="ShearOCR", p=0.5, shear_limit=45),
            dict(
                type="ColorJitterOCR",
                p=0.5,
                brightness=0.25,
                contrast=0.25,
                saturation=0.25,
                hue=0.25,
            ),
            dict(type="GaussianNoiseOCR", p=0.5),
            dict(type="GaussianBlurOCR", blur=(3, 5), p=0.5),
            dict(type="BlackBoxAttackOCR", p=0.5, box_size=12),
            dict(type="DotAttackOCR", p=0.5, dot_size=(1, 3), dot_space=(5, 8)),
            dict(type="LineAttackOCR", p=0.5, line_size=(1, 3), line_space=(5, 8)),
            dict(type="InvertOCR", p=0.2),
            dict(type="ToTensorOCR"),
            dict(
                type="NormalizeOCR",
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            dict(
                type="Collect",
                keys=["img"],
                meta_keys=[
                    "filename",
                    "ori_shape",
                    "img_shape",
                    "text",
                    "valid_ratio",
                    "resize_shape",
                ],
            ),
        ],
    ),
    val=dict(
        type="UniformConcatDataset",
        # NOTE(review): test_mode is False for these validation members --
        # confirm this is intended.
        datasets=[
            dict(
                type="OCRDataset",
                img_prefix=prefix,
                ann_file=ann,
                loader=dict(
                    type="AnnFileLoader",
                    repeat=1,
                    parser=dict(
                        type="LineStrParser",
                        keys=["filename", "text"],
                        keys_idx=[0, 1],
                        separator=" ",
                    ),
                ),
                pipeline=None,
                test_mode=False,
            )
            for prefix, ann in (
                (
                    ("data/Recognition/Real/Val/Handwritten_Val/",),
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition__Val_Handwritten_Val.txt",),
                ),
                (
                    ("data/Recognition/Real/Val/Printed_Val/",),
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition__Val_Printed_Val.txt",),
                ),
                (
                    ("data/Recognition/Real/Val/Captcha_Val/DONE/",),
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_Captcha_Val_DONE.txt",),
                ),
                (
                    ("data/Recognition/Real/Val/KIE_Val/KIE/",),
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_KIE_Val_KIE.txt",),
                ),
                (
                    ("data/Recognition/Real/Val/GPLX_Val/val/",),
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition_GPLX_Val_val.txt",),
                ),
            )
        ],
        pipeline=[
            dict(type="LoadImageFromFile"),
            dict(
                type="MultiRotateAugOCR",
                rotate_degrees=[0, 90, 270],
                transforms=[
                    dict(
                        type="ResizeOCR",
                        height=32,
                        min_width=128,
                        max_width=128,
                        keep_aspect_ratio=False,
                        width_downsample_ratio=0.25,
                    ),
                    dict(type="ToTensorOCR"),
                    dict(
                        type="NormalizeOCR",
                        mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225],
                    ),
                    dict(
                        type="Collect",
                        keys=["img"],
                        meta_keys=[
                            "filename",
                            "ori_shape",
                            "img_shape",
                            "valid_ratio",
                            "resize_shape",
                            "img_norm_cfg",
                            "ori_filename",
                        ],
                    ),
                ],
            ),
        ],
    ),
    test=dict(
        type="UniformConcatDataset",
        # Only the handwritten and printed validation splits are used here.
        # NOTE(review): test_mode is False for these test members -- confirm
        # this is intended.
        datasets=[
            dict(
                type="OCRDataset",
                img_prefix=prefix,
                ann_file=ann,
                loader=dict(
                    type="AnnFileLoader",
                    repeat=1,
                    parser=dict(
                        type="LineStrParser",
                        keys=["filename", "text"],
                        keys_idx=[0, 1],
                        separator=" ",
                    ),
                ),
                pipeline=None,
                test_mode=False,
            )
            for prefix, ann in (
                (
                    ("data/Recognition/Real/Val/Handwritten_Val/",),
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition__Val_Handwritten_Val.txt",),
                ),
                (
                    ("data/Recognition/Real/Val/Printed_Val/",),
                    ("data/AnnFiles/current-dirs/2022-10-19/text_recognition__Val_Printed_Val.txt",),
                ),
            )
        ],
        pipeline=[
            dict(type="LoadImageFromFile"),
            dict(
                type="MultiRotateAugOCR",
                rotate_degrees=[0, 90, 270],
                transforms=[
                    dict(
                        type="ResizeOCR",
                        height=32,
                        min_width=128,
                        max_width=128,
                        keep_aspect_ratio=False,
                        width_downsample_ratio=0.25,
                    ),
                    dict(type="ToTensorOCR"),
                    dict(
                        type="NormalizeOCR",
                        mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225],
                    ),
                    dict(
                        type="Collect",
                        keys=["img"],
                        meta_keys=[
                            "filename",
                            "ori_shape",
                            "img_shape",
                            "valid_ratio",
                            "resize_shape",
                            "img_norm_cfg",
                            "ori_filename",
                        ],
                    ),
                ],
            ),
        ],
    ),
)
# Evaluation uses the "acc" metric at interval 1 (presumably every epoch --
# confirm with the runner).
evaluation = dict(interval=1, metric="acc")

# Output directory; the `resume_from` checkpoint in this file lives under
# the same directory.
work_dir = "logs/satrn_big_2022-10-31/"

gpu_ids = [0]