# Model configuration for the ConvTransformer network.
# NOTE(review): the `defaults` list and `_target_` key look like Hydra
# conventions (config-group composition and object instantiation) — confirm
# against the code that loads this file.

# Config groups composed into this config: one encoder and one decoder.
defaults:
  - encoder: efficientnet
  - decoder: transformer_decoder

# Dotted import path of the class this config describes.
_target_: text_recognizer.networks.conv_transformer.ConvTransformer

# presumably (channels, height, width) of the input image — TODO confirm
input_dims: [1, 576, 640]
hidden_dim: 256
dropout_rate: 0.2
max_output_len: 682
num_classes: 1004

# Special tokens marking sequence start, sequence end, and padding.
start_token: <s>
end_token: <e>
pad_token: <p>