# Configuration wiring an EmnistMapping, a VqTransformer network and a
# VqTransformerLitModel together with datamodule/criterion/trainer settings.
#
# NOTE(review): the block structure below was rebuilt from a whitespace-
# collapsed file; nesting was inferred from the key names. Confirm against the
# constructors named in each `_target_`.

# Set the /mapping, /network and /model config groups to null; the sections of
# the same names further down supply the definitions.
defaults:
  - override /mapping: null
  - override /network: null
  - override /model: null

mapping:
  _target_: text_recognizer.data.emnist_mapping.EmnistMapping
  # Double quotes are required so that \n is parsed as a newline character.
  extra_symbols: ["\n"]

datamodule:
  word_pieces: false
  batch_size: 8
  augment: false

criterion:
  # Same value as network.pad_index below; presumably the two must stay in
  # sync -- TODO confirm.
  ignore_index: 3

network:
  _target_: text_recognizer.networks.vq_transformer.VqTransformer
  input_dims: [1, 576, 640]
  encoder_dim: 32
  hidden_dim: 256
  dropout_rate: 0.1
  num_classes: 58
  pad_index: 3
  no_grad: true
  decoder:
    _target_: text_recognizer.networks.transformer.Decoder
    # Same value as network.hidden_dim above.
    dim: 256
    depth: 2
    num_heads: 8
    # attn_fn / norm_fn / ff_fn are dotted import paths given as plain strings.
    attn_fn: text_recognizer.networks.transformer.attention.Attention
    attn_kwargs:
      dim_head: 32
      dropout_rate: 0.2
    norm_fn: torch.nn.LayerNorm
    ff_fn: text_recognizer.networks.transformer.mlp.FeedForward
    ff_kwargs:
      dim_out: null
      expansion_factor: 4
      glu: true
      dropout_rate: 0.2
    cross_attend: true
    pre_norm: true
    rotary_emb: null
  # NOTE(review): placed on `network` (not `network.decoder`) by inference from
  # the key name -- confirm against VqTransformer's constructor.
  pretrained_encoder_path: "training/logs/runs/2021-09-25/23-07-28"

model:
  _target_: text_recognizer.models.vq_transformer.VqTransformerLitModel
  # NOTE(review): the three token values were empty in the file as found, which
  # YAML reads as null. They may have been lost in processing -- confirm the
  # intended token strings and quote them when restoring.
  start_token: null
  end_token: null
  pad_token: null
  max_output_len: 682  # 451
  # NOTE(review): `alpha` assumed to belong to `model` -- confirm.
  alpha: 1.0

trainer:
  max_epochs: 64
  # Fractional values; both limits are set to 0.1.
  limit_train_batches: 0.1
  limit_val_batches: 0.1