# Configuration for the Conformer network.
# Each `_target_` value is a dotted path to the class the enclosing mapping
# configures (presumably resolved by a Hydra-style instantiate call — confirm
# against the loader).
#
# NOTE(review): this file's nesting was reconstructed from a single collapsed
# line. Duplicate-key constraints fix most of the structure; the remaining
# judgment calls are flagged inline and should be checked against the
# constructors of the targeted classes.
_target_: text_recognizer.networks.conformer.Conformer
depth: 8
num_classes: 57
# Anchored so nested components can reuse the same value via `*dim`.
dim: &dim 144
# Same value as `dim` but written as its own literal (not aliased).
dim_gru: 144

block:
  _target_: text_recognizer.networks.conformer.ConformerBlock
  dim: *dim
  attn:
    _target_: text_recognizer.networks.conformer.Attention
    dim: *dim
    heads: 8
    dim_head: 64
    mult: 4
  ff:
    _target_: text_recognizer.networks.conformer.Feedforward
    dim: *dim
    expansion_factor: 4
    dropout: 0.1
  conv:
    _target_: text_recognizer.networks.conformer.ConformerConv
    dim: *dim
    expansion_factor: 2
    kernel_size: 31
    dropout: 0.1

# NOTE(review): placed at top level as a sibling of `block`; it could also be
# a child of `block` — confirm which class accepts a `subsampler` argument.
subsampler:
  _target_: text_recognizer.networks.conformer.Subsampler
  pixel_pos_embedding:
    _target_: text_recognizer.networks.transformer.AxialPositionalEmbedding
    dim: 64
    shape: [6, 127]
  # NOTE(review): `channels` and `height` are assumed to belong to the
  # subsampler rather than to `pixel_pos_embedding` — TODO confirm.
  channels: 64
  height: 6
  # `dim` and `depth` cannot be top-level here (both keys already exist there),
  # and `dim` cannot sit beside `dim: 64` in the embedding mapping.
  dim: *dim
  depth: 3
  dropout: 0.1