# Transformer decoder configuration.
#
# Each `_target_` names the dotted import path of the class this mapping
# describes; the sibling keys are presumably passed to that class as keyword
# arguments by the config loader -- confirm against the consuming code.
#
# NOTE(review): this file was previously collapsed onto a single line, which
# is not parseable YAML. The nesting below was reconstructed from the key
# names (e.g. `rotary_embedding` placed under `self_attn`); verify it against
# the constructor signatures of the target classes.
_target_: text_recognizer.networks.transformer.decoder.Decoder
depth: 4

# Template for one decoder block, made up of self-attention, cross-attention,
# a normalization layer and a feed-forward network.
block:
  _target_: text_recognizer.networks.transformer.decoder.DecoderBlock

  # Causal attention over the decoder's own sequence.
  self_attn:
    _target_: text_recognizer.networks.transformer.attention.Attention
    dim: 64
    num_heads: 4
    dim_head: 64
    dropout_rate: 0.05
    causal: true
    rotary_embedding:
      _target_: text_recognizer.networks.transformer.embeddings.rotary.RotaryEmbedding
      # NOTE(review): 128 differs from dim_head (64) above -- confirm intended.
      dim: 128

  # Non-causal attention; same sizes as self_attn but no rotary embedding.
  cross_attn:
    _target_: text_recognizer.networks.transformer.attention.Attention
    dim: 64
    num_heads: 4
    dim_head: 64
    dropout_rate: 0.05
    causal: false

  norm:
    _target_: text_recognizer.networks.transformer.norm.RMSNorm
    # NOTE(review): 192 differs from the attention `dim` (64) -- confirm that
    # the normalized width matches what the attention layers consume.
    normalized_shape: 192

  ff:
    _target_: text_recognizer.networks.transformer.mlp.FeedForward
    # NOTE(review): no `dim` key is set here -- confirm FeedForward does not
    # require one.
    # Explicit null; presumably FeedForward picks its own output width --
    # TODO confirm.
    dim_out: null
    expansion_factor: 4
    glu: true
    dropout_rate: 0.2