defaults:
  - rotary_emb: null  # Hydra defaults list: no rotary embedding config group selected

_target_: text_recognizer.networks.transformer.Decoder  # class instantiated from this config
dim: 96                # model (embedding) dimension
depth: 2               # number of decoder layers
num_heads: 8           # attention heads per layer
attn_fn: text_recognizer.networks.transformer.attention.Attention
attn_kwargs:
  dim_head: 16         # dimension per attention head
  dropout_rate: 0.2
norm_fn: torch.nn.LayerNorm
ff_fn: text_recognizer.networks.transformer.mlp.FeedForward
ff_kwargs:
  dim_out: null        # null: output dimension falls back to dim
  expansion_factor: 4  # feed-forward hidden size: 4 * 96 = 384
  glu: true            # use a gated linear unit in the feed-forward block
  dropout_rate: 0.2
cross_attend: true     # add cross-attention over the encoder output
pre_norm: true         # pre-norm: normalize before each sublayer
rotary_emb: null       # no rotary positional embedding
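
# How a config like this is typically consumed (a minimal sketch, not part of
# the original file): the `_target_` key suggests Hydra-style instantiation.
# The config name "decoder" below is hypothetical and stands in for this file;
# the surrounding training code is not shown in this repo section.
#
#   from hydra import compose, initialize
#   from hydra.utils import instantiate
#
#   with initialize(config_path="."):
#       cfg = compose(config_name="decoder")
#       decoder = instantiate(cfg)  # builds the Decoder with the values above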