# path: root/training/conf/network/decoder/transformer_decoder.yaml
# blob: 7dced160212846e91e9a292a8a52ad3b79004f46
# Top-level arguments for the decoder class named by `_target_`.
# NOTE(review): the `_target_` keys look like Hydra's instantiate convention —
# confirm against the code that loads this file.
_target_: text_recognizer.networks.transformer.Decoder

# Depth settings.
depth: 4
local_depth: 2

# Boolean switches.
pre_norm: true
has_pos_emb: true
# Arguments for the `Attention` class used as `self_attn`; this instance is
# configured with `causal: true`.
self_attn:
  _target_: text_recognizer.networks.transformer.attention.Attention
  dim: 64
  num_heads: 4
  dim_head: 64
  dropout_rate: 0.05
  causal: true
  # Nested target: a `RotaryEmbedding` object passed as `rotary_embedding`.
  # NOTE(review): rotary `dim` (128) differs from `dim_head` (64) above —
  # confirm this is intentional.
  rotary_embedding:
    _target_: text_recognizer.networks.transformer.embeddings.rotary.RotaryEmbedding
    dim: 128
# Arguments for the `LocalAttention` class used as `local_self_attn`.
local_self_attn:
  _target_: text_recognizer.networks.transformer.local_attention.LocalAttention
  dim: 64
  num_heads: 4
  dim_head: 64
  dropout_rate: 0.05
  # Local-attention-specific settings; presumably the attention window length
  # and the number of preceding windows attended to — TODO confirm against
  # `LocalAttention`.
  window_size: 22
  look_back: 1
  # Nested target: a `RotaryEmbedding` object passed as `rotary_embedding`.
  # NOTE(review): rotary `dim` (128) differs from `dim_head` (64) above —
  # confirm this is intentional.
  rotary_embedding:
    _target_: text_recognizer.networks.transformer.embeddings.rotary.RotaryEmbedding
    dim: 128
# Arguments for the `Attention` class used as `cross_attn`. Unlike `self_attn`
# this instance sets `causal: false` and has no `rotary_embedding` entry.
cross_attn:
  _target_: text_recognizer.networks.transformer.attention.Attention
  causal: false
  # Size settings.
  dim: 64
  dim_head: 64
  num_heads: 4
  # Regularisation.
  dropout_rate: 0.05
# Arguments for the `ScaleNorm` class used as `norm`.
# NOTE(review): `normalized_shape` (192) differs from the attention `dim` (64)
# used elsewhere in this file — confirm the two are meant to differ.
norm:
  _target_: text_recognizer.networks.transformer.norm.ScaleNorm
  normalized_shape: 192
# Arguments for the `FeedForward` class used as `ff`.
ff:
  _target_: text_recognizer.networks.transformer.mlp.FeedForward
  # Explicit null; presumably lets `FeedForward` pick its own output size —
  # TODO confirm against the class.
  dim_out: null
  expansion_factor: 4
  glu: true
  dropout_rate: 0.2