# path: root/training/conf/network/conformer.yaml
# blob: 1d72dd516108f29b889b04363d1a69aea5a5e709 (plain)
# Top-level settings for the object named by `_target_`
# (text_recognizer.networks.conformer.Conformer).
# NOTE(review): the `_target_` key looks like Hydra's instantiate
# convention — confirm against the code that loads this file.
_target_: text_recognizer.networks.conformer.Conformer
depth: 8
num_classes: 57
# Defines the anchor `dim`; the `block` and `subsampler` sections of this
# file reference it through `*dim`, so editing this value changes them too.
dim: &dim 144
# NOTE(review): same number as `dim` but written as a literal rather than
# `*dim` — confirm whether it is meant to vary independently.
dim_gru: 144
# Settings for text_recognizer.networks.conformer.ConformerBlock.
# It carries three nested sub-configs (`attn`, `ff`, `conv`), each with its
# own `_target_`; every `dim` below is the `*dim` alias of the top-level
# `dim` anchor.
block:
  _target_: text_recognizer.networks.conformer.ConformerBlock
  dim: *dim
  # Attention sub-config.
  attn:
    _target_: text_recognizer.networks.conformer.Attention
    dim: *dim
    heads: 8
    dim_head: 64
    # NOTE(review): meaning of `mult` is not visible from this file —
    # check the Attention constructor.
    mult: 4
  # Feedforward sub-config.
  ff:
    _target_: text_recognizer.networks.conformer.Feedforward
    dim: *dim
    expansion_factor: 4
    dropout: 0.1
  # Convolution sub-config; note it uses a different `expansion_factor`
  # (2) than `ff` (4).
  conv:
    _target_: text_recognizer.networks.conformer.ConformerConv
    dim: *dim
    expansion_factor: 2
    kernel_size: 31
    dropout: 0.1
# Settings for text_recognizer.networks.conformer.Subsampler.
subsampler:
  _target_: text_recognizer.networks.conformer.Subsampler
  # Nested config for a positional-embedding object taken from the
  # `transformer` module rather than the `conformer` module.
  pixel_pos_embedding:
    _target_: text_recognizer.networks.transformer.AxialPositionalEmbedding
    # NOTE(review): equals `channels` (64) below but is a separate
    # literal — presumably the two must match; confirm.
    dim: 64
    # NOTE(review): first entry equals `height` (6) below — presumably
    # [height, width]; confirm against AxialPositionalEmbedding.
    shape: [6, 127]
  channels: 64
  height: 6
  # Alias of the top-level `dim` anchor.
  dim: *dim
  # This `depth` (3) is separate from the top-level `depth` (8).
  depth: 3
  dropout: 0.1