blob: 1d72dd516108f29b889b04363d1a69aea5a5e709 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
|
# Configuration for the Conformer text-recognition network.
# Each `_target_` names the class that the enclosing mapping configures
# (Hydra-style instantiate convention -- confirm against the loader in use).
#
# NOTE(review): nesting was reconstructed from a copy whose indentation was
# lost. Duplicate-key constraints pin most levels; please confirm that `mult`
# belongs to `attn` and that `channels`/`height` belong to `subsampler`.
_target_: text_recognizer.networks.conformer.Conformer
depth: 8
num_classes: 57
# Anchor for the model width; every `*dim` alias below resolves to this value.
dim: &dim 144
dim_gru: 144

# Settings for a single Conformer block and its three sub-modules.
block:
  _target_: text_recognizer.networks.conformer.ConformerBlock
  dim: *dim
  attn:
    _target_: text_recognizer.networks.conformer.Attention
    dim: *dim
    heads: 8
    dim_head: 64
    mult: 4
  ff:
    _target_: text_recognizer.networks.conformer.Feedforward
    dim: *dim
    expansion_factor: 4
    dropout: 0.1
  conv:
    _target_: text_recognizer.networks.conformer.ConformerConv
    dim: *dim
    expansion_factor: 2
    kernel_size: 31
    dropout: 0.1

# Settings for the subsampler and its positional embedding.
subsampler:
  _target_: text_recognizer.networks.conformer.Subsampler
  pixel_pos_embedding:
    _target_: text_recognizer.networks.transformer.AxialPositionalEmbedding
    # `dim` equals `channels` and `shape[0]` equals `height` below; presumably
    # these must stay in sync -- TODO confirm.
    dim: 64
    shape: [6, 127]
  channels: 64
  height: 6
  dim: *dim
  depth: 3
  dropout: 0.1
|