summary refs log tree commit diff
path: root/training/conf/network
diff options
context:
space:
mode:
Diffstat (limited to 'training/conf/network')
-rw-r--r-- training/conf/network/conformer.yaml (renamed from training/conf/network/conformer/conformer.yaml) 29
1 files changed, 19 insertions, 10 deletions
diff --git a/training/conf/network/conformer/conformer.yaml b/training/conf/network/conformer.yaml
index 2bb423f..1d72dd5 100644
--- a/training/conf/network/conformer/conformer.yaml
+++ b/training/conf/network/conformer.yaml
@@ -1,19 +1,17 @@
----
_target_: text_recognizer.networks.conformer.Conformer
-depth: 16
+depth: 8
+num_classes: 57
+dim: &dim 144
+dim_gru: 144
block:
_target_: text_recognizer.networks.conformer.ConformerBlock
- dim: &dim 128
+ dim: *dim
attn:
- _target_: text_recognizer.networks.transformer.Attention
+ _target_: text_recognizer.networks.conformer.Attention
dim: *dim
- num_heads: 8
+ heads: 8
dim_head: 64
- dropout_rate: 0.1
- causal: false
- rotary_embedding:
- _target_: text_recognizer.networks.transformer.RotaryEmbedding
- dim: 64
+ mult: 4
ff:
_target_: text_recognizer.networks.conformer.Feedforward
dim: *dim
@@ -25,3 +23,14 @@ block:
expansion_factor: 2
kernel_size: 31
dropout: 0.1
+subsampler:
+ _target_: text_recognizer.networks.conformer.Subsampler
+ pixel_pos_embedding:
+ _target_: text_recognizer.networks.transformer.AxialPositionalEmbedding
+ dim: 64
+ shape: [6, 127]
+ channels: 64
+ height: 6
+ dim: *dim
+ depth: 3
+ dropout: 0.1