diff options
Diffstat (limited to 'training/conf/network')
-rw-r--r-- | training/conf/network/conformer.yaml (renamed from training/conf/network/conformer/conformer.yaml) | 29 |
1 file changed, 19 insertions, 10 deletions
diff --git a/training/conf/network/conformer/conformer.yaml b/training/conf/network/conformer.yaml
index 2bb423f..1d72dd5 100644
--- a/training/conf/network/conformer/conformer.yaml
+++ b/training/conf/network/conformer.yaml
@@ -1,19 +1,17 @@
----
 _target_: text_recognizer.networks.conformer.Conformer
-depth: 16
+depth: 8
+num_classes: 57
+dim: &dim 144
+dim_gru: 144
 block:
   _target_: text_recognizer.networks.conformer.ConformerBlock
-  dim: &dim 128
+  dim: *dim
   attn:
-    _target_: text_recognizer.networks.transformer.Attention
+    _target_: text_recognizer.networks.conformer.Attention
     dim: *dim
-    num_heads: 8
+    heads: 8
     dim_head: 64
-    dropout_rate: 0.1
-    causal: false
-    rotary_embedding:
-      _target_: text_recognizer.networks.transformer.RotaryEmbedding
-      dim: 64
+    mult: 4
   ff:
     _target_: text_recognizer.networks.conformer.Feedforward
     dim: *dim
@@ -25,3 +23,14 @@ block:
     expansion_factor: 2
     kernel_size: 31
     dropout: 0.1
+subsampler:
+  _target_: text_recognizer.networks.conformer.Subsampler
+  pixel_pos_embedding:
+    _target_: text_recognizer.networks.transformer.AxialPositionalEmbedding
+    dim: 64
+    shape: [6, 127]
+  channels: 64
+  height: 6
+  dim: *dim
+  depth: 3
+  dropout: 0.1