diff options
Diffstat (limited to 'training/conf/experiment')
-rw-r--r-- | training/conf/experiment/conv_transformer_lines.yaml | 19 |
1 file changed, 14 insertions, 5 deletions
diff --git a/training/conf/experiment/conv_transformer_lines.yaml b/training/conf/experiment/conv_transformer_lines.yaml
index 20e369e..fe9ef6e 100644
--- a/training/conf/experiment/conv_transformer_lines.yaml
+++ b/training/conf/experiment/conv_transformer_lines.yaml
@@ -10,7 +10,7 @@ defaults:
   - override /lr_schedulers: null
   - override /optimizers: null
 
-epochs: &epochs 300
+epochs: &epochs 620
 ignore_index: &ignore_index 3
 num_classes: &num_classes 57
 max_output_len: &max_output_len 89
@@ -18,6 +18,7 @@ summary: [[1, 1, 56, 1024], [1, 89]]
 
 criterion:
   ignore_index: *ignore_index
+  label_smoothing: 0.1
 
 mapping: &mapping
   mapping:
@@ -64,7 +65,7 @@ rotary_embedding: &rotary_embedding
 
 attn: &attn
   dim: &hidden_dim 256
-  num_heads: 6
+  num_heads: 4
   dim_head: 64
   dropout_rate: &dropout_rate 0.5
 
@@ -76,12 +77,12 @@ network:
   pad_index: *ignore_index
   encoder:
     _target_: text_recognizer.networks.encoders.efficientnet.EfficientNet
-    arch: b0
+    arch: b1
     stochastic_dropout_rate: 0.2
     bn_momentum: 0.99
     bn_eps: 1.0e-3
   decoder:
-    depth: 3
+    depth: 6
     _target_: text_recognizer.networks.transformer.layers.Decoder
     self_attn:
       _target_: text_recognizer.networks.transformer.attention.Attention
@@ -106,7 +107,15 @@ network:
   pixel_pos_embedding:
     _target_: text_recognizer.networks.transformer.embeddings.axial.AxialPositionalEmbedding
     dim: *hidden_dim
-    shape: [3, 64]
+    shape: &shape [3, 64]
+  axial_encoder:
+    _target_: text_recognizer.networks.transformer.axial_attention.encoder.AxialEncoder
+    dim: *hidden_dim
+    heads: 4
+    shape: *shape
+    depth: 2
+    dim_head: 64
+    dim_index: 1
 
 model:
   _target_: text_recognizer.models.transformer.TransformerLitModel