summary | refs | log | tree | commit | diff
path: root/training/conf/experiment/conv_transformer_lines.yaml
diff options
context:
space:
mode:
author: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-11-21 21:35:18 +0100
committer: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-11-21 21:35:18 +0100
commit: ce99fd904576b8daeb2985f3341793c2a33e9d45 (patch)
tree: c30280046822b15d1723200a1fd58f73c0f28bc6 /training/conf/experiment/conv_transformer_lines.yaml
parent: b44de0e11281c723ec426f8bec8ca0897ecfe3ff (diff)
Update config conv lines
Diffstat (limited to 'training/conf/experiment/conv_transformer_lines.yaml')
-rw-r--r-- training/conf/experiment/conv_transformer_lines.yaml 19
1 file changed, 14 insertions, 5 deletions
diff --git a/training/conf/experiment/conv_transformer_lines.yaml b/training/conf/experiment/conv_transformer_lines.yaml
index 20e369e..fe9ef6e 100644
--- a/training/conf/experiment/conv_transformer_lines.yaml
+++ b/training/conf/experiment/conv_transformer_lines.yaml
@@ -10,7 +10,7 @@ defaults:
- override /lr_schedulers: null
- override /optimizers: null
-epochs: &epochs 300
+epochs: &epochs 620
ignore_index: &ignore_index 3
num_classes: &num_classes 57
max_output_len: &max_output_len 89
@@ -18,6 +18,7 @@ summary: [[1, 1, 56, 1024], [1, 89]]
criterion:
ignore_index: *ignore_index
+ label_smoothing: 0.1
mapping: &mapping
mapping:
@@ -64,7 +65,7 @@ rotary_embedding: &rotary_embedding
attn: &attn
dim: &hidden_dim 256
- num_heads: 6
+ num_heads: 4
dim_head: 64
dropout_rate: &dropout_rate 0.5
@@ -76,12 +77,12 @@ network:
pad_index: *ignore_index
encoder:
_target_: text_recognizer.networks.encoders.efficientnet.EfficientNet
- arch: b0
+ arch: b1
stochastic_dropout_rate: 0.2
bn_momentum: 0.99
bn_eps: 1.0e-3
decoder:
- depth: 3
+ depth: 6
_target_: text_recognizer.networks.transformer.layers.Decoder
self_attn:
_target_: text_recognizer.networks.transformer.attention.Attention
@@ -106,7 +107,15 @@ network:
pixel_pos_embedding:
_target_: text_recognizer.networks.transformer.embeddings.axial.AxialPositionalEmbedding
dim: *hidden_dim
- shape: [3, 64]
+ shape: &shape [3, 64]
+ axial_encoder:
+ _target_: text_recognizer.networks.transformer.axial_attention.encoder.AxialEncoder
+ dim: *hidden_dim
+ heads: 4
+ shape: *shape
+ depth: 2
+ dim_head: 64
+ dim_index: 1
model:
_target_: text_recognizer.models.transformer.TransformerLitModel