diff options
Diffstat (limited to 'training/conf/experiment/conv_transformer_paragraphs.yaml')
-rw-r--r-- | training/conf/experiment/conv_transformer_paragraphs.yaml | 13 |
1 files changed, 6 insertions, 7 deletions
diff --git a/training/conf/experiment/conv_transformer_paragraphs.yaml b/training/conf/experiment/conv_transformer_paragraphs.yaml index 5fb7377..e958367 100644 --- a/training/conf/experiment/conv_transformer_paragraphs.yaml +++ b/training/conf/experiment/conv_transformer_paragraphs.yaml @@ -47,11 +47,11 @@ optimizers: lr_schedulers: network: _target_: torch.optim.lr_scheduler.OneCycleLR - max_lr: 1.5e-4 + max_lr: 1.0e-4 total_steps: null epochs: *epochs steps_per_epoch: 722 - pct_start: 0.03 + pct_start: 0.01 anneal_strategy: cos cycle_momentum: true base_momentum: 0.85 @@ -87,8 +87,6 @@ network: _target_: text_recognizer.networks.conv_transformer.ConvTransformer input_dims: [1, 576, 640] hidden_dim: &hidden_dim 192 - encoder_dim: 1280 - dropout_rate: 0.05 num_classes: *num_classes pad_index: *ignore_index encoder: @@ -99,7 +97,7 @@ network: bn_momentum: 0.99 bn_eps: 1.0e-3 decoder: - depth: 4 + depth: 3 local_depth: 2 _target_: text_recognizer.networks.transformer.layers.Decoder self_attn: @@ -114,8 +112,9 @@ network: local_self_attn: _target_: text_recognizer.networks.transformer.local_attention.LocalAttention << : *attn - window_size: 11 - look_back: 2 + window_size: 31 + look_back: 1 + autopad: true << : *rotary_embedding norm: _target_: text_recognizer.networks.transformer.norm.ScaleNorm |