diff options
Diffstat (limited to 'training')
-rw-r--r-- | training/conf/experiment/conv_transformer_paragraphs.yaml | 35 | ||||
-rw-r--r-- | training/conf/network/conv_transformer.yaml | 4 | ||||
-rw-r--r-- | training/conf/network/efficientnet.yaml | 1 |
3 files changed, 17 insertions, 23 deletions
diff --git a/training/conf/experiment/conv_transformer_paragraphs.yaml b/training/conf/experiment/conv_transformer_paragraphs.yaml index 80f6ce6..41c236d 100644 --- a/training/conf/experiment/conv_transformer_paragraphs.yaml +++ b/training/conf/experiment/conv_transformer_paragraphs.yaml @@ -41,26 +41,21 @@ optimizer: eps: 1.0e-8 lr_scheduler: - _target_: torch.optim.lr_scheduler.OneCycleLR - max_lr: 3.0e-4 - total_steps: null - epochs: *epochs - steps_per_epoch: 3358 - pct_start: 0.1 - anneal_strategy: cos - cycle_momentum: true - base_momentum: 0.85 - max_momentum: 0.95 - div_factor: 25.0 - final_div_factor: 10000.0 - three_phase: true - last_epoch: -1 + _target_: torch.optim.lr_scheduler.ReduceLROnPlateau + mode: min + factor: 0.8 + patience: 10 + threshold: 1.0e-4 + threshold_mode: rel + cooldown: 0 + min_lr: 1.0e-5 + eps: 1.0e-8 verbose: false - interval: step + interval: epoch monitor: val/cer datamodule: - batch_size: 6 + batch_size: 8 train_fraction: 0.95 network: @@ -68,16 +63,16 @@ network: num_classes: *num_classes pad_index: *ignore_index encoder: - depth: 6 - decoder: depth: 4 + decoder: + depth: 6 pixel_embedding: - shape: [18, 80] + shape: [18, 79] model: max_output_len: *max_output_len trainer: - gradient_clip_val: 0.5 + gradient_clip_val: 1.0 max_epochs: *epochs accumulate_grad_batches: 1 diff --git a/training/conf/network/conv_transformer.yaml b/training/conf/network/conv_transformer.yaml index 016416c..24857c8 100644 --- a/training/conf/network/conv_transformer.yaml +++ b/training/conf/network/conv_transformer.yaml @@ -1,6 +1,6 @@ _target_: text_recognizer.networks.ConvTransformer input_dims: [1, 1, 576, 640] -hidden_dim: &hidden_dim 128 +hidden_dim: &hidden_dim 144 num_classes: 58 pad_index: 3 encoder: @@ -46,4 +46,4 @@ decoder: pixel_embedding: _target_: text_recognizer.networks.transformer.AxialPositionalEmbedding dim: *hidden_dim - shape: [18, 80] + shape: [18, 79] diff --git a/training/conf/network/efficientnet.yaml b/training/conf/network/efficientnet.yaml index bbe68dd..4dd148f 100644 --- a/training/conf/network/efficientnet.yaml +++ b/training/conf/network/efficientnet.yaml @@ -3,5 +3,4 @@ arch: b0 stochastic_dropout_rate: 0.2 bn_momentum: 0.99 bn_eps: 1.0e-3 -stride: [2, 1] out_channels: 144 |