diff options
Diffstat (limited to 'training/conf/experiment')
-rw-r--r-- | training/conf/experiment/conv_transformer_lines.yaml | 39 |
1 file changed, 24 insertions, 15 deletions
diff --git a/training/conf/experiment/conv_transformer_lines.yaml b/training/conf/experiment/conv_transformer_lines.yaml
index 8404cd1..38b13a5 100644
--- a/training/conf/experiment/conv_transformer_lines.yaml
+++ b/training/conf/experiment/conv_transformer_lines.yaml
@@ -18,7 +18,7 @@ summary: [[1, 1, 56, 1024], [1, 89]]
 
 criterion:
   ignore_index: *ignore_index
-  # label_smoothing: 0.1
+  label_smoothing: 0.05
 
 callbacks:
   stochastic_weight_averaging:
@@ -40,30 +40,38 @@ optimizers:
 
 lr_schedulers:
   network:
-    _target_: torch.optim.lr_scheduler.ReduceLROnPlateau
-    mode: min
-    factor: 0.5
-    patience: 10
-    threshold: 1.0e-4
-    threshold_mode: rel
-    cooldown: 0
-    min_lr: 1.0e-5
-    eps: 1.0e-8
+    _target_: torch.optim.lr_scheduler.OneCycleLR
+    max_lr: 3.0e-4
+    total_steps: null
+    epochs: *epochs
+    steps_per_epoch: 1284
+    pct_start: 0.3
+    anneal_strategy: cos
+    cycle_momentum: true
+    base_momentum: 0.85
+    max_momentum: 0.95
+    div_factor: 25.0
+    final_div_factor: 10000.0
+    three_phase: true
+    last_epoch: -1
     verbose: false
-    interval: epoch
-    monitor: val/loss
+    interval: step
+    monitor: val/cer
 
 datamodule:
-  batch_size: 16
+  batch_size: 8
+  train_fraction: 0.9
 
 network:
   input_dims: [1, 1, 56, 1024]
   num_classes: *num_classes
   pad_index: *ignore_index
+  encoder:
+    depth: 5
   decoder:
-    depth: 10
+    depth: 6
   pixel_embedding:
-    shape: [7, 128]
+    shape: [3, 64]
 
 model:
   max_output_len: *max_output_len
@@ -71,3 +79,4 @@ model:
 trainer:
   gradient_clip_val: 0.5
   max_epochs: *epochs
+  accumulate_grad_batches: 1