From 2c9066b685d41ef0ab5ea94e938b8a30b4123656 Mon Sep 17 00:00:00 2001
From: Gustaf Rydholm
Date: Wed, 15 Jun 2022 00:15:40 +0200
Subject: Update configs

---
 .../conf/experiment/conv_transformer_lines.yaml | 39 +++++++++++++---------
 1 file changed, 24 insertions(+), 15 deletions(-)

(limited to 'training/conf/experiment')

diff --git a/training/conf/experiment/conv_transformer_lines.yaml b/training/conf/experiment/conv_transformer_lines.yaml
index 8404cd1..38b13a5 100644
--- a/training/conf/experiment/conv_transformer_lines.yaml
+++ b/training/conf/experiment/conv_transformer_lines.yaml
@@ -18,7 +18,7 @@ summary: [[1, 1, 56, 1024], [1, 89]]
 
 criterion:
   ignore_index: *ignore_index
-  # label_smoothing: 0.1
+  label_smoothing: 0.05
 
 callbacks:
   stochastic_weight_averaging:
@@ -40,30 +40,38 @@ optimizers:
 
 lr_schedulers:
   network:
-    _target_: torch.optim.lr_scheduler.ReduceLROnPlateau
-    mode: min
-    factor: 0.5
-    patience: 10
-    threshold: 1.0e-4
-    threshold_mode: rel
-    cooldown: 0
-    min_lr: 1.0e-5
-    eps: 1.0e-8
+    _target_: torch.optim.lr_scheduler.OneCycleLR
+    max_lr: 3.0e-4
+    total_steps: null
+    epochs: *epochs
+    steps_per_epoch: 1284
+    pct_start: 0.3
+    anneal_strategy: cos
+    cycle_momentum: true
+    base_momentum: 0.85
+    max_momentum: 0.95
+    div_factor: 25.0
+    final_div_factor: 10000.0
+    three_phase: true
+    last_epoch: -1
     verbose: false
-    interval: epoch
-    monitor: val/loss
+    interval: step
+    monitor: val/cer
 
 datamodule:
-  batch_size: 16
+  batch_size: 8
+  train_fraction: 0.9
 
 network:
   input_dims: [1, 1, 56, 1024]
   num_classes: *num_classes
   pad_index: *ignore_index
+  encoder:
+    depth: 5
   decoder:
-    depth: 10
+    depth: 6
   pixel_embedding:
-    shape: [7, 128]
+    shape: [3, 64]
 
 model:
   max_output_len: *max_output_len
@@ -71,3 +79,4 @@ model:
 trainer:
   gradient_clip_val: 0.5
   max_epochs: *epochs
+  accumulate_grad_batches: 1
-- 
cgit v1.2.3-70-g09d2