diff options
author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2022-06-18 00:59:17 +0200 |
---|---|---|
committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2022-06-18 00:59:17 +0200 |
commit | 062eebf0d690365cf7d9f6019d147ea195cc3a63 (patch) | |
tree | 0e6c6ba1a4c23ed9b51787101d4f7b588830256f /training | |
parent | 7dfa030557ad500a54126b8af40f6f0c42c5d620 (diff) |
Update configs
Diffstat (limited to 'training')
-rw-r--r-- | training/conf/config.yaml | 6 | ||||
-rw-r--r-- | training/conf/experiment/conv_transformer_lines.yaml | 50 | ||||
-rw-r--r-- | training/conf/experiment/conv_transformer_paragraphs.yaml | 68 | ||||
-rw-r--r-- | training/conf/lr_schedulers/cosine_annealing.yaml | 13 | ||||
-rw-r--r-- | training/conf/optimizers/radam.yaml | 13 |
5 files changed, 83 insertions, 67 deletions
```diff
diff --git a/training/conf/config.yaml b/training/conf/config.yaml
index f6118a9..fc06f7f 100644
--- a/training/conf/config.yaml
+++ b/training/conf/config.yaml
@@ -7,13 +7,11 @@ defaults:
   - datamodule: iam_extended_paragraphs
   - hydra: default
   - logger: wandb
-  - lr_schedulers:
-      - cosine_annealing
+  - lr_schedulers: cosine_annealing
   - mapping: characters
   - model: lit_transformer
   - network: conv_transformer
-  - optimizers:
-      - radam
+  - optimizers: radam
   - trainer: default

 seed: 4711
diff --git a/training/conf/experiment/conv_transformer_lines.yaml b/training/conf/experiment/conv_transformer_lines.yaml
index 38b13a5..260014c 100644
--- a/training/conf/experiment/conv_transformer_lines.yaml
+++ b/training/conf/experiment/conv_transformer_lines.yaml
@@ -30,37 +30,35 @@ callbacks:
     device: null

 optimizers:
-  radam:
-    _target_: torch.optim.RAdam
-    lr: 3.0e-4
-    betas: [0.9, 0.999]
-    weight_decay: 0
-    eps: 1.0e-8
-    parameters: network
+  _target_: torch.optim.RAdam
+  lr: 3.0e-4
+  betas: [0.9, 0.999]
+  weight_decay: 0
+  eps: 1.0e-8
+  parameters: network

 lr_schedulers:
-  network:
-    _target_: torch.optim.lr_scheduler.OneCycleLR
-    max_lr: 3.0e-4
-    total_steps: null
-    epochs: *epochs
-    steps_per_epoch: 1284
-    pct_start: 0.3
-    anneal_strategy: cos
-    cycle_momentum: true
-    base_momentum: 0.85
-    max_momentum: 0.95
-    div_factor: 25.0
-    final_div_factor: 10000.0
-    three_phase: true
-    last_epoch: -1
-    verbose: false
-    interval: step
-    monitor: val/cer
+  _target_: torch.optim.lr_scheduler.OneCycleLR
+  max_lr: 3.0e-4
+  total_steps: null
+  epochs: *epochs
+  steps_per_epoch: 1354
+  pct_start: 0.3
+  anneal_strategy: cos
+  cycle_momentum: true
+  base_momentum: 0.85
+  max_momentum: 0.95
+  div_factor: 25.0
+  final_div_factor: 10000.0
+  three_phase: true
+  last_epoch: -1
+  verbose: false
+  interval: step
+  monitor: val/cer

 datamodule:
   batch_size: 8
-  train_fraction: 0.9
+  train_fraction: 0.95

 network:
   input_dims: [1, 1, 56, 1024]
diff --git a/training/conf/experiment/conv_transformer_paragraphs.yaml b/training/conf/experiment/conv_transformer_paragraphs.yaml
index 1465e62..7f0273f 100644
--- a/training/conf/experiment/conv_transformer_paragraphs.yaml
+++ b/training/conf/experiment/conv_transformer_paragraphs.yaml
@@ -10,13 +10,15 @@ defaults:
   - override /lr_schedulers: null
   - override /optimizers: null

-epochs: &epochs 629
+epochs: &epochs 600
+num_classes: &num_classes 58
 ignore_index: &ignore_index 3
+max_output_len: &max_output_len 682
 summary: [[1, 1, 576, 640], [1, 682]]

 criterion:
   ignore_index: *ignore_index
-  # label_smoothing: 0.1
+  label_smoothing: 0.05

 callbacks:
   stochastic_weight_averaging:
@@ -28,32 +30,52 @@ callbacks:
     device: null

 optimizers:
-  radam:
-    _target_: torch.optim.RAdam
-    lr: 1.5e-4
-    betas: [0.9, 0.999]
-    weight_decay: 0
-    eps: 1.0e-8
-    parameters: network
+  _target_: torch.optim.RAdam
+  lr: 3.0e-4
+  betas: [0.9, 0.999]
+  weight_decay: 0
+  eps: 1.0e-8
+  parameters: network

 lr_schedulers:
-  network:
-    _target_: torch.optim.lr_scheduler.ReduceLROnPlateau
-    mode: min
-    factor: 0.5
-    patience: 10
-    threshold: 1.0e-4
-    threshold_mode: rel
-    cooldown: 0
-    min_lr: 1.0e-6
-    eps: 1.0e-8
-    verbose: false
-    interval: epoch
-    monitor: val/loss
+  _target_: torch.optim.lr_scheduler.OneCycleLR
+  max_lr: 2.0e-4
+  total_steps: null
+  epochs: *epochs
+  steps_per_epoch: 3201
+  pct_start: 0.1
+  anneal_strategy: cos
+  cycle_momentum: true
+  base_momentum: 0.85
+  max_momentum: 0.95
+  div_factor: 25.0
+  final_div_factor: 10000.0
+  three_phase: true
+  last_epoch: -1
+  verbose: false
+  interval: step
+  monitor: val/cer

 datamodule:
   batch_size: 6
-  train_fraction: 0.9
+  train_fraction: 0.95
+
+network:
+  input_dims: [1, 1, 576, 640]
+  num_classes: *num_classes
+  pad_index: *ignore_index
+  encoder:
+    depth: 5
+  decoder:
+    depth: 6
+  pixel_embedding:
+    shape: [36, 40]
+
+model:
+  max_output_len: *max_output_len

 trainer:
+  gradient_clip_val: 0.5
   max_epochs: *epochs
+  accumulate_grad_batches: 1
+  resume_from_checkpoint: /home/aktersnurra/projects/text-recognizer/training/logs/runs/2022-06-16/21-00-39/checkpoints/last.ckpt
diff --git a/training/conf/lr_schedulers/cosine_annealing.yaml b/training/conf/lr_schedulers/cosine_annealing.yaml
index c53ee3a..e8364f0 100644
--- a/training/conf/lr_schedulers/cosine_annealing.yaml
+++ b/training/conf/lr_schedulers/cosine_annealing.yaml
@@ -1,8 +1,7 @@
-cosine_annealing:
-  _target_: torch.optim.lr_scheduler.CosineAnnealingLR
-  T_max: 256
-  eta_min: 0.0
-  last_epoch: -1
+_target_: torch.optim.lr_scheduler.CosineAnnealingLR
+T_max: 256
+eta_min: 0.0
+last_epoch: -1

-  interval: epoch
-  monitor: val/loss
+interval: epoch
+monitor: val/loss
diff --git a/training/conf/optimizers/radam.yaml b/training/conf/optimizers/radam.yaml
index 7ee1234..d11fcb5 100644
--- a/training/conf/optimizers/radam.yaml
+++ b/training/conf/optimizers/radam.yaml
@@ -1,7 +1,6 @@
-radam:
-  _target_: torch.optim.RAdam
-  lr: 1.5e-4
-  betas: [0.9, 0.999]
-  weight_decay: 1.0e-4
-  eps: 1.0e-8
-  parameters: network
+_target_: torch.optim.RAdam
+lr: 1.5e-4
+betas: [0.9, 0.999]
+weight_decay: 1.0e-4
+eps: 1.0e-8
+parameters: network
```