diff options
Diffstat (limited to 'training/conf/experiment')
-rw-r--r-- | training/conf/experiment/conv_transformer_lines.yaml | 50 | ||||
-rw-r--r-- | training/conf/experiment/conv_transformer_paragraphs.yaml | 68 |
2 files changed, 69 insertions, 49 deletions
diff --git a/training/conf/experiment/conv_transformer_lines.yaml b/training/conf/experiment/conv_transformer_lines.yaml index 38b13a5..260014c 100644 --- a/training/conf/experiment/conv_transformer_lines.yaml +++ b/training/conf/experiment/conv_transformer_lines.yaml @@ -30,37 +30,35 @@ callbacks: device: null optimizers: - radam: - _target_: torch.optim.RAdam - lr: 3.0e-4 - betas: [0.9, 0.999] - weight_decay: 0 - eps: 1.0e-8 - parameters: network + _target_: torch.optim.RAdam + lr: 3.0e-4 + betas: [0.9, 0.999] + weight_decay: 0 + eps: 1.0e-8 + parameters: network lr_schedulers: - network: - _target_: torch.optim.lr_scheduler.OneCycleLR - max_lr: 3.0e-4 - total_steps: null - epochs: *epochs - steps_per_epoch: 1284 - pct_start: 0.3 - anneal_strategy: cos - cycle_momentum: true - base_momentum: 0.85 - max_momentum: 0.95 - div_factor: 25.0 - final_div_factor: 10000.0 - three_phase: true - last_epoch: -1 - verbose: false - interval: step - monitor: val/cer + _target_: torch.optim.lr_scheduler.OneCycleLR + max_lr: 3.0e-4 + total_steps: null + epochs: *epochs + steps_per_epoch: 1354 + pct_start: 0.3 + anneal_strategy: cos + cycle_momentum: true + base_momentum: 0.85 + max_momentum: 0.95 + div_factor: 25.0 + final_div_factor: 10000.0 + three_phase: true + last_epoch: -1 + verbose: false + interval: step + monitor: val/cer datamodule: batch_size: 8 - train_fraction: 0.9 + train_fraction: 0.95 network: input_dims: [1, 1, 56, 1024] diff --git a/training/conf/experiment/conv_transformer_paragraphs.yaml b/training/conf/experiment/conv_transformer_paragraphs.yaml index 1465e62..7f0273f 100644 --- a/training/conf/experiment/conv_transformer_paragraphs.yaml +++ b/training/conf/experiment/conv_transformer_paragraphs.yaml @@ -10,13 +10,15 @@ defaults: - override /lr_schedulers: null - override /optimizers: null -epochs: &epochs 629 +epochs: &epochs 600 +num_classes: &num_classes 58 ignore_index: &ignore_index 3 +max_output_len: &max_output_len 682 summary: [[1, 1, 576, 640], [1, 682]] criterion: ignore_index: *ignore_index - # label_smoothing: 0.1 + label_smoothing: 0.05 callbacks: stochastic_weight_averaging: @@ -28,32 +30,52 @@ callbacks: device: null optimizers: - radam: - _target_: torch.optim.RAdam - lr: 1.5e-4 - betas: [0.9, 0.999] - weight_decay: 0 - eps: 1.0e-8 - parameters: network + _target_: torch.optim.RAdam + lr: 3.0e-4 + betas: [0.9, 0.999] + weight_decay: 0 + eps: 1.0e-8 + parameters: network lr_schedulers: - network: - _target_: torch.optim.lr_scheduler.ReduceLROnPlateau - mode: min - factor: 0.5 - patience: 10 - threshold: 1.0e-4 - threshold_mode: rel - cooldown: 0 - min_lr: 1.0e-6 - eps: 1.0e-8 - verbose: false - interval: epoch - monitor: val/loss + _target_: torch.optim.lr_scheduler.OneCycleLR + max_lr: 2.0e-4 + total_steps: null + epochs: *epochs + steps_per_epoch: 3201 + pct_start: 0.1 + anneal_strategy: cos + cycle_momentum: true + base_momentum: 0.85 + max_momentum: 0.95 + div_factor: 25.0 + final_div_factor: 10000.0 + three_phase: true + last_epoch: -1 + verbose: false + interval: step + monitor: val/cer datamodule: batch_size: 6 - train_fraction: 0.9 + train_fraction: 0.95 + +network: + input_dims: [1, 1, 576, 640] + num_classes: *num_classes + pad_index: *ignore_index + encoder: + depth: 5 + decoder: + depth: 6 + pixel_embedding: + shape: [36, 40] + +model: + max_output_len: *max_output_len trainer: + gradient_clip_val: 0.5 max_epochs: *epochs + accumulate_grad_batches: 1 + resume_from_checkpoint: /home/aktersnurra/projects/text-recognizer/training/logs/runs/2022-06-16/21-00-39/checkpoints/last.ckpt |