diff options
Diffstat (limited to 'training/conf/experiment/vit_lines.yaml')
-rw-r--r-- | training/conf/experiment/vit_lines.yaml | 43 |
1 file changed, 13 insertions, 30 deletions
diff --git a/training/conf/experiment/vit_lines.yaml b/training/conf/experiment/vit_lines.yaml index f3049ea..08ed481 100644 --- a/training/conf/experiment/vit_lines.yaml +++ b/training/conf/experiment/vit_lines.yaml @@ -6,11 +6,11 @@ defaults: - override /datamodule: iam_lines - override /network: vit_lines - override /model: lit_transformer - - override /lr_scheduler: null - - override /optimizer: null + - override /lr_scheduler: cosine_annealing + - override /optimizer: adan tags: [lines, vit] -epochs: &epochs 128 +epochs: &epochs 320 ignore_index: &ignore_index 3 # summary: [[1, 1, 56, 1024], [1, 89]] @@ -26,37 +26,20 @@ criterion: decoder: max_output_len: 89 -callbacks: - stochastic_weight_averaging: - _target_: pytorch_lightning.callbacks.StochasticWeightAveraging - swa_epoch_start: 0.75 - swa_lrs: 1.0e-5 - annealing_epochs: 10 - annealing_strategy: cos - device: null - -optimizer: - _target_: adan_pytorch.Adan - lr: 3.0e-4 - betas: [0.02, 0.08, 0.01] - weight_decay: 0.02 +# callbacks: +# stochastic_weight_averaging: +# _target_: pytorch_lightning.callbacks.StochasticWeightAveraging +# swa_epoch_start: 0.75 +# swa_lrs: 1.0e-5 +# annealing_epochs: 10 +# annealing_strategy: cos +# device: null lr_scheduler: - _target_: torch.optim.lr_scheduler.ReduceLROnPlateau - mode: min - factor: 0.8 - patience: 10 - threshold: 1.0e-4 - threshold_mode: rel - cooldown: 0 - min_lr: 1.0e-5 - eps: 1.0e-8 - verbose: false - interval: epoch - monitor: val/cer + T_max: *epochs datamodule: - batch_size: 16 + batch_size: 8 train_fraction: 0.95 model: |