diff options
Diffstat (limited to 'training/conf/experiment')
-rw-r--r-- | training/conf/experiment/convformer_lines.yaml | 58 | ||||
-rw-r--r-- | training/conf/experiment/mammut_lines.yaml | 5 | ||||
-rw-r--r-- | training/conf/experiment/vit_lines.yaml | 3 |
3 files changed, 65 insertions, 1 deletions
diff --git a/training/conf/experiment/convformer_lines.yaml b/training/conf/experiment/convformer_lines.yaml new file mode 100644 index 0000000..f573433 --- /dev/null +++ b/training/conf/experiment/convformer_lines.yaml @@ -0,0 +1,58 @@ +# @package _global_ + +defaults: + - override /criterion: cross_entropy + - override /callbacks: htr + - override /datamodule: iam_lines + - override /network: convformer_lines + - override /model: lit_transformer + - override /lr_scheduler: cosine_annealing + - override /optimizer: adan + +tags: [lines, vit] +epochs: &epochs 320 +ignore_index: &ignore_index 3 +# summary: [[1, 1, 56, 1024], [1, 89]] + +logger: + wandb: + tags: ${tags} + +criterion: + ignore_index: *ignore_index + # label_smoothing: 0.05 + + +decoder: + max_output_len: 89 + +optimizer: + lr: 1.0e-3 + +# callbacks: +# stochastic_weight_averaging: +# _target_: pytorch_lightning.callbacks.StochasticWeightAveraging +# swa_epoch_start: 0.75 +# swa_lrs: 1.0e-5 +# annealing_epochs: 10 +# annealing_strategy: cos +# device: null + +lr_scheduler: + T_max: *epochs + +datamodule: + batch_size: 8 + train_fraction: 0.95 + +model: + max_output_len: 89 + +trainer: + fast_dev_run: false + gradient_clip_val: 1.0 + max_epochs: *epochs + accumulate_grad_batches: 1 + limit_train_batches: 1.0 + limit_val_batches: 1.0 + limit_test_batches: 1.0 diff --git a/training/conf/experiment/mammut_lines.yaml b/training/conf/experiment/mammut_lines.yaml index e74e219..eb6f765 100644 --- a/training/conf/experiment/mammut_lines.yaml +++ b/training/conf/experiment/mammut_lines.yaml @@ -39,12 +39,15 @@ lr_scheduler: T_max: *epochs datamodule: - batch_size: 8 + batch_size: 16 train_fraction: 0.95 model: max_output_len: 89 +optimizer: + lr: 1.0e-3 + trainer: fast_dev_run: false gradient_clip_val: 1.0 diff --git a/training/conf/experiment/vit_lines.yaml b/training/conf/experiment/vit_lines.yaml index 08ed481..2f7731e 100644 --- a/training/conf/experiment/vit_lines.yaml +++ 
b/training/conf/experiment/vit_lines.yaml @@ -26,6 +26,9 @@ criterion: decoder: max_output_len: 89 +optimizer: + lr: 1.0e-3 + # callbacks: # stochastic_weight_averaging: # _target_: pytorch_lightning.callbacks.StochasticWeightAveraging |