diff options
Diffstat (limited to 'training/conf/experiment')
-rw-r--r-- | training/conf/experiment/conv_transformer_paragraphs.yaml (renamed from training/conf/experiment/cnn_transformer_paragraphs.yaml) | 6 | ||||
-rw-r--r-- | training/conf/experiment/conv_transformer_paragraphs_wp.yaml (renamed from training/conf/experiment/cnn_transformer_paragraphs_wp.yaml) | 0 |
2 files changed, 3 insertions, 3 deletions
diff --git a/training/conf/experiment/cnn_transformer_paragraphs.yaml b/training/conf/experiment/conv_transformer_paragraphs.yaml
index 5ee5536..ebbd6ef 100644
--- a/training/conf/experiment/cnn_transformer_paragraphs.yaml
+++ b/training/conf/experiment/conv_transformer_paragraphs.yaml
@@ -38,7 +38,7 @@ callbacks:
 optimizers:
   madgrad:
     _target_: madgrad.MADGRAD
-    lr: 3.0e-4
+    lr: 2.0e-4
     momentum: 0.9
     weight_decay: 5.0e-6
     eps: 1.0e-6
@@ -48,7 +48,7 @@ optimizers:
 lr_schedulers:
   network:
     _target_: torch.optim.lr_scheduler.OneCycleLR
-    max_lr: 3.0e-4
+    max_lr: 2.0e-4
     total_steps: null
     epochs: *epochs
     steps_per_epoch: 632
@@ -134,7 +134,7 @@ trainer:
   stochastic_weight_avg: true
   auto_scale_batch_size: binsearch
   auto_lr_find: false
-  gradient_clip_val: 0.0
+  gradient_clip_val: 0.75
   fast_dev_run: false
   gpus: 1
   precision: 16
diff --git a/training/conf/experiment/cnn_transformer_paragraphs_wp.yaml b/training/conf/experiment/conv_transformer_paragraphs_wp.yaml
index 499a609..499a609 100644
--- a/training/conf/experiment/cnn_transformer_paragraphs_wp.yaml
+++ b/training/conf/experiment/conv_transformer_paragraphs_wp.yaml