diff options
Diffstat (limited to 'training')
-rw-r--r-- | training/conf/config.yaml | 6 | ||||
-rw-r--r-- | training/conf/optimizers/madgrad.yaml | 8 | ||||
-rw-r--r-- | training/conf/optimizers/radam.yaml | 7 |
3 files changed, 10 insertions, 11 deletions
diff --git a/training/conf/config.yaml b/training/conf/config.yaml
index e783efd..11bb551 100644
--- a/training/conf/config.yaml
+++ b/training/conf/config.yaml
@@ -7,13 +7,13 @@ defaults:
   - datamodule: iam_extended_paragraphs
   - hydra: default
   - logger: wandb
-  - lr_schedulers:
+  - lr_schedulers:
     - cosine_annealing
   - mapping: characters # word_piece
   - model: lit_transformer
   - network: conv_transformer
-  - optimizers:
-    - madgrad
+  - optimizers:
+    - radam
   - trainer: default
 
 seed: 4711
diff --git a/training/conf/optimizers/madgrad.yaml b/training/conf/optimizers/madgrad.yaml
deleted file mode 100644
index b6507b9..0000000
--- a/training/conf/optimizers/madgrad.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-madgrad:
-  _target_: madgrad.MADGRAD
-  lr: 1.0e-4
-  momentum: 0.9
-  weight_decay: 0
-  eps: 1.0e-6
-
-  parameters: network
diff --git a/training/conf/optimizers/radam.yaml b/training/conf/optimizers/radam.yaml
new file mode 100644
index 0000000..7ee1234
--- /dev/null
+++ b/training/conf/optimizers/radam.yaml
@@ -0,0 +1,7 @@
+radam:
+  _target_: torch.optim.RAdam
+  lr: 1.5e-4
+  betas: [0.9, 0.999]
+  weight_decay: 1.0e-4
+  eps: 1.0e-8
+  parameters: network