diff options
author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-09-30 23:08:31 +0200 |
---|---|---|
committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-09-30 23:08:31 +0200 |
commit | 9e98c19d9e218b465a7d03c1b22c1d480f065741 (patch) | |
tree | 03e37dbf0f753976f055a7434ae6dbb9112129c0 /training/conf/experiment | |
parent | 4f0469f755507b15dae65510b2000a0ce077b423 (diff) |
Updates to config files
Diffstat (limited to 'training/conf/experiment')
-rw-r--r-- | training/conf/experiment/vqgan.yaml | 37 |
1 files changed, 26 insertions, 11 deletions
```diff
diff --git a/training/conf/experiment/vqgan.yaml b/training/conf/experiment/vqgan.yaml
index 34886ec..572c320 100644
--- a/training/conf/experiment/vqgan.yaml
+++ b/training/conf/experiment/vqgan.yaml
@@ -11,30 +11,41 @@ defaults:
 criterion:
   _target_: text_recognizer.criterions.vqgan_loss.VQGANLoss
   reconstruction_loss:
-    _target_: torch.nn.MSELoss
+    _target_: torch.nn.BCEWithLogitsLoss
     reduction: mean
   discriminator:
     _target_: text_recognizer.criterions.n_layer_discriminator.NLayerDiscriminator
     in_channels: 1
     num_channels: 64
     num_layers: 3
-  vq_loss_weight: 1.0
+  commitment_weight: 0.25
   discriminator_weight: 0.8
   discriminator_factor: 1.0
-  discriminator_iter_start: 7e4
+  discriminator_iter_start: 8.0e4

 datamodule:
-  batch_size: 8
+  batch_size: 12
   # resize: [288, 320]
+  augment: false

 lr_schedulers:
   generator:
-    _target_: torch.optim.lr_scheduler.CosineAnnealingLR
-    T_max: 128
-    eta_min: 4.5e-6
+    _target_: torch.optim.lr_scheduler.OneCycleLR
+    max_lr: 3.0e-4
+    total_steps: null
+    epochs: 64
+    steps_per_epoch: 1685
+    pct_start: 0.3
+    anneal_strategy: cos
+    cycle_momentum: true
+    base_momentum: 0.85
+    max_momentum: 0.95
+    div_factor: 25.0
+    final_div_factor: 10000.0
+    three_phase: true
     last_epoch: -1
-
-    interval: epoch
+    verbose: false
+    interval: step
     monitor: val/loss

   # discriminator:
@@ -66,7 +77,11 @@ optimizers:
     parameters: loss_fn.discriminator

 trainer:
-  max_epochs: 128
+  max_epochs: 64
   # limit_train_batches: 0.1
   # limit_val_batches: 0.1
-  gradient_clip_val: 100
+  # gradient_clip_val: 100
+
+# tune: false
+# train: true
+# test: false
```