diff options
Diffstat (limited to 'training/conf/experiment')
-rw-r--r-- | training/conf/experiment/vqgan.yaml | 40 |
1 files changed, 26 insertions, 14 deletions
diff --git a/training/conf/experiment/vqgan.yaml b/training/conf/experiment/vqgan.yaml
index 40af15a..34d8f84 100644
--- a/training/conf/experiment/vqgan.yaml
+++ b/training/conf/experiment/vqgan.yaml
@@ -16,29 +16,41 @@ criterion:
   discriminator:
     _target_: text_recognizer.criterions.n_layer_discriminator.NLayerDiscriminator
     in_channels: 1
-    num_channels: 32
+    num_channels: 64
     num_layers: 3
-  vq_loss_weight: 0.8
-  discriminator_weight: 0.8
+  vq_loss_weight: 0.25
+  discriminator_weight: 1.0
   discriminator_factor: 1.0
-  discriminator_iter_start: 2e4
+  discriminator_iter_start: 2.0e4
 
 datamodule:
-  batch_size: 8
+  batch_size: 12
 
 lr_schedulers:
   generator:
-    _target_: torch.optim.lr_scheduler.CosineAnnealingLR
-    T_max: 256
-    eta_min: 0.0
+    _target_: torch.optim.lr_scheduler.OneCycleLR
+    max_lr: 3.0e-4
+    total_steps: null
+    epochs: 64
+    steps_per_epoch: 1685
+    pct_start: 0.1
+    anneal_strategy: cos
+    cycle_momentum: true
+    base_momentum: 0.85
+    max_momentum: 0.95
+    div_factor: 1.0e2
+    final_div_factor: 1.0e4
+    three_phase: true
     last_epoch: -1
+    verbose: false
 
-    interval: epoch
+    # Non-class arguments
+    interval: step
     monitor: val/loss
 
   discriminator:
     _target_: torch.optim.lr_scheduler.CosineAnnealingLR
-    T_max: 256
+    T_max: 64
     eta_min: 0.0
     last_epoch: -1
 
@@ -48,10 +60,10 @@ lr_schedulers:
 optimizers:
   generator:
     _target_: madgrad.MADGRAD
-    lr: 4.5e-6
+    lr: 1.0e-4
     momentum: 0.5
     weight_decay: 0
-    eps: 1.0e-6
+    eps: 1.0e-7
 
     parameters: network
 
@@ -65,7 +77,7 @@ optimizers:
     parameters: loss_fn.discriminator
 
 trainer:
-  max_epochs: 256
-  # gradient_clip_val: 0.25
+  max_epochs: 64
+  # gradient_clip_val: 1.0e1
 
 summary: null