summary | refs | log | tree | commit | diff
path: root/training/conf/experiment/vqgan.yaml
diff options
context:
space:
mode:
author Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-08-29 21:40:19 +0200
committer Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-08-29 21:40:19 +0200
commit 2f1bb639fd5bb6b510af85fb597e9322abc17bc0 (patch)
tree 3269155b33f33bf2964dc1bdff34d7929b3227f2 /training/conf/experiment/vqgan.yaml
parent da7d2171c818afefb3bad3cd66ce85fddd519c1c (diff)
Remove uploading of code to Wandb, upload config instead
Diffstat (limited to 'training/conf/experiment/vqgan.yaml')
-rw-r--r-- training/conf/experiment/vqgan.yaml 40
1 files changed, 26 insertions, 14 deletions
diff --git a/training/conf/experiment/vqgan.yaml b/training/conf/experiment/vqgan.yaml
index 40af15a..34d8f84 100644
--- a/training/conf/experiment/vqgan.yaml
+++ b/training/conf/experiment/vqgan.yaml
@@ -16,29 +16,41 @@ criterion:
discriminator:
_target_: text_recognizer.criterions.n_layer_discriminator.NLayerDiscriminator
in_channels: 1
- num_channels: 32
+ num_channels: 64
num_layers: 3
- vq_loss_weight: 0.8
- discriminator_weight: 0.8
+ vq_loss_weight: 0.25
+ discriminator_weight: 1.0
discriminator_factor: 1.0
- discriminator_iter_start: 2e4
+ discriminator_iter_start: 2.0e4
datamodule:
- batch_size: 8
+ batch_size: 12
lr_schedulers:
generator:
- _target_: torch.optim.lr_scheduler.CosineAnnealingLR
- T_max: 256
- eta_min: 0.0
+ _target_: torch.optim.lr_scheduler.OneCycleLR
+ max_lr: 3.0e-4
+ total_steps: null
+ epochs: 64
+ steps_per_epoch: 1685
+ pct_start: 0.1
+ anneal_strategy: cos
+ cycle_momentum: true
+ base_momentum: 0.85
+ max_momentum: 0.95
+ div_factor: 1.0e2
+ final_div_factor: 1.0e4
+ three_phase: true
last_epoch: -1
+ verbose: false
- interval: epoch
+ # Non-class arguments
+ interval: step
monitor: val/loss
discriminator:
_target_: torch.optim.lr_scheduler.CosineAnnealingLR
- T_max: 256
+ T_max: 64
eta_min: 0.0
last_epoch: -1
@@ -48,10 +60,10 @@ lr_schedulers:
optimizers:
generator:
_target_: madgrad.MADGRAD
- lr: 4.5e-6
+ lr: 1.0e-4
momentum: 0.5
weight_decay: 0
- eps: 1.0e-6
+ eps: 1.0e-7
parameters: network
@@ -65,7 +77,7 @@ optimizers:
parameters: loss_fn.discriminator
trainer:
- max_epochs: 256
- # gradient_clip_val: 0.25
+ max_epochs: 64
+ # gradient_clip_val: 1.0e1
summary: null