diff options
Diffstat (limited to 'training/conf')
-rw-r--r-- | training/conf/experiment/vqgan.yaml | 8 | ||||
-rw-r--r-- | training/conf/network/decoder/vae_decoder.yaml | 4 | ||||
-rw-r--r-- | training/conf/network/encoder/vae_encoder.yaml | 4 | ||||
-rw-r--r-- | training/conf/network/vqvae.yaml | 4 |
4 files changed, 10 insertions, 10 deletions
diff --git a/training/conf/experiment/vqgan.yaml b/training/conf/experiment/vqgan.yaml
index 34d8f84..485e963 100644
--- a/training/conf/experiment/vqgan.yaml
+++ b/training/conf/experiment/vqgan.yaml
@@ -24,21 +24,21 @@ criterion:
   discriminator_iter_start: 2.0e4

 datamodule:
-  batch_size: 12
+  batch_size: 6

 lr_schedulers:
   generator:
     _target_: torch.optim.lr_scheduler.OneCycleLR
     max_lr: 3.0e-4
     total_steps: null
-    epochs: 64
-    steps_per_epoch: 1685
+    epochs: 100
+    steps_per_epoch: 3369
     pct_start: 0.1
     anneal_strategy: cos
     cycle_momentum: true
     base_momentum: 0.85
     max_momentum: 0.95
-    div_factor: 1.0e2
+    div_factor: 1.0e3
     final_div_factor: 1.0e4
     three_phase: true
     last_epoch: -1
diff --git a/training/conf/network/decoder/vae_decoder.yaml b/training/conf/network/decoder/vae_decoder.yaml
index 7558ff0..60cdcf1 100644
--- a/training/conf/network/decoder/vae_decoder.yaml
+++ b/training/conf/network/decoder/vae_decoder.yaml
@@ -1,5 +1,5 @@
 _target_: text_recognizer.networks.vqvae.decoder.Decoder
 out_channels: 1
-hidden_dim: 32
-channels_multipliers: [4, 4, 2, 1]
+hidden_dim: 64
+channels_multipliers: [8, 4, 2, 1]
 dropout_rate: 0.0
diff --git a/training/conf/network/encoder/vae_encoder.yaml b/training/conf/network/encoder/vae_encoder.yaml
index b32f425..73529fc 100644
--- a/training/conf/network/encoder/vae_encoder.yaml
+++ b/training/conf/network/encoder/vae_encoder.yaml
@@ -1,5 +1,5 @@
 _target_: text_recognizer.networks.vqvae.encoder.Encoder
 in_channels: 1
-hidden_dim: 32
-channels_multipliers: [1, 2, 4, 4]
+hidden_dim: 64
+channels_multipliers: [1, 2, 4, 8]
 dropout_rate: 0.0
diff --git a/training/conf/network/vqvae.yaml b/training/conf/network/vqvae.yaml
index 936e575..70d27d7 100644
--- a/training/conf/network/vqvae.yaml
+++ b/training/conf/network/vqvae.yaml
@@ -3,7 +3,7 @@ defaults:
   - decoder: vae_decoder

 _target_: text_recognizer.networks.vqvae.vqvae.VQVAE
-hidden_dim: 128
+hidden_dim: 512
 embedding_dim: 64
-num_embeddings: 2048
+num_embeddings: 4096
 decay: 0.99