From 240f5e9f20032e82515fa66ce784619527d1041e Mon Sep 17 00:00:00 2001 From: Gustaf Rydholm Date: Sun, 8 Aug 2021 19:59:55 +0200 Subject: Add VQGAN and loss function --- training/conf/network/decoder/pixelcnn_decoder.yaml | 5 +++++ training/conf/network/decoder/pixelcnn_encoder.yaml | 5 ----- training/conf/network/decoder/vae_decoder.yaml | 2 +- training/conf/network/encoder/pixelcnn_decoder.yaml | 5 ----- training/conf/network/encoder/pixelcnn_encoder.yaml | 5 +++++ training/conf/network/encoder/vae_encoder.yaml | 2 +- training/conf/network/vqvae.yaml | 2 +- training/conf/network/vqvae_pixelcnn.yaml | 2 +- 8 files changed, 14 insertions(+), 14 deletions(-) create mode 100644 training/conf/network/decoder/pixelcnn_decoder.yaml delete mode 100644 training/conf/network/decoder/pixelcnn_encoder.yaml delete mode 100644 training/conf/network/encoder/pixelcnn_decoder.yaml create mode 100644 training/conf/network/encoder/pixelcnn_encoder.yaml (limited to 'training/conf/network') diff --git a/training/conf/network/decoder/pixelcnn_decoder.yaml b/training/conf/network/decoder/pixelcnn_decoder.yaml new file mode 100644 index 0000000..cdddb7a --- /dev/null +++ b/training/conf/network/decoder/pixelcnn_decoder.yaml @@ -0,0 +1,5 @@ +_target_: text_recognizer.networks.vqvae.pixelcnn.Decoder +out_channels: 1 +hidden_dim: 8 +channels_multipliers: [8, 2, 1] +dropout_rate: 0.25 diff --git a/training/conf/network/decoder/pixelcnn_encoder.yaml b/training/conf/network/decoder/pixelcnn_encoder.yaml deleted file mode 100644 index 47a130d..0000000 --- a/training/conf/network/decoder/pixelcnn_encoder.yaml +++ /dev/null @@ -1,5 +0,0 @@ -_target_: text_recognizer.networks.vqvae.pixelcnn.Encoder -in_channels: 1 -hidden_dim: 8 -channels_multipliers: [1, 2, 8, 8] -dropout_rate: 0.25 diff --git a/training/conf/network/decoder/vae_decoder.yaml b/training/conf/network/decoder/vae_decoder.yaml index 0a36a54..a5e7286 100644 --- a/training/conf/network/decoder/vae_decoder.yaml +++ b/training/conf/network/decoder/vae_decoder.yaml @@ -1,5 +1,5 @@ _target_: text_recognizer.networks.vqvae.decoder.Decoder out_channels: 1 hidden_dim: 32 -channels_multipliers: [4, 4, 2, 1] +channels_multipliers: [8, 8, 4, 1] dropout_rate: 0.25 diff --git a/training/conf/network/encoder/pixelcnn_decoder.yaml b/training/conf/network/encoder/pixelcnn_decoder.yaml deleted file mode 100644 index 3895164..0000000 --- a/training/conf/network/encoder/pixelcnn_decoder.yaml +++ /dev/null @@ -1,5 +0,0 @@ -_target_: text_recognizer.networks.vqvae.pixelcnn.Decoder -out_channels: 1 -hidden_dim: 8 -channels_multipliers: [8, 8, 2, 1] -dropout_rate: 0.25 diff --git a/training/conf/network/encoder/pixelcnn_encoder.yaml b/training/conf/network/encoder/pixelcnn_encoder.yaml new file mode 100644 index 0000000..f12957b --- /dev/null +++ b/training/conf/network/encoder/pixelcnn_encoder.yaml @@ -0,0 +1,5 @@ +_target_: text_recognizer.networks.vqvae.pixelcnn.Encoder +in_channels: 1 +hidden_dim: 8 +channels_multipliers: [1, 2, 8] +dropout_rate: 0.25 diff --git a/training/conf/network/encoder/vae_encoder.yaml b/training/conf/network/encoder/vae_encoder.yaml index dacd389..58e905d 100644 --- a/training/conf/network/encoder/vae_encoder.yaml +++ b/training/conf/network/encoder/vae_encoder.yaml @@ -1,5 +1,5 @@ _target_: text_recognizer.networks.vqvae.encoder.Encoder in_channels: 1 hidden_dim: 32 -channels_multipliers: [1, 2, 4, 4] +channels_multipliers: [1, 2, 4, 8, 8] dropout_rate: 0.25 diff --git a/training/conf/network/vqvae.yaml b/training/conf/network/vqvae.yaml index d97e9b6..835d0b7 100644 --- a/training/conf/network/vqvae.yaml +++ b/training/conf/network/vqvae.yaml @@ -3,7 +3,7 @@ defaults: - decoder: vae_decoder _target_: text_recognizer.networks.vqvae.vqvae.VQVAE -hidden_dim: 128 +hidden_dim: 256 embedding_dim: 32 num_embeddings: 1024 decay: 0.99 diff --git a/training/conf/network/vqvae_pixelcnn.yaml b/training/conf/network/vqvae_pixelcnn.yaml index 10200bc..cd850af 100644 --- a/training/conf/network/vqvae_pixelcnn.yaml +++ b/training/conf/network/vqvae_pixelcnn.yaml @@ -5,5 +5,5 @@ defaults: _target_: text_recognizer.networks.vqvae.vqvae.VQVAE hidden_dim: 64 embedding_dim: 32 -num_embeddings: 512 +num_embeddings: 1024 decay: 0.99 -- cgit v1.2.3-70-g09d2