summary | refs | log | tree | commit | diff
path: root/training/conf/network
diff options
context:
space:
mode:
Diffstat (limited to 'training/conf/network')
-rw-r--r-- training/conf/network/decoder/pixelcnn_encoder.yaml | 5
-rw-r--r-- training/conf/network/decoder/vae_decoder.yaml | 5
-rw-r--r-- training/conf/network/encoder/pixelcnn_decoder.yaml | 5
-rw-r--r-- training/conf/network/encoder/vae_encoder.yaml | 5
-rw-r--r-- training/conf/network/vqvae.yaml | 15
-rw-r--r-- training/conf/network/vqvae_pixelcnn.yaml | 9
6 files changed, 37 insertions, 7 deletions
diff --git a/training/conf/network/decoder/pixelcnn_encoder.yaml b/training/conf/network/decoder/pixelcnn_encoder.yaml
new file mode 100644
index 0000000..47a130d
--- /dev/null
+++ b/training/conf/network/decoder/pixelcnn_encoder.yaml
@@ -0,0 +1,5 @@
+_target_: text_recognizer.networks.vqvae.pixelcnn.Encoder
+in_channels: 1
+hidden_dim: 8
+channels_multipliers: [1, 2, 8, 8]
+dropout_rate: 0.25
diff --git a/training/conf/network/decoder/vae_decoder.yaml b/training/conf/network/decoder/vae_decoder.yaml
new file mode 100644
index 0000000..b2090b3
--- /dev/null
+++ b/training/conf/network/decoder/vae_decoder.yaml
@@ -0,0 +1,5 @@
+_target_: text_recognizer.networks.vqvae.decoder.Decoder
+out_channels: 1
+hidden_dim: 32
+channels_multipliers: [8, 6, 2, 1]
+dropout_rate: 0.25
diff --git a/training/conf/network/encoder/pixelcnn_decoder.yaml b/training/conf/network/encoder/pixelcnn_decoder.yaml
new file mode 100644
index 0000000..3895164
--- /dev/null
+++ b/training/conf/network/encoder/pixelcnn_decoder.yaml
@@ -0,0 +1,5 @@
+_target_: text_recognizer.networks.vqvae.pixelcnn.Decoder
+out_channels: 1
+hidden_dim: 8
+channels_multipliers: [8, 8, 2, 1]
+dropout_rate: 0.25
diff --git a/training/conf/network/encoder/vae_encoder.yaml b/training/conf/network/encoder/vae_encoder.yaml
new file mode 100644
index 0000000..5dc6814
--- /dev/null
+++ b/training/conf/network/encoder/vae_encoder.yaml
@@ -0,0 +1,5 @@
+_target_: text_recognizer.networks.vqvae.encoder.Encoder
+in_channels: 1
+hidden_dim: 32
+channels_multipliers: [1, 2, 6, 8]
+dropout_rate: 0.25
diff --git a/training/conf/network/vqvae.yaml b/training/conf/network/vqvae.yaml
index 5a5c066..835d0b7 100644
--- a/training/conf/network/vqvae.yaml
+++ b/training/conf/network/vqvae.yaml
@@ -1,8 +1,9 @@
-_target_: text_recognizer.networks.vqvae.VQVAE
-in_channels: 1
-res_channels: 32
-num_residual_layers: 2
-embedding_dim: 64
-num_embeddings: 512
+defaults:
+  - encoder: vae_encoder
+  - decoder: vae_decoder
+
+_target_: text_recognizer.networks.vqvae.vqvae.VQVAE
+hidden_dim: 256
+embedding_dim: 32
+num_embeddings: 1024
 decay: 0.99
-activation: mish
diff --git a/training/conf/network/vqvae_pixelcnn.yaml b/training/conf/network/vqvae_pixelcnn.yaml
new file mode 100644
index 0000000..10200bc
--- /dev/null
+++ b/training/conf/network/vqvae_pixelcnn.yaml
@@ -0,0 +1,9 @@
+defaults:
+  - encoder: pixelcnn_encoder
+  - decoder: pixelcnn_decoder
+
+_target_: text_recognizer.networks.vqvae.vqvae.VQVAE
+hidden_dim: 64
+embedding_dim: 32
+num_embeddings: 512
+decay: 0.99