summary | refs | log | tree | commit | diff
path: root/training/conf/network/conv_perceiver.yaml
diff options
context:
space:
mode:
Diffstat (limited to 'training/conf/network/conv_perceiver.yaml')
-rw-r--r-- training/conf/network/conv_perceiver.yaml | 30
1 files changed, 30 insertions, 0 deletions
diff --git a/training/conf/network/conv_perceiver.yaml b/training/conf/network/conv_perceiver.yaml
new file mode 100644
index 0000000..e6906fa
--- /dev/null
+++ b/training/conf/network/conv_perceiver.yaml
@@ -0,0 +1,30 @@
+_target_: text_recognizer.networks.ConvPerceiver  # top-level class path; presumably a Hydra-style instantiate target — confirm
+input_dims: [1, 1, 576, 640]
+hidden_dim: &hidden_dim 144  # anchor; aliased by encoder.out_channels, decoder.dim and pixel_embedding.dim
+num_classes: &num_classes 58  # anchor; aliased by decoder.logits_dim
+queries_dim: &queries_dim 16  # anchor; aliased by decoder.queries_dim
+max_length: 89
+pad_index: 3
+encoder:  # nested config targeting EfficientNet
+ _target_: text_recognizer.networks.EfficientNet
+ arch: b0
+ stochastic_dropout_rate: 0.2
+ bn_momentum: 0.99
+ bn_eps: 1.0e-3
+ depth: 5
+ out_channels: *hidden_dim  # alias of hidden_dim (144)
+decoder:  # nested config targeting PerceiverIO
+ _target_: text_recognizer.networks.perceiver.PerceiverIO
+ dim: *hidden_dim  # alias of hidden_dim (144); same value as encoder.out_channels
+ cross_heads: 1
+ cross_head_dim: 64
+ num_latents: 256
+ latent_dim: 512
+ latent_heads: 8
+ depth: 6
+ queries_dim: *queries_dim  # alias of the top-level queries_dim (16)
+ logits_dim: *num_classes  # alias of num_classes (58)
+pixel_embedding:  # nested config targeting AxialPositionalEmbedding
+ _target_: text_recognizer.networks.transformer.AxialPositionalEmbedding
+ dim: *hidden_dim  # alias of hidden_dim (144)
+ shape: [3, 64]