summary | refs | log | tree | commit | diff
path: root/training/conf
diff options
context:
space:
mode:
author Gustaf Rydholm <gustaf.rydholm@gmail.com> 2022-09-03 12:13:02 +0200
committer Gustaf Rydholm <gustaf.rydholm@gmail.com> 2022-09-03 12:13:02 +0200
commit 73ccaaa24936faed36fcc467532baa5386d402ae (patch)
tree c7230fff21b8a780c2b0cd8a5d610075cbb7f21e /training/conf
parent 5dd76ca9a3ff35c57cbc7c607afbdb4ee1c8b36f (diff)
Update perceiver
Diffstat (limited to 'training/conf')
-rw-r--r-- training/conf/model/lit_perceiver.yaml 5
-rw-r--r-- training/conf/network/conv_perceiver.yaml 30
2 files changed, 35 insertions, 0 deletions
diff --git a/training/conf/model/lit_perceiver.yaml b/training/conf/model/lit_perceiver.yaml
new file mode 100644
index 0000000..6d1ec82
--- /dev/null
+++ b/training/conf/model/lit_perceiver.yaml
@@ -0,0 +1,5 @@
+_target_: text_recognizer.models.LitPerceiver
+max_output_len: 682
+start_token: <s>
+end_token: <e>
+pad_token: <p>
diff --git a/training/conf/network/conv_perceiver.yaml b/training/conf/network/conv_perceiver.yaml
new file mode 100644
index 0000000..e6906fa
--- /dev/null
+++ b/training/conf/network/conv_perceiver.yaml
@@ -0,0 +1,30 @@
+_target_: text_recognizer.networks.ConvPerceiver
+input_dims: [1, 1, 576, 640]
+hidden_dim: &hidden_dim 144
+num_classes: &num_classes 58
+queries_dim: &queries_dim 16
+max_length: 89
+pad_index: 3
+encoder:
+ _target_: text_recognizer.networks.EfficientNet
+ arch: b0
+ stochastic_dropout_rate: 0.2
+ bn_momentum: 0.99
+ bn_eps: 1.0e-3
+ depth: 5
+ out_channels: *hidden_dim
+decoder:
+ _target_: text_recognizer.networks.perceiver.PerceiverIO
+ dim: *hidden_dim
+ cross_heads: 1
+ cross_head_dim: 64
+ num_latents: 256
+ latent_dim: 512
+ latent_heads: 8
+ depth: 6
+ queries_dim: *queries_dim
+ logits_dim: *num_classes
+pixel_embedding:
+ _target_: text_recognizer.networks.transformer.AxialPositionalEmbedding
+ dim: *hidden_dim
+ shape: [3, 64]