diff options
author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2022-09-03 12:13:02 +0200 |
---|---|---|
committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2022-09-03 12:13:02 +0200 |
commit | 73ccaaa24936faed36fcc467532baa5386d402ae (patch) | |
tree | c7230fff21b8a780c2b0cd8a5d610075cbb7f21e /training | |
parent | 5dd76ca9a3ff35c57cbc7c607afbdb4ee1c8b36f (diff) |
Update perceiver
Diffstat (limited to 'training')
-rw-r--r-- | training/conf/model/lit_perceiver.yaml | 5 | ||||
-rw-r--r-- | training/conf/network/conv_perceiver.yaml | 30 |
2 files changed, 35 insertions, 0 deletions
```diff
diff --git a/training/conf/model/lit_perceiver.yaml b/training/conf/model/lit_perceiver.yaml
new file mode 100644
index 0000000..6d1ec82
--- /dev/null
+++ b/training/conf/model/lit_perceiver.yaml
@@ -0,0 +1,5 @@
+_target_: text_recognizer.models.LitPerceiver
+max_output_len: 682
+start_token: <s>
+end_token: <e>
+pad_token: <p>
diff --git a/training/conf/network/conv_perceiver.yaml b/training/conf/network/conv_perceiver.yaml
new file mode 100644
index 0000000..e6906fa
--- /dev/null
+++ b/training/conf/network/conv_perceiver.yaml
@@ -0,0 +1,30 @@
+_target_: text_recognizer.networks.ConvPerceiver
+input_dims: [1, 1, 576, 640]
+hidden_dim: &hidden_dim 144
+num_classes: &num_classes 58
+queries_dim: &queries_dim 16
+max_length: 89
+pad_index: 3
+encoder:
+  _target_: text_recognizer.networks.EfficientNet
+  arch: b0
+  stochastic_dropout_rate: 0.2
+  bn_momentum: 0.99
+  bn_eps: 1.0e-3
+  depth: 5
+  out_channels: *hidden_dim
+decoder:
+  _target_: text_recognizer.networks.perceiver.PerceiverIO
+  dim: *hidden_dim
+  cross_heads: 1
+  cross_head_dim: 64
+  num_latents: 256
+  latent_dim: 512
+  latent_heads: 8
+  depth: 6
+  queries_dim: *queries_dim
+  logits_dim: *num_classes
+pixel_embedding:
+  _target_: text_recognizer.networks.transformer.AxialPositionalEmbedding
+  dim: *hidden_dim
+  shape: [3, 64]
```