From 73ccaaa24936faed36fcc467532baa5386d402ae Mon Sep 17 00:00:00 2001
From: Gustaf Rydholm
Date: Sat, 3 Sep 2022 12:13:02 +0200
Subject: Update perceiver

---
Notes (free text in this position is ignored when the patch is applied):

  * This patch only creates training/conf/network/conv_perceiver.yaml
    (30 added lines); no existing file is modified.
  * The top-level `_target_` is text_recognizer.networks.ConvPerceiver.
    The `encoder`, `decoder` and `pixel_embedding` sections each carry
    their own `_target_` (EfficientNet, perceiver.PerceiverIO and
    transformer.AxialPositionalEmbedding respectively).
  * Three YAML anchors keep the sections in agreement:
      - &hidden_dim (144)  -> encoder.out_channels, decoder.dim,
                              pixel_embedding.dim
      - &num_classes (58)  -> decoder.logits_dim
      - &queries_dim (16)  -> decoder.queries_dim
    Change the anchored value, not the aliases, when resizing the model.
  * NOTE(review): the `_target_` keys presumably name the classes to
    instantiate (Hydra-style config) -- confirm against the training
    entry point.
  * NOTE(review): nested keys are shown with two-space indentation;
    verify the exact whitespace against blob e6906fa before applying.

 training/conf/network/conv_perceiver.yaml | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 training/conf/network/conv_perceiver.yaml

(limited to 'training/conf/network/conv_perceiver.yaml')

diff --git a/training/conf/network/conv_perceiver.yaml b/training/conf/network/conv_perceiver.yaml
new file mode 100644
index 0000000..e6906fa
--- /dev/null
+++ b/training/conf/network/conv_perceiver.yaml
@@ -0,0 +1,30 @@
+_target_: text_recognizer.networks.ConvPerceiver
+input_dims: [1, 1, 576, 640]
+hidden_dim: &hidden_dim 144
+num_classes: &num_classes 58
+queries_dim: &queries_dim 16
+max_length: 89
+pad_index: 3
+encoder:
+  _target_: text_recognizer.networks.EfficientNet
+  arch: b0
+  stochastic_dropout_rate: 0.2
+  bn_momentum: 0.99
+  bn_eps: 1.0e-3
+  depth: 5
+  out_channels: *hidden_dim
+decoder:
+  _target_: text_recognizer.networks.perceiver.PerceiverIO
+  dim: *hidden_dim
+  cross_heads: 1
+  cross_head_dim: 64
+  num_latents: 256
+  latent_dim: 512
+  latent_heads: 8
+  depth: 6
+  queries_dim: *queries_dim
+  logits_dim: *num_classes
+pixel_embedding:
+  _target_: text_recognizer.networks.transformer.AxialPositionalEmbedding
+  dim: *hidden_dim
+  shape: [3, 64]
-- 
cgit v1.2.3-70-g09d2