diff options
Diffstat (limited to 'training/conf/network/mammut_lines.yaml')
-rw-r--r-- | training/conf/network/mammut_lines.yaml | 41 |
1 file changed, 41 insertions, 0 deletions
diff --git a/training/conf/network/mammut_lines.yaml b/training/conf/network/mammut_lines.yaml
new file mode 100644
index 0000000..f1c73d0
--- /dev/null
+++ b/training/conf/network/mammut_lines.yaml
@@ -0,0 +1,41 @@
+_target_: text_recognizer.network.mammut.MaMMUT  # top-level model class; presumably built through Hydra-style `_target_` instantiation -- confirm
+encoder:
+  _target_: text_recognizer.network.vit.Vit
+  image_height: 56
+  image_width: 1024
+  patch_height: 56  # same as image_height: 56 / 56 = 1 patch row, assuming non-overlapping patches
+  patch_width: 8  # 1024 / 8 = 128 patch columns, assuming non-overlapping patches
+  dim: &dim 512  # YAML anchor; every `*dim` alias below resolves to this value
+  encoder:
+    _target_: text_recognizer.network.transformer.encoder.Encoder
+    dim: *dim
+    heads: 12
+    dim_head: 64  # heads * dim_head = 768, which differs from dim (512) -- NOTE(review): confirm this is intended
+    ff_mult: 4
+    depth: 6
+    dropout_rate: 0.1
+  channels: 1  # NOTE(review): placed under the Vit mapping; nesting was reconstructed from a whitespace-collapsed source -- confirm
+image_attn_pool:
+  _target_: text_recognizer.network.transformer.attention.Attention
+  dim: *dim
+  heads: 8  # fewer heads than the encoder/decoder blocks (12)
+  causal: false
+  dim_head: 64
+  ff_mult: 4
+  dropout_rate: 0.0  # no dropout here, unlike the encoder/decoder blocks (0.1)
+  use_flash: true
+  norm_context: true
+  rotary_emb: null  # explicitly unset
+decoder:
+  _target_: text_recognizer.network.transformer.decoder.Decoder
+  dim: *dim
+  ff_mult: 4
+  heads: 12
+  dim_head: 64
+  depth: 6  # same depth as the nested encoder above
+  dropout_rate: 0.1
+dim: *dim  # alias of the `&dim` anchor defined under encoder (512)
+dim_latent: *dim  # latent width is tied to the same anchor value
+num_tokens: 58
+pad_index: 3
+num_image_queries: 256