diff options
author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2024-04-15 21:48:18 +0200 |
---|---|---|
committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2024-04-15 21:48:18 +0200 |
commit | e643e0c61ab33ce1bb8cfdebc92fc0670c82afda (patch) | |
tree | 10876c66ed249429e42f1660231bb7ff84c10769 /training/conf/network/mammut_cvit_lines.yaml | |
parent | 9ce21f569ecac03f15f2ad669fde3dd4a512f8cc (diff) |
Update configs
Diffstat (limited to 'training/conf/network/mammut_cvit_lines.yaml')
-rw-r--r-- | training/conf/network/mammut_cvit_lines.yaml | 51 |
1 files changed, 51 insertions, 0 deletions
diff --git a/training/conf/network/mammut_cvit_lines.yaml b/training/conf/network/mammut_cvit_lines.yaml new file mode 100644 index 0000000..75fcccb --- /dev/null +++ b/training/conf/network/mammut_cvit_lines.yaml @@ -0,0 +1,51 @@ +_target_: text_recognizer.network.mammut.MaMMUT +encoder: + _target_: text_recognizer.network.cvit.CVit + image_height: 7 + image_width: 128 + patch_height: 7 + patch_width: 1 + dim: &dim 512 + encoder: + _target_: text_recognizer.network.transformer.encoder.Encoder + dim: *dim + heads: 8 + dim_head: 64 + ff_mult: 4 + depth: 2 + dropout_rate: 0.5 + use_rotary_emb: true + one_kv_head: true + stem: + _target_: text_recognizer.network.convnext.convnext.ConvNext + dim: 16 + dim_mults: [2, 8, 32] + depths: [2, 2, 4] + attn: null + channels: 512 +image_attn_pool: + _target_: text_recognizer.network.transformer.attention.Attention + dim: *dim + heads: 8 + causal: false + dim_head: 64 + ff_mult: 4 + dropout_rate: 0.0 + use_flash: true + norm_context: true + use_rotary_emb: false + one_kv_head: true +decoder: + _target_: text_recognizer.network.transformer.decoder.Decoder + dim: *dim + ff_mult: 4 + heads: 8 + dim_head: 64 + depth: 6 + dropout_rate: 0.5 + one_kv_head: true +dim: *dim +dim_latent: *dim +num_tokens: 57 +pad_index: 3 +num_image_queries: 64 |