From 56dc112cfb649217cd624b4ff305e2db83a383b7 Mon Sep 17 00:00:00 2001
From: Gustaf Rydholm
Date: Mon, 11 Sep 2023 22:15:26 +0200
Subject: Update configs

---
Review notes (free-text area; not part of the diff below):
  * Adds one new file: a network config whose `_target_` keys name
    text_recognizer.network.mammut.MaMMUT and its sub-modules
    (Vit encoder, Attention image pool, transformer Decoder).
    Presumably consumed by Hydra-style instantiation -- confirm.
  * `&dim 512` is a YAML anchor; every `*dim` alias resolves to 512.
  * The line structure was reconstructed from a whitespace-collapsed
    rendering. Top-level keys are certain; the depth of indented keys
    is inferred. In particular `channels: 1` is assumed to belong to
    the outer `encoder` (Vit) -- TODO confirm against the repository.
  * NOTE(review): encoder/decoder use heads=12, dim_head=64
    (12 * 64 = 768) with dim=512, while image_attn_pool uses heads=8
    (8 * 64 = 512). Confirm the mismatch is intended.

 training/conf/network/mammut_lines.yaml | 41 +++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 training/conf/network/mammut_lines.yaml

(limited to 'training/conf/network/mammut_lines.yaml')

diff --git a/training/conf/network/mammut_lines.yaml b/training/conf/network/mammut_lines.yaml
new file mode 100644
index 0000000..f1c73d0
--- /dev/null
+++ b/training/conf/network/mammut_lines.yaml
@@ -0,0 +1,41 @@
+_target_: text_recognizer.network.mammut.MaMMUT
+encoder:
+  _target_: text_recognizer.network.vit.Vit
+  image_height: 56
+  image_width: 1024
+  patch_height: 56
+  patch_width: 8
+  dim: &dim 512
+  encoder:
+    _target_: text_recognizer.network.transformer.encoder.Encoder
+    dim: *dim
+    heads: 12
+    dim_head: 64
+    ff_mult: 4
+    depth: 6
+    dropout_rate: 0.1
+  channels: 1
+image_attn_pool:
+  _target_: text_recognizer.network.transformer.attention.Attention
+  dim: *dim
+  heads: 8
+  causal: false
+  dim_head: 64
+  ff_mult: 4
+  dropout_rate: 0.0
+  use_flash: true
+  norm_context: true
+  rotary_emb: null
+decoder:
+  _target_: text_recognizer.network.transformer.decoder.Decoder
+  dim: *dim
+  ff_mult: 4
+  heads: 12
+  dim_head: 64
+  depth: 6
+  dropout_rate: 0.1
+dim: *dim
+dim_latent: *dim
+num_tokens: 58
+pad_index: 3
+num_image_queries: 256
-- 
cgit v1.2.3-70-g09d2