summary | refs | log | tree | commit | diff
path: root/training/conf/network/vit_lines.yaml
diff options
context:
space:
mode:
author Gustaf Rydholm <gustaf.rydholm@gmail.com> 2023-08-25 23:19:39 +0200
committer Gustaf Rydholm <gustaf.rydholm@gmail.com> 2023-08-25 23:19:39 +0200
commit 6968572c1a21394b88a29f675b17b9698784a898 (patch)
tree d89d1c5c2ec331d38dcb5b6a2dbbd72c9e355b8a /training/conf/network/vit_lines.yaml
parent 49ca6ade1a19f7f9c702171537fe4be0dfcda66d (diff)
Update training stuff
Diffstat (limited to 'training/conf/network/vit_lines.yaml')
-rw-r--r-- training/conf/network/vit_lines.yaml 37
1 files changed, 37 insertions, 0 deletions
diff --git a/training/conf/network/vit_lines.yaml b/training/conf/network/vit_lines.yaml
new file mode 100644
index 0000000..35f83c3
--- /dev/null
+++ b/training/conf/network/vit_lines.yaml
@@ -0,0 +1,37 @@
+_target_: text_recognizer.network.vit.VisionTransformer  # presumably the class to instantiate (Hydra-style `_target_`) — confirm
+image_height: 56
+image_width: 1024
+patch_height: 28  # divides image_height evenly: 56 / 28 = 2
+patch_width: 32  # divides image_width evenly: 1024 / 32 = 32
+dim: &dim 256  # anchor; every `*dim` alias below resolves to 256
+num_classes: &num_classes 57  # anchor; reused as token_embedding.num_tokens
+encoder:  # same hyperparameter values as the `decoder` section
+ _target_: text_recognizer.network.transformer.encoder.Encoder
+ dim: *dim  # alias of the top-level `dim` anchor (256)
+ inner_dim: 1024  # 4 * dim
+ heads: 8
+ dim_head: 64  # NOTE(review): heads * dim_head = 512, which differs from dim (256) — confirm intended
+ depth: 6
+ dropout_rate: 0.0
+decoder:  # same hyperparameter values as the `encoder` section
+ _target_: text_recognizer.network.transformer.decoder.Decoder
+ dim: *dim  # alias of the top-level `dim` anchor (256)
+ inner_dim: 1024  # 4 * dim
+ heads: 8
+ dim_head: 64  # NOTE(review): heads * dim_head = 512, which differs from dim (256) — confirm intended
+ depth: 6
+ dropout_rate: 0.0
+token_embedding:
+ _target_: "text_recognizer.network.transformer.embedding.token.\
+ TokenEmbedding"  # escaped line break: both lines join into one dotted path ending in `.token.TokenEmbedding`
+ num_tokens: *num_classes  # alias of the top-level `num_classes` anchor (57)
+ dim: *dim  # alias of the top-level `dim` anchor (256)
+ use_l2: true  # same setting as pos_embedding.use_l2
+pos_embedding:
+ _target_: "text_recognizer.network.transformer.embedding.absolute.\
+ AbsolutePositionalEmbedding"  # escaped line break: both lines join into one dotted path ending in `.absolute.AbsolutePositionalEmbedding`
+ dim: *dim  # alias of the top-level `dim` anchor (256)
+ max_length: 89  # NOTE(review): presumably the maximum target sequence length — confirm against the dataset config
+ use_l2: true  # same setting as token_embedding.use_l2
+tie_embeddings: true  # NOTE(review): presumably shares token-embedding weights with the output projection — confirm in VisionTransformer
+pad_index: 3  # NOTE(review): presumably the padding token's class index (must be < num_classes, 57) — confirm against the tokenizer