summary refs log tree commit diff
path: root/training
diff options
context:
space:
mode:
Diffstat (limited to 'training')
-rw-r--r-- training/conf/experiment/conv_transformer_paragraphs.yaml 14
1 files changed, 7 insertions, 7 deletions
diff --git a/training/conf/experiment/conv_transformer_paragraphs.yaml b/training/conf/experiment/conv_transformer_paragraphs.yaml
index 34eedab..60898da 100644
--- a/training/conf/experiment/conv_transformer_paragraphs.yaml
+++ b/training/conf/experiment/conv_transformer_paragraphs.yaml
@@ -14,7 +14,7 @@ epochs: &epochs 600
num_classes: &num_classes 58
ignore_index: &ignore_index 3
max_output_len: &max_output_len 682
-summary: [[1, 1, 576, 640], [1, 682]]
+# summary: [[1, 1, 576, 640], [1, 682]]
logger:
wandb:
@@ -55,7 +55,7 @@ lr_scheduler:
monitor: val/cer
datamodule:
- batch_size: 8
+ batch_size: 2
train_fraction: 0.95
network:
@@ -67,9 +67,9 @@ network:
encoder:
_target_: text_recognizer.networks.convnext.ConvNext
dim: 16
- dim_mults: [2, 4, 8, 8, 8]
- depths: [3, 3, 3, 4, 6]
- downsampling_factors: [[2, 2], [2, 2], [2, 2], [2, 1], [2, 1]]
+ dim_mults: [2, 4, 8, 8]
+ depths: [3, 3, 6, 6]
+ downsampling_factors: [[2, 2], [2, 2], [2, 2], [2, 1]]
attn:
_target_: text_recognizer.networks.convnext.TransformerBlock
attn:
@@ -118,7 +118,7 @@ network:
_target_: "text_recognizer.networks.transformer.embeddings.axial.\
AxialPositionalEmbeddingImage"
dim: *hidden_dim
- axial_shape: [7, 128]
+ axial_shape: [36, 80]
axial_dims: [64, 64]
token_pos_embedding:
_target_: "text_recognizer.networks.transformer.embeddings.fourier.\
@@ -130,4 +130,4 @@ network:
trainer:
gradient_clip_val: 1.0
max_epochs: *epochs
- accumulate_grad_batches: 1
+ accumulate_grad_batches: 6