summary refs log tree commit diff
path: root/training/conf/experiment/conv_transformer_paragraphs.yaml
diff options
context:
space:
mode:
Diffstat (limited to 'training/conf/experiment/conv_transformer_paragraphs.yaml')
-rw-r--r-- training/conf/experiment/conv_transformer_paragraphs.yaml 13
1 files changed, 6 insertions, 7 deletions
diff --git a/training/conf/experiment/conv_transformer_paragraphs.yaml b/training/conf/experiment/conv_transformer_paragraphs.yaml
index 7c6e231..c8db485 100644
--- a/training/conf/experiment/conv_transformer_paragraphs.yaml
+++ b/training/conf/experiment/conv_transformer_paragraphs.yaml
@@ -34,15 +34,14 @@ optimizer:
betas: [0.9, 0.999]
weight_decay: 0
eps: 1.0e-8
- parameters: network
lr_scheduler:
_target_: torch.optim.lr_scheduler.OneCycleLR
max_lr: 3.0e-4
total_steps: null
epochs: *epochs
- steps_per_epoch: 3201
- pct_start: 0.3
+ steps_per_epoch: 5037
+ pct_start: 0.15
anneal_strategy: cos
cycle_momentum: true
base_momentum: 0.85
@@ -56,7 +55,7 @@ lr_scheduler:
monitor: val/cer
datamodule:
- batch_size: 6
+ batch_size: 4
train_fraction: 0.95
network:
@@ -66,9 +65,9 @@ network:
encoder:
depth: 5
decoder:
- depth: 6
+ depth: 4
pixel_embedding:
- shape: [18, 78]
+ shape: [17, 79]
model:
max_output_len: *max_output_len
@@ -76,4 +75,4 @@ model:
trainer:
gradient_clip_val: 0.5
max_epochs: *epochs
- accumulate_grad_batches: 1
+ accumulate_grad_batches: 2