diff options
author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-04-08 23:38:03 +0200 |
---|---|---|
committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-04-08 23:38:03 +0200 |
commit | e388cd95c77d37a51324cff9d84a809421bf97d3 (patch) | |
tree | d585545f85d03ea8a6907daba254821fddeb1589 /text_recognizer/training/experiments | |
parent | f4629a0d4149d5870c9fd8ce83ff5d391bd7ddd3 (diff) |
Bug fixes word pieces
Diffstat (limited to 'text_recognizer/training/experiments')
-rw-r--r-- | text_recognizer/training/experiments/image_transformer.yaml | 72 |
1 files changed, 72 insertions, 0 deletions
diff --git a/text_recognizer/training/experiments/image_transformer.yaml b/text_recognizer/training/experiments/image_transformer.yaml
new file mode 100644
index 0000000..bedcbb5
--- /dev/null
+++ b/text_recognizer/training/experiments/image_transformer.yaml
@@ -0,0 +1,72 @@
+seed: 4711  # experiment seed (presumably the global RNG seed; confirm in the training script)
+
+network:  # sections appear to follow a type/args layout: `type` names a class, `args` its kwargs (TODO confirm)
+  desc: null
+  type: ImageTransformer
+  args:
+    encoder:  # left unset here; both fields are null
+      type: null
+      args: null
+    num_decoder_layers: 4
+    hidden_dim: 256
+    num_heads: 4
+    expansion_dim: 1024
+    dropout_rate: 0.1
+    transformer_activation: glu
+
+model:  # wrapper model plus its optimizer, LR scheduler and loss
+  desc: null
+  type: LitTransformerModel
+  args:
+    optimizer:
+      type: MADGRAD
+      args:
+        lr: 1.0e-2
+        momentum: 0.9
+        weight_decay: 0
+        eps: 1.0e-6
+    lr_scheduler:
+      type: CosineAnnealingLR
+      args:
+        T_max: 512  # same value as trainer.args.max_epochs below
+    criterion:
+      type: CrossEntropyLoss
+      args:
+        weight: null  # YAML null; the Python spelling `None` would be loaded as the string "None"
+        ignore_index: -100
+        reduction: mean
+    monitor: val_loss
+    mapping: sentence_piece
+
+data:  # dataset selection and loader settings
+  desc: null
+  type: IAMExtendedParagraphs
+  args:
+    batch_size: 16
+    num_workers: 12
+    train_fraction: 0.8
+    augment: true
+
+callbacks:  # both callbacks watch the same metric that model.args.monitor names
+  - type: ModelCheckpoint
+    args:
+      monitor: val_loss
+      mode: min
+  - type: EarlyStopping
+    args:
+      monitor: val_loss
+      mode: min
+      patience: 10
+
+trainer:  # presumably forwarded to the PyTorch Lightning Trainer; verify against the caller
+  desc: null
+  args:
+    stochastic_weight_avg: true
+    auto_scale_batch_size: binsearch
+    gradient_clip_val: 0
+    fast_dev_run: false
+    gpus: 1
+    precision: 16
+    max_epochs: 512
+    terminate_on_nan: true
+    weights_summary: true  # NOTE(review): Lightning documents "top"/"full"/null here; confirm a boolean is accepted