From 5c28ad1905f63838afbc7ee6c5ec31ff1310e3a1 Mon Sep 17 00:00:00 2001 From: Gustaf Rydholm Date: Thu, 9 Jun 2022 22:36:04 +0200 Subject: Move conformer config --- training/conf/network/conformer.yaml | 36 ++++++++++++++++++++++++++ training/conf/network/conformer/conformer.yaml | 27 ------------------- 2 files changed, 36 insertions(+), 27 deletions(-) create mode 100644 training/conf/network/conformer.yaml delete mode 100644 training/conf/network/conformer/conformer.yaml (limited to 'training/conf') diff --git a/training/conf/network/conformer.yaml b/training/conf/network/conformer.yaml new file mode 100644 index 0000000..1d72dd5 --- /dev/null +++ b/training/conf/network/conformer.yaml @@ -0,0 +1,36 @@ +_target_: text_recognizer.networks.conformer.Conformer +depth: 8 +num_classes: 57 +dim: &dim 144 +dim_gru: 144 +block: + _target_: text_recognizer.networks.conformer.ConformerBlock + dim: *dim + attn: + _target_: text_recognizer.networks.conformer.Attention + dim: *dim + heads: 8 + dim_head: 64 + mult: 4 + ff: + _target_: text_recognizer.networks.conformer.Feedforward + dim: *dim + expansion_factor: 4 + dropout: 0.1 + conv: + _target_: text_recognizer.networks.conformer.ConformerConv + dim: *dim + expansion_factor: 2 + kernel_size: 31 + dropout: 0.1 +subsampler: + _target_: text_recognizer.networks.conformer.Subsampler + pixel_pos_embedding: + _target_: text_recognizer.networks.transformer.AxialPositionalEmbedding + dim: 64 + shape: [6, 127] + channels: 64 + height: 6 + dim: *dim + depth: 3 + dropout: 0.1 diff --git a/training/conf/network/conformer/conformer.yaml b/training/conf/network/conformer/conformer.yaml deleted file mode 100644 index 2bb423f..0000000 --- a/training/conf/network/conformer/conformer.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- -_target_: text_recognizer.networks.conformer.Conformer -depth: 16 -block: - _target_: text_recognizer.networks.conformer.ConformerBlock - dim: &dim 128 - attn: - _target_: text_recognizer.networks.transformer.Attention - dim: *dim - num_heads: 8 - dim_head: 64 - dropout_rate: 0.1 - causal: false - rotary_embedding: - _target_: text_recognizer.networks.transformer.RotaryEmbedding - dim: 64 - ff: - _target_: text_recognizer.networks.conformer.Feedforward - dim: *dim - expansion_factor: 4 - dropout: 0.1 - conv: - _target_: text_recognizer.networks.conformer.ConformerConv - dim: *dim - expansion_factor: 2 - kernel_size: 31 - dropout: 0.1 -- cgit v1.2.3-70-g09d2