diff options
Diffstat (limited to 'training/conf/network')
-rw-r--r-- | training/conf/network/conv_transformer.yaml | 2 | ||||
-rw-r--r-- | training/conf/network/decoder/transformer_decoder.yaml | 30 | ||||
-rw-r--r-- | training/conf/network/encoder/efficientnet.yaml | 5 |
3 files changed, 1 insertion, 36 deletions
diff --git a/training/conf/network/conv_transformer.yaml b/training/conf/network/conv_transformer.yaml
index 54eb028..39c5c46 100644
--- a/training/conf/network/conv_transformer.yaml
+++ b/training/conf/network/conv_transformer.yaml
@@ -10,7 +10,7 @@ encoder:
 bn_momentum: 0.99
 bn_eps: 1.0e-3
 depth: 3
- out_channels: 128
+ out_channels: *hidden_dim
 decoder:
 _target_: text_recognizer.networks.transformer.Decoder
 depth: 6
diff --git a/training/conf/network/decoder/transformer_decoder.yaml b/training/conf/network/decoder/transformer_decoder.yaml
deleted file mode 100644
index 4588ee9..0000000
--- a/training/conf/network/decoder/transformer_decoder.yaml
+++ /dev/null
@@ -1,30 +0,0 @@
-_target_: text_recognizer.networks.transformer.decoder.Decoder
-depth: 4
-block:
- _target_: text_recognizer.networks.transformer.decoder.DecoderBlock
- self_attn:
- _target_: text_recognizer.networks.transformer.attention.Attention
- dim: 64
- num_heads: 4
- dim_head: 64
- dropout_rate: 0.05
- causal: true
- rotary_embedding:
- _target_: text_recognizer.networks.transformer.embeddings.rotary.RotaryEmbedding
- dim: 128
- cross_attn:
- _target_: text_recognizer.networks.transformer.attention.Attention
- dim: 64
- num_heads: 4
- dim_head: 64
- dropout_rate: 0.05
- causal: false
- norm:
- _target_: text_recognizer.networks.transformer.norm.RMSNorm
- normalized_shape: 192
- ff:
- _target_: text_recognizer.networks.transformer.mlp.FeedForward
- dim_out: null
- expansion_factor: 4
- glu: true
- dropout_rate: 0.2
diff --git a/training/conf/network/encoder/efficientnet.yaml b/training/conf/network/encoder/efficientnet.yaml
deleted file mode 100644
index a7be069..0000000
--- a/training/conf/network/encoder/efficientnet.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-_target_: text_recognizer.networks.efficientnet.EfficientNet
-arch: b0
-stochastic_dropout_rate: 0.2
-bn_momentum: 0.99
-bn_eps: 1.0e-3 |