diff options
Diffstat (limited to 'training/conf/network')
-rw-r--r-- | training/conf/network/conv_transformer.yaml | 59 | ||||
-rw-r--r-- | training/conf/network/convnext.yaml | 17 |
2 files changed, 0 insertions, 76 deletions
diff --git a/training/conf/network/conv_transformer.yaml b/training/conf/network/conv_transformer.yaml deleted file mode 100644 index 1e03946..0000000 --- a/training/conf/network/conv_transformer.yaml +++ /dev/null @@ -1,59 +0,0 @@ -_target_: text_recognizer.network.ConvTransformer -encoder: - _target_: text_recognizer.network.image_encoder.ImageEncoder - encoder: - _target_: text_recognizer.network.convnext.ConvNext - dim: 16 - dim_mults: [2, 4, 8] - depths: [3, 3, 6] - downsampling_factors: [[2, 2], [2, 2], [2, 2]] - pixel_embedding: - _target_: "text_recognizer.network.transformer.embeddings.axial.\ - AxialPositionalEmbeddingImage" - dim: &hidden_dim 128 - axial_shape: [7, 128] - axial_dims: [64, 64] -decoder: - _target_: text_recognizer.network.text_decoder.TextDecoder - hidden_dim: *hidden_dim - num_classes: 58 - pad_index: 3 - decoder: - _target_: text_recognizer.network.transformer.Decoder - dim: *hidden_dim - depth: 10 - block: - _target_: text_recognizer.network.transformer.decoder_block.DecoderBlock - self_attn: - _target_: text_recognizer.network.transformer.Attention - dim: *hidden_dim - num_heads: 12 - dim_head: 64 - dropout_rate: &dropout_rate 0.2 - causal: true - cross_attn: - _target_: text_recognizer.network.transformer.Attention - dim: *hidden_dim - num_heads: 12 - dim_head: 64 - dropout_rate: *dropout_rate - causal: false - norm: - _target_: text_recognizer.network.transformer.RMSNorm - dim: *hidden_dim - ff: - _target_: text_recognizer.network.transformer.FeedForward - dim: *hidden_dim - dim_out: null - expansion_factor: 2 - glu: true - dropout_rate: *dropout_rate - rotary_embedding: - _target_: text_recognizer.network.transformer.RotaryEmbedding - dim: 64 - token_pos_embedding: - _target_: "text_recognizer.network.transformer.embeddings.fourier.\ - PositionalEncoding" - dim: *hidden_dim - dropout_rate: 0.1 - max_len: 89 diff --git a/training/conf/network/convnext.yaml b/training/conf/network/convnext.yaml deleted file mode 100644 index 904bd56..0000000 --- a/training/conf/network/convnext.yaml +++ /dev/null @@ -1,17 +0,0 @@ -_target_: text_recognizer.network.convnext.ConvNext -dim: 16 -dim_mults: [2, 4, 8] -depths: [3, 3, 6] -downsampling_factors: [[2, 2], [2, 2], [2, 2]] -attn: - _target_: text_recognizer.network.convnext.TransformerBlock - attn: - _target_: text_recognizer.network.convnext.Attention - dim: 128 - heads: 4 - dim_head: 64 - scale: 8 - ff: - _target_: text_recognizer.network.convnext.FeedForward - dim: 128 - mult: 4 |