| author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2022-09-29 00:14:43 +0200 |
|---|---|---|
| committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2022-09-29 00:14:43 +0200 |
| commit | a4d5de1e45e61a89fbcf1932b84539a5988bcb1f (patch) | |
| tree | 91b522c0d4a1090f2a420b0664a24181751c7b2a /notebooks/04-conv-transformer.ipynb | |
| parent | 4f42407d7e600f2706c149ba07148c68b9ed9d4e (diff) | |
Update notebooks
Diffstat (limited to 'notebooks/04-conv-transformer.ipynb')
-rw-r--r-- | notebooks/04-conv-transformer.ipynb | 23 |
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/notebooks/04-conv-transformer.ipynb b/notebooks/04-conv-transformer.ipynb
index 0ddf73b..b864098 100644
--- a/notebooks/04-conv-transformer.ipynb
+++ b/notebooks/04-conv-transformer.ipynb
@@ -2,10 +2,19 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 8,
    "id": "7c02ae76-b540-4b16-9492-e9210b3b9249",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The autoreload extension is already loaded. To reload it, use:\n",
+      "  %reload_ext autoreload\n"
+     ]
+    }
+   ],
    "source": [
     "import os\n",
     "os.environ['CUDA_VISIBLE_DEVICE'] = ''\n",
@@ -16,7 +25,7 @@
     "\n",
     "import numpy as np\n",
     "from omegaconf import OmegaConf\n",
-    "\n",
+    "import torch\n",
     "%load_ext autoreload\n",
     "%autoreload 2\n",
     "\n",
@@ -50,7 +59,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 4,
    "id": "e52ecb01-c975-4e55-925d-1182c7aea473",
    "metadata": {},
    "outputs": [],
@@ -61,7 +70,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 5,
    "id": "f939aa37-7b1d-45cc-885c-323c4540bda1",
    "metadata": {},
    "outputs": [
@@ -71,7 +80,7 @@
       "{'_target_': 'text_recognizer.networks.ConvTransformer', 'input_dims': [1, 1, 576, 640], 'hidden_dim': 128, 'num_classes': 58, 'pad_index': 3, 'encoder': {'_target_': 'text_recognizer.networks.convnext.ConvNext', 'dim': 16, 'dim_mults': [2, 4, 8], 'depths': [3, 3, 6], 'downsampling_factors': [[2, 2], [2, 2], [2, 2]]}, 'decoder': {'_target_': 'text_recognizer.networks.transformer.Decoder', 'dim': 128, 'depth': 10, 'block': {'_target_': 'text_recognizer.networks.transformer.decoder_block.DecoderBlock', 'self_attn': {'_target_': 'text_recognizer.networks.transformer.Attention', 'dim': 128, 'num_heads': 12, 'dim_head': 64, 'dropout_rate': 0.2, 'causal': True, 'rotary_embedding': {'_target_': 'text_recognizer.networks.transformer.RotaryEmbedding', 'dim': 64}}, 'cross_attn': {'_target_': 'text_recognizer.networks.transformer.Attention', 'dim': 128, 'num_heads': 12, 'dim_head': 64, 'dropout_rate': 0.2, 'causal': False}, 'norm': {'_target_': 'text_recognizer.networks.transformer.RMSNorm', 'dim': 128}, 'ff': {'_target_': 'text_recognizer.networks.transformer.FeedForward', 'dim': 128, 'dim_out': None, 'expansion_factor': 2, 'glu': True, 'dropout_rate': 0.2}}}, 'pixel_embedding': {'_target_': 'text_recognizer.networks.transformer.embeddings.axial.AxialPositionalEmbeddingImage', 'dim': 128, 'axial_shape': [7, 128], 'axial_dims': [64, 64]}, 'token_pos_embedding': {'_target_': 'text_recognizer.networks.transformer.embeddings.fourier.PositionalEncoding', 'dim': 128, 'dropout_rate': 0.1, 'max_len': 89}}"
      ]
     },
-     "execution_count": 13,
+     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -82,7 +91,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 6,
    "id": "aaeab329-aeb0-4a1b-aa35-5a2aab81b1d0",
    "metadata": {
     "scrolled": false
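The configuration printed in the third cell uses Hydra-style `_target_` entries (ConvNext encoder, transformer decoder, axial and Fourier positional embeddings). As a rough sketch of how such a config could be turned into a model, the snippet below loads a YAML and instantiates it recursively; the file path is hypothetical and the use of `hydra.utils.instantiate` is an assumption, since the notebook only shows the already-resolved OmegaConf dict.

```python
import torch
from hydra.utils import instantiate
from omegaconf import OmegaConf

# Hypothetical config path; the notebook only displays the resolved dict.
cfg = OmegaConf.load("training/conf/network/conv_transformer.yaml")

# Recursively build the network from the `_target_` entries: ConvNext encoder,
# transformer Decoder, axial pixel embedding, Fourier token positional encoding.
network = instantiate(cfg)

# Quick sanity checks against the config: parameter count and a dummy input
# matching input_dims = [1, 1, 576, 640].
num_params = sum(p.numel() for p in network.parameters())
print(f"trainable parameters: {num_params / 1e6:.1f}M")
dummy_image = torch.randn(1, 1, 576, 640)
```

With Hydra's default recursive instantiation, the nested encoder, decoder, and embedding configs are constructed first and then passed to `ConvTransformer` itself.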