{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a7c1e0d4",
   "metadata": {},
   "source": [
    "# Hydra config and word-piece mapping check\n",
    "\n",
    "Composes the `conv_transformer` network config and the `word_piece` mapping config with Hydra, instantiates the mapping from its config, and looks up a few token indices.\n",
    "\n",
    "NOTE(review): the special tokens are written as `<s>`, `<e>` and `<p>` throughout this notebook; confirm these spellings against the files under `training/conf`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "1e40a88b",
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "%matplotlib inline\n",
    "\n",
    "import sys\n",
    "from importlib.util import find_spec\n",
    "\n",
    "import attr\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import torch\n",
    "from hydra import compose, initialize\n",
    "from hydra.utils import instantiate\n",
    "from omegaconf import OmegaConf\n",
    "from PIL import Image\n",
    "from torch import nn\n",
    "\n",
    "# Make the parent directory importable when `text_recognizer` cannot be found.\n",
    "if find_spec(\"text_recognizer\") is None:\n",
    "    sys.path.append('..')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b3f9d2c6",
   "metadata": {},
   "source": [
    "## Network config\n",
    "\n",
    "Compose `conv_transformer` from `../training/conf/network/` and print it both as YAML and as the raw config object."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "9c797159-845e-42c6-bd65-1c976ad627cd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "encoder:\n",
      "  _target_: text_recognizer.networks.encoders.efficientnet.EfficientNet\n",
      "  arch: b0\n",
      "  out_channels: 1280\n",
      "  stochastic_dropout_rate: 0.2\n",
      "  bn_momentum: 0.99\n",
      "  bn_eps: 0.001\n",
      "decoder:\n",
      "  _target_: text_recognizer.networks.transformer.Decoder\n",
      "  dim: 256\n",
      "  depth: 2\n",
      "  num_heads: 8\n",
      "  attn_fn: text_recognizer.networks.transformer.attention.Attention\n",
      "  attn_kwargs:\n",
      "    num_heads: 8\n",
      "    dim_head: 64\n",
      "    dropout_rate: 0.2\n",
      "  norm_fn: torch.nn.LayerNorm\n",
      "  ff_fn: text_recognizer.networks.transformer.mlp.FeedForward\n",
      "  ff_kwargs:\n",
      "    dim: 256\n",
      "    dim_out: null\n",
      "    expansion_factor: 4\n",
      "    glu: true\n",
      "    dropout_rate: 0.2\n",
      "  rotary_emb: null\n",
      "  rotary_emb_dim: null\n",
      "  cross_attend: true\n",
      "  pre_norm: true\n",
      "_target_: text_recognizer.networks.conv_transformer.ConvTransformer\n",
      "input_dims:\n",
      "- 1\n",
      "- 576\n",
      "- 640\n",
      "hidden_dim: 256\n",
      "dropout_rate: 0.2\n",
      "max_output_len: 682\n",
      "num_classes: 1004\n",
      "start_token: <s>\n",
      "end_token: <e>\n",
      "pad_token: <p>\n",
      "\n",
      "{'encoder': {'_target_': 'text_recognizer.networks.encoders.efficientnet.EfficientNet', 'arch': 'b0', 'out_channels': 1280, 'stochastic_dropout_rate': 0.2, 'bn_momentum': 0.99, 'bn_eps': 0.001}, 'decoder': {'_target_': 'text_recognizer.networks.transformer.Decoder', 'dim': 256, 'depth': 2, 'num_heads': 8, 'attn_fn': 'text_recognizer.networks.transformer.attention.Attention', 'attn_kwargs': {'num_heads': 8, 'dim_head': 64, 'dropout_rate': 0.2}, 'norm_fn': 'torch.nn.LayerNorm', 'ff_fn': 'text_recognizer.networks.transformer.mlp.FeedForward', 'ff_kwargs': {'dim': 256, 'dim_out': None, 'expansion_factor': 4, 'glu': True, 'dropout_rate': 0.2}, 'rotary_emb': None, 'rotary_emb_dim': None, 'cross_attend': True, 'pre_norm': True}, '_target_': 'text_recognizer.networks.conv_transformer.ConvTransformer', 'input_dims': [1, 576, 640], 'hidden_dim': 256, 'dropout_rate': 0.2, 'max_output_len': 682, 'num_classes': 1004, 'start_token': '<s>', 'end_token': '<e>', 'pad_token': '<p>'}\n"
     ]
    }
   ],
   "source": [
    "# Hydra is initialized only for the duration of the `with` block.\n",
    "with initialize(config_path=\"../training/conf/network/\", job_name=\"test_app\"):\n",
    "    network_cfg = compose(config_name=\"conv_transformer\")\n",
    "    print(OmegaConf.to_yaml(network_cfg))\n",
    "    print(network_cfg)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c58e7a1b",
   "metadata": {},
   "source": [
    "## Mapping config\n",
    "\n",
    "Compose `word_piece` from `../training/conf/mapping/`. It is kept in its own variable (`mapping_cfg`) so the network config above is not overwritten."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "cdb895b6-8949-4318-8a40-06fb5ed5e8d6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "_target_: text_recognizer.data.mappings.WordPieceMapping\n",
      "num_features: 1000\n",
      "tokens: iamdb_1kwp_tokens_1000.txt\n",
      "lexicon: iamdb_1kwp_lex_1000.txt\n",
      "data_dir: null\n",
      "use_words: false\n",
      "prepend_wordsep: false\n",
      "special_tokens:\n",
      "- <s>\n",
      "- <e>\n",
      "- <p>\n",
      "extra_symbols:\n",
      "- '\n",
      "\n",
      "  '\n",
      "\n",
      "{'_target_': 'text_recognizer.data.mappings.WordPieceMapping', 'num_features': 1000, 'tokens': 'iamdb_1kwp_tokens_1000.txt', 'lexicon': 'iamdb_1kwp_lex_1000.txt', 'data_dir': None, 'use_words': False, 'prepend_wordsep': False, 'special_tokens': ['<s>', '<e>', '<p>'], 'extra_symbols': ['\\n']}\n"
     ]
    }
   ],
   "source": [
    "with initialize(config_path=\"../training/conf/mapping/\", job_name=\"test_app\"):\n",
    "    mapping_cfg = compose(config_name=\"word_piece\")\n",
    "    print(OmegaConf.to_yaml(mapping_cfg))\n",
    "    print(mapping_cfg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "b6181656-580a-4d96-8495-b6bb510944cc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'_target_': 'text_recognizer.data.mappings.WordPieceMapping', 'num_features': 1000, 'tokens': 'iamdb_1kwp_tokens_1000.txt', 'lexicon': 'iamdb_1kwp_lex_1000.txt', 'data_dir': None, 'use_words': False, 'prepend_wordsep': False, 'special_tokens': ['<s>', '<e>', '<p>'], 'extra_symbols': ['\\n']}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mapping_cfg"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d92b4f60",
   "metadata": {},
   "source": [
    "## Instantiate the mapping and look up token indices\n",
    "\n",
    "`instantiate` builds the object named by the config's `_target_` entry. The cells below query the index of `<p>` and of the newline symbol, and the mapping's length.\n",
    "\n",
    "NOTE(review): `len(mapping)` reports 1006 while the network config above sets `num_classes: 1004` - confirm whether these are expected to differ."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "0c123c76-ed90-49fa-903b-70ad60a33f16",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2021-07-29 23:02:56.650 | DEBUG | text_recognizer.data.mappings:_configure_wordpiece_processor:104 - Using data dir: /home/aktersnurra/projects/text-recognizer/data/downloaded/iam/iamdb\n"
     ]
    }
   ],
   "source": [
    "mapping = instantiate(mapping_cfg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "ff6c57f0-3c96-418e-8192-cd12bf79c073",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([1002])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mapping.get_index(\"<p>\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "348391ec-0cf7-49f6-bac2-26bc8c966705",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1006"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(mapping)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "67673bf2-79c6-4010-93dd-9c9ba8f9a90e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([1003])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mapping.get_index(\"\\n\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}