{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "1e40a88b", "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", "\n", "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "from PIL import Image\n", "import torch\n", "from torch import nn\n", "from importlib.util import find_spec\n", "if find_spec(\"text_recognizer\") is None:\n", " import sys\n", " sys.path.append('..')\n", " " ] }, { "cell_type": "code", "execution_count": 2, "id": "d3a6146b-94b1-4618-a4e4-00f8e23ffdb0", "metadata": {}, "outputs": [], "source": [ "from hydra import compose, initialize\n", "from omegaconf import OmegaConf\n", "from hydra.utils import instantiate" ] }, { "cell_type": "code", "execution_count": 3, "id": "6b722ca0-9c65-4f90-be4e-b7334ea81237", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "mapping:\n", " _target_: text_recognizer.data.mappings.WordPieceMapping\n", " num_features: 1000\n", " tokens: iamdb_1kwp_tokens_1000.txt\n", " lexicon: iamdb_1kwp_lex_1000.txt\n", " data_dir: null\n", " use_words: false\n", " prepend_wordsep: false\n", " special_tokens:\n", " - \n", " - \n", " -

\n", " extra_symbols:\n", " - \\n\n", "_target_: text_recognizer.models.transformer.TransformerLitModel\n", "interval: step\n", "monitor: val/loss\n", "ignore_tokens:\n", "- \n", "- \n", "-

\n", "start_token: \n", "end_token: \n", "pad_token:

\n", "\n", "{'mapping': {'_target_': 'text_recognizer.data.mappings.WordPieceMapping', 'num_features': 1000, 'tokens': 'iamdb_1kwp_tokens_1000.txt', 'lexicon': 'iamdb_1kwp_lex_1000.txt', 'data_dir': None, 'use_words': False, 'prepend_wordsep': False, 'special_tokens': ['', '', '

'], 'extra_symbols': ['\\\\n']}, '_target_': 'text_recognizer.models.transformer.TransformerLitModel', 'interval': 'step', 'monitor': 'val/loss', 'ignore_tokens': ['', '', '

'], 'start_token': '', 'end_token': '', 'pad_token': '

'}\n" ] } ], "source": [ "# context initialization\n", "with initialize(config_path=\"../training/conf/model/\", job_name=\"test_app\"):\n", " cfg = compose(config_name=\"lit_transformer\")\n", " print(OmegaConf.to_yaml(cfg))\n", " print(cfg)" ] }, { "cell_type": "code", "execution_count": null, "id": "9c797159-845e-42c6-bd65-1c976ad627cd", "metadata": {}, "outputs": [], "source": [ "# context initialization\n", "with initialize(config_path=\"../training/conf/network/\", job_name=\"test_app\"):\n", " cfg = compose(config_name=\"conv_transformer\")\n", " print(OmegaConf.to_yaml(cfg))\n", " print(cfg)" ] }, { "cell_type": "code", "execution_count": null, "id": "af2c8cfa-0b45-4681-b671-0f97ace62516", "metadata": {}, "outputs": [], "source": [ "net = instantiate(cfg)" ] }, { "cell_type": "code", "execution_count": null, "id": "8f0742ad-5e2f-42d5-83e7-6e46398b4f0f", "metadata": { "tags": [] }, "outputs": [], "source": [ "net" ] }, { "cell_type": "code", "execution_count": null, "id": "40be59bc-db79-4af1-9df4-e280f7a56481", "metadata": {}, "outputs": [], "source": [ "img = torch.rand(4, 1, 576, 640)" ] }, { "cell_type": "code", "execution_count": null, "id": "d5a8f10b-edf5-4a18-9747-f016db72c384", "metadata": {}, "outputs": [], "source": [ "y = torch.randint(0, 1006, (4, 451))" ] }, { "cell_type": "code", "execution_count": null, "id": "19423ef1-3d98-4af3-8748-fdd3bb817300", "metadata": {}, "outputs": [], "source": [ "y.shape" ] }, { "cell_type": "code", "execution_count": null, "id": "0712ee7e-4f66-4fb1-bc91-d8a127eb7ac7", "metadata": {}, "outputs": [], "source": [ "net = net.cuda()\n", "img = img.cuda()\n", "y = y.cuda()" ] }, { "cell_type": "code", "execution_count": null, "id": "719154b4-47db-4c91-bae4-8c572c4a4536", "metadata": {}, "outputs": [], "source": [ "net(img, y).shape" ] }, { "cell_type": "code", "execution_count": null, "id": "bcb7db0f-0afe-44eb-9bb7-b988fbead95a", "metadata": {}, "outputs": [], "source": [ "from torchsummary import summary" ] }, { "cell_type": "code", "execution_count": null, "id": "31af8ee1-28d3-46b8-a847-6506d29bc45c", "metadata": {}, "outputs": [], "source": [ "summary(net, [(1, 576, 640), (451,)], device=\"cpu\", depth=2)" ] }, { "cell_type": "code", "execution_count": null, "id": "4d6d836f-d169-48b4-92e6-ca17179e6f85", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.6" } }, "nbformat": 4, "nbformat_minor": 5 }