{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "intro-network-shape-checks",
   "metadata": {},
   "source": [
    "# Network shape checks for the `vqgan_htr_char_iam_lines` experiment\n",
    "\n",
    "Builds the network from the experiment config and pushes random tensors through\n",
    "`net(x, y)`, `net.encode` and `net.decode` to inspect the output shapes.\n",
    "GPUs are hidden from CUDA, so everything runs on the CPU."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7c02ae76-b540-4b16-9492-e9210b3b9249",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Hide every GPU so the notebook runs on the CPU. The variable is spelled\n",
    "# CUDA_VISIBLE_DEVICES (plural) and is set before torch is imported.\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = ''\n",
    "import random\n",
    "\n",
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "import numpy as np\n",
    "import torch\n",
    "from hydra import compose, initialize\n",
    "from hydra.utils import instantiate\n",
    "from omegaconf import OmegaConf\n",
    "from torchinfo import summary\n",
    "\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "# Make the parent directory importable when text_recognizer is not installed.\n",
    "from importlib.util import find_spec\n",
    "if find_spec(\"text_recognizer\") is None:\n",
    "    import sys\n",
    "    sys.path.append('..')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ccdb6dde-47e5-429a-88f2-0764fb7e259a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seed every RNG in play so the random inputs below are reproducible\n",
    "# after Restart Kernel -> Run All.\n",
    "SEED = 42\n",
    "random.seed(SEED)\n",
    "np.random.seed(SEED)\n",
    "torch.manual_seed(SEED);"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-load-config",
   "metadata": {},
   "source": [
    "## Load the experiment config and build the network"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3cf50475-39f2-4642-a7d1-5bcbc0a036f7",
   "metadata": {},
   "outputs": [],
   "source": [
    "path = \"../training/conf/experiment/vqgan_htr_char_iam_lines.yaml\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e52ecb01-c975-4e55-925d-1182c7aea473",
   "metadata": {},
   "outputs": [],
   "source": [
    "# OmegaConf.load accepts a path directly, so no manual open() is needed.\n",
    "cfg = OmegaConf.load(path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f939aa37-7b1d-45cc-885c-323c4540bda1",
   "metadata": {},
   "outputs": [],
   "source": [
    "cfg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aaeab329-aeb0-4a1b-aa35-5a2aab81b1d0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the network object described by the `network` node of the config.\n",
    "net = instantiate(cfg.network)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-forward-pass",
   "metadata": {},
   "source": [
    "## Forward pass on random inputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aeddcc5c-e48d-4d90-8efa-963011ef40bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# A random float tensor and a random integer tensor to feed the network.\n",
    "# NOTE(review): presumably x is (batch, channels, height, width) and y holds\n",
    "# token indices in [0, 56) with sequence length 89 - confirm against the config.\n",
    "x = torch.randn((16, 1, 16, 64))\n",
    "y = torch.randint(0, 56, (16, 89))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f0d78bc-7e0a-4d06-8e38-49b29ad25933",
   "metadata": {},
   "outputs": [],
   "source": [
    "y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e9f4ee2a-c93f-4461-8d75-40c8c12d9d48",
   "metadata": {},
   "outputs": [],
   "source": [
    "yy = net(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7a7493a9-0e1d-46ef-8180-27605e18d082",
   "metadata": {},
   "outputs": [],
   "source": [
    "yy[0].shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-encode-decode",
   "metadata": {},
   "source": [
    "## Encode and decode separately"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3df6f9a0-6e66-4f46-a5b7-c0bb71b16b9b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# encode returns two values; only the first one is used below.\n",
    "z, _ = net.encode(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6d6e9dd1-c56e-4169-8216-bcc84ea980e3",
   "metadata": {},
   "outputs": [],
   "source": [
    "z.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8f1539cb-b9b2-40b7-a843-d7479ddbddd7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Decode using only the first two columns of y.\n",
    "yy = net.decode(z, y[:, :2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5cdba0a9-da7d-4e33-b209-7f360d1a38e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "yy.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6da8065f-f93f-4aec-a60e-408712a28c3b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shape after taking the argmax along the second-to-last axis.\n",
    "torch.argmax(yy,dim=-2).shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "beabbda7-6a1f-4294-8f01-f9d866ffe088",
   "metadata": {},
   "outputs": [],
   "source": [
    "yy[0].shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-model-summary",
   "metadata": {},
   "source": [
    "## Model summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "25759b7b-8deb-4163-b75d-a1357c9fe88f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): net is called as net(x, y) above, but only one input size is\n",
    "# passed here - confirm that forward accepts a single input, otherwise supply\n",
    "# both inputs to summary.\n",
    "summary(net, (1, 1, 576, 640), device=\"cpu\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}