summary refs log tree commit diff
path: root/notebooks/04-vq-transformer.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'notebooks/04-vq-transformer.ipynb')
-rw-r--r-- notebooks/04-vq-transformer.ipynb 253
1 files changed, 0 insertions, 253 deletions
diff --git a/notebooks/04-vq-transformer.ipynb b/notebooks/04-vq-transformer.ipynb
deleted file mode 100644
index 69d2688..0000000
--- a/notebooks/04-vq-transformer.ipynb
+++ /dev/null
@@ -1,253 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "7c02ae76-b540-4b16-9492-e9210b3b9249",
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "os.environ['CUDA_VISIBLE_DEVICES'] = ''  # empty list hides every GPU; set before torch is imported\n",
- "import random\n",
- "\n",
- "%matplotlib inline\n",
- "import matplotlib.pyplot as plt\n",
- "\n",
- "import numpy as np\n",
- "from omegaconf import OmegaConf\n",
- "\n",
- "%load_ext autoreload\n",
- "%autoreload 2\n",
- "\n",
- "from importlib.util import find_spec\n",
- "if find_spec(\"text_recognizer\") is None:  # not importable: add the parent directory to sys.path\n",
- "    import sys\n",
- "    sys.path.append('..')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ccdb6dde-47e5-429a-88f2-0764fb7e259a",
- "metadata": {},
- "outputs": [],
- "source": [
- "from hydra import compose, initialize\n",
- "from omegaconf import OmegaConf\n",
- "from hydra.utils import instantiate"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "3cf50475-39f2-4642-a7d1-5bcbc0a036f7",
- "metadata": {},
- "outputs": [],
- "source": [
- "path = \"../training/conf/experiment/vqgan_htr_char_iam_lines.yaml\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "e52ecb01-c975-4e55-925d-1182c7aea473",
- "metadata": {},
- "outputs": [],
- "source": [
- "with open(path, mode=\"rb\") as config_file:\n",
- "    cfg = OmegaConf.load(config_file)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "f939aa37-7b1d-45cc-885c-323c4540bda1",
- "metadata": {},
- "outputs": [],
- "source": [
- "cfg"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "aaeab329-aeb0-4a1b-aa35-5a2aab81b1d0",
- "metadata": {},
- "outputs": [],
- "source": [
- "net = instantiate(cfg.network)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "a564ac7a-b67f-4bc1-af36-0fe0a58c1bc9",
- "metadata": {},
- "outputs": [],
- "source": [
- "import torch"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "aeddcc5c-e48d-4d90-8efa-963011ef40bc",
- "metadata": {},
- "outputs": [],
- "source": [
- "x = torch.randn(16, 1, 16, 64)\n",
- "y = torch.randint(low=0, high=56, size=(16, 89))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "0f0d78bc-7e0a-4d06-8e38-49b29ad25933",
- "metadata": {},
- "outputs": [],
- "source": [
- "y.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "e9f4ee2a-c93f-4461-8d75-40c8c12d9d48",
- "metadata": {},
- "outputs": [],
- "source": [
- "yy = net(x, y)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "7a7493a9-0e1d-46ef-8180-27605e18d082",
- "metadata": {},
- "outputs": [],
- "source": [
- "yy[0].shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "75bc9695-2afd-455c-a4fb-2e182456ccbd",
- "metadata": {},
- "outputs": [],
- "source": [
- "z = torch.randn((16, 8, 32))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "3df6f9a0-6e66-4f46-a5b7-c0bb71b16b9b",
- "metadata": {},
- "outputs": [],
- "source": [
- "z, _ = net.encode(x)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "6d6e9dd1-c56e-4169-8216-bcc84ea980e3",
- "metadata": {},
- "outputs": [],
- "source": [
- "z.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "8f1539cb-b9b2-40b7-a843-d7479ddbddd7",
- "metadata": {},
- "outputs": [],
- "source": [
- "yy = net.decode(z, y[:, :2])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "5cdba0a9-da7d-4e33-b209-7f360d1a38e5",
- "metadata": {},
- "outputs": [],
- "source": [
- "yy.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "6da8065f-f93f-4aec-a60e-408712a28c3b",
- "metadata": {},
- "outputs": [],
- "source": [
- "torch.argmax(yy,dim=-2).shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "beabbda7-6a1f-4294-8f01-f9d866ffe088",
- "metadata": {},
- "outputs": [],
- "source": [
- "yy[0].shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "618b997c-e6a6-4487-b70c-9d260cb556d3",
- "metadata": {},
- "outputs": [],
- "source": [
- "from torchinfo import summary"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "25759b7b-8deb-4163-b75d-a1357c9fe88f",
- "metadata": {},
- "outputs": [],
- "source": [
- "summary(net, (1, 1, 576, 640), device=\"cpu\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "62ca0d97-625c-474b-8d6c-d0caba79e198",
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.9.7"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}