summaryrefslogtreecommitdiff
path: root/notebooks/04-vq-transformer.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'notebooks/04-vq-transformer.ipynb')
-rw-r--r--notebooks/04-vq-transformer.ipynb253
1 files changed, 253 insertions, 0 deletions
diff --git a/notebooks/04-vq-transformer.ipynb b/notebooks/04-vq-transformer.ipynb
new file mode 100644
index 0000000..69d2688
--- /dev/null
+++ b/notebooks/04-vq-transformer.ipynb
@@ -0,0 +1,253 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7c02ae76-b540-4b16-9492-e9210b3b9249",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "os.environ['CUDA_VISIBLE_DEVICES'] = ''  # plural name is the one CUDA reads; empty value hides all GPUs\n",
+ "import random\n",
+ "\n",
+ "%matplotlib inline\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "import numpy as np\n",
+ "from omegaconf import OmegaConf\n",
+ "\n",
+ "%load_ext autoreload\n",
+ "%autoreload 2\n",
+ "\n",
+ "from importlib.util import find_spec\n",
+ "if find_spec(\"text_recognizer\") is None:  # package not importable from the current path\n",
+ " import sys\n",
+ " sys.path.append('..')  # fall back to the parent directory of the notebook"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ccdb6dde-47e5-429a-88f2-0764fb7e259a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from hydra import compose, initialize\n",
+ "from omegaconf import OmegaConf\n",
+ "from hydra.utils import instantiate"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3cf50475-39f2-4642-a7d1-5bcbc0a036f7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path = \"../training/conf/experiment/vqgan_htr_char_iam_lines.yaml\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e52ecb01-c975-4e55-925d-1182c7aea473",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with open(path, mode=\"rb\") as config_file:  # read the experiment YAML referenced by `path`\n",
+ " cfg = OmegaConf.load(config_file)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f939aa37-7b1d-45cc-885c-323c4540bda1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cfg"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aaeab329-aeb0-4a1b-aa35-5a2aab81b1d0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "net = instantiate(cfg.network)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a564ac7a-b67f-4bc1-af36-0fe0a58c1bc9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aeddcc5c-e48d-4d90-8efa-963011ef40bc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x = torch.randn(16, 1, 16, 64)  # standard-normal tensor of shape (16, 1, 16, 64)\n",
+ "y = torch.randint(low=0, high=56, size=(16, 89))  # random integers in [0, 56), shape (16, 89)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0f0d78bc-7e0a-4d06-8e38-49b29ad25933",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "y.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e9f4ee2a-c93f-4461-8d75-40c8c12d9d48",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "yy = net(x, y)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7a7493a9-0e1d-46ef-8180-27605e18d082",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "yy[0].shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "75bc9695-2afd-455c-a4fb-2e182456ccbd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "z = torch.randn((16, 8, 32))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3df6f9a0-6e66-4f46-a5b7-c0bb71b16b9b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "z, _ = net.encode(x)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6d6e9dd1-c56e-4169-8216-bcc84ea980e3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "z.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8f1539cb-b9b2-40b7-a843-d7479ddbddd7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "yy = net.decode(z, y[:, :2])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5cdba0a9-da7d-4e33-b209-7f360d1a38e5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "yy.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6da8065f-f93f-4aec-a60e-408712a28c3b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "yy.argmax(dim=-2).shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "beabbda7-6a1f-4294-8f01-f9d866ffe088",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "yy[0].shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "618b997c-e6a6-4487-b70c-9d260cb556d3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from torchinfo import summary"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "25759b7b-8deb-4163-b75d-a1357c9fe88f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "summary(net, input_data=[x, y], device=\"cpu\")  # the model is called with two tensors (see net(x, y)), so pass both"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "62ca0d97-625c-474b-8d6c-d0caba79e198",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}