diff options
Diffstat (limited to 'notebooks/00-testing-stuff-out.ipynb')
-rw-r--r-- | notebooks/00-testing-stuff-out.ipynb | 547 |
1 files changed, 0 insertions, 547 deletions
diff --git a/notebooks/00-testing-stuff-out.ipynb b/notebooks/00-testing-stuff-out.ipynb deleted file mode 100644 index 12c5145..0000000 --- a/notebooks/00-testing-stuff-out.ipynb +++ /dev/null @@ -1,547 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "%matplotlib inline\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "from PIL import Image\n", - "import torch.nn.functional as F\n", - "import torch\n", - "from torch import nn\n", - "from torchsummary import summary\n", - "from importlib.util import find_spec\n", - "if find_spec(\"text_recognizer\") is None:\n", - " import sys\n", - " sys.path.append('..')" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from omegaconf import OmegaConf" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "path = \"../training/configs/vqvae.yaml\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "conf = OmegaConf.load(path)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(OmegaConf.to_yaml(conf))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from text_recognizer.networks import VQVAE" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vae = VQVAE(**conf.network.args)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vae" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "datum = torch.randn([2, 1, 576, 640])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vae.encoder(datum)[0].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vae(datum)[0].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from text_recognizer.networks.backbones.efficientnet import EfficientNet" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "en = EfficientNet()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "datum = torch.randn([2, 1, 576, 640])" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "trg = torch.randint(0, 1000, [2, 682])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "trg.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "datum = torch.randn([2, 1, 224, 224])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "en(datum).shape" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "path = \"../training/configs/cnn_transformer.yaml\"" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "conf = OmegaConf.load(path)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "seed: 4711\n", - "network:\n", - " desc: Configuration of the PyTorch neural network.\n", - " type: CNNTransformer\n", - " args:\n", - " encoder:\n", - " type: EfficientNet\n", - " args: null\n", - " num_decoder_layers: 4\n", - " hidden_dim: 256\n", - " num_heads: 4\n", - " expansion_dim: 1024\n", - " dropout_rate: 0.1\n", - " transformer_activation: glu\n", - "model:\n", - " desc: Configuration of the PyTorch Lightning model.\n", - " type: LitTransformerModel\n", - " args:\n", - " optimizer:\n", - " type: MADGRAD\n", - " args:\n", - " lr: 0.001\n", - " momentum: 0.9\n", - " weight_decay: 0\n", - " eps: 1.0e-06\n", - " lr_scheduler:\n", - " type: OneCycleLR\n", - " args:\n", - " interval: step\n", - " max_lr: 0.001\n", - " three_phase: true\n", - " epochs: 512\n", - " steps_per_epoch: 1246\n", - " criterion:\n", - " type: CrossEntropyLoss\n", - " args:\n", - " weight: None\n", - " ignore_index: -100\n", - " reduction: mean\n", - " monitor: val_loss\n", - " mapping: sentence_piece\n", - "data:\n", - " desc: Configuration of the training/test data.\n", - " type: IAMExtendedParagraphs\n", - " args:\n", - " batch_size: 16\n", - " num_workers: 12\n", - " train_fraction: 0.8\n", - " augment: true\n", - "callbacks:\n", - "- type: ModelCheckpoint\n", - " args:\n", - " monitor: val_loss\n", - " mode: min\n", - " save_last: true\n", - "- type: StochasticWeightAveraging\n", - " args:\n", - " swa_epoch_start: 0.8\n", - " swa_lrs: 0.05\n", - " annealing_epochs: 10\n", - " annealing_strategy: cos\n", - " device: null\n", - "- type: LearningRateMonitor\n", - " args:\n", - " logging_interval: step\n", - "- type: EarlyStopping\n", - " args:\n", - " monitor: val_loss\n", - " mode: min\n", - " patience: 10\n", - "trainer:\n", - " desc: Configuration of the PyTorch Lightning Trainer.\n", - " args:\n", - " stochastic_weight_avg: true\n", - " auto_scale_batch_size: binsearch\n", - " gradient_clip_val: 0\n", - " fast_dev_run: false\n", - " gpus: 1\n", - " precision: 16\n", - " max_epochs: 512\n", - " terminate_on_nan: true\n", - " weights_summary: true\n", - "load_checkpoint: null\n", - "\n" - ] - } - ], - "source": [ - "print(OmegaConf.to_yaml(conf))" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "from text_recognizer.networks.cnn_transformer import CNNTransformer" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "t = CNNTransformer(input_shape=(1, 576, 640), output_shape=(682, 1), **conf.network.args)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "t.encode(datum).shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "trg.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "torch.Size([2, 682, 1004])" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "t(datum, trg).shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "b, n = 16, 128\n", - "device = \"cpu\"" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "x = lambda: torch.ones((b, n), device=device).bool()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "torch.Size([16, 128])" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "x().shape" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "torch.Size([16, 128])" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "torch.ones((b, n), device=device).bool().shape" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "x = torch.randn(1, 1, 576, 640)" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "144" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "576 // 4" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "160" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "640 // 4" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "x = torch.randn(1, 1, 144, 160)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "from einops import rearrange" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "patch_size=16\n", - "p = rearrange(x, 'b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1 = patch_size, p2 = patch_size)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "torch.Size([1, 1440, 256])" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "p.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.1" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} |