In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import torch
from torch import nn
from importlib.util import find_spec
# Make the `text_recognizer` package importable when it cannot be found on the
# current import path: fall back to the parent of the working directory.
package_spec = find_spec("text_recognizer")
if package_spec is None:
    import sys

    sys.path.append('..')
    

In [2]:
from hydra import compose, initialize
from omegaconf import OmegaConf
from hydra.utils import instantiate

In [3]:
# Compose the project config with the `vqvae` experiment override inside a
# Hydra initialization context, then dump it as YAML and via its plain repr.
with initialize(config_path="../training/conf/", job_name="test_app"):
    cfg = compose(
        config_name="config",
        overrides=["+experiment=vqvae"],
    )

config_yaml = OmegaConf.to_yaml(cfg)
print(config_yaml)
print(cfg)

callbacks:
  model_checkpoint:
    _target_: pytorch_lightning.callbacks.ModelCheckpoint
    monitor: val/loss
    save_top_k: 1
    save_last: true
    mode: min
    verbose: false
    dirpath: checkpoints/
    filename: '{epoch:02d}'
  learning_rate_monitor:
    _target_: pytorch_lightning.callbacks.LearningRateMonitor
    logging_interval: step
    log_momentum: false
  watch_model:
    _target_: callbacks.wandb_callbacks.WatchModel
    log: all
    log_freq: 100
  upload_code_as_artifact:
    _target_: callbacks.wandb_callbacks.UploadCodeAsArtifact
    project_dir: ${work_dir}/text_recognizer
  upload_ckpts_as_artifact:
    _target_: callbacks.wandb_callbacks.UploadCheckpointsAsArtifact
    ckpt_dir: checkpoints/
    upload_best_only: true
  log_image_reconstruction:
    _target_: callbacks.wandb_callbacks.LogReconstuctedImages
    num_samples: 8
criterion:
  _target_: torch.nn.MSELoss
  reduction: mean
datamodule:
  _target_: text_recognizer.data.iam_extended_paragraphs.IAMExtende

In [4]:
# Build the mapping object described by the `mapping` section of the composed config.
# The recorded log line for this cell comes from text_recognizer.data.word_piece_mapping.
mapping = instantiate(cfg.mapping)

2021-08-04 05:07:26.480 | DEBUG    | text_recognizer.data.word_piece_mapping:__init__:37 - Using data dir: /home/aktersnurra/projects/text-recognizer/data/downloaded/iam/iamdb


In [5]:
# Build the network described by the `network` section of the composed config.
network = instantiate(cfg.network)

In [6]:
# Random dummy input of shape (1, 1, 576, 640).
# Presumably (batch, channels, height, width) — TODO confirm.
# No seed is set in this notebook, so the values differ between runs.
x = torch.rand(1, 1, 576, 640)

In [7]:
# Shape of the first element returned by `network.encode`;
# the recorded output for this cell is (1, 64, 144, 160).
network.encode(x)[0].shape

torch.Size([1, 64, 144, 160])

In [8]:
# Full forward pass; the call returns two values, unpacked here as `t` and `l`.
# NOTE(review): the four `torch.Size([512])` lines in this cell's recorded output are
# presumably leftover debug prints inside the network code — confirm and remove them there.
t, l = network(x)

torch.Size([512])
torch.Size([512])
torch.Size([512])
torch.Size([512])


In [9]:
# Display the second value returned by the forward pass (a scalar tensor in the recorded output).
l

tensor(0.0188, grad_fn=<AddBackward0>)

In [12]:
from torch.nn import functional as F


In [15]:
# Mean squared error between the input `x` and the first network output `t`,
# added to `l`, the second value returned by the forward pass.
reconstruction_mse = F.mse_loss(x, t)
reconstruction_mse + l

tensor(0.5669, grad_fn=<AddBackward0>)

In [10]:
# Shape of the first forward-pass output; the recorded output is (1, 1, 576, 640),
# the same shape as the input `x`.
t.shape

torch.Size([1, 1, 576, 640])

In [None]:
# 576 is the third dimension of the dummy input; dividing by 4 gives 144, which matches
# the third dimension of the recorded `network.encode` output shape.
576 / 4

In [None]:
# Random integer tensor of shape (1, 451) with values in [0, 1006), moved to the GPU.
# NOTE(review): this rebinds `t`, which earlier held the first network output, so cells
# that use `t` give different results depending on execution order. Requires a CUDA device.
t = torch.randint(0, 1006, (1, 451)).cuda()

In [None]:
# Random float tensor of shape (1, 1440, 128) (36 * 40 = 1440), moved to the GPU.
# Presumably a stand-in for a flattened 36 x 40 grid of 128-dim latents — TODO confirm.
z = torch.rand((1, 36 * 40, 128)).cuda()

In [None]:
# Move the network to the GPU (requires a CUDA device).
network = network.cuda()

In [None]:
# Shape of the result of `network.decode` called with `z` and `t`.
# NOTE(review): this cell has no execution count or recorded output — confirm that
# `network.decode` accepts these two arguments for the current network config.
network.decode(z, t).shape

In [None]:
# NOTE(review): `decoder` is not defined in any cell visible in this notebook, so this
# raises NameError on a fresh kernel — define it explicitly first or delete this cell.
decoder = decoder.cuda()

In [None]:
# Shape of the result of calling `decoder` on `z` and `t`.
# Depends on `decoder`, which no visible cell defines.
decoder(z, t).shape

In [None]:
# Disable OmegaConf struct mode on the composed config.
OmegaConf.set_struct(cfg, False)

In [None]:
# Build the data module from the `datamodule` config section, passing the
# already-built `mapping` object as a keyword argument.
datamodule = instantiate(cfg.datamodule, mapping=mapping)

In [None]:
# Run the data module's two preparation steps in order: `prepare_data()`, then `setup()`.
datamodule.prepare_data()
datamodule.setup()

In [None]:
# Length of the training dataloader (presumably the number of batches — TODO confirm).
len(datamodule.train_dataloader())

In [None]:
# Display the mapping object.
mapping

In [None]:
# Alias for `cfg`; the model-instantiation cell reads `config.model`,
# `config.optimizer` and `config.lr_scheduler`.
config = cfg

In [None]:
# Build the loss function from the `criterion` config section
# (the printed config shows torch.nn.MSELoss with reduction "mean").
loss_fn = instantiate(cfg.criterion)

In [None]:
import hydra

In [None]:
    # Build the model from `config.model`, injecting the objects created in earlier cells.
    # `_recursive_=False` tells Hydra not to instantiate nested config nodes itself.
    model_kwargs = {
        "mapping": mapping,
        "network": network,
        "loss_fn": loss_fn,
        "optimizer_config": config.optimizer,
        "lr_scheduler_config": config.lr_scheduler,
        "_recursive_": False,
    }
    model = hydra.utils.instantiate(config.model, **model_kwargs)


In [None]:
# Display the `get_index` attribute of the mapping; it is not called here.
mapping.get_index

In [None]:
# NOTE(review): this instantiates the root config `cfg`, whereas an earlier cell builds the
# network from `cfg.network` — confirm whether `cfg.network` (or another section) was intended,
# since later cells call `net.cuda()` and `net(img, y)`.
net = instantiate(cfg)

In [None]:
# Display the object produced by instantiating the root config.
net

In [None]:
# Random dummy batch of shape (4, 1, 576, 640).
# Presumably four single-channel 576 x 640 images — TODO confirm.
img = torch.rand(4, 1, 576, 640)

In [None]:
# Random integer tensor of shape (4, 451) with values in [0, 1006).
# Presumably dummy target token indices — TODO confirm.
y = torch.randint(0, 1006, (4, 451))

In [None]:
# Display the shape of `y`.
y.shape

In [None]:
# Move the model and both dummy inputs to the GPU (requires a CUDA device).
net, img, y = net.cuda(), img.cuda(), y.cuda()

In [None]:
# Shape of the result of calling `net` on the dummy image batch and `y`.
net(img, y).shape

In [None]:
from torchsummary import summary

In [None]:
# Summarize `net` for two inputs with sizes (1, 576, 640) and (451,)
# (presumably per-sample shapes without the batch dimension — TODO confirm).
# NOTE(review): `device="cpu"` is requested here although an earlier cell moves `net`
# to the GPU — confirm this combination is intended.
summary(net, [(1, 576, 640), (451,)], device="cpu", depth=2)