In [None]:
import os
# NOTE(review): CUDA reads `CUDA_VISIBLE_DEVICES` (plural). The singular key set here
# is presumably a typo and, if so, hides no GPU. Later cells call `.cuda()`, so
# correcting the spelling while keeping the empty value would make them fail —
# confirm whether this notebook is meant to run on CPU or GPU before changing it.
os.environ['CUDA_VISIBLE_DEVICE'] = ''
import random

%matplotlib inline
import matplotlib.pyplot as plt

import numpy as np

%load_ext autoreload
%autoreload 2

from importlib.util import find_spec
if find_spec("text_recognizer") is None:
 import sys
 sys.path.append('..')

from text_recognizer.data.iam_paragraphs import IAMParagraphs
from text_recognizer.data.iam_synthetic_paragraphs import IAMSyntheticParagraphs
from text_recognizer.data.iam_extended_paragraphs import IAMExtendedParagraphs

In [None]:
def _plot(image, figsize=(12,12), title='', vmin=0, vmax=255):
    """Draw `image` in grayscale on a newly created matplotlib figure.

    Args:
        image: Array-like handed straight to `plt.imshow`.
        figsize: Size forwarded to `plt.figure`.
        title: Figure title; nothing is set when it is empty/falsy.
        vmin: Lower end of the gray color range.
        vmax: Upper end of the gray color range.
    """
    imshow_options = dict(cmap='gray', vmin=vmin, vmax=vmax)
    plt.figure(figsize=figsize)
    if title:
        plt.title(title)
    plt.imshow(image, **imshow_options)

def convert_y_label_to_string(y, mapping, padding_index=3):
    """Decode a sequence of label indices into a string, dropping padding.

    Args:
        y: Iterable of label indices; each kept element is converted with `int()`.
        mapping: Object indexable by int that yields the string for an index.
        padding_index: Elements equal to this value are skipped.

    Returns:
        The concatenation of `mapping[int(i)]` for every non-padding element of `y`.
    """
    pieces = []
    for label in y:
        # Compare before converting, exactly as the element is stored in `y`.
        if label != padding_index:
            pieces.append(mapping[int(label)])
    return ''.join(pieces)

In [None]:
from hydra import compose, initialize
from omegaconf import OmegaConf
from hydra.utils import instantiate

In [None]:
import torch

In [None]:
# context initialization
# Compose the Hydra config named "config" from ../training/conf/ with the
# `vqgan_htr_char` experiment appended as an override, then print it as YAML.
with initialize(config_path="../training/conf/", job_name="test_app"):
 cfg = compose(config_name="config", overrides=["+experiment=vqgan_htr_char"])
 print(OmegaConf.to_yaml(cfg))

In [None]:
# Build the data module described by cfg.datamodule, passing cfg.mapping as `mapping`.
datamodule = instantiate(cfg.datamodule, mapping=cfg.mapping)
# presumably downloads/creates the data and builds the splits — confirm in the datamodule class
datamodule.prepare_data()
datamodule.setup()
print(datamodule)

In [None]:
# Build the network described by cfg.network and move it to the GPU (needs a CUDA device).
net = instantiate(cfg.network).cuda()

In [None]:
# Random dummy input on the GPU; presumably (batch, channel, height, width) — TODO confirm.
x = torch.randn(2, 1, 576, 640).cuda()

In [None]:
# Random integers in [0, 53) with shape (2, 682), on the GPU.
# presumably dummy target tokens; 53 / 682 presumably mirror the class count and
# maximum sequence length of the experiment config — verify against cfg.
c = torch.randint(0, 53, (2, 682)).cuda()

In [None]:
# Display the shape of the dummy integer tensor.
c.shape

In [None]:
# Smoke test: call the network on the dummy image tensor and dummy integer tensor.
net(x, c)

In [None]:
# Length of the training dataloader (presumably the number of batches per epoch).
len(datamodule.train_dataloader())

In [None]:
# NOTE(review): in top-to-bottom order `x` is still the random tensor created with
# torch.randn above; the dataloader batch is only fetched in the next cell.
x.min()

In [None]:
# Take the first batch from a freshly created training dataloader iterator.
# This rebinds `x`, replacing the random dummy tensor.
x, y = next(iter(datamodule.train_dataloader()))

In [None]:
# Smallest value in the current `x` (the fetched batch when cells run in order).
x.min()

In [None]:
# Largest value in the current `x` (the fetched batch when cells run in order).
x.max()

In [None]:
from torch import nn

In [None]:
# Binary cross-entropy that takes raw logits as input (sigmoid is applied inside the loss).
loss = nn.BCEWithLogitsLoss()

In [None]:
# NOTE(review): within the visible notebook `target` is first assigned in a later
# cell, so on a fresh top-to-bottom run this cell raises NameError.
target

In [None]:
# Uniform random values in [0, 10) with gradient tracking requested.
# NOTE(review): the name `input` shadows Python's builtin `input` for the rest of the session.
input = 10 * torch.rand((6, 1, 576, 640), requires_grad=True)

In [None]:
# Softmax module for image-shaped input, used in the cell that calls `s(input)`.
s = nn.Softmax2d()

In [None]:
# Shape after merging the last two dimensions of `input` into one.
input.flatten(-2, -1).shape

In [None]:
# NOTE(review): Softmax2d normalizes across the channel dimension at each spatial
# location; the `input` built above has a single channel, so every output value
# would be 1 — confirm this is the normalization that was intended.
s(input)

In [None]:
# Minimal BCEWithLogitsLoss example on image-sized tensors.
# Random logits with gradient tracking (rebinds `input`, which shadows the builtin).
input = torch.randn((8, 1, 576, 640), requires_grad=True)
# random_(2) fills the tensor in place with values drawn from {0, 1}.
target = torch.empty((8, 1, 576, 640)).random_(2)
output = loss(input, target)
# Backpropagate through the loss; gradients accumulate on `input`.
output.backward()

In [None]:
# Same loss with the last two dimensions of both tensors merged into one.
output = loss(input.flatten(-2, -1), target.flatten(-2, -1))

In [None]:
# NOTE(review): `x` and `target` are created in different cells; BCEWithLogitsLoss
# expects both to have the same shape — verify that whatever `x` holds here matches
# the (8, 1, 576, 640) target.
output = loss(x, target)


In [None]:
# Display the most recently computed loss value.
output

In [None]:
# Display the loss value again (same expression as the previous cell).
output

In [None]:
# Shape of the current `x`.
x.shape

In [None]:
# Pull one (image, label) pair straight from the training dataset; index 3 is an
# arbitrary pick. This rebinds `x` and `y`.
x, y = datamodule.data_train[3]

In [None]:
# Display the raw label of the selected sample.
y

In [None]:
# Decode the label through the datamodule's mapping, skipping entries equal to index 3.
convert_y_label_to_string(y, datamodule.mapping, padding_index=3)

In [None]:
# Shape of the selected sample image.
x.shape

In [None]:
# Plot the first slice of the sample with a gray range of 0..1.
# The title comes from mapping.get_text(y) — presumably the decoded label text; confirm.
_plot(x[0], vmax=1, title=datamodule.mapping.get_text(y))

In [None]:
# Largest value in the first slice of `x`.
x[0].max()

In [None]:
# Same slice shown without _plot's fixed vmin/vmax, so imshow scales the gray range
# to the data.
plt.figure(figsize=(8,8))
plt.imshow(x[0], cmap='gray')

In [None]:
# First entry of the current label `y`.
y[0]

In [None]:
# NOTE(review): this indexes one level deeper (x[0, 0], y[0]) than the other plotting
# cells, so it looks written for a batched (x, y) such as a dataloader batch. If `x`
# and `y` still hold the single sample taken from datamodule.data_train, confirm that
# this cell still works.
_plot(x[0, 0], vmax=1, title=convert_y_label_to_string(y[0], datamodule.mapping))

In [None]:
# Training

# Plot five randomly chosen training samples, each titled with its decoded label.
# NOTE(review): within the visible notebook `dataset` is first assigned in a later
# cell, so on a fresh top-to-bottom run this cell raises NameError — confirm which
# data module is meant here.
for _ in range(5):
    # randrange excludes the upper bound. random.randint(0, len(...)) is inclusive
    # and could return len(...) itself, one past the last valid index.
    i = random.randrange(len(dataset.data_train))
    x, y = dataset.data_train[i]
    _plot(x[0], vmax=1, title=convert_y_label_to_string(y, dataset.mapping))

In [None]:
from einops import rearrange

In [None]:
# Pick the training sample at index 2 for the patch experiment below (rebinds `x`, `y`).
x, y = dataset.data_train[2]

In [None]:
# Plot the first slice of the selected sample, titled with its decoded label.
_plot(x[0], vmax=1, title=convert_y_label_to_string(y, dataset.mapping))

In [None]:
# Cut the image into non-overlapping p x p patches.
# unsqueeze(0) adds a leading batch axis; per the pattern the result is laid out as
# (b, c, h*w, p1, p2), i.e. one flat patch index followed by the patch itself.
# The pattern requires the image height and width to be divisible by p.
p = 32
patches = rearrange(x.unsqueeze(0), 'b c (h p1) (w p2) -> b c (h w) p1 p2', p1 = p, p2 = p)

In [None]:
# Show 15 consecutive patches (flat patch indices 160..174) side by side in one row.
fig = plt.figure(figsize=(20, 20))
for i in range(15):
 ax = fig.add_subplot(1, 15, i + 1)
 # The integer indexing already leaves a 2-D (p1, p2) slice; squeeze(0) only changes
 # anything if p1 were 1.
 ax.imshow(patches[0, 0, i + 160, :, :].squeeze(0), cmap='gray')

In [None]:
# Testing

# Plot five randomly chosen test samples, each titled with its decoded label.
# NOTE(review): within the visible notebook `dataset` is first assigned in a later
# cell, so on a fresh top-to-bottom run this cell raises NameError — confirm which
# data module is meant here.
for _ in range(5):
    # randrange excludes the upper bound. random.randint(0, len(...)) is inclusive
    # and could return len(...) itself, one past the last valid index.
    i = random.randrange(len(dataset.data_test))
    x, y = dataset.data_test[i]
    _plot(x[0], vmax=1, title=convert_y_label_to_string(y, dataset.mapping))

In [None]:
# Build the synthetic-paragraph data module with its default arguments and print it.
# NOTE(review): earlier cells already read `dataset`; this is its first visible
# assignment, so those cells depend on state from a previous kernel session or a
# removed cell.
dataset = IAMSyntheticParagraphs()
# presumably generates/loads the data and builds the splits — confirm in the class
dataset.prepare_data()
dataset.setup()
print(dataset)

In [None]:
# Training

# Plot five randomly chosen training samples from the current `dataset`, each titled
# with its decoded label.
for _ in range(5):
    # randrange excludes the upper bound. random.randint(0, len(...)) is inclusive
    # and could return len(...) itself, one past the last valid index.
    i = random.randrange(len(dataset.data_train))
    x, y = dataset.data_train[i]
    _plot(x[0], vmax=1, title=convert_y_label_to_string(y, dataset.mapping))