In [None]:
import os
# Hide every GPU from CUDA-aware libraries by exporting an empty device list.
# This is set before the project imports further down so it is already in the
# environment when they load. The variable name is plural
# (CUDA_VISIBLE_DEVICES); the singular spelling is not a recognised variable,
# so it had no effect.
os.environ['CUDA_VISIBLE_DEVICES'] = ''
import random

%matplotlib inline
import matplotlib.pyplot as plt

import numpy as np

%load_ext autoreload
%autoreload 2

from importlib.util import find_spec

# If `text_recognizer` cannot be located on the current import path, add the
# parent directory to `sys.path` so the package imports that follow can look
# there (presumably the repository root when running from a checkout).
if find_spec("text_recognizer") is None:
    import sys

    sys.path.append('..')

from text_recognizer.data.iam_paragraphs import IAMParagraphs
from text_recognizer.data.iam_synthetic_paragraphs import IAMSyntheticParagraphs
from text_recognizer.data.iam_extended_paragraphs import IAMExtendedParagraphs

In [None]:
def _plot(image, figsize=(12,12), title='', vmin=0, vmax=255):
    """Show `image` in a new grayscale matplotlib figure.

    Args:
        image: Image data handed unchanged to `plt.imshow`.
        figsize: Figure size forwarded to `plt.figure`.
        title: Figure title; no title is set when this is empty/falsy.
        vmin: Lower intensity limit forwarded to `plt.imshow`.
        vmax: Upper intensity limit forwarded to `plt.imshow`.
    """
    display_options = {'cmap': 'gray', 'vmin': vmin, 'vmax': vmax}
    plt.figure(figsize=figsize)
    if title:
        plt.title(title)
    plt.imshow(image, **display_options)

def convert_y_label_to_string(y, mapping, padding_index=3):
    """Decode a sequence of label indices into a string.

    Args:
        y: Iterable of label indices.
        mapping: Object indexable by those indices, yielding a string for each.
        padding_index: Index value to skip while decoding (default 3).

    Returns:
        The concatenation of `mapping[i]` for every `i` in `y` that is not
        equal to `padding_index`, in order.
    """
    pieces = []
    for i in y:
        if i != padding_index:
            pieces.append(mapping[i])
    return ''.join(pieces)

In [None]:
# Instantiate IAMExtendedParagraphs with batch_size=1 and word_pieces=True,
# then run its prepare_data() and setup() steps in that order.
# NOTE(review): presumably a Lightning-style data module where these calls
# download/prepare the data and build the splits; confirm in text_recognizer.
dataset = IAMExtendedParagraphs(batch_size=1, word_pieces=True)
dataset.prepare_data()
dataset.setup()
# Print the object's string representation as a quick summary.
print(dataset)

In [None]:
# Number of entries in the dataset's label mapping (shown as the cell output).
len(dataset.mapping)

In [None]:
# Rebind `dataset` to an IAMParagraphs instance built with default arguments;
# from here on the name no longer refers to the IAMExtendedParagraphs object.
dataset = IAMParagraphs()
dataset.prepare_data()
dataset.setup()
# Print the object's string representation as a quick summary.
print(dataset)

In [None]:
# Take the first (input, target) batch yielded by the test dataloader.
x, y = next(iter(dataset.test_dataloader()))

In [None]:
# Shape of the batch input fetched from the test dataloader.
x.shape

In [None]:
# Rebind `x, y` to the first training sample; this replaces the batch that was
# fetched from the test dataloader.
x, y = dataset.data_train[0]

In [None]:
# Shape of the single training sample's input.
x.shape

In [None]:
# Raw target of the training sample (shown as the cell output).
y

In [None]:
# Length of the training sample's target.
len(y)

In [None]:
# `x` has not been reassigned since its shape was last displayed, so this
# repeats the same shape check.
x.shape

In [None]:
# Plot the first slice of `x` (intensities capped at vmax=1), titled with the
# text returned by dataset.mapping.get_text(y).
# NOTE(review): other cells decode labels with
# convert_y_label_to_string(y, dataset.mapping), which indexes the mapping
# directly; confirm the mapping object supports both access styles.
_plot(x[0], vmax=1, title=dataset.mapping.get_text(y))

In [None]:
# `x[0, 0]` / `y[0]` index into a batch, but `x, y` were rebound to a single
# training sample earlier in the notebook, where `x[0, 0]` is one pixel row and
# `y[0]` a single label. Fetch a batch here under separate names (leaving
# `x` and `y` untouched) so the cell runs top-to-bottom on a fresh kernel.
# NOTE(review): assumes batches are laid out (batch, channel, H, W) for inputs
# and (batch, length) for targets; confirm against the dataloader.
x_batch, y_batch = next(iter(dataset.test_dataloader()))
_plot(x_batch[0, 0], vmax=1, title=convert_y_label_to_string(y_batch[0], dataset.mapping))

In [None]:
# Training

# Plot five randomly chosen training samples, each titled with its decoded label.
for _ in range(5):
    # randrange(n) draws from 0..n-1. The previous random.randint(0, n) also
    # included n itself, which is one past the last valid index.
    i = random.randrange(len(dataset.data_train))
    x, y = dataset.data_train[i]
    _plot(x[0], vmax=1, title=convert_y_label_to_string(y, dataset.mapping))

In [None]:
from einops import rearrange

In [None]:
# Rebind `x, y` to the training sample at index 2.
x, y = dataset.data_train[2]

In [None]:
# Plot the first slice of the current sample, titled with its label decoded
# through convert_y_label_to_string.
_plot(x[0], vmax=1, title=convert_y_label_to_string(y, dataset.mapping))

In [None]:
# Side length of each square patch.
p = 32
# Add a leading batch axis to `x`, then split the two spatial axes into
# non-overlapping p x p tiles, flattened into one patch axis:
# (b, c, H, W) -> (b, c, num_patches, p, p).
# NOTE(review): assumes H and W are multiples of p; confirm for this dataset.
patches = rearrange(
    x.unsqueeze(0),
    'b c (h p1) (w p2) -> b c (h w) p1 p2',
    p1=p,
    p2=p,
)

In [None]:
# Draw patches 160..174 of the first image/channel side by side in a single
# row of 15 subplots.
fig = plt.figure(figsize=(20, 20))
for i in range(15):
    ax = fig.add_subplot(1, 15, 1 + i)
    ax.imshow(patches[0, 0, 160 + i].squeeze(0), cmap='gray')

In [None]:
# Testing

# Plot five randomly chosen test samples, each titled with its decoded label.
for _ in range(5):
    # randrange(n) draws from 0..n-1. The previous random.randint(0, n) also
    # included n itself, which is one past the last valid index.
    i = random.randrange(len(dataset.data_test))
    x, y = dataset.data_test[i]
    _plot(x[0], vmax=1, title=convert_y_label_to_string(y, dataset.mapping))

In [None]:
# Rebind `dataset` to an IAMSyntheticParagraphs instance built with default
# arguments, then run its prepare_data() and setup() steps.
dataset = IAMSyntheticParagraphs()
dataset.prepare_data()
dataset.setup()
# Print the object's string representation as a quick summary.
print(dataset)

In [None]:
# Training

# Plot five randomly chosen synthetic training samples, each titled with its
# decoded label.
for _ in range(5):
    # randrange(n) draws from 0..n-1. The previous random.randint(0, n) also
    # included n itself, which is one past the last valid index.
    i = random.randrange(len(dataset.data_train))
    x, y = dataset.data_train[i]
    _plot(x[0], vmax=1, title=convert_y_label_to_string(y, dataset.mapping))