summaryrefslogtreecommitdiff
path: root/src/notebooks/01b-dataset_normalization.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'src/notebooks/01b-dataset_normalization.ipynb')
-rw-r--r--src/notebooks/01b-dataset_normalization.ipynb148
1 files changed, 148 insertions, 0 deletions
diff --git a/src/notebooks/01b-dataset_normalization.ipynb b/src/notebooks/01b-dataset_normalization.ipynb
new file mode 100644
index 0000000..9421816
--- /dev/null
+++ b/src/notebooks/01b-dataset_normalization.ipynb
@@ -0,0 +1,148 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load_ext autoreload\n",
+ "%autoreload 2\n",
+ "\n",
+ "%matplotlib inline\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "from PIL import Image\n",
+ "import torch\n",
+ "from importlib.util import find_spec\n",
+ "if find_spec(\"text_recognizer\") is None:\n",
+ " import sys\n",
+ " sys.path.append('..')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from text_recognizer.datasets import EmnistDataLoader"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Build the EMNIST loaders for the \"train\" split only.\n",
+ "# NOTE(review): transform/target_transform are None, presumably so the statistics\n",
+ "# below are measured on un-normalized images -- confirm against EmnistDataLoader.\n",
+ "data_loaders = EmnistDataLoader(\n",
+ "    splits=[\"train\"],\n",
+ "    sample_to_balance=True,\n",
+ "    subsample_fraction=None,\n",
+ "    transform=None,\n",
+ "    target_transform=None,\n",
+ "    batch_size=512,\n",
+ "    shuffle=True,\n",
+ "    num_workers=0,\n",
+ "    cuda=False,\n",
+ "    seed=4711,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Pick the \"train\" split; this loader is iterated batch by batch in the statistics cell.\n",
+ "loader = data_loaders(\"train\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Per-channel mean and standard deviation over every pixel served by `loader`.\n",
+ "#\n",
+ "# The statistics are pooled: we accumulate sum(x) and sum(x^2) per channel and derive\n",
+ "#   mean = E[x],  std = sqrt(E[x^2] - E[x]^2).\n",
+ "# Averaging each image's own std instead (as this cell used to do) ignores the\n",
+ "# variation between images and therefore always underestimates the dataset std.\n",
+ "# The pooled value is the population std (no Bessel correction); with this many\n",
+ "# pixels the difference from the unbiased estimate is negligible.\n",
+ "# NOTE: saved outputs of the cells below predate this change -- re-run to refresh them.\n",
+ "channel_sum = 0.\n",
+ "channel_sq_sum = 0.\n",
+ "nb_pixels = 0\n",
+ "nb_samples = 0\n",
+ "for images, _ in loader:\n",
+ "    batch_samples = images.size(0)\n",
+ "    # Flatten everything after dim 1 -> (batch, channels, pixels); float64 keeps the\n",
+ "    # E[x^2] - E[x]^2 subtraction numerically stable over the whole dataset.\n",
+ "    pixels = images.reshape(batch_samples, images.size(1), -1).double()\n",
+ "    channel_sum += pixels.sum(dim=(0, 2))\n",
+ "    channel_sq_sum += (pixels ** 2).sum(dim=(0, 2))\n",
+ "    nb_pixels += batch_samples * pixels.size(2)\n",
+ "    nb_samples += batch_samples\n",
+ "\n",
+ "if nb_pixels == 0:\n",
+ "    raise ValueError('loader yielded no samples; cannot compute normalization statistics')\n",
+ "\n",
+ "mean = channel_sum / nb_pixels\n",
+ "# clamp guards against a tiny negative variance caused by floating-point rounding\n",
+ "std = (channel_sq_sum / nb_pixels - mean ** 2).clamp(min=0).sqrt()\n",
+ "mean, std = mean.float(), std.float()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([0.1731])"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Show the per-channel mean computed in the previous cell.\n",
+ "mean"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([0.3247])"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Show the per-channel standard-deviation estimate computed in the statistics cell.\n",
+ "std"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}