summary refs log tree commit diff
path: root/src/notebooks/01b-dataset_normalization.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'src/notebooks/01b-dataset_normalization.ipynb')
-rw-r--r-- src/notebooks/01b-dataset_normalization.ipynb 148
1 files changed, 0 insertions, 148 deletions
diff --git a/src/notebooks/01b-dataset_normalization.ipynb b/src/notebooks/01b-dataset_normalization.ipynb
deleted file mode 100644
index 9421816..0000000
--- a/src/notebooks/01b-dataset_normalization.ipynb
+++ /dev/null
@@ -1,148 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "%load_ext autoreload\n",
- "%autoreload 2\n",
- "\n",
- "%matplotlib inline\n",
- "import matplotlib.pyplot as plt\n",
- "import numpy as np\n",
- "from PIL import Image\n",
- "import torch\n",
- "from importlib.util import find_spec\n",
- "if find_spec(\"text_recognizer\") is None:\n",
- " import sys\n",
- " sys.path.append('..')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "from text_recognizer.datasets import EmnistDataLoader"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "# EMNIST loaders for the train split only: balanced sampling, no transforms,\n",
- "# CPU, fixed seed.\n",
- "data_loaders = EmnistDataLoader(\n",
- "    splits=[\"train\"],\n",
- "    sample_to_balance=True,\n",
- "    subsample_fraction=None,\n",
- "    transform=None,\n",
- "    target_transform=None,\n",
- "    batch_size=512,\n",
- "    shuffle=True,\n",
- "    num_workers=0,\n",
- "    cuda=False,\n",
- "    seed=4711,\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [],
- "source": [
- "loader = data_loaders(\"train\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Per-channel dataset mean/std, accumulated as running sums over every pixel.\n",
- "# Averaging per-image stds ignores between-image variation and is not the\n",
- "# dataset std, so sums and sums of squares are accumulated instead.\n",
- "# Re-run the display cells below to refresh their stored outputs.\n",
- "nb_samples = 0.\n",
- "pixel_sum, pixel_sq_sum, nb_pixels = 0., 0., 0\n",
- "for images, _ in loader:\n",
- "    # Flatten to (batch, channels, pixels); float64 limits accumulation error.\n",
- "    flat = images.view(images.size(0), images.size(1), -1).double()\n",
- "    pixel_sum += flat.sum(dim=(0, 2))\n",
- "    pixel_sq_sum += flat.pow(2).sum(dim=(0, 2))\n",
- "    nb_samples += flat.size(0)\n",
- "    nb_pixels += flat.size(0) * flat.size(2)\n",
- "\n",
- "mean = pixel_sum / nb_pixels\n",
- "# Population variance E[x^2] - E[x]^2; the clamp guards against tiny negatives.\n",
- "std = (pixel_sq_sum / nb_pixels - mean ** 2).clamp(min=0).sqrt().float()\n",
- "mean = mean.float()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "tensor([0.1731])"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "mean"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "tensor([0.3247])"
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "std"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.2"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}