diff options
author | aktersnurra <gustaf.rydholm@gmail.com> | 2020-07-22 23:18:08 +0200 |
---|---|---|
committer | aktersnurra <gustaf.rydholm@gmail.com> | 2020-07-22 23:18:08 +0200 |
commit | f473456c19558aaf8552df97a51d4e18cc69dfa8 (patch) | |
tree | 0d35ce2410ff623ba5fb433d616d95b67ecf7a98 | |
parent | ad3bd52530f4800d4fb05dfef3354921f95513af (diff) |
Working training loop and testing of trained CharacterModel.
101 files changed, 2860 insertions, 224 deletions
@@ -1,6 +1,6 @@ [flake8] select = ANN,B,B9,BLK,C,D,DAR,E,F,I,S,W -ignore = E203,E501,W503,ANN101,F401,D202,S310,S101 +ignore = E203,E501,W503,ANN101,F401,D202,S310,S101,S404,S602,B009 max-line-length = 120 max-complexity = 10 application-import-names = text_recognizer,tests @@ -8,8 +8,24 @@ TBC ## Todo - [x] subsampling -- [ ] Be able to run experiments -- [ ] Train models -- [ ] Implement wandb -- [ ] Implement Bayesian hyperparameter search +- [x] Be able to run experiments +- [x] Train models +- [x] Fix input size in base model +- [x] Fix s.t. the best weights are saved +- [x] Implement total training time +- [x] Fix tqdm and logging output +- [x] Fix basic test to load model +- [x] Fix loading previous experiments +- [ ] Check how to pass arguments to unittest (remove B009 then) +- [x] Able to set verbosity level on the logger to terminal output +- [ ] Implement Callbacks for training + - [ ] Implement early stopping + - [ ] Implement wandb + - [ ] Implement lr scheduler as a callback +- [ ] Continuing reimplementing labs - [ ] New models and datasets +- [ ] Check that dataset exists, otherwise download it form the web. Do this in run_experiment.py. +- [ ] Create repr func for data loaders +- [ ] Be able to restart with lr scheduler +- [ ] Implement Bayesian hyperparameter search +- [ ] Try to fix shell cmd security issues S404, S602 @@ -8,7 +8,7 @@ from nox.sessions import Session package = "text-recognizer" nox.options.sessions = "lint", "mypy", "pytype", "safety", "tests" -locations = "src", "tests", "noxfile.py", "docs/conf.py" +locations = "src", "tests", "noxfile.py", "docs/conf.py", "src/text_recognizer/tests" def install_with_constraints(session: Session, *args: str, **kwargs: Any) -> None: diff --git a/pyproject.toml b/pyproject.toml index bc4f39d..09825aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,6 +58,8 @@ fail_under = 50 [tool.poetry.scripts] download-emnist = "text_recognizer.datasets.emnist_dataset:download_emnist" create-emnist-support-files = "text_recognizer.tests.support.create_emnist_support_files:create_emnist_support_files" +run-experiment = "training.run_experiment:main" +run-experiments = "training.prepare_experiments:main" # mlp = "text_recognizer.networks.mlp:test" # lenet = "text_recognizer.networks.lenet:test" diff --git a/src/notebooks/Untitled.ipynb b/src/notebooks/Untitled.ipynb index 1cb7acb..97c523d 100644 --- a/src/notebooks/Untitled.ipynb +++ b/src/notebooks/Untitled.ipynb @@ -24,6 +24,24 @@ "metadata": {}, "outputs": [], "source": [ + "a = getattr(torch.nn, \"ReLU\")()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "a" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "loss = getattr(torch.nn, \"L1Loss\")()" ] }, @@ -43,6 +61,33 @@ "metadata": {}, "outputs": [], "source": [ + "b = torch.randn(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "b" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "a(b)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "output = loss(input, target)\n", "output.backward()" ] @@ -99,7 +144,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -108,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -117,17 +162,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "2020-07-05 21:16:55.100 | DEBUG | training.gpu_manager:_get_free_gpu:55 - pid 29777 picking gpu 0\n", - "2020-07-05 21:16:55.704 | DEBUG | training.gpu_manager:_get_free_gpu:59 - pid 29777 could not get lock.\n", - "2020-07-05 21:16:55.705 | DEBUG | training.gpu_manager:get_free_gpu:37 - pid 29777 sleeping\n", - "2020-07-05 21:17:00.722 | DEBUG | training.gpu_manager:_get_free_gpu:55 - pid 29777 picking gpu 0\n" + "2020-07-21 14:10:13.170 | DEBUG | training.gpu_manager:_get_free_gpu:57 - pid 11721 picking gpu 0\n" ] }, { @@ -136,7 +178,7 @@ "0" ] }, - "execution_count": 4, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -150,6 +192,701 @@ "execution_count": null, "metadata": {}, "outputs": [], + "source": [ + "from pathlib import Path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p = Path(\"/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "str(p).split(\"/\")[0] + \"/\" + str(p).split(\"/\")[1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p.parents[0].resolve()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "d = 'Experiment JSON, e.g. \\'{\"dataset\": \"EmnistDataset\", \"model\": \"CharacterModel\", \"network\": \"mlp\"}\\''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(d)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "import yaml" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "path = \"/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/sample_experiment.yml\"" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "with open(path) as f:\n", + " d = yaml.safe_load(f)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "experiment_config = d[\"experiments\"][0]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'dataloader': 'EmnistDataLoader',\n", + " 'data_loader_args': {'splits': ['train', 'val'],\n", + " 'sample_to_balance': True,\n", + " 'subsample_fraction': None,\n", + " 'transform': None,\n", + " 'target_transform': None,\n", + " 'batch_size': 256,\n", + " 'shuffle': True,\n", + " 'num_workers': 0,\n", + " 'cuda': True,\n", + " 'seed': 4711},\n", + " 'model': 'CharacterModel',\n", + " 'metrics': ['accuracy'],\n", + " 'network': 'MLP',\n", + " 'network_args': {'input_size': 784, 'num_layers': 2},\n", + " 'train_args': {'batch_size': 256, 'epochs': 16},\n", + " 'criterion': 'CrossEntropyLoss',\n", + " 'criterion_args': {'weight': None, 'ignore_index': -100, 'reduction': 'mean'},\n", + " 'optimizer': 'AdamW',\n", + " 'optimizer_args': {'lr': 0.0003,\n", + " 'betas': [0.9, 0.999],\n", + " 'eps': 1e-08,\n", + " 'weight_decay': 0,\n", + " 'amsgrad': False},\n", + " 'lr_scheduler': 'OneCycleLR',\n", + " 'lr_scheduler_args': {'max_lr': 3e-05, 'epochs': 16}}" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "experiment_config" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "import importlib" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "network_module = importlib.import_module(\"text_recognizer.networks\")\n", + "network_fn_ = getattr(network_module, experiment_config[\"network\"])\n", + "network_args = experiment_config.get(\"network_args\", {})" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1, 784)" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(1,) + (network_args[\"input_size\"],)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "optimizer_ = getattr(torch.optim, experiment_config[\"optimizer\"])\n", + "optimizer_args = experiment_config.get(\"optimizer_args\", {})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "optimizer_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "optimizer_args" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "network_args" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "network_fn_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "net = network_fn_(**network_args)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "optimizer_(net.parameters() , **optimizer_args)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "criterion_ = getattr(torch.nn, experiment_config[\"criterion\"])\n", + "criterion_args = experiment_config.get(\"criterion_args\", {})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "criterion_(**criterion_args)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "models_module = importlib.import_module(\"text_recognizer.models\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metrics = {metric: getattr(models_module, metric) for metric in experiment_config[\"metrics\"]}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "torch.randn(3, 10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "torch.randn(3, 1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metrics['accuracy'](torch.randn(3, 10), torch.randn(3, 1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metric_fn_ = getattr(models_module, experiment_config[\"metric\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metric_fn_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "2.e-3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lr_scheduler_ = getattr(\n", + " torch.optim.lr_scheduler, experiment_config[\"lr_scheduler\"]\n", + ")\n", + "lr_scheduler_args = experiment_config.get(\"lr_scheduler_args\", {})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"OneCycleLR\" in str(lr_scheduler_)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "datasets_module = importlib.import_module(\"text_recognizer.datasets\")\n", + "data_loader_ = getattr(datasets_module, experiment_config[\"dataloader\"])\n", + "data_loader_args = experiment_config.get(\"data_loader_args\", {})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data_loader_(**data_loader_args)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "cuda = \"cuda:0\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "cleanString = re.sub('[^A-Za-z]+','', cuda )" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "cleanString = re.sub('[^0-9]+','', cuda )" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'0'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cleanString" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "([28, 28], 1)" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "([28, 28], ) + (1,)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(range(3-1))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1,)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tuple([1])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from glob import glob" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/text_recognizer/weights/CharacterModel_Emnist_MLP_weights.pt']" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "glob(\"/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/text_recognizer/weights/CharacterModel_*MLP_weights.pt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "def test(a, b, c, d):\n", + " print(a,b,c,d)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "f = {\"a\": 2, \"b\": 3, \"c\": 4}" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_items([('a', 2), ('b', 3), ('c', 4)])\n" + ] + } + ], + "source": [ + "print(f.items())" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2 3 4 1\n" + ] + } + ], + "source": [ + "test(**f, d=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "path = \"/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/*\"" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "l = glob(path)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "l.sort()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "'/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_124928' in l" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_124928',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_141139',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_141213',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_141433',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_141702',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_145028',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_150212',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_150301',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_150317',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_151135',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_151408',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_153144',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_153207',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_153310',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_175150',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_180741',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_181933',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_183347',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_190044',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_190633',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_190738',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_191111',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_191310',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_191412',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_191504',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0721_191826',\n", + " '/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/src/training/experiments/CharacterModel_Emnist_MLP/0722_191559']" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "l" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "from loguru import logger" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'Logger' object has no attribute 'DEBUG'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-18-e1360ed6a5af>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mlogger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDEBUG\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m: 'Logger' object has no attribute 'DEBUG'" + ] + } + ], + "source": [ + "logger.DEBUG" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [] } ], diff --git a/src/text_recognizer/character_predictor.py b/src/text_recognizer/character_predictor.py index 69ef896..a773f36 100644 --- a/src/text_recognizer/character_predictor.py +++ b/src/text_recognizer/character_predictor.py @@ -1,8 +1,8 @@ """CharacterPredictor class.""" - -from typing import Tuple, Union +from typing import Dict, Tuple, Type, Union import numpy as np +from torch import nn from text_recognizer.models import CharacterModel from text_recognizer.util import read_image @@ -11,9 +11,9 @@ from text_recognizer.util import read_image class CharacterPredictor: """Recognizes the character in handwritten character images.""" - def __init__(self) -> None: + def __init__(self, network_fn: Type[nn.Module], network_args: Dict) -> None: """Intializes the CharacterModel and load the pretrained weights.""" - self.model = CharacterModel() + self.model = CharacterModel(network_fn=network_fn, network_args=network_args) self.model.load_weights() self.model.eval() diff --git a/src/text_recognizer/datasets/__init__.py b/src/text_recognizer/datasets/__init__.py index aec5bf9..795be90 100644 --- a/src/text_recognizer/datasets/__init__.py +++ b/src/text_recognizer/datasets/__init__.py @@ -1,2 +1,4 @@ """Dataset modules.""" from .emnist_dataset import EmnistDataLoader + +__all__ = ["EmnistDataLoader"] diff --git a/src/text_recognizer/datasets/emnist_dataset.py b/src/text_recognizer/datasets/emnist_dataset.py index a17d7a9..b92b57d 100644 --- a/src/text_recognizer/datasets/emnist_dataset.py +++ b/src/text_recognizer/datasets/emnist_dataset.py @@ -2,7 +2,7 @@ import json from pathlib import Path -from typing import Callable, Dict, List, Optional +from typing import Callable, Dict, List, Optional, Type from loguru import logger import numpy as np @@ -102,21 +102,22 @@ class EmnistDataLoader: self.shuffle = shuffle self.num_workers = num_workers self.cuda = cuda + self.seed = seed self._data_loaders = self._fetch_emnist_data_loaders() @property def __name__(self) -> str: """Returns the name of the dataset.""" - return "EMNIST" + return "Emnist" - def __call__(self, split: str) -> Optional[DataLoader]: + def __call__(self, split: str) -> DataLoader: """Returns the `split` DataLoader. Args: split (str): The dataset split, i.e. train or val. Returns: - type: A PyTorch DataLoader. + DataLoader: A PyTorch DataLoader. Raises: ValueError: If the split does not exist. diff --git a/src/text_recognizer/models/__init__.py b/src/text_recognizer/models/__init__.py index d265dcf..ff10a07 100644 --- a/src/text_recognizer/models/__init__.py +++ b/src/text_recognizer/models/__init__.py @@ -1,2 +1,6 @@ """Model modules.""" +from .base import Model from .character_model import CharacterModel +from .metrics import accuracy + +__all__ = ["Model", "CharacterModel", "accuracy"] diff --git a/src/text_recognizer/models/base.py b/src/text_recognizer/models/base.py index 0cc531a..b78eacb 100644 --- a/src/text_recognizer/models/base.py +++ b/src/text_recognizer/models/base.py @@ -1,9 +1,11 @@ """Abstract Model class for PyTorch neural networks.""" from abc import ABC, abstractmethod +from glob import glob from pathlib import Path +import re import shutil -from typing import Callable, Dict, Optional, Tuple +from typing import Callable, Dict, Optional, Tuple, Type from loguru import logger import torch @@ -19,7 +21,7 @@ class Model(ABC): def __init__( self, - network_fn: Callable, + network_fn: Type[nn.Module], network_args: Dict, data_loader: Optional[Callable] = None, data_loader_args: Optional[Dict] = None, @@ -35,7 +37,7 @@ class Model(ABC): """Base class, to be inherited by model for specific type of data. Args: - network_fn (Callable): The PyTorch network. + network_fn (Type[nn.Module]): The PyTorch network. network_args (Dict): Arguments for the network. data_loader (Optional[Callable]): A function that fetches train and val DataLoader. data_loader_args (Optional[Dict]): Arguments for the DataLoader. @@ -57,27 +59,29 @@ class Model(ABC): self._data_loaders = data_loader(**data_loader_args) dataset_name = self._data_loaders.__name__ else: - dataset_name = "" + dataset_name = "*" self._data_loaders = None - self.name = f"{self.__class__.__name__}_{dataset_name}_{network_fn.__name__}" + self._name = f"{self.__class__.__name__}_{dataset_name}_{network_fn.__name__}" # Extract the input shape for the torchsummary. - self._input_shape = network_args.pop("input_shape") + if isinstance(network_args["input_size"], int): + self._input_shape = (1,) + tuple([network_args["input_size"]]) + else: + self._input_shape = (1,) + tuple(network_args["input_size"]) if metrics is not None: self._metrics = metrics # Set the device. - if self.device is None: - self._device = torch.device( - "cuda:0" if torch.cuda.is_available() else "cpu" - ) + if device is None: + self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu") else: self._device = device # Load network. - self._network = network_fn(**network_args) + self.network_args = network_args + self._network = network_fn(**self.network_args) # To device. self._network.to(self._device) @@ -95,13 +99,29 @@ class Model(ABC): # Set learning rate scheduler. self._lr_scheduler = None if lr_scheduler is not None: + # OneCycleLR needs the number of steps in an epoch as an input argument. + if "OneCycleLR" in str(lr_scheduler): + lr_scheduler_args["steps_per_epoch"] = len(self._data_loaders("train")) self._lr_scheduler = lr_scheduler(self._optimizer, **lr_scheduler_args) + # Class mapping. + self._mapping = None + + @property + def __name__(self) -> str: + """Returns the name of the model.""" + return self._name + @property def input_shape(self) -> Tuple[int, ...]: """The input shape.""" return self._input_shape + @property + def mapping(self) -> Dict: + """Returns the class mapping.""" + return self._mapping + def eval(self) -> None: """Sets the network to evaluation mode.""" self._network.eval() @@ -149,13 +169,14 @@ class Model(ABC): def weights_filename(self) -> str: """Filepath to the network weights.""" WEIGHT_DIRNAME.mkdir(parents=True, exist_ok=True) - return str(WEIGHT_DIRNAME / f"{self.name}_weights.pt") + return str(WEIGHT_DIRNAME / f"{self._name}_weights.pt") def summary(self) -> None: """Prints a summary of the network architecture.""" - summary(self._network, self._input_shape, device=self.device) + device = re.sub("[^A-Za-z]+", "", self.device) + summary(self._network, self._input_shape, device=device) - def _get_state(self) -> Dict: + def _get_state_dict(self) -> Dict: """Get the state dict of the model.""" state = {"model_state": self._network.state_dict()} if self._optimizer is not None: @@ -172,6 +193,7 @@ class Model(ABC): epoch (int): The last epoch when the checkpoint was created. """ + logger.debug("Loading checkpoint...") if not path.exists(): logger.debug("File does not exist {str(path)}") @@ -200,6 +222,7 @@ class Model(ABC): state = self._get_state_dict() state["is_best"] = is_best state["epoch"] = epoch + state["network_args"] = self.network_args path.mkdir(parents=True, exist_ok=True) @@ -216,15 +239,18 @@ class Model(ABC): def load_weights(self) -> None: """Load the network weights.""" logger.debug("Loading network weights.") - weights = torch.load(self.weights_filename)["model_state"] + filename = glob(self.weights_filename)[0] + weights = torch.load(filename, map_location=torch.device(self._device))[ + "model_state" + ] self._network.load_state_dict(weights) - def save_weights(self) -> None: + def save_weights(self, path: Path) -> None: """Save the network weights.""" - logger.debug("Saving network weights.") - torch.save({"model_state": self._network.state_dict()}, self.weights_filename) + logger.debug("Saving the best network weights.") + shutil.copyfile(str(path / "best.pt"), self.weights_filename) @abstractmethod - def mapping(self) -> Dict: - """Mapping from network output to class.""" + def load_mapping(self) -> None: + """Loads class mapping from network output to character.""" ... diff --git a/src/text_recognizer/models/character_model.py b/src/text_recognizer/models/character_model.py index fd69bf2..527fc7d 100644 --- a/src/text_recognizer/models/character_model.py +++ b/src/text_recognizer/models/character_model.py @@ -1,5 +1,5 @@ """Defines the CharacterModel class.""" -from typing import Callable, Dict, Optional, Tuple +from typing import Callable, Dict, Optional, Tuple, Type import numpy as np import torch @@ -8,7 +8,6 @@ from torchvision.transforms import ToTensor from text_recognizer.datasets.emnist_dataset import load_emnist_mapping from text_recognizer.models.base import Model -from text_recognizer.networks.mlp import mlp class CharacterModel(Model): @@ -16,8 +15,9 @@ class CharacterModel(Model): def __init__( self, - network_fn: Callable, + network_fn: Type[nn.Module], network_args: Dict, + data_loader: Optional[Callable] = None, data_loader_args: Optional[Dict] = None, metrics: Optional[Dict] = None, criterion: Optional[Callable] = None, @@ -33,6 +33,7 @@ class CharacterModel(Model): super().__init__( network_fn, network_args, + data_loader, data_loader_args, metrics, criterion, @@ -43,13 +44,13 @@ class CharacterModel(Model): lr_scheduler_args, device, ) - self.emnist_mapping = self.mapping() - self.eval() + self.load_mapping() + self.tensor_transform = ToTensor() + self.softmax = nn.Softmax(dim=0) - def mapping(self) -> Dict[int, str]: + def load_mapping(self) -> None: """Mapping between integers and classes.""" - mapping = load_emnist_mapping() - return mapping + self._mapping = load_emnist_mapping() def predict_on_image(self, image: np.ndarray) -> Tuple[str, float]: """Character prediction on an image. @@ -61,15 +62,20 @@ class CharacterModel(Model): Tuple[str, float]: The predicted character and the confidence in the prediction. """ + if image.dtype == np.uint8: image = (image / 255).astype(np.float32) # Conver to Pytorch Tensor. - image = ToTensor(image) + image = self.tensor_transform(image) + + with torch.no_grad(): + logits = self.network(image) + + prediction = self.softmax(logits.data.squeeze()) - prediction = self.network(image) - index = torch.argmax(prediction, dim=1) + index = int(torch.argmax(prediction, dim=0)) confidence_of_prediction = prediction[index] - predicted_character = self.emnist_mapping[index] + predicted_character = self._mapping[index] return predicted_character, confidence_of_prediction diff --git a/src/text_recognizer/models/metrics.py b/src/text_recognizer/models/metrics.py index e2a30a9..ac8d68e 100644 --- a/src/text_recognizer/models/metrics.py +++ b/src/text_recognizer/models/metrics.py @@ -3,7 +3,7 @@ import torch -def accuracy(outputs: torch.Tensor, labels: torch.Tensro) -> float: +def accuracy(outputs: torch.Tensor, labels: torch.Tensor) -> float: """Computes the accuracy. Args: diff --git a/src/text_recognizer/networks/__init__.py b/src/text_recognizer/networks/__init__.py index 4ea5bb3..e6b6946 100644 --- a/src/text_recognizer/networks/__init__.py +++ b/src/text_recognizer/networks/__init__.py @@ -1 +1,5 @@ """Network modules.""" +from .lenet import LeNet +from .mlp import MLP + +__all__ = ["MLP", "LeNet"] diff --git a/src/text_recognizer/networks/lenet.py b/src/text_recognizer/networks/lenet.py index 71d247f..2839a0c 100644 --- a/src/text_recognizer/networks/lenet.py +++ b/src/text_recognizer/networks/lenet.py @@ -1,5 +1,5 @@ """Defines the LeNet network.""" -from typing import Callable, Optional, Tuple +from typing import Callable, Dict, Optional, Tuple import torch from torch import nn @@ -18,28 +18,37 @@ class LeNet(nn.Module): def __init__( self, - channels: Tuple[int, ...], - kernel_sizes: Tuple[int, ...], - hidden_size: Tuple[int, ...], - dropout_rate: float, - output_size: int, + input_size: Tuple[int, ...] = (1, 28, 28), + channels: Tuple[int, ...] = (1, 32, 64), + kernel_sizes: Tuple[int, ...] = (3, 3, 2), + hidden_size: Tuple[int, ...] = (9216, 128), + dropout_rate: float = 0.2, + output_size: int = 10, activation_fn: Optional[Callable] = None, + activation_fn_args: Optional[Dict] = None, ) -> None: """The LeNet network. Args: - channels (Tuple[int, ...]): Channels in the convolutional layers. - kernel_sizes (Tuple[int, ...]): Kernel sizes in the convolutional layers. + input_size (Tuple[int, ...]): The input shape of the network. Defaults to (1, 28, 28). + channels (Tuple[int, ...]): Channels in the convolutional layers. Defaults to (1, 32, 64). + kernel_sizes (Tuple[int, ...]): Kernel sizes in the convolutional layers. Defaults to (3, 3, 2). hidden_size (Tuple[int, ...]): Size of the flattend output form the convolutional layers. - dropout_rate (float): The dropout rate. - output_size (int): Number of classes. + Defaults to (9216, 128). + dropout_rate (float): The dropout rate. Defaults to 0.2. + output_size (int): Number of classes. Defaults to 10. activation_fn (Optional[Callable]): The non-linear activation function. Defaults to nn.ReLU(inplace). + activation_fn_args (Optional[Dict]): The arguments for the activation function. Defaults to None. """ super().__init__() - if activation_fn is None: + self._input_size = input_size + + if activation_fn is not None: + activation_fn = getattr(nn, activation_fn)(activation_fn_args) + else: activation_fn = nn.ReLU(inplace=True) self.layers = [ @@ -68,26 +77,6 @@ class LeNet(nn.Module): def forward(self, x: torch.Tensor) -> torch.Tensor: """The feedforward.""" + if len(x.shape) == 3: + x = x.unsqueeze(0) return self.layers(x) - - -# def test(): -# x = torch.randn([1, 1, 28, 28]) -# channels = [1, 32, 64] -# kernel_sizes = [3, 3, 2] -# hidden_size = [9216, 128] -# output_size = 10 -# dropout_rate = 0.2 -# activation_fn = nn.ReLU() -# net = LeNet( -# channels=channels, -# kernel_sizes=kernel_sizes, -# dropout_rate=dropout_rate, -# hidden_size=hidden_size, -# output_size=output_size, -# activation_fn=activation_fn, -# ) -# from torchsummary import summary -# -# summary(net, (1, 28, 28), device="cpu") -# out = net(x) diff --git a/src/text_recognizer/networks/mlp.py b/src/text_recognizer/networks/mlp.py index 2a41790..d704d99 100644 --- a/src/text_recognizer/networks/mlp.py +++ b/src/text_recognizer/networks/mlp.py @@ -1,5 +1,5 @@ """Defines the MLP network.""" -from typing import Callable, Optional +from typing import Callable, Dict, List, Optional, Union import torch from torch import nn @@ -10,45 +10,54 @@ class MLP(nn.Module): def __init__( self, - input_size: int, - output_size: int, - hidden_size: int, - num_layers: int, - dropout_rate: float, + input_size: int = 784, + output_size: int = 10, + hidden_size: Union[int, List] = 128, + num_layers: int = 3, + dropout_rate: float = 0.2, activation_fn: Optional[Callable] = None, + activation_fn_args: Optional[Dict] = None, ) -> None: """Initialization of the MLP network. Args: - input_size (int): The input shape of the network. - output_size (int): Number of classes in the dataset. - hidden_size (int): The number of `neurons` in each hidden layer. - num_layers (int): The number of hidden layers. - dropout_rate (float): The dropout rate at each layer. - activation_fn (Optional[Callable]): The activation function in the hidden layers, (default: - nn.ReLU()). + input_size (int): The input shape of the network. Defaults to 784. + output_size (int): Number of classes in the dataset. Defaults to 10. + hidden_size (Union[int, List]): The number of `neurons` in each hidden layer. Defaults to 128. + num_layers (int): The number of hidden layers. Defaults to 3. + dropout_rate (float): The dropout rate at each layer. Defaults to 0.2. + activation_fn (Optional[Callable]): The activation function in the hidden layers. Defaults to + None. + activation_fn_args (Optional[Dict]): The arguments for the activation function. Defaults to None. """ super().__init__() - if activation_fn is None: + if activation_fn is not None: + activation_fn = getattr(nn, activation_fn)(activation_fn_args) + else: activation_fn = nn.ReLU(inplace=True) + if isinstance(hidden_size, int): + hidden_size = [hidden_size] * num_layers + self.layers = [ - nn.Linear(in_features=input_size, out_features=hidden_size), + nn.Linear(in_features=input_size, out_features=hidden_size[0]), activation_fn, ] - for _ in range(num_layers): + for i in range(num_layers - 1): self.layers += [ - nn.Linear(in_features=hidden_size, out_features=hidden_size), + nn.Linear(in_features=hidden_size[i], out_features=hidden_size[i + 1]), activation_fn, ] if dropout_rate: self.layers.append(nn.Dropout(p=dropout_rate)) - self.layers.append(nn.Linear(in_features=hidden_size, out_features=output_size)) + self.layers.append( + nn.Linear(in_features=hidden_size[-1], out_features=output_size) + ) self.layers = nn.Sequential(*self.layers) @@ -57,25 +66,7 @@ class MLP(nn.Module): x = torch.flatten(x, start_dim=1) return self.layers(x) - -# def test(): -# x = torch.randn([1, 28, 28]) -# input_size = torch.flatten(x).shape[0] -# output_size = 10 -# hidden_size = 128 -# num_layers = 5 -# dropout_rate = 0.25 -# activation_fn = nn.GELU() -# net = MLP( -# input_size=input_size, -# output_size=output_size, -# hidden_size=hidden_size, -# num_layers=num_layers, -# dropout_rate=dropout_rate, -# activation_fn=activation_fn, -# ) -# from torchsummary import summary -# -# summary(net, (1, 28, 28), device="cpu") -# -# out = net(x) + @property + def __name__(self) -> str: + """Returns the name of the network.""" + return "mlp" diff --git a/src/text_recognizer/tests/test_character_predictor.py b/src/text_recognizer/tests/test_character_predictor.py index 7c094ef..c603a3a 100644 --- a/src/text_recognizer/tests/test_character_predictor.py +++ b/src/text_recognizer/tests/test_character_predictor.py @@ -1,9 +1,14 @@ """Test for CharacterPredictor class.""" +import importlib import os from pathlib import Path import unittest +import click +from loguru import logger + from text_recognizer.character_predictor import CharacterPredictor +from text_recognizer.networks import MLP SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "emnist" @@ -13,13 +18,23 @@ os.environ["CUDA_VISIBLE_DEVICES"] = "" class TestCharacterPredictor(unittest.TestCase): """Tests for the CharacterPredictor class.""" + # @click.command() + # @click.option( + # "--network", type=str, help="Network to load, e.g. MLP or LeNet.", default="MLP" + # ) def test_filename(self) -> None: """Test that CharacterPredictor correctly predicts on a single image, for serveral test images.""" - predictor = CharacterPredictor() + network_module = importlib.import_module("text_recognizer.networks") + network_fn_ = getattr(network_module, "MLP") + # network_args = {"input_size": [28, 28], "output_size": 62, "dropout_rate": 0} + network_args = {"input_size": 784, "output_size": 62, "dropout_rate": 0.2} + predictor = CharacterPredictor( + network_fn=network_fn_, network_args=network_args + ) for filename in SUPPORT_DIRNAME.glob("*.png"): pred, conf = predictor.predict(str(filename)) - print( + logger.info( f"Prediction: {pred} at confidence: {conf} for image with character {filename.stem}" ) self.assertEqual(pred, filename.stem) diff --git a/src/text_recognizer/util.py b/src/text_recognizer/util.py index 52fa1e4..6c07c60 100644 --- a/src/text_recognizer/util.py +++ b/src/text_recognizer/util.py @@ -25,7 +25,7 @@ def read_image(image_uri: Union[Path, str], grayscale: bool = False) -> np.ndarr ) from None imread_flag = cv2.IMREAD_GRAYSCALE if grayscale else cv2.IMREAD_COLOR - local_file = os.path.exsits(image_uri) + local_file = os.path.exists(image_uri) try: image = None if local_file: diff --git a/src/text_recognizer/weights/CharacterModel_Emnist_LeNet_weights.pt b/src/text_recognizer/weights/CharacterModel_Emnist_LeNet_weights.pt Binary files differnew file mode 100644 index 0000000..43a3891 --- /dev/null +++ b/src/text_recognizer/weights/CharacterModel_Emnist_LeNet_weights.pt diff --git a/src/text_recognizer/weights/CharacterModel_Emnist_MLP_weights.pt b/src/text_recognizer/weights/CharacterModel_Emnist_MLP_weights.pt Binary files differnew file mode 100644 index 0000000..0dde787 --- /dev/null +++ b/src/text_recognizer/weights/CharacterModel_Emnist_MLP_weights.pt diff --git a/src/training/callbacks/__init__.py b/src/training/callbacks/__init__.py new file mode 100644 index 0000000..868d739 --- /dev/null +++ b/src/training/callbacks/__init__.py @@ -0,0 +1 @@ +"""TBC.""" diff --git a/src/training/callbacks/base.py b/src/training/callbacks/base.py new file mode 100644 index 0000000..d80a1e5 --- /dev/null +++ b/src/training/callbacks/base.py @@ -0,0 +1,101 @@ +"""Metaclass for callback functions.""" + +from abc import ABC +from typing import Callable, List, Type + + +class Callback(ABC): + """Metaclass for callbacks used in training.""" + + def on_fit_begin(self) -> None: + """Called when fit begins.""" + pass + + def on_fit_end(self) -> None: + """Called when fit ends.""" + pass + + def on_train_epoch_begin(self) -> None: + """Called at the beginning of an epoch.""" + pass + + def on_train_epoch_end(self) -> None: + """Called at the end of an epoch.""" + pass + + def on_val_epoch_begin(self) -> None: + """Called at the beginning of an epoch.""" + pass + + def on_val_epoch_end(self) -> None: + """Called at the end of an epoch.""" + pass + + def on_train_batch_begin(self) -> None: + """Called at the beginning of an epoch.""" + pass + + def on_train_batch_end(self) -> None: + """Called at the end of an epoch.""" + pass + + def on_val_batch_begin(self) -> None: + """Called at the beginning of an epoch.""" + pass + + def on_val_batch_end(self) -> None: + """Called at the end of an epoch.""" + pass + + +class CallbackList: + """Container for abstracting away callback calls.""" + + def __init__(self, callbacks: List[Callable] = None) -> None: + """TBC.""" + self._callbacks = callbacks if callbacks is not None else [] + + def append(self, callback: Type[Callback]) -> None: + """Append new callback to callback list.""" + self.callbacks.append(callback) + + def on_fit_begin(self) -> None: + """Called when fit begins.""" + for _ in self._callbacks: + pass + + def on_fit_end(self) -> None: + """Called when fit ends.""" + pass + + def on_train_epoch_begin(self) -> None: + """Called at the beginning of an epoch.""" + pass + + def on_train_epoch_end(self) -> None: + """Called at the end of an epoch.""" + pass + + def on_val_epoch_begin(self) -> None: + """Called at the beginning of an epoch.""" + pass + + def on_val_epoch_end(self) -> None: + """Called at the end of an epoch.""" + pass + + def on_train_batch_begin(self) -> None: + """Called at the beginning of an epoch.""" + pass + + def on_train_batch_end(self) -> None: + """Called at the end of an epoch.""" + pass + + def on_val_batch_begin(self) -> None: + """Called at the beginning of an epoch.""" + pass + + def on_val_batch_end(self) -> None: + """Called at the end of an epoch.""" + pass diff --git a/src/training/callbacks/early_stopping.py b/src/training/callbacks/early_stopping.py new file mode 100644 index 0000000..4da0e85 --- /dev/null +++ b/src/training/callbacks/early_stopping.py @@ -0,0 +1 @@ +"""Implements Early stopping for PyTorch model.""" diff --git a/src/training/experiments/CharacterModel_Emnist_LeNet/0721_231455/config.yml b/src/training/experiments/CharacterModel_Emnist_LeNet/0721_231455/config.yml new file mode 100644 index 0000000..2595325 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_LeNet/0721_231455/config.yml @@ -0,0 +1,48 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 8 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: LeNet +network_args: + input_size: + - 28 + - 28 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 0 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_LeNet/0721_231455/model/best.pt b/src/training/experiments/CharacterModel_Emnist_LeNet/0721_231455/model/best.pt Binary files differnew file mode 100644 index 0000000..6d78bad --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_LeNet/0721_231455/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_LeNet/0721_231455/model/last.pt b/src/training/experiments/CharacterModel_Emnist_LeNet/0721_231455/model/last.pt Binary files differnew file mode 100644 index 0000000..6d78bad --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_LeNet/0721_231455/model/last.pt diff --git a/src/training/experiments/CharacterModel_Emnist_LeNet/0722_190746/config.yml b/src/training/experiments/CharacterModel_Emnist_LeNet/0722_190746/config.yml new file mode 100644 index 0000000..2595325 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_LeNet/0722_190746/config.yml @@ -0,0 +1,48 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 8 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: LeNet +network_args: + input_size: + - 28 + - 28 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 0 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_LeNet/0722_190746/model/best.pt b/src/training/experiments/CharacterModel_Emnist_LeNet/0722_190746/model/best.pt Binary files differnew file mode 100644 index 0000000..43a3891 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_LeNet/0722_190746/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_LeNet/0722_190746/model/last.pt b/src/training/experiments/CharacterModel_Emnist_LeNet/0722_190746/model/last.pt Binary files differnew file mode 100644 index 0000000..61c03f0 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_LeNet/0722_190746/model/last.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_124928/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_124928/config.yml new file mode 100644 index 0000000..2aa52cd --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_124928/config.yml @@ -0,0 +1,43 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: null +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.001 + weight_decay: 0 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_141139/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_141139/config.yml new file mode 100644 index 0000000..829297d --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_141139/config.yml @@ -0,0 +1,47 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.0003 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.0006 + weight_decay: 5.0e-05 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_141213/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_141213/config.yml new file mode 100644 index 0000000..829297d --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_141213/config.yml @@ -0,0 +1,47 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.0003 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.0006 + weight_decay: 5.0e-05 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_141213/model/best.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_141213/model/best.pt Binary files differnew file mode 100644 index 0000000..d0db78b --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_141213/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_141213/model/last.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_141213/model/last.pt Binary files differnew file mode 100644 index 0000000..d0db78b --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_141213/model/last.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_141433/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_141433/config.yml new file mode 100644 index 0000000..3df32bb --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_141433/config.yml @@ -0,0 +1,47 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.01 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.1 + weight_decay: 5.0e-05 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_141433/model/best.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_141433/model/best.pt Binary files differnew file mode 100644 index 0000000..5914c8f --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_141433/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_141433/model/last.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_141433/model/last.pt Binary files differnew file mode 100644 index 0000000..5ba44bb --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_141433/model/last.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_141702/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_141702/config.yml new file mode 100644 index 0000000..fb75736 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_141702/config.yml @@ -0,0 +1,47 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 5.0e-05 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_141702/model/best.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_141702/model/best.pt Binary files differnew file mode 100644 index 0000000..96c21c1 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_141702/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_141702/model/last.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_141702/model/last.pt Binary files differnew file mode 100644 index 0000000..f024c0d --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_141702/model/last.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_145028/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_145028/config.yml new file mode 100644 index 0000000..fb75736 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_145028/config.yml @@ -0,0 +1,47 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 5.0e-05 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_150212/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_150212/config.yml new file mode 100644 index 0000000..fb75736 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_150212/config.yml @@ -0,0 +1,47 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 5.0e-05 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_150301/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_150301/config.yml new file mode 100644 index 0000000..fb75736 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_150301/config.yml @@ -0,0 +1,47 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 5.0e-05 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_150317/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_150317/config.yml new file mode 100644 index 0000000..fb75736 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_150317/config.yml @@ -0,0 +1,47 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 5.0e-05 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_151135/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_151135/config.yml new file mode 100644 index 0000000..fb75736 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_151135/config.yml @@ -0,0 +1,47 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 5.0e-05 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_151135/model/best.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_151135/model/best.pt Binary files differnew file mode 100644 index 0000000..f833a89 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_151135/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_151135/model/last.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_151135/model/last.pt Binary files differnew file mode 100644 index 0000000..f833a89 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_151135/model/last.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_151408/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_151408/config.yml new file mode 100644 index 0000000..fb75736 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_151408/config.yml @@ -0,0 +1,47 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 5.0e-05 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_153144/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_153144/config.yml new file mode 100644 index 0000000..829297d --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_153144/config.yml @@ -0,0 +1,47 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.0003 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.0006 + weight_decay: 5.0e-05 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_153207/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_153207/config.yml new file mode 100644 index 0000000..fb75736 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_153207/config.yml @@ -0,0 +1,47 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 5.0e-05 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_153310/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_153310/config.yml new file mode 100644 index 0000000..fb75736 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_153310/config.yml @@ -0,0 +1,47 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 5.0e-05 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_153310/model/best.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_153310/model/best.pt Binary files differnew file mode 100644 index 0000000..cbbc5e1 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_153310/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_153310/model/last.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_153310/model/last.pt Binary files differnew file mode 100644 index 0000000..cbbc5e1 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_153310/model/last.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_175150/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_175150/config.yml new file mode 100644 index 0000000..fb75736 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_175150/config.yml @@ -0,0 +1,47 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 5.0e-05 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_175150/model/best.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_175150/model/best.pt Binary files differnew file mode 100644 index 0000000..c93e3c6 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_175150/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_175150/model/last.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_175150/model/last.pt Binary files differnew file mode 100644 index 0000000..c93e3c6 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_175150/model/last.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_180741/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_180741/config.yml new file mode 100644 index 0000000..1be5113 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_180741/config.yml @@ -0,0 +1,47 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: Adam +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 5.0e-05 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_180741/model/best.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_180741/model/best.pt Binary files differnew file mode 100644 index 0000000..580bad2 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_180741/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_180741/model/last.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_180741/model/last.pt Binary files differnew file mode 100644 index 0000000..97e245c --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_180741/model/last.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_181933/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_181933/config.yml new file mode 100644 index 0000000..d2f98a2 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_181933/config.yml @@ -0,0 +1,46 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: Adamax +optimizer_args: + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 0 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_181933/model/best.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_181933/model/best.pt Binary files differnew file mode 100644 index 0000000..5a3df56 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_181933/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_181933/model/last.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_181933/model/last.pt Binary files differnew file mode 100644 index 0000000..7f28dc3 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_181933/model/last.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_183347/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_183347/config.yml new file mode 100644 index 0000000..d2f98a2 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_183347/config.yml @@ -0,0 +1,46 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: Adamax +optimizer_args: + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 0 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_183347/model/best.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_183347/model/best.pt Binary files differnew file mode 100644 index 0000000..6f09780 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_183347/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_183347/model/last.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_183347/model/last.pt Binary files differnew file mode 100644 index 0000000..3bb103e --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_183347/model/last.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_190044/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_190044/config.yml new file mode 100644 index 0000000..a7c66c5 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_190044/config.yml @@ -0,0 +1,46 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 0 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_190044/model/best.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_190044/model/best.pt Binary files differnew file mode 100644 index 0000000..c3e3618 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_190044/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_190044/model/last.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_190044/model/last.pt Binary files differnew file mode 100644 index 0000000..c3e3618 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_190044/model/last.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_190633/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_190633/config.yml new file mode 100644 index 0000000..a7c66c5 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_190633/config.yml @@ -0,0 +1,46 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 0 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_190633/model/best.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_190633/model/best.pt Binary files differnew file mode 100644 index 0000000..44d9b9b --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_190633/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_190633/model/last.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_190633/model/last.pt Binary files differnew file mode 100644 index 0000000..44d9b9b --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_190633/model/last.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_190738/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_190738/config.yml new file mode 100644 index 0000000..a7c66c5 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_190738/config.yml @@ -0,0 +1,46 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 0 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_190738/model/best.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_190738/model/best.pt Binary files differnew file mode 100644 index 0000000..4a0333c --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_190738/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_190738/model/last.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_190738/model/last.pt Binary files differnew file mode 100644 index 0000000..4a0333c --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_190738/model/last.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_191111/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191111/config.yml new file mode 100644 index 0000000..a7c66c5 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191111/config.yml @@ -0,0 +1,46 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 0 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 0 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_191310/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191310/config.yml new file mode 100644 index 0000000..08c344c --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191310/config.yml @@ -0,0 +1,46 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 1 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 0 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_191310/model/best.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191310/model/best.pt Binary files differnew file mode 100644 index 0000000..076aae1 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191310/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_191310/model/last.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191310/model/last.pt Binary files differnew file mode 100644 index 0000000..076aae1 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191310/model/last.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_191412/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191412/config.yml new file mode 100644 index 0000000..0b9b10e --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191412/config.yml @@ -0,0 +1,42 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 1 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: null +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: RMSprop +optimizer_args: + alpha: 0.9 + centered: false + eps: 1.0e-07 + lr: 0.001 + momentum: 0 + weight_decay: 0 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_191412/model/best.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191412/model/best.pt Binary files differnew file mode 100644 index 0000000..2fb0195 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191412/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_191412/model/last.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191412/model/last.pt Binary files differnew file mode 100644 index 0000000..2fb0195 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191412/model/last.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_191504/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191504/config.yml new file mode 100644 index 0000000..93c2854 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191504/config.yml @@ -0,0 +1,42 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 4 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: null +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: RMSprop +optimizer_args: + alpha: 0.9 + centered: false + eps: 1.0e-07 + lr: 0.001 + momentum: 0 + weight_decay: 0 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_191504/model/best.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191504/model/best.pt Binary files differnew file mode 100644 index 0000000..9acc5b1 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191504/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_191504/model/last.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191504/model/last.pt Binary files differnew file mode 100644 index 0000000..b8cc01c --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191504/model/last.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_191826/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191826/config.yml new file mode 100644 index 0000000..7340941 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191826/config.yml @@ -0,0 +1,47 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 8 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 0 +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_191826/model/best.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191826/model/best.pt Binary files differnew file mode 100644 index 0000000..26bfb07 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191826/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0721_191826/model/last.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191826/model/last.pt Binary files differnew file mode 100644 index 0000000..26bfb07 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0721_191826/model/last.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0722_191559/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0722_191559/config.yml new file mode 100644 index 0000000..90f0e13 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0722_191559/config.yml @@ -0,0 +1,49 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 8 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 33 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 0 +resume_experiment: last +train_args: + batch_size: 256 + epochs: 33 + val_metric: accuracy +verbosity: 1 diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0722_191559/model/best.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0722_191559/model/best.pt Binary files differnew file mode 100644 index 0000000..f0f297b --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0722_191559/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0722_191559/model/last.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0722_191559/model/last.pt Binary files differnew file mode 100644 index 0000000..c1adda5 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0722_191559/model/last.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0722_213125/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0722_213125/config.yml new file mode 100644 index 0000000..8d77de5 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0722_213125/config.yml @@ -0,0 +1,49 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 8 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 0 +resume_experiment: null +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy +verbosity: 2 diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0722_213413/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0722_213413/config.yml new file mode 100644 index 0000000..8d77de5 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0722_213413/config.yml @@ -0,0 +1,49 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 8 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 0 +resume_experiment: null +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy +verbosity: 2 diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0722_213413/model/best.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0722_213413/model/best.pt Binary files differnew file mode 100644 index 0000000..e985997 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0722_213413/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0722_213413/model/last.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0722_213413/model/last.pt Binary files differnew file mode 100644 index 0000000..e985997 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0722_213413/model/last.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0722_213549/config.yml b/src/training/experiments/CharacterModel_Emnist_MLP/0722_213549/config.yml new file mode 100644 index 0000000..8d77de5 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0722_213549/config.yml @@ -0,0 +1,49 @@ +criterion: CrossEntropyLoss +criterion_args: + ignore_index: -100 + reduction: mean + weight: null +data_loader_args: + batch_size: 256 + cuda: true + num_workers: 8 + sample_to_balance: true + seed: 4711 + shuffle: true + splits: + - train + - val + subsample_fraction: null + target_transform: null + transform: null +dataloader: EmnistDataLoader +device: cuda:0 +experiment_group: Sample Experiments +lr_scheduler: OneCycleLR +lr_scheduler_args: + epochs: 16 + max_lr: 0.001 + steps_per_epoch: 1314 +metrics: +- accuracy +model: CharacterModel +network: MLP +network_args: + input_size: 784 + num_layers: 3 + output_size: 62 +optimizer: AdamW +optimizer_args: + amsgrad: false + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + lr: 0.01 + weight_decay: 0 +resume_experiment: null +train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy +verbosity: 2 diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0722_213549/model/best.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0722_213549/model/best.pt Binary files differnew file mode 100644 index 0000000..0dde787 --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0722_213549/model/best.pt diff --git a/src/training/experiments/CharacterModel_Emnist_MLP/0722_213549/model/last.pt b/src/training/experiments/CharacterModel_Emnist_MLP/0722_213549/model/last.pt Binary files differnew file mode 100644 index 0000000..e02738b --- /dev/null +++ b/src/training/experiments/CharacterModel_Emnist_MLP/0722_213549/model/last.pt diff --git a/src/training/experiments/sample.yml b/src/training/experiments/sample.yml new file mode 100644 index 0000000..0ed560d --- /dev/null +++ b/src/training/experiments/sample.yml @@ -0,0 +1,43 @@ +experiment_group: Sample Experiments +experiments: + - dataloader: EmnistDataLoader + model: CharacterModel + metrics: [accuracy] + network: MLP + network_args: + input_shape: 784 + num_layers: 2 + train_args: + batch_size: 256 + epochs: 16 + criterion: CrossEntropyLoss + criterion_args: + weight: null + ignore_index: -100 + reduction: mean + optimizer: AdamW + optimizer_args: + lr: 3.e-4 + betas: [0.9, 0.999] + eps: 1.e-08 + weight_decay: 0 + amsgrad: false + lr_scheduler: OneCycleLR + lr_scheduler_args: + max_lr: 3.e-5 + epochs: 16 + # - dataloader: EmnistDataLoader + # model: CharacterModel + # network: MLP + # network_args: + # input_shape: 784 + # num_layers: 4 + # train_args: + # batch_size: 256 + # - dataloader: EmnistDataLoader + # model: CharacterModel + # network: LeNet + # network_args: + # input_shape: [28, 28] + # train_args: + # batch_size: 256 diff --git a/src/training/experiments/sample_experiment.yml b/src/training/experiments/sample_experiment.yml new file mode 100644 index 0000000..e8d5023 --- /dev/null +++ b/src/training/experiments/sample_experiment.yml @@ -0,0 +1,56 @@ +experiment_group: Sample Experiments +experiments: + - dataloader: EmnistDataLoader + data_loader_args: + splits: [train, val] + sample_to_balance: true + subsample_fraction: null + transform: null + target_transform: null + batch_size: 256 + shuffle: true + num_workers: 8 + cuda: true + seed: 4711 + model: CharacterModel + metrics: [accuracy] + network: MLP + network_args: + input_size: 784 + output_size: 62 + num_layers: 3 + # network: LeNet + # network_args: + # input_size: [28, 28] + # output_size: 62 + train_args: + batch_size: 256 + epochs: 16 + val_metric: accuracy + criterion: CrossEntropyLoss + criterion_args: + weight: null + ignore_index: -100 + reduction: mean + # optimizer: RMSprop + # optimizer_args: + # lr: 1.e-3 + # alpha: 0.9 + # eps: 1.e-7 + # momentum: 0 + # weight_decay: 0 + # centered: false + optimizer: AdamW + optimizer_args: + lr: 1.e-2 + betas: [0.9, 0.999] + eps: 1.e-08 + weight_decay: 0 + amsgrad: false + # lr_scheduler: null + lr_scheduler: OneCycleLR + lr_scheduler_args: + max_lr: 1.e-3 + epochs: 16 + verbosity: 2 # 0, 1, 2 + resume_experiment: null diff --git a/src/training/prepare_experiments.py b/src/training/prepare_experiments.py index 1ab8f00..eb872d7 100644 --- a/src/training/prepare_experiments.py +++ b/src/training/prepare_experiments.py @@ -1,22 +1,24 @@ """Run a experiment from a config file.""" import json +from subprocess import check_call import click from loguru import logger import yaml -def run_experiment(experiment_filename: str) -> None: +def run_experiments(experiments_filename: str) -> None: """Run experiment from file.""" - with open(experiment_filename) as f: + with open(experiments_filename) as f: experiments_config = yaml.safe_load(f) num_experiments = len(experiments_config["experiments"]) for index in range(num_experiments): experiment_config = experiments_config["experiments"][index] experiment_config["experiment_group"] = experiments_config["experiment_group"] - print( - f"python training/run_experiment.py --gpu=-1 '{json.dumps(experiment_config)}'" - ) + # cmd = f"python training/run_experiment.py --gpu=-1 '{json.dumps(experiment_config)}'" + cmd = f"poetry run run-experiment --gpu=-1 --save --experiment_config '{json.dumps(experiment_config)}'" + print(cmd) + check_call(cmd, shell=True) @click.command() @@ -26,9 +28,9 @@ def run_experiment(experiment_filename: str) -> None: type=str, help="Filename of Yaml file of experiments to run.", ) -def main(experiment_filename: str) -> None: +def main(experiments_filename: str) -> None: """Parse command-line arguments and run experiments from provided file.""" - run_experiment(experiment_filename) + run_experiments(experiments_filename) if __name__ == "__main__": diff --git a/src/training/run_experiment.py b/src/training/run_experiment.py index 8296e59..0b29ce9 100644 --- a/src/training/run_experiment.py +++ b/src/training/run_experiment.py @@ -1,17 +1,64 @@ """Script to run experiments.""" +from datetime import datetime +from glob import glob import importlib +import json import os -from typing import Dict +from pathlib import Path +import re +from typing import Callable, Dict, Tuple import click +from loguru import logger import torch +from tqdm import tqdm +from training.gpu_manager import GPUManager from training.train import Trainer +import yaml -def run_experiment( - experiment_config: Dict, save_weights: bool, gpu_index: int, use_wandb: bool = False -) -> None: - """Short summary.""" +EXPERIMENTS_DIRNAME = Path(__file__).parents[0].resolve() / "experiments" + + +DEFAULT_TRAIN_ARGS = {"batch_size": 64, "epochs": 16} + + +def get_level(experiment_config: Dict) -> int: + """Sets the logger level.""" + if experiment_config["verbosity"] == 0: + return 40 + elif experiment_config["verbosity"] == 1: + return 20 + else: + return 10 + + +def create_experiment_dir(model: Callable, experiment_config: Dict) -> Path: + """Create new experiment.""" + EXPERIMENTS_DIRNAME.mkdir(parents=True, exist_ok=True) + experiment_dir = EXPERIMENTS_DIRNAME / model.__name__ + if experiment_config["resume_experiment"] is None: + experiment = datetime.now().strftime("%m%d_%H%M%S") + logger.debug(f"Creating a new experiment called {experiment}") + else: + available_experiments = glob(str(experiment_dir) + "/*") + available_experiments.sort() + if experiment_config["resume_experiment"] == "last": + experiment = available_experiments[-1] + logger.debug(f"Resuming the latest experiment {experiment}") + else: + experiment = experiment_config["resume_experiment"] + assert ( + str(experiment_dir / experiment) in available_experiments + ), "Experiment does not exist." + logger.debug(f"Resuming the experiment {experiment}") + + experiment_dir = experiment_dir / experiment + return experiment_dir + + +def load_modules_and_arguments(experiment_config: Dict) -> Tuple[Callable, Dict]: + """Loads all modules and arguments.""" # Import the data loader module and arguments. datasets_module = importlib.import_module("text_recognizer.datasets") data_loader_ = getattr(datasets_module, experiment_config["dataloader"]) @@ -21,8 +68,11 @@ def run_experiment( models_module = importlib.import_module("text_recognizer.models") model_class_ = getattr(models_module, experiment_config["model"]) - # Import metric. - metric_fn_ = getattr(models_module, experiment_config["metric"]) + # Import metrics. + metric_fns_ = { + metric: getattr(models_module, metric) + for metric in experiment_config["metrics"] + } # Import network module and arguments. network_module = importlib.import_module("text_recognizer.networks") @@ -38,38 +88,145 @@ def run_experiment( optimizer_args = experiment_config.get("optimizer_args", {}) # Learning rate scheduler - lr_scheduler_ = None - lr_scheduler_args = None if experiment_config["lr_scheduler"] is not None: lr_scheduler_ = getattr( torch.optim.lr_scheduler, experiment_config["lr_scheduler"] ) lr_scheduler_args = experiment_config.get("lr_scheduler_args", {}) + else: + lr_scheduler_ = None + lr_scheduler_args = None + + model_args = { + "data_loader": data_loader_, + "data_loader_args": data_loader_args, + "metrics": metric_fns_, + "network_fn": network_fn_, + "network_args": network_args, + "criterion": criterion_, + "criterion_args": criterion_args, + "optimizer": optimizer_, + "optimizer_args": optimizer_args, + "lr_scheduler": lr_scheduler_, + "lr_scheduler_args": lr_scheduler_args, + } + + return model_class_, model_args + + +def run_experiment( + experiment_config: Dict, save_weights: bool, device: str, use_wandb: bool = False +) -> None: + """Runs an experiment.""" + + # Load the modules and model arguments. + model_class_, model_args = load_modules_and_arguments(experiment_config) + + # Initializes the model with experiment config. + model = model_class_(**model_args, device=device) + + # Create new experiment. + experiment_dir = create_experiment_dir(model, experiment_config) + + # Create log and model directories. + log_dir = experiment_dir / "log" + model_dir = experiment_dir / "model" + + # Get checkpoint path. + checkpoint_path = model_dir / "last.pt" + if not checkpoint_path.exists(): + checkpoint_path = None - # Device - # TODO fix gpu manager - device = None - - model = model_class_( - network_fn=network_fn_, - network_args=network_args, - data_loader=data_loader_, - data_loader_args=data_loader_args, - metrics=metric_fn_, - criterion=criterion_, - criterion_args=criterion_args, - optimizer=optimizer_, - optimizer_args=optimizer_args, - lr_scheduler=lr_scheduler_, - lr_scheduler_args=lr_scheduler_args, - device=device, + # Make sure the log directory exists. + log_dir.mkdir(parents=True, exist_ok=True) + + # Have to remove default logger to get tqdm to work properly. + logger.remove() + + # Fetch verbosity level. + level = get_level(experiment_config) + + logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True, level=level) + logger.add( + str(log_dir / "train.log"), + format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}", ) - # TODO: Fix checkpoint path and wandb + if "cuda" in device: + gpu_index = re.sub("[^0-9]+", "", device) + logger.info( + f"Running experiment with config {experiment_config} on GPU {gpu_index}" + ) + else: + logger.info(f"Running experiment with config {experiment_config} on CPU") + + logger.info(f"The class mapping is {model.mapping}") + + # Pŕints a summary of the network in terminal. + model.summary() + + experiment_config["train_args"] = { + **DEFAULT_TRAIN_ARGS, + **experiment_config.get("train_args", {}), + } + + experiment_config["experiment_group"] = experiment_config.get( + "experiment_group", None + ) + + experiment_config["device"] = device + + # Save the config used in the experiment folder. + config_path = experiment_dir / "config.yml" + with open(str(config_path), "w") as f: + yaml.dump(experiment_config, f) + + # TODO: wandb trainer = Trainer( model=model, - epochs=experiment_config["epochs"], - val_metric=experiment_config["metric"], + model_dir=model_dir, + epochs=experiment_config["train_args"]["epochs"], + val_metric=experiment_config["train_args"]["val_metric"], + checkpoint_path=checkpoint_path, ) trainer.fit() + + score = trainer.validate() + + logger.info(f"Validation set evaluation: {score}") + + if save_weights: + model.save_weights(model_dir) + + +@click.command() +@click.option( + "--experiment_config", + type=str, + help='Experiment JSON, e.g. \'{"dataloader": "EmnistDataLoader", "model": "CharacterModel", "network": "mlp"}\'', +) +@click.option("--gpu", type=int, default=0, help="Provide the index of the GPU to use.") +@click.option( + "--save", + is_flag=True, + help="If set, the final weights will be saved to a canonical, version-controlled location.", +) +@click.option( + "--nowandb", is_flag=False, help="If true, do not use wandb for this run." +) +def main(experiment_config: str, gpu: int, save: bool, nowandb: bool) -> None: + """Run experiment.""" + if gpu < 0: + gpu_manager = GPUManager(True) + gpu = gpu_manager.get_free_gpu() + + device = "cuda:" + str(gpu) + + experiment_config = json.loads(experiment_config) + os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu}" + run_experiment(experiment_config, save, device, nowandb) + + +if __name__ == "__main__": + main() diff --git a/src/training/train.py b/src/training/train.py index 4a452b6..8cd5110 100644 --- a/src/training/train.py +++ b/src/training/train.py @@ -1,8 +1,8 @@ """Training script for PyTorch models.""" -from datetime import datetime from pathlib import Path -from typing import Callable, Dict, Optional +import time +from typing import Dict, Optional, Type from loguru import logger import numpy as np @@ -11,6 +11,7 @@ from tqdm import tqdm, trange from training.util import RunningAverage import wandb +from text_recognizer.models import Model torch.backends.cudnn.benchmark = True np.random.seed(4711) @@ -18,17 +19,16 @@ torch.manual_seed(4711) torch.cuda.manual_seed(4711) -EXPERIMENTS_DIRNAME = Path(__file__).parents[0].resolve() / "experiments" - - class Trainer: """Trainer for training PyTorch models.""" # TODO implement wandb. + # TODO implement Bayesian parameter search. def __init__( self, - model: Callable, + model: Type[Model], + model_dir: Path, epochs: int, val_metric: str = "accuracy", checkpoint_path: Optional[Path] = None, @@ -37,7 +37,8 @@ class Trainer: """Initialization of the Trainer. Args: - model (Callable): A model object. + model (Type[Model]): A model object. + model_dir (Path): Path to the model directory. epochs (int): Number of epochs to train. val_metric (str): The validation metric to evaluate the model on. Defaults to "accuracy". checkpoint_path (Optional[Path]): The path to a previously trained model. Defaults to None. @@ -45,6 +46,7 @@ class Trainer: """ self.model = model + self.model_dir = model_dir self.epochs = epochs self.checkpoint_path = checkpoint_path self.start_epoch = 0 @@ -58,7 +60,10 @@ class Trainer: self.val_metric = val_metric self.best_val_metric = 0.0 - logger.add(self.model.name + "_{time}.log") + + # Parse the name of the experiment. + experiment_dir = str(self.model_dir.parents[1]).split("/") + self.experiment_name = experiment_dir[-2] + "/" + experiment_dir[-1] def train(self) -> None: """Training loop.""" @@ -68,13 +73,13 @@ class Trainer: # Running average for the loss. loss_avg = RunningAverage() - data_loader = self.model.data_loaders["train"] + data_loader = self.model.data_loaders("train") with tqdm( total=len(data_loader), leave=False, unit="step", - bar_format="{n_fmt}/{total_fmt} {bar} {remaining} {rate_inv_fmt}{postfix}", + bar_format="{n_fmt}/{total_fmt} |{bar:20}| {remaining} {rate_inv_fmt}{postfix}", ) as t: for data, targets in data_loader: @@ -85,7 +90,7 @@ class Trainer: # Forward pass. # Get the network prediction. - output = self.model.predict(data) + output = self.model.network(data) # Compute the loss. loss = self.model.criterion(output, targets) @@ -105,16 +110,20 @@ class Trainer: output = output.data.cpu() targets = targets.data.cpu() metrics = { - metric: round(self.model.metrics[metric](output, targets), 4) + metric: self.model.metrics[metric](output, targets) for metric in self.model.metrics } - metrics["loss"] = round(loss_avg(), 4) + metrics["loss"] = loss_avg() # Update Tqdm progress bar. t.set_postfix(**metrics) t.update() - def evaluate(self) -> Dict: + # If the model has a learning rate scheduler, compute a step. + if self.model.lr_scheduler is not None: + self.model.lr_scheduler.step() + + def validate(self) -> Dict: """Evaluation loop. Returns: @@ -125,7 +134,7 @@ class Trainer: self.model.eval() # Running average for the loss. - data_loader = self.model.data_loaders["val"] + data_loader = self.model.data_loaders("val") # Running average for the loss. loss_avg = RunningAverage() @@ -137,7 +146,7 @@ class Trainer: total=len(data_loader), leave=False, unit="step", - bar_format="{n_fmt}/{total_fmt} {bar} {remaining} {rate_inv_fmt}{postfix}", + bar_format="{n_fmt}/{total_fmt} |{bar:20}| {remaining} {rate_inv_fmt}{postfix}", ) as t: for data, targets in data_loader: data, targets = ( @@ -145,22 +154,23 @@ class Trainer: targets.to(self.model.device), ) - # Forward pass. - # Get the network prediction. - output = self.model.predict(data) + with torch.no_grad(): + # Forward pass. + # Get the network prediction. + output = self.model.network(data) - # Compute the loss. - loss = self.model.criterion(output, targets) + # Compute the loss. + loss = self.model.criterion(output, targets) # Compute metrics. loss_avg.update(loss.item()) output = output.data.cpu() targets = targets.data.cpu() metrics = { - metric: round(self.model.metrics[metric](output, targets), 4) + metric: self.model.metrics[metric](output, targets) for metric in self.model.metrics } - metrics["loss"] = round(loss.item(), 4) + metrics["loss"] = loss.item() summary.append(metrics) @@ -170,7 +180,7 @@ class Trainer: # Compute mean of all metrics. metrics_mean = { - metric: np.mean(x[metric] for x in summary) for metric in summary[0] + metric: np.mean([x[metric] for x in summary]) for metric in summary[0] } metrics_str = " - ".join(f"{k}: {v}" for k, v in metrics_mean.items()) logger.debug(metrics_str) @@ -179,55 +189,34 @@ class Trainer: def fit(self) -> None: """Runs the training and evaluation loop.""" - # Create new experiment. - EXPERIMENTS_DIRNAME.mkdir(parents=True, exist_ok=True) - experiment = datetime.now().strftime("%m%d_%H%M%S") - experiment_dir = EXPERIMENTS_DIRNAME / self.model.network.__name__ / experiment - - # Create log and model directories. - log_dir = experiment_dir / "log" - model_dir = experiment_dir / "model" - - # Make sure the log directory exists. - log_dir.mkdir(parents=True, exist_ok=True) - - logger.add( - str(log_dir / "train.log"), - format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}", - ) - - logger.debug( - f"Running an experiment called {self.model.network.__name__}/{experiment}." - ) - - # Pŕints a summary of the network in terminal. - self.model.summary() + logger.debug(f"Running an experiment called {self.experiment_name}.") + t_start = time.time() # Run the training loop. for epoch in trange( - total=self.epochs, + self.epochs, initial=self.start_epoch, - leave=True, - bar_format="{desc}: {n_fmt}/{total_fmt} {bar} {remaining}{postfix}", + leave=False, + bar_format="{desc}: {n_fmt}/{total_fmt} |{bar:10}| {remaining}{postfix}", desc="Epoch", ): # Perform one training pass over the training set. self.train() # Evaluate the model on the validation set. - val_metrics = self.evaluate() - - # If the model has a learning rate scheduler, compute a step. - if self.model.lr_scheduler is not None: - self.model.lr_scheduler.step() + val_metrics = self.validate() # The validation metric to evaluate the model on, e.g. accuracy. val_metric = val_metrics[self.val_metric] is_best = val_metric >= self.best_val_metric - + self.best_val_metric = val_metric if is_best else self.best_val_metric # Save checkpoint. - self.model.save_checkpoint(model_dir, is_best, epoch, self.val_metric) + self.model.save_checkpoint(self.model_dir, is_best, epoch, self.val_metric) if self.start_epoch > 0 and epoch + self.start_epoch == self.epochs: logger.debug(f"Trained the model for {self.epochs} number of epochs.") break + + t_end = time.time() + t_training = t_end - t_start + logger.info(f"Training took {t_training:.2f} s.") |