diff options
| -rw-r--r-- | poetry.lock | 541 | ||||
| -rw-r--r-- | pyproject.toml | 1 | ||||
| -rw-r--r-- | text_recognizer/datasets/base_data_module.py | 69 | ||||
| -rw-r--r-- | text_recognizer/datasets/download_utils.py | 73 | ||||
| -rw-r--r-- | text_recognizer/datasets/emnist.py | 194 | ||||
| -rw-r--r-- | text_recognizer/datasets/emnist_dataset.py | 131 | ||||
| -rw-r--r-- | text_recognizer/datasets/emnist_essentials.json | 1 | 
7 files changed, 877 insertions, 133 deletions
| diff --git a/poetry.lock b/poetry.lock index 78f086e..a389e98 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,34 @@  [[package]] +name = "absl-py" +version = "0.12.0" +description = "Abseil Python Common Libraries, see https://github.com/abseil/abseil-py." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +six = "*" + +[[package]] +name = "aiohttp" +version = "3.7.4.post0" +description = "Async http client/server framework (asyncio)" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +async-timeout = ">=3.0,<4.0" +attrs = ">=17.3.0" +chardet = ">=2.0,<5.0" +multidict = ">=4.5,<7.0" +typing-extensions = ">=3.6.5" +yarl = ">=1.0,<2.0" + +[package.extras] +speedups = ["aiodns", "brotlipy", "cchardet"] + +[[package]]  name = "alabaster"  version = "0.7.12"  description = "A configurable sidebar-enabled Sphinx theme" @@ -48,6 +78,14 @@ optional = false  python-versions = ">=3.5"  [[package]] +name = "async-timeout" +version = "3.0.1" +description = "Timeout context manager for asyncio programs" +category = "main" +optional = false +python-versions = ">=3.5.3" + +[[package]]  name = "atomicwrites"  version = "1.4.0"  description = "Atomic file writes." @@ -156,6 +194,14 @@ optional = false  python-versions = "*"  [[package]] +name = "cachetools" +version = "4.2.1" +description = "Extensible memoizing collections and decorators" +category = "main" +optional = false +python-versions = "~=3.5" + +[[package]]  name = "certifi"  version = "2020.11.8"  description = "Python package for providing Mozilla's CA Bundle." @@ -424,6 +470,42 @@ python-versions = "*"  flake8 = "*"  [[package]] +name = "fsspec" +version = "0.8.7" +description = "File-system specification" +category = "main" +optional = false +python-versions = ">3.6" + +[package.dependencies] +aiohttp = {version = "*", optional = true, markers = "extra == \"http\""} +requests = {version = "*", optional = true, markers = "extra == \"http\""} + +[package.extras] +abfs = ["adlfs"] +adl = ["adlfs"] +dask = ["dask", "distributed"] +dropbox = ["dropboxdrivefs", "requests", "dropbox"] +gcs = ["gcsfs"] +git = ["pygit2"] +github = ["requests"] +gs = ["gcsfs"] +hdfs = ["pyarrow"] +http = ["requests", "aiohttp"] +s3 = ["s3fs"] +sftp = ["paramiko"] +smb = ["smbprotocol"] +ssh = ["paramiko"] + +[[package]] +name = "future" +version = "0.18.2" +description = "Clean single-source support for Python 3 and 2" +category = "main" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]]  name = "gitdb"  version = "4.0.5"  description = "Git Object Database" @@ -446,6 +528,39 @@ python-versions = ">=3.4"  gitdb = ">=4.0.1,<5"  [[package]] +name = "google-auth" +version = "1.28.0" +description = "Google Authentication Library" +category = "main" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*" + +[package.dependencies] +cachetools = ">=2.0.0,<5.0" +pyasn1-modules = ">=0.2.1" +rsa = {version = ">=3.1.4,<5", markers = "python_version >= \"3.6\""} +six = ">=1.9.0" + +[package.extras] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0dev)"] +pyopenssl = ["pyopenssl (>=20.0.0)"] + +[[package]] +name = "google-auth-oauthlib" +version = "0.4.3" +description = "Google Authentication Library" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +google-auth = ">=1.0.0" +requests-oauthlib = ">=0.7.0" + +[package.extras] +tool = ["click (>=6.0.0)"] + +[[package]]  name = "gpustat"  version = "0.6.0"  description = "An utility to monitor NVIDIA GPU status and usage" @@ -476,6 +591,20 @@ docs = ["sphinx (>=1.8)", "sphinx-rtd-theme"]  test = ["mock (>=3)", "pytest (>=4)", "pytest-mock (>=2)", "pytest-cov"]  [[package]] +name = "grpcio" +version = "1.36.1" +description = "HTTP/2-based RPC framework" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +six = ">=1.5.2" + +[package.extras] +protobuf = ["grpcio-tools (>=1.36.1)"] + +[[package]]  name = "gtn"  version = "0.0.0"  description = "Automatic differentiation with WFSTs" @@ -768,6 +897,17 @@ win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""}  dev = ["codecov (>=2.0.15)", "colorama (>=0.3.4)", "flake8 (>=3.7.7)", "tox (>=3.9.0)", "tox-travis (>=0.12)", "pytest (>=4.6.2)", "pytest-cov (>=2.7.1)", "Sphinx (>=2.2.1)", "sphinx-autobuild (>=0.7.1)", "sphinx-rtd-theme (>=0.4.3)", "black (>=19.10b0)", "isort (>=5.1.1)"]  [[package]] +name = "markdown" +version = "3.3.4" +description = "Python implementation of Markdown." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.extras] +testing = ["coverage", "pyyaml"] + +[[package]]  name = "markupsafe"  version = "1.1.1"  description = "Safely add untrusted strings to HTML/XML markup." @@ -830,6 +970,14 @@ optional = false  python-versions = ">=3.5"  [[package]] +name = "multidict" +version = "5.1.0" +description = "multidict implementation" +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]]  name = "mypy"  version = "0.770"  description = "Optional static typing for Python" @@ -996,6 +1144,19 @@ optional = false  python-versions = "*"  [[package]] +name = "oauthlib" +version = "3.1.0" +description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[package.extras] +rsa = ["cryptography"] +signals = ["blinker"] +signedtoken = ["cryptography", "pyjwt (>=1.0.0)"] + +[[package]]  name = "omegaconf"  version = "2.0.5"  description = "A flexible configuration library" @@ -1194,6 +1355,25 @@ optional = false  python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"  [[package]] +name = "pyasn1" +version = "0.4.8" +description = "ASN.1 types and codecs" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "pyasn1-modules" +version = "0.2.8" +description = "A collection of ASN.1-based protocols modules." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +pyasn1 = ">=0.4.6,<0.5.0" + +[[package]]  name = "pycodestyle"  version = "2.6.0"  description = "Python style guide checker" @@ -1323,6 +1503,33 @@ optional = false  python-versions = "*"  [[package]] +name = "pytorch-lightning" +version = "1.2.4" +description = "PyTorch Lightning is the lightweight PyTorch wrapper for ML researchers. Scale your models. Write less boilerplate." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +fsspec = {version = ">=0.8.1", extras = ["http"]} +future = ">=0.17.1" +numpy = ">=1.16.6" +PyYAML = ">=5.1,<5.4.0 || >=5.5.0" +tensorboard = ">=2.2.0" +torch = ">=1.4" +tqdm = ">=4.41.0" + +[package.extras] +all = ["matplotlib (>3.1)", "horovod (>=0.21.2)", "omegaconf (>=2.0.1)", "torchtext (>=0.5)", "onnxruntime (>=1.3.0)", "hydra-core (>=1.0)", "neptune-client (>=0.4.109)", "comet-ml (>=3.1.12)", "mlflow (>=1.0.0)", "test-tube (>=0.7.5)", "wandb (>=0.8.21)", "coverage (>=5.2)", "codecov (>=2.1)", "pytest (>=6.0)", "pytest-cov (>2.10)", "flake8 (>=3.6)", "check-manifest", "twine (==3.2)", "scikit-learn (>=0.22.2)", "scikit-image (>=0.17.2)", "isort (>=5.6.4)", "mypy (>=0.720,<0.800)", "pre-commit (>=1.0)", "cloudpickle (>=1.3)", "nltk (>=3.3)", "pandas", "torchvision (>=0.5)", "gym (>=0.17.0)"] +cpu = ["matplotlib (>3.1)", "omegaconf (>=2.0.1)", "torchtext (>=0.5)", "onnxruntime (>=1.3.0)", "hydra-core (>=1.0)", "neptune-client (>=0.4.109)", "comet-ml (>=3.1.12)", "mlflow (>=1.0.0)", "test-tube (>=0.7.5)", "wandb (>=0.8.21)", "coverage (>=5.2)", "codecov (>=2.1)", "pytest (>=6.0)", "pytest-cov (>2.10)", "flake8 (>=3.6)", "check-manifest", "twine (==3.2)", "scikit-learn (>=0.22.2)", "scikit-image (>=0.17.2)", "isort (>=5.6.4)", "mypy (>=0.720,<0.800)", "pre-commit (>=1.0)", "cloudpickle (>=1.3)", "nltk (>=3.3)", "pandas", "torchvision (>=0.5)", "gym (>=0.17.0)"] +cpu-extra = ["matplotlib (>3.1)", "omegaconf (>=2.0.1)", "torchtext (>=0.5)", "onnxruntime (>=1.3.0)", "hydra-core (>=1.0)"] +dev = ["matplotlib (>3.1)", "horovod (>=0.21.2)", "omegaconf (>=2.0.1)", "torchtext (>=0.5)", "onnxruntime (>=1.3.0)", "hydra-core (>=1.0)", "neptune-client (>=0.4.109)", "comet-ml (>=3.1.12)", "mlflow (>=1.0.0)", "test-tube (>=0.7.5)", "wandb (>=0.8.21)", "coverage (>=5.2)", "codecov (>=2.1)", "pytest (>=6.0)", "pytest-cov (>2.10)", "flake8 (>=3.6)", "check-manifest", "twine (==3.2)", "scikit-learn (>=0.22.2)", "scikit-image (>=0.17.2)", "isort (>=5.6.4)", "mypy (>=0.720,<0.800)", "pre-commit (>=1.0)", "cloudpickle (>=1.3)", "nltk (>=3.3)", "pandas"] +examples = ["torchvision (>=0.5)", "gym (>=0.17.0)"] +extra = ["matplotlib (>3.1)", "horovod (>=0.21.2)", "omegaconf (>=2.0.1)", "torchtext (>=0.5)", "onnxruntime (>=1.3.0)", "hydra-core (>=1.0)"] +loggers = ["neptune-client (>=0.4.109)", "comet-ml (>=3.1.12)", "mlflow (>=1.0.0)", "test-tube (>=0.7.5)", "wandb (>=0.8.21)"] +test = ["coverage (>=5.2)", "codecov (>=2.1)", "pytest (>=6.0)", "pytest-cov (>2.10)", "flake8 (>=3.6)", "check-manifest", "twine (==3.2)", "scikit-learn (>=0.22.2)", "scikit-image (>=0.17.2)", "isort (>=5.6.4)", "mypy (>=0.720,<0.800)", "pre-commit (>=1.0)", "cloudpickle (>=1.3)", "nltk (>=3.3)", "pandas"] + +[[package]]  name = "pytorch-metric-learning"  version = "0.9.94"  description = "The easiest way to use deep metric learning in your application. Modular, flexible, and extensible. Written in PyTorch." @@ -1465,6 +1672,32 @@ security = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)"]  socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]  [[package]] +name = "requests-oauthlib" +version = "1.3.0" +description = "OAuthlib authentication support for Requests." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[package.dependencies] +oauthlib = ">=3.0.0" +requests = ">=2.0.0" + +[package.extras] +rsa = ["oauthlib[signedtoken] (>=3.0.0)"] + +[[package]] +name = "rsa" +version = "4.7.2" +description = "Pure-Python RSA implementation" +category = "main" +optional = false +python-versions = ">=3.5, <4" + +[package.dependencies] +pyasn1 = ">=0.1.3" + +[[package]]  name = "safety"  version = "1.9.0"  description = "Checks installed dependencies for known vulnerabilities." @@ -1730,6 +1963,35 @@ optional = false  python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, <4"  [[package]] +name = "tensorboard" +version = "2.4.1" +description = "TensorBoard lets you watch Tensors Flow" +category = "main" +optional = false +python-versions = ">= 2.7, != 3.0.*, != 3.1.*" + +[package.dependencies] +absl-py = ">=0.4" +google-auth = ">=1.6.3,<2" +google-auth-oauthlib = ">=0.4.1,<0.5" +grpcio = ">=1.24.3" +markdown = ">=2.6.8" +numpy = ">=1.12.0" +protobuf = ">=3.6.0" +requests = ">=2.21.0,<3" +six = ">=1.10.0" +tensorboard-plugin-wit = ">=1.6.0" +werkzeug = ">=0.11.15" + +[[package]] +name = "tensorboard-plugin-wit" +version = "1.8.0" +description = "What-If Tool TensorBoard plugin." +category = "main" +optional = false +python-versions = "*" + +[[package]]  name = "terminado"  version = "0.9.1"  description = "Tornado websocket backend for the Xterm.js Javascript terminal emulator library." @@ -1953,6 +2215,18 @@ optional = false  python-versions = "*"  [[package]] +name = "werkzeug" +version = "1.0.1" +description = "The comprehensive WSGI web application library." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[package.extras] +dev = ["pytest", "pytest-timeout", "coverage", "tox", "sphinx", "pallets-sphinx-themes", "sphinx-issues"] +watchdog = ["watchdog"] + +[[package]]  name = "widgetsnbextension"  version = "3.5.1"  description = "IPython HTML widgets for Jupyter" @@ -1990,12 +2264,67 @@ all = ["six", "pytest", "pytest-cov", "codecov", "scikit-build", "cmake", "ninja  optional = ["pygments", "colorama"]  tests = ["pytest", "pytest-cov", "codecov", "scikit-build", "cmake", "ninja", "pybind11"] +[[package]] +name = "yarl" +version = "1.6.3" +description = "Yet another URL library" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +idna = ">=2.0" +multidict = ">=4.0" +  [metadata]  lock-version = "1.1"  python-versions = "^3.8" -content-hash = "c87742a388e1277e84313b4c0ff75681d754c8328db2c488c0aba2a4dafc6a64" +content-hash = "45c6282e27a0231e3dc3c951a9540f3cd2f6d6eb3d3eda1eab84168014d8062a"  [metadata.files] +absl-py = [ +    {file = "absl-py-0.12.0.tar.gz", hash = "sha256:b44f68984a5ceb2607d135a615999b93924c771238a63920d17d3387b0d229d5"}, +    {file = "absl_py-0.12.0-py3-none-any.whl", hash = "sha256:afe94e3c751ff81aad55d33ab6e630390da32780110b5af72ae81ecff8418d9e"}, +] +aiohttp = [ +    {file = "aiohttp-3.7.4.post0-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:3cf75f7cdc2397ed4442594b935a11ed5569961333d49b7539ea741be2cc79d5"}, +    {file = "aiohttp-3.7.4.post0-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:4b302b45040890cea949ad092479e01ba25911a15e648429c7c5aae9650c67a8"}, +    {file = "aiohttp-3.7.4.post0-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:fe60131d21b31fd1a14bd43e6bb88256f69dfc3188b3a89d736d6c71ed43ec95"}, +    {file = "aiohttp-3.7.4.post0-cp36-cp36m-manylinux2014_i686.whl", hash = "sha256:393f389841e8f2dfc86f774ad22f00923fdee66d238af89b70ea314c4aefd290"}, +    {file = "aiohttp-3.7.4.post0-cp36-cp36m-manylinux2014_ppc64le.whl", hash = "sha256:c6e9dcb4cb338d91a73f178d866d051efe7c62a7166653a91e7d9fb18274058f"}, +    {file = "aiohttp-3.7.4.post0-cp36-cp36m-manylinux2014_s390x.whl", hash = "sha256:5df68496d19f849921f05f14f31bd6ef53ad4b00245da3195048c69934521809"}, +    {file = "aiohttp-3.7.4.post0-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:0563c1b3826945eecd62186f3f5c7d31abb7391fedc893b7e2b26303b5a9f3fe"}, +    {file = "aiohttp-3.7.4.post0-cp36-cp36m-win32.whl", hash = "sha256:3d78619672183be860b96ed96f533046ec97ca067fd46ac1f6a09cd9b7484287"}, +    {file = "aiohttp-3.7.4.post0-cp36-cp36m-win_amd64.whl", hash = "sha256:f705e12750171c0ab4ef2a3c76b9a4024a62c4103e3a55dd6f99265b9bc6fcfc"}, +    {file = "aiohttp-3.7.4.post0-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:230a8f7e24298dea47659251abc0fd8b3c4e38a664c59d4b89cca7f6c09c9e87"}, +    {file = "aiohttp-3.7.4.post0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:2e19413bf84934d651344783c9f5e22dee452e251cfd220ebadbed2d9931dbf0"}, +    {file = "aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:e4b2b334e68b18ac9817d828ba44d8fcb391f6acb398bcc5062b14b2cbeac970"}, +    {file = "aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:d012ad7911653a906425d8473a1465caa9f8dea7fcf07b6d870397b774ea7c0f"}, +    {file = "aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_ppc64le.whl", hash = "sha256:40eced07f07a9e60e825554a31f923e8d3997cfc7fb31dbc1328c70826e04cde"}, +    {file = "aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_s390x.whl", hash = "sha256:209b4a8ee987eccc91e2bd3ac36adee0e53a5970b8ac52c273f7f8fd4872c94c"}, +    {file = "aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:14762875b22d0055f05d12abc7f7d61d5fd4fe4642ce1a249abdf8c700bf1fd8"}, +    {file = "aiohttp-3.7.4.post0-cp37-cp37m-win32.whl", hash = "sha256:7615dab56bb07bff74bc865307aeb89a8bfd9941d2ef9d817b9436da3a0ea54f"}, +    {file = "aiohttp-3.7.4.post0-cp37-cp37m-win_amd64.whl", hash = "sha256:d9e13b33afd39ddeb377eff2c1c4f00544e191e1d1dee5b6c51ddee8ea6f0cf5"}, +    {file = "aiohttp-3.7.4.post0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:547da6cacac20666422d4882cfcd51298d45f7ccb60a04ec27424d2f36ba3eaf"}, +    {file = "aiohttp-3.7.4.post0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:af9aa9ef5ba1fd5b8c948bb11f44891968ab30356d65fd0cc6707d989cd521df"}, +    {file = "aiohttp-3.7.4.post0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:64322071e046020e8797117b3658b9c2f80e3267daec409b350b6a7a05041213"}, +    {file = "aiohttp-3.7.4.post0-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:bb437315738aa441251214dad17428cafda9cdc9729499f1d6001748e1d432f4"}, +    {file = "aiohttp-3.7.4.post0-cp38-cp38-manylinux2014_ppc64le.whl", hash = "sha256:e54962802d4b8b18b6207d4a927032826af39395a3bd9196a5af43fc4e60b009"}, +    {file = "aiohttp-3.7.4.post0-cp38-cp38-manylinux2014_s390x.whl", hash = "sha256:a00bb73540af068ca7390e636c01cbc4f644961896fa9363154ff43fd37af2f5"}, +    {file = "aiohttp-3.7.4.post0-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:79ebfc238612123a713a457d92afb4096e2148be17df6c50fb9bf7a81c2f8013"}, +    {file = "aiohttp-3.7.4.post0-cp38-cp38-win32.whl", hash = "sha256:515dfef7f869a0feb2afee66b957cc7bbe9ad0cdee45aec7fdc623f4ecd4fb16"}, +    {file = "aiohttp-3.7.4.post0-cp38-cp38-win_amd64.whl", hash = "sha256:114b281e4d68302a324dd33abb04778e8557d88947875cbf4e842c2c01a030c5"}, +    {file = "aiohttp-3.7.4.post0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:7b18b97cf8ee5452fa5f4e3af95d01d84d86d32c5e2bfa260cf041749d66360b"}, +    {file = "aiohttp-3.7.4.post0-cp39-cp39-manylinux1_i686.whl", hash = "sha256:15492a6368d985b76a2a5fdd2166cddfea5d24e69eefed4630cbaae5c81d89bd"}, +    {file = "aiohttp-3.7.4.post0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:bdb230b4943891321e06fc7def63c7aace16095be7d9cf3b1e01be2f10fba439"}, +    {file = "aiohttp-3.7.4.post0-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:cffe3ab27871bc3ea47df5d8f7013945712c46a3cc5a95b6bee15887f1675c22"}, +    {file = "aiohttp-3.7.4.post0-cp39-cp39-manylinux2014_ppc64le.whl", hash = "sha256:f881853d2643a29e643609da57b96d5f9c9b93f62429dcc1cbb413c7d07f0e1a"}, +    {file = "aiohttp-3.7.4.post0-cp39-cp39-manylinux2014_s390x.whl", hash = "sha256:a5ca29ee66f8343ed336816c553e82d6cade48a3ad702b9ffa6125d187e2dedb"}, +    {file = "aiohttp-3.7.4.post0-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:17c073de315745a1510393a96e680d20af8e67e324f70b42accbd4cb3315c9fb"}, +    {file = "aiohttp-3.7.4.post0-cp39-cp39-win32.whl", hash = "sha256:932bb1ea39a54e9ea27fc9232163059a0b8855256f4052e776357ad9add6f1c9"}, +    {file = "aiohttp-3.7.4.post0-cp39-cp39-win_amd64.whl", hash = "sha256:02f46fc0e3c5ac58b80d4d56eb0a7c7d97fcef69ace9326289fb9f1955e65cfe"}, +    {file = "aiohttp-3.7.4.post0.tar.gz", hash = "sha256:493d3299ebe5f5a7c66b9819eacdcfbbaaf1a8e84911ddffcdc48888497afecf"}, +]  alabaster = [      {file = "alabaster-0.7.12-py2.py3-none-any.whl", hash = "sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359"},      {file = "alabaster-0.7.12.tar.gz", hash = "sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02"}, @@ -2032,6 +2361,10 @@ async-generator = [      {file = "async_generator-1.10-py3-none-any.whl", hash = "sha256:01c7bf666359b4967d2cda0000cc2e4af16a0ae098cbffcb8472fb9e8ad6585b"},      {file = "async_generator-1.10.tar.gz", hash = "sha256:6ebb3d106c12920aaae42ccb6f787ef5eefdcdd166ea3d628fa8476abe712144"},  ] +async-timeout = [ +    {file = "async-timeout-3.0.1.tar.gz", hash = "sha256:0c3c816a028d47f659d6ff5c745cb2acf1f966da1fe5c19c77a70282b25f4c5f"}, +    {file = "async_timeout-3.0.1-py3-none-any.whl", hash = "sha256:4291ca197d287d274d0b6cb5d6f8f8f82d434ed288f962539ff18cc9012f9ea3"}, +]  atomicwrites = [      {file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"},      {file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"}, @@ -2069,6 +2402,10 @@ boltons = [      {file = "boltons-20.2.1-py2.py3-none-any.whl", hash = "sha256:3dd8a8e3c1886e7f7ba3422b50f55a66e1700161bf01b919d098e7d96dd2d9b6"},      {file = "boltons-20.2.1.tar.gz", hash = "sha256:dd362291a460cc1e0c2e91cc6a60da3036ced77099b623112e8f833e6734bdc5"},  ] +cachetools = [ +    {file = "cachetools-4.2.1-py3-none-any.whl", hash = "sha256:1d9d5f567be80f7c07d765e21b814326d78c61eb0c3a637dffc0e5d1796cb2e2"}, +    {file = "cachetools-4.2.1.tar.gz", hash = "sha256:f469e29e7aa4cff64d8de4aad95ce76de8ea1125a16c68e0d93f65c3c3dc92e9"}, +]  certifi = [      {file = "certifi-2020.11.8-py2.py3-none-any.whl", hash = "sha256:1f422849db327d534e3d0c5f02a263458c3955ec0aae4ff09b95f195c59f4edd"},      {file = "certifi-2020.11.8.tar.gz", hash = "sha256:f05def092c44fbf25834a51509ef6e631dc19765ab8a57b4e7ab85531f0a9cf4"}, @@ -2233,6 +2570,13 @@ flake8-polyfill = [      {file = "flake8-polyfill-1.0.2.tar.gz", hash = "sha256:e44b087597f6da52ec6393a709e7108b2905317d0c0b744cdca6208e670d8eda"},      {file = "flake8_polyfill-1.0.2-py2.py3-none-any.whl", hash = "sha256:12be6a34ee3ab795b19ca73505e7b55826d5f6ad7230d31b18e106400169b9e9"},  ] +fsspec = [ +    {file = "fsspec-0.8.7-py3-none-any.whl", hash = "sha256:65dbf8244a3a3d23342109925f9f588c7551b2b01a5f47e555043b17e2b32d62"}, +    {file = "fsspec-0.8.7.tar.gz", hash = "sha256:4b11557a90ac637089b10afa4c77adf42080c0696f6f2771c41ce92d73c41432"}, +] +future = [ +    {file = "future-0.18.2.tar.gz", hash = "sha256:b1bead90b70cf6ec3f0710ae53a525360fa360d306a86583adc6bf83a4db537d"}, +]  gitdb = [      {file = "gitdb-4.0.5-py3-none-any.whl", hash = "sha256:91f36bfb1ab7949b3b40e23736db18231bf7593edada2ba5c3a174a7b23657ac"},      {file = "gitdb-4.0.5.tar.gz", hash = "sha256:c9e1f2d0db7ddb9a704c2a0217be31214e91a4fe1dea1efad19ae42ba0c285c9"}, @@ -2241,6 +2585,14 @@ gitpython = [      {file = "GitPython-3.1.11-py3-none-any.whl", hash = "sha256:6eea89b655917b500437e9668e4a12eabdcf00229a0df1762aabd692ef9b746b"},      {file = "GitPython-3.1.11.tar.gz", hash = "sha256:befa4d101f91bad1b632df4308ec64555db684c360bd7d2130b4807d49ce86b8"},  ] +google-auth = [ +    {file = "google-auth-1.28.0.tar.gz", hash = "sha256:9bd436d19ab047001a1340720d2b629eb96dd503258c524921ec2af3ee88a80e"}, +    {file = "google_auth-1.28.0-py2.py3-none-any.whl", hash = "sha256:dcaba3aa9d4e0e96fd945bf25a86b6f878fcb05770b67adbeb50a63ca4d28a5e"}, +] +google-auth-oauthlib = [ +    {file = "google-auth-oauthlib-0.4.3.tar.gz", hash = "sha256:54431535309cfab50897d9c181e8c2226268825aa6e42e930b05b99c5041a18c"}, +    {file = "google_auth_oauthlib-0.4.3-py2.py3-none-any.whl", hash = "sha256:dabffbf594a6be2fd6d054060846d1201569252efb10dfb749b504a7591f8af0"}, +]  gpustat = [      {file = "gpustat-0.6.0.tar.gz", hash = "sha256:f69135080b2668b662822633312c2180002c10111597af9631bb02e042755b6c"},  ] @@ -2248,6 +2600,54 @@ graphviz = [      {file = "graphviz-0.16-py2.py3-none-any.whl", hash = "sha256:3cad5517c961090dfc679df6402a57de62d97703e2880a1a46147bb0dc1639eb"},      {file = "graphviz-0.16.zip", hash = "sha256:d2d25af1c199cad567ce4806f0449cb74eb30cf451fd7597251e1da099ac6e57"},  ] +grpcio = [ +    {file = "grpcio-1.36.1-cp27-cp27m-macosx_10_10_x86_64.whl", hash = "sha256:e3a83c5db16f95daac1d96cf3c9018d765579b5a29bb336758d793028e729921"}, +    {file = "grpcio-1.36.1-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:c18739fecb90760b183bfcb4da1cf2c6bf57e38f7baa2c131d5f67d9a4c8365d"}, +    {file = "grpcio-1.36.1-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:f6efa62ca1fe02cd34ec35f53446f04a15fe2c886a4e825f5679936a573d2cbf"}, +    {file = "grpcio-1.36.1-cp27-cp27m-win32.whl", hash = "sha256:9a18299827a70be0507f98a65393b1c7f6c004fe2ca995fe23ffac534dd187a7"}, +    {file = "grpcio-1.36.1-cp27-cp27m-win_amd64.whl", hash = "sha256:8a89190de1985a54ef311650cf9687ffb81de038973fd32e452636ddae36b29f"}, +    {file = "grpcio-1.36.1-cp27-cp27mu-linux_armv7l.whl", hash = "sha256:3e75643d21db7d68acd541d3fec66faaa8061d12b511e101b529ff12a276bb9b"}, +    {file = "grpcio-1.36.1-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:3c5204e05e18268dd6a1099ca6c106fd9d00bcae1e37d5a5186094c55044c941"}, +    {file = "grpcio-1.36.1-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:24d4c2c5e540e666c52225953d6813afc8ccf9bf46db6a72edd4e8d606656248"}, +    {file = "grpcio-1.36.1-cp35-cp35m-linux_armv7l.whl", hash = "sha256:4dc7295dc9673f7af22c1e38c2a2c24ecbd6773a4c5ed5a46ed38ad4dcf2bf6c"}, +    {file = "grpcio-1.36.1-cp35-cp35m-macosx_10_10_intel.whl", hash = "sha256:f241116d4bf1a8037ff87f16914b606390824e50902bdbfa2262e855fbf07fe5"}, +    {file = "grpcio-1.36.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:1056b558acfd575d774644826df449e1402a03e456a3192fafb6b06d1069bf80"}, +    {file = "grpcio-1.36.1-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:52ec563da45d06319224ebbda53501d25594de64ee1b2786e119ba4a2f1ce40c"}, +    {file = "grpcio-1.36.1-cp35-cp35m-manylinux2014_i686.whl", hash = "sha256:7cbeac9bbe6a4a7fce4a89c892c249135dd9f5f5219ede157174c34a456188f0"}, +    {file = "grpcio-1.36.1-cp35-cp35m-manylinux2014_x86_64.whl", hash = "sha256:2abaa9f0d83bd0b26f6d0d1fc4b97d73bde3ceac36ab857f70d3cabcf31c5c79"}, +    {file = "grpcio-1.36.1-cp35-cp35m-win32.whl", hash = "sha256:02030e1afd3247f2b159df9dff959ec79dd4047b1c4dd4eec9e3d1642efbd504"}, +    {file = "grpcio-1.36.1-cp35-cp35m-win_amd64.whl", hash = "sha256:eafafc7e040e36aa926edc731ab52c23465981888779ae64bfc8ad85888ed4f3"}, +    {file = "grpcio-1.36.1-cp36-cp36m-linux_armv7l.whl", hash = "sha256:1030e74ddd0fa6e3bad7944f0c68cf1251b15bcd70641f0ad3858fdf2b8602a0"}, +    {file = "grpcio-1.36.1-cp36-cp36m-macosx_10_10_x86_64.whl", hash = "sha256:b003e24339030ed356f59505d1065b89e1f443ef41ce71ca9069be944c0d2e6b"}, +    {file = "grpcio-1.36.1-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:76daa3c4d58fcf40f7969bdb4270335e96ee0382a050cadcd97d7332cd0251a3"}, +    {file = "grpcio-1.36.1-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:f591597bb25eae0094ead5a965555e911453e5f35fdbdaa83be11ef107865697"}, +    {file = "grpcio-1.36.1-cp36-cp36m-manylinux2014_i686.whl", hash = "sha256:cbd82c479338fc1c0e5c3db09752b61fe47d40c6e38e4be8657153712fa76674"}, +    {file = "grpcio-1.36.1-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:7e32bc01dfaa7a51c547379644ea619a2161d6969affdac3bbd173478d26673d"}, +    {file = "grpcio-1.36.1-cp36-cp36m-win32.whl", hash = "sha256:5378189fb897567f4929f75ab67a3e0da4f8967806246cb9cfa1fa06bfbdb0d5"}, +    {file = "grpcio-1.36.1-cp36-cp36m-win_amd64.whl", hash = "sha256:3a6295aa692806218e97bb687a71cd768450ed99e2acddc488f18d738edef463"}, +    {file = "grpcio-1.36.1-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:6f6f8a8b57e40347d0bf32c2135037dae31d63d3b19007b4c426a11b76deaf65"}, +    {file = "grpcio-1.36.1-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:4c05ed54b2a00df01e633bebec819b512bf0c60f8f5b3b36dd344dc673b02fea"}, +    {file = "grpcio-1.36.1-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:e1b9e906aa6f7577016e86ed7f3a69cae7dab4e41356584dc7980f76ea65035f"}, +    {file = "grpcio-1.36.1-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:a602d6b30760bbbb2fe776caaa914a0d404636cafc3f2322718bf8002d7b1e55"}, +    {file = "grpcio-1.36.1-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:dee9971aef20fc09ed897420446c4d0926cd1d7630f343333288523ca5b44bb2"}, +    {file = "grpcio-1.36.1-cp37-cp37m-win32.whl", hash = "sha256:ed16bfeda02268e75e038c58599d52afc7097d749916c079b26bc27a66900f7d"}, +    {file = "grpcio-1.36.1-cp37-cp37m-win_amd64.whl", hash = "sha256:85a6035ae75ce964f78f19cf913938596ccf068b149fcd79f4371268bcb9aa7c"}, +    {file = "grpcio-1.36.1-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:6b30682180053eebc87802c2f249d2f59b430e1a18e8808575dde0d22a968b2c"}, +    {file = "grpcio-1.36.1-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:5e4920a8fb5d17b2c5ba980db0ac1c925bbee3e5d70e96da3ec4fb1c8600d68f"}, +    {file = "grpcio-1.36.1-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:f7740d9d9451f3663df11b241ac05cafc0efaa052d2fdca6640c4d3748eaf6e2"}, +    {file = "grpcio-1.36.1-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:20b7c4c5513e1135a2261e56830c0e710f205fee92019b92fe132d7f16a5cfd8"}, +    {file = "grpcio-1.36.1-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:216fbd2a488e74c3b96e240e4054c85c4c99102a439bc9f556936991643f43bc"}, +    {file = "grpcio-1.36.1-cp38-cp38-win32.whl", hash = "sha256:7863c2a140e829b1f4c6d67bf0bf15e5321ac4766d0a295e2682970d9dd4b091"}, +    {file = "grpcio-1.36.1-cp38-cp38-win_amd64.whl", hash = "sha256:f214076eb13da9e65c1aa9877b51fca03f51a82bd8691358e1a1edd9ff341330"}, +    {file = "grpcio-1.36.1-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:ec753c022b39656f88409fbf9f2d3b28497e3f17aa678f884d78776b41ebe6bd"}, +    {file = "grpcio-1.36.1-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:0648a6d5d7ddcd9c8462d7d961660ee024dad6b88152ee3a521819e611830edf"}, +    {file = "grpcio-1.36.1-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:45ea10dd133a43b10c0b4326834107ebccfee25dab59b312b78e018c2d72a1f0"}, +    {file = "grpcio-1.36.1-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:bab743cdac1d6d8326c65d1d091d0740b39966dfab06519f74a03b3d128b8454"}, +    {file = "grpcio-1.36.1-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:09af8ceb91860086216edc6e5ea15f9beb2cf81687faa43b7c03216f5b73e244"}, +    {file = "grpcio-1.36.1-cp39-cp39-win32.whl", hash = "sha256:f3f70505207ee1cee65f60a799fd8e06e07861409aa0d55d834825a79b40c297"}, +    {file = "grpcio-1.36.1-cp39-cp39-win_amd64.whl", hash = "sha256:f22c11772eff25ba1ca536e760b8c34ba56f2a9d66b6842cb11770a8f61f879d"}, +    {file = "grpcio-1.36.1.tar.gz", hash = "sha256:a66ea59b20f3669df0f0c6a3bd57b985e5b2d1dcf3e4c29819bb8dc232d0fd38"}, +]  gtn = [      {file = "gtn-0.0.0.tar.gz", hash = "sha256:72fece9ca51df161c1274e570d6f5f933e76f4cac9d8d6dd543a3fe0383f7268"},  ] @@ -2370,6 +2770,10 @@ loguru = [      {file = "loguru-0.5.3-py3-none-any.whl", hash = "sha256:f8087ac396b5ee5f67c963b495d615ebbceac2796379599820e324419d53667c"},      {file = "loguru-0.5.3.tar.gz", hash = "sha256:b28e72ac7a98be3d28ad28570299a393dfcd32e5e3f6a353dec94675767b6319"},  ] +markdown = [ +    {file = "Markdown-3.3.4-py3-none-any.whl", hash = "sha256:96c3ba1261de2f7547b46a00ea8463832c921d3f9d6aba3f255a6f71386db20c"}, +    {file = "Markdown-3.3.4.tar.gz", hash = "sha256:31b5b491868dcc87d6c24b7e3d19a0d730d59d3e46f4eea6430a321bed387a49"}, +]  markupsafe = [      {file = "MarkupSafe-1.1.1-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161"},      {file = "MarkupSafe-1.1.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7"}, @@ -2467,6 +2871,45 @@ more-itertools = [      {file = "more-itertools-8.6.0.tar.gz", hash = "sha256:b3a9005928e5bed54076e6e549c792b306fddfe72b2d1d22dd63d42d5d3899cf"},      {file = "more_itertools-8.6.0-py3-none-any.whl", hash = "sha256:8e1a2a43b2f2727425f2b5839587ae37093f19153dc26c0927d1048ff6557330"},  ] +multidict = [ +    {file = "multidict-5.1.0-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:b7993704f1a4b204e71debe6095150d43b2ee6150fa4f44d6d966ec356a8d61f"}, +    {file = "multidict-5.1.0-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:9dd6e9b1a913d096ac95d0399bd737e00f2af1e1594a787e00f7975778c8b2bf"}, +    {file = "multidict-5.1.0-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:f21756997ad8ef815d8ef3d34edd98804ab5ea337feedcd62fb52d22bf531281"}, +    {file = "multidict-5.1.0-cp36-cp36m-manylinux2014_i686.whl", hash = "sha256:1ab820665e67373de5802acae069a6a05567ae234ddb129f31d290fc3d1aa56d"}, +    {file = "multidict-5.1.0-cp36-cp36m-manylinux2014_ppc64le.whl", hash = "sha256:9436dc58c123f07b230383083855593550c4d301d2532045a17ccf6eca505f6d"}, +    {file = "multidict-5.1.0-cp36-cp36m-manylinux2014_s390x.whl", hash = "sha256:830f57206cc96ed0ccf68304141fec9481a096c4d2e2831f311bde1c404401da"}, +    {file = "multidict-5.1.0-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:2e68965192c4ea61fff1b81c14ff712fc7dc15d2bd120602e4a3494ea6584224"}, +    {file = "multidict-5.1.0-cp36-cp36m-win32.whl", hash = "sha256:2f1a132f1c88724674271d636e6b7351477c27722f2ed789f719f9e3545a3d26"}, +    {file = "multidict-5.1.0-cp36-cp36m-win_amd64.whl", hash = "sha256:3a4f32116f8f72ecf2a29dabfb27b23ab7cdc0ba807e8459e59a93a9be9506f6"}, +    {file = "multidict-5.1.0-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:46c73e09ad374a6d876c599f2328161bcd95e280f84d2060cf57991dec5cfe76"}, +    {file = "multidict-5.1.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:018132dbd8688c7a69ad89c4a3f39ea2f9f33302ebe567a879da8f4ca73f0d0a"}, +    {file = "multidict-5.1.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:4b186eb7d6ae7c06eb4392411189469e6a820da81447f46c0072a41c748ab73f"}, +    {file = "multidict-5.1.0-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:3a041b76d13706b7fff23b9fc83117c7b8fe8d5fe9e6be45eee72b9baa75f348"}, +    {file = "multidict-5.1.0-cp37-cp37m-manylinux2014_ppc64le.whl", hash = "sha256:051012ccee979b2b06be928a6150d237aec75dd6bf2d1eeeb190baf2b05abc93"}, +    {file = "multidict-5.1.0-cp37-cp37m-manylinux2014_s390x.whl", hash = "sha256:6a4d5ce640e37b0efcc8441caeea8f43a06addace2335bd11151bc02d2ee31f9"}, +    {file = "multidict-5.1.0-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:5cf3443199b83ed9e955f511b5b241fd3ae004e3cb81c58ec10f4fe47c7dce37"}, +    {file = "multidict-5.1.0-cp37-cp37m-win32.whl", hash = "sha256:f200755768dc19c6f4e2b672421e0ebb3dd54c38d5a4f262b872d8cfcc9e93b5"}, +    {file = "multidict-5.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:05c20b68e512166fddba59a918773ba002fdd77800cad9f55b59790030bab632"}, +    {file = "multidict-5.1.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:54fd1e83a184e19c598d5e70ba508196fd0bbdd676ce159feb412a4a6664f952"}, +    {file = "multidict-5.1.0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:0e3c84e6c67eba89c2dbcee08504ba8644ab4284863452450520dad8f1e89b79"}, +    {file = "multidict-5.1.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:dc862056f76443a0db4509116c5cd480fe1b6a2d45512a653f9a855cc0517456"}, +    {file = "multidict-5.1.0-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:0e929169f9c090dae0646a011c8b058e5e5fb391466016b39d21745b48817fd7"}, +    {file = "multidict-5.1.0-cp38-cp38-manylinux2014_ppc64le.whl", hash = "sha256:d81eddcb12d608cc08081fa88d046c78afb1bf8107e6feab5d43503fea74a635"}, +    {file = "multidict-5.1.0-cp38-cp38-manylinux2014_s390x.whl", hash = "sha256:585fd452dd7782130d112f7ddf3473ffdd521414674c33876187e101b588738a"}, +    {file = "multidict-5.1.0-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:37e5438e1c78931df5d3c0c78ae049092877e5e9c02dd1ff5abb9cf27a5914ea"}, +    {file = "multidict-5.1.0-cp38-cp38-win32.whl", hash = "sha256:07b42215124aedecc6083f1ce6b7e5ec5b50047afa701f3442054373a6deb656"}, +    {file = "multidict-5.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:929006d3c2d923788ba153ad0de8ed2e5ed39fdbe8e7be21e2f22ed06c6783d3"}, +    {file = "multidict-5.1.0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:b797515be8743b771aa868f83563f789bbd4b236659ba52243b735d80b29ed93"}, +    {file = "multidict-5.1.0-cp39-cp39-manylinux1_i686.whl", hash = "sha256:d5c65bdf4484872c4af3150aeebe101ba560dcfb34488d9a8ff8dbcd21079647"}, +    {file = "multidict-5.1.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:b47a43177a5e65b771b80db71e7be76c0ba23cc8aa73eeeb089ed5219cdbe27d"}, +    {file = "multidict-5.1.0-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:806068d4f86cb06af37cd65821554f98240a19ce646d3cd24e1c33587f313eb8"}, +    {file = "multidict-5.1.0-cp39-cp39-manylinux2014_ppc64le.whl", hash = "sha256:46dd362c2f045095c920162e9307de5ffd0a1bfbba0a6e990b344366f55a30c1"}, +    {file = "multidict-5.1.0-cp39-cp39-manylinux2014_s390x.whl", hash = "sha256:ace010325c787c378afd7f7c1ac66b26313b3344628652eacd149bdd23c68841"}, +    {file = "multidict-5.1.0-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:ecc771ab628ea281517e24fd2c52e8f31c41e66652d07599ad8818abaad38cda"}, +    {file = "multidict-5.1.0-cp39-cp39-win32.whl", hash = "sha256:fc13a9524bc18b6fb6e0dbec3533ba0496bbed167c56d0aabefd965584557d80"}, +    {file = "multidict-5.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:7df80d07818b385f3129180369079bd6934cf70469f99daaebfac89dca288359"}, +    {file = "multidict-5.1.0.tar.gz", hash = "sha256:25b4e5f22d3a37ddf3effc0710ba692cfc792c2b9edfb9c05aefe823256e84d5"}, +]  mypy = [      {file = "mypy-0.770-cp35-cp35m-macosx_10_6_x86_64.whl", hash = "sha256:a34b577cdf6313bf24755f7a0e3f3c326d5c1f4fe7422d1d06498eb25ad0c600"},      {file = "mypy-0.770-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:86c857510a9b7c3104cf4cde1568f4921762c8f9842e987bc03ed4f160925754"}, @@ -2539,6 +2982,10 @@ numpy = [  nvidia-ml-py3 = [      {file = "nvidia-ml-py3-7.352.0.tar.gz", hash = "sha256:390f02919ee9d73fe63a98c73101061a6b37fa694a793abf56673320f1f51277"},  ] +oauthlib = [ +    {file = "oauthlib-3.1.0-py2.py3-none-any.whl", hash = "sha256:df884cd6cbe20e32633f1db1072e9356f53638e4361bef4e8b03c9127c9328ea"}, +    {file = "oauthlib-3.1.0.tar.gz", hash = "sha256:bee41cc35fcca6e988463cacc3bcb8a96224f470ca547e697b604cc697b2f889"}, +]  omegaconf = [      {file = "omegaconf-2.0.5-py3-none-any.whl", hash = "sha256:d1a39f93e06b33ed7033311006d41bdc7a92e6c484c09327f4dc6bdcbbfe8a8e"},      {file = "omegaconf-2.0.5.tar.gz", hash = "sha256:be2378999380395d51eedb39cfcc03d967971d9baa99d1c36f8527b09ea72709"}, @@ -2686,6 +3133,36 @@ py = [      {file = "py-1.9.0-py2.py3-none-any.whl", hash = "sha256:366389d1db726cd2fcfc79732e75410e5fe4d31db13692115529d34069a043c2"},      {file = "py-1.9.0.tar.gz", hash = "sha256:9ca6883ce56b4e8da7e79ac18787889fa5206c79dcc67fb065376cd2fe03f342"},  ] +pyasn1 = [ +    {file = "pyasn1-0.4.8-py2.4.egg", hash = "sha256:fec3e9d8e36808a28efb59b489e4528c10ad0f480e57dcc32b4de5c9d8c9fdf3"}, +    {file = "pyasn1-0.4.8-py2.5.egg", hash = "sha256:0458773cfe65b153891ac249bcf1b5f8f320b7c2ce462151f8fa74de8934becf"}, +    {file = "pyasn1-0.4.8-py2.6.egg", hash = "sha256:5c9414dcfede6e441f7e8f81b43b34e834731003427e5b09e4e00e3172a10f00"}, +    {file = "pyasn1-0.4.8-py2.7.egg", hash = "sha256:6e7545f1a61025a4e58bb336952c5061697da694db1cae97b116e9c46abcf7c8"}, +    {file = "pyasn1-0.4.8-py2.py3-none-any.whl", hash = "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d"}, +    {file = "pyasn1-0.4.8-py3.1.egg", hash = "sha256:78fa6da68ed2727915c4767bb386ab32cdba863caa7dbe473eaae45f9959da86"}, +    {file = "pyasn1-0.4.8-py3.2.egg", hash = "sha256:08c3c53b75eaa48d71cf8c710312316392ed40899cb34710d092e96745a358b7"}, +    {file = "pyasn1-0.4.8-py3.3.egg", hash = "sha256:03840c999ba71680a131cfaee6fab142e1ed9bbd9c693e285cc6aca0d555e576"}, +    {file = "pyasn1-0.4.8-py3.4.egg", hash = "sha256:7ab8a544af125fb704feadb008c99a88805126fb525280b2270bb25cc1d78a12"}, +    {file = "pyasn1-0.4.8-py3.5.egg", hash = "sha256:e89bf84b5437b532b0803ba5c9a5e054d21fec423a89952a74f87fa2c9b7bce2"}, +    {file = "pyasn1-0.4.8-py3.6.egg", hash = "sha256:014c0e9976956a08139dc0712ae195324a75e142284d5f87f1a87ee1b068a359"}, +    {file = "pyasn1-0.4.8-py3.7.egg", hash = "sha256:99fcc3c8d804d1bc6d9a099921e39d827026409a58f2a720dcdb89374ea0c776"}, +    {file = "pyasn1-0.4.8.tar.gz", hash = "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba"}, +] +pyasn1-modules = [ +    {file = "pyasn1-modules-0.2.8.tar.gz", hash = "sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e"}, +    {file = "pyasn1_modules-0.2.8-py2.4.egg", hash = "sha256:0fe1b68d1e486a1ed5473f1302bd991c1611d319bba158e98b106ff86e1d7199"}, +    {file = "pyasn1_modules-0.2.8-py2.5.egg", hash = "sha256:fe0644d9ab041506b62782e92b06b8c68cca799e1a9636ec398675459e031405"}, +    {file = "pyasn1_modules-0.2.8-py2.6.egg", hash = "sha256:a99324196732f53093a84c4369c996713eb8c89d360a496b599fb1a9c47fc3eb"}, +    {file = "pyasn1_modules-0.2.8-py2.7.egg", hash = "sha256:0845a5582f6a02bb3e1bde9ecfc4bfcae6ec3210dd270522fee602365430c3f8"}, +    {file = "pyasn1_modules-0.2.8-py2.py3-none-any.whl", hash = "sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74"}, +    {file = "pyasn1_modules-0.2.8-py3.1.egg", hash = "sha256:f39edd8c4ecaa4556e989147ebf219227e2cd2e8a43c7e7fcb1f1c18c5fd6a3d"}, +    {file = "pyasn1_modules-0.2.8-py3.2.egg", hash = "sha256:b80486a6c77252ea3a3e9b1e360bc9cf28eaac41263d173c032581ad2f20fe45"}, +    {file = "pyasn1_modules-0.2.8-py3.3.egg", hash = "sha256:65cebbaffc913f4fe9e4808735c95ea22d7a7775646ab690518c056784bc21b4"}, +    {file = "pyasn1_modules-0.2.8-py3.4.egg", hash = "sha256:15b7c67fabc7fc240d87fb9aabf999cf82311a6d6fb2c70d00d3d0604878c811"}, +    {file = "pyasn1_modules-0.2.8-py3.5.egg", hash = "sha256:426edb7a5e8879f1ec54a1864f16b882c2837bfd06eee62f2c982315ee2473ed"}, +    {file = "pyasn1_modules-0.2.8-py3.6.egg", hash = "sha256:cbac4bc38d117f2a49aeedec4407d23e8866ea4ac27ff2cf7fb3e5b570df19e0"}, +    {file = "pyasn1_modules-0.2.8-py3.7.egg", hash = "sha256:c29a5e5cc7a3f05926aff34e097e84f8589cd790ce0ed41b67aed6857b26aafd"}, +]  pycodestyle = [      {file = "pycodestyle-2.6.0-py2.py3-none-any.whl", hash = "sha256:2295e7b2f6b5bd100585ebcb1f616591b652db8a741695b3d8f5d28bdc934367"},      {file = "pycodestyle-2.6.0.tar.gz", hash = "sha256:c58a7d2815e0e8d7972bf1803331fb0152f867bd89adf8a01dfd55085434192e"}, @@ -2732,6 +3209,10 @@ python-dateutil = [  python-levenshtein = [      {file = "python-Levenshtein-0.12.0.tar.gz", hash = "sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1"},  ] +pytorch-lightning = [ +    {file = "pytorch-lightning-1.2.4.tar.gz", hash = "sha256:bcf9d963ef6e0faa2d9a2149e16c212e1d471c1b4b555392b1e59632c02da9ab"}, +    {file = "pytorch_lightning-1.2.4-py3-none-any.whl", hash = "sha256:fefb0124558bc3c26b1b12a37ddb01d5c892131e21404bc9c28daba4d5f26f57"}, +]  pytorch-metric-learning = [      {file = "pytorch-metric-learning-0.9.94.tar.gz", hash = "sha256:523ab08ee10745edc6512cc32b62b4ba0c858906cfd5a2e9e5c9bfa1a6b7daa2"},      {file = "pytorch_metric_learning-0.9.94-py3-none-any.whl", hash = "sha256:3719c380c3b8d90f599c3c7e9fe7410d025b091d389ef7769044a1437096dbcc"}, @@ -2872,6 +3353,15 @@ requests = [      {file = "requests-2.25.0-py2.py3-none-any.whl", hash = "sha256:e786fa28d8c9154e6a4de5d46a1d921b8749f8b74e28bde23768e5e16eece998"},      {file = "requests-2.25.0.tar.gz", hash = "sha256:7f1a0b932f4a60a1a65caa4263921bb7d9ee911957e0ae4a23a6dd08185ad5f8"},  ] +requests-oauthlib = [ +    {file = "requests-oauthlib-1.3.0.tar.gz", hash = "sha256:b4261601a71fd721a8bd6d7aa1cc1d6a8a93b4a9f5e96626f8e4d91e8beeaa6a"}, +    {file = "requests_oauthlib-1.3.0-py2.py3-none-any.whl", hash = "sha256:7f71572defaecd16372f9006f33c2ec8c077c3cfa6f5911a9a90202beb513f3d"}, +    {file = "requests_oauthlib-1.3.0-py3.7.egg", hash = "sha256:fa6c47b933f01060936d87ae9327fead68768b69c6c9ea2109c48be30f2d4dbc"}, +] +rsa = [ +    {file = "rsa-4.7.2-py3-none-any.whl", hash = "sha256:78f9a9bf4e7be0c5ded4583326e7461e3a3c5aae24073648b4bdfa797d78c9d2"}, +    {file = "rsa-4.7.2.tar.gz", hash = "sha256:9d689e6ca1b3038bc82bf8d23e944b6b6037bc02301a574935b2dd946e0353b9"}, +]  safety = [      {file = "safety-1.9.0-py2.py3-none-any.whl", hash = "sha256:86c1c4a031fe35bd624fce143fbe642a0234d29f7cbf7a9aa269f244a955b087"},      {file = "safety-1.9.0.tar.gz", hash = "sha256:23bf20690d4400edc795836b0c983c2b4cbbb922233108ff925b7dd7750f00c9"}, @@ -3031,6 +3521,12 @@ subprocess32 = [      {file = "subprocess32-3.5.4-cp27-cp27mu-manylinux2014_x86_64.whl", hash = "sha256:e45d985aef903c5b7444d34350b05da91a9e0ea015415ab45a21212786c649d0"},      {file = "subprocess32-3.5.4.tar.gz", hash = "sha256:eb2937c80497978d181efa1b839ec2d9622cf9600a039a79d0e108d1f9aec79d"},  ] +tensorboard = [ +    {file = "tensorboard-2.4.1-py3-none-any.whl", hash = "sha256:7b8c53c396069b618f6f276ec94fc45d17e3282d668979216e5d30be472115e4"}, +] +tensorboard-plugin-wit = [ +    {file = "tensorboard_plugin_wit-1.8.0-py3-none-any.whl", hash = "sha256:2a80d1c551d741e99b2f197bb915d8a133e24adb8da1732b840041860f91183a"}, +]  terminado = [      {file = "terminado-0.9.1-py3-none-any.whl", hash = "sha256:c55f025beb06c2e2669f7ba5a04f47bb3304c30c05842d4981d8f0fc9ab3b4e3"},      {file = "terminado-0.9.1.tar.gz", hash = "sha256:3da72a155b807b01c9e8a5babd214e052a0a45a975751da3521a1c3381ce6d76"}, @@ -3191,6 +3687,10 @@ webencodings = [      {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"},      {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"},  ] +werkzeug = [ +    {file = "Werkzeug-1.0.1-py2.py3-none-any.whl", hash = "sha256:2de2a5db0baeae7b2d2664949077c2ac63fbd16d98da0ff71837f7d1dea3fd43"}, +    {file = "Werkzeug-1.0.1.tar.gz", hash = "sha256:6c80b1e5ad3665290ea39320b91e1be1e0d5f60652b964a3070216de83d2e47c"}, +]  widgetsnbextension = [      {file = "widgetsnbextension-3.5.1-py2.py3-none-any.whl", hash = "sha256:bd314f8ceb488571a5ffea6cc5b9fc6cba0adaf88a9d2386b93a489751938bcd"},      {file = "widgetsnbextension-3.5.1.tar.gz", hash = "sha256:079f87d87270bce047512400efd70238820751a11d2d8cb137a5a5bdbaf255c7"}, @@ -3203,3 +3703,42 @@ xdoctest = [      {file = "xdoctest-0.12.0-py2.py3-none-any.whl", hash = "sha256:82424d2cc4b6d6b96b7b7134c81e97a4594c536547c1954533128a6a26cf1cb2"},      {file = "xdoctest-0.12.0.tar.gz", hash = "sha256:2d985d8d78d4444079d3b072965327ab06a5e6dcb4882f3561d7596eb4da6b13"},  ] +yarl = [ +    {file = "yarl-1.6.3-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:0355a701b3998dcd832d0dc47cc5dedf3874f966ac7f870e0f3a6788d802d434"}, +    {file = "yarl-1.6.3-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:bafb450deef6861815ed579c7a6113a879a6ef58aed4c3a4be54400ae8871478"}, +    {file = "yarl-1.6.3-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:547f7665ad50fa8563150ed079f8e805e63dd85def6674c97efd78eed6c224a6"}, +    {file = "yarl-1.6.3-cp36-cp36m-manylinux2014_i686.whl", hash = "sha256:63f90b20ca654b3ecc7a8d62c03ffa46999595f0167d6450fa8383bab252987e"}, +    {file = "yarl-1.6.3-cp36-cp36m-manylinux2014_ppc64le.whl", hash = "sha256:97b5bdc450d63c3ba30a127d018b866ea94e65655efaf889ebeabc20f7d12406"}, +    {file = "yarl-1.6.3-cp36-cp36m-manylinux2014_s390x.whl", hash = "sha256:d8d07d102f17b68966e2de0e07bfd6e139c7c02ef06d3a0f8d2f0f055e13bb76"}, +    {file = "yarl-1.6.3-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:15263c3b0b47968c1d90daa89f21fcc889bb4b1aac5555580d74565de6836366"}, +    {file = "yarl-1.6.3-cp36-cp36m-win32.whl", hash = "sha256:b5dfc9a40c198334f4f3f55880ecf910adebdcb2a0b9a9c23c9345faa9185721"}, +    {file = "yarl-1.6.3-cp36-cp36m-win_amd64.whl", hash = "sha256:b2e9a456c121e26d13c29251f8267541bd75e6a1ccf9e859179701c36a078643"}, +    {file = "yarl-1.6.3-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:ce3beb46a72d9f2190f9e1027886bfc513702d748047b548b05dab7dfb584d2e"}, +    {file = "yarl-1.6.3-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:2ce4c621d21326a4a5500c25031e102af589edb50c09b321049e388b3934eec3"}, +    {file = "yarl-1.6.3-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:d26608cf178efb8faa5ff0f2d2e77c208f471c5a3709e577a7b3fd0445703ac8"}, +    {file = "yarl-1.6.3-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:4c5bcfc3ed226bf6419f7a33982fb4b8ec2e45785a0561eb99274ebbf09fdd6a"}, +    {file = "yarl-1.6.3-cp37-cp37m-manylinux2014_ppc64le.whl", hash = "sha256:4736eaee5626db8d9cda9eb5282028cc834e2aeb194e0d8b50217d707e98bb5c"}, +    {file = "yarl-1.6.3-cp37-cp37m-manylinux2014_s390x.whl", hash = "sha256:68dc568889b1c13f1e4745c96b931cc94fdd0defe92a72c2b8ce01091b22e35f"}, +    {file = "yarl-1.6.3-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:7356644cbed76119d0b6bd32ffba704d30d747e0c217109d7979a7bc36c4d970"}, +    {file = "yarl-1.6.3-cp37-cp37m-win32.whl", hash = "sha256:00d7ad91b6583602eb9c1d085a2cf281ada267e9a197e8b7cae487dadbfa293e"}, +    {file = "yarl-1.6.3-cp37-cp37m-win_amd64.whl", hash = "sha256:69ee97c71fee1f63d04c945f56d5d726483c4762845400a6795a3b75d56b6c50"}, +    {file = "yarl-1.6.3-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:e46fba844f4895b36f4c398c5af062a9808d1f26b2999c58909517384d5deda2"}, +    {file = "yarl-1.6.3-cp38-cp38-manylinux1_i686.whl", hash = "sha256:31ede6e8c4329fb81c86706ba8f6bf661a924b53ba191b27aa5fcee5714d18ec"}, +    {file = "yarl-1.6.3-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:fcbb48a93e8699eae920f8d92f7160c03567b421bc17362a9ffbbd706a816f71"}, +    {file = "yarl-1.6.3-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:72a660bdd24497e3e84f5519e57a9ee9220b6f3ac4d45056961bf22838ce20cc"}, +    {file = "yarl-1.6.3-cp38-cp38-manylinux2014_ppc64le.whl", hash = "sha256:324ba3d3c6fee56e2e0b0d09bf5c73824b9f08234339d2b788af65e60040c959"}, +    {file = "yarl-1.6.3-cp38-cp38-manylinux2014_s390x.whl", hash = "sha256:e6b5460dc5ad42ad2b36cca524491dfcaffbfd9c8df50508bddc354e787b8dc2"}, +    {file = "yarl-1.6.3-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:6d6283d8e0631b617edf0fd726353cb76630b83a089a40933043894e7f6721e2"}, +    {file = "yarl-1.6.3-cp38-cp38-win32.whl", hash = "sha256:9ede61b0854e267fd565e7527e2f2eb3ef8858b301319be0604177690e1a3896"}, +    {file = "yarl-1.6.3-cp38-cp38-win_amd64.whl", hash = "sha256:f0b059678fd549c66b89bed03efcabb009075bd131c248ecdf087bdb6faba24a"}, +    {file = "yarl-1.6.3-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:329412812ecfc94a57cd37c9d547579510a9e83c516bc069470db5f75684629e"}, +    {file = "yarl-1.6.3-cp39-cp39-manylinux1_i686.whl", hash = "sha256:c49ff66d479d38ab863c50f7bb27dee97c6627c5fe60697de15529da9c3de724"}, +    {file = "yarl-1.6.3-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:f040bcc6725c821a4c0665f3aa96a4d0805a7aaf2caf266d256b8ed71b9f041c"}, +    {file = "yarl-1.6.3-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:d5c32c82990e4ac4d8150fd7652b972216b204de4e83a122546dce571c1bdf25"}, +    {file = "yarl-1.6.3-cp39-cp39-manylinux2014_ppc64le.whl", hash = "sha256:d597767fcd2c3dc49d6eea360c458b65643d1e4dbed91361cf5e36e53c1f8c96"}, +    {file = "yarl-1.6.3-cp39-cp39-manylinux2014_s390x.whl", hash = "sha256:8aa3decd5e0e852dc68335abf5478a518b41bf2ab2f330fe44916399efedfae0"}, +    {file = "yarl-1.6.3-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:73494d5b71099ae8cb8754f1df131c11d433b387efab7b51849e7e1e851f07a4"}, +    {file = "yarl-1.6.3-cp39-cp39-win32.whl", hash = "sha256:5b883e458058f8d6099e4420f0cc2567989032b5f34b271c0827de9f1079a424"}, +    {file = "yarl-1.6.3-cp39-cp39-win_amd64.whl", hash = "sha256:4953fb0b4fdb7e08b2f3b3be80a00d28c5c8a2056bb066169de00e6501b986b6"}, +    {file = "yarl-1.6.3.tar.gz", hash = "sha256:8a9066529240171b68893d60dca86a763eae2139dd42f42106b03cf4b426bf10"}, +] diff --git a/pyproject.toml b/pyproject.toml index 2f774b2..ef75edf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ wandb = "^0.10.12"  einops = "^0.3.0"  gtn = "^0.0.0"  sentencepiece = "^0.1.95" +pytorch-lightning = "^1.2.4"  [tool.poetry.dev-dependencies]  pytest = "^5.4.2" diff --git a/text_recognizer/datasets/base_data_module.py b/text_recognizer/datasets/base_data_module.py new file mode 100644 index 0000000..09a0a43 --- /dev/null +++ b/text_recognizer/datasets/base_data_module.py @@ -0,0 +1,69 @@ +"""Base lightning DataModule class.""" +from pathlib import Path +from typing import Dict + +import pytorch_lightning as pl +from torch.utils.data import DataLoader + + +def load_and_print_info(data_module_class: type) -> None: +    """Load EMNISTLines and prints info.""" +    dataset = data_module_class() +    dataset.prepare_data() +    dataset.setup() +    print(dataset) + + +class BaseDataModule(pl.LightningDataModule): +    """Base PyTorch Lightning DataModule.""" +     +    def __init__(self, batch_size: int = 128, num_workers: int = 0) -> None: +        super().__init__() +        self.batch_size = batch_size +        self.num_workers = num_workers + +        # Placeholders for subclasses. +        self.dims = None +        self.output_dims = None +        self.mapping = None + +    @classmethod +    def data_dirname(cls) -> Path: +        """Return the path to the base data directory.""" +        return Path(__file__).resolve().parents[2] / "data" + +    def config(self) -> Dict: +        """Return important settings of the dataset.""" +        return {"input_dim": self.dims, "output_dims": self.output_dims, "mapping": self.mapping} + +    def prepare_data(self) -> None: +        """Prepare data for training.""" +        pass + +    def setup(self, stage: Any = None) -> None: +        """Split into train, val, test, and set dims. +         +        Should assign `torch Dataset` objects to self.data_train, self.data_val, and +            optionally self.data_test. + +        Args: +            stage (Any): Variable to set splits. + +        """ +        self.data_train = None +        self.data_val = None +        self.data_test = None + + +    def train_dataloader(self) -> DataLoader: +        """Retun DataLoader for train data.""" +        return DataLoader(self.data_train, shuffle=True, batch_size=self.batch_size, num_workers=self.num_workers, pin_memory=True) + +    def val_dataloader(self) -> DataLoader: +        """Return DataLoader for val data.""" +        return DataLoader(self.data_val, shuffle=False, batch_size=self.batch_size, num_workers=self.num_workers, pin_memory=True) + +    def test_dataloader(self) -> DataLoader: +        """Return DataLoader for val data.""" +        return DataLoader(self.data_test, shuffle=False, batch_size=self.batch_size, num_workers=self.num_workers, pin_memory=True) + diff --git a/text_recognizer/datasets/download_utils.py b/text_recognizer/datasets/download_utils.py new file mode 100644 index 0000000..7a2cab8 --- /dev/null +++ b/text_recognizer/datasets/download_utils.py @@ -0,0 +1,73 @@ +"""Util functions for downloading datasets.""" +import hashlib +from pathlib import Path +from typing import Dict, List, Optional +from urllib.request import urlretrieve + +from loguru import logger +from tqdm import tqdm + + +def _compute_sha256(filename: Path) -> str: +    """Returns the SHA256 checksum of a file.""" +    with filename.open(mode="rb") as f: +        return hashlib.sha256(f.read()).hexdigest() + + +class TqdmUpTo(tqdm): +    """TQDM progress bar when downloading files. + +    From https://github.com/tqdm/tqdm/blob/master/examples/tqdm_wget.py + +    """ + +    def update_to( +        self, blocks: int = 1, block_size: int = 1, total_size: Optional[int] = None +    ) -> None: +        """Updates the progress bar. + +        Args: +            blocks (int): Number of blocks transferred so far. Defaults to 1. +            block_size (int): Size of each block, in tqdm units. Defaults to 1. +            total_size (Optional[int]): Total size in tqdm units. Defaults to None. +        """ +        if total_size is not None: +            self.total = total_size  # pylint: disable=attribute-defined-outside-init +        self.update(blocks * block_size - self.n) + + +def _download_url(url: str, filename: str) -> None: +    """Downloads a file from url to filename, with a progress bar.""" +    with TqdmUpTo(unit="B", unit_scale=True, unit_divisor=1024, miniters=1) as t: +        urlretrieve(url, filename, reporthook=t.update_to, data=None)  # nosec + + +def download_dataset(metadata: Dict, dl_dir: Path) -> Optional[Path]: +    """Downloads dataset using a metadata file. + +    Args: +        metadata (Dict): A metadata file of the dataset. +        dl_dir (Path): Download directory for the dataset. + +    Returns: +        Optional[Path]: Returns filename if dataset is downloaded, None if it already +            exists. + +    Raises: +        ValueError: If the SHA-256 value is not the same between the dataset and +            the metadata file. + +    """ +    dl_dir.mkdir(parents=True, exist_ok=True) +    filename = dl_dir / metadata["filename"] +    if filename.exists(): +        return +    logger.info(f"Downloading raw dataset from {metadata['url']} to {filename}...") +    _download_url(metadata["url"], filename)  +    logger.info("Computing the SHA-256...") +    sha256 = _compute_sha256(filename) +    if sha256 != metadata["sha256"]: +        raise ValueError( +                "Downloaded data file SHA-256 does not match that listed in metadata document." +                ) +    return filename diff --git a/text_recognizer/datasets/emnist.py b/text_recognizer/datasets/emnist.py new file mode 100644 index 0000000..e99dbfd --- /dev/null +++ b/text_recognizer/datasets/emnist.py @@ -0,0 +1,194 @@ +"""EMNIST dataset: downloads it from FSDL aws url if not present.""" +from pathlib import Path +from typing import Sequence, Tuple +import json +import os +import shutil +import zipfile + +import h5py +import numpy as np +from loguru import logger +import toml +import torch +from torch.utils.data import random_split +from torchvision import transforms + +from text_recognizer.datasets.base_dataset import BaseDataset +from text_recognizer.datasets.base_data_module import BaseDataModule, load_print_info +from text_recognizer.datasets.download_utils import download_dataset + + +SEED = 4711 +NUM_SPECIAL_TOKENS = 4 +SAMPLE_TO_BALANCE = True  + +RAW_DATA_DIRNAME = BaseDataModule.data_dirname() / "raw" / "emnist" +METADATA_FILENAME = RAW_DATA_DIRNAME / "metadata.toml" +DL_DATA_DIRNAME = BaseDataModule.data_dirname() / "downloaded" / "emnist" +PROCESSED_DATA_DIRNAME = BaseDataset.data_dirname() / "processed" / "emnist" +PROCESSED_DATA_FILENAME = PROCESSED_DATA_DIRNAME / "byclass.h5" +ESSENTIALS_FILENAME = Path(__file__).parents[0].resolve() / "emnsit_essentials.json" + + +class EMNIST(BaseDataModule): +    """ +    "The EMNIST dataset is a set of handwritten character digits derived from the NIST Special Database 19 +    and converted to a 28x28 pixel image format and dataset structure that directly matches the MNIST dataset." +    From https://www.nist.gov/itl/iad/image-group/emnist-dataset + +    The data split we will use is +    EMNIST ByClass: 814,255 characters. 62 unbalanced classes. +    """ + +    def __init__(self, batch_size: int = 128, num_workers: int = 0, train_fraction: float = 0.8) -> None: +        super().__init__(batch_size, num_workers) +        if not ESSENTIALS_FILENAME.exists(): +            _download_and_process_emnist() +        with ESSENTIALS_FILENAME.open() as f: +            essentials = json.load(f) +        self.train_fraction = train_fraction +        self.mapping = list(essentials["characters"]) +        self.inverse_mapping = {v: k for k, v in enumerate(self.mapping)} +        self.data_train = None +        self.data_val = None +        self.data_test = None +        self.transform = transforms.Compose([transforms.ToTensor()]) +        self.dims = (1, *essentials["input_shape"]) +        self.output_dims = (1,) + +    def prepare_data(self) -> None: +        if not PROCESSED_DATA_FILENAME.exists(): +            _download_and_process_emnist() + +    def setup(self, stage: str = None) -> None: +        if stage == "fit" or stage is None: +            with h5py.File(PROCESSED_DATA_FILENAME, "r") as f: +                data = f["x_train"][:] +                targets = f["y_train"][:] +         +            dataset_train = BaseDataset(data, targets, transform=self.transform) +            train_size = int(self.train_fraction * len(dataset_train)) +            val_size = len(dataset_train) - train_size +            self.data_train, self.data_val = random_split(dataset_train, [train_size, val_size], generator=torch.Generator()) + +        if stage == "test" or stage is None: +            with h5py.File(PROCESSED_DATA_FILENAME, "r") as f: +                data = f["x_test"][:] +                targets = f["y_test"][:] +            self.data_test = BaseDataset(data, targets, transform=self.transform) + + +    def __repr__(self) -> str: +        basic = f"EMNIST Dataset\nNum classes: {len(self.mapping)}\nMapping: {self.mapping}\nDims: {self.dims}\n" +        if not any([self.data_train, self.data_val, self.data_test]): +            return basic + +        datum, target = next(iter(self.train_dataloader())) +        data = ( +            f"Train/val/test sizes: {len(self.data_train)}, {len(self.data_val)}, {len(self.data_test)}\n" +            f"Batch x stats: {(datum.shape, datum.dtype, datum.min(), datum.mean(), datum.std(), datum.max())}\n" +            f"Batch y stats: {(target.shape, target.dtype, target.min(), target.max())}\n" +        ) + +        return basic + data + + +def _download_and_process_emnist() -> None: +    metadata = toml.load(METADATA_FILENAME) +    download_dataset(metadata, DL_DATA_DIRNAME) +    _process_raw_dataset(metadata["filename"], DL_DATA_DIRNAME) + + +def _process_raw_dataset(filename: str, dirname: Path) -> None: +    logger.info("Unzipping EMNIST...") +    curdir = os.getcwd() +    os.chdir(dirname) +    content = zipfile.ZipFile(filename, "r") +    content.extract("matlab/emnist-byclass.mat") + +    from scipy.io import loadmat + +    logger.info("Loading training data from .mat file") +    data = loadmat("matlab/emnist-byclass.mat") +    x_train = data["dataset"]["train"][0, 0]["images"][0, 0].reshape(-1, 28, 28).swapaxes(1, 2) +    y_train = data["dataset"]["train"][0, 0]["labels"][0, 0] + NUM_SPECIAL_TOKENS +    x_test = data["dataset"]["test"][0, 0]["images"][0, 0].reshape(-1, 28, 28).swapaxes(1, 2) +    y_test = data["dataset"]["test"][0, 0]["labels"][0, 0] + NUM_SPECIAL_TOKENS + +    if SAMPLE_TO_BALANCE: +        logger.info("Balancing classes to reduce amount of data") +        x_train, y_train = _sample_to_balance(x_train, y_train) +        x_test, y_test = _sample_to_balance(x_test, y_test) + + +    logger.info("Saving to HDF5 in a compressed format...") +    PROCESSED_DATA_DIRNAME.mkdir(parents=True, exist_ok=True) +    with h5py.File(PROCESSED_DATA_FILENAME, "w") as f: +        f.create_dataset("x_train", data=x_train, dtype="u1", compression="lzf") +        f.create_dataset("y_train", data=y_train, dtype="u1", compression="lzf") +        f.create_dataset("x_test", data=x_test, dtype="u1", compression="lzf") +        f.create_dataset("y_test", data=y_test, dtype="u1", compression="lzf") + +    logger.info("Saving essential dataset parameters to text_recognizer/datasets...") +    mapping = {int(k): chr(v) for k, v in data["dataset"]["mapping"][0, 0]} +    characters = _augment_emnist_characters(mapping.values()) +    essentials = {"characters": characters, "input_shape": list(x_train.shape[1:])} + +    with ESSENTIALS_FILENAME.open(mode="w") as f: +        json.dump(essentials, f) + +    logger.info("Cleaning up...") +    shutil.rmtree("matlab") +    os.chdir(curdir) + + +def _sample_to_balance(x: np.ndarray, y: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: +    """Balances the dataset by taking the mean number of instances per class.""" +    np.random.seed(SEED) +    num_to_sample = int(np.bincount(y.flatten()).mean()) +    all_sampled_indices = [] +    for label in np.unique(y.flatten()): +        indices = np.where(y == label)[0] +        sampled_indices = np.unique(np.random.choice(indices, num_to_sample)) +        all_sampled_indices.append(sampled_indices) +    indices = np.concatenate(all_sampled_indices) +    x_sampled = x[indices] +    y_sampled= y[indices] +    return x_sampled, y_sampled + + +def _augment_emnist_characters(characters: Sequence[str]) -> Sequence[str]: +    """Augment the mapping with extra symbols.""" +    # Extra characters from the IAM dataset. +    iam_characters = [ +            " ", +            "!", +            '"', +            "#", +            "&", +            "'", +            "(", +            ")", +            "*", +            "+", +            ",", +            "-", +            ".", +            "/", +            ":", +            ";", +            "?", +        ] + +    # Also add special tokens for: +    # - CTC blank token at index 0 +    # - Start token at index 1 +    # - End token at index 2 +    # - Padding token at index 3 +    # Note: Do not forget to update NUM_SPECIAL_TOKENS if changing this! +    return ["<b>", "<s>", "</s>", "<p>", *characters, *iam_characters] + + +if __name__ == "__main__": +    load_print_info(EMNIST) diff --git a/text_recognizer/datasets/emnist_dataset.py b/text_recognizer/datasets/emnist_dataset.py deleted file mode 100644 index 9884fdf..0000000 --- a/text_recognizer/datasets/emnist_dataset.py +++ /dev/null @@ -1,131 +0,0 @@ -"""Emnist dataset: black and white images of handwritten characters (Aa-Zz) and digits (0-9).""" - -import json -from pathlib import Path -from typing import Callable, Optional, Tuple, Union - -from loguru import logger -import numpy as np -from PIL import Image -import torch -from torch import Tensor -from torchvision.datasets import EMNIST -from torchvision.transforms import Compose, ToTensor - -from text_recognizer.datasets.dataset import Dataset -from text_recognizer.datasets.transforms import Transpose -from text_recognizer.datasets.util import DATA_DIRNAME - - -class EmnistDataset(Dataset): -    """This is a class for resampling and subsampling the PyTorch EMNIST dataset.""" - -    def __init__( -        self, -        pad_token: str = None, -        train: bool = False, -        sample_to_balance: bool = False, -        subsample_fraction: float = None, -        transform: Optional[Callable] = None, -        target_transform: Optional[Callable] = None, -        seed: int = 4711, -    ) -> None: -        """Loads the dataset and the mappings. - -        Args: -            pad_token (str): The pad token symbol. Defaults to _. -            train (bool): If True, loads the training set, otherwise the validation set is loaded. Defaults to False. -            sample_to_balance (bool): Resamples the dataset to make it balanced. Defaults to False. -            subsample_fraction (float): Description of parameter `subsample_fraction`. Defaults to None. -            transform (Optional[Callable]): Transform(s) for input data. Defaults to None. -            target_transform (Optional[Callable]): Transform(s) for output data. Defaults to None. -            seed (int): Seed number. Defaults to 4711. - -        """ -        super().__init__( -            train=train, -            subsample_fraction=subsample_fraction, -            transform=transform, -            target_transform=target_transform, -            pad_token=pad_token, -        ) - -        self.sample_to_balance = sample_to_balance - -        # Have to transpose the emnist characters, ToTensor norms input between [0,1]. -        if transform is None: -            self.transform = Compose([Transpose(), ToTensor()]) - -        self.target_transform = None - -        self.seed = seed - -    def __getitem__(self, index: Union[int, Tensor]) -> Tuple[Tensor, Tensor]: -        """Fetches samples from the dataset. - -        Args: -            index (Union[int, Tensor]): The indices of the samples to fetch. - -        Returns: -            Tuple[Tensor, Tensor]: Data target tuple. - -        """ -        if torch.is_tensor(index): -            index = index.tolist() - -        data = self.data[index] -        targets = self.targets[index] - -        if self.transform: -            data = self.transform(data) - -        if self.target_transform: -            targets = self.target_transform(targets) - -        return data, targets - -    def __repr__(self) -> str: -        """Returns information about the dataset.""" -        return ( -            "EMNIST Dataset\n" -            f"Num classes: {self.num_classes}\n" -            f"Input shape: {self.input_shape}\n" -            f"Mapping: {self.mapper.mapping}\n" -        ) - -    def _sample_to_balance(self) -> None: -        """Because the dataset is not balanced, we take at most the mean number of instances per class.""" -        np.random.seed(self.seed) -        x = self._data -        y = self._targets -        num_to_sample = int(np.bincount(y.flatten()).mean()) -        all_sampled_indices = [] -        for label in np.unique(y.flatten()): -            inds = np.where(y == label)[0] -            sampled_indices = np.unique(np.random.choice(inds, num_to_sample)) -            all_sampled_indices.append(sampled_indices) -        indices = np.concatenate(all_sampled_indices) -        x_sampled = x[indices] -        y_sampled = y[indices] -        self._data = x_sampled -        self._targets = y_sampled - -    def load_or_generate_data(self) -> None: -        """Fetch the EMNIST dataset.""" -        dataset = EMNIST( -            root=DATA_DIRNAME, -            split="byclass", -            train=self.train, -            download=False, -            transform=None, -            target_transform=None, -        ) - -        self._data = dataset.data -        self._targets = dataset.targets - -        if self.sample_to_balance: -            self._sample_to_balance() - -        if self.subsample_fraction is not None: -            self._subsample() diff --git a/text_recognizer/datasets/emnist_essentials.json b/text_recognizer/datasets/emnist_essentials.json deleted file mode 100644 index 2a0648a..0000000 --- a/text_recognizer/datasets/emnist_essentials.json +++ /dev/null @@ -1 +0,0 @@ -{"mapping": [[0, "0"], [1, "1"], [2, "2"], [3, "3"], [4, "4"], [5, "5"], [6, "6"], [7, "7"], [8, "8"], [9, "9"], [10, "A"], [11, "B"], [12, "C"], [13, "D"], [14, "E"], [15, "F"], [16, "G"], [17, "H"], [18, "I"], [19, "J"], [20, "K"], [21, "L"], [22, "M"], [23, "N"], [24, "O"], [25, "P"], [26, "Q"], [27, "R"], [28, "S"], [29, "T"], [30, "U"], [31, "V"], [32, "W"], [33, "X"], [34, "Y"], [35, "Z"], [36, "a"], [37, "b"], [38, "c"], [39, "d"], [40, "e"], [41, "f"], [42, "g"], [43, "h"], [44, "i"], [45, "j"], [46, "k"], [47, "l"], [48, "m"], [49, "n"], [50, "o"], [51, "p"], [52, "q"], [53, "r"], [54, "s"], [55, "t"], [56, "u"], [57, "v"], [58, "w"], [59, "x"], [60, "y"], [61, "z"]], "input_shape": [28, 28]} |