summary | refs | log | tree | commit | diff
path: root/text_recognizer/data/iam.py
diff options
context:
space:
mode:
author: Gustaf Rydholm <gustaf.rydholm@gmail.com>, 2021-07-06 17:42:53 +0200
committer: Gustaf Rydholm <gustaf.rydholm@gmail.com>, 2021-07-06 17:42:53 +0200
commit: eb5b206f7e1b08435378d2a02395307be55ee6f1 (patch)
tree: 0cd30234afab698eb632b20a7da97e3bc7e98882 /text_recognizer/data/iam.py
parent: 4d1f2cef39688871d2caafce42a09316381a27ae (diff)
Refactoring data with attrs and refactor conf for hydra
Diffstat (limited to 'text_recognizer/data/iam.py')
-rw-r--r-- text_recognizer/data/iam.py | 6
1 file changed, 3 insertions, 3 deletions
diff --git a/text_recognizer/data/iam.py b/text_recognizer/data/iam.py
index 261c8d3..3982c4f 100644
--- a/text_recognizer/data/iam.py
+++ b/text_recognizer/data/iam.py
@@ -5,6 +5,7 @@ from typing import Any, Dict, List
import xml.etree.ElementTree as ElementTree
import zipfile
+import attr
from boltons.cacheutils import cachedproperty
from loguru import logger
import toml
@@ -22,6 +23,7 @@ DOWNSAMPLE_FACTOR = 2 # If images were downsampled, the regions must also be.
LINE_REGION_PADDING = 16 # Add this many pixels around the exact coordinates.
+@attr.s(auto_attribs=True)
class IAM(BaseDataModule):
"""
"The IAM Lines dataset, first published at the ICDAR 1999, contains forms of unconstrained handwritten text,
@@ -35,9 +37,7 @@ class IAM(BaseDataModule):
The text lines of all data sets are mutually exclusive, thus each writer has contributed to one set only.
"""
- def __init__(self, batch_size: int = 128, num_workers: int = 0) -> None:
- super().__init__(batch_size, num_workers)
- self.metadata = toml.load(METADATA_FILENAME)
+ metadata: Dict = attr.ib(init=False, default=toml.load(METADATA_FILENAME))
def prepare_data(self) -> None:
if self.xml_filenames: