summary refs log tree commit diff
path: root/text_recognizer/data
diff options
context:
space:
mode:
author Gustaf Rydholm <gustaf.rydholm@gmail.com> 2022-09-27 23:18:35 +0200
committer Gustaf Rydholm <gustaf.rydholm@gmail.com> 2022-09-27 23:18:35 +0200
commit ca14de1fe2ab57951bc71c914c7c45b0a86fb447 (patch)
tree affb867c923066956df4d5c8d6868eade4743000 /text_recognizer/data
parent 723cf87846bf7297326fc82973a1e148af317638 (diff)
Update sentence generator
Diffstat (limited to 'text_recognizer/data')
-rw-r--r-- text_recognizer/data/utils/sentence_generator.py 8
1 files changed, 4 insertions, 4 deletions
diff --git a/text_recognizer/data/utils/sentence_generator.py b/text_recognizer/data/utils/sentence_generator.py
index c98d0da..c40373d 100644
--- a/text_recognizer/data/utils/sentence_generator.py
+++ b/text_recognizer/data/utils/sentence_generator.py
@@ -5,12 +5,12 @@ import string
from typing import Optional
import nltk
-import numpy as np
from nltk.corpus.reader.util import ConcatenatedCorpusView
+import numpy as np
-from text_recognizer.data.base_data_module import BaseDataModule
+import text_recognizer.metadata.shared as metadata
-NLTK_DATA_DIRNAME = BaseDataModule.data_dirname() / "downloaded" / "nltk"
+NLTK_DATA_DIRNAME = metadata.DOWNLOADED_DATA_DIRNAME / "nltk"
class SentenceGenerator:
@@ -25,7 +25,7 @@ class SentenceGenerator:
self.max_length = max_length
def generate(self, max_length: Optional[int] = None) -> str:
- r"""Generates a word or sentences from the Brown corpus.
+ """Generates a word or sentences from the Brown corpus.
Sample a string from the Brown corpus of length at least one word and at most
max_length, padding to max_length with the '_' characters if sentence is