From ca14de1fe2ab57951bc71c914c7c45b0a86fb447 Mon Sep 17 00:00:00 2001 From: Gustaf Rydholm Date: Tue, 27 Sep 2022 23:18:35 +0200 Subject: Update sentence generator --- text_recognizer/data/utils/sentence_generator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'text_recognizer/data/utils') diff --git a/text_recognizer/data/utils/sentence_generator.py b/text_recognizer/data/utils/sentence_generator.py index c98d0da..c40373d 100644 --- a/text_recognizer/data/utils/sentence_generator.py +++ b/text_recognizer/data/utils/sentence_generator.py @@ -5,12 +5,12 @@ import string from typing import Optional import nltk -import numpy as np from nltk.corpus.reader.util import ConcatenatedCorpusView +import numpy as np -from text_recognizer.data.base_data_module import BaseDataModule +import text_recognizer.metadata.shared as metadata -NLTK_DATA_DIRNAME = BaseDataModule.data_dirname() / "downloaded" / "nltk" +NLTK_DATA_DIRNAME = metadata.DOWNLOADED_DATA_DIRNAME / "nltk" class SentenceGenerator: @@ -25,7 +25,7 @@ class SentenceGenerator: self.max_length = max_length def generate(self, max_length: Optional[int] = None) -> str: - r"""Generates a word or sentences from the Brown corpus. + """Generates a word or sentences from the Brown corpus. Sample a string from the Brown corpus of length at least one word and at most max_length, padding to max_length with the '_' characters if sentence is -- cgit v1.2.3-70-g09d2