diff options
Diffstat (limited to 'rag')
-rw-r--r-- | rag/upload.py | 31 |
1 files changed, 31 insertions, 0 deletions
"""Command-line tool that uploads PDFs found under a directory (rag/upload.py)."""

from pathlib import Path

import click
from dotenv import load_dotenv
from loguru import logger as log
from tqdm import tqdm

from rag.retriever.retriever import Retriever

# Replace loguru's default sink with one that prints through tqdm.write, so
# log lines and the tqdm progress bar share the same output path.
log.remove()
log.add(lambda msg: tqdm.write(msg, end=""), colorize=True)


@click.command()
@click.option(
    "-d",
    "--directory",
    help="The full path to the root directory containing pdfs to upload",
    # Reject regular files up front: the recursive glob below only makes
    # sense on a directory.
    type=click.Path(exists=True, file_okay=False),
    # Without this, omitting the option passes None and Path(None) raises
    # TypeError instead of a readable usage error.
    required=True,
)
def main(directory: str) -> None:
    """Upload every PDF found (recursively) under the given directory."""
    # NOTE: click uses the docstring above as the command's --help text, so
    # it is kept short and user-facing.
    log.info(f"Uploading pdfs found in directory {directory}...")
    retriever = Retriever()
    # Materialise and sort the matches so tqdm knows the total (giving a real
    # progress bar and ETA) and the upload order is deterministic.
    pdfs = sorted(Path(directory).glob("**/*.pdf"))
    for path in tqdm(pdfs):
        retriever.add_pdf(path=path)


if __name__ == "__main__":
    # Load variables from a .env file before running the command.
    load_dotenv()
    main()