"""Command-line script that uploads every pdf under a directory via Retriever."""
# Origin: rag/upload.py, added by commit a96b270805eba2b6d8c40d1fa2ee0d35c73cba0e
# ("Add upload script", Gustaf Rydholm, 2024-04-13).

from pathlib import Path

import click
from dotenv import load_dotenv
from loguru import logger as log
from tqdm import tqdm

from rag.retriever.retriever import Retriever

# Swap loguru's default sink for one that prints through tqdm.write, so log
# output is routed through tqdm while the progress bar is active. The message
# already ends with a newline, hence end="".
log.remove()
log.add(lambda msg: tqdm.write(msg, end=""), colorize=True)


@click.command()
@click.option(
    "-d",
    "--directory",
    help="The full path to the root directory containing pdfs to upload",
    # file_okay=False: a regular file can never contain pdfs to glob for, so
    # reject it up front instead of silently uploading nothing.
    type=click.Path(exists=True, file_okay=False),
    # Without required=True an omitted option arrives as None and Path(None)
    # fails with a bare TypeError; click now reports a usage error instead.
    required=True,
)
def main(directory: str) -> None:
    """Upload every pdf found recursively under the given directory.

    Each matching file is handed to Retriever.add_pdf, one at a time, while a
    progress bar tracks how many files have been processed.
    """
    log.info(f"Uploading pdfs found in directory {directory}...")
    retriever = Retriever()
    # Materialize and sort the matches: tqdm needs a sized iterable to show a
    # total/ETA, and sorting makes the upload order reproducible between runs.
    pdfs = sorted(Path(directory).glob("**/*.pdf"))
    if not pdfs:
        log.warning(f"No pdfs found in directory {directory}")
        return
    for path in tqdm(pdfs):
        retriever.add_pdf(path=path)


if __name__ == "__main__":
    # Load variables from a .env file before the command runs
    # (presumably configuration read by Retriever — confirm).
    load_dotenv()
    main()