summary | refs | log | tree | commit | diff
path: root/rag/upload.py
diff options
context:
space:
mode:
Diffstat (limited to 'rag/upload.py')
-rw-r--r-- rag/upload.py 31
1 files changed, 31 insertions, 0 deletions
diff --git a/rag/upload.py b/rag/upload.py
new file mode 100644
index 0000000..3c5a100
--- /dev/null
+++ b/rag/upload.py
@@ -0,0 +1,31 @@
+from pathlib import Path
+
+import click
+from dotenv import load_dotenv
+from loguru import logger as log
+from tqdm import tqdm
+
+from rag.retriever.retriever import Retriever
+
+log.remove()  # drop the logger's existing sinks so only the sink added below is active
+log.add(lambda msg: tqdm.write(msg, end=""), colorize=True)  # emit via tqdm.write (presumably so log lines don't garble the progress bar)
+
+
+@click.command()
+@click.option(
+    "-d", "--directory",
+    help="The full path to the root directory containing pdfs to upload",
+    type=click.Path(exists=True),
+    required=True,  # without it a missing -d reached Path(None) and raised TypeError
+)
+def main(directory: str):
+    """Pass every ``*.pdf`` found recursively under ``directory`` to ``Retriever.add_pdf``."""
+    log.info(f"Uploading pdfs found in directory {directory}...")
+    retriever = Retriever()
+    for path in tqdm(Path(directory).glob("**/*.pdf")):  # glob is lazy, so the bar has no total
+        retriever.add_pdf(path=path)
+
+
+if __name__ == "__main__":
+ load_dotenv()  # presumably loads .env settings needed by Retriever before it is built — TODO confirm
+ main()  # click command: parses the command line and supplies `directory`