summary | refs | log | tree | commit | diff
path: root/rag
diff options
context:
space:
mode:
author: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2024-04-13 13:21:44 +0200
committer: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2024-04-13 13:21:44 +0200
commit: a96b270805eba2b6d8c40d1fa2ee0d35c73cba0e (patch)
tree: fd2a663eda2b4ccb021adcd7fb5b4e6d89180f95 /rag
parent: 4968ed48ed1adb267b910b28fdda0db115ba1b19 (diff)
Add upload script
Diffstat (limited to 'rag')
-rw-r--r-- rag/upload.py 31
1 files changed, 31 insertions, 0 deletions
diff --git a/rag/upload.py b/rag/upload.py
new file mode 100644
index 0000000..3c5a100
--- /dev/null
+++ b/rag/upload.py
@@ -0,0 +1,31 @@
+from pathlib import Path
+
+import click
+from dotenv import load_dotenv
+from loguru import logger as log
+from tqdm import tqdm
+
+from rag.retriever.retriever import Retriever
+
+def _write_via_tqdm(message) -> None:
+    """Loguru sink that prints *message* through ``tqdm.write``."""
+    # end="" stops tqdm.write from appending its own newline after the
+    # already-formatted log message.
+    tqdm.write(message, end="")
+
+
+# Swap the logger's existing handlers for the tqdm-aware sink so log lines
+# are emitted via tqdm.write while a progress bar is being drawn.
+log.remove()
+log.add(_write_via_tqdm, colorize=True)
+
+
+@click.command()
+@click.option(
+    "-d",
+    "--directory",
+    help="The full path to the root directory containing pdfs to upload",
+    # Without required=True click passes None when the flag is omitted and
+    # Path(None) below fails with a TypeError instead of a usage error.
+    required=True,
+    # The value is globbed as a directory, so reject plain files up front.
+    type=click.Path(exists=True, file_okay=False),
+)
+def main(directory: str) -> None:
+    """Upload every PDF found under --directory.
+
+    The directory is searched recursively for ``*.pdf`` files and each match
+    is passed, one at a time, to ``Retriever.add_pdf``. click also shows this
+    text as the command's ``--help`` description.
+    """
+    log.info(f"Uploading pdfs found in directory {directory}...")
+    retriever = Retriever()
+    # Materialise the matches so tqdm knows the total and can render real
+    # progress; sorting makes the upload order reproducible between runs.
+    pdfs = sorted(Path(directory).glob("**/*.pdf"))
+    for path in tqdm(pdfs):
+        retriever.add_pdf(path=path)
+
+
+# Script entry point: only runs when the file is executed directly.
+if __name__ == "__main__":
+ # Called before main() so that anything it loads is in place first
+ # (presumably environment variables from a .env file that Retriever
+ # relies on -- confirm against the retriever's configuration).
+ load_dotenv()
+ # main is a click command: it parses the command line itself.
+ main()