summary | refs | log | tree | commit | diff
path: root/rag/rag.py
diff options
context:
space:
mode:
Diffstat (limited to 'rag/rag.py')
-rw-r--r-- rag/rag.py 14
1 files changed, 14 insertions, 0 deletions
diff --git a/rag/rag.py b/rag/rag.py
index 6826a80..c31e6f4 100644
--- a/rag/rag.py
+++ b/rag/rag.py
@@ -1,12 +1,15 @@
+from pathlib import Path
from typing import List
from dotenv import load_dotenv
from loguru import logger as log
from qdrant_client.models import StrictFloat
+from rag.db.document import DocumentDB
from rag.db.vector import VectorDB
from rag.llm.encoder import Encoder
from rag.llm.generator import Generator, Prompt
+from rag.parser import pdf
class RAG:
@@ -16,6 +19,17 @@ class RAG:
self.encoder = Encoder()
self.vector_db = VectorDB()
+    # FIXME: refactor this, add vector?
+    def add_pdf(self, filepath: Path):
+        """Parse the PDF at filepath, store its chunks, and index them when the store accepts them."""
+        chunks = pdf.parser(filepath)
+        # NOTE(review): confirm __init__ assigns self.document_db (not visible here).
+        if not self.document_db.add(chunks):
+            log.debug("Document already exists!")
+            return
+        log.debug(f"Adding pdf with filepath: {filepath} to vector db")
+        self.vector_db.add(self.encoder.encode_document(chunks))
+
def __context(self, query_emb: List[StrictFloat], limit: int) -> str:
hits = self.vector_db.search(query_emb, limit)
log.debug(f"Got {len(hits)} hits in the vector db with limit={limit}")