summary refs log tree commit diff
path: root/rag
diff options
context:
space:
mode:
author Gustaf Rydholm <gustaf.rydholm@gmail.com> 2024-04-06 13:19:32 +0200
committer Gustaf Rydholm <gustaf.rydholm@gmail.com> 2024-04-06 13:19:32 +0200
commit ff060aa4b45cbfdcc1af1584302cbdcf6b251fc3 (patch)
tree e3795d48e8d568618ba665ab1717fad02ea5df06 /rag
parent 052bf63a2c18b1b55013dcf6974228609cc4d76f (diff)
Add fixme
Diffstat (limited to 'rag')
-rw-r--r-- rag/rag.py 14
1 files changed, 14 insertions, 0 deletions
diff --git a/rag/rag.py b/rag/rag.py
index 6826a80..c31e6f4 100644
--- a/rag/rag.py
+++ b/rag/rag.py
@@ -1,12 +1,15 @@
+from pathlib import Path
from typing import List
from dotenv import load_dotenv
from loguru import logger as log
from qdrant_client.models import StrictFloat
+from rag.db.document import DocumentDB
from rag.db.vector import VectorDB
from rag.llm.encoder import Encoder
from rag.llm.generator import Generator, Prompt
+from rag.parser import pdf
class RAG:
@@ -16,6 +19,17 @@ class RAG:
self.encoder = Encoder()
self.vector_db = VectorDB()
+    # FIXME: refactor this, add vector?
+    def add_pdf(self, filepath: Path):
+        chunks = pdf.parser(filepath)
+        added = self.document_db.add(chunks)
+        if added:
+            log.debug(f"Adding pdf with filepath: {filepath} to vector db")
+            points = self.encoder.encode_document(chunks)
+            self.vector_db.add(points)
+        else:
+            log.debug("Document already exists!")
+
def __context(self, query_emb: List[StrictFloat], limit: int) -> str:
hits = self.vector_db.search(query_emb, limit)
log.debug(f"Got {len(hits)} hits in the vector db with limit={limit}")