From ff060aa4b45cbfdcc1af1584302cbdcf6b251fc3 Mon Sep 17 00:00:00 2001
From: Gustaf Rydholm
Date: Sat, 6 Apr 2024 13:19:32 +0200
Subject: Add fixme

---
 rag/rag.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/rag/rag.py b/rag/rag.py
index 6826a80..c31e6f4 100644
--- a/rag/rag.py
+++ b/rag/rag.py
@@ -1,12 +1,15 @@
+from pathlib import Path
 from typing import List
 
 from dotenv import load_dotenv
 from loguru import logger as log
 from qdrant_client.models import StrictFloat
 
+from rag.db.document import DocumentDB
 from rag.db.vector import VectorDB
 from rag.llm.encoder import Encoder
 from rag.llm.generator import Generator, Prompt
+from rag.parser import pdf
 
 
 class RAG:
@@ -16,6 +19,17 @@ class RAG:
         self.encoder = Encoder()
         self.vector_db = VectorDB()
 
+    # FIXME: refactor this, add vector?
+    def add_pdf(self, filepath: Path):
+        chunks = pdf.parser(filepath)
+        added = self.document_db.add(chunks)
+        if added:
+            log.debug(f"Adding pdf with filepath: {filepath} to vector db")
+            points = self.encoder.encode_document(chunks)
+            self.vector_db.add(points)
+        else:
+            log.debug("Document already exists!")
+
     def __context(self, query_emb: List[StrictFloat], limit: int) -> str:
         hits = self.vector_db.search(query_emb, limit)
         log.debug(f"Got {len(hits)} hits in the vector db with limit={limit}")
-- 
cgit v1.2.3-70-g09d2