From 3f447bff69c20109474c455f1ad52bd547ab49e9 Mon Sep 17 00:00:00 2001
From: Gustaf Rydholm
Date: Tue, 9 Apr 2024 00:41:55 +0200
Subject: Update

---
 rag/retriever/retriever.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

(limited to 'rag/retriever')

diff --git a/rag/retriever/retriever.py b/rag/retriever/retriever.py
index dbfdfa2..885dafe 100644
--- a/rag/retriever/retriever.py
+++ b/rag/retriever/retriever.py
@@ -16,12 +16,12 @@ class Retriever:
         self.doc_db = DocumentDB()
         self.vec_db = VectorDB()
 
-    def add_pdf_from_path(self, path: Path):
+    def __add_pdf_from_path(self, path: Path):
         log.debug(f"Adding pdf from {path}")
         blob = self.pdf_parser.from_path(path)
-        self.add_pdf_from_blob(blob)
+        self.__add_pdf_from_blob(blob)
 
-    def add_pdf_from_blob(self, blob: BytesIO, source: Optional[str] = None):
+    def __add_pdf_from_blob(self, blob: BytesIO, source: Optional[str] = None):
         if self.doc_db.add(blob):
             log.debug("Adding pdf to vector database...")
             document = self.pdf_parser.from_data(blob)
@@ -31,6 +31,19 @@ class Retriever:
         else:
             log.debug("Document already exists!")
 
+    def add_pdf(
+        self,
+        path: Optional[Path] = None,
+        blob: Optional[BytesIO] = None,
+        source: Optional[str] = None,
+    ):
+        if path:
+            self.__add_pdf_from_path(path)
+        elif blob and source:
+            self.__add_pdf_from_blob(blob, source)
+        else:
+            log.error("Invalid input!")
+
     def retrieve(self, query: str, limit: int = 5) -> List[Document]:
         log.debug(f"Finding documents matching query: {query}")
         query_emb = self.encoder.encode_query(query)
--
cgit v1.2.3-70-g09d2