diff options
author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2024-04-09 00:41:55 +0200 |
---|---|---|
committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2024-04-09 00:41:55 +0200 |
commit | 3f447bff69c20109474c455f1ad52bd547ab49e9 (patch) | |
tree | 66695ca0e3423e2973c5b24ec1ae7096019b5dd0 /rag/retriever | |
parent | c05eae81f9aaa0a764203446ec54d7dd7cbeb66f (diff) |
Update
Diffstat (limited to 'rag/retriever')
-rw-r--r-- | rag/retriever/retriever.py | 19 |
1 file changed, 16 insertions, 3 deletions
```diff
diff --git a/rag/retriever/retriever.py b/rag/retriever/retriever.py
index dbfdfa2..885dafe 100644
--- a/rag/retriever/retriever.py
+++ b/rag/retriever/retriever.py
@@ -16,12 +16,12 @@ class Retriever:
         self.doc_db = DocumentDB()
         self.vec_db = VectorDB()
 
-    def add_pdf_from_path(self, path: Path):
+    def __add_pdf_from_path(self, path: Path):
         log.debug(f"Adding pdf from {path}")
         blob = self.pdf_parser.from_path(path)
-        self.add_pdf_from_blob(blob)
+        self.__add_pdf_from_blob(blob)
 
-    def add_pdf_from_blob(self, blob: BytesIO, source: Optional[str] = None):
+    def __add_pdf_from_blob(self, blob: BytesIO, source: Optional[str] = None):
         if self.doc_db.add(blob):
             log.debug("Adding pdf to vector database...")
             document = self.pdf_parser.from_data(blob)
@@ -31,6 +31,19 @@ class Retriever:
         else:
             log.debug("Document already exists!")
 
+    def add_pdf(
+        self,
+        path: Optional[Path] = None,
+        blob: Optional[BytesIO] = None,
+        source: Optional[str] = None,
+    ):
+        if path:
+            self.__add_pdf_from_path(path)
+        elif blob and source:
+            self.__add_pdf_from_blob(blob, source)
+        else:
+            log.error("Invalid input!")
+
     def retrieve(self, query: str, limit: int = 5) -> List[Document]:
         log.debug(f"Finding documents matching query: {query}")
         query_emb = self.encoder.encode_query(query)
```