diff options
Diffstat (limited to 'rag/db/documents.py')
-rw-r--r-- | rag/db/documents.py | 8 |
1 files changed, 6 insertions, 2 deletions
diff --git a/rag/db/documents.py b/rag/db/documents.py index 7d088da..6f83b1f 100644 --- a/rag/db/documents.py +++ b/rag/db/documents.py @@ -4,6 +4,7 @@ from typing import List import psycopg from langchain_core.documents.base import Document +from loguru import logger as log TABLES = """ CREATE TABLE IF NOT EXISTS document ( @@ -16,21 +17,24 @@ class Documents: self.conn = psycopg.connect( f"dbname={os.environ['RAG_DB_NAME']} user={os.environ['RAG_DB_USER']}" ) - self.__create_content_table() + self.__configure() def close(self): self.conn.close() - def __create_content_table(self): + def __configure(self): + log.debug("Creating documents table if it does not exist...") with self.conn.cursor() as cur: cur.execute(TABLES) self.conn.commit() def __hash(self, chunks: List[Document]) -> str: + log.debug("Generating sha256 hash for pdf document") document = str.encode("".join([chunk.page_content for chunk in chunks])) return hashlib.sha256(document).hexdigest() def add_document(self, chunks: List[Document]) -> bool: + log.debug("Inserting document hash into documents db...") with self.conn.cursor() as cur: hash = self.__hash(chunks) cur.execute( |