summary | refs | log | tree | commit | diff
path: root/rag/db/documents.py
diff options
context:
space:
mode:
Diffstat (limited to 'rag/db/documents.py')
-rw-r--r-- rag/db/documents.py 59
1 files changed, 0 insertions, 59 deletions
diff --git a/rag/db/documents.py b/rag/db/documents.py
deleted file mode 100644
index 6f83b1f..0000000
--- a/rag/db/documents.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import hashlib
-import os
-from typing import List
-
-import psycopg
-from langchain_core.documents.base import Document
-from loguru import logger as log
-
# Schema bootstrap statement. It is idempotent (IF NOT EXISTS), so it can be
# executed on every start-up. A document is identified solely by its hash.
TABLES = (
    "\n"
    "CREATE TABLE IF NOT EXISTS document (\n"
    " hash text PRIMARY KEY)\n"
)
-
-
class Documents:
    """Registry of already-ingested documents, keyed by a SHA-256 content hash.

    On construction a PostgreSQL connection is opened using the
    ``RAG_DB_NAME`` and ``RAG_DB_USER`` environment variables and the
    ``document`` table is created if it does not exist yet.

    Instances can be used as context managers; the connection is closed when
    the ``with`` block exits.
    """

    def __init__(self) -> None:
        """Open the database connection and make sure the schema exists.

        Raises:
            KeyError: if ``RAG_DB_NAME`` or ``RAG_DB_USER`` is not set.
        """
        # Pass the parameters as keywords rather than interpolating them into
        # a conninfo string, so values containing spaces or quotes are
        # escaped by psycopg instead of corrupting the connection string.
        self.conn = psycopg.connect(
            dbname=os.environ["RAG_DB_NAME"],
            user=os.environ["RAG_DB_USER"],
        )
        try:
            self.__configure()
        except Exception:
            # Do not leak the connection when the schema bootstrap fails.
            self.conn.close()
            raise

    def __enter__(self) -> "Documents":
        """Return the instance itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the connection when leaving the ``with`` block."""
        self.close()

    def close(self):
        """Close the underlying database connection."""
        self.conn.close()

    def __configure(self):
        """Create the ``document`` table if it is missing and commit."""
        log.debug("Creating documents table if it does not exist...")
        try:
            with self.conn.cursor() as cur:
                cur.execute(TABLES)
            self.conn.commit()
        except psycopg.Error:
            # Leave the connection usable instead of stuck in an aborted
            # transaction.
            self.conn.rollback()
            raise

    def __hash(self, chunks: List[Document]) -> str:
        """Return the hex SHA-256 digest of the concatenated chunk contents.

        Args:
            chunks: the document's chunks; their ``page_content`` strings are
                joined in order, without a separator, and UTF-8 encoded.
        """
        log.debug("Generating sha256 hash for pdf document")
        content = "".join(chunk.page_content for chunk in chunks)
        return hashlib.sha256(content.encode("utf-8")).hexdigest()

    def add_document(self, chunks: List[Document]) -> bool:
        """Register a document by its content hash.

        Args:
            chunks: the chunks that make up the document.

        Returns:
            ``True`` if the document was already registered, ``False`` if its
            hash was newly inserted by this call.

        Raises:
            psycopg.Error: if the statement fails; the transaction is rolled
                back before the error propagates.
        """
        log.debug("Inserting document hash into documents db...")
        digest = self.__hash(chunks)
        try:
            with self.conn.cursor() as cur:
                # One atomic statement instead of SELECT-then-INSERT: two
                # concurrent writers can no longer both see "missing" and
                # collide on the primary key.
                cur.execute(
                    """
                    INSERT INTO document
                    (hash) VALUES
                    (%s)
                    ON CONFLICT (hash) DO NOTHING
                    """,
                    (digest,),
                )
                # rowcount is 0 when the conflict clause skipped the insert.
                inserted = cur.rowcount == 1
            self.conn.commit()
        except psycopg.Error:
            self.conn.rollback()
            raise
        return not inserted