summary | refs | log | tree | commit | diff
path: root/rag/db
diff options
context:
space:
mode:
author: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2024-04-08 00:23:52 +0200
committer: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2024-04-08 00:23:52 +0200
commit: 8211705debf9d1335223c606275f46c43c78d8a2 (patch)
tree: f09f902c7d31b2035813c42cbb4a47e720fa363b /rag/db
parent: 95f47c4900a96d91daaef93bf87094ed3d4da43c (diff)
Updates
Diffstat (limited to 'rag/db')
-rw-r--r-- rag/db/document.py 7
-rw-r--r-- rag/db/vector.py 5
2 files changed, 8 insertions, 4 deletions
diff --git a/rag/db/document.py b/rag/db/document.py
index 528a399..54ac451 100644
--- a/rag/db/document.py
+++ b/rag/db/document.py
@@ -1,6 +1,7 @@
import hashlib
import os
+from langchain_community.document_loaders.blob_loaders import Blob
import psycopg
from loguru import logger as log
@@ -26,11 +27,11 @@ class DocumentDB:
cur.execute(TABLES)
self.conn.commit()
- def __hash(self, blob: bytes) -> str:
+ def __hash(self, blob: Blob) -> str:
log.debug("Hashing document...")
- return hashlib.sha256(blob).hexdigest()
+ return hashlib.sha256(blob.as_bytes()).hexdigest()
- def add(self, blob: bytes) -> bool:
+ def add(self, blob: Blob) -> bool:
with self.conn.cursor() as cur:
hash = self.__hash(blob)
cur.execute(
diff --git a/rag/db/vector.py b/rag/db/vector.py
index 4aa62cc..bbbbf32 100644
--- a/rag/db/vector.py
+++ b/rag/db/vector.py
@@ -50,6 +50,9 @@ class VectorDB:
def search(self, query: List[float], limit: int = 4) -> List[ScoredPoint]:
log.debug("Searching for vectors...")
hits = self.client.search(
- collection_name=self.collection_name, query_vector=query, limit=limit
+ collection_name=self.collection_name,
+ query_vector=query,
+ limit=limit,
+ score_threshold=0.6,
)
return hits