From 8211705debf9d1335223c606275f46c43c78d8a2 Mon Sep 17 00:00:00 2001 From: Gustaf Rydholm Date: Mon, 8 Apr 2024 00:23:52 +0200 Subject: Updates --- rag/db/document.py | 7 ++++--- rag/db/vector.py | 5 ++++- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'rag/db') diff --git a/rag/db/document.py b/rag/db/document.py index 528a399..54ac451 100644 --- a/rag/db/document.py +++ b/rag/db/document.py @@ -1,6 +1,7 @@ import hashlib import os +from langchain_community.document_loaders.blob_loaders import Blob import psycopg from loguru import logger as log @@ -26,11 +27,11 @@ class DocumentDB: cur.execute(TABLES) self.conn.commit() - def __hash(self, blob: bytes) -> str: + def __hash(self, blob: Blob) -> str: log.debug("Hashing document...") - return hashlib.sha256(blob).hexdigest() + return hashlib.sha256(blob.as_bytes()).hexdigest() - def add(self, blob: bytes) -> bool: + def add(self, blob: Blob) -> bool: with self.conn.cursor() as cur: hash = self.__hash(blob) cur.execute( diff --git a/rag/db/vector.py b/rag/db/vector.py index 4aa62cc..bbbbf32 100644 --- a/rag/db/vector.py +++ b/rag/db/vector.py @@ -50,6 +50,9 @@ class VectorDB: def search(self, query: List[float], limit: int = 4) -> List[ScoredPoint]: log.debug("Searching for vectors...") hits = self.client.search( - collection_name=self.collection_name, query_vector=query, limit=limit + collection_name=self.collection_name, + query_vector=query, + limit=limit, + score_threshold=0.6, ) return hits -- cgit v1.2.3-70-g09d2