From 4bf064f37ae164b452320bdc45cf03c53d66a5b1 Mon Sep 17 00:00:00 2001 From: Gustaf Rydholm Date: Mon, 5 Aug 2024 00:52:56 +0200 Subject: Update vector id --- rag/retriever/encoder.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/rag/retriever/encoder.py b/rag/retriever/encoder.py index 6176a37..b68c3bb 100644 --- a/rag/retriever/encoder.py +++ b/rag/retriever/encoder.py @@ -1,7 +1,7 @@ +import hashlib import os from pathlib import Path from typing import Dict, List -from uuid import uuid4 import ollama from langchain_core.documents import Document @@ -28,7 +28,9 @@ class Encoder: log.debug("Encoding document...") return [ Point( - id=uuid4().hex, + id=hashlib.sha256( + chunk.page_content.encode(encoding="utf-8") + ).hexdigest(), vector=self.__encode(chunk.page_content), payload={ "text": chunk.page_content, -- cgit v1.2.3-70-g09d2