summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2024-08-05 00:52:56 +0200
committer: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2024-08-05 00:52:56 +0200
commit: 4bf064f37ae164b452320bdc45cf03c53d66a5b1 (patch)
tree: f3cf877b98ce331e8b3a8062ac9fbde6f4ea36fe
parent: 5531d8147e52324a16c977f385715f934af5f246 (diff)
Update vector id
-rw-r--r-- rag/retriever/encoder.py | 6
1 file changed, 4 insertions, 2 deletions
diff --git a/rag/retriever/encoder.py b/rag/retriever/encoder.py
index 6176a37..b68c3bb 100644
--- a/rag/retriever/encoder.py
+++ b/rag/retriever/encoder.py
@@ -1,7 +1,7 @@
+import hashlib
import os
from pathlib import Path
from typing import Dict, List
-from uuid import uuid4
import ollama
from langchain_core.documents import Document
@@ -28,7 +28,9 @@ class Encoder:
log.debug("Encoding document...")
return [
Point(
- id=uuid4().hex,
+ id=hashlib.sha256(
+ chunk.page_content.encode(encoding="utf-8")
+ ).hexdigest(),
vector=self.__encode(chunk.page_content),
payload={
"text": chunk.page_content,