diff options
author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2024-08-05 00:52:56 +0200 |
---|---|---|
committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2024-08-05 00:52:56 +0200 |
commit | 4bf064f37ae164b452320bdc45cf03c53d66a5b1 (patch) | |
tree | f3cf877b98ce331e8b3a8062ac9fbde6f4ea36fe /rag | |
parent | 5531d8147e52324a16c977f385715f934af5f246 (diff) |
Update vector id
Diffstat (limited to 'rag')
-rw-r--r-- | rag/retriever/encoder.py | 6 |
1 file changed, 4 insertions, 2 deletions
diff --git a/rag/retriever/encoder.py b/rag/retriever/encoder.py
index 6176a37..b68c3bb 100644
--- a/rag/retriever/encoder.py
+++ b/rag/retriever/encoder.py
@@ -1,7 +1,7 @@
+import hashlib
 import os
 from pathlib import Path
 from typing import Dict, List
-from uuid import uuid4
 
 import ollama
 from langchain_core.documents import Document
@@ -28,7 +28,9 @@ class Encoder:
         log.debug("Encoding document...")
         return [
             Point(
-                id=uuid4().hex,
+                id=hashlib.sha256(
+                    chunk.page_content.encode(encoding="utf-8")
+                ).hexdigest(),
                 vector=self.__encode(chunk.page_content),
                 payload={
                     "text": chunk.page_content,