Diffstat (limited to 'rag/llm/encoder.py')
-rw-r--r--  rag/llm/encoder.py  47
1 file changed, 0 insertions(+), 47 deletions(-)
diff --git a/rag/llm/encoder.py b/rag/llm/encoder.py
deleted file mode 100644
index a59b1b4..0000000
--- a/rag/llm/encoder.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import os
-from pathlib import Path
-from typing import List, Dict
-from uuid import uuid4
-
-import ollama
-from langchain_core.documents import Document
-from loguru import logger as log
-from qdrant_client.http.models import StrictFloat
-
-
-try:
- from rag.db.vector import Point
-except ModuleNotFoundError:
- from db.vector import Point
-
-
-class Encoder:
- def __init__(self) -> None:
- self.model = os.environ["ENCODER_MODEL"]
- self.query_prompt = "Represent this sentence for searching relevant passages: "
-
- def __encode(self, prompt: str) -> List[StrictFloat]:
- return list(ollama.embeddings(model=self.model, prompt=prompt)["embedding"])
-
- def __get_source(self, metadata: Dict[str, str]) -> str:
- source = metadata["source"]
- return Path(source).name
-
- def encode_document(self, chunks: List[Document]) -> List[Point]:
- log.debug("Encoding document...")
- return [
- Point(
- id=uuid4().hex,
- vector=self.__encode(chunk.page_content),
- payload={
- "text": chunk.page_content,
- "source": self.__get_source(chunk.metadata),
- },
- )
- for chunk in chunks
- ]
-
- def encode_query(self, query: str) -> List[StrictFloat]:
- log.debug(f"Encoding query: {query}")
- query = self.query_prompt + query
- return self.__encode(query)
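
For reference, a minimal usage sketch of the Encoder class removed by this commit. Assumptions not present in the diff: ENCODER_MODEL is set to an Ollama embedding model (e.g. "nomic-embed-text" below is only an illustrative value), the Ollama server is running locally, and rag.db.vector.Point is importable. This is illustrative only and not part of the change itself.

    import os
    from langchain_core.documents import Document

    # Assumed model name for illustration; any Ollama embedding model would do.
    os.environ["ENCODER_MODEL"] = "nomic-embed-text"

    from rag.llm.encoder import Encoder  # module deleted in this commit

    encoder = Encoder()

    # Documents are encoded chunk by chunk into Points carrying the embedding
    # plus a payload with the chunk text and the source file name.
    points = encoder.encode_document(
        [Document(page_content="Example passage.", metadata={"source": "/docs/example.pdf"})]
    )

    # Queries are prefixed with the retrieval prompt before being embedded.
    query_vector = encoder.encode_query("What does the example passage say?")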