import os
from typing import Any, Generator, List

import ollama
from loguru import logger as log

from rag.retriever.vector import Document

from .abstract import AbstractGenerator
from .prompt import Prompt

# System prompt passed as ``system=`` on every generation request.
# The fragments are joined by implicit string concatenation, so each markdown
# header and sentence must carry its own trailing separator; without them the
# headers fuse with the following text ("# System Preamble## Basic Rules...").
SYSTEM_PROMPT = (
    "# System Preamble\n"
    "## Basic Rules\n"
    "When you answer the user's requests, you cite your sources in your answers, according to those instructions. "
    "Answer the following question using the provided context.\n"
    "## Style Guide\n"
    "Unless the user asks for a different style of answer, you should answer "
    "in full sentences, using proper grammar and spelling."
)


class Ollama(metaclass=AbstractGenerator):
    """Answer generator that streams completions from the ``ollama`` client."""

    def __init__(self) -> None:
        """Read the model name from the ``GENERATOR_MODEL`` environment variable.

        Raises:
            KeyError: If ``GENERATOR_MODEL`` is not set.
        """
        self.model = os.environ["GENERATOR_MODEL"]

    def __context(self, documents: List[Document]) -> str:
        """Render the documents as numbered ``Document/title/text`` entries.

        Args:
            documents: Retrieved documents; each must expose ``title`` and
                ``text`` attributes.

        Returns:
            One entry per document, numbered from 0 and joined with newlines.
            An empty list yields an empty string.
        """
        return "\n".join(
            f"Document: {i}\ntitle: {doc.title}\ntext: {doc.text}"
            for i, doc in enumerate(documents)
        )

    def __metaprompt(self, prompt: Prompt) -> str:
        """Build the user prompt: question, document context and instructions.

        Args:
            prompt: Carries the user ``query`` and the retrieved ``documents``.

        Returns:
            The full prompt text sent to the model.
        """
        # Include sources
        # NOTE(review): the bare "\n" fragments around the context may once
        # have carried wrapper tags that were lost — confirm against the
        # prompt template of the target model.
        metaprompt = (
            f'Question: "{prompt.query.strip()}"\n\n'
            "Context:\n"
            "\n"
            f"{self.__context(prompt.documents)}\n\n"
            "\n"
            "Carefully perform the following instructions, in order, starting each "
            "with a new line.\n"
            "Firstly, Decide which of the retrieved documents are relevant to the "
            "user's last input by writing 'Relevant Documents:' followed by "
            "comma-separated list of document numbers. If none are relevant, you "
            "should instead write 'None'.\n"
            "Secondly, Decide which of the retrieved documents contain facts that "
            "should be cited in a good answer to the user's last input by writing "
            "'Cited Documents:' followed a comma-separated list of document numbers. "
            "If you dont want to cite any of them, you should instead write 'None'.\n"
            "Thirdly, Write 'Answer:' followed by a response to the user's last input "
            "in high quality natural english. Use the retrieved documents to help you. "
            "Do not insert any citations or grounding markup.\n"
            "Finally, Write 'Grounded answer:' followed by a response to the user's "
            "last input in high quality natural english. Use the symbols <co: doc> and "
            "</co: doc> to indicate when a fact comes from a document in the search "
            "result, e.g <co: 0>my fact</co: 0> for a fact from document 0."
        )
        return metaprompt

    def generate(self, prompt: Prompt) -> Generator[Any, Any, Any]:
        """Stream the model's answer for ``prompt``.

        Args:
            prompt: The user query together with its retrieved documents.

        Yields:
            The ``"response"`` field of each chunk returned by
            ``ollama.generate`` in streaming mode.
        """
        log.debug("Generating answer with ollama...")
        metaprompt = self.__metaprompt(prompt)
        for chunk in ollama.generate(
            model=self.model, prompt=metaprompt, system=SYSTEM_PROMPT, stream=True
        ):
            yield chunk["response"]