rag/generator/ollama.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68

import os
from typing import Any, Generator, List

import ollama
from loguru import logger as log

from rag.retriever.vector import Document

from .abstract import AbstractGenerator
from .prompt import Prompt

# System prompt passed as `system=` to the ollama generate call.
# NOTE: adjacent string literals are concatenated verbatim, so every Markdown
# heading and every sentence needs its own explicit "\n"; without it the
# headings fuse with the following text (e.g. "# System Preamble## Basic Rules").
SYSTEM_PROMPT = (
    "# System Preamble\n"
    "## Basic Rules\n"
    "When you answer the user's requests, you cite your sources in your answers, "
    "according to those instructions.\n"
    "Answer the following question using the provided context.\n"
    "## Style Guide\n"
    "Unless the user asks for a different style of answer, you should answer "
    "in full sentences, using proper grammar and spelling."
)


class Ollama(metaclass=AbstractGenerator):
    """Answer generator that streams completions from an ollama model.

    The model name is read from the ``GENERATOR_MODEL`` environment variable
    when the instance is created.
    """

    # NOTE(review): AbstractGenerator is used as a metaclass here; its effect on
    # instantiation is defined in `.abstract` and is not visible in this file.

    def __init__(self) -> None:
        """Read the model name from the environment.

        Raises:
            KeyError: if ``GENERATOR_MODEL`` is not set.
        """
        self.model = os.environ["GENERATOR_MODEL"]

    def __context(self, documents: List[Document]) -> str:
        """Render the documents as one newline-separated, numbered text block.

        Each document contributes its zero-based position, its ``title`` and
        its ``text``. An empty list yields an empty string.
        """
        return "\n".join(
            f"Document: {index}\ntitle: {document.title}\ntext: {document.text}"
            for index, document in enumerate(documents)
        )

    def __metaprompt(self, prompt: Prompt) -> str:
        """Build the full user prompt: question, document context, instructions.

        The question is ``prompt.query`` with surrounding whitespace stripped;
        the context is ``prompt.documents`` rendered between ``<result>`` tags.
        """
        question = prompt.query.strip()
        documents_block = self.__context(prompt.documents)

        # Question followed by the retrieved sources wrapped in <result> tags.
        preface = (
            f'Question: "{question}"\n\n'
            f"Context:\n<result>\n{documents_block}\n\n</result>\n"
        )

        # Fixed instruction steps; the document numbers they refer to are the
        # zero-based indices emitted by __context.
        steps = (
            (
                "Carefully perform the following instructions, in order, starting each "
                "with a new line.\n"
            ),
            (
                "Firstly, Decide which of the retrieved documents are relevant to the "
                "user's last input by writing 'Relevant Documents:' followed by "
                "comma-separated list of document numbers.\n If none are relevant, you "
                "should instead write 'None'.\n"
            ),
            (
                "Secondly, Decide which of the retrieved documents contain facts that "
                "should be cited in a good answer to the user's last input by writing "
                "'Cited Documents:' followed a comma-separated list of document numbers. "
                "If you dont want to cite any of them, you should instead write 'None'.\n"
            ),
            (
                "Thirdly, Write 'Answer:' followed by a response to the user's last input "
                "in high quality natural english. Use the retrieved documents to help you. "
                "Do not insert any citations or grounding markup.\n"
            ),
            (
                "Finally, Write 'Grounded answer:' followed by a response to the user's "
                "last input in high quality natural english. Use the symbols <co: doc> and "
                "</co: doc> to indicate when a fact comes from a document in the search "
                "result, e.g <co: 0>my fact</co: 0> for a fact from document 0."
            ),
        )
        return preface + "".join(steps)

    def generate(self, prompt: Prompt) -> Generator[Any, Any, Any]:
        """Stream the model's answer for ``prompt``.

        Calls ``ollama.generate`` with ``stream=True`` and yields the
        ``"response"`` entry of every chunk it produces.
        """
        log.debug("Generating answer with ollama...")
        user_prompt = self.__metaprompt(prompt)
        stream = ollama.generate(
            model=self.model,
            prompt=user_prompt,
            system=SYSTEM_PROMPT,
            stream=True,
        )
        for part in stream:
            yield part["response"]