support chat history #438

Merged · 20 commits · Jul 11, 2024
Changes from 2 commits
31 changes: 15 additions & 16 deletions ragna/assistants/_demo.py
@@ -1,8 +1,7 @@
-import re
 import textwrap
 from typing import Iterator
 
-from ragna.core import Assistant, Source
+from ragna.core import Assistant, Message
 
 
 class RagnaDemoAssistant(Assistant):
@@ -22,11 +21,8 @@ class RagnaDemoAssistant(Assistant):
     def display_name(cls) -> str:
         return "Ragna/DemoAssistant"
 
-    def answer(self, prompt: str, sources: list[Source]) -> Iterator[str]:
-        if re.search("markdown", prompt, re.IGNORECASE):
-            yield self._markdown_answer()
-        else:
-            yield self._default_answer(prompt, sources)
+    def answer(self, messages: list[Message]) -> Iterator[str]:
+        yield self._default_answer(messages)
 
     def _markdown_answer(self) -> str:
         return textwrap.dedent(
@@ -39,16 +35,19 @@ def _markdown_answer(self) -> str:
             """
         ).strip()
 
-    def _default_answer(self, prompt: str, sources: list[Source]) -> str:
+    def _default_answer(self, messages: list[Message]) -> str:
+        prompt = messages[-1].content.strip()
         sources_display = []
-        for source in sources:
-            source_display = f"- {source.document.name}"
-            if source.location:
-                source_display += f", {source.location}"
-            source_display += f": {textwrap.shorten(source.content, width=100)}"
-            sources_display.append(source_display)
-        if len(sources) > 3:
-            sources_display.append("[...]")
+        for message in messages:
+            sources = message.sources
+            for source in sources:
+                source_display = f"- {source.document.name}"
+                if source.location:
+                    source_display += f", {source.location}"
+                source_display += f": {textwrap.shorten(source.content, width=100)}"
+                sources_display.append(source_display)
+            if len(sources) > 3:
+                sources_display.append("[...]")
 
         return (
             textwrap.dedent(
60 changes: 42 additions & 18 deletions ragna/assistants/_openai.py
@@ -2,7 +2,7 @@
 from functools import cached_property
 from typing import Any, AsyncIterator, Optional, cast
 
-from ragna.core import Source
+from ragna.core import Message, MessageRole
 
 from ._http_api import HttpApiAssistant, HttpStreamingProtocol
 
@@ -14,17 +14,43 @@ class OpenaiLikeHttpApiAssistant(HttpApiAssistant):
     @abc.abstractmethod
     def _url(self) -> str: ...
 
-    def _make_system_content(self, sources: list[Source]) -> str:
+    # TODO: move to user config
+    def _make_system_content(self) -> str:
         # See https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb
         instruction = (
-            "You are an helpful assistants that answers user questions given the context below. "
+            "You are a helpful assistant that answers user questions given the context below. "
            "If you don't know the answer, just say so. Don't try to make up an answer. "
-            "Only use the sources below to generate the answer."
+            "Only use the included messages below to generate the answer."
         )
-        return instruction + "\n\n".join(source.content for source in sources)
 
+        return Message(
+            content=instruction,
+            role=MessageRole.SYSTEM,
+        )
+
+    def _format_message_sources(self, messages: list[Message]) -> str:
Comment (Contributor Author): Example of what this could look like for OpenAI-like assistants. There could also be an attribute on the Assistant that tells this function how to truncate the message list based on the size of the target LLM's context window.
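A rough sketch of what such a truncation hook could look like, assuming a hypothetical _context_window_tokens attribute and a crude characters-per-token heuristic; neither exists in this PR, and a real version would use the target model's tokenizer:

```python
    # Hypothetical helper, not part of this PR. Assumes `Message` is imported from
    # ragna.core (as elsewhere in this file) and that the concrete assistant
    # defines a `_context_window_tokens` attribute.
    def _truncate_messages(self, messages: list[Message]) -> list[Message]:
        budget = getattr(self, "_context_window_tokens", 4096) * 4  # rough char budget
        kept: list[Message] = []
        used = 0
        for message in reversed(messages):  # newest first, so recent turns survive
            used += len(message.content)
            if used > budget:
                break
            kept.append(message)
        return list(reversed(kept))
```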

+        sources_prompt = "Include the following sources in your answer:"

Comment: Depending on what order we try to merge PRs - this piece will eventually be overwritten by the notion of the preprocess stage. It's probably fine for this version, imo.

+        formatted_messages = []
+        for message in messages:
+            if message.role == MessageRole.USER:
+                formatted_messages.append(
+                    {
+                        "content": sources_prompt
+                        + "\n\n".join(source.content for source in message.sources),
+                        "role": MessageRole.SYSTEM,
+                    }
+                )
+
+            formatted_messages.append(
+                {"content": message.content, "role": message.role}
+            )
+        return formatted_messages

     def _stream(
-        self, prompt: str, sources: list[Source], *, max_new_tokens: int
+        self,
+        messages: list[dict],
+        *,
+        max_new_tokens: int,
     ) -> AsyncIterator[dict[str, Any]]:
         # See https://platform.openai.com/docs/api-reference/chat/create
         # and https://platform.openai.com/docs/api-reference/chat/streaming
@@ -35,16 +61,7 @@ def _stream(
headers["Authorization"] = f"Bearer {self._api_key}"

json_ = {
"messages": [
{
"role": "system",
"content": self._make_system_content(sources),
},
{
"role": "user",
"content": prompt,
},
],
"messages": messages,
"temperature": 0.0,
"max_tokens": max_new_tokens,
"stream": True,
Expand All @@ -55,9 +72,16 @@ def _stream(
return self._call_api("POST", self._url, headers=headers, json=json_)

async def answer(
self, prompt: str, sources: list[Source], *, max_new_tokens: int = 256
self,
messages: list[Message] = [],
*,
max_new_tokens: int = 256,
) -> AsyncIterator[str]:
async for data in self._stream(prompt, sources, max_new_tokens=max_new_tokens):
formatted_messages = self._format_message_sources(messages)
print("formatted_messages: ", formatted_messages)
async for data in self._stream(
formatted_messages, max_new_tokens=max_new_tokens
):
choice = data["choices"][0]
if choice["finish_reason"] is not None:
break
Expand Down
22 changes: 18 additions & 4 deletions ragna/core/_components.py
@@ -147,7 +147,7 @@ def retrieve(self, documents: list[Document], prompt: str) -> list[Source]:
         ...
 
 
-class MessageRole(enum.Enum):
+class MessageRole(str, enum.Enum):
     """Message role
 
     Attributes:
@@ -185,8 +185,10 @@ def __init__(
     ) -> None:
         if isinstance(content, str):
             self._content: str = content
+            print("content", content)
         else:
             self._content_stream: AsyncIterable[str] = content
+            print("content_stream", content)
 
         self.role = role
         self.sources = sources or []
@@ -237,13 +239,25 @@ class Assistant(Component, abc.ABC):
 
     __ragna_protocol_methods__ = ["answer"]
 
+    def _make_system_content(self):
+        return Message(
+            content=(
+                "You are a helpful assistant that answers user questions given the context below. "
+                "If you don't know the answer, just say so. Don't try to make up an answer. "
+                "Only use the included messages below to generate the answer."
+            ),
+            role=MessageRole.SYSTEM,
+        )
+
     @abc.abstractmethod
-    def answer(self, prompt: str, sources: list[Source]) -> Iterator[str]:
+    def answer(
+        self,
+        messages: list[Message] = [],
+    ) -> Iterator[str]:
         """Answer a prompt given some sources.
 
         Args:
-            prompt: Prompt to be answered.
-            sources: Sources to use when answering answer the prompt.
+            messages: List of messages to send to the LLM API.
 
         Returns:
             Answer.
16 changes: 11 additions & 5 deletions ragna/core/_rag.py
@@ -195,6 +195,9 @@ async def prepare(self) -> Message:
         await self._run(self.source_storage.store, self.documents)
         self._prepared = True
 
+        system_prompt = self.assistant._make_system_content()
Comment (Contributor Author): It feels easiest to include the basic system prompt here, but maybe it should be regenerated and prepended to the list of messages for every call to the LLM and not stored here. This would make the most sense if the Assistant might truncate the message list and chop off the system prompt.
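For comparison, a minimal sketch of the "regenerate and prepend on every call" alternative, kept entirely on the assistant; illustrative only, not what this commit does:

```python
    # Hypothetical variant of OpenaiLikeHttpApiAssistant.answer: the system prompt
    # is rebuilt on every call and never stored in Chat._messages, so truncating
    # the history can never drop it.
    async def answer(
        self, messages: list[Message] = [], *, max_new_tokens: int = 256
    ) -> AsyncIterator[str]:
        full_messages = [self._make_system_content(), *messages]
        formatted_messages = self._format_message_sources(full_messages)
        async for data in self._stream(
            formatted_messages, max_new_tokens=max_new_tokens
        ):
            ...  # unchanged streaming logic from the PR
```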

Comment (Member): I don't understand why the chat would need the system prompt of the assistant. Instead of creating it here and passing it to the Assistant as part of the messages, why not let the Assistant create it in the first place?

Comment (Contributor Author): I think I agree; I also mostly talked myself out of putting it here. I wanted _messages to be a complete historical list of messages that could be sent to the LLM, but system prompts can stay hard-coded on the Assistant (or better yet, in a config somewhere?).

Comment: Just be sure there's no attempt at a 'universal' system prompt anywhere - on the assistant we'll have a general one for .answer(), but we'll want different system prompts for things like the pre-processing steps or anything outside the standard chain. Simply put, it's reasonable to have a standard system prompt on .answer(), but we don't want it fixed on the general LLM calls in .generate() (WIP).

+        self._messages.append(system_prompt)
+
         welcome = Message(
             content="How can I help you with the documents?",
             role=MessageRole.SYSTEM,
@@ -220,17 +223,20 @@ async def answer(self, prompt: str, *, stream: bool = False) -> Message:
                 detail=RagnaException.EVENT,
             )
 
-        self._messages.append(Message(content=prompt, role=MessageRole.USER))
-
         sources = await self._run(self.source_storage.retrieve, self.documents, prompt)
 
+        question = Message(content=prompt, role=MessageRole.USER, sources=sources)
Comment (Member): Until this, we only ever attached the sources to the answer and not the question. I'm open to a "philosophical debate" on where they should go, but I don't want them on both the question and the answer. Why not just keep passing the sources to Assistant.answer?

Comment (Contributor Author): This was something I meant to ask about in my original list of questions last night, but I ran out of steam. I'm really curious about this from a design perspective. Let's imagine a design where chat._messages is a list of Message objects, as currently implemented. Now also say the assistant is responsible for formatting each raw message for the LLM API call. I can imagine at least a couple of scenarios for how we would format historical messages:

  1. The assistant reformats the message list every time it receives a new .answer call. So if we want to split up a prompt/sources pair into two separate JSON objects (i.e. {'content': '<user question>', 'role': 'user'} + {'content': 'please use only the following sources: ...', 'role': 'system'}), we either need the sources to be attached to the question or we need to reach forward in the messages list to grab the sources from the answer. Something feels a little strange about the latter, but I agree that sources shouldn't be on both the question and the answer, just one or the other (see the small sketch after this list for the sources-on-the-question variant).
  2. Alternatively, rather than reformatting the LLM message JSON every time, we only format new prompt/source pairs and maintain a list of message dictionaries or rendered JSON somewhere of previously formatted messages. Then we'd just need to append the new messages and send them along. This then becomes another data structure to maintain.
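A tiny illustration of scenario 1, assuming the sources end up attached to the question: one stored user Message fans out into a system entry carrying its sources plus the user entry itself. It mirrors _format_message_sources from the diff above but is only an illustration, not part of the PR (Message comes from ragna.core):

```python
def split_user_message(message: Message) -> list[dict]:
    # Scenario 1: one stored user message becomes two API messages, a system
    # message carrying its sources followed by the user prompt itself.
    sources_prompt = "please use only the following sources: "
    return [
        {
            "role": "system",
            "content": sources_prompt
            + "\n\n".join(source.content for source in message.sources),
        },
        {"role": "user", "content": message.content},
    ]
```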

Comment: Recall, we eventually want ['chat history' messages] + prompt -> preprocess -> retrieval, so the messages in the history before the current prompt are (were) all strings to the LLM, but we may want to be able to act on them with the distinction between content and sources. For example, if messages from the user are tracked at a 'content' + sources level, it might be easier to process the sources down in later stages; you may not want to carry content + N chunks forward from every user prompt.

Keeping the sources attached to prompts would be particularly useful in preprocess tracking, where a given user-role message may be (content + sources) and you want to streamline the context there (parse out/summarize/select sources). Just keep in mind that 'previous' messages make sense to have the sources attached, while the 'current' prompt will still be sent through processing and retrieval.

If I'm thinking about a pre-process stage class, receiving messages = [{system, ...}, {user, ..., content, sources}, {assistant, ..., answer}, ...] makes more sense than the sources being with the answer; the 'content' in all previous user messages would contain the sources in that string as well (minus the current prompt, which would not yet have sources).
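To make "streamline the context there" concrete, a hedged sketch of a hypothetical preprocessing helper that keeps source chunks only on the most recent user message and strips them from older ones (it could just as well summarize them). Nothing like this exists in the PR; Message and MessageRole are the ragna.core types used above:

```python
def compact_history(
    messages: list[Message], *, keep_sources_for_last: int = 1
) -> list[Message]:
    # Walk the history newest-first so we can count recent user messages; drop
    # the source chunks of older ones so they don't bloat the context window.
    compacted: list[Message] = []
    recent_user_messages = 0
    for message in reversed(messages):
        if message.role == MessageRole.USER and message.sources:
            recent_user_messages += 1
            if recent_user_messages > keep_sources_for_last:
                message = Message(
                    content=message.content, role=message.role, sources=[]
                )
        compacted.append(message)
    return list(reversed(compacted))
```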

Comment (Contributor Author), quoting the above: "the 'content' in all previous user messages would contain the sources in that string as well (minus the current prompt, which would not yet have sources)."

So you're thinking of the stage of the pipeline where we have the most recent user prompt but have not yet retrieved any sources from source storage, is that right? Would you want .content to contain string-formatted sources from previous questions, or would it be more flexible to still have them as Source objects?

+        self._messages.append(question)
+
         answer = Message(
-            content=self._run_gen(self.assistant.answer, prompt, sources),
+            content=self._run_gen(self.assistant.answer, self._messages),
             role=MessageRole.ASSISTANT,
             sources=sources,
         )
-        if not stream:
-            await answer.read()
+
+        await answer.read()
+        # if not stream:
+        #     await answer.read()
 
         self._messages.append(answer)
 
1 change: 1 addition & 0 deletions ragna/deploy/_api/core.py
@@ -109,6 +109,7 @@ async def create_token(request: Request) -> str:
 def _get_component_json_schema(
     component: Type[Component],
 ) -> dict[str, dict[str, Any]]:
+    print(component._protocol_model())
     json_schema = component._protocol_model().model_json_schema()
     # FIXME: there is likely a better way to exclude certain fields builtin in
     # pydantic
5 changes: 3 additions & 2 deletions tests/assistants/test_api.py
@@ -5,7 +5,7 @@
 from ragna import assistants
 from ragna._compat import anext
 from ragna.assistants._http_api import HttpApiAssistant
-from ragna.core import RagnaException
+from ragna.core import Message, RagnaException
 from tests.utils import skip_on_windows
 
 HTTP_API_ASSISTANTS = [
@@ -25,7 +25,8 @@
 async def test_api_call_error_smoke(mocker, assistant):
     mocker.patch.dict(os.environ, {assistant._API_KEY_ENV_VAR: "SENTINEL"})
 
-    chunks = assistant().answer(prompt="?", sources=[])
+    messages = [Message(content="?", sources=[])]
+    chunks = assistant().answer(messages)
 
     with pytest.raises(RagnaException, match="API call failed"):
         await anext(chunks)
6 changes: 2 additions & 4 deletions tests/core/test_rag.py
@@ -45,8 +45,7 @@ def test_params_validation_missing(self, demo_document):
         class ValidationAssistant(Assistant):
             def answer(
                 self,
-                prompt,
-                sources,
+                messages,
                 bool_param: bool,
                 int_param: int,
                 float_param: float,
@@ -65,8 +64,7 @@ def test_params_validation_wrong_type(self, demo_document):
         class ValidationAssistant(Assistant):
             def answer(
                 self,
-                prompt,
-                sources,
+                messages,
                 bool_param: bool,
                 int_param: int,
                 float_param: float,
4 changes: 2 additions & 2 deletions tests/deploy/utils.py
@@ -8,7 +8,7 @@
 
 
 class TestAssistant(RagnaDemoAssistant):
-    def answer(self, prompt, sources, *, multiple_answer_chunks: bool = True):
+    def answer(self, messages, *, multiple_answer_chunks: bool = True):
         # Simulate a "real" assistant through a small delay. See
         # https://github.com/Quansight/ragna/pull/401#issuecomment-2095851440
         # for why this is needed.
@@ -17,7 +17,7 @@ def answer(self, prompt, sources, *, multiple_answer_chunks: bool = True):
         # the tests in deploy/ui/test_ui.py. This can be removed if TestAssistant
         # is ever removed from that file.
         time.sleep(1e-3)
-        content = next(super().answer(prompt, sources))
+        content = next(super().answer(messages))
 
         if multiple_answer_chunks:
             for chunk in content.split(" "):