zylon-ai · hirschrobert · Jun 6, 2024 · Jun 6, 2024 · Jun 6, 2024 · Jun 6, 2024
diff --git a/poetry.lock b/poetry.lock
diff --git a/private_gpt/components/embedding/embedding_component.py b/private_gpt/components/embedding/embedding_component.py
@@ -31,6 +31,7 @@ def __init__(self, settings: Settings) -> None:
                 self.embedding_model = HuggingFaceEmbedding(
                     model_name=settings.huggingface.embedding_hf_model_name,
                     cache_folder=str(models_cache_path),
+                    max_length=settings.huggingface.embedding_hf_max_length,
                 )
             case "sagemaker":
                 try:

diff --git a/private_gpt/components/ingest/ingest_helper.py b/private_gpt/components/ingest/ingest_helper.py
@@ -89,10 +89,16 @@ def _load_file_to_documents(file_name: str, file_data: Path) -> list[Document]:
             )
             # Read as a plain text
             string_reader = StringIterableReader()
-            return string_reader.load_data([file_data.read_text()])
+            return string_reader.load_data([file_data.read_text(errors='replace')])
 
         logger.debug("Specific reader found for extension=%s", extension)
-        return reader_cls().load_data(file_data)
+        try:
+            res = reader_cls().load_data(file_data)
+        except:
+            string_reader = StringIterableReader()
+            res = string_reader.load_data([file_data.read_text(errors='replace')])
+            pass
+        return res
 
     @staticmethod
     def _exclude_metadata(documents: list[Document]) -> None:

diff --git a/private_gpt/components/llm/prompt_helper.py b/private_gpt/components/llm/prompt_helper.py
@@ -138,6 +138,76 @@ def _completion_to_prompt(self, completion: str) -> str:
         )
 
 
+class Llama3PromptStyle(AbstractPromptStyle):
+    r"""Prompt style for Meta Llama 3 instruct models.
+
+    The prompt opens with one `<|begin_of_text|>`, renders every message as a
+    header/content/`<|eot_id|>` turn and ends with an open assistant header:
+
+    {% set loop_messages = messages %}
+    {% for message in loop_messages %}
+        {% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}
+        {% if loop.index0 == 0 %}
+            {% set content = bos_token + content %}
+        {% endif %}
+        {{ content }}
+    {% endfor %}
+    {% if add_generation_prompt %}
+        {{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
+    {% endif %}
+    """
+
+    BOS, EOS = "<|begin_of_text|>", "<|end_of_text|>"
+    B_INST, E_INST = "<|start_header_id|>user<|end_header_id|>", "<|eot_id|>"
+    B_SYS, E_SYS = "<|start_header_id|>system<|end_header_id|>", "<|eot_id|>"
+    ASSISTANT_INST = "<|start_header_id|>assistant<|end_header_id|>"
+    # Adjacent literals; a backslash-continued """ string embeds source indentation.
+    DEFAULT_SYSTEM_PROMPT = (
+        "You are a helpful, respectful and honest assistant. "
+        "Always answer as helpfully as possible and follow ALL given instructions. "
+        "Do not speculate or make up information. "
+        "Do not reference any given instructions or context."
+    )
+
+    def _turn(self, header: str, content: object) -> str:
+        """Render one closed turn: header, blank line, trimmed content, end token."""
+        return f"{header}\n\n{str(content or '').strip()}{self.E_INST}"
+
+    def _messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:
+        """Render a chat history as a Llama 3 prompt with an open assistant turn.
+
+        `messages` may be empty; a leading system message replaces
+        `DEFAULT_SYSTEM_PROMPT`, and roles need not strictly alternate.
+        """
+        history = list(messages)
+        if history and history[0].role == MessageRole.SYSTEM:
+            system_text = history.pop(0).content
+        else:
+            system_text = self.DEFAULT_SYSTEM_PROMPT
+        parts = [self.BOS, self._turn(self.B_SYS, system_text)]
+        for message in history:
+            if message.role == MessageRole.USER:
+                header = self.B_INST
+            elif message.role == MessageRole.ASSISTANT:
+                header = self.ASSISTANT_INST
+            else:
+                # Any other role gets the generic `{role}` header.
+                role = getattr(message.role, "value", message.role)
+                header = f"<|start_header_id|>{role}<|end_header_id|>"
+            parts.append(self._turn(header, message.content))
+
+        # Generation prompt: leave the assistant turn open.
+        parts.append(f"{self.ASSISTANT_INST}\n\n")
+        return "".join(parts)
+
+    def _completion_to_prompt(self, completion: str) -> str:
+        """Wrap `completion` as one user turn after the default system prompt."""
+        return (
+            f"{self.BOS}{self._turn(self.B_SYS, self.DEFAULT_SYSTEM_PROMPT)}"
+            f"{self._turn(self.B_INST, completion)}{self.ASSISTANT_INST}\n\n"
+        )
+
+
 class TagPromptStyle(AbstractPromptStyle):
     """Tag prompt style (used by Vigogne) that uses the prompt style `<|ROLE|>`.
 
@@ -219,7 +289,7 @@ def _completion_to_prompt(self, completion: str) -> str:
 
 
 def get_prompt_style(
-    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] | None
+    prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] | None
 ) -> AbstractPromptStyle:
     """Get the prompt style to use from the given string.
 
@@ -230,6 +300,8 @@ def get_prompt_style(
         return DefaultPromptStyle()
     elif prompt_style == "llama2":
         return Llama2PromptStyle()
+    elif prompt_style == "llama3":
+        return Llama3PromptStyle()
     elif prompt_style == "tag":
         return TagPromptStyle()
     elif prompt_style == "mistral":

diff --git a/private_gpt/server/ingest/ingest_service.py b/private_gpt/server/ingest/ingest_service.py
@@ -39,13 +39,14 @@ def __init__(
             docstore=node_store_component.doc_store,
             index_store=node_store_component.index_store,
         )
-        node_parser = SentenceWindowNodeParser.from_defaults()
+        self._settings = settings()
+        node_parser = SentenceWindowNodeParser.from_defaults(window_size=self._settings.vectorstore.inject_win_size)
 
         self.ingest_component = get_ingestion_component(
             self.storage_context,
             embed_model=embedding_component.embedding_model,
             transformations=[node_parser, embedding_component.embedding_model],
-            settings=settings(),
+            settings=self._settings,
         )
 
     def _ingest_data(self, file_name: str, file_data: AnyStr) -> list[IngestedDoc]:

diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
@@ -104,12 +104,13 @@ class LLMSettings(BaseModel):
         0.1,
         description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
     )
-    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
+    prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] = Field(
         "llama2",
         description=(
             "The prompt style to use for the chat engine. "
             "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
             "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
+            "If `llama3` - use the llama3 prompt style from the llama_index."
             "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
             "If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
             "`llama2` is the historic behaviour. `default` might work better with your custom models."
@@ -119,6 +120,10 @@ class LLMSettings(BaseModel):
 
 class VectorstoreSettings(BaseModel):
     database: Literal["chroma", "qdrant", "postgres"]
+    inject_win_size: int = Field(
+        3,
+        description="How many sentences on either side to capture, when parsing files",
+    )
 
 
 class NodeStoreSettings(BaseModel):
@@ -150,6 +155,10 @@ class HuggingFaceSettings(BaseModel):
     embedding_hf_model_name: str = Field(
         description="Name of the HuggingFace model to use for embeddings"
     )
+    embedding_hf_max_length: int = Field(
+        None,
+        description="Some embedding models have a maximum length for input, provide here for not crashing"
+    )
     access_token: str = Field(
         None,
         description="Huggingface access token, required to download some models",

diff --git a/pyproject.toml b/pyproject.toml
@@ -15,6 +15,7 @@ watchdog = "^4.0.0"
 transformers = "^4.38.2"
 docx2txt = "^0.8"
 cryptography = "^3.1"
+sentencepiece = "^0.2.0"
 # LlamaIndex core libs
 llama-index-core = "^0.10.14"
 llama-index-readers-file = "^0.1.6"
@@ -25,7 +26,7 @@ llama-index-llms-openai-like = {version ="^0.1.3", optional = true}
 llama-index-llms-ollama = {version ="^0.1.2", optional = true}
 llama-index-llms-azure-openai = {version ="^0.1.5", optional = true}
 llama-index-embeddings-ollama = {version ="^0.1.2", optional = true}
-llama-index-embeddings-huggingface = {version ="^0.1.4", optional = true}
+llama-index-embeddings-huggingface = {version ="^0.2.0", optional = true}
 llama-index-embeddings-openai = {version ="^0.1.6", optional = true}
 llama-index-embeddings-azure-openai = {version ="^0.1.6", optional = true}
 llama-index-vector-stores-qdrant = {version ="^0.1.3", optional = true}
@@ -42,7 +43,7 @@ boto3 = {version ="^1.34.51", optional = true}
 
 # Optional Reranker dependencies
 torch = {version ="^2.1.2", optional = true}
-sentence-transformers = {version ="^2.6.1", optional = true}
+sentence-transformers = {version ="^2.7.0", optional = true}
 
 # Optional UI
 gradio = {version ="^4.19.2", optional = true}

diff --git a/settings.yaml b/settings.yaml
@@ -69,10 +69,12 @@ embedding:
 
 huggingface:
   embedding_hf_model_name: BAAI/bge-small-en-v1.5
+  embedding_hf_max_length: 512 # some models have a maximum length for input
   access_token: ${HUGGINGFACE_TOKEN:}
 
 vectorstore:
   database: qdrant
+  inject_win_size: 2
 
 nodestore:
   database: simple