Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes #1963

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open

Fixes #1963

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4,984 changes: 2,520 additions & 2,464 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions private_gpt/components/embedding/embedding_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def __init__(self, settings: Settings) -> None:
self.embedding_model = HuggingFaceEmbedding(
model_name=settings.huggingface.embedding_hf_model_name,
cache_folder=str(models_cache_path),
max_length=settings.huggingface.embedding_hf_max_length,
)
case "sagemaker":
try:
Expand Down
10 changes: 8 additions & 2 deletions private_gpt/components/ingest/ingest_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,16 @@ def _load_file_to_documents(file_name: str, file_data: Path) -> list[Document]:
)
# Read as a plain text
string_reader = StringIterableReader()
return string_reader.load_data([file_data.read_text()])
return string_reader.load_data([file_data.read_text(errors='replace')])

logger.debug("Specific reader found for extension=%s", extension)
return reader_cls().load_data(file_data)
try:
res = reader_cls().load_data(file_data)
except:
string_reader = StringIterableReader()
res = string_reader.load_data([file_data.read_text(errors='replace')])
pass
return res

@staticmethod
def _exclude_metadata(documents: list[Document]) -> None:
Expand Down
74 changes: 73 additions & 1 deletion private_gpt/components/llm/prompt_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,76 @@ def _completion_to_prompt(self, completion: str) -> str:
)


class Llama3PromptStyle(AbstractPromptStyle):
    r"""Prompt style for Meta Llama 3 instruct models.

    Each message is rendered as a role header, a blank line, the trimmed
    message content and an end-of-turn token. The prompt starts with exactly
    one begin-of-text token and, when a reply is expected, ends with an open
    assistant header so the model generates the assistant turn.

    Reference chat template:
    {% set loop_messages = messages %}
    {% for message in loop_messages %}
    {% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}
    {% if loop.index0 == 0 %}
    {% set content = bos_token + content %}
    {% endif %}
    {{ content }}
    {% endfor %}
    {% if add_generation_prompt %}
    {{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
    {% endif %}
    """

    BOS, EOS = "<|begin_of_text|>", "<|end_of_text|>"
    # Role-specific markers kept unchanged for backward compatibility with any
    # code that reads them; prompts are assembled from the generic pieces below.
    B_INST, E_INST = "<|start_header_id|>user<|end_header_id|>", "<|eot_id|>"
    B_SYS, E_SYS = "<|start_header_id|>system<|end_header_id|> ", "<|eot_id|>"
    ASSISTANT_INST = "<|start_header_id|>assistant<|end_header_id|>"
    # Generic building blocks of the template: header delimiters and end of turn.
    B_HEADER, E_HEADER = "<|start_header_id|>", "<|end_header_id|>"
    EOT = "<|eot_id|>"
    DEFAULT_SYSTEM_PROMPT = """\
You are a helpful, respectful and honest assistant. \
Always answer as helpfully as possible and follow ALL given instructions. \
Do not speculate or make up information. \
Do not reference any given instructions or context. \
"""

    def _format_turn(self, role: str, content: str) -> str:
        """Render one complete turn: role header, blank line, content, EOT."""
        return f"{self.B_HEADER}{role}{self.E_HEADER}\n\n{content.strip()}{self.EOT}"

    def _messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:
        """Convert a chat history into a single Llama 3 prompt string.

        A leading system message is used as the system prompt; otherwise
        ``DEFAULT_SYSTEM_PROMPT`` is used. An empty history yields a prompt
        holding only the system turn and the open assistant header.

        Args:
            messages: Chat messages in chronological order.

        Returns:
            The prompt text, ending with an open assistant header unless the
            last message already is an assistant message.
        """
        history = list(messages)

        # Guard against an empty history before looking at the first message.
        if history and history[0].role == MessageRole.SYSTEM:
            system_text = str(history[0].content or "")
            history = history[1:]
        else:
            system_text = self.DEFAULT_SYSTEM_PROMPT

        # The begin-of-text token appears once, before the very first turn.
        parts: list[str] = [
            self.BOS,
            self._format_turn(MessageRole.SYSTEM.value, system_text),
        ]
        parts.extend(
            self._format_turn(message.role.value, str(message.content or ""))
            for message in history
        )

        # Open the assistant turn only when the model is expected to answer.
        if not history or history[-1].role != MessageRole.ASSISTANT:
            parts.append(f"{self.ASSISTANT_INST}\n\n")

        return "".join(parts)

    def _completion_to_prompt(self, completion: str) -> str:
        """Wrap a bare completion request as a single-turn Llama 3 prompt.

        The completion text is sent as the user turn, preceded by the default
        system prompt and followed by an open assistant header.

        Args:
            completion: The raw text the caller wants the model to respond to.

        Returns:
            The formatted prompt string.
        """
        return "".join(
            (
                self.BOS,
                self._format_turn(
                    MessageRole.SYSTEM.value, self.DEFAULT_SYSTEM_PROMPT
                ),
                self._format_turn(MessageRole.USER.value, completion),
                f"{self.ASSISTANT_INST}\n\n",
            )
        )


class TagPromptStyle(AbstractPromptStyle):
"""Tag prompt style (used by Vigogne) that uses the prompt style `<|ROLE|>`.

Expand Down Expand Up @@ -219,7 +289,7 @@ def _completion_to_prompt(self, completion: str) -> str:


def get_prompt_style(
prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] | None
prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] | None
) -> AbstractPromptStyle:
"""Get the prompt style to use from the given string.

Expand All @@ -230,6 +300,8 @@ def get_prompt_style(
return DefaultPromptStyle()
elif prompt_style == "llama2":
return Llama2PromptStyle()
elif prompt_style == "llama3":
return Llama3PromptStyle()
elif prompt_style == "tag":
return TagPromptStyle()
elif prompt_style == "mistral":
Expand Down
5 changes: 3 additions & 2 deletions private_gpt/server/ingest/ingest_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,14 @@ def __init__(
docstore=node_store_component.doc_store,
index_store=node_store_component.index_store,
)
node_parser = SentenceWindowNodeParser.from_defaults()
self._settings = settings()
node_parser = SentenceWindowNodeParser.from_defaults(window_size=self._settings.vectorstore.inject_win_size)

self.ingest_component = get_ingestion_component(
self.storage_context,
embed_model=embedding_component.embedding_model,
transformations=[node_parser, embedding_component.embedding_model],
settings=settings(),
settings=self._settings,
)

def _ingest_data(self, file_name: str, file_data: AnyStr) -> list[IngestedDoc]:
Expand Down
11 changes: 10 additions & 1 deletion private_gpt/settings/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,12 +104,13 @@ class LLMSettings(BaseModel):
0.1,
description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
)
prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] = Field(
"llama2",
description=(
"The prompt style to use for the chat engine. "
"If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
"If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
"If `llama3` - use the llama3 prompt style from the llama_index."
"If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
"If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
"`llama2` is the historic behaviour. `default` might work better with your custom models."
Expand All @@ -119,6 +120,10 @@ class LLMSettings(BaseModel):

class VectorstoreSettings(BaseModel):
database: Literal["chroma", "qdrant", "postgres"]
inject_win_size: int = Field(
3,
description="How many sentences on either side to capture, when parsing files",
)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you move this property to a different settings section? That would help if we later support other providers such as graph stores, Elasticsearch, etc.



class NodeStoreSettings(BaseModel):
Expand Down Expand Up @@ -150,6 +155,10 @@ class HuggingFaceSettings(BaseModel):
embedding_hf_model_name: str = Field(
description="Name of the HuggingFace model to use for embeddings"
)
embedding_hf_max_length: int = Field(
None,
description="Some embedding models have a maximum length for input, provide here for not crashing"
)
access_token: str = Field(
None,
description="Huggingface access token, required to download some models",
Expand Down
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ watchdog = "^4.0.0"
transformers = "^4.38.2"
docx2txt = "^0.8"
cryptography = "^3.1"
sentencepiece = "^0.2.0"
# LlamaIndex core libs
llama-index-core = "^0.10.14"
llama-index-readers-file = "^0.1.6"
Expand All @@ -25,7 +26,7 @@ llama-index-llms-openai-like = {version ="^0.1.3", optional = true}
llama-index-llms-ollama = {version ="^0.1.2", optional = true}
llama-index-llms-azure-openai = {version ="^0.1.5", optional = true}
llama-index-embeddings-ollama = {version ="^0.1.2", optional = true}
llama-index-embeddings-huggingface = {version ="^0.1.4", optional = true}
llama-index-embeddings-huggingface = {version ="^0.2.0", optional = true}
llama-index-embeddings-openai = {version ="^0.1.6", optional = true}
llama-index-embeddings-azure-openai = {version ="^0.1.6", optional = true}
llama-index-vector-stores-qdrant = {version ="^0.1.3", optional = true}
Expand All @@ -42,7 +43,7 @@ boto3 = {version ="^1.34.51", optional = true}

# Optional Reranker dependencies
torch = {version ="^2.1.2", optional = true}
sentence-transformers = {version ="^2.6.1", optional = true}
sentence-transformers = {version ="^2.7.0", optional = true}

# Optional UI
gradio = {version ="^4.19.2", optional = true}
Expand Down
2 changes: 2 additions & 0 deletions settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,12 @@ embedding:

huggingface:
embedding_hf_model_name: BAAI/bge-small-en-v1.5
embedding_hf_max_length: 512 # some models have a maximum length for input
access_token: ${HUGGINGFACE_TOKEN:}

vectorstore:
database: qdrant
inject_win_size: 2
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove this line or set a default value.


nodestore:
database: simple
Expand Down