Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/summary #17

Merged
merged 20 commits into from
Mar 1, 2024
Merged
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,23 @@ HUGGINGFACEHUB_API_TOKEN="MY_HF_API_KEY"

You can also pass `huggingfacehub_api_token` as a named parameter.

#### AWS Bedrock
Create your access keys in the security credentials section of your AWS user.

Then edit the files `~/.aws/config` and `~/.aws/credentials` on Linux and macOS, or `%USERPROFILE%\.aws\config` and `%USERPROFILE%\.aws\credentials` on Windows:

In credentials:
```shell
[default]
aws_access_key_id = <YOUR_CREATED_AWS_KEY>
aws_secret_access_key = <YOUR_CREATED_AWS_SECRET_KEY>
```

In config:
```shell
[default]
region = <AWS_REGION>
```

### ⚙️ Install locally

Expand Down Expand Up @@ -238,6 +254,7 @@ The current available tasks in Promptmeteo are:
| `CodeGenerator` | Code generation |
| `ApiGenerator` | API REST generation |
| `ApiFormatter` | API REST correction |
| `Summarizer` | Text summarization |

### ✅ Available Model

Expand All @@ -259,3 +276,5 @@ The current available `model_name` and `language` values are:
| google | text-bison@001 | en |
| google | text-bison-32k | es |
| google | text-bison-32k | en |
| bedrock | anthropic.claude-v2 | en |
| bedrock | anthropic.claude-v2 | es |
1,718 changes: 1,718 additions & 0 deletions examples/05_test_openai_classification_prompts.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions promptmeteo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
from .document_classifier import DocumentClassifier
from .api_generator import APIGenerator
from .api_formatter import APIFormatter
from .summarizer import Summarizer
2 changes: 1 addition & 1 deletion promptmeteo/api_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,4 +382,4 @@ def replace_values(orig_dict, replace_dict):
sort_keys=False,
)

return api
return api
6 changes: 6 additions & 0 deletions promptmeteo/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from .hf_hub_api import HFHubApiLLM
from .hf_pipeline import HFPipelineLLM
from .google_vertexai import GoogleVertexAILLM
from .bedrock import BedrockLLM


class ModelProvider(str, Enum):
Expand All @@ -42,6 +43,7 @@ class ModelProvider(str, Enum):
PROVIDER_2: str = "hf_hub_api"
PROVIDER_3: str = "hf_pipeline"
PROVIDER_4: str = "google-vertexai"
PROVIDER_5: str = "bedrock"


class ModelFactory:
Expand All @@ -57,6 +59,7 @@ class ModelFactory:
ModelProvider.PROVIDER_2: HFHubApiLLM,
ModelProvider.PROVIDER_3: HFPipelineLLM,
ModelProvider.PROVIDER_3: GoogleVertexAILLM,
ModelProvider.PROVIDER_5: BedrockLLM
}

@classmethod
Expand Down Expand Up @@ -87,6 +90,9 @@ def factory_method(

elif model_provider_name == ModelProvider.PROVIDER_4.value:
model_cls = GoogleVertexAILLM

elif model_provider_name == ModelProvider.PROVIDER_5.value:
model_cls = BedrockLLM

else:
raise ValueError(
Expand Down
127 changes: 127 additions & 0 deletions promptmeteo/models/bedrock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
#!/usr/bin/python3

# Copyright (c) 2023 Paradigma Digital S.L.

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

from enum import Enum
from typing import Dict
from typing import Optional
import os
import boto3
from langchain.llms.bedrock import Bedrock
from langchain.embeddings import HuggingFaceEmbeddings

from .base import BaseModel


class ModelTypes(str, Enum):
    """
    Identifiers of the Bedrock models supported by this module.
    """

    AnthropicClaudeV2: str = "anthropic.claude-v2"

    @classmethod
    def has_value(cls, value: str) -> bool:
        """
        Tell whether ``value`` is the value of one of the enum members.
        """

        # Enum keeps a value -> member lookup table; membership in it is
        # exactly "is this a known model identifier".
        members_by_value = cls._value2member_map_
        return value in members_by_value


class ModelEnum(Enum):
    """
    Default configuration attached to each supported model.
    """

    # NOTE(review): BedrockLLM reads these defaults through
    # ``ModelEnum[<name>].value``, i.e. it relies on the nested class being
    # exposed as an enum member -- confirm this holds for the Python version
    # the project targets.
    class AnthropicClaudeV2:
        """
        Defaults used for Anthropic Claude V2.
        """

        client = Bedrock
        embedding = HuggingFaceEmbeddings
        model_task: str = "text2text-generation"
        params: dict = dict(
            max_tokens_to_sample=2048,
            temperature=0.3,
            top_k=250,
            top_p=0.999,
            stop_sequences=["Human:"],
        )


class BedrockLLM(BaseModel):
    """
    LLM model served through AWS Bedrock.
    """

    def __init__(
        self,
        model_name: Optional[str] = "",
        model_params: Optional[Dict] = None,
        model_provider_token: Optional[str] = "",
        **kwargs
    ) -> None:
        """
        Create the Bedrock LLM client and the embeddings model.

        Parameters
        ----------
        model_name : str
            Bedrock model identifier; must be one of the ``ModelTypes``
            values.
        model_params : dict, optional
            Passed to the LLM client as ``model_kwargs``. When empty or
            ``None`` the defaults declared in ``ModelEnum`` are used.
        model_provider_token : str, optional
            Not used by this constructor; kept in the signature so it can be
            called like the other model classes.
        **kwargs
            Forwarded unchanged to ``boto3.client('bedrock-runtime', ...)``.

        Raises
        ------
        ValueError
            If ``model_name`` is not a supported model identifier.
        """
        import copy

        if not ModelTypes.has_value(model_name):
            # List the accepted *values*: those are what the caller has to
            # pass, the enum member names are internal.
            raise ValueError(
                f"`model_name`={model_name} not in supported model names: "
                f"{[i.value for i in ModelTypes]}"
            )

        self.boto3_bedrock = boto3.client("bedrock-runtime", **kwargs)
        super().__init__()

        # Enum member name, used as the key into ModelEnum.
        model = ModelTypes(model_name).name
        model_defaults = ModelEnum[model].value

        # Model parameters: copy the defaults so that later changes to
        # ``self.model_params`` cannot leak into the class-level defaults
        # shared by every instance.
        if not model_params:
            model_params = copy.deepcopy(model_defaults.params)
        self.model_params = model_params

        # Model
        self._llm = model_defaults.client(
            model_id=model_name,
            model_kwargs=self.model_params,
            client=self.boto3_bedrock,
        )

        # Use the embeddings model from the local directory when it exists,
        # otherwise refer to it by its sentence-transformers name.
        embedding_name = "sentence-transformers/all-MiniLM-L6-v2"
        if os.path.exists("/home/models/all-MiniLM-L6-v2"):
            embedding_name = "/home/models/all-MiniLM-L6-v2"

        self._embeddings = HuggingFaceEmbeddings(model_name=embedding_name)
9 changes: 9 additions & 0 deletions promptmeteo/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from .base import BaseParser
from .dummy_parser import DummyParser
from .classification_parser import ClassificationParser
from .json_parser import JSONParser


class ParserTypes(str, Enum):
Expand All @@ -41,6 +42,8 @@ class ParserTypes(str, Enum):
PARSER_4: str = "code-generation"
PARSER_5: str = "api-generation"
PARSER_6: str = "api-correction"
PARSER_7: str = "json-info-extraction"
PARSER_8: str = "summarization"


class ParserFactory:
Expand Down Expand Up @@ -77,6 +80,12 @@ def factory_method(

elif task_type == ParserTypes.PARSER_6.value:
parser_cls = ApiParser

elif task_type == ParserTypes.PARSER_7.value:
parser_cls = JSONParser

elif task_type == ParserTypes.PARSER_8.value:
parser_cls = DummyParser

else:
raise ValueError(
Expand Down
67 changes: 67 additions & 0 deletions promptmeteo/parsers/json_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/usr/bin/python3

# Copyright (c) 2023 Paradigma Digital S.L.

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

from typing import List
import re
from .base import BaseParser
import regex
import json


class JSONParser(BaseParser):
    """
    Parser for potential JSON outputs
    """

    # Matches a brace-delimited object, recursing (``(?R)``) into nested
    # braces. Compiled once instead of on every call.
    _JSON_OBJECT_PATTERN = regex.compile(r'\{(?:[^{}]|(?R))*\}')

    def run(
        self,
        text: str,
    ) -> str:
        """
        Given a response string from an LLM, returns the JSON object found in
        it as a string.

        Returns an empty string when ``text`` contains no brace-delimited
        object or when the extracted object is not valid JSON.
        """

        try:
            json_output = self._preprocess(text)
            # Parsed only to validate; the caller receives the string.
            json.loads(json_output)
        except (TypeError, ValueError, RecursionError):
            # ValueError: nothing to extract, or invalid JSON
            #   (json.JSONDecodeError is a ValueError).
            # TypeError: ``text`` is not a string.
            # RecursionError: pathologically deep nesting.
            return ""

        return json_output

    def _preprocess(
        self,
        text: str,
    ) -> str:
        """
        Extract the first brace-delimited object from ``text``.

        If the extracted span is already valid JSON it is returned untouched.
        Otherwise single quotes are replaced by double quotes, to cope with
        Python-dict-style output. Doing the replacement only as a fallback
        keeps valid JSON containing apostrophes (``{"a": "it's"}``) intact.

        Raises ``ValueError`` when no brace-delimited object is found.
        """
        match = self._JSON_OBJECT_PATTERN.search(text)
        if match is None:
            raise ValueError("no JSON object found in text")

        str_json = match.group(0)

        try:
            json.loads(str_json)
        except ValueError:
            return str_json.replace("'", '"')

        return str_json
61 changes: 61 additions & 0 deletions promptmeteo/prompts/anthropic.claude-v2_en_classification.prompt
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Copyright (c) 2023 Paradigma Digital S.L.

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.


TEMPLATE:
"I need you to help me with a text classification task.
{__PROMPT_DOMAIN__}
{__PROMPT_LABELS__}

{__CHAIN_THOUGHT__}
{__ANSWER_FORMAT__}
{__SHOT_EXAMPLES__}
{__PROMPT_SAMPLE__}"


PROMPT_DOMAIN:
"The texts you will be processing are from the {__DOMAIN__} domain."


PROMPT_LABELS:
"I want you to classify the texts into one of the following categories:
{__LABELS__}."


PROMPT_DETAIL:
""

SHOT_EXAMPLES:
"Examples:\n\n{__EXAMPLES__}"

PROMPT_SAMPLE:
"\n\n{__SAMPLE__}\n"

CHAIN_THOUGHT:
"Please provide a step-by-step argument for your answer, explain why you
believe your final choice is justified, and make sure to conclude your
explanation with the name of the class you have selected as the correct
one, in lowercase and without punctuation."


ANSWER_FORMAT:
"In your response, include only the name of the class as a single word, in
lowercase, without punctuation, and without adding any other statements or
words."
Loading
Loading