
fix: support both china region and global deploy #104

Merged · 43 commits · Feb 6, 2024

Commits
0d68cf6  feat: add cors to etl lambda (SeisSerenata, Jan 25, 2024)
0e4a666  modify (Jan 26, 2024)
06bd41f  Merge branch 'dev' of https://github.com/aws-samples/llm-bot into dev (Jan 26, 2024)
92515e3  fix: parameter error. (Jan 26, 2024)
40dfef4  feat: add retriever result num. (Jan 26, 2024)
744d4e9  feat: add reranker switch. (Jan 26, 2024)
31fb668  update layout model (Jan 29, 2024)
245978b  update layout model (Jan 29, 2024)
acfbb88  fix convert markdown bug (Jan 29, 2024)
9ac0d28  chore: update shell script (NingLu, Jan 29, 2024)
e372e1d  add SagemakerEndpointChat; support baichuan13b model (Jan 31, 2024)
7394b04  add market conversation entry; fix bug with code output (Feb 2, 2024)
95d56c9  feat: change default retrieval config. (Feb 4, 2024)
f5a6674  Merge branch 'dev' of github.com:aws-samples/llm-bot into dev (Feb 4, 2024)
941c9e3  modify (Jan 26, 2024)
040c068  fix: parameter error. (Jan 26, 2024)
70b1861  feat: add retriever result num. (Jan 26, 2024)
116e967  feat: add reranker switch. (Jan 26, 2024)
2ac28d8  update layout model (Jan 29, 2024)
ce75709  update layout model (Jan 29, 2024)
d3051a0  fix convert markdown bug (Jan 29, 2024)
919e674  chore: update shell script (NingLu, Jan 29, 2024)
525eed1  add SagemakerEndpointChat; support baichuan13b model (Jan 31, 2024)
5b30b45  add market conversation entry; fix bug with code output (Feb 2, 2024)
84eb0ac  feat: support china region deploy (IcyKallen, Feb 4, 2024)
4869efb  chore: reorganize lambda layers (IcyKallen, Feb 4, 2024)
9ae21d6  chore: update etl api (IcyKallen, Feb 4, 2024)
620661a  chore: update requirements for embedding layer (IcyKallen, Feb 4, 2024)
f083d56  Merge branch 'dev' of https://github.com/aws-samples/llm-bot into dev (Feb 5, 2024)
f0be236  llm_utils code clean; support internlm2 model (Feb 5, 2024)
b13cd4c  feat: add async get context. (Feb 5, 2024)
f7f075e  feat: add timeit decorator. (Feb 5, 2024)
2e98ece  Merge branch 'dev' of github.com:aws-samples/llm-bot into dev (Feb 5, 2024)
bf618f0  modify instruct model artifacts (Feb 5, 2024)
ee656b9  Merge branch 'dev' of https://github.com/aws-samples/llm-bot into dev (Feb 5, 2024)
87c4ae1  chore: rollback etl slanet model (IcyKallen, Feb 5, 2024)
5afd117  feat: add lang type end embedding in qq retrieval. (Feb 5, 2024)
3a907ba  fix: typo retriever. (Feb 5, 2024)
11d5bc9  fix: typo retriever. (Feb 5, 2024)
1cb372c  layout model based onnxruntime (Feb 5, 2024)
e4fc78b  Merge branch 'dev' of github.com:aws-samples/llm-bot into dev (Feb 5, 2024)
33a80a4  unified chat history (Feb 6, 2024)
d8bd2a6  fix: fix transformers in etl utils (IcyKallen, Feb 6, 2024)
82 changes: 31 additions & 51 deletions source/lambda/executor/main.py
@@ -8,6 +8,7 @@
import traceback
import uuid
import asyncio
import math

import boto3
from langchain.callbacks.base import BaseCallbackHandler
@@ -28,8 +29,13 @@
RunnableParallel,
RunnablePassthrough,
)
from langchain.memory import ConversationSummaryMemory, ChatMessageHistory
from langchain.utilities import GoogleSearchAPIWrapper
from langchain.schema.messages import (
HumanMessage,AIMessage,SystemMessage
)
# from langchain.memory import ConversationSummaryMemory, ChatMessageHistory
# from langchain.utilities import GoogleSearchAPIWrapper
from dateutil import parser

from reranker import BGEReranker, MergeReranker
from retriever import (
QueryDocumentRetriever,
@@ -44,15 +50,15 @@
from constant import IntentType, Type
from ddb_utils import DynamoDBChatMessageHistory
from intent_utils import auto_intention_recoginition_chain
from langchain_utils import create_identity_lambda
# from langchain_utils import create_identity_lambda

# from llm_utils import generate as llm_generate
from llm_utils import get_llm_chain,get_llm_model
from llmbot_utils import (
QueryType,
# QueryType,
combine_recalls,
concat_recall_knowledge,
process_input_messages,
# concat_recall_knowledge,
# process_input_messages,
)
from time_utils import timeit
from preprocess_utils import run_preprocess
@@ -63,6 +69,7 @@
from langchain_utils import add_key_to_debug,chain_logger
from query_process_utils import get_query_process_chain
import parse_config
from serialization_utils import JSONEncoder

region = os.environ["AWS_REGION"]
embedding_endpoint = os.environ.get("embedding_endpoint", "")
@@ -204,27 +211,6 @@
results.append(result)
return results


# def organize_results(response, aos_index=None):
# """
# Organize results from aos response

# :param query_type: query type
# :param response: aos response json
# """
# results = []
# aos_hits = response["hits"]["hits"]
# for aos_hit in aos_hits:
# result = {}
# result["source"] = aos_hit["_source"]["metadata"]["file_path"]
# result["score"] = aos_hit["_score"]
# result["detail"] = aos_hit["_source"]
# result["content"] = aos_hit["_source"]["text"]
# result["doc"] = aos_hit["_source"]["text"]
# results.append(result)
# return results


def remove_redundancy_debug_info(results):
filtered_results = copy.deepcopy(results)
for result in filtered_results:
@@ -233,7 +219,6 @@
del result["detail"][field]
return filtered_results


def parse_query(
query_input: str,
history: list,
@@ -292,7 +277,7 @@
stop=None,
)
elpase_time = time.time() - start
logger.info(f"runing time of parse query: {elpase_time}s seconds")

[GitHub Actions check failure (miss spelling check), line 280: runing ==> running, ruining]
return parsed_query


@@ -321,7 +306,7 @@
)
# logger.info(json.dumps(opensearch_knn_response, ensure_ascii=False))
elpase_time = time.time() - start
logger.info(f"runing time of opensearch_knn : {elpase_time}s seconds")

[GitHub Actions check failure (miss spelling check), line 309: runing ==> running, ruining]
answer = None
sources = None
if len(opensearch_knn_results) > 0:
@@ -371,7 +356,7 @@
# logger.info(json.dumps(opensearch_knn_response, ensure_ascii=False))
faq_recall_end_time = time.time()
elpase_time = faq_recall_end_time - start
logger.info(f"runing time of faq recall : {elpase_time}s seconds")

[GitHub Actions check failure (miss spelling check), line 359: runing ==> running, ruining]
filter = None
if parsed_query["is_api_query"]:
filter = [{"term": {"metadata.is_api": True}}]
@@ -404,7 +389,7 @@
)
ug_recall_end_time = time.time()
elpase_time = ug_recall_end_time - faq_recall_end_time
logger.info(f"runing time of ug recall: {elpase_time}s seconds")

[GitHub Actions check failure (miss spelling check), line 392: runing ==> running, ruining]

# 2. get AOS invertedIndex recall
opensearch_query_results = []
@@ -458,7 +443,7 @@

rerank_end_time = time.time()
elpase_time = rerank_end_time - ug_recall_end_time
logger.info(f"runing time of rerank: {elpase_time}s seconds")

[GitHub Actions check failure (miss spelling check), line 446: runing ==> running, ruining]

return rerank_knowledge

@@ -559,7 +544,7 @@
answer = llm_generate(**generate_input)
llm_end_time = time.time()
elpase_time = llm_end_time - llm_start_time
logger.info(f"runing time of llm: {elpase_time}s seconds")

[GitHub Actions check failure (miss spelling check), line 547: runing ==> running, ruining]
# answer = ret["answer"]
debug_info["knowledge_qa_llm"] = answer
except Exception as e:
@@ -724,7 +709,6 @@
return qd_llm_chain



def get_chat_llm_chain(
rag_config,
stream=False
@@ -750,25 +734,6 @@
"context_sources": lambda x: [],
}
return chat_llm_chain

# def get_qq_result(docs, threshold=0.7):
# if len(docs) > 0 and docs[0]["score"]:
# source = docs[0]["source"]
# answer = docs[0]["answer"]
# sources = [source]
# return answer, sources
# else:
# return None, []


# def output_postprocess(raw_output):
# output = {"answer": "", "sources": [], "contexts": []}
# if raw_output is not None:
# output["answer"] = raw_output.get("answer", "")
# output["sources"] = raw_output.get("sources", [])
# output["contexts"] = raw_output.get("contexts", [])
# return output


def market_chain_entry(
query_input: str,
@@ -933,13 +898,14 @@
}
))



answer = response["answer"]
sources = response["context_sources"]
contexts = response["context_docs"]

return answer, sources, contexts, debug_info


def market_conversation_summary_entry(
messages:list[dict],
rag_config=None,
@@ -950,8 +916,22 @@
assert messages,messages
chat_history = []
for message in messages:
chat_history.append((message['role'],message['content']))
role = message['role']
content = message['content']
assert role in ['user','ai']
if role == 'user':
chat_history.append(HumanMessage(content=content))
else:
chat_history.append(AIMessage(content=content))
rag_config['chat_history'] = chat_history

else:
# filter by the window time
time_window = rag_config.get('time_window',{})
start_time = time_window.get('start_time',-math.inf)
end_time = time_window.get('end_time',math.inf)
assert isinstance(start_time, float) and isinstance(end_time, float), (start_time, end_time)
chat_history = rag_config['chat_history']

rag_config['intent_config']['intent_type'] = IntentType.CHAT.value
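
Note: the else branch above computes start_time and end_time, but the line that actually applies the window to chat_history is truncated in this diff view. A minimal sketch of what that filter presumably does, assuming each message carries the create_time stamped in ddb_utils (names below are illustrative, not code from this PR):

    chat_history = [
        msg for msg in chat_history
        if start_time <= float(msg.additional_kwargs.get('create_time', 0.0)) <= end_time
    ]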

@@ -1174,7 +1154,7 @@
)
history_messages = chat_history.message_as_langchain
rag_config['chat_history'] = history_messages
logger.info(f'rag configs:\n {json.dumps(rag_config,indent=2,ensure_ascii=False)}')
logger.info(f'rag configs:\n {json.dumps(rag_config,indent=2,ensure_ascii=False,cls=JSONEncoder)}')
#
# knowledge_qa_flag = True if model == "knowledge_qa" else False

@@ -1237,7 +1217,7 @@
)

main_entry_elpase = time.time() - main_entry_start
logger.info(f"runing time of {biz_type} entry : {main_entry_elpase}s seconds")

[GitHub Actions check failure (miss spelling check), line 1220: runing ==> running, ruining]

response_kwargs = dict(
stream=stream,
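Note on the new serialization_utils.JSONEncoder import: rag_config now holds langchain message objects whose additional_kwargs carry a create_time that comes back from DynamoDB as a Decimal, which json.dumps rejects by default. The encoder's implementation is not part of this diff; a minimal sketch of what it plausibly does, under that assumption:

    import json
    from decimal import Decimal

    class JSONEncoder(json.JSONEncoder):
        # Sketch only: the real serialization_utils.JSONEncoder is not shown in this PR.
        def default(self, obj):
            if isinstance(obj, Decimal):
                return float(obj)  # DynamoDB numbers come back as Decimal
            return repr(obj)       # degrade gracefully for message objects etc.

    # Example: json.dumps({"create_time": Decimal("1707100000.5")}, cls=JSONEncoder)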
1 change: 1 addition & 0 deletions source/lambda/executor/requirements.txt
@@ -6,3 +6,4 @@ requests_aws4auth==1.2.2
# tiktoken==0.3.3
boto3==1.28.57
botocore==1.31.57
python-dateutil==2.8.2
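
The python-dateutil pin matches the new "from dateutil import parser" import in main.py, presumably for turning loosely formatted timestamps into epoch floats comparable with the time-window bounds. For illustration only (not code from this PR):

    from dateutil import parser

    dt = parser.parse("2024-02-05 13:45:00")  # accepts many formats without a format string
    epoch = dt.timestamp()                    # float seconds, comparable to create_time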
40 changes: 37 additions & 3 deletions source/lambda/executor/test/executor_local_test.py
@@ -208,7 +208,6 @@ def multiturn_chat_test():
session_id=session_id
)


def multiturn_strict_qq_test():
session_id = f'test_{int(time.time())}'
generate_answer(
@@ -234,6 +233,12 @@ def qq_match_test():
)

def knowledge_qa_test():
r = generate_answer(
"什么是Amazon Bedrock",
model="knowledge_qa",
stream=True,
type="market_chain",
)
r = generate_answer(
"如何将Kinesis Data Streams配置为AWS Lambda的事件源?",
model="knowledge_qa",
@@ -342,6 +347,32 @@ def test_internlm_model():
session_id=f'test_{time.time()}'
endpoint_name = 'internlm2-chat-7b-2024-02-04-11-35-08-733'
model_id = "internlm2-chat-7b"

generate_answer(
"什么是Amazon Bedrock",
model="knowledge_qa",
type="market_chain",
stream=True,
rag_parameters=dict(
retriever_config =dict({
"retriever_top_k": 20,
"chunk_num": 2,
"using_whole_doc": True,
"reranker_top_k": 10,
"enable_reranker": True
}),
generator_llm_config={
"model_kwargs":{
"max_new_tokens": 2000,
"temperature": 0.1,
"top_p": 0.9
},
"model_id": model_id,
"endpoint_name": endpoint_name,
"context_num": 1
})
)

generate_answer(
"《夜曲》是谁演唱的?",
session_id=session_id,
@@ -548,9 +579,12 @@ def market_deploy_test():
# stream=True,
# type="market_chain",
# )
# market_deploy_test()
# knowledge_qa_test()


market_deploy_test()
# test_baichuan_model()
test_internlm_model()
# test_internlm_model()
# test_baichuan_model()

# market_deploy_test()
7 changes: 6 additions & 1 deletion source/lambda/executor/utils/constant.py
@@ -20,4 +20,9 @@ class IntentType(Enum):
AUTO = "auto"
@classmethod
def has_value(cls, value):
return value in cls._value2member_map_
return value in cls._value2member_map_


HUMAN_MESSAGE_TYPE = 'human'
AI_MESSAGE_TYPE = 'ai'
SYSTEM_MESSAGE_TYPE = 'system'
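
These constants mirror the type tags that langchain's message (de)serializers expect, so ddb_utils can round-trip stored messages without magic strings. A small sketch of the round trip, assuming a langchain version where messages_from_dict accepts this dict shape:

    from langchain.schema.messages import messages_from_dict
    from constant import HUMAN_MESSAGE_TYPE, AI_MESSAGE_TYPE

    raw = [
        {"type": HUMAN_MESSAGE_TYPE, "data": {"content": "hi", "additional_kwargs": {}}},
        {"type": AI_MESSAGE_TYPE, "data": {"content": "hello", "additional_kwargs": {}}},
    ]
    messages = messages_from_dict(raw)  # -> [HumanMessage(...), AIMessage(...)]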
39 changes: 34 additions & 5 deletions source/lambda/executor/utils/ddb_utils.py
@@ -7,14 +7,18 @@
from datetime import datetime
from botocore.exceptions import ClientError
from logger_utils import logger

import time
from decimal import Decimal
from langchain.schema import BaseChatMessageHistory
from langchain.schema.messages import (
BaseMessage,
_message_to_dict,
messages_from_dict,
messages_to_dict,
_message_from_dict
)
from constant import HUMAN_MESSAGE_TYPE,AI_MESSAGE_TYPE,SYSTEM_MESSAGE_TYPE


client = boto3.resource("dynamodb")

@@ -62,8 +66,10 @@ def message_as_langchain(self):
history = response["Item"]["History"]
ret = []
for his in history:
assert his['type'] in ['user','ai']
ret.append((his['type'],his['data']['content']))
assert his['type'] in [AI_MESSAGE_TYPE,HUMAN_MESSAGE_TYPE]
create_time = his['data']['additional_kwargs']['create_time']
his['data']['additional_kwargs']['create_time'] = float(create_time)
ret.append(_message_from_dict(his))
return ret

def add_message(self, message) -> None:
@@ -85,12 +91,35 @@ def add_message(self, message) -> None:

def add_user_message(self, message_id, content) -> None:
"""Append the user message to the record in DynamoDB"""
message = {'type': 'user', 'data': {'type': 'user', 'content': content, 'additional_kwargs': {"message_id": message_id}, 'example': False}}
message = {
'type': HUMAN_MESSAGE_TYPE,
'data': {
'type': HUMAN_MESSAGE_TYPE,
'content': content,
'additional_kwargs':{
"message_id": message_id,
"create_time": Decimal.from_float(time.time())
},
# 'example': False,
}
}
self.add_message(message)

def add_ai_message(self, message_id, content) -> None:
"""Append the ai message to the record in DynamoDB"""
message = {'type': 'ai', 'data': {'type': 'ai', 'content': content, 'additional_kwargs': {"message_id": message_id}, 'example': False}}
message = {
'type': AI_MESSAGE_TYPE,
'data':
{
'type': AI_MESSAGE_TYPE,
'content': content,
'additional_kwargs': {
"message_id": message_id,
"create_time": Decimal.from_float(time.time())
},
# 'example': False,
}
}
self.add_message(message)

def add_metadata(self, metadata) -> None:
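Why Decimal.from_float here: boto3's DynamoDB layer does not accept Python floats, so the create_time stamp must go in as a Decimal and is cast back to float in message_as_langchain before the time-window comparison. One caveat worth verifying in review: boto3's serializer uses a decimal context that traps rounding, and the exact binary expansion from Decimal.from_float can exceed DynamoDB's 38-digit precision for some values; Decimal(str(time.time())) is the commonly used safer spelling. Illustration only:

    import time
    from decimal import Decimal

    now = time.time()
    exact = Decimal.from_float(now)  # exact binary expansion, possibly dozens of digits
    compact = Decimal(str(now))      # short decimal form, safe for the 38-digit limit

    # On read, cast back to float before comparing with time-window bounds:
    restored = float(compact)
    assert abs(restored - now) < 1e-3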