Merge branch 'main' of https://github.com/aws-samples/Intelli-Agent

aws-samples · Mar 10, 2025 · b65027a · b65027a
2 parents 8a67f45 + 0eb1f55
commit b65027a
Show file tree

Hide file tree

Showing 140 changed files with 5,446 additions and 2,218 deletions.
diff --git a/.gitignore b/.gitignore
diff --git a/README.md b/README.md
@@ -445,66 +445,6 @@ Documents of various types are first converted to Markdown format and then split
 
 To inject intent data into your system, follow these steps:
 
-### Step-by-Step Guide to Inject Intent Data
-
-1. **Obtain JWT Token:**
-   - Refer to the documentation at [docs/auth.md](docs/auth.md) to understand how to obtain a JWT token.
-   - Use Postman or a similar tool for this process.
-
-2. **Injection Using ETL API:**
-   - Use the schema specified in [docs/ETL_API_SCHEMA.md](docs/ETL_API_SCHEMA.md) for intent data injection.
-   - Below is a sample JSON structure that you can use to inject intent data. Replace the placeholders with your specific S3 bucket and file details:
-
-   ```json
-   {
-       "s3Bucket": "your-bucket-name",
-       "s3Prefix": "s3path/default-intent.jsonl",
-       "offline": "true",
-       "qaEnhance": "false",
-       "workspaceId": "default-intent",
-       "operationType": "create",
-       "documentLanguage": "zh",
-       "indexType": "qq"
-   }
-   ```
-
-3. **Data Injection Format:**
-   - Use the following JSON format for injecting individual intent data:
-
-   ```json
-   {"question": "Hello", "answer": {"intent": "chat"}}
-   ```
-
-   - Replace `"Hello"` with the actual question text.
-
-
-### How to Update Resources Used by ETL
-
-The current solution is undergoing continuous updates, requiring manual updates for the document parsing component.
-
-1. [Optional] Update Document Parsing Model Endpoint
-
-```bash
-# Input a new ETL tag when executing sh build.sh
-cd source/script
-sh build.sh -b <S3 bucket name> -i <ETL model name> -t <new ETL tag name> -r <AWS region>
-
-# Input a new ETL tag when executing cdk deploy to trigger ETL endpoint update
-npx cdk deploy --rollback true --parameters S3ModelAssets=<Your S3 Bucket Name> --parameters SubEmail=<Your email address> --parameters EtlImageName=<Your ETL model name> --parameters ETLTag=<Your new ETL tag name> --require-approval never
-```
-
-2. Manually Update ETL Dependencies' whl Package
-
-
-First, confirm the path corresponding to `--extra-py-files` in your ETL Job.
-
-![Glue S3 bucket](docs/images/glue-s3-bucket.png)
-
-Next, upload `source/lambda/job/dep/dist/llm_bot_dep-0.1.0-py3-none-any.whl` to the location where Glue dependencies are stored.
-
-```bash
-aws s3 cp source/lambda/job/dep/dist/llm_bot_dep-0.1.0-py3-none-any.whl s3://<Your Glue job bucket>/llm_bot_dep-0.1.0-py3-none-any.whl
-```
 
 
 ## Testing

diff --git a/README_zh-cn.md b/README_zh-cn.md
@@ -109,7 +109,7 @@ Intelli-Agent 旨在以最小的开销和最大的效率帮助开发人员快速
 
 ### 灵活的模式选项
 
-下图是基于 [LangGraph](https://langchain-ai.github.io/langgraph/) 生成的在线逻辑。第一个节点是 **query_preprocess_lambda**，它处理聊天历史记录。然后用户可以从三种模式中选择：聊天模式（chat）、检索生成模式（rag）和代理模式（agent）。**聊天模式（chat）** 让您可以直接与不同的 LLM（如 Anthropic Claude 3）进行交互。**检索生成模式（rag）** 将检索与当前查询相关的内容并让 LLM 回答。**代理模式（agent）** 是最复杂的模式，能够处理复杂的业务场景。根据 **intention_detection_lambda** 提供的最相关意图和 **query_preprocess_lambda** 提供的聊天历史记录，**agent_lambda** 将决定使用哪些工具以及这些信息是否足以回答查询。**parse_tool_calling** 节点将解析 **agent_lambda** 的输出：
+下图是基于 [LangGraph](https://langchain-ai.github.io/langgraph/) 生成的在线逻辑。第一个节点是 **query_preprocess_lambda**，它处理聊天历史记录。用户可以在会话页面开启或关闭**只使用RAG**开关：**开启只使用RAG** 时，将检索与当前查询相关的内容并让 LLM 回答；**关闭只使用RAG** 时，**agent_lambda** 会根据 **intention_detection_lambda** 提供的最相关意图和 **query_preprocess_lambda** 提供的聊天历史记录，决定使用哪些工具以及这些信息是否足以回答查询。**parse_tool_calling** 节点将解析 **agent_lambda** 的输出：
 
 * 如果 **agent_lambda** 从工具格式的角度选择了错误的工具，那么会通过 **invalid_tool_calling** 进行重新思考。
 * 如果 **agent_lambda** 选择了有效工具，那么会通过 **tool_execute_lambda** 执行该工具。然后，**agent_lambda** 将决定运行结果是否足以回答查询。
@@ -126,9 +126,8 @@ flowchart TD
         tools_execution["tools_execution"]
   end
     _start_["_start_"] --> query_preprocess["query_preprocess"]
-    query_preprocess == chat mode ==> llm_direct_results_generation["llm_direct_results_generation"]
-    query_preprocess == rag mode ==> all_knowledge_retrieve["all_knowledge_retrieve"]
-    query_preprocess == agent mode ==> intention_detection["intention_detection"]
+    query_preprocess == use_rag_only enabled ==> all_knowledge_retrieve["all_knowledge_retrieve"]
+    query_preprocess == use_rag_only disabled ==> intention_detection["intention_detection"]
     all_knowledge_retrieve --> llm_rag_results_generation["llm_rag_results_generation"]
     intention_detection -- similar query found --> matched_query_return["matched_query_return"]
     intention_detection -- intention detected --> tools_choose_and_results_generation
@@ -137,15 +136,13 @@ flowchart TD
     results_evaluation -. valid tool calling .-> tools_execution
     results_evaluation -. no need tool calling .-> final_results_preparation["final_results_preparation"]
     tools_execution --> tools_choose_and_results_generation
-    llm_direct_results_generation --> _end_["_end_"]
     llm_rag_results_generation --> _end_
     matched_query_return --> final_results_preparation
     final_results_preparation --> _end_
      tools_choose_and_results_generation:::process
      results_evaluation:::process
      tools_execution:::process
      query_preprocess:::process
-     llm_direct_results_generation:::process
      all_knowledge_retrieve:::process
      intention_detection:::process
      llm_rag_results_generation:::process

diff --git a/source/infrastructure/cdk.json b/source/infrastructure/cdk.json
@@ -3,7 +3,10 @@
   "output": "cdk.out",
   "build": "npx projen bundle",
   "watch": {
-    "include": ["src/**/*.ts", "test/**/*.ts"],
+    "include": [
+      "src/**/*.ts",
+      "test/**/*.ts"
+    ],
     "exclude": [
       "README.md",
       "cdk*.json",
@@ -16,7 +19,11 @@
     ]
   },
   "context": {
-    "@aws-cdk/customresources:installLatestAwsSdkDefault": false
+    "@aws-cdk/customresources:installLatestAwsSdkDefault": false,
+    "@aws-cdk/core:retryOptions": {
+      "maxAttempts": 3,
+      "backoffRate": 1.5
+    }
   },
   "//": "~~ Generated by projen. To modify, edit .projenrc.js and run \"npx projen\"."
-}
+}
diff --git a/source/infrastructure/lib/api/api-stack.ts b/source/infrastructure/lib/api/api-stack.ts
@@ -120,9 +120,7 @@ export class ApiConstruct extends Construct implements ApiConstructOutputs {
             if (event.RequestType === 'Create' || event.RequestType === 'Update') {
               try {
                 // Wait for 20 seconds
-                console.log('Waiting started')
                 await new Promise(resolve => setTimeout(resolve, 20000));
-                console.log('Waiting ended')
                 return {
                   Status: 'SUCCESS',
                   PhysicalResourceId: event.RequestId,

diff --git a/source/infrastructure/lib/chat/chat-stack.ts b/source/infrastructure/lib/chat/chat-stack.ts
@@ -114,7 +114,7 @@ export class ChatStack extends NestedStack implements ChatStackOutputs {
         ['bash', '-c', [
           "mkdir -p /tmp/online_lambda_function_codes",
           `cp -r ${join(__dirname, "../../../lambda/online/*")} /tmp/online_lambda_function_codes`,
-          `cp ${join(__dirname, "../../../lambda/job/dep/llm_bot_dep/sm_utils.py")} /tmp/online_lambda_function_codes/`,
+          `cp -r ${join(__dirname, "../../../lambda/shared")} /tmp/online_lambda_function_codes/`,
         ].join(' && ')
         ]
       ),
@@ -143,7 +143,8 @@ export class ChatStack extends NestedStack implements ChatStackOutputs {
         BEDROCK_AWS_ACCESS_KEY_ID: props.config.chat.bedrockAk || "",
         BEDROCK_AWS_SECRET_ACCESS_KEY: props.config.chat.bedrockSk || ""
       },
-      layers: [apiLambdaOnlineSourceLayer, modelLayer],
+      // layers: [apiLambdaOnlineSourceLayer, modelLayer],
+      layers: [apiLambdaOnlineSourceLayer],
     });
     this.lambdaOnlineMain = lambdaOnlineMain.function;
 

diff --git a/source/infrastructure/lib/knowledge-base/knowledge-base-stack.ts b/source/infrastructure/lib/knowledge-base/knowledge-base-stack.ts
@@ -138,6 +138,7 @@ export class KnowledgeBaseStack extends NestedStack implements KnowledgeBaseStac
 
 
   private createKnowledgeBaseJob(props: any) {
+    const deployRegion = props.config.deployRegion;
     const connection = new glue.Connection(this, "GlueJobConnection", {
       type: glue.ConnectionType.NETWORK,
       subnet: props.sharedConstructOutputs.vpc.privateSubnets[0],
@@ -159,10 +160,8 @@ export class KnowledgeBaseStack extends NestedStack implements KnowledgeBaseStac
     notificationLambda.addToRolePolicy(this.iamHelper.logStatement);
     notificationLambda.addToRolePolicy(this.dynamodbStatement);
 
-    // If this.region is cn-north-1 or cn-northwest-1, use the glue-job-script-cn.py
-    const glueJobScript = "glue-job-script.py";
 
-    // Assemble the extra python files list using _S3Bucket.s3UrlForObject("llm_bot_dep-0.1.0-py3-none-any.whl") and _S3Bucket.s3UrlForObject("nougat_ocr-0.1.17-py3-none-any.whl") and convert to string
+    // Assemble the extra python files list using _S3Bucket.s3UrlForObject("llm_bot_dep-0.1.0-py3-none-any.whl")
     const extraPythonFilesList = [
       this.glueLibS3Bucket.s3UrlForObject("llm_bot_dep-0.1.0-py3-none-any.whl"),
     ].join(",");
@@ -201,37 +200,44 @@ export class KnowledgeBaseStack extends NestedStack implements KnowledgeBaseStac
     glueRole.addToPolicy(this.iamHelper.glueStatement);
     glueRole.addToPolicy(this.dynamodbStatement);
     glueRole.addToPolicy(this.iamHelper.dynamodbStatement);
+    glueRole.addToPolicy(this.iamHelper.secretsManagerStatement);
+
+    const glueJobDefaultArguments: { [key: string]: string } = {
+      "--AOS_ENDPOINT": this.aosDomainEndpoint,
+      "--REGION": deployRegion,
+      "--ETL_MODEL_ENDPOINT": props.modelConstructOutputs.defaultKnowledgeBaseModelName,
+      "--RES_BUCKET": this.glueResultBucket.bucketName,
+      "--ETL_OBJECT_TABLE": this.etlObjTableName || "-",
+      "--PORTAL_BUCKET": this.uiPortalBucketName,
+      "--CHATBOT_TABLE": props.sharedConstructOutputs.chatbotTable.tableName,
+      "--additional-python-modules":
+        "langchain==0.3.7,beautifulsoup4==4.12.2,requests-aws4auth==1.2.3,boto3==1.35.98,openai==0.28.1,pyOpenSSL==23.3.0,tenacity==8.2.3,markdownify==0.11.6,mammoth==1.6.0,chardet==5.2.0,python-docx==1.1.0,pdfminer.six==20221105,smart-open==7.0.4,opensearch-py==2.2.0,lxml==5.2.2,pandas==2.1.2,openpyxl==3.1.5,xlrd==2.0.1,langchain_community==0.3.5,pillow==10.0.1,tiktoken==0.8.0",
+      // Add multiple extra python files
+      "--extra-py-files": extraPythonFilesList,
+    }
+
+    // Set China-specific PyPI mirror for China regions
+    if (deployRegion === "cn-north-1" || deployRegion === "cn-northwest-1") {
+      glueJobDefaultArguments["--python-modules-installer-option"] = "-i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple";
+    }
 
     // Create glue job to process files specified in s3 bucket and prefix
-    const glueJob = new glue.Job(this, "PythonShellJob", {
-      executable: glue.JobExecutable.pythonShell({
-        glueVersion: glue.GlueVersion.V3_0,
-        pythonVersion: glue.PythonVersion.THREE_NINE,
+    const glueJob = new glue.Job(this, "PythonEtlJob", {
+      executable: glue.JobExecutable.pythonEtl({
+        glueVersion: glue.GlueVersion.V4_0,
+        pythonVersion: glue.PythonVersion.THREE,
         script: glue.Code.fromAsset(
-          join(__dirname, "../../../lambda/job", glueJobScript),
+          join(__dirname, "../../../lambda/job/glue-job-script.py"),
         ),
       }),
       // Worker type is not supported for the pythonshell job command; for this ETL job, both workerType and workerCount must be set together
-      // workerType: glue.WorkerType.G_2X,
-      // workerCount: 2,
+      workerType: glue.WorkerType.G_1X,
+      workerCount: 2,
       maxConcurrentRuns: 200,
       maxRetries: 1,
       connections: [connection],
-      maxCapacity: 1,
       role: glueRole,
-      defaultArguments: {
-        "--AOS_ENDPOINT": this.aosDomainEndpoint,
-        "--REGION": process.env.CDK_DEFAULT_REGION || "-",
-        "--ETL_MODEL_ENDPOINT": props.modelConstructOutputs.defaultKnowledgeBaseModelName,
-        "--RES_BUCKET": this.glueResultBucket.bucketName,
-        "--ETL_OBJECT_TABLE": this.etlObjTableName || "-",
-        "--PORTAL_BUCKET": this.uiPortalBucketName,
-        "--CHATBOT_TABLE": props.sharedConstructOutputs.chatbotTable.tableName,
-        "--additional-python-modules":
-          "langchain==0.3.7,beautifulsoup4==4.12.2,requests-aws4auth==1.2.3,boto3==1.35.98,openai==0.28.1,pyOpenSSL==23.3.0,tenacity==8.2.3,markdownify==0.11.6,mammoth==1.6.0,chardet==5.2.0,python-docx==1.1.0,nltk==3.9.1,pdfminer.six==20221105,smart-open==7.0.4,opensearch-py==2.2.0,lxml==5.2.2,pandas==2.1.2,openpyxl==3.1.5,xlrd==2.0.1,langchain_community==0.3.5,pillow==10.0.1,tiktoken==0.8.0",
-        // Add multiple extra python files
-        "--extra-py-files": extraPythonFilesList
-      },
+      defaultArguments: glueJobDefaultArguments,
     });
 
     // Create SNS topic and subscription to notify when glue job is completed
@@ -308,7 +314,7 @@ export class KnowledgeBaseStack extends NestedStack implements KnowledgeBaseStac
         "--TABLE_ITEM_ID.$": "$.tableItemId",
         "--QA_ENHANCEMENT.$": "$.qaEnhance",
         "--REGION": process.env.CDK_DEFAULT_REGION || "-",
-        "--BEDROCK_REGION": props.config.chat.bedrockRegion,
+        "--BEDROCK_REGION": props.config.chat.bedrockRegion || "-",
         "--MODEL_TABLE": props.sharedConstructOutputs.modelTable.tableName,
         "--RES_BUCKET": this.glueResultBucket.bucketName,
         "--S3_BUCKET.$": "$.s3Bucket",

diff --git a/source/infrastructure/lib/model/model-construct.ts b/source/infrastructure/lib/model/model-construct.ts
@@ -139,6 +139,7 @@ export class ModelConstruct extends NestedStack implements ModelConstructOutputs
     // Deploy Embedding and Reranker model
     let embeddingAndRerankerModelPrefix = props.config.model.embeddingsModels[0].name ?? "";
     let embeddingAndRerankerModelVersion = props.config.model.embeddingsModels[0].commitId ?? "";
+    let embeddingAndRerankerEndpointInstanceType = "ml.g4dn.4xlarge";
     let embeddingAndRerankerModelName = embeddingAndRerankerModelPrefix + "-" + embeddingAndRerankerModelVersion.slice(0, 5)
     let embeddingAndRerankerImageUrl = this.modelPublicEcrAccount + this.modelRegion + this.modelImageUrlDomain + "djl-inference:0.21.0-deepspeed0.8.3-cu117";
     let embeddingAndRerankerModelDataUrl = `s3://${props.config.model.modelConfig.modelAssetsBucket}/${embeddingAndRerankerModelPrefix}_deploy_code/`;
@@ -166,7 +167,7 @@ export class ModelConstruct extends NestedStack implements ModelConstructOutputs
             variantName: this.modelVariantName || "",
             containerStartupHealthCheckTimeoutInSeconds: 15 * 60,
             initialInstanceCount: 1,
-            instanceType: "ml.g4dn.4xlarge",
+            instanceType: embeddingAndRerankerEndpointInstanceType,
           },
         ],
       },
@@ -187,9 +188,10 @@ export class ModelConstruct extends NestedStack implements ModelConstructOutputs
 
   private deployKnowledgeBaseEndpoint(props: ModelConstructProps) {
     // Deploy Knowledge Base model
-    let knowledgeBaseModelName = "knowledge-base-model";
+    let knowledgeBaseModelInstanceType = "ml.g4dn.2xlarge";
     let knowledgeBaseModelEcrRepository = props.config.knowledgeBase.knowledgeBaseType.intelliAgentKb.knowledgeBaseModel.ecrRepository;
     let knowledgeBaseModelEcrImageTag = props.config.knowledgeBase.knowledgeBaseType.intelliAgentKb.knowledgeBaseModel.ecrImageTag;
+    let knowledgeBaseModelName = "knowledge-base-model" + "-" + knowledgeBaseModelEcrImageTag;
     let knowledgeBaseModelImageUrl = this.modelAccount + ".dkr.ecr." + this.modelRegion + this.modelImageUrlDomain + knowledgeBaseModelEcrRepository + ":" + knowledgeBaseModelEcrImageTag;
 
     const knowledgeBaseModelResources = this.deploySagemakerEndpoint({
@@ -209,15 +211,16 @@ export class ModelConstruct extends NestedStack implements ModelConstructOutputs
             variantName: this.modelVariantName || "",
             containerStartupHealthCheckTimeoutInSeconds: 15 * 60,
             initialInstanceCount: 1,
-            instanceType: "ml.g4dn.2xlarge",
+            instanceType: knowledgeBaseModelInstanceType,
           },
         ],
         asyncInferenceConfig: {
           clientConfig: {
             maxConcurrentInvocationsPerInstance: 1,
           },
           outputConfig: {
-            s3OutputPath: `s3://${props.sharedConstructOutputs.resultBucket.bucketName}/${knowledgeBaseModelName}/`,
+            s3OutputPath: `s3://${props.sharedConstructOutputs.resultBucket.bucketName}/${knowledgeBaseModelName}/output`,
+            s3FailurePath: `s3://${props.sharedConstructOutputs.resultBucket.bucketName}/${knowledgeBaseModelName}/failure`,
           },
         },
       },
@@ -287,7 +290,7 @@ export class ModelConstruct extends NestedStack implements ModelConstructOutputs
     executionRole.addToPolicy(this.modelIamHelper.stsStatement);
     executionRole.addToPolicy(this.modelIamHelper.ecrStatement);
     executionRole.addToPolicy(this.modelIamHelper.llmStatement);
-
+    executionRole.addToPolicy(this.modelIamHelper.secretsManagerStatement);
     this.modelExecutionRole = executionRole;
   }