-
Notifications
You must be signed in to change notification settings - Fork 6
/
example_config.yaml
151 lines (150 loc) · 6.22 KB
/
example_config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# Example deployment configuration.
# `env` selects which top-level environment mapping below is used.
# NOTE(review): nesting was reconstructed from key grouping — confirm it against the configuration schema.
env: dev

dev:
  appName: lisa
  profile:
  deploymentName:
  # Quoted so the leading zero is preserved; an unquoted all-digit value may be read as a number by some YAML parsers.
  accountNumber: '012345678901'
  region: us-east-1
  deploymentStage: dev
  removalPolicy: destroy
  runCdkNag: false
  # lambdaLayerAssets:
  #   authorizerLayerPath: /path/to/authorizer_layer.zip
  #   commonLayerPath: /path/to/common_layer.zip
  #   fastapiLayerPath: /path/to/fastapi_layer.zip
  #   ragLayerPath: /path/to/rag_layer.zip
  #   sdkLayerPath: /path/to/sdk_layer.zip
  # stackSynthesizer: CliCredentialsStackSynthesizer
  # permissionsBoundaryAspect:
  #   permissionsBoundaryPolicyName: CustomPermissionBoundary
  #   rolePrefix: CustomPrefix
  #   policyPrefix: CustomPrefix
  #   instanceProfilePrefix: CustomPrefix
  # vpcId: vpc-0123456789abcdef
  # subnetIds: [subnet-fedcba9876543210, subnet-0987654321fedcba]
  s3BucketModels: hf-models-gaiic
  # aws partition mountS3 package location
  mountS3DebUrl: https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb
  # aws-iso partition mountS3 package location
  # mountS3DebUrl: https://mountpoint-s3-release-us-iso-east-1.s3.us-iso-east-1.c2s.ic.gov/latest/x86_64/mount-s3.deb
  # aws-iso-b partition mountS3 package location
  # mountS3DebUrl: https://mountpoint-s3-release-us-isob-east-1.s3.us-isob-east-1.sc2s.sgov.gov/latest/x86_64/mount-s3.deb
  # Account IDs are quoted for the same reason as accountNumber above.
  accountNumbersEcr:
    - '012345678901'
  deployRag: true
  deployChat: true
  deployUi: true
  privateEndpoints: false
  lambdaConfig:
    pythonRuntime: PYTHON_3_10
    logLevel: DEBUG
    vpcAutoscalingConfig:
      provisionedConcurrentExecutions: 5
      minCapacity: 1
      maxCapacity: 50
      targetValue: 0.80
      cooldown: 30
  authConfig:
    authority:
    clientId:
    adminGroup:
    jwtGroupsProperty:
  logLevel: DEBUG
  # NOTE: The following configuration will allow for using a custom domain for the chat user interface.
  # If this option is specified, the API Gateway invocation URL will NOT work on its own as the application URL.
  # Users must use the custom domain for the user interface to work if this option is populated.
  apiGatewayConfig:
    domainName:
  restApiConfig:
    apiVersion: v2
    instanceType: m5.large
    containerConfig:
      image:
        baseImage: python:3.9
        path: lib/serve/rest-api
        type: asset
      healthCheckConfig:
        command: ["CMD-SHELL", "exit 0"]
        interval: 10
        startPeriod: 30
        timeout: 5
        retries: 3
    autoScalingConfig:
      minCapacity: 1
      maxCapacity: 1
      cooldown: 60
      defaultInstanceWarmup: 60
      metricConfig:
        AlbMetricName: RequestCountPerTarget
        targetValue: 1000
        duration: 60
        estimatedInstanceWarmup: 30
    internetFacing: true
    loadBalancerConfig:
      sslCertIamArn: arn:aws:iam::012345678901:server-certificate/lisa-self-signed-dev
      healthCheckConfig:
        path: /health
        interval: 60
        timeout: 30
        healthyThresholdCount: 2
        unhealthyThresholdCount: 10
      domainName:
  ragRepositories:
    - repositoryId: pgvector-rag
      type: pgvector
      rdsConfig:
        username: postgres
    # - repositoryId: default
    #   type: opensearch
    #   opensearchConfig:
    #     dataNodes: 2
    #     dataNodeInstanceType: r6g.large.search
    #     masterNodes: 0
    #     masterNodeInstanceType: r6g.large.search
    #     volumeSize: 300
    # If adding an existing PGVector database, this configuration assumes:
    # 1. The database has been configured to have pgvector installed and enabled: https://aws.amazon.com/about-aws/whats-new/2023/05/amazon-rds-postgresql-pgvector-ml-model-integration/
    # 2. The database is accessible by RAG-related lambda functions (add inbound PostgreSQL access on the database's security group for all Lambda RAG security groups)
    # 3. A secret ID exists in SecretsManager holding the database password within a json block of '{"password":"your_password_here"}'. This is the same format that RDS natively provides a password in SecretsManager.
    # If the passwordSecretId or dbHost are not provided, then a sample database will be created for you. Only the username is required.
    # - repositoryId: pgvector-rag
    #   type: pgvector
    #   rdsConfig:
    #     username: postgres
    #     passwordSecretId: # password ID as stored in SecretsManager. Example: "rds!db-aa88493d-be8d-4a3f-96dc-c668165f7826"
    #     dbHost: # Host name of database. Example hostname from RDS: "my-db-name.291b2f03.us-east-1.rds.amazonaws.com"
    #     dbName: postgres
  ragFileProcessingConfig:
    chunkSize: 512
    chunkOverlap: 51
  ecsModels:
    - modelName: mistralai/Mistral-7B-Instruct-v0.2
      inferenceContainer: tgi
      baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1
    - modelName: intfloat/e5-large-v2
      inferenceContainer: tei
      baseImage: ghcr.io/huggingface/text-embeddings-inference:1.2.3
    # - modelName: mistralai/Mixtral-8x7B-Instruct-v0.1
    #   inferenceContainer: tgi
    #   baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1
  # LiteLLM Config options found here: https://litellm.vercel.app/docs/proxy/configs#all-settings
  # Anything within this config is copied to a configuration for starting LiteLLM in the REST API container.
  # It is suggested to put an "ignored" API key so that calls to locally hosted models don't fail on OpenAI calls
  # from LiteLLM.
  # We added `lisa_params` to add additional metadata for interaction with the Chat UI. Specify if the model is a
  # textgen or embedding model, and if it is textgen, specify whether it supports streaming. If embedding, then
  # omit the `streaming` parameter. When defining the model list, the `lisa_params` will be an object in the model
  # definition that will have the `model_type` and `streaming` fields in it. A commented example is provided below.
  litellmConfig:
    litellm_settings:
      telemetry: false  # Don't try to send telemetry to LiteLLM servers.
    general_settings:
      # Sample value only: replace it with your own key, and avoid committing real keys to version control.
      master_key: sk-d7a77bcb-3e23-483c-beec-2700f2baeeb1  # A key is required for model management purposes
    model_list:  # Add any of your existing (not LISA-hosted) models here.
    #   - model_name: mymodel
    #     litellm_params:
    #       model: openai/myprovider/mymodel
    #       api_key: ignored
    #     lisa_params:
    #       model_type: textgen
    #       streaming: true