Commit

Merge branch 'develop' into feature/rag-pipeline-patch
# Conflicts:
#	lib/schema.ts
#	package.json
estohlmann committed Nov 19, 2024
2 parents cc9ccf0 + 60e56f3 commit e02cfce
Showing 49 changed files with 1,730 additions and 2,803 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/docs.deploy.github-pages.yml
@@ -29,6 +29,9 @@ jobs:
cache: npm
- name: Setup Pages
uses: actions/configure-pages@v4
+- name: Install root dependencies
+  run: |
+    npm install
- name: Install dependencies
working-directory: ./lib/docs
run: npm install
59 changes: 59 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,62 @@
# v3.2.0
## Key Features
### Enhanced Deployment Configuration
- LISA v3.2.0 introduces a significant update to the configuration file schema, streamlining the deployment process
- The previous single config.yaml file has been replaced with a more flexible two-file system: config-base.yaml and config-custom.yaml
- config-base.yaml now contains the default properties, which can be selectively overridden in config-custom.yaml, allowing for greater customization while maintaining a standardized base configuration (a sketch of the intended merge behavior follows this list)
- The number of required properties in config-custom.yaml has been reduced to 8, simplifying initial setup
- This update makes LISA configurations more flexible and maintainable, providing a more robust foundation for future development and easier customization for end users
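
The override behavior can be pictured with a short sketch. This is illustrative only: it assumes a js-yaml loader and a simple recursive merge in which config-custom.yaml values win; the actual LISA loader may be implemented differently.

```typescript
// Illustrative sketch only: load config-base.yaml, then overlay config-custom.yaml.
// Assumes js-yaml is available; the real LISA loader may differ.
import * as fs from 'fs';
import * as yaml from 'js-yaml';

type ConfigObject = Record<string, any>;

// Recursively merge `override` onto `base`; values from the custom file win.
function deepMerge (base: ConfigObject, override: ConfigObject): ConfigObject {
    const result: ConfigObject = { ...base };
    for (const [key, value] of Object.entries(override)) {
        if (value && typeof value === 'object' && !Array.isArray(value) && typeof result[key] === 'object') {
            result[key] = deepMerge(result[key], value);
        } else {
            result[key] = value; // scalars and arrays are replaced outright
        }
    }
    return result;
}

const baseConfig = yaml.load(fs.readFileSync('config-base.yaml', 'utf8')) as ConfigObject;
const customConfig = yaml.load(fs.readFileSync('config-custom.yaml', 'utf8')) as ConfigObject;
const config = deepMerge(baseConfig, customConfig);
console.log(config.region); // value from config-custom.yaml when present, otherwise the default
```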

#### Important Note
- The previous config.yaml file format is no longer compatible with this update
- To facilitate migration, we have developed a utility: run `npm run migrate-properties` to automatically convert an existing config.yaml file to the new config-custom.yaml format

### Admin UI Configuration Page
- Administrative Control of Chat Components:
- Administrators now have granular control over the activation and deactivation of chat components for all users through the Configuration Page
- This feature allows for dynamic management of user interface elements, enhancing system flexibility and user experience customization
- Items that can be configured include (a hypothetical configuration shape is sketched below):
- The option to delete session history
- Visibility of message metadata
- Configuration of chat kwargs
- Customization of prompt templates
- Adjustment of chat history buffer settings
- The number of RAG documents included in the retrieval process (TopK)
- Ability to upload RAG documents
- Ability to upload in-context documents
- System Banner Management:
- The Configuration Page now includes functionality for administrators to manage the system banner
- Administrators can activate, deactivate, and update the content of the system banner
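
As a purely hypothetical illustration of what such an admin-managed configuration payload might look like (the field names below are assumptions chosen for readability, not the actual LISA schema or API):

```typescript
// Hypothetical sketch of an admin-managed chat configuration; field names are
// illustrative assumptions, not the actual LISA schema.
interface ChatConfiguration {
    deleteSessionHistory: boolean;      // allow users to delete their session history
    showMessageMetadata: boolean;       // show message metadata in the chat UI
    editChatKwargs: boolean;            // expose chat kwargs to users
    editPromptTemplate: boolean;        // allow prompt template customization
    chatHistoryBufferSize: number;      // chat history buffer setting
    ragTopK: number;                    // number of RAG documents retrieved (TopK)
    uploadRagDocuments: boolean;        // allow uploading RAG documents
    uploadInContextDocuments: boolean;  // allow uploading in-context documents
    systemBanner?: {                    // optional system banner managed by administrators
        enabled: boolean;
        text: string;
    };
}
```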

### LISA Documentation Site
- We are pleased to announce the launch of the official [LISA Documentation site](https://awslabs.github.io/LISA/)
- This comprehensive resource provides customers with additional guides and extensive information on LISA
- The documentation is also optionally deployable within your environment during LISA deployment
- The team is continuously working to add and expand content available on this site

## Enhancements
- Implemented a selection-based interface for instance input, replacing free text entry
- Improved CDK Nag integration across stacks
- Added functionality for administrators to specify block volume size for models, enabling successful deployment of larger models
- Introduced options for administrators to choose between Private or Regional API Gateway endpoints
- Enabled subnet specification within the designated VPC for deployed resources
- Implemented support for headless deployment execution

## Bug Fixes
- Resolved issues with Create and Update model alerts to ensure proper display in the modal
- Enhanced error handling for model creation/update processes to cover all potential scenarios

## Coming Soon
- Version 3.3.0 will include a new RAG ingestion pipeline. Users will be able to configure an S3 bucket and an ingestion trigger; when the trigger fires, documents in the bucket will be pre-processed and loaded into the selected vector store.

## Acknowledgements
* @bedanley
* @estohlmann
* @dustins

**Full Changelog**: https://github.com/awslabs/LISA/compare/v3.1.0...v3.2.0


# v3.1.0
## Enhancements
### Model Management Administration
1,387 changes: 80 additions & 1,307 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
-3.1.0
+3.2.0
1 change: 1 addition & 0 deletions ecs_model_deployer/Dockerfile
@@ -1,4 +1,5 @@
FROM public.ecr.aws/lambda/nodejs:18

COPY ./dist/ ${LAMBDA_TASK_ROOT}
RUN chmod 777 -R ${LAMBDA_TASK_ROOT}
CMD ["index.handler"]
2 changes: 1 addition & 1 deletion ecs_model_deployer/src/lib/ecs-model.ts
@@ -12,7 +12,7 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-*/
+*/

// ECS Model Construct.
import { ISecurityGroup, IVpc, SubnetSelection } from 'aws-cdk-lib/aws-ec2';
4 changes: 2 additions & 2 deletions ecs_model_deployer/src/lib/lisa_model_stack.ts
@@ -62,9 +62,9 @@ export class LisaModelStack extends Stack {

let subnetSelection: SubnetSelection | undefined;

-if (props.config.subnetIds && props.config.subnetIds.length > 0) {
+if (props.config.subnets && props.config.subnets.length > 0) {
subnetSelection = {
-subnets: props.config.subnetIds?.map((subnet, index) => Subnet.fromSubnetId(this, index.toString(), subnet))
+subnets: props.config.subnets?.map((subnet, index) => Subnet.fromSubnetId(this, index.toString(), subnet.subnetId))
};
}

5 changes: 4 additions & 1 deletion ecs_model_deployer/src/lib/schema.ts
@@ -618,7 +618,10 @@ const RawConfigSchema = z
instanceProfilePrefix: z.string().optional(),
})
.optional(),
-subnetIds: z.array(z.string()).optional(),
+subnets: z.array(z.object({
+    subnetId: z.string().startsWith('subnet-'),
+    ipv4CidrBlock: z.string()
+})).optional(),
})
.refine((config) => (config.pypiConfig.indexUrl && config.region.includes('iso')) || !config.region.includes('iso'), {
message: 'Must set PypiConfig if in an iso region',
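
Pulled out of RawConfigSchema for illustration, the new subnets shape can be exercised on its own. This is a minimal sketch: the surrounding schema contains many more properties, and the example values below are placeholders.

```typescript
// Minimal sketch of the new `subnets` shape in isolation; RawConfigSchema itself
// contains many more properties. Example values are placeholders.
import { z } from 'zod';

const SubnetsSchema = z.array(z.object({
    subnetId: z.string().startsWith('subnet-'),
    ipv4CidrBlock: z.string()
})).optional();

// The new object form validates...
const ok = SubnetsSchema.safeParse([
    { subnetId: 'subnet-fedcba9876543210', ipv4CidrBlock: '10.0.1.0/24' }
]);
// ...while the old flat string list (previously `subnetIds`) no longer matches this shape.
const old = SubnetsSchema.safeParse(['subnet-fedcba9876543210']);
console.log(ok.success, old.success); // true false
```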
234 changes: 83 additions & 151 deletions example_config.yaml
@@ -1,151 +1,83 @@
env: dev

dev:
appName: lisa
profile:
deploymentName:
accountNumber: 012345678901
region: us-east-1
deploymentStage: dev
removalPolicy: destroy
runCdkNag: false
# lambdaLayerAssets:
# authorizerLayerPath: /path/to/authorizer_layer.zip
# commonLayerPath: /path/to/common_layer.zip
# fastapiLayerPath: /path/to/fastapi_layer.zip
# ragLayerPath: /path/to/rag_layer.zip
# sdkLayerPath: /path/to/sdk_layer.zip
# stackSynthesizer: CliCredentialsStackSynthesizer
# permissionsBoundaryAspect:
# permissionsBoundaryPolicyName: CustomPermissionBoundary
# rolePrefix: CustomPrefix
# policyPrefix: CustomPrefix
# instanceProfilePrefix: CustomPrefix
# vpcId: vpc-0123456789abcdef,
# subnetIds: [subnet-fedcba9876543210, subnet-0987654321fedcba],
s3BucketModels: hf-models-gaiic
# aws partition mountS3 package location
mountS3DebUrl: https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb
# aws-iso partition mountS3 package location
# mountS3DebUrl: https://mountpoint-s3-release-us-iso-east-1.s3.us-iso-east-1.c2s.ic.gov/latest/x86_64/mount-s3.deb
# aws-iso-b partition mountS3 package location
# mountS3DebUrl: https://mountpoint-s3-release-us-isob-east-1.s3.us-isob-east-1.sc2s.sgov.gov/latest/x86_64/mount-s3.deb
accountNumbersEcr:
- 012345678901
deployRag: true
deployChat: true
deployUi: true
privateEndpoints: false
lambdaConfig:
pythonRuntime: PYTHON_3_10
logLevel: DEBUG
vpcAutoscalingConfig:
provisionedConcurrentExecutions: 5
minCapacity: 1
maxCapacity: 50
targetValue: 0.80
cooldown: 30
authConfig:
authority:
clientId:
adminGroup:
jwtGroupsProperty:
logLevel: DEBUG
# NOTE: The following configuration will allow for using a custom domain for the chat user interface.
# If this option is specified, the API Gateway invocation URL will NOT work on its own as the application URL.
# Users must use the custom domain for the user interface to work if this option is populated.
apiGatewayConfig:
domainName:
restApiConfig:
apiVersion: v2
instanceType: m5.large
containerConfig:
image:
baseImage: python:3.9
path: lib/serve/rest-api
type: asset
healthCheckConfig:
command: ["CMD-SHELL", "exit 0"]
interval: 10
startPeriod: 30
timeout: 5
retries: 3
autoScalingConfig:
minCapacity: 1
maxCapacity: 1
cooldown: 60
defaultInstanceWarmup: 60
metricConfig:
AlbMetricName: RequestCountPerTarget
targetValue: 1000
duration: 60
estimatedInstanceWarmup: 30
internetFacing: true
loadBalancerConfig:
sslCertIamArn: arn:aws:iam::012345678901:server-certificate/lisa-self-signed-dev
healthCheckConfig:
path: /health
interval: 60
timeout: 30
healthyThresholdCount: 2
unhealthyThresholdCount: 10
domainName:
ragRepositories:
- repositoryId: pgvector-rag
type: pgvector
rdsConfig:
username: postgres
# - repositoryId: default
# type: opensearch
# opensearchConfig:
# dataNodes: 2
# dataNodeInstanceType: r6g.large.search
# masterNodes: 0
# masterNodeInstanceType: r6g.large.search
# volumeSize: 300
# If adding an existing PGVector database, this configuration assumes:
# 1. The database has been configured to have pgvector installed and enabled: https://aws.amazon.com/about-aws/whats-new/2023/05/amazon-rds-postgresql-pgvector-ml-model-integration/
# 2. The database is accessible by RAG-related lambda functions (add inbound PostgreSQL access on the database's security group for all Lambda RAG security groups)
# 3. A secret ID exists in SecretsManager holding the database password within a json block of '{"password":"your_password_here"}'. This is the same format that RDS natively provides a password in SecretsManager.
# If the passwordSecretId or dbHost are not provided, then a sample database will be created for you. Only the username is required.
# - repositoryId: pgvector-rag
# type: pgvector
# rdsConfig:
# username: postgres
# passwordSecretId: # password ID as stored in SecretsManager. Example: "rds!db-aa88493d-be8d-4a3f-96dc-c668165f7826"
# dbHost: # Host name of database. Example hostname from RDS: "my-db-name.291b2f03.us-east-1.rds.amazonaws.com"
# dbName: postgres
ragFileProcessingConfig:
chunkSize: 512
chunkOverlap: 51
ecsModels:
- modelName: mistralai/Mistral-7B-Instruct-v0.2
inferenceContainer: tgi
baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1
- modelName: intfloat/e5-large-v2
inferenceContainer: tei
baseImage: ghcr.io/huggingface/text-embeddings-inference:1.2.3
# - modelName: mistralai/Mixtral-8x7B-Instruct-v0.1
# inferenceContainer: tgi
# baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1
# LiteLLM Config options found here: https://litellm.vercel.app/docs/proxy/configs#all-settings
# Anything within this config is copied to a configuration for starting LiteLLM in the REST API container.
# It is suggested to put an "ignored" API key so that calls to locally hosted models don't fail on OpenAI calls
# from LiteLLM.
# We added `lisa_params` to add additional metadata for interaction with the Chat UI. Specify if the model is a
# textgen or embedding model, and if it is textgen, specify whether it supports streaming. If embedding, then
# omit the `streaming` parameter. When defining the model list, the `lisa_params` will be an object in the model
# definition that will have the `model_type` and `streaming` fields in it. A commented example is provided below.
litellmConfig:
litellm_settings:
telemetry: false # Don't try to send telemetry to LiteLLM servers.
general_settings:
master_key: sk-d7a77bcb-3e23-483c-beec-2700f2baeeb1 # A key is required for model management purposes
model_list: # Add any of your existing (not LISA-hosted) models here.
# - model_name: mymodel
# litellm_params:
# model: openai/myprovider/mymodel
# api_key: ignored
# lisa_params:
# model_type: textgen
# streaming: true
accountNumber: "012345678901"
region: us-east-1
authConfig:
authority:
clientId:
adminGroup:
jwtGroupsProperty:
s3BucketModels: hf-models-gaiic
########################### OPTIONAL BELOW #######################################
# profile: AWS CLI profile for deployment.
# vpcId: VPC ID for the application. (e.g. vpc-0123456789abcdef)
# The following is an array of subnet objects for the application. Each entry contains a subnetId (e.g. subnet-fedcba9876543210) and an ipv4CidrBlock.
# subnets:
# - subnetId:
# ipv4CidrBlock:
# The following configuration will allow for using a custom domain for the chat user interface.
# If this option is specified, the API Gateway invocation URL will NOT work on its own as the application URL.
# Users must use the custom domain for the user interface to work if this option is populated.
# apiGatewayConfig:
# domainName:
# restApiConfig:
# sslCertIamArn: ARN of the self-signed cert to be used throughout the system
# Some customers will want to download required libs prior to deployment, provide a path to the zipped resources
# lambdaLayerAssets:
# authorizerLayerPath: /path/to/authorizer_layer.zip
# commonLayerPath: /path/to/common_layer.zip
# fastapiLayerPath: /path/to/fastapi_layer.zip
# ragLayerPath: /path/to/rag_layer.zip
# sdkLayerPath: /path/to/sdk_layer.zip
# stackSynthesizer: CliCredentialsStackSynthesizer
# deploymentPrefix: Prefix for deployment resources.
# webAppAssetsPath: Optional path to precompiled webapp assets. If not specified the web application will be built at deploy time.
# permissionsBoundaryAspect:
# permissionsBoundaryPolicyName: CustomPermissionBoundary
# rolePrefix: CustomPrefix
# policyPrefix: CustomPrefix
# instanceProfilePrefix: CustomPrefix
# vpcId: vpc-0123456789abcdef,
# aws-iso partition mountS3 package location
# mountS3DebUrl: https://mountpoint-s3-release-us-iso-east-1.s3.us-iso-east-1.c2s.ic.gov/latest/x86_64/mount-s3.deb
# aws-iso-b partition mountS3 package location
# mountS3DebUrl: https://mountpoint-s3-release-us-isob-east-1.s3.us-isob-east-1.sc2s.sgov.gov/latest/x86_64/mount-s3.deb
# List of AWS account numbers for ECR repositories.
# accountNumbersEcr:
# - 012345678901
# ragRepositories:
# - repositoryId: pgvector-rag
# type: pgvector
# rdsConfig:
# username: postgres
# - repositoryId: default
# type: opensearch
# opensearchConfig:
# dataNodes: 2
# dataNodeInstanceType: r6g.large.search
# masterNodes: 0
# masterNodeInstanceType: r6g.large.search
# volumeSize: 300
# If adding an existing PGVector database, this configuration assumes:
# 1. The database has been configured to have pgvector installed and enabled: https://aws.amazon.com/about-aws/whats-new/2023/05/amazon-rds-postgresql-pgvector-ml-model-integration/
# 2. The database is accessible by RAG-related lambda functions (add inbound PostgreSQL access on the database's security group for all Lambda RAG security groups)
# 3. A secret ID exists in SecretsManager holding the database password within a json block of '{"password":"your_password_here"}'. This is the same format that RDS natively provides a password in SecretsManager.
# If the passwordSecretId or dbHost are not provided, then a sample database will be created for you. Only the username is required.
# - repositoryId: pgvector-rag
# type: pgvector
# rdsConfig:
# username: postgres
# passwordSecretId: # password ID as stored in SecretsManager. Example: "rds!db-aa88493d-be8d-4a3f-96dc-c668165f7826"
# dbHost: # Host name of database. Example hostname from RDS: "my-db-name.291b2f03.us-east-1.rds.amazonaws.com"
# dbName: postgres
# You can optionally provide a list of models and the deployment process will ensure they exist in your model bucket and try to download them if they don't exist
# ecsModels:
# - modelName: mistralai/Mistral-7B-Instruct-v0.2
# inferenceContainer: tgi
# baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1
# - modelName: intfloat/e5-large-v2
# inferenceContainer: tei
# baseImage: ghcr.io/huggingface/text-embeddings-inference:1.2.3
# - modelName: mistralai/Mixtral-8x7B-Instruct-v0.1
# inferenceContainer: tgi
# baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1
# litellmConfig:
# db_key: sk-d7a77bcb-3e23-483c-beec-2700f2baeeb1 # A key is required for model management purposes - must start with sk-
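
Regarding the existing-PGVector notes above (a SecretsManager secret holding the database password in a '{"password":"your_password_here"}' JSON block), here is a hedged sketch of how a RAG component might read that secret, assuming the AWS SDK v3 SecretsManager client; the secret ID is a placeholder.

```typescript
// Illustrative only: fetch an existing PGVector database password from SecretsManager,
// expecting the '{"password":"..."}' JSON format noted above. The secret ID is a placeholder.
import { SecretsManagerClient, GetSecretValueCommand } from '@aws-sdk/client-secrets-manager';

const client = new SecretsManagerClient({});

async function getDbPassword (passwordSecretId: string): Promise<string> {
    const response = await client.send(new GetSecretValueCommand({ SecretId: passwordSecretId }));
    const secret = JSON.parse(response.SecretString ?? '{}');
    return secret.password; // same key RDS uses for the secrets it generates
}

// Example with the placeholder ID format shown in the comments above:
// getDbPassword('rds!db-aa88493d-be8d-4a3f-96dc-c668165f7826').then((pw) => console.log(pw.length));
```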
1 change: 1 addition & 0 deletions lambda/dockerimagebuilder/__init__.py
@@ -70,6 +70,7 @@ def handler(event: Dict[str, Any], context) -> Dict[str, Any]: # type: ignore [
try:
instances = ec2_resource.create_instances(
ImageId=ami_id,
+SubnetId=os.environ["LISA_SUBNET_ID"],
MinCount=1,
MaxCount=1,
InstanceType="m5.large",