diff --git a/.github/workflows/docs.deploy.github-pages.yml b/.github/workflows/docs.deploy.github-pages.yml index ab5a0c5b..ac442a3d 100644 --- a/.github/workflows/docs.deploy.github-pages.yml +++ b/.github/workflows/docs.deploy.github-pages.yml @@ -29,6 +29,9 @@ jobs: cache: npm - name: Setup Pages uses: actions/configure-pages@v4 + - name: Install root dependencies + run: | + npm install - name: Install dependencies working-directory: ./lib/docs run: npm install diff --git a/CHANGELOG.md b/CHANGELOG.md index ab04ea73..199dc844 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,62 @@ +# v3.2.0 +## Key Features +### Enhanced Deployment Configuration +- LISA v3.2.0 introduces a significant update to the configuration file schema, optimizing the deployment process +- The previous single config.yaml file has been replaced with a more flexible two-file system: config-base.yaml and config-custom.yaml +- config-base.yaml now contains default properties, which can be selectively overridden using config-custom.yaml, allowing for greater customization while maintaining a standardized base configuration +- The number of required properties in the config-custom.yaml file has been reduced to 8 items, simplifying the configuration process +- This update enhances the overall flexibility and maintainability of LISA configurations, providing a more robust foundation for future developments and easier customization for end-users + +#### Important Note +- The previous config.yaml file format is no longer compatible with this update +- To facilitate migration, we have developed a utility. Users can execute `npm run migrate-properties` to automatically convert their existing config.yaml file to the new config-custom.yaml format + +### Admin UI Configuration Page +- Administrative Control of Chat Components: + - Administrators now have granular control over the activation and deactivation of chat components for all users through the Configuration Page + - This feature allows for dynamic management of user interface elements, enhancing system flexibility and user experience customization + - Items that can be configured include: + - The option to delete session history + - Visibility of message metadata + - Configuration of chat Kwargs + - Customization of prompt templates + - Adjust chat history buffer settings + - Modify the number of RAG documents to be included in the retrieval process (TopK) + - Ability to upload RAG documents + - Ability to upload in-context documents +- System Banner Management: + - The Configuration Page now includes functionality for administrators to manage the system banner + - Administrators can activate, deactivate, and update the content of the system banner + +### LISA Documentation Site +- We are pleased to announce the launch of the official [LISA Documentation site](https://awslabs.github.io/LISA/) +- This comprehensive resource provides customers with additional guides and extensive information on LISA +- The documentation is also optionally deployable within your environment during LISA deployment +- The team is continuously working to add and expand content available on this site + +## Enhancements +- Implemented a selection-based interface for instance input, replacing free text entry +- Improved CDK Nag integration across stacks +- Added functionality for administrators to specify block volume size for models, enabling successful deployment of larger models +- Introduced options for administrators to choose between Private or Regional API Gateway endpoints +- 
Enabled subnet specification within the designated VPC for deployed resources +- Implemented support for headless deployment execution + +## Bug Fixes +- Resolved issues with Create and Update model alerts to ensure proper display in the modal +- Enhanced error handling for model creation/update processes to cover all potential scenarios + +## Coming Soon +- Version 3.3.0 will include a new RAG ingestion pipeline. This will allow users to configure an S3 bucket and an ingestion trigger. When triggered, the documents in that bucket will be pre-processed and loaded into the selected vector store. + +## Acknowledgements +* @bedanley +* @estohlmann +* @dustins + +**Full Changelog**: https://github.com/awslabs/LISA/compare/v3.1.0...v3.2.0 + + # v3.1.0 ## Enhancements ### Model Management Administration diff --git a/README.md b/README.md index 2d7cadab..56f1b5ab 100644 --- a/README.md +++ b/README.md @@ -1,1310 +1,83 @@ -[![Full Documentation](https://img.shields.io/badge/Full%20Documentation-blue?style=for-the-badge&logo=Vite&logoColor=white)](https://awslabs.github.io/LISA/) - # LLM Inference Solution for Amazon Dedicated Cloud (LISA) -![LISA Architecture](./assets/LisaArchitecture.png) -LISA is an infrastructure-as-code solution that supports model hosting and inference. Customers deploy LISA directly -into an AWS account and provision their own infrastructure. Customers bring their own models to LISA for hosting and -inference through Amazon ECS. LISA accelerates the use of Generative AI (GenAI) applications by providing scalable, -low latency access to customers’ generative LLMs and embedding language models. Customers can then focus on -experimenting with LLMs and developing GenAI applications. - -LISA’s chatbot user interface can be used to experiment with features and for production use cases. LISA enhances model -output by integrating retrieval-augmented generation (RAG) with Amazon OpenSearch or PostgreSQL’s PGVector extension, -incorporating external knowledge sources into model responses. This helps reduce the need for fine-tuning and delivers -more contextually relevant outputs. - -LISA supports OpenAI’s API Spec via the LiteLLM proxy. This means that customers can configure LISA to work -with models hosted externally by supported model providers. LiteLLM also allows customers to use LISA to standardize -model orchestration and communication across model providers instead of managing each individually. With OpenAI API spec -support, LISA can also be used as a stand-in replacement for any application that already utilizes OpenAI-centric -tooling (ex: OpenAI’s Python library, LangChain).
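As a quick illustration of that drop-in compatibility, the following sketch points the OpenAI Python client at a LISA Serve endpoint. The base URL, API token, and model id are placeholders that depend on your deployment; see the API usage and programmatic token sections later in this document for the details.

```python
# Minimal sketch: using the OpenAI Python client as-is against a LISA Serve deployment.
# The base URL, token, and model id below are placeholders for your own environment.
from openai import OpenAI

client = OpenAI(
    base_url="https://<lisa-serve-alb>/v2/serve",  # placeholder LISA Serve ALB endpoint
    api_key="<your-api-token>",                    # sent as an "Authorization: Bearer" header
)

response = client.chat.completions.create(
    model="<model-id>",                            # any model id LISA exposes for inference
    messages=[{"role": "user", "content": "Hello from LISA!"}],
)
print(response.choices[0].message.content)
```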
- ---- -# Table of Contents - -- [LISA (LLM Inference Solution for Amazon Dedicated Cloud)](#lisa-llm-inference-solution-for-amazon-dedicated-cloud) -- [Breaking Changes in v2 to v3 Migration](#breaking-changes-in-v2-to-v3-migration) -- [Background](#background) -- [System Overview](#system-overview) -- [LISA Components](#lisa-components) - - [LISA Model Management](#lisa-model-management) - - [LISA Serve](#lisa-serve) - - [LISA Chat](#lisa-chat) -- [Interaction Flow](#interaction-flow) -- [Getting Started with LISA](#getting-started-with-lisa) - - [Prerequisites](#prerequisites) - - [Step 1: Clone the Repository](#step-1-clone-the-repository) - - [Step 2: Set Up Environment Variables](#step-2-set-up-environment-variables) - - [Step 3: Set Up Python and TypeScript Environments](#step-3-set-up-python-and-typescript-environments) - - [Step 4: Configure LISA](#step-4-configure-lisa) - - [Step 5: Stage Model Weights](#step-5-stage-model-weights) - - [Step 6: Configure Identity Provider](#step-6-configure-identity-provider) - - [Step 7: Configure LiteLLM](#step-7-configure-litellm) - - [Step 8: Set Up SSL Certificates (Development Only)](#step-8-set-up-ssl-certificates-development-only) - - [Step 9: Customize Model Deployment](#step-9-customize-model-deployment) - - [Step 10: Bootstrap CDK (If Not Already Done)](#step-10-bootstrap-cdk-if-not-already-done) -- [Recommended LiteLLM Configuration Options](#recommended-litellm-configuration-options) -- [API Usage Overview](#api-usage-overview) - - [User-facing OpenAI-Compatible API](#user-facing-openai-compatible-api) - - [Admin-level Model Management API](#admin-level-model-management-api) -- [Error Handling for API Requests](#error-handling-for-api-requests) -- [Deployment](#deployment) - - [Using Pre-built Resources](#using-pre-built-resources) - - [Deploying](#deploying) -- [Programmatic API Tokens](#programmatic-api-tokens) -- [Model Compatibility](#model-compatibility) -- [Chatbot Example](#chatbot-example) -- [Usage and Features](#usage-and-features) - - [OpenAI Specification Compatibility](#openai-specification-compatibility) - - [Continue JetBrains and VS Code Plugin](#continue-jetbrains-and-vs-code-plugin) - - [Usage in LLM Libraries](#usage-in-llm-libraries) -- [License Notice](#license-notice) - ---- -# Breaking Changes - -## v2 to v3 Migration - -With the release of LISA v3.0.0, we have introduced several architectural changes that are incompatible with previous versions. Although these changes may cause some friction for existing users, they aim to simplify the deployment experience and enhance long-term scalability. The following breaking changes are critical for existing users planning to upgrade: - -1. Model Deletion Upon Upgrade: Models deployed via EC2 and ECS using the config.yaml file’s ecsModels list will be deleted during the upgrade process. LISA has migrated to a new model deployment system that manages models internally, rendering the ecsModels list obsolete. We recommend backing up your model settings to facilitate their redeployment through the new Model Management API with minimal downtime. -1. Networking Changes and Full Teardown: Core networking changes require a complete teardown of the existing LISA installation using the make destroy command before upgrading. Cross-stack dependencies have been modified, necessitating this full teardown to ensure proper application of the v3 infrastructure changes. 
Additionally, users may need to manually delete some resources, such as ECR repositories or S3 buckets, if they were populated before CloudFormation began deleting the stack. This operation is destructive and irreversible, so it is crucial to back up any critical configurations and data (e.g., S3 RAG bucket contents, DynamoDB token tables) before proceeding with the upgrade. -1. New LiteLLM Admin Key Requirement: The new Model Management API requires an "admin" key for LiteLLM to track models for inference requests. This key, while transparent to users, must be present and conform to the required format (starting with sk-). The key is defined in the config.yaml file, and the LISA schema validator will raise an error if it is missing or incorrectly formatted. - -## v3.0.0 to v3.1.0 - -In preparation for the v3.1.0 release, there are several changes that we needed to make in order to ensure the stability of the LISA system. -1. The CreateModel API `containerConfig` object has been changed so that the Docker Image repository is listed in `containerConfig.image.baseImage` instead of - its previous location at `containerConfig.baseImage.baseImage`. This change makes the configuration consistent with the config.yaml file in LISA v2.0 and prior. -2. The CreateModel API `containerConfig.image` object no longer requires the `path` option. We identified that this was a confusing and redundant option to set, considering - that the path was based on the LISA code repository structure, and that we already had an option to specify if a model was using TGI, TEI, or vLLM. Specifying the `inferenceContainer` - is sufficient for the system to infer which files to use so that the user does not have to provide this information. -3. The ApiDeployment stack now follows the same naming convention as the rest of the stacks that we deploy, utilizing the deployment name and the deploymentStage names. This allows users - to have multiple LISA installations with different parameters in the same account without needing to change region or account entirely. After successful deployment, you may safely delete the - previous `${deploymentStage}-LisaApiDeployment` stack, as it is no longer in use. -4. If you have installed v3.0.0 or v3.0.1, you will need to **delete** the Models API stack so that the model deployer function will deploy again. The function was converted to a Docker Image - Function so that the growing Function size would fit within the Lambda constraints. We recommend that you take the following actions to avoid leaked resources: - 1. Use the Model Management UI to **delete all models** from LISA. This is needed so that we delete any CloudFormation stacks that track GPU instances. Failure to do this will require manual - resource cleanup to rid the account of inaccessible EC2 instances. Once the Models DynamoDB Table is deleted, we do not have a programmatic way to re-reference deployed models, so that is - why we recommend deleting them first. - 2. **Only after deleting all models through the Model Management UI**, manually delete the Model Management API stack in CloudFormation. This will take at least 45 minutes due to Lambda's use - of Elastic Network Interfaces for VPC access. The stack name will look like: `${deployment}-lisa-models-${deploymentStage}`. - 3. After the stack has been deleted, deploy LISA v3.1.0, which will recreate the Models API stack, along with the Docker Lambda Function. -5. 
The `ecsModels` section of `config.yaml` has been stripped down to only 3 fields per model: `modelName`, `inferenceContainer`, and `baseImage`. Just as before, the system will check to see if the models - defined here exist in your models S3 bucket prior to LISA deployment. These values will be needed later when invoking the Model Management API to create a model. ---- - -## Background - -LISA is a robust, AWS-native platform designed to simplify the deployment and management of Large Language Models (LLMs) in scalable, secure, and highly available environments. Drawing inspiration from the AWS open-source project [aws-genai-llm-chatbot](https://github.com/aws-samples/aws-genai-llm-chatbot), LISA builds on this foundation by offering more specialized functionality, particularly in the areas of security, modularity, and flexibility. - -One of the key differentiators of LISA is its ability to leverage the [text-generation-inference](https://github.com/huggingface/text-generation-inference/tree/main) container from HuggingFace, allowing users to deploy cutting-edge LLMs. LISA also introduces several innovations that extend beyond its inspiration: - -1. **Support for Amazon Dedicated Cloud (ADC):** LISA is designed to operate in highly controlled environments like Amazon Dedicated Cloud (ADC) partitions, making it ideal for industries with stringent regulatory and security requirements. This focus on secure, isolated deployments differentiates LISA from other open-source platforms. -1. **Modular Design for Composability:** LISA's architecture is designed to be composable, splitting its components into distinct services. The core components, LISA Serve (for LLM serving and inference) and LISA Chat (for the chat interface), can be deployed as independent stacks. This modularity allows users to deploy only the parts they need, enhancing flexibility and scalability across different deployment environments. -1. **OpenAI API Specification Support:** LISA is built to support the OpenAI API specification, allowing users to replace OpenAI’s API with LISA without needing to change existing application code. This makes LISA a drop-in replacement for any workflow or application that already leverages OpenAI’s tooling, such as the OpenAI Python library or LangChain. - ---- - -## System Overview - -LISA is designed using a modular, microservices-based architecture, where each service performs a distinct function. It is composed of three core components: LISA Model Management, LISA Serve, and LISA Chat. Each of these components is responsible for specific functionality and interacts via well-defined API endpoints to ensure scalability, security, and fault tolerance across the system. - -**Key System Functionalities:** - -* **Authentication and Authorization** via AWS Cognito or OpenID Connect (OIDC) providers, ensuring secure access to both the REST API and Chat UI through token-based authentication and role-based access control. -* **Model Hosting** on AWS ECS with autoscaling and efficient traffic management using Application Load Balancers (ALBs), providing scalable and high-performance model inference. -* **Model Management** using AWS Step Functions to orchestrate complex workflows for creating, updating, and deleting models, automatically managing underlying ECS infrastructure. -* **Inference Requests** served via both the REST API and the Chat UI, dynamically routing user inputs to the appropriate ECS-hosted models for real-time inference.
-* **Chat Interface** enabling users to interact with LISA through a user-friendly web interface, offering seamless real-time model interaction and session continuity. -* **Retrieval-Augmented Generation (RAG) Operations**, leveraging either OpenSearch or PGVector for efficient retrieval of relevant external data to enhance model responses. - ---- - -## LISA Components - -### LISA Model Management -![LISA Model Management Architecture](./assets/LisaModelManagement.png) -The Model Management component is responsible for managing the entire lifecycle of models in LISA. This includes creation, updating, deletion, and scaling of models deployed on ECS. The system automates and scales these operations, ensuring that the underlying infrastructure is managed efficiently. - -* **Model Hosting**: Models are containerized and deployed on AWS ECS, with each model hosted in its own isolated ECS task. This design allows models to be independently scaled based on demand. Traffic to the models is balanced using Application Load Balancers (ALBs), ensuring that the autoscaling mechanism reacts to load fluctuations in real time, optimizing both performance and availability. -* **External Model Routing**: LISA utilizes the LiteLLM proxy to route traffic to different model providers, no matter their API and payload format. Users may add models from external providers, such as SageMaker or Bedrock, to their system to allow requests to models hosted in those systems and services. LISA will simply add the configuration to LiteLLM without creating any additional supporting infrastructure. -* **Model Lifecycle Management**: AWS Step Functions are used to orchestrate the lifecycle of models, handling the creation, update, and deletion workflows. Each workflow provisions the required resources using CloudFormation templates, which manage infrastructure components like EC2 instances, security groups, and ECS services. The system ensures that the necessary security, networking, and infrastructure components are automatically deployed and configured. - * The CloudFormation stacks define essential resources using the LISA core VPC configuration, ensuring best practices for security and access across all resources in the environment. - * DynamoDB stores model metadata, while Amazon S3 securely manages model weights, enabling ECS instances to retrieve the weights dynamically during deployment. - -#### Technical Implementation - -* **Model Lifecycle**: Lifecycle operations such as creation, update, and deletion are executed by Step Functions and backed by AWS Lambda in ```lambda/models/lambda_functions.py```. -* **CloudFormation**: Infrastructure components are provisioned using CloudFormation templates, as defined in ```ecs_model_deployer/src/lib/lisa_model_stack.ts```. -* **ECS Cluster**: ECS cluster and task definitions are located in ```ecs_model_deployer/src/lib/ecsCluster.ts```, with model containers specified in ```ecs_model_deployer/src/lib/ecs-model.ts```. - ---- - -### LISA Serve -![LISA Serve Architecture](./assets/LisaServe.png) -LISA Serve is responsible for processing inference requests and serving model predictions. This component manages user requests to interact with LLMs and ensures that the models deliver low-latency responses. - -* **Inference Requests**: Requests are routed via ALB, which serves as the main entry point to LISA’s backend infrastructure. The ALB forwards requests to the appropriate ECS-hosted model or externally-hosted model based on the request parameters. 
For models hosted within LISA, traffic to the models is managed with model-specific ALBs, which enable autoscaling if the models are under heavy load. LISA supports both direct REST API-based interaction and interaction through the Chat UI, enabling programmatic access or a user-friendly chat experience. -* **RAG (Retrieval-Augmented Generation)**: RAG operations enhance model responses by integrating external data sources. LISA leverages OpenSearch or PGVector (PostgreSQL) as vector stores, enabling vector-based search and retrieval of relevant knowledge to augment LLM outputs dynamically. - -#### Technical Implementation - -* RAG operations are managed through ```lambda/rag/lambda_functions.py```, which handles embedding generation and document retrieval via OpenSearch and PostgreSQL. -* Direct requests to the LISA Serve ALB entrypoint must utilize the OpenAI API spec, which we support through the use of the LiteLLM proxy. - ---- - -### LISA Chat -![LISA Chatbot Architecture](./assets/LisaChat.png) -LISA Chat provides a customizable chat interface that enables users to interact with models in real-time. This component ensures that users have a seamless experience for submitting queries and maintaining session continuity. - -* **Chat Interface**: The Chat UI is hosted as a static website on Amazon S3 and is served via API Gateway. Users can interact with models directly through the web-based frontend, sending queries and viewing real-time responses from the models. The interface is integrated with LISA's backend services for model inference, retrieval augmented generation, and session management. -* **Session History Management**: LISA maintains session histories using DynamoDB, allowing users to retrieve and continue previous conversations seamlessly. This feature is crucial for maintaining continuity in multi-turn conversations with the models. - -#### Technical Implementation - -* The Chat UI is implemented in the ```lib/user-interface/react/``` folder and is deployed using the scripts in the ```scripts/``` folder. -* Session management logic is handled in ```lambda/session/lambda_functions.py```, where session data is stored and retrieved from DynamoDB. -* RAG operations are defined in lambda/repository/lambda_functions.py - ---- - -## Interaction Flow - -1. **User Interaction with Chat UI or API:** Users can interact with LISA through the Chat UI or REST API. Each interaction is authenticated using AWS Cognito or OIDC, ensuring secure access. -1. **Request Routing:** The API Gateway securely routes user requests to the appropriate backend services, whether for fetching the chat UI, performing RAG operations, or managing models. -1. **Model Management:** Administrators can deploy, update, or delete models via the Model Management API, which triggers ECS deployment and scaling workflows. -1. **Model Inference:** Inference requests are routed to ECS-hosted models or external models via the LiteLLM proxy. Responses are served back to users through the ALB. -1. **RAG Integration:** When RAG is enabled, LISA retrieves relevant documents from OpenSearch or PGVector, augmenting the model's response with external knowledge. -1. **Session Continuity:** User session data is stored in DynamoDB, ensuring that users can retrieve and continue previous conversations across multiple interactions. -1. **Autoscaling:** ECS tasks automatically scale based on system load, with ALBs distributing traffic across available instances to ensure performance. 
- ---- - -# Getting Started with LISA - -LISA (LLM Inference Solution for Amazon Dedicated Cloud) is an advanced infrastructure solution for deploying and -managing Large Language Models (LLMs) on AWS. This guide will walk you through the setup process, from prerequisites -to deployment. - -## Prerequisites - -Before beginning, ensure you have: - -1. An AWS account with appropriate permissions. - 1. Because of all the resource creation that happens as part of CDK deployments, we expect Administrator or Administrator-like permissions with resource creation and mutation permissions. - Installation will not succeed if this profile does not have permissions to create and edit arbitrary resources for the system. - **Note**: This level of permissions is not required for the runtime of LISA, only its deployment and subsequent updates. -2. AWS CLI installed and configured -3. Familiarity with AWS Cloud Development Kit (CDK) and infrastructure-as-code principles -4. Python 3.9 or later -5. Node.js 14 or later -6. Docker/Finch installed and running -7. Sufficient disk space for model downloads and conversions - -If you're new to CDK, review the [AWS CDK Documentation](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html) and consult with your AWS support team. - -> [!TIP] -> To minimize version conflicts and ensure a consistent deployment environment, it is recommended to execute the following steps on a dedicated EC2 instance. However, LISA can be deployed from any machine that meets the prerequisites listed above. - ---- - -## Step 1: Clone the Repository - -Ensure you're working with the latest stable release of LISA: - -```bash -git clone -b main --single-branch -cd lisa -``` - ---- - -## Step 2: Set Up Environment Variables - -Create and configure your `config.yaml` file: - -```bash -cp example_config.yaml config.yaml -``` - -Set the following environment variables: - -```bash -export PROFILE=my-aws-profile # Optional, can be left blank -export DEPLOYMENT_NAME=my-deployment -export ENV=dev # Options: dev, test, or prod -export CDK_DOCKER=finch # Optional, only required if not using docker as container engine -``` - ---- - -## Step 3: Set Up Python and TypeScript Environments - -Install system dependencies and set up both Python and TypeScript environments: - -```bash -# Install system dependencies -sudo apt-get update -sudo apt-get install -y jq - -# Install Python packages -pip3 install --user --upgrade pip -pip3 install yq huggingface_hub s5cmd - -# Set up Python environment -make createPythonEnvironment - -# Activate your python environment -# The command is the output from the previous make command) - -# Install Python Requirements -make installPythonRequirements - -# Set up TypeScript environment -make createTypeScriptEnvironment -make installTypeScriptRequirements -``` - ---- - -## Step 4: Configure LISA - -Edit the `config.yaml` file to customize your LISA deployment. Key configurations include: - -- AWS account and region settings -- Model configurations -- Authentication settings -- Networking and infrastructure preferences - ---- - -## Step 5: Stage Model Weights - -LISA requires model weights to be staged in the S3 bucket specified in your `config.yaml` file, assuming the S3 bucket follows this structure: - -``` -s3:/// -s3://// -s3://// -... -s3:/// -``` - -**Example:** - -``` -s3:///mistralai/Mistral-7B-Instruct-v0.2 -s3:///mistralai/Mistral-7B-Instruct-v0.2/ -s3:///mistralai/Mistral-7B-Instruct-v0.2/ -... 
-``` - -To automatically download and stage the model weights defined by the `ecsModels` parameter in your `config.yaml`, use the following command: - -```bash -make modelCheck -``` - -This command verifies if the model's weights are already present in your S3 bucket. If not, it downloads the weights, converts them to the required format, and uploads them to your S3 bucket. Ensure adequate disk space is available for this process. - -> **WARNING** -> As of LISA 3.0, the `ecsModels` parameter in `config.yaml` is solely for staging model weights in your S3 bucket. Previously, before models could be managed through the [API](https://github.com/awslabs/LISA/blob/develop/README.md#creating-a-model-admin-api) or via the Model Management section of the [Chatbot](https://github.com/awslabs/LISA/blob/develop/README.md#chatbot-example), this parameter also dictated which models were deployed. - -> **NOTE** -> For air-gapped systems, before running `make modelCheck` you should manually download model artifacts and place them in a `models` directory at the project root, using the structure: `models/`. - -> **NOTE** -> This process is primarily designed and tested for HuggingFace models. For other model formats, you will need to manually create and upload safetensors. - ---- - -## Step 6: Configure Identity Provider - -In the `config.yaml` file, configure the `authConfig` block for authentication. LISA supports OpenID Connect (OIDC) providers such as AWS Cognito or Keycloak. Required fields include: - -- `authority`: URL of your identity provider -- `clientId`: Client ID for your application -- `adminGroup`: Group name for users with model management permissions -- `jwtGroupsProperty`: Path to the groups field in the JWT token -- `additionalScopes` (optional): Extra scopes for group membership information - -#### Cognito Configuration Example: -In Cognito, the `authority` will be the URL to your User Pool. As an example, if your User Pool ID, not the name, is `us-east-1_example`, and if it is -running in `us-east-1`, then the URL to put in the `authority` field would be `https://cognito-idp.us-east-1.amazonaws.com/us-east-1_example`. The `clientId` -can be found in your User Pool's "App integration" tab from within the AWS Management Console, and at the bottom of the page, you will see the list of clients -and their associated Client IDs. The ID here is what we will need for the `clientId` field. - - -```yaml -authConfig: - authority: https://cognito-idp.us-east-1.amazonaws.com/us-east-1_example - clientId: your-client-id - adminGroup: AdminGroup - jwtGroupsProperty: cognito:groups -``` - -#### Keycloak Configuration Example: -In Keycloak, the `authority` will be the URL to your Keycloak server. The `clientId` is likely not a random string like in the Cognito clients, and instead -will be a string configured by your Keycloak administrator. Your administrator will be able to give you a client name or create a client for you to use for -this application. Once you have this string, use that as the `clientId` within the `authConfig` block. - -```yaml -authConfig: - authority: https://your-keycloak-server.com - clientId: your-client-name - adminGroup: AdminGroup - jwtGroupsProperty: realm_access.roles -``` - ---- - -## Step 7: Configure LiteLLM -We utilize LiteLLM under the hood to allow LISA to respond to the [OpenAI specification](https://platform.openai.com/docs/api-reference). 
-For LiteLLM configuration, a key must be set up so that the system may communicate with a database for tracking all the models that are added or removed -using the [Model Management API](#admin-level-model-management-api). The key must start with `sk-` and then can be any arbitrary string. We recommend generating a new UUID and then using that as -the key. Configuration example is below. - - -```yaml -litellmConfig: - general_settings: - master_key: sk-00000000-0000-0000-0000-000000000000 # needed for db operations, create your own key # pragma: allowlist-secret - model_list: [] -``` - -**Note**: It is possible to add LiteLLM-only models to this configuration, but it is not recommended as the models in this configuration will not show in the -Chat or Model Management UIs. Instead, use the [Model Management UI](#admin-level-model-management-api) to add or remove LiteLLM-only model configurations. - ---- - -## Step 8: Set Up SSL Certificates (Development Only) - -**WARNING: THIS IS FOR DEV ONLY** -When deploying for dev and testing you can use a self-signed certificate for the REST API ALB. You can create this by using the script: `gen-cert.sh` and uploading it to `IAM`. - -```bash -export REGION= -./scripts/gen-certs.sh -aws iam upload-server-certificate --server-certificate-name --certificate-body file://scripts/server.pem --private-key file://scripts/server.key -``` - -Update your `config.yaml` with the certificate ARN: - -```yaml -restApiConfig: - loadBalancerConfig: - sslCertIamArn: arn:aws:iam:::server-certificate/ -``` ---- - -## Step 9: Customize Model Deployment - -In the `ecsModels` section of `config.yaml`, allow our deployment process to pull the model weights for you. - -During the deployment process, LISA will optionally attempt to download your model weights if you specify an optional `ecsModels` -array, this will only work in non ADC regions. Specifically, see the `ecsModels` section of the [example_config.yaml](./example_config.yaml) file. -Here we define the model name, inference container, and baseImage: - -```yaml -ecsModels: - - modelName: your-model-name - inferenceContainer: tgi - baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1 -``` - ---- - -## Step 10: Bootstrap CDK (If Not Already Done) - -If you haven't bootstrapped your AWS account for CDK: - -```bash -make bootstrap -``` - ---- - -## Recommended LiteLLM Configuration Options - -While LISA is designed to be flexible, configuring external models requires careful consideration. The following guide -provides a recommended minimal setup for integrating various model types with LISA using LiteLLM. - -### Configuration Overview - -This example configuration demonstrates how to set up: -1. A SageMaker Endpoint -2. An Amazon Bedrock Model -3. A self-hosted OpenAI-compatible text generation model -4. A self-hosted OpenAI-compatible embedding model - -**Note:** Ensure that all endpoints and models are in the same AWS region as your LISA installation. - -### SageMaker Endpoints and Bedrock Models - -LISA supports adding existing SageMaker Endpoints and Bedrock Models to the LiteLLM configuration. As long as these -services are in the same region as the LISA installation, LISA can use them alongside any other deployed models. - -**To use a SageMaker Endpoint:** -1. Install LISA without initially referencing the SageMaker Endpoint. -2. Create a SageMaker Model using the private subnets of the LISA deployment. -3. 
This setup allows the LISA REST API container to communicate with any Endpoint using that SageMaker Model. - -**SageMaker Endpoints and Bedrock Models can be configured:** -- Statically at LISA deployment time -- Dynamically using the LISA Model Management API - -**Important:** Endpoints or Models statically defined during LISA deployment cannot be removed or updated using the -LISA Model Management API, and they will not show in the Chat UI. These will only show as part of the OpenAI `/models` API. -Although there is support for it, we recommend using the [Model Management API](#admin-level-model-management-api) instead of the following static configuration. - -### Example Configuration - -```yaml -dev: - litellmConfig: - litellm_settings: - telemetry: false # Disable telemetry to LiteLLM servers (recommended for VPC deployments) - drop_params: true # Ignore unrecognized parameters instead of failing - - model_list: - # 1. SageMaker Endpoint Configuration - - model_name: test-endpoint # Human-readable name, can be anything and will be used for OpenAI API calls - litellm_params: - model: sagemaker/test-endpoint # Prefix required for SageMaker Endpoints and "test-endpoint" matches Endpoint name - api_key: ignored # Provide an ignorable placeholder key to avoid LiteLLM deployment failures - lisa_params: - model_type: textgen - streaming: true - - # 2. Amazon Bedrock Model Configuration - - model_name: bedrock-titan-express # Human-readable name for future OpenAI API calls - litellm_params: - model: bedrock/amazon.titan-text-express-v1 # Prefix required for Bedrock Models, and exact name of Model to use - api_key: ignored # Provide an ignorable placeholder key to avoid LiteLLM deployment failures - lisa_params: - model_type: textgen - streaming: true - - # 3. Custom OpenAI-compatible Text Generation Model - - model_name: custom-openai-model # Used in future OpenAI-compatible calls to LiteLLM - litellm_params: - model: openai/custom-provider/textgen-model # Format: openai// - api_base: https://your-domain-here:443/v1 # Your model's base URI - api_key: ignored # Provide an ignorable placeholder key to avoid LiteLLM deployment failures - lisa_params: - model_type: textgen - streaming: true - - # 4. Custom OpenAI-compatible Embedding Model - - model_name: custom-openai-embedding-model # Used in future OpenAI-compatible calls to LiteLLM - litellm_params: - model: openai/modelProvider/modelName # Prefix required for OpenAI-compatible models followed by model provider and name details - api_base: https://your-domain-here:443/v1 # Your model's base URI - api_key: ignored # Provide an ignorable placeholder key to avoid LiteLLM deployment failures - lisa_params: - model_type: embedding -``` - ---- - -# API Usage Overview - -LISA provides robust API endpoints for managing models, both for users and administrators. These endpoints allow for operations such as listing, creating, updating, and deleting models. - -## API Gateway and ALB Endpoints - -LISA uses two primary APIs for model management: - -1. **User-facing OpenAI-Compatible API**: Available to all users for inference tasks and accessible through the LISA Serve ALB. This API provides an interface for querying and interacting with models deployed on Amazon ECS, Amazon Bedrock, or through LiteLLM. -2. **Admin-level Model Management API**: Available only to administrators through the API Gateway (APIGW). This API allows for full control of model lifecycle management, including creating, updating, and deleting models. 
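For orientation, the sketch below shows how a client could target each surface with Python's `requests` library. The hostnames and token are placeholders for your deployment, the response shapes match the examples documented in the following sections, and the equivalent `curl` requests are shown there as well.

```python
# Illustrative sketch: the two API surfaces live behind different endpoints but use the
# same bearer-token style of authentication. Hostnames and the token are placeholders.
import requests

HEADERS = {"Authorization": "Bearer <your-token>"}

# 1. User-facing OpenAI-compatible API, served from the LISA Serve ALB.
serve_models = requests.get(
    "https://<lisa-serve-alb>/v2/serve/models", headers=HEADERS, timeout=30
).json()
print([m["id"] for m in serve_models["data"]])          # OpenAI-style model listing

# 2. Admin-level model management API, served from API Gateway (administrators only).
admin_models = requests.get(
    "https://<api-gateway-url>/models", headers=HEADERS, timeout=30
).json()
print([m["modelId"] for m in admin_models["models"]])   # LISA model management listing
```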
- -### LiteLLM Routing in All Models - -Every model request is routed through LiteLLM, regardless of whether infrastructure (like ECS) is created for it. Whether deployed on ECS, external models via Bedrock, or managed through LiteLLM, all models are added to LiteLLM for traffic routing. The distinction is whether infrastructure is created (determined by request payloads), but LiteLLM integration is consistent for all models. The model management APIs will handle adding or removing model configurations from LiteLLM, and the LISA Serve endpoint will handle the inference requests against models available in LiteLLM. - -## User-facing OpenAI-Compatible API - -The OpenAI-compatible API is accessible through the LISA Serve ALB and allows users to list models available for inference tasks. Although not specifically part of the model management APIs, any model that is added or removed from LiteLLM via the model management API Gateway APIs will be reflected immediately upon queries to LiteLLM through the LISA Serve ALB. - -### Listing Models - -The `/v2/serve/models` endpoint on the LISA Serve ALB allows users to list all models available for inference in the LISA system. - -#### Request Example: - -```bash -curl -s -H 'Authorization: Bearer ' -X GET https:///v2/serve/models -``` - -#### Response Example: - -```json -{ - "data": [ - { - "id": "bedrock-embed-text-v2", - "object": "model", - "created": 1677610602, - "owned_by": "openai" - }, - { - "id": "titan-express-v1", - "object": "model", - "created": 1677610602, - "owned_by": "openai" - }, - { - "id": "sagemaker-amazon-mistrallite", - "object": "model", - "created": 1677610602, - "owned_by": "openai" - } - ], - "object": "list" -} -``` - -#### Explanation of Response Fields: - -These fields are all defined by the OpenAI API specification, which is documented [here](https://platform.openai.com/docs/api-reference/models/list). - -- `id`: A unique identifier for the model. -- `object`: The type of object, which is "model" in this case. -- `created`: A Unix timestamp representing when the model was created. -- `owned_by`: The entity responsible for the model, such as "openai." - -## Admin-level Model Management API - -This API is only accessible by administrators via the API Gateway and is used to create, update, and delete models. It supports full model lifecycle management. - -### Listing Models (Admin API) - -The `/models` route allows admins to list all models managed by the system. This includes models that are either creating, deleting, already active, or in a failed state. Models can be deployed via ECS or managed externally through a LiteLLM configuration. 
-#### Request Example: - -```bash -curl -s -H "Authorization: Bearer " -X GET https:///models -``` - -#### Response Example: - -```json -{ - "models": [ - { - "autoScalingConfig": { - "minCapacity": 1, - "maxCapacity": 1, - "cooldown": 420, - "defaultInstanceWarmup": 180, - "metricConfig": { - "albMetricName": "RequestCountPerTarget", - "targetValue": 30, - "duration": 60, - "estimatedInstanceWarmup": 330 - } - }, - "containerConfig": { - "image": { - "baseImage": "vllm/vllm-openai:v0.5.0", - "type": "asset" - }, - "sharedMemorySize": 2048, - "healthCheckConfig": { - "command": [ - "CMD-SHELL", - "exit 0" - ], - "interval": 10, - "startPeriod": 30, - "timeout": 5, - "retries": 3 - }, - "environment": { - "MAX_TOTAL_TOKENS": "2048", - "MAX_CONCURRENT_REQUESTS": "128", - "MAX_INPUT_LENGTH": "1024" - } - }, - "loadBalancerConfig": { - "healthCheckConfig": { - "path": "/health", - "interval": 60, - "timeout": 30, - "healthyThresholdCount": 2, - "unhealthyThresholdCount": 10 - } - }, - "instanceType": "g5.xlarge", - "modelId": "mistral-vllm", - "modelName": "mistralai/Mistral-7B-Instruct-v0.2", - "modelType": "textgen", - "modelUrl": null, - "status": "Creating", - "streaming": true - }, - { - "autoScalingConfig": null, - "containerConfig": null, - "loadBalancerConfig": null, - "instanceType": null, - "modelId": "titan-express-v1", - "modelName": "bedrock/amazon.titan-text-express-v1", - "modelType": "textgen", - "modelUrl": null, - "status": "InService", - "streaming": true - } - ] -} -``` - -#### Explanation of Response Fields: - -- `modelId`: A unique identifier for the model. -- `modelName`: The name of the model, typically referencing the underlying service (Bedrock, SageMaker, etc.). -- `status`: The current state of the model, e.g., "Creating," "Active," or "Failed." -- `streaming`: Whether the model supports streaming inference. -- `instanceType` (optional): The instance type if the model is deployed via ECS. - -### Creating a Model (Admin API) - -LISA provides the `/models` endpoint for creating both ECS and LiteLLM-hosted models. Depending on the request payload, infrastructure will be created or bypassed (e.g., for LiteLLM-only models). - -This API accepts the same model definition parameters that were accepted in the V2 model definitions within the config.yaml file with one notable difference: the `containerConfig.image.path` field is -now omitted because it corresponded with the `inferenceContainer` selection. As a convenience, this path is no longer required. 
- -#### Request Example: - -``` -POST https:///models -``` - -#### Example Payload for ECS Model: - -```json -{ - "modelId": "mistral-vllm", - "modelName": "mistralai/Mistral-7B-Instruct-v0.2", - "modelType": "textgen", - "inferenceContainer": "vllm", - "instanceType": "g5.xlarge", - "streaming": true, - "containerConfig": { - "image": { - "baseImage": "vllm/vllm-openai:v0.5.0", - "type": "asset" - }, - "sharedMemorySize": 2048, - "environment": { - "MAX_CONCURRENT_REQUESTS": "128", - "MAX_INPUT_LENGTH": "1024", - "MAX_TOTAL_TOKENS": "2048" - }, - "healthCheckConfig": { - "command": ["CMD-SHELL", "exit 0"], - "interval": 10, - "startPeriod": 30, - "timeout": 5, - "retries": 3 - } - }, - "autoScalingConfig": { - "minCapacity": 1, - "maxCapacity": 1, - "cooldown": 420, - "defaultInstanceWarmup": 180, - "metricConfig": { - "albMetricName": "RequestCountPerTarget", - "targetValue": 30, - "duration": 60, - "estimatedInstanceWarmup": 330 - } - }, - "loadBalancerConfig": { - "healthCheckConfig": { - "path": "/health", - "interval": 60, - "timeout": 30, - "healthyThresholdCount": 2, - "unhealthyThresholdCount": 10 - } - } -} -``` - -#### Creating a LiteLLM-Only Model: - -```json -{ - "modelId": "titan-express-v1", - "modelName": "bedrock/amazon.titan-text-express-v1", - "modelType": "textgen", - "streaming": true -} -``` - -#### Explanation of Key Fields for Creation Payload: - -- `modelId`: The unique identifier for the model. This is any name you would like it to be. -- `modelName`: The name of the model as it appears in the system. For LISA-hosted models, this must be the S3 Key to your model artifacts, otherwise - this is the LiteLLM-compatible reference to a SageMaker Endpoint or Bedrock Foundation Model. Note: Bedrock and SageMaker resources must exist in the - same region as your LISA deployment. If your LISA installation is in us-east-1, then all SageMaker and Bedrock calls will also happen in us-east-1. - Configuration examples: - - LISA hosting: If your model artifacts are in `s3://${lisa_models_bucket}/path/to/model/weights`, then the `modelName` value here should be `path/to/model/weights` - - LiteLLM-only, Bedrock: If you want to use `amazon.titan-text-lite-v1`, your `modelName` value should be `bedrock/amazon.titan-text-lite-v1` - - LiteLLM-only, SageMaker: If you want to use a SageMaker Endpoint named `my-sm-endpoint`, then the `modelName` value should be `sagemaker/my-sm-endpoint`. -- `modelType`: The type of model, such as text generation (textgen). -- `streaming`: Whether the model supports streaming inference. -- `instanceType`: The type of EC2 instance to be used (only applicable for ECS models). -- `containerConfig`: Details about the Docker container, memory allocation, and environment variables. -- `autoScalingConfig`: Configuration related to ECS autoscaling. -- `loadBalancerConfig`: Health check configuration for load balancers. - -### Deleting a Model (Admin API) - -Admins can delete a model using the following endpoint. Deleting a model removes the infrastructure (ECS) or disconnects from LiteLLM. - -#### Request Example: - -``` -DELETE https:///models/{modelId} -``` - -#### Response Example: - -```json -{ - "status": "success", - "message": "Model mistral-vllm has been deleted successfully." -} -``` - -### Updating a Model - -LISA offers basic updating functionality for both LISA-hosted and LiteLLM-only models. For both types, the model type and streaming support can be updated -in the cases that the models were originally created with the wrong parameters. 
For example, if an embedding model was accidentally created as a `textgen` -model, the UpdateModel API can be used to set it to the intended `embedding` value. Additionally, for LISA-hosted models, users may update the AutoScaling -configuration to increase or decrease capacity usage for each model. Users may use this API to completely shut down all instances behind a model until -they want to add capacity back to the model for usage later. This feature can help users to effectively manage costs so that instances do not have to stay -running in time periods of little or no expected usage. - -The UpdateModel API has mutually exclusive payload fields to avoid conflicting requests. The API does not allow for shutting off a model at the same time -as updating its AutoScaling configuration, as these would introduce ambiguous intents. The API does not allow for setting AutoScaling limits to 0 and instead -requires the usage of the enable/disable functionality to allow models to fully scale down or turn back on. Metadata updates, such as changing the model type -or streaming compatibility, can happen in either type of update or simply by themselves. - -#### Request Example - -``` -PUT https:///models/{modelId} -``` - -#### Example Payloads - -##### Update Model Metadata - -This payload will simply update the model metadata, which will complete within seconds of invoking. If setting a model as an `embedding` model, then the -`streaming` option must be set to `false` or omitted as LISA does not support streaming with embedding models. Both the `streaming` and `modelType` options -may be included in any other update request. - -```json -{ - "streaming": true, - "modelType": "textgen" -} -``` - -##### Update AutoScaling Configuration - -This payload will update the AutoScaling configuration for minimum, maximum, and desired number of instances. The desired number must be between the -minimum or maximum numbers, inclusive, and all the numbers must be strictly greater than 0. If the model currently has less than the minimum number, then -the desired count will automatically raise to the minimum if a desired count is not specified. Despite setting a desired capacity, the model will scale down -to the minimum number over time if you are not hitting the scaling thresholds set when creating the model in the first place. - -The AutoScaling configuration **can** be updated while the model is in the Stopped state, but it won't be applied immediately. Instead, the configuration will -be saved until the model is started again, in which it will use the most recently updated AutoScaling configuration. - -The request will fail if the `autoScalingInstanceConfig` is defined at the same time as the `enabled` field. These options are mutually exclusive and must be -handled as separate operations. Any or all of the options within the `autoScalingInstanceConfig` may be set as needed, so if you only wish to change the `desiredCapacity`, -then that is the only option that you need to specify in the request object within the `autoScalingInstanceConfig`. - -```json -{ - "autoScalingInstanceConfig": { - "minCapacity": 2, - "maxCapacity": 4, - "desiredCapacity": 3 - } -} -``` - -##### Stop Model - Scale Down to 0 Instances - -This payload will stop all model EC2 instances and remove the model reference from LiteLLM so that users are unable to make inference requests against a model -with no capacity. 
This option is useful for users who wish to manage costs and turn off instances when the model is not currently needed but will be used again -in the future. - -The request will fail if the `enabled` field is defined at the same time as the `autoScalingInstanceConfig` field. These options are mutually exclusive and must be -handled as separate operations. - -```json -{ - "enabled": false -} -``` - -##### Start Model - Restore Previous AutoScaling Configuration - -After stopping a model, this payload will turn the model back on by spinning up instances, waiting for the expected spin-up time to allow models to initialize, and then -adding the reference back to LiteLLM so that users may query the model again. This is expected to be a much faster operation than creating the model through the CreateModel -API, so as long as the model details don't have to change, this in combination with the Stop payload will help to manage costs while still providing model availability as -quickly as the system can spin it up again. - -The request will fail if the `enabled` field is defined at the same time as the `autoScalingInstanceConfig` field. These options are mutually exclusive and must be -handled as separate operations. - -```json -{ - "enabled": true -} -``` - ---- - -# Error Handling for API Requests - -In the LISA model management API, error handling is designed to ensure robustness and consistent responses when errors occur during the execution of API requests. This section provides a detailed explanation of the error handling mechanisms in place, including the types of errors that are managed, how they are raised, and what kind of responses clients can expect when these errors occur. - -## Common Errors and Their HTTP Responses - -Below is a list of common errors that can occur in the system, along with the HTTP status codes and response structures that are returned to the client. - -### ModelNotFoundError - -* **Description**: Raised when a model that is requested for retrieval or deletion is not found in the system. -* **HTTP Status Code**: `404 Not Found` -* **Response Body**: - -```json -{ - "error": "ModelNotFoundError", - "message": "The requested model with ID could not be found." -} -``` - -* **Example Scenario**: When a client attempts to fetch details of a model that does not exist in the database, the `ModelNotFoundError` is raised. - -### ModelAlreadyExistsError - -* **Description:** Raised when a request to create a model is made, but the model already exists in the system. -* **HTTP Status Code**: `400` -* **Response Body**: - -```json -{ - "error": "ModelAlreadyExistsError", - "message": "A model with the given configuration already exists." -} -``` - -* **Example Scenario:** A client attempts to create a model with an ID or name that already exists in the database. The system detects the conflict and raises the `ModelAlreadyExistsError`. - -### InvalidInputError (Hypothetical Example) - -* **Description**: Raised when the input provided by the client for creating or updating a model is invalid or does not conform to expected formats. -* **HTTP Status Code**: `400 Bad Request` -* **Response Body**: - -```json -{ - "error": "InvalidInputError", - "message": "The input provided is invalid. Please check the required fields and formats." -} -``` - -* **Example Scenario**: The client submits a malformed JSON body or omits required fields in a model creation request, triggering an `InvalidInputError`. 
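The snippet below is an illustrative sketch (not part of LISA) of how a client could surface these documented error bodies; the API Gateway URL and token are placeholders for your deployment.

```python
# Illustrative sketch: surfacing the documented error bodies returned by the
# model management API. URL and token are placeholders.
import requests

def call_management_api(path: str) -> dict:
    resp = requests.get(
        f"https://<api-gateway-url>{path}",            # placeholder API Gateway URL
        headers={"Authorization": "Bearer <admin-token>"},
        timeout=30,
    )
    if not resp.ok:
        try:
            body = resp.json()   # e.g. {"error": "ModelNotFoundError", "message": "..."}
        except ValueError:
            body = {}
        raise RuntimeError(
            f"{resp.status_code} {body.get('error', 'HTTPError')}: {body.get('message', resp.text)}"
        )
    return resp.json()

# Example: list models; a 404 or 400 from the API would raise with the error name and message.
print(call_management_api("/models"))
```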
- -## Handling Validation Errors - -Validation errors are handled across the API via utility functions and model transformation logic. These errors typically occur when user inputs fail validation checks or when required data is missing from a request. - -### Example Response for Validation Error: - -* **HTTP Status Code**: `422 Unprocessable Entity` -* **Response Body**: - -```json -{ - "error": "ValidationError", - "message": "The input provided does not meet the required validation criteria." -} -``` - ---- - -# Deployment -## Using pre-built resources - -A default configuration will build the necessary containers, lambda layers, and production optimized -web application at build time. In the event that you would like to use pre-built resources due to -network connectivity reasons or other concerns with the environment where you'll be deploying LISA -you can do so. - -- For ECS containers (Models, APIs, etc) you can modify the `containerConfig` block of - the corresponding entry in `config.yaml`. For container images you can provide a path to a directory - from which a docker container will be built (default), a path to a tarball, an ECR repository arn and - optional tag, or a public registry path. - - We provide immediate support for HuggingFace TGI and TEI containers and for vLLM containers. The `example_config.yaml` - file provides examples for TGI and TEI, and the only difference for using vLLM is to change the - `inferenceContainer`, `baseImage`, and `path` options, as indicated in the snippet below. All other options can - remain the same as the model definition examples we have for the TGI or TEI models. vLLM can also support embedding - models in this way, so all you need to do is refer to the embedding model artifacts and remove the `streaming` field - to deploy the embedding model. - - vLLM has support for the OpenAI Embeddings API, but model support for it is limited because the feature is new. Currently, - the only supported embedding model with vLLM is [intfloat/e5-mistral-7b-instruct](https://huggingface.co/intfloat/e5-mistral-7b-instruct), - but this list is expected to grow over time as vLLM updates. - ```yaml - ecsModels: - - modelName: your-model-name - inferenceContainer: tgi - baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1 - ``` -- If you are deploying the LISA Chat User Interface you can optionally specify the path to the pre-built - website assets using the top level `webAppAssetsPath` parameter in `config.yaml`. Specifying this path - (typically `lib/user-interface/react/dist`) will avoid using a container to build and bundle the assets - at CDK build time. -- For the lambda layers you can specify the path to a local zip archive of the layer code by including - the optional `lambdaLayerAssets` block in `config.yaml` similar to the following: - -``` -lambdaLayerAssets: - authorizerLayerPath: lib/core/layers/authorizer_layer.zip - commonLayerPath: lib/core/layers/common_layer.zip - fastapiLayerPath: /path/to/fastapi_layer.zip - sdkLayerPath: lib/rag/layers/sdk_layer.zip -``` ---- - -## Deploying - -Now that we have everything setup we are ready to deploy. - -```bash -make deploy -``` - -By default, all stacks will be deployed but a particular stack can be deployed by providing the `STACK` argument to the `deploy` target. 
- -```bash -make deploy STACK=LisaServe -``` - -Available stacks can be listed by running: - -```bash -make listStacks -``` - -After the `deploy` command is run, you should see many docker build outputs and eventually a CDK progress bar. The deployment should take about 10-15 minutes and will produce a single cloud formation output for the websocket URL. - -You can test the deployment with the integration test: - -```bash -pytest lisa-sdk/tests --url --verify | false -``` - ---- - -## Programmatic API Tokens - -The LISA Serve ALB can be used for programmatic access outside the example Chat application. -An example use case would be for allowing LISA to serve LLM requests that originate from the [Continue VSCode Plugin](https://www.continue.dev/). -To facilitate communication directly with the LISA Serve ALB, a user with sufficient DynamoDB PutItem permissions may add -API keys to the APITokenTable, and once created, a user may make requests by including the `Authorization: Bearer ${token}` -header or the `Api-Key: ${token}` header with that token. If using any OpenAI-compatible library, the `api_key` fields -will use the `Authorization: Bearer ${token}` format automatically, so there is no need to include additional headers -when using those libraries. - -### Adding a Token - -An account owner may create a long-lived API Token using the following AWS CLI command. - -```bash -AWS_REGION="us-east-1" # change to your deployment region -token_string="YOUR_STRING_HERE" # change to a unique string for a user -aws --region $AWS_REGION dynamodb put-item --table-name $DEPLOYMENT_NAME-LISAApiTokenTable \ - --item '{"token": {"S": "'${token_string}'"}}' -``` - -If an account owner wants the API Token to be temporary and expire after a specific date, LISA will allow for this too. -In addition to the `token` field, the owner may specify the `tokenExpiration` field, which accepts a UNIX timestamp, -in seconds. The following command shows an example of how to do this. - -```bash -AWS_REGION="us-east-1" # change to your deployment region -token_string="YOUR_STRING_HERE" -token_expiration=$(echo $(date +%s) + 3600 | bc) # token that expires in one hour, 3600 seconds -aws --region $AWS_REGION dynamodb put-item --table-name $DEPLOYMENT_NAME-LISAApiTokenTable \ - --item '{ - "token": {"S": "'${token_string}'"}, - "tokenExpiration": {"N": "'${token_expiration}'"} - }' -``` - -Once the token is inserted into the DynamoDB Table, a user may use the token in the `Authorization` request header like -in the following snippet. - -```bash -lisa_serve_rest_url="https://" -token_string="YOUR_STRING_HERE" -curl ${lisa_serve_rest_url}/v2/serve/models \ - -H 'accept: application/json' \ - -H 'Content-Type: application/json' \ - -H "Authorization: Bearer ${token_string}" -``` - -### Updating a Token - -In the case that an owner wishes to change an existing expiration time or add one to a key that did not previously have -an expiration, this can be accomplished by editing the existing item. The following commands can be used as an example -for updating an existing token. Setting the expiration time to a time in the past will effectively remove access for -that key. 

```bash
AWS_REGION="us-east-1"  # change to your deployment region
token_string="YOUR_STRING_HERE"
token_expiration=$(echo $(date +%s) + 600 | bc)  # token that expires in 10 minutes from now
aws --region $AWS_REGION dynamodb update-item --table-name $DEPLOYMENT_NAME-LISAApiTokenTable \
    --key '{"token": {"S": "'${token_string}'"}}' \
    --update-expression 'SET tokenExpiration=:t' \
    --expression-attribute-values '{":t": {"N": "'${token_expiration}'"}}'
```

### Removing a Token

Tokens will not be automatically removed even if they are no longer valid. An owner may remove a key, expired or not, from the database to fully revoke the key by deleting the item. As an example, the following commands can be used to remove a token.

```bash
AWS_REGION="us-east-1"  # change to your deployment region
token_string="YOUR_STRING_HERE"  # change to the token to remove
aws --region $AWS_REGION dynamodb delete-item --table-name $DEPLOYMENT_NAME-LISAApiTokenTable \
    --key '{"token": {"S": "'${token_string}'"}}'
```

---

## Model Compatibility

### HuggingFace Generation Models

For generation models, or causal language models, LISA supports models that are supported by the underlying serving container, TGI. TGI divides compatibility into two categories: optimized models and best-effort supported models. The list of optimized models is found [here](https://huggingface.co/docs/text-generation-inference/supported_models). The best-effort category uses the `transformers` codebase under the hood, and so should work for most causal models on HuggingFace:

```python
AutoModelForCausalLM.from_pretrained("<model-id>", device_map="auto")
```

or

```python
AutoModelForSeq2SeqLM.from_pretrained("<model-id>", device_map="auto")
```

### HuggingFace Embedding Models

Embedding models often utilize custom codebases and are not as uniform as generation models. For this reason, you will likely need to create a new `inferenceContainer`. Follow the [example](./lib/ecs-model/embedding/instructor) provided for the `instructor` model.

### vLLM Models

In addition to the support we have for the TGI and TEI containers, we support hosting models using the [vLLM container](https://docs.vllm.ai/en/latest/). vLLM abides by the OpenAI specification, and as such allows both text generation and embedding on the models that vLLM supports. See the [deployment](#deployment) section for details on how to set up the vLLM container for your models. Similar to how the HuggingFace containers will serve safetensor weights downloaded from the HuggingFace website, vLLM will do the same, and our configuration will allow you to serve these artifacts automatically. vLLM does not have many supported models for embeddings, but as they become available, LISA will support them as long as the vLLM container version is updated in the config.yaml file and as long as the model's safetensors can be found in S3.

---

# Chatbot Example

This repository includes an example chatbot web application. The React-based web application can be optionally deployed to demonstrate the capabilities of LISA Serve. The chatbot consists of a static React-based single-page application hosted via an API Gateway S3 proxy integration. The app connects to the LISA Serve REST API and an optional RAG API. The app integrates with an OIDC-compatible IdP and allows users to interact directly with any of the textgen models hosted with LISA Serve. 
If the optional RAG stack is deployed, then users can also leverage the embedding models and AWS OpenSearch or PGVector to demonstrate chat with RAG. Chat sessions are maintained in a DynamoDB table, and a number of parameters are exposed through the UI to allow experimentation, including prompt, temperature, top k, top p, max tokens, and more.

## Local development

### Configuring Pre-Commit Hooks

To ensure code quality and consistency, this project uses pre-commit hooks. These hooks are configured to perform checks, such as linting and formatting, helping to catch potential issues early. These hooks are run automatically on each push to a remote branch, but if you wish to run them locally before each commit, follow these steps:

1. Install pre-commit: `pip install pre-commit`
2. Install the git hook scripts: `pre-commit install`

The hooks will now run automatically on changed files, but if you wish to test them against all files, run the following command: `pre-commit run --all-files`.

### Run REST API locally

```bash
cd lib/serve/rest-api
pip install -r src/requirements.txt
export AWS_REGION=
export AUTHORITY=
export CLIENT_ID=
export REGISTERED_MODELS_PS_NAME=
export TOKEN_TABLE_NAME="/LISAApiTokenTable"
gunicorn -k uvicorn.workers.UvicornWorker -w 2 -b "0.0.0.0:8080" "src.main:app"
```

### Run example chatbot locally

Create a `lib/user-interface/react/public/env.js` file with the following contents:

```js
window.env = {
  AUTHORITY: '',
  CLIENT_ID: '',
  JWT_GROUPS_PROP: '',
  ADMIN_GROUP: '',
  CUSTOM_SCOPES: [],
  // Alternatively you can set this to be your REST API ELB endpoint
  RESTAPI_URI: 'http://localhost:8080/',
  API_BASE_URL: 'https://${deployment_id}.execute-api.${regional_domain}/${deployment_stage}',
  RESTAPI_VERSION: 'v2',
  "MODELS": [
    {
      "model": "streaming-textgen-model",
      "streaming": true,
      "modelType": "textgen"
    },
    {
      "model": "non-streaming-textgen-model",
      "streaming": false,
      "modelType": "textgen"
    },
    {
      "model": "embedding-model",
      "streaming": null,
      "modelType": "embedding"
    }
  ]
}
```

Launch the Chat UI:

```bash
cd lib/user-interface/react/
npm run dev
```
---

# Usage and Features

The LISA Serve endpoint can be used independently of the Chat UI, and the following shows a few examples of how to do that. The Serve endpoint will still validate user auth, so if you have a Bearer token from the IdP configured with LISA, we will honor it, or if you've set up an API token using the [DynamoDB instructions](#programmatic-api-tokens), we will also accept that. This diagram shows the LISA Serve components that would be utilized during direct REST API requests.

## OpenAI Specification Compatibility

We now provide greater support for the [OpenAI specification](https://platform.openai.com/docs/api-reference) for model inference and embeddings. We utilize LiteLLM as a proxy for both models we spin up on behalf of the user and additional models configured through the config.yaml file, and because of that, the LISA REST API endpoint allows for a central location for making text generation and embedding requests. We support, and are not limited to, the following popular endpoint routes as long as your underlying models can also respond to them.

- /models
- /chat/completions
- /completions
- /embeddings

By supporting the OpenAI spec, we can more easily allow users to integrate their collection of models into their LLM applications and workflows. 
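
As a quick, illustrative sketch, a chat completion request against the LISA Serve base path might look like the following; the `${lisa_serve_alb}` value, the `mistral-vllm` model ID, and the token passed in the `Api-Key` header (created through the [Programmatic API Tokens](#programmatic-api-tokens) steps) are placeholders for this example and should be replaced with values from your own deployment.

```bash
# Hypothetical example: POST an OpenAI-style chat completion to the LISA Serve endpoint.
curl -s -X POST "https://${lisa_serve_alb}/v2/serve/chat/completions" \
  -H 'Content-Type: application/json' \
  -H 'Api-Key: your-token' \
  -d '{
        "model": "mistral-vllm",
        "messages": [{"role": "user", "content": "Summarize what LISA provides in one sentence."}]
      }'
```
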
In LISA, users can authenticate -using their OpenID Connect Identity Provider, or with an API token created through the DynamoDB token workflow as described [here](#programmatic-api-tokens). Once the token -is retrieved, users can use that in direct requests to the LISA Serve REST API. If using the IdP, users must set the 'Authorization' header, otherwise if using the API token, -either the 'Api-Key' header or the 'Authorization' header. After that, requests to `https://${lisa_serve_alb}/v2/serve` will handle the OpenAI API calls. As an example, the following call can list all -models that LISA is aware of, assuming usage of the API token. If you are using a self-signed cert, you must also provide the `--cacert $path` option to specify a CA bundle to trust for SSL verification. - -```shell -curl -s -H 'Api-Key: your-token' -X GET https://${lisa_serve_alb}/v2/serve/models -``` - -If using the IdP, the request would look like the following: - -```shell -curl -s -H 'Authorization: Bearer your-token' -X GET https://${lisa_serve_alb}/v2/serve/models -``` - -When using a library that requests an OpenAI-compatible base_url, you can provide `https://${lisa_serve_alb}/v2/serve` here. All of the OpenAI routes will -automatically be added to the base URL, just as we appended `/models` to the `/v2/serve` route for listing all models tracked by LISA. - ---- - -## Continue JetBrains and VS Code Plugin - -For developers that desire an LLM assistant to help with programming tasks, we support adding LISA as an LLM provider for the [Continue plugin](https://www.continue.dev). -To add LISA as a provider, open up the Continue plugin's `config.json` file and locate the `models` list. In this list, add the following block, replacing the placeholder URL -with your own REST API domain or ALB. The `/v2/serve` is required at the end of the `apiBase`. This configuration requires an API token as created through the [DynamoDB workflow](#programmatic-api-tokens). - -```json -{ - "model": "AUTODETECT", - "title": "LISA", - "apiBase": "https:///v2/serve", - "provider": "openai", - "apiKey": "your-api-token" // pragma: allowlist-secret -} -``` - -Once you save the `config.json` file, the Continue plugin will call the `/models` API to get a list of models at your disposal. The ones provided by LISA will be prefaced -with "LISA" or with the string you place in the `title` field of the config above. Once the configuration is complete and a model is selected, you can use that model to -generate code and perform AI assistant tasks within your development environment. See the [Continue documentation](https://docs.continue.dev/how-to-use-continue) for more -information about its features, capabilities, and usage. - -### Usage in LLM Libraries - -If your workflow includes using libraries, such as [LangChain](https://python.langchain.com/v0.2/docs/introduction/) or [OpenAI](https://github.com/openai/openai-python), -then you can place LISA right in your application by changing only the endpoint and headers for the client objects. As an example, using the OpenAI library, the client would -normally be instantiated and invoked with the following block. - -```python -from openai import OpenAI - -client = OpenAI( - api_key="my_key" # pragma: allowlist-secret not a real key -) -client.models.list() -``` - -To use the models being served by LISA, the client needs only a few changes: - -1. 
Specify the `base_url` as the LISA Serve ALB, using the /v2/serve route at the end, similar to the apiBase in the [Continue example](#continue-jetbrains-and-vs-code-plugin) -2. Add the API key that you generated from the [token generation steps](#programmatic-api-tokens) as your `api_key` field. -3. If using a self-signed cert, you must provide a certificate path for validating SSL. If you're using an ACM or public cert, then this may be omitted. -1. We provide a convenience function in the `lisa-sdk` for generating a cert path from an IAM certificate ARN if one is provided in the `RESTAPI_SSL_CERT_ARN` environment variable. - -The Code block will now look like this and you can continue to use the library without any other modifications. - -```python -# for self-signed certificates -import boto3 -from lisapy.utils import get_cert_path -# main client library -from openai import DefaultHttpxClient, OpenAI - -iam_client = boto3.client("iam") -cert_path = get_cert_path(iam_client) - -client = OpenAI( - api_key="my_key", # pragma: allowlist-secret not a real key - base_url="https:///v2/serve", - http_client=DefaultHttpxClient(verify=cert_path), # needed for self-signed certs on your ALB, can be omitted otherwise -) -client.models.list() -``` - ---- - -# License Notice +[![Full Documentation](https://img.shields.io/badge/Full%20Documentation-blue?style=for-the-badge&logo=Vite&logoColor=white)](https://awslabs.github.io/LISA/) -Although this repository is released under the Apache 2.0 license, when configured to use PGVector as a RAG store it uses -the third party `psycopg2-binary` library. The `psycopg2-binary` project's licensing includes the [LGPL with exceptions](https://github.com/psycopg/psycopg2/blob/master/LICENSE) license. +## What is LISA? + +LISA is an infrastructure-as-code solution providing scalable, low latency access to customers’ generative LLMs and +embedding language models. LISA accelerates and supports customers’ GenAI experimentation and adoption, particularly in +regions where Amazon Bedrock is not available. LISA allows customers to move quickly rather than independently solve the +undifferentiated heavy lifting of hosting and inference architecture. Customers deploy LISA into a single AWS account +and integrate it with an identity provider. Customers bring their own models to LISA for self-hosting and inference +supported by Amazon Elastic Container Service (ECS). Model configuration is managed through LISA’s model management +APIs. + +As use cases and model requirements grow, customers can configure LISA with external model providers. Through OpenAI's +API spec via the LiteLLM proxy, LISA is compatible with 100+ models from various providers, including Amazon Bedrock and +Amazon Jumpstart. LISA customers can centralize communication across many model providers via LiteLLM, leveraging LISA +for model orchestration. Using LISA as a model orchestration layer allows customers to standardize integrations with +externally hosted models in a single place. Without an orchestration layer, customers must individually manage unique +API integrations with each provider. + +## Key Features + +* **Self Host Models:** Bring your own text generation and embedding models to LISA for hosting and inference. +* **Model Orchestration:** Centralize and standardize configuration with 100+ models from model providers via LiteLLM, + including Amazon Bedrock models. 
+* **Chatbot User Interface:** Through the chatbot user interface, users can prompt LLMs, receive responses, modify prompt + templates, change model arguments, and manage their session history. Administrators can control available features via + the configuration page. +* **Retrieval-augmented generation (RAG):** RAG reduces the need for fine-tuning, an expensive and time-consuming + undertaking, and delivers more contextually relevant outputs. LISA offers RAG through Amazon OpenSearch or + PostgreSQL’s PGVector extension on Amazon RDS. +* **Non-RAG Model Context:** Users can upload documents to their chat sessions to enhance responses or support use cases + like document summarization. +* **Model Management:** Administrators can add, remove, and update models configured with LISA through the model management + configuration page or APIs. +* **OpenAI API spec:** LISA can be configured with compatible tooling. For example, customers can configure LISA as the + model provider for the [Continue](https://www.continue.dev/) plugin, an open-source AI code assistance for JetBrains and Visual Studio Code + integrated development environments (IDEs). This allows users to select from any LISA-configured model to support LLM + prompting directly in their IDE. +* **Libraries:** If your workflow includes libraries such as [LangChain](https://python.langchain.com/) + or [OpenAI](https://github.com/openai/openai-python), then you can place LISA in your + application by changing only the endpoint and headers for the client objects. +* **FedRAMP:** The AWS services that LISA leverages are FedRAMP High compliant. +* **Ongoing Releases:** We offer on-going release with new functionality. LISA’s roadmap is customer driven. + +## Deployment Prerequisites + +### Pre-Deployment Steps + +* Set up and have access to an AWS account with appropriate permissions + * All the resource creation that happens as part of CDK deployments expects Administrator or Administrator-like + permissions with resource creation and mutation permissions. Installation will not succeed if this profile does + not have permissions to create and edit arbitrary resources for the system. Note: This level of permissions is not + required for the runtime of LISA. This is only necessary for deployment and subsequent updates. +* Familiarity with AWS Cloud Development Kit (CDK) and infrastructure-as-code principles +* Optional: If using the chat UI, Have your Identity Provider (IdP) information and access +* Optional: Have your VPC information available, if you are using an existing one for your deployment +* Note: CDK and Model Management both leverage AWS Systems Manager Agent (SSM) parameter store. Confirm that SSM is approved for use by your organization before beginning. + +### Software + +* AWS CLI installed and configured +* Python 3.9 or later +* Node.js 14 or later +* Docker installed and running +* Sufficient disk space for model downloads and conversions + + +## Getting Started + +For detailed instructions on setting up, configuring, and deploying LISA, please refer to our separate documentation on +installation and usage. + +- [Deployment Guide](lib/docs/admin/getting-started.md) +- [Configuration](lib/docs/config/configuration.md) + +## License + +Although this repository is released under the Apache 2.0 license, when configured to use PGVector as a RAG store it +uses +the third party `psycopg2-binary` library. 
The `psycopg2-binary` project's licensing includes +the [LGPL with exceptions](https://github.com/psycopg/psycopg2/blob/master/LICENSE) license. diff --git a/VERSION b/VERSION index fd2a0186..944880fa 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.1.0 +3.2.0 diff --git a/ecs_model_deployer/Dockerfile b/ecs_model_deployer/Dockerfile index 80375d26..5cdf25f5 100644 --- a/ecs_model_deployer/Dockerfile +++ b/ecs_model_deployer/Dockerfile @@ -1,4 +1,5 @@ FROM public.ecr.aws/lambda/nodejs:18 COPY ./dist/ ${LAMBDA_TASK_ROOT} +RUN chmod 777 -R ${LAMBDA_TASK_ROOT} CMD ["index.handler"] diff --git a/ecs_model_deployer/src/lib/ecs-model.ts b/ecs_model_deployer/src/lib/ecs-model.ts index 66016cf6..1deb6a7a 100644 --- a/ecs_model_deployer/src/lib/ecs-model.ts +++ b/ecs_model_deployer/src/lib/ecs-model.ts @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -*/ + */ // ECS Model Construct. import { ISecurityGroup, IVpc, SubnetSelection } from 'aws-cdk-lib/aws-ec2'; diff --git a/ecs_model_deployer/src/lib/lisa_model_stack.ts b/ecs_model_deployer/src/lib/lisa_model_stack.ts index 0d59814e..01b96e7b 100644 --- a/ecs_model_deployer/src/lib/lisa_model_stack.ts +++ b/ecs_model_deployer/src/lib/lisa_model_stack.ts @@ -62,9 +62,9 @@ export class LisaModelStack extends Stack { let subnetSelection: SubnetSelection | undefined; - if (props.config.subnetIds && props.config.subnetIds.length > 0) { + if (props.config.subnets && props.config.subnets.length > 0) { subnetSelection = { - subnets: props.config.subnetIds?.map((subnet, index) => Subnet.fromSubnetId(this, index.toString(), subnet)) + subnets: props.config.subnets?.map((subnet, index) => Subnet.fromSubnetId(this, index.toString(), subnet.subnetId)) }; } diff --git a/ecs_model_deployer/src/lib/schema.ts b/ecs_model_deployer/src/lib/schema.ts index fda17ba9..11e3c5ec 100644 --- a/ecs_model_deployer/src/lib/schema.ts +++ b/ecs_model_deployer/src/lib/schema.ts @@ -618,7 +618,10 @@ const RawConfigSchema = z instanceProfilePrefix: z.string().optional(), }) .optional(), - subnetIds: z.array(z.string()).optional(), + subnets: z.array(z.object({ + subnetId: z.string().startsWith('subnet-'), + ipv4CidrBlock: z.string() + })).optional(), }) .refine((config) => (config.pypiConfig.indexUrl && config.region.includes('iso')) || !config.region.includes('iso'), { message: 'Must set PypiConfig if in an iso region', diff --git a/example_config.yaml b/example_config.yaml index 1275ab32..dd86e02f 100644 --- a/example_config.yaml +++ b/example_config.yaml @@ -1,151 +1,83 @@ -env: dev - -dev: - appName: lisa - profile: - deploymentName: - accountNumber: 012345678901 - region: us-east-1 - deploymentStage: dev - removalPolicy: destroy - runCdkNag: false - # lambdaLayerAssets: - # authorizerLayerPath: /path/to/authorizer_layer.zip - # commonLayerPath: /path/to/common_layer.zip - # fastapiLayerPath: /path/to/fastapi_layer.zip - # ragLayerPath: /path/to/rag_layer.zip - # sdkLayerPath: /path/to/sdk_layer.zip - # stackSynthesizer: CliCredentialsStackSynthesizer - # permissionsBoundaryAspect: - # permissionsBoundaryPolicyName: CustomPermissionBoundary - # rolePrefix: CustomPrefix - # policyPrefix: CustomPrefix - # instanceProfilePrefix: CustomPrefix - # vpcId: vpc-0123456789abcdef, - # subnetIds: [subnet-fedcba9876543210, subnet-0987654321fedcba], - s3BucketModels: hf-models-gaiic - # aws partition mountS3 package location - mountS3DebUrl: 
https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb - # aws-iso partition mountS3 package location - # mountS3DebUrl: https://mountpoint-s3-release-us-iso-east-1.s3.us-iso-east-1.c2s.ic.gov/latest/x86_64/mount-s3.deb - # aws-iso-b partition mountS3 package location - # mountS3DebUrl: https://mountpoint-s3-release-us-isob-east-1.s3.us-isob-east-1.sc2s.sgov.gov/latest/x86_64/mount-s3.deb - accountNumbersEcr: - - 012345678901 - deployRag: true - deployChat: true - deployUi: true - privateEndpoints: false - lambdaConfig: - pythonRuntime: PYTHON_3_10 - logLevel: DEBUG - vpcAutoscalingConfig: - provisionedConcurrentExecutions: 5 - minCapacity: 1 - maxCapacity: 50 - targetValue: 0.80 - cooldown: 30 - authConfig: - authority: - clientId: - adminGroup: - jwtGroupsProperty: - logLevel: DEBUG - # NOTE: The following configuration will allow for using a custom domain for the chat user interface. - # If this option is specified, the API Gateway invocation URL will NOT work on its own as the application URL. - # Users must use the custom domain for the user interface to work if this option is populated. - apiGatewayConfig: - domainName: - restApiConfig: - apiVersion: v2 - instanceType: m5.large - containerConfig: - image: - baseImage: python:3.9 - path: lib/serve/rest-api - type: asset - healthCheckConfig: - command: ["CMD-SHELL", "exit 0"] - interval: 10 - startPeriod: 30 - timeout: 5 - retries: 3 - autoScalingConfig: - minCapacity: 1 - maxCapacity: 1 - cooldown: 60 - defaultInstanceWarmup: 60 - metricConfig: - AlbMetricName: RequestCountPerTarget - targetValue: 1000 - duration: 60 - estimatedInstanceWarmup: 30 - internetFacing: true - loadBalancerConfig: - sslCertIamArn: arn:aws:iam::012345678901:server-certificate/lisa-self-signed-dev - healthCheckConfig: - path: /health - interval: 60 - timeout: 30 - healthyThresholdCount: 2 - unhealthyThresholdCount: 10 - domainName: - ragRepositories: - - repositoryId: pgvector-rag - type: pgvector - rdsConfig: - username: postgres - # - repositoryId: default - # type: opensearch - # opensearchConfig: - # dataNodes: 2 - # dataNodeInstanceType: r6g.large.search - # masterNodes: 0 - # masterNodeInstanceType: r6g.large.search - # volumeSize: 300 - # If adding an existing PGVector database, this configurations assumes: - # 1. The database has been configured to have pgvector installed and enabled: https://aws.amazon.com/about-aws/whats-new/2023/05/amazon-rds-postgresql-pgvector-ml-model-integration/ - # 2. The database is accessible by RAG-related lambda functions (add inbound PostgreSQL access on the database's security group for all Lambda RAG security groups) - # 3. A secret ID exists in SecretsManager holding the database password within a json block of '{"password":"your_password_here"}'. This is the same format that RDS natively provides a password in SecretsManager. - # If the passwordSecretId or dbHost are not provided, then a sample database will be created for you. Only the username is required. - # - repositoryId: pgvector-rag - # type: pgvector - # rdsConfig: - # username: postgres - # passwordSecretId: # password ID as stored in SecretsManager. Example: "rds!db-aa88493d-be8d-4a3f-96dc-c668165f7826" - # dbHost: # Host name of database. 
Example hostname from RDS: "my-db-name.291b2f03.us-east-1.rds.amazonaws.com" - # dbName: postgres - ragFileProcessingConfig: - chunkSize: 512 - chunkOverlap: 51 - ecsModels: - - modelName: mistralai/Mistral-7B-Instruct-v0.2 - inferenceContainer: tgi - baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1 - - modelName: intfloat/e5-large-v2 - inferenceContainer: tei - baseImage: ghcr.io/huggingface/text-embeddings-inference:1.2.3 - # - modelName: mistralai/Mixtral-8x7B-Instruct-v0.1 - # inferenceContainer: tgi - # baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1 - # LiteLLM Config options found here: https://litellm.vercel.app/docs/proxy/configs#all-settings - # Anything within this config is copied to a configuration for starting LiteLLM in the REST API container. - # It is suggested to put an "ignored" API key so that calls to locally hosted models don't fail on OpenAI calls - # from LiteLLM. - # We added `lisa_params` to add additional metadata for interaction with the Chat UI. Specify if the model is a - # textgen or embedding model, and if it is textgen, specify whether it supports streaming. If embedding, then - # omit the `streaming` parameter. When defining the model list, the `lisa_params` will be an object in the model - # definition that will have the `model_type` and `streaming` fields in it. A commented example is provided below. - litellmConfig: - litellm_settings: - telemetry: false # Don't try to send telemetry to LiteLLM servers. - general_settings: - master_key: sk-d7a77bcb-3e23-483c-beec-2700f2baeeb1 # A key is required for model management purposes - model_list: # Add any of your existing (not LISA-hosted) models here. -# - model_name: mymodel -# litellm_params: -# model: openai/myprovider/mymodel -# api_key: ignored -# lisa_params: -# model_type: textgen -# streaming: true +accountNumber: "012345678901" +region: us-east-1 +authConfig: + authority: + clientId: + adminGroup: + jwtGroupsProperty: +s3BucketModels: hf-models-gaiic +########################### OPTIONAL BELOW ####################################### +# profile: AWS CLI profile for deployment. +# vpcId: VPC ID for the application. (e.g. vpc-0123456789abcdef) +# The following is an array of subnet objects for the application. These contain a subnetId(e.g. [subnet-fedcba9876543210] and ipv4CidrBlock +# subnets: +# - subnetId: +# ipv4CidrBlock: +# The following configuration will allow for using a custom domain for the chat user interface. +# If this option is specified, the API Gateway invocation URL will NOT work on its own as the application URL. +# Users must use the custom domain for the user interface to work if this option is populated. +# apiGatewayConfig: +# domainName: +# restApiConfig: +# sslCertIamArn: ARN of the self-signed cert to be used throughout the system +# Some customers will want to download required libs prior to deployment, provide a path to the zipped resources +# lambdaLayerAssets: +# authorizerLayerPath: /path/to/authorizer_layer.zip +# commonLayerPath: /path/to/common_layer.zip +# fastapiLayerPath: /path/to/fastapi_layer.zip +# ragLayerPath: /path/to/rag_layer.zip +# sdkLayerPath: /path/to/sdk_layer.zip +# stackSynthesizer: CliCredentialsStackSynthesizer +# deploymentPrefix: Prefix for deployment resources. +# webAppAssetsPath: Optional path to precompiled webapp assets. If not specified the web application will be built at deploy time. 
+# permissionsBoundaryAspect: +# permissionsBoundaryPolicyName: CustomPermissionBoundary +# rolePrefix: CustomPrefix +# policyPrefix: CustomPrefix +# instanceProfilePrefix: CustomPrefix +# vpcId: vpc-0123456789abcdef, +# aws-iso partition mountS3 package location +# mountS3DebUrl: https://mountpoint-s3-release-us-iso-east-1.s3.us-iso-east-1.c2s.ic.gov/latest/x86_64/mount-s3.deb +# aws-iso-b partition mountS3 package location +# mountS3DebUrl: https://mountpoint-s3-release-us-isob-east-1.s3.us-isob-east-1.sc2s.sgov.gov/latest/x86_64/mount-s3.deb +# List of AWS account numbers for ECR repositories. +# accountNumbersEcr: +# - 012345678901 +# ragRepositories: +# - repositoryId: pgvector-rag +# type: pgvector +# rdsConfig: +# username: postgres +# - repositoryId: default +# type: opensearch +# opensearchConfig: +# dataNodes: 2 +# dataNodeInstanceType: r6g.large.search +# masterNodes: 0 +# masterNodeInstanceType: r6g.large.search +# volumeSize: 300 +# If adding an existing PGVector database, this configurations assumes: +# 1. The database has been configured to have pgvector installed and enabled: https://aws.amazon.com/about-aws/whats-new/2023/05/amazon-rds-postgresql-pgvector-ml-model-integration/ +# 2. The database is accessible by RAG-related lambda functions (add inbound PostgreSQL access on the database's security group for all Lambda RAG security groups) +# 3. A secret ID exists in SecretsManager holding the database password within a json block of '{"password":"your_password_here"}'. This is the same format that RDS natively provides a password in SecretsManager. +# If the passwordSecretId or dbHost are not provided, then a sample database will be created for you. Only the username is required. +# - repositoryId: pgvector-rag +# type: pgvector +# rdsConfig: +# username: postgres +# passwordSecretId: # password ID as stored in SecretsManager. Example: "rds!db-aa88493d-be8d-4a3f-96dc-c668165f7826" +# dbHost: # Host name of database. 
Example hostname from RDS: "my-db-name.291b2f03.us-east-1.rds.amazonaws.com" +# dbName: postgres +# You can optionally provide a list of models and the deployment process will ensure they exist in your model bucket and try to download them if they don't exist +# ecsModels: +# - modelName: mistralai/Mistral-7B-Instruct-v0.2 +# inferenceContainer: tgi +# baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1 +# - modelName: intfloat/e5-large-v2 +# inferenceContainer: tei +# baseImage: ghcr.io/huggingface/text-embeddings-inference:1.2.3 +# - modelName: mistralai/Mixtral-8x7B-Instruct-v0.1 +# inferenceContainer: tgi +# baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1 +# litellmConfig: +# db_key: sk-d7a77bcb-3e23-483c-beec-2700f2baeeb1 # A key is required for model management purposes - must start with sk- diff --git a/lambda/dockerimagebuilder/__init__.py b/lambda/dockerimagebuilder/__init__.py index 75ccda4f..dcda9f06 100644 --- a/lambda/dockerimagebuilder/__init__.py +++ b/lambda/dockerimagebuilder/__init__.py @@ -70,6 +70,7 @@ def handler(event: Dict[str, Any], context) -> Dict[str, Any]: # type: ignore [ try: instances = ec2_resource.create_instances( ImageId=ami_id, + SubnetId=os.environ["LISA_SUBNET_ID"], MinCount=1, MaxCount=1, InstanceType="m5.large", diff --git a/lib/api-base/utils.ts b/lib/api-base/utils.ts index e618c78e..2422742d 100644 --- a/lib/api-base/utils.ts +++ b/lib/api-base/utils.ts @@ -34,13 +34,12 @@ import { IRestApi, Cors, } from 'aws-cdk-lib/aws-apigateway'; -import { ISecurityGroup, ISubnet } from 'aws-cdk-lib/aws-ec2'; +import { ISecurityGroup } from 'aws-cdk-lib/aws-ec2'; import { IRole } from 'aws-cdk-lib/aws-iam'; import { Code, Function, Runtime, ILayerVersion, IFunction, CfnPermission } from 'aws-cdk-lib/aws-lambda'; import { Construct } from 'constructs'; import { Vpc } from '../networking/vpc'; import { Queue } from 'aws-cdk-lib/aws-sqs'; -import * as AWS from 'aws-sdk'; /** * Type representing python lambda function @@ -157,40 +156,3 @@ function getOrCreateResource (scope: Construct, parentResource: IResource, path: } return resource; } - -export async function getSubnetCidrRange (subnet: string): Promise { - const ec2 = new AWS.EC2(); - try { - const describeSubnetsResponse = await ec2.describeSubnets({ - SubnetIds: [subnet], - }).promise(); - - const retrievedSubnet = describeSubnetsResponse.Subnets?.[0]; - - if (retrievedSubnet && retrievedSubnet.CidrBlock) { - return retrievedSubnet.CidrBlock; - } - } catch (error) { - console.error('Error retrieving subnet CIDR range:', error); - } - - return undefined; -} - -export async function isSubnetPublic (subnet: ISubnet): Promise { - const ec2 = new AWS.EC2(); - try { - const describeSubnetsResponse = await ec2.describeSubnets({ - SubnetIds: [subnet.subnetId], - }).promise(); - - const retrievedSubnet = describeSubnetsResponse.Subnets?.[0]; - - if (retrievedSubnet && retrievedSubnet.MapPublicIpOnLaunch) { - return true; - } - } catch (error) { - console.error('Error retrieving subnet CIDR range:', error); - } - return false; -} diff --git a/lib/docs/.gitignore b/lib/docs/.gitignore index 80ac0588..99d9f534 100644 --- a/lib/docs/.gitignore +++ b/lib/docs/.gitignore @@ -1,2 +1,3 @@ dist/ .vitepress/cache/ +/config/schema.md diff --git a/lib/docs/.vitepress/config.mts b/lib/docs/.vitepress/config.mts index df61b006..8f69311b 100644 --- a/lib/docs/.vitepress/config.mts +++ b/lib/docs/.vitepress/config.mts @@ -20,29 +20,23 @@ const navLinks = [ { text: 'System Administrator Guide', items: [ + { 
text: 'What is LISA?', link: '/admin/overview' }, { text: 'Architecture Overview', link: '/admin/architecture' }, - { text: 'LISA Components', link: '/admin/components' }, { text: 'Getting Started', link: '/admin/getting-started' }, - { text: 'Configure IdP: Cognito & Keycloak Examples', link: '/admin/idp' }, { text: 'Deployment', link: '/admin/deploy' }, - { text: 'Setting Model Management Admin Group', link: '/admin/model-management' }, - { text: 'LiteLLM', link: '/admin/lite-llm' }, - { text: 'API Overview', link: '/admin/api' }, + { text: 'Model Management API Usage', link: '/admin/model-management' }, + { text: 'Chat UI Configuration', link: '/admin/ui-configuration' }, { text: 'API Request Error Handling', link: '/admin/error' }, - { text: 'Security', link: '/admin/security' }, ], }, { text: 'Advanced Configuration', items: [ - { text: 'Programmatic API Tokens', link: '/config/api-tokens' }, + { text: 'Configuration Schema', link: '/config/configuration' }, { text: 'Model Compatibility', link: '/config/model-compatibility' }, - { text: 'Model Management API', link: '/config/model-management-api' }, - { text: 'Model Management UI', link: '/config/model-management-ui' }, { text: 'Rag Vector Stores', link: '/config/vector-stores' }, - { text: 'Usage & Features', link: '/config/features' }, - { text: 'Branding', link: '/config/branding' }, - { text: 'Hiding Advanced Chat UI Components', link: '/config/hiding-chat-components' }, + { text: 'Configure IdP: Cognito & Keycloak Examples', link: '/config/idp' }, + { text: 'LiteLLM', link: '/config/lite-llm' }, ], }, { @@ -52,9 +46,12 @@ const navLinks = [ { text: 'RAG', link: '/user/rag' }, { text: 'Context Windows', link: '/user/context-windows' }, { text: 'Model KWARGS', link: '/user/model-kwargs' }, + { text: 'Model Management UI', link: '/user/model-management-ui' }, { text: 'Non-RAG in Context File Management', link: '/user/nonrag-management' }, { text: 'Prompt Engineering', link: '/user/prompt-engineering' }, { text: 'Session History', link: '/user/history' }, + { text: 'Breaking Changes', link: '/user/breaking-changes' }, + { text: 'Change Log', link: 'https://github.com/awslabs/LISA/releases' }, ], }]; diff --git a/lib/docs/admin/api.md b/lib/docs/admin/api.md deleted file mode 100644 index ad9e63e6..00000000 --- a/lib/docs/admin/api.md +++ /dev/null @@ -1,364 +0,0 @@ - -# API Usage Overview - -LISA provides robust API endpoints for managing models, both for users and administrators. These endpoints allow for operations such as listing, creating, updating, and deleting models. - -## API Gateway and ALB Endpoints - -LISA uses two primary APIs for model management: - -1. **User-facing OpenAI-Compatible API**: Available to all users for inference tasks and accessible through the LISA Serve ALB. This API provides an interface for querying and interacting with models deployed on Amazon ECS, Amazon Bedrock, or through LiteLLM. -2. **Admin-level Model Management API**: Available only to administrators through the API Gateway (APIGW). This API allows for full control of model lifecycle management, including creating, updating, and deleting models. - -### LiteLLM Routing in All Models - -Every model request is routed through LiteLLM, regardless of whether infrastructure (like ECS) is created for it. Whether deployed on ECS, external models via Bedrock, or managed through LiteLLM, all models are added to LiteLLM for traffic routing. 
The distinction is whether infrastructure is created (determined by request payloads), but LiteLLM integration is consistent for all models. The model management APIs will handle adding or removing model configurations from LiteLLM, and the LISA Serve endpoint will handle the inference requests against models available in LiteLLM. - -## User-facing OpenAI-Compatible API - -The OpenAI-compatible API is accessible through the LISA Serve ALB and allows users to list models available for inference tasks. Although not specifically part of the model management APIs, any model that is added or removed from LiteLLM via the model management API Gateway APIs will be reflected immediately upon queries to LiteLLM through the LISA Serve ALB. - -### Listing Models - -The `/v2/serve/models` endpoint on the LISA Serve ALB allows users to list all models available for inference in the LISA system. - -#### Request Example: - -```bash -curl -s -H 'Authorization: Bearer ' -X GET https:///v2/serve/models -``` - -#### Response Example: - -```json -{ - "data": [ - { - "id": "bedrock-embed-text-v2", - "object": "model", - "created": 1677610602, - "owned_by": "openai" - }, - { - "id": "titan-express-v1", - "object": "model", - "created": 1677610602, - "owned_by": "openai" - }, - { - "id": "sagemaker-amazon-mistrallite", - "object": "model", - "created": 1677610602, - "owned_by": "openai" - } - ], - "object": "list" -} -``` - -#### Explanation of Response Fields: - -These fields are all defined by the OpenAI API specification, which is documented [here](https://platform.openai.com/docs/api-reference/models/list). - -- `id`: A unique identifier for the model. -- `object`: The type of object, which is "model" in this case. -- `created`: A Unix timestamp representing when the model was created. -- `owned_by`: The entity responsible for the model, such as "openai." - -## Admin-level Model Management API - -This API is only accessible by administrators via the API Gateway and is used to create, update, and delete models. It supports full model lifecycle management. - -### Listing Models (Admin API) - -The `/models` route allows admins to list all models managed by the system. This includes models that are either creating, deleting, already active, or in a failed state. Models can be deployed via ECS or managed externally through a LiteLLM configuration. 
- -#### Request Example: - -```bash -curl -s -H "Authorization: Bearer " -X GET https:///models -``` - -#### Response Example: - -```json -{ - "models": [ - { - "autoScalingConfig": { - "minCapacity": 1, - "maxCapacity": 1, - "cooldown": 420, - "defaultInstanceWarmup": 180, - "metricConfig": { - "albMetricName": "RequestCountPerTarget", - "targetValue": 30, - "duration": 60, - "estimatedInstanceWarmup": 330 - } - }, - "containerConfig": { - "image": { - "baseImage": "vllm/vllm-openai:v0.5.0", - "type": "asset" - }, - "sharedMemorySize": 2048, - "healthCheckConfig": { - "command": [ - "CMD-SHELL", - "exit 0" - ], - "interval": 10, - "startPeriod": 30, - "timeout": 5, - "retries": 3 - }, - "environment": { - "MAX_TOTAL_TOKENS": "2048", - "MAX_CONCURRENT_REQUESTS": "128", - "MAX_INPUT_LENGTH": "1024" - } - }, - "loadBalancerConfig": { - "healthCheckConfig": { - "path": "/health", - "interval": 60, - "timeout": 30, - "healthyThresholdCount": 2, - "unhealthyThresholdCount": 10 - } - }, - "instanceType": "g5.xlarge", - "modelId": "mistral-vllm", - "modelName": "mistralai/Mistral-7B-Instruct-v0.2", - "modelType": "textgen", - "modelUrl": null, - "status": "Creating", - "streaming": true - }, - { - "autoScalingConfig": null, - "containerConfig": null, - "loadBalancerConfig": null, - "instanceType": null, - "modelId": "titan-express-v1", - "modelName": "bedrock/amazon.titan-text-express-v1", - "modelType": "textgen", - "modelUrl": null, - "status": "InService", - "streaming": true - } - ] -} -``` - -#### Explanation of Response Fields: - -- `modelId`: A unique identifier for the model. -- `modelName`: The name of the model, typically referencing the underlying service (Bedrock, SageMaker, etc.). -- `status`: The current state of the model, e.g., "Creating," "Active," or "Failed." -- `streaming`: Whether the model supports streaming inference. -- `instanceType` (optional): The instance type if the model is deployed via ECS. - -### Creating a Model (Admin API) - -LISA provides the `/models` endpoint for creating both ECS and LiteLLM-hosted models. Depending on the request payload, infrastructure will be created or bypassed (e.g., for LiteLLM-only models). - -This API accepts the same model definition parameters that were accepted in the V2 model definitions within the config.yaml file with one notable difference: the `containerConfig.image.path` field is -now omitted because it corresponded with the `inferenceContainer` selection. As a convenience, this path is no longer required. 
- -#### Request Example: - -``` -POST https:///models -``` - -#### Example Payload for ECS Model: - -```json -{ - "modelId": "mistral-vllm", - "modelName": "mistralai/Mistral-7B-Instruct-v0.2", - "modelType": "textgen", - "inferenceContainer": "vllm", - "instanceType": "g5.xlarge", - "streaming": true, - "containerConfig": { - "image": { - "baseImage": "vllm/vllm-openai:v0.5.0", - "type": "asset" - }, - "sharedMemorySize": 2048, - "environment": { - "MAX_CONCURRENT_REQUESTS": "128", - "MAX_INPUT_LENGTH": "1024", - "MAX_TOTAL_TOKENS": "2048" - }, - "healthCheckConfig": { - "command": ["CMD-SHELL", "exit 0"], - "interval": 10, - "startPeriod": 30, - "timeout": 5, - "retries": 3 - } - }, - "autoScalingConfig": { - "minCapacity": 1, - "maxCapacity": 1, - "cooldown": 420, - "defaultInstanceWarmup": 180, - "metricConfig": { - "albMetricName": "RequestCountPerTarget", - "targetValue": 30, - "duration": 60, - "estimatedInstanceWarmup": 330 - } - }, - "loadBalancerConfig": { - "healthCheckConfig": { - "path": "/health", - "interval": 60, - "timeout": 30, - "healthyThresholdCount": 2, - "unhealthyThresholdCount": 10 - } - } -} -``` - -#### Creating a LiteLLM-Only Model: - -```json -{ - "modelId": "titan-express-v1", - "modelName": "bedrock/amazon.titan-text-express-v1", - "modelType": "textgen", - "streaming": true -} -``` - -#### Explanation of Key Fields for Creation Payload: - -- `modelId`: The unique identifier for the model. This is any name you would like it to be. -- `modelName`: The name of the model as it appears in the system. For LISA-hosted models, this must be the S3 Key to your model artifacts, otherwise - this is the LiteLLM-compatible reference to a SageMaker Endpoint or Bedrock Foundation Model. Note: Bedrock and SageMaker resources must exist in the - same region as your LISA deployment. If your LISA installation is in us-east-1, then all SageMaker and Bedrock calls will also happen in us-east-1. - Configuration examples: - - LISA hosting: If your model artifacts are in `s3://${lisa_models_bucket}/path/to/model/weights`, then the `modelName` value here should be `path/to/model/weights` - - LiteLLM-only, Bedrock: If you want to use `amazon.titan-text-lite-v1`, your `modelName` value should be `bedrock/amazon.titan-text-lite-v1` - - LiteLLM-only, SageMaker: If you want to use a SageMaker Endpoint named `my-sm-endpoint`, then the `modelName` value should be `sagemaker/my-sm-endpoint`. -- `modelType`: The type of model, such as text generation (textgen). -- `streaming`: Whether the model supports streaming inference. -- `instanceType`: The type of EC2 instance to be used (only applicable for ECS models). -- `containerConfig`: Details about the Docker container, memory allocation, and environment variables. -- `autoScalingConfig`: Configuration related to ECS autoscaling. -- `loadBalancerConfig`: Health check configuration for load balancers. - -### Deleting a Model (Admin API) - -Admins can delete a model using the following endpoint. Deleting a model removes the infrastructure (ECS) or disconnects from LiteLLM. - -#### Request Example: - -``` -DELETE https:///models/{modelId} -``` - -#### Response Example: - -```json -{ - "status": "success", - "message": "Model mistral-vllm has been deleted successfully." -} -``` - -### Updating a Model - -LISA offers basic updating functionality for both LISA-hosted and LiteLLM-only models. For both types, the model type and streaming support can be updated -in the cases that the models were originally created with the wrong parameters. 
For example, if an embedding model was accidentally created as a `textgen` -model, the UpdateModel API can be used to set it to the intended `embedding` value. Additionally, for LISA-hosted models, users may update the AutoScaling -configuration to increase or decrease capacity usage for each model. Users may use this API to completely shut down all instances behind a model until -they want to add capacity back to the model for usage later. This feature can help users to effectively manage costs so that instances do not have to stay -running in time periods of little or no expected usage. - -The UpdateModel API has mutually exclusive payload fields to avoid conflicting requests. The API does not allow for shutting off a model at the same time -as updating its AutoScaling configuration, as these would introduce ambiguous intents. The API does not allow for setting AutoScaling limits to 0 and instead -requires the usage of the enable/disable functionality to allow models to fully scale down or turn back on. Metadata updates, such as changing the model type -or streaming compatibility, can happen in either type of update or simply by themselves. - -#### Request Example - -``` -PUT https:///models/{modelId} -``` - -#### Example Payloads - -##### Update Model Metadata - -This payload will simply update the model metadata, which will complete within seconds of invoking. If setting a model as an `embedding` model, then the -`streaming` option must be set to `false` or omitted as LISA does not support streaming with embedding models. Both the `streaming` and `modelType` options -may be included in any other update request. - -```json -{ - "streaming": true, - "modelType": "textgen" -} -``` - -##### Update AutoScaling Configuration - -This payload will update the AutoScaling configuration for minimum, maximum, and desired number of instances. The desired number must be between the -minimum or maximum numbers, inclusive, and all the numbers must be strictly greater than 0. If the model currently has less than the minimum number, then -the desired count will automatically raise to the minimum if a desired count is not specified. Despite setting a desired capacity, the model will scale down -to the minimum number over time if you are not hitting the scaling thresholds set when creating the model in the first place. - -The AutoScaling configuration **can** be updated while the model is in the Stopped state, but it won't be applied immediately. Instead, the configuration will -be saved until the model is started again, in which it will use the most recently updated AutoScaling configuration. - -The request will fail if the `autoScalingInstanceConfig` is defined at the same time as the `enabled` field. These options are mutually exclusive and must be -handled as separate operations. Any or all of the options within the `autoScalingInstanceConfig` may be set as needed, so if you only wish to change the `desiredCapacity`, -then that is the only option that you need to specify in the request object within the `autoScalingInstanceConfig`. - -```json -{ - "autoScalingInstanceConfig": { - "minCapacity": 2, - "maxCapacity": 4, - "desiredCapacity": 3 - } -} -``` - -##### Stop Model - Scale Down to 0 Instances - -This payload will stop all model EC2 instances and remove the model reference from LiteLLM so that users are unable to make inference requests against a model -with no capacity. 
This option is useful for users who wish to manage costs and turn off instances when the model is not currently needed but will be used again -in the future. - -The request will fail if the `enabled` field is defined at the same time as the `autoScalingInstanceConfig` field. These options are mutually exclusive and must be -handled as separate operations. - -```json -{ - "enabled": false -} -``` - -##### Start Model - Restore Previous AutoScaling Configuration - -After stopping a model, this payload will turn the model back on by spinning up instances, waiting for the expected spin-up time to allow models to initialize, and then -adding the reference back to LiteLLM so that users may query the model again. This is expected to be a much faster operation than creating the model through the CreateModel -API, so as long as the model details don't have to change, this in combination with the Stop payload will help to manage costs while still providing model availability as -quickly as the system can spin it up again. - -The request will fail if the `enabled` field is defined at the same time as the `autoScalingInstanceConfig` field. These options are mutually exclusive and must be -handled as separate operations. - -```json -{ - "enabled": true -} -``` diff --git a/lib/docs/admin/architecture.md b/lib/docs/admin/architecture.md index e0570bea..92b4326b 100644 --- a/lib/docs/admin/architecture.md +++ b/lib/docs/admin/architecture.md @@ -1,41 +1,68 @@ -# LLM Inference Solution for Amazon Dedicated Cloud (LISA) +# Architecture Overview + +LISA’s major components include: LISA Serve, LISA Chat API, LISA Chatbot, LISA RAG, and LISA Model Management. + +**Key Solution Features:** + +* **Model Hosting**, LISA serve hosts your models in managed and scalable ECS Clusters. +* **Model Management**, LISA has APIs around deploying, updating, and deleting third party and internally hosted models deployed in your account. +* **Inference Requests**, interact with your models via exposed REST APIs or through the LISA Chatbot UI. +* **Chatbot UI** allows users to seamlessly interact with Models, Model Management, RAG, and Configuration APIs. +* **Retrieval-Augmented Generation (RAG) Operations**, leveraging either OpenSearch and/or PGVector for efficient retrieval of relevant external data to enhance model responses. +* **Authentication and Authorization**, LISA supports customers bringing their own OpenID IDP and the use of DynamoDB stored Tokens to interact with the exposed APIs. + +### Solution Architecture + ![LISA Architecture](../assets/LisaArchitecture.png) -LISA is an infrastructure-as-code solution that supports model hosting and inference. Customers deploy LISA directly -into an AWS account and provision their own infrastructure. Customers bring their own models to LISA for hosting and -inference through Amazon ECS. LISA accelerates the use of Generative AI (GenAI) applications by providing scalable, -low latency access to customers’ generative LLMs and embedding language models. Customers can then focus on -experimenting with LLMs and developing GenAI applications. +- **User Interaction with Chat UI or API:** Users can interact with LISA through the Chat UI or REST API. Each interaction is authenticated using AWS Cognito or OIDC, ensuring secure access. +- **Request Routing:** The API Gateway securely routes user requests to the appropriate backend services, whether for fetching the chat UI, performing RAG operations, or managing models. 
+- **Model Management:** Administrators can deploy, update, or delete models via the Model Management API, which triggers ECS deployment and scaling workflows. +- **Model Inference:** Inference requests are routed to ECS-hosted models or external models via the LiteLLM proxy. Responses are served back to users through the ALB. +- **RAG Integration:** When RAG is enabled, LISA retrieves relevant documents from OpenSearch or PGVector, augmenting the model's response with external knowledge. +- **Session Continuity:** User session data is stored in DynamoDB, ensuring that users can retrieve and continue previous conversations across multiple interactions. +- **Autoscaling:** ECS tasks automatically scale based on system load, with ALBs distributing traffic across available instances to ensure performance. + +## LISA Components + +### LISA Model Management +![LISA Model Management Architecture](../assets/LisaModelManagement.png) +The Model Management component is responsible for managing the entire lifecycle of models in LISA. This includes creation, updating, deletion of models deployed on ECS or third party provided. The service integration automates and scales these operations, ensuring that the underlying infrastructure is managed efficiently. + +* **Self-Hosted Models**: Models are containerized and deployed on AWS ECS, with each model hosted in its own isolated ECS task. This design allows models to be independently scaled based on demand. Traffic to the models is balanced using Application Load Balancers (ALBs), ensuring that the autoscaling mechanism reacts to load fluctuations in real time, optimizing both performance and availability. +* **External Model Routing**: LISA utilizes the LiteLLM proxy to route traffic to different model providers, no matter their API and payload format. Users may add models from external providers, such as SageMaker or Bedrock, to LISA. LISA will simply add the configuration to LiteLLM without creating any additional supporting infrastructure. Customers do not have to independently manage the API integration with the use of LiteLLM. +* **Model Lifecycle Management**: AWS Step Functions are used to orchestrate the lifecycle of models, handling the creation, update, and deletion workflows. Each workflow provisions the required resources using CloudFormation templates, which manage infrastructure components like EC2 instances, security groups, and ECS services. LISA ensures that the necessary security, networking, and infrastructure components are automatically deployed and configured. + * The CloudFormation stacks define essential resources using the LISA core VPC configuration, ensuring best practices for security and access across all resources in the environment. + * DynamoDB stores model metadata, while Amazon S3 securely manages model weights, enabling ECS instances to retrieve the weights dynamically during deployment. + +#### Technical Implementation + +* **Model Lifecycle**: Lifecycle operations such as creation, update, and deletion are executed by Step Functions and backed by AWS Lambda in ```lambda/models/lambda_functions.py```. +* **CloudFormation**: Infrastructure components are provisioned using CloudFormation templates, as defined in ```ecs_model_deployer/src/lib/lisa_model_stack.ts```. +* **ECS Cluster**: ECS cluster and task definitions are located in ```ecs_model_deployer/src/lib/ecsCluster.ts```, with model containers specified in ```ecs_model_deployer/src/lib/ecs-model.ts```. 
-LISA’s chatbot user interface can be used for experiment with features and for production use cases. LISA enhances model -output by integrating retrieval-augmented generation (RAG) with Amazon OpenSearch or PostgreSQL’s PGVector extension, -incorporating external knowledge sources into model responses. This helps reduce the need for fine-tuning and delivers -more contextually relevant outputs. -LISA supports OpenAI’s API Spec via the LiteLLM proxy. This means that LISA is compatible for customers to configure -with models hosted externally by supported model providers. LiteLLM also allows customers to use LISA to standardize -model orchestration and communication across model providers instead of managing each individually. With OpenAI API spec -support, LISA can also be used as a stand-in replacement for any application that already utilizes OpenAI-centric -tooling (ex: OpenAI’s Python library, LangChain). +### LISA Serve +![LISA Serve Architecture](../assets/LisaServe.png) +LISA Serve is responsible for processing inference requests and serving model predictions. This component manages user requests to interact with LLMs and ensures that the models deliver low-latency responses. -## Background +* **Inference Requests**: Requests are routed via ALB, which serves as the main entry point to LISA’s backend infrastructure. The ALB forwards requests to the appropriate ECS-hosted model or externally-hosted model based on the request parameters. For models hosted within LISA, traffic to the models is managed with model-specific ALBs, which enable autoscaling if the models are under heavy load. LISA supports both direct REST API-based interaction and interaction through the Chat UI, enabling programmatic access or a user-friendly chat experience. +* **RAG (Retrieval-Augmented Generation)**: RAG operations enhance model responses by integrating external data sources. LISA leverages OpenSearch or PGVector (PostgreSQL) as vector stores, enabling vector-based search and retrieval of relevant knowledge to augment LLM outputs dynamically. -LISA is a robust, AWS-native platform designed to simplify the deployment and management of Large Language Models (LLMs) in scalable, secure, and highly available environments. Drawing inspiration from the AWS open-source project [aws-genai-llm-chatbot](https://github.com/aws-samples/aws-genai-llm-chatbot), LISA builds on this foundation by offering more specialized functionality, particularly in the areas of security, modularity, and flexibility. +#### Technical Implementation -One of the key differentiators of LISA is its ability to leverage the [text-generation-inference](https://github.com/huggingface/text-generation-inference/tree/main) text-generation-inference container from HuggingFace, allowing users to deploy cutting-edge LLMs. LISA also introduces several innovations that extend beyond its inspiration: +* RAG operations are managed through ```lambda/rag/lambda_functions.py```, which handles embedding generation and document retrieval via OpenSearch and PostgreSQL. +* Direct requests to the LISA Serve ALB entrypoint must utilize the OpenAI API spec, which we support through the use of the LiteLLM proxy. -1. **Support for Amazon Dedicated Cloud (ADC):** LISA is designed to operate in highly controlled environments like Amazon Dedicated Cloud (ADC) partitions, making it ideal for industries with stringent regulatory and security requirements. This focus on secure, isolated deployments differentiates LISA from other open-source platforms. -1. 
**Modular Design for Composability:** LISA's architecture is designed to be composable, splitting its components into distinct services. The core components, LISA Serve (for LLM serving and inference) and LISA Chat (for the chat interface), can be deployed as independent stacks. This modularity allows users to deploy only the parts they need, enhancing flexibility and scalability across different deployment environments. -1. **OpenAI API Specification Support:** LISA is built to support the OpenAI API specification, allowing users to replace OpenAI’s API with LISA without needing to change existing application code. This makes LISA a drop-in replacement for any workflow or application that already leverages OpenAI’s tooling, such as the OpenAI Python library or LangChain. -## System Overview +### LISA Chat +![LISA Chatbot Architecture](../assets/LisaChat.png) +LISA Chat provides a customizable chat interface that enables users to interact with models in real-time. This component ensures that users have a seamless experience for submitting queries and maintaining session continuity. -LISA is designed using a modular, microservices-based architecture, where each service performs a distinct function. It is composed of three core components: LISA Model Management, LISA Serve, and LISA Chat. Each of these components is responsible for specific functionality and interacts via well-defined API endpoints to ensure scalability, security, and fault tolerance across the system. +* **Chat Interface**: The Chat UI is hosted as a static website on Amazon S3 and is served via API Gateway. Users can interact with models directly through the web-based frontend, sending queries and viewing real-time responses from the models. The interface is integrated with LISA's backend services for model inference, retrieval augmented generation, and session management. +* **Session History Management**: LISA maintains session histories using DynamoDB, allowing users to retrieve and continue previous conversations seamlessly. This feature is crucial for maintaining continuity in multi-turn conversations with the models. -**Key System Functionalities:** +#### Technical Implementation -* **Authentication and Authorization** via AWS Cognito or OpenID Connect (OIDC) providers, ensuring secure access to both the REST API and Chat UI through token-based authentication and role-based access control. -* **Model Hosting** on AWS ECS with autoscaling and efficient traffic management using Application Load Balancers (ALBs), providing scalable and high-performance model inference. -* **Model Management** using AWS Step Functions to orchestrate complex workflows for creating, updating, and deleting models, automatically managing underlying ECS infrastructure. -* **Inference Requests** served via both the REST API and the Chat UI, dynamically routing user inputs to the appropriate ECS-hosted models for real-time inference. -* **Chat Interface** enabling users to interact with LISA through a user-friendly web interface, offering seamless real-time model interaction and session continuity. -* **Retrieval-Augmented Generation (RAG) Operations**, leveraging either OpenSearch or PGVector for efficient retrieval of relevant external data to enhance model responses. +* The Chat UI is implemented in the ```lib/user-interface/react/``` folder and is deployed using the scripts in the ```scripts/``` folder. 
+* Session management logic is handled in ```lambda/session/lambda_functions.py```, where session data is stored and retrieved from DynamoDB. +* RAG operations are defined in lambda/repository/lambda_functions.py diff --git a/lib/docs/admin/components.md b/lib/docs/admin/components.md deleted file mode 100644 index 3df5fda1..00000000 --- a/lib/docs/admin/components.md +++ /dev/null @@ -1,55 +0,0 @@ -## LISA Components - -### LISA Model Management -![LISA Model Management Architecture](../assets/LisaModelManagement.png) -The Model Management component is responsible for managing the entire lifecycle of models in LISA. This includes creation, updating, deletion, and scaling of models deployed on ECS. The system automates and scales these operations, ensuring that the underlying infrastructure is managed efficiently. - -* **Model Hosting**: Models are containerized and deployed on AWS ECS, with each model hosted in its own isolated ECS task. This design allows models to be independently scaled based on demand. Traffic to the models is balanced using Application Load Balancers (ALBs), ensuring that the autoscaling mechanism reacts to load fluctuations in real time, optimizing both performance and availability. -* **External Model Routing**: LISA utilizes the LiteLLM proxy to route traffic to different model providers, no matter their API and payload format. Users may add models from external providers, such as SageMaker or Bedrock, to their system to allow requests to models hosted in those systems and services. LISA will simply add the configuration to LiteLLM without creating any additional supporting infrastructure. -* **Model Lifecycle Management**: AWS Step Functions are used to orchestrate the lifecycle of models, handling the creation, update, and deletion workflows. Each workflow provisions the required resources using CloudFormation templates, which manage infrastructure components like EC2 instances, security groups, and ECS services. The system ensures that the necessary security, networking, and infrastructure components are automatically deployed and configured. - * The CloudFormation stacks define essential resources using the LISA core VPC configuration, ensuring best practices for security and access across all resources in the environment. - * DynamoDB stores model metadata, while Amazon S3 securely manages model weights, enabling ECS instances to retrieve the weights dynamically during deployment. - -#### Technical Implementation - -* **Model Lifecycle**: Lifecycle operations such as creation, update, and deletion are executed by Step Functions and backed by AWS Lambda in ```lambda/models/lambda_functions.py```. -* **CloudFormation**: Infrastructure components are provisioned using CloudFormation templates, as defined in ```ecs_model_deployer/src/lib/lisa_model_stack.ts```. -* **ECS Cluster**: ECS cluster and task definitions are located in ```ecs_model_deployer/src/lib/ecsCluster.ts```, with model containers specified in ```ecs_model_deployer/src/lib/ecs-model.ts```. - - -### LISA Serve -![LISA Serve Architecture](../assets/LisaServe.png) -LISA Serve is responsible for processing inference requests and serving model predictions. This component manages user requests to interact with LLMs and ensures that the models deliver low-latency responses. - -* **Inference Requests**: Requests are routed via ALB, which serves as the main entry point to LISA’s backend infrastructure. 
The ALB forwards requests to the appropriate ECS-hosted model or externally-hosted model based on the request parameters. For models hosted within LISA, traffic to the models is managed with model-specific ALBs, which enable autoscaling if the models are under heavy load. LISA supports both direct REST API-based interaction and interaction through the Chat UI, enabling programmatic access or a user-friendly chat experience. -* **RAG (Retrieval-Augmented Generation)**: RAG operations enhance model responses by integrating external data sources. LISA leverages OpenSearch or PGVector (PostgreSQL) as vector stores, enabling vector-based search and retrieval of relevant knowledge to augment LLM outputs dynamically. - -#### Technical Implementation - -* RAG operations are managed through ```lambda/rag/lambda_functions.py```, which handles embedding generation and document retrieval via OpenSearch and PostgreSQL. -* Direct requests to the LISA Serve ALB entrypoint must utilize the OpenAI API spec, which we support through the use of the LiteLLM proxy. - - -### LISA Chat -![LISA Chatbot Architecture](../assets/LisaChat.png) -LISA Chat provides a customizable chat interface that enables users to interact with models in real-time. This component ensures that users have a seamless experience for submitting queries and maintaining session continuity. - -* **Chat Interface**: The Chat UI is hosted as a static website on Amazon S3 and is served via API Gateway. Users can interact with models directly through the web-based frontend, sending queries and viewing real-time responses from the models. The interface is integrated with LISA's backend services for model inference, retrieval augmented generation, and session management. -* **Session History Management**: LISA maintains session histories using DynamoDB, allowing users to retrieve and continue previous conversations seamlessly. This feature is crucial for maintaining continuity in multi-turn conversations with the models. - -#### Technical Implementation - -* The Chat UI is implemented in the ```lib/user-interface/react/``` folder and is deployed using the scripts in the ```scripts/``` folder. -* Session management logic is handled in ```lambda/session/lambda_functions.py```, where session data is stored and retrieved from DynamoDB. -* RAG operations are defined in lambda/repository/lambda_functions.py - - -## Interaction Flow - -1. **User Interaction with Chat UI or API:** Users can interact with LISA through the Chat UI or REST API. Each interaction is authenticated using AWS Cognito or OIDC, ensuring secure access. -1. **Request Routing:** The API Gateway securely routes user requests to the appropriate backend services, whether for fetching the chat UI, performing RAG operations, or managing models. -1. **Model Management:** Administrators can deploy, update, or delete models via the Model Management API, which triggers ECS deployment and scaling workflows. -1. **Model Inference:** Inference requests are routed to ECS-hosted models or external models via the LiteLLM proxy. Responses are served back to users through the ALB. -1. **RAG Integration:** When RAG is enabled, LISA retrieves relevant documents from OpenSearch or PGVector, augmenting the model's response with external knowledge. -1. **Session Continuity:** User session data is stored in DynamoDB, ensuring that users can retrieve and continue previous conversations across multiple interactions. -1. 
**Autoscaling:** ECS tasks automatically scale based on system load, with ALBs distributing traffic across available instances to ensure performance.
diff --git a/lib/docs/admin/getting-started.md b/lib/docs/admin/getting-started.md
index 7358be69..1c828188 100644
--- a/lib/docs/admin/getting-started.md
+++ b/lib/docs/admin/getting-started.md
@@ -1,24 +1,27 @@
# Getting Started with LISA
-LISA (LLM Inference Solution for Amazon Dedicated Cloud) is an advanced infrastructure solution for deploying and
-managing Large Language Models (LLMs) on AWS. This guide will walk you through the setup process, from prerequisites
-to deployment.
+LISA is an infrastructure-as-code solution that leverages AWS services. Customers deploy LISA directly into an AWS account.
-## Prerequisites
+## Deployment Prerequisites
-Before beginning, ensure you have:
+### Pre-Deployment Steps
+
+* Set up and have access to an AWS account with appropriate permissions
+  * All the resource creation that happens as part of CDK deployments expects Administrator or Administrator-like permissions with resource creation and mutation permissions. Installation will not succeed if this profile does not have permissions to create and edit arbitrary resources for the system. Note: This level of permissions is not required for the runtime of LISA. This is only necessary for deployment and subsequent updates.
+* Familiarity with AWS Cloud Development Kit (CDK) and infrastructure-as-code principles
+* Optional: If using the chat UI, have your Identity Provider (IdP) information and access
+* Optional: Have your VPC information available, if you are using an existing one for your deployment
+* Note: CDK and Model Management both leverage AWS Systems Manager (SSM) Parameter Store. Confirm that SSM is approved for use by your organization before beginning.
+
+### Software
+
+* AWS CLI installed and configured
+* Python 3.9 or later
+* Node.js 14 or later
+* Docker installed and running
+* Sufficient disk space for model downloads and conversions
-1. An AWS account with appropriate permissions.
-   1. Because of all the resource creation that happens as part of CDK deployments, we expect Administrator or Administrator-like permissions with resource creation and mutation permissions.
-      Installation will not succeed if this profile does not have permissions to create and edit arbitrary resources for the system.
-      **Note**: This level of permissions is not required for the runtime of LISA, only its deployment and subsequent updates.
-2. AWS CLI installed and configured
-3. Familiarity with AWS Cloud Development Kit (CDK) and infrastructure-as-code principles
-4. Python 3.9 or later
-5. Node.js 14 or later
-6. Docker installed and running
-7. Sufficient disk space for model downloads and conversions
If you're new to CDK, review the [AWS CDK Documentation](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html) and consult with your AWS support team.
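+
+Before moving on, a quick sanity check of the prerequisite tooling can save time later. This is a minimal example; exact version output will vary by environment:
+
+```bash
+# Confirm the prerequisite tooling is installed and that the Docker daemon is running
+aws --version
+python3 --version   # expect 3.9 or later
+node --version      # expect 14 or later
+docker info > /dev/null && echo "Docker daemon is running"
+```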
@@ -36,10 +39,10 @@ cd lisa ## Step 2: Set Up Environment Variables -Create and configure your `config.yaml` file: +Create and configure your `config-custom.yaml` file: ```bash -cp example_config.yaml config.yaml +cp example_config.yaml config-custom.yaml ``` Set the following environment variables: @@ -48,6 +51,7 @@ Set the following environment variables: export PROFILE=my-aws-profile # Optional, can be left blank export DEPLOYMENT_NAME=my-deployment export ENV=dev # Options: dev, test, or prod +export CDK_DOCKER=finch # Optional, only required if not using docker as container engine ``` ## Step 3: Set Up Python and TypeScript Environments @@ -79,16 +83,15 @@ make installTypeScriptRequirements ## Step 4: Configure LISA -Edit the `config.yaml` file to customize your LISA deployment. Key configurations include: +Edit the `config-custom.yaml` file to customize your LISA deployment. Key configurations include: - AWS account and region settings -- Model configurations - Authentication settings -- Networking and infrastructure preferences +- Model bucket name ## Step 5: Stage Model Weights -LISA requires model weights to be staged in the S3 bucket specified in your `config.yaml` file, assuming the S3 bucket follows this structure: +LISA requires model weights to be staged in the S3 bucket specified in your `config-custom.yaml` file, assuming the S3 bucket follows this structure: ``` s3:/// @@ -107,7 +110,7 @@ s3:///mistralai/Mistral-7B-Instruct-v0.2/ ... ``` -To automatically download and stage the model weights defined by the `ecsModels` parameter in your `config.yaml`, use the following command: +To automatically download and stage the model weights defined by the `ecsModels` parameter in your `config-custom.yaml`, use the following command: ```bash make modelCheck @@ -116,7 +119,10 @@ make modelCheck This command verifies if the model's weights are already present in your S3 bucket. If not, it downloads the weights, converts them to the required format, and uploads them to your S3 bucket. Ensure adequate disk space is available for this process. > **WARNING** -> As of LISA 3.0, the `ecsModels` parameter in `config.yaml` is solely for staging model weights in your S3 bucket. Previously, before models could be managed through the [API](https://github.com/awslabs/LISA/blob/develop/README.md#creating-a-model-admin-api) or via the Model Management section of the [Chatbot](https://github.com/awslabs/LISA/blob/develop/README.md#chatbot-example), this parameter also dictated which models were deployed. +> As of LISA 3.0, the `ecsModels` parameter in `config-custom.yaml` is solely for staging model weights in your S3 bucket. +> Previously, before models could be managed through the [API](/admin/model-management) or via the Model Management +> section of the [Chatbot](/user/chat), this parameter also +> dictated which models were deployed. > **NOTE** > For air-gapped systems, before running `make modelCheck` you should manually download model artifacts and place them in a `models` directory at the project root, using the structure: `models/`. @@ -126,7 +132,7 @@ This command verifies if the model's weights are already present in your S3 buck ## Step 6: Configure Identity Provider -In the `config.yaml` file, configure the `authConfig` block for authentication. LISA supports OpenID Connect (OIDC) providers such as AWS Cognito or Keycloak. Required fields include: +In the `config-custom.yaml` file, configure the `authConfig` block for authentication. 
LISA supports OpenID Connect (OIDC) providers such as AWS Cognito or Keycloak. Required fields include: - `authority`: URL of your identity provider - `clientId`: Client ID for your application @@ -134,51 +140,22 @@ In the `config.yaml` file, configure the `authConfig` block for authentication. - `jwtGroupsProperty`: Path to the groups field in the JWT token - `additionalScopes` (optional): Extra scopes for group membership information -#### Cognito Configuration Example: -In Cognito, the `authority` will be the URL to your User Pool. As an example, if your User Pool ID, not the name, is `us-east-1_example`, and if it is -running in `us-east-1`, then the URL to put in the `authority` field would be `https://cognito-idp.us-east-1.amazonaws.com/us-east-1_example`. The `clientId` -can be found in your User Pool's "App integration" tab from within the AWS Management Console, and at the bottom of the page, you will see the list of clients -and their associated Client IDs. The ID here is what we will need for the `clientId` field. - +IDP Configuration examples using AWS Cognito and Keycloak can be found: [IDP Configuration Examples](/config/idp) -```yaml -authConfig: - authority: https://cognito-idp.us-east-1.amazonaws.com/us-east-1_example - clientId: your-client-id - adminGroup: AdminGroup - jwtGroupsProperty: cognito:groups -``` - -#### Keycloak Configuration Example: -In Keycloak, the `authority` will be the URL to your Keycloak server. The `clientId` is likely not a random string like in the Cognito clients, and instead -will be a string configured by your Keycloak administrator. Your administrator will be able to give you a client name or create a client for you to use for -this application. Once you have this string, use that as the `clientId` within the `authConfig` block. - -```yaml -authConfig: - authority: https://your-keycloak-server.com - clientId: your-client-name - adminGroup: AdminGroup - jwtGroupsProperty: realm_access.roles -``` ## Step 7: Configure LiteLLM We utilize LiteLLM under the hood to allow LISA to respond to the [OpenAI specification](https://platform.openai.com/docs/api-reference). For LiteLLM configuration, a key must be set up so that the system may communicate with a database for tracking all the models that are added or removed -using the [Model Management API](#admin-level-model-management-api). The key must start with `sk-` and then can be any arbitrary string. We recommend generating a new UUID and then using that as +using the [Model Management API](/admin/model-management). The key must start with `sk-` and then can be any arbitrary +string. We recommend generating a new UUID and then using that as the key. Configuration example is below. ```yaml litellmConfig: - general_settings: - master_key: sk-00000000-0000-0000-0000-000000000000 # needed for db operations, create your own key # pragma: allowlist-secret - model_list: [] + db_key: sk-00000000-0000-0000-0000-000000000000 # needed for db operations, create your own key # pragma: allowlist-secret ``` -**Note**: It is possible to add LiteLLM-only models to this configuration, but it is not recommended as the models in this configuration will not show in the -Chat or Model Management UIs. Instead, use the [Model Management UI](#admin-level-model-management-api) to add or remove LiteLLM-only model configurations. 
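+
+If you need a quick way to produce a compliant key, one option is the following one-liner (this assumes Python 3 is available locally; any UUID generator works equally well):
+
+```bash
+# Generate a LiteLLM db_key in the expected "sk-<uuid>" format
+python3 -c 'import uuid; print("sk-" + str(uuid.uuid4()))'
+```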
- ## Step 8: Set Up SSL Certificates (Development Only) **WARNING: THIS IS FOR DEV ONLY** @@ -190,20 +167,20 @@ export REGION= aws iam upload-server-certificate --server-certificate-name --certificate-body file://scripts/server.pem --private-key file://scripts/server.key ``` -Update your `config.yaml` with the certificate ARN: +Update your `config-custom.yaml` with the certificate ARN: ```yaml restApiConfig: - loadBalancerConfig: - sslCertIamArn: arn:aws:iam:::server-certificate/ + sslCertIamArn: arn:aws:iam:::server-certificate/ ``` ## Step 9: Customize Model Deployment -In the `ecsModels` section of `config.yaml`, allow our deployment process to pull the model weights for you. +In the `ecsModels` section of `config-custom.yaml`, allow our deployment process to pull the model weights for you. During the deployment process, LISA will optionally attempt to download your model weights if you specify an optional `ecsModels` -array, this will only work in non ADC regions. Specifically, see the `ecsModels` section of the [example_config.yaml](./example_config.yaml) file. +array, this will only work in non ADC regions. Specifically, see the `ecsModels` section of +the [example_config.yaml](https://github.com/awslabs/LISA/blob/develop/example_config.yaml) file. Here we define the model name, inference container, and baseImage: ```yaml @@ -252,52 +229,5 @@ services are in the same region as the LISA installation, LISA can use them alon **Important:** Endpoints or Models statically defined during LISA deployment cannot be removed or updated using the LISA Model Management API, and they will not show in the Chat UI. These will only show as part of the OpenAI `/models` API. -Although there is support for it, we recommend using the [Model Management API](#admin-level-model-management-api) instead of the following static configuration. - -### Example Configuration - -```yaml -dev: - litellmConfig: - litellm_settings: - telemetry: false # Disable telemetry to LiteLLM servers (recommended for VPC deployments) - drop_params: true # Ignore unrecognized parameters instead of failing - - model_list: - # 1. SageMaker Endpoint Configuration - - model_name: test-endpoint # Human-readable name, can be anything and will be used for OpenAI API calls - litellm_params: - model: sagemaker/test-endpoint # Prefix required for SageMaker Endpoints and "test-endpoint" matches Endpoint name - api_key: ignored # Provide an ignorable placeholder key to avoid LiteLLM deployment failures - lisa_params: - model_type: textgen - streaming: true - - # 2. Amazon Bedrock Model Configuration - - model_name: bedrock-titan-express # Human-readable name for future OpenAI API calls - litellm_params: - model: bedrock/amazon.titan-text-express-v1 # Prefix required for Bedrock Models, and exact name of Model to use - api_key: ignored # Provide an ignorable placeholder key to avoid LiteLLM deployment failures - lisa_params: - model_type: textgen - streaming: true - - # 3. Custom OpenAI-compatible Text Generation Model - - model_name: custom-openai-model # Used in future OpenAI-compatible calls to LiteLLM - litellm_params: - model: openai/custom-provider/textgen-model # Format: openai// - api_base: https://your-domain-here:443/v1 # Your model's base URI - api_key: ignored # Provide an ignorable placeholder key to avoid LiteLLM deployment failures - lisa_params: - model_type: textgen - streaming: true - - # 4. 
Custom OpenAI-compatible Embedding Model - - model_name: custom-openai-embedding-model # Used in future OpenAI-compatible calls to LiteLLM - litellm_params: - model: openai/modelProvider/modelName # Prefix required for OpenAI-compatible models followed by model provider and name details - api_base: https://your-domain-here:443/v1 # Your model's base URI - api_key: ignored # Provide an ignorable placeholder key to avoid LiteLLM deployment failures - lisa_params: - model_type: embedding -``` +Although there is support for it, we recommend using the [Model Management API](/admin/model-management) instead of the +following static configuration. diff --git a/lib/docs/admin/idp.md b/lib/docs/admin/idp.md deleted file mode 100644 index 46409041..00000000 --- a/lib/docs/admin/idp.md +++ /dev/null @@ -1 +0,0 @@ -# TODO diff --git a/lib/docs/admin/model-management.md b/lib/docs/admin/model-management.md index 46409041..cb4b7ba9 100644 --- a/lib/docs/admin/model-management.md +++ b/lib/docs/admin/model-management.md @@ -1 +1,364 @@ -# TODO + +# Model Management API Usage + +LISA provides robust API endpoints for managing models, both for users and administrators. These endpoints allow for operations such as listing, creating, updating, and deleting models. + +## API Gateway and ALB Endpoints + +LISA uses two primary APIs for model management: + +1. **User-facing OpenAI-Compatible API**: Available to all users for inference tasks and accessible through the LISA Serve ALB. This API provides an interface for querying and interacting with models deployed on Amazon ECS, Amazon Bedrock, or through LiteLLM. +2. **Admin-level Model Management API**: Available only to administrators through the API Gateway (APIGW). This API allows for full control of model lifecycle management, including creating, updating, and deleting models. + +### LiteLLM Routing in All Models + +Every model request is routed through LiteLLM, regardless of whether infrastructure (like ECS) is created for it. Whether deployed on ECS, external models via Bedrock, or managed through LiteLLM, all models are added to LiteLLM for traffic routing. The distinction is whether infrastructure is created (determined by request payloads), but LiteLLM integration is consistent for all models. The model management APIs will handle adding or removing model configurations from LiteLLM, and the LISA Serve endpoint will handle the inference requests against models available in LiteLLM. + +## User-facing OpenAI-Compatible API + +The OpenAI-compatible API is accessible through the LISA Serve ALB and allows users to list models available for inference tasks. Although not specifically part of the model management APIs, any model that is added or removed from LiteLLM via the model management API Gateway APIs will be reflected immediately upon queries to LiteLLM through the LISA Serve ALB. + +### Listing Models + +The `/v2/serve/models` endpoint on the LISA Serve ALB allows users to list all models available for inference in the LISA system. 
+ +#### Request Example: + +```bash +curl -s -H 'Authorization: Bearer ' -X GET https:///v2/serve/models +``` + +#### Response Example: + +```json +{ + "data": [ + { + "id": "bedrock-embed-text-v2", + "object": "model", + "created": 1677610602, + "owned_by": "openai" + }, + { + "id": "titan-express-v1", + "object": "model", + "created": 1677610602, + "owned_by": "openai" + }, + { + "id": "sagemaker-amazon-mistrallite", + "object": "model", + "created": 1677610602, + "owned_by": "openai" + } + ], + "object": "list" +} +``` + +#### Explanation of Response Fields: + +These fields are all defined by the OpenAI API specification, which is documented [here](https://platform.openai.com/docs/api-reference/models/list). + +- `id`: A unique identifier for the model. +- `object`: The type of object, which is "model" in this case. +- `created`: A Unix timestamp representing when the model was created. +- `owned_by`: The entity responsible for the model, such as "openai." + +## Admin-level Model Management API + +This API is only accessible by administrators via the API Gateway and is used to create, update, and delete models. It supports full model lifecycle management. + +### Listing Models (Admin API) + +The `/models` route allows admins to list all models managed by the system. This includes models that are either creating, deleting, already active, or in a failed state. Models can be deployed via ECS or managed externally through a LiteLLM configuration. + +#### Request Example: + +```bash +curl -s -H "Authorization: Bearer " -X GET https:///models +``` + +#### Response Example: + +```json +{ + "models": [ + { + "autoScalingConfig": { + "minCapacity": 1, + "maxCapacity": 1, + "cooldown": 420, + "defaultInstanceWarmup": 180, + "metricConfig": { + "albMetricName": "RequestCountPerTarget", + "targetValue": 30, + "duration": 60, + "estimatedInstanceWarmup": 330 + } + }, + "containerConfig": { + "image": { + "baseImage": "vllm/vllm-openai:v0.5.0", + "type": "asset" + }, + "sharedMemorySize": 2048, + "healthCheckConfig": { + "command": [ + "CMD-SHELL", + "exit 0" + ], + "interval": 10, + "startPeriod": 30, + "timeout": 5, + "retries": 3 + }, + "environment": { + "MAX_TOTAL_TOKENS": "2048", + "MAX_CONCURRENT_REQUESTS": "128", + "MAX_INPUT_LENGTH": "1024" + } + }, + "loadBalancerConfig": { + "healthCheckConfig": { + "path": "/health", + "interval": 60, + "timeout": 30, + "healthyThresholdCount": 2, + "unhealthyThresholdCount": 10 + } + }, + "instanceType": "g5.xlarge", + "modelId": "mistral-vllm", + "modelName": "mistralai/Mistral-7B-Instruct-v0.2", + "modelType": "textgen", + "modelUrl": null, + "status": "Creating", + "streaming": true + }, + { + "autoScalingConfig": null, + "containerConfig": null, + "loadBalancerConfig": null, + "instanceType": null, + "modelId": "titan-express-v1", + "modelName": "bedrock/amazon.titan-text-express-v1", + "modelType": "textgen", + "modelUrl": null, + "status": "InService", + "streaming": true + } + ] +} +``` + +#### Explanation of Response Fields: + +- `modelId`: A unique identifier for the model. +- `modelName`: The name of the model, typically referencing the underlying service (Bedrock, SageMaker, etc.). +- `status`: The current state of the model, e.g., "Creating," "Active," or "Failed." +- `streaming`: Whether the model supports streaming inference. +- `instanceType` (optional): The instance type if the model is deployed via ECS. + +### Creating a Model (Admin API) + +LISA provides the `/models` endpoint for creating both ECS and LiteLLM-hosted models. 
Depending on the request payload, infrastructure will be created or bypassed (e.g., for LiteLLM-only models). + +This API accepts the same model definition parameters that were accepted in the V2 model definitions within the config.yaml file with one notable difference: the `containerConfig.image.path` field is +now omitted because it corresponded with the `inferenceContainer` selection. As a convenience, this path is no longer required. + +#### Request Example: + +``` +POST https:///models +``` + +#### Example Payload for ECS Model: + +```json +{ + "modelId": "mistral-vllm", + "modelName": "mistralai/Mistral-7B-Instruct-v0.2", + "modelType": "textgen", + "inferenceContainer": "vllm", + "instanceType": "g5.xlarge", + "streaming": true, + "containerConfig": { + "image": { + "baseImage": "vllm/vllm-openai:v0.5.0", + "type": "asset" + }, + "sharedMemorySize": 2048, + "environment": { + "MAX_CONCURRENT_REQUESTS": "128", + "MAX_INPUT_LENGTH": "1024", + "MAX_TOTAL_TOKENS": "2048" + }, + "healthCheckConfig": { + "command": ["CMD-SHELL", "exit 0"], + "interval": 10, + "startPeriod": 30, + "timeout": 5, + "retries": 3 + } + }, + "autoScalingConfig": { + "minCapacity": 1, + "maxCapacity": 1, + "cooldown": 420, + "defaultInstanceWarmup": 180, + "metricConfig": { + "albMetricName": "RequestCountPerTarget", + "targetValue": 30, + "duration": 60, + "estimatedInstanceWarmup": 330 + } + }, + "loadBalancerConfig": { + "healthCheckConfig": { + "path": "/health", + "interval": 60, + "timeout": 30, + "healthyThresholdCount": 2, + "unhealthyThresholdCount": 10 + } + } +} +``` + +#### Creating a LiteLLM-Only Model: + +```json +{ + "modelId": "titan-express-v1", + "modelName": "bedrock/amazon.titan-text-express-v1", + "modelType": "textgen", + "streaming": true +} +``` + +#### Explanation of Key Fields for Creation Payload: + +- `modelId`: The unique identifier for the model. This is any name you would like it to be. +- `modelName`: The name of the model as it appears in the system. For LISA-hosted models, this must be the S3 Key to your model artifacts, otherwise + this is the LiteLLM-compatible reference to a SageMaker Endpoint or Bedrock Foundation Model. Note: Bedrock and SageMaker resources must exist in the + same region as your LISA deployment. If your LISA installation is in us-east-1, then all SageMaker and Bedrock calls will also happen in us-east-1. + Configuration examples: + - LISA hosting: If your model artifacts are in `s3://${lisa_models_bucket}/path/to/model/weights`, then the `modelName` value here should be `path/to/model/weights` + - LiteLLM-only, Bedrock: If you want to use `amazon.titan-text-lite-v1`, your `modelName` value should be `bedrock/amazon.titan-text-lite-v1` + - LiteLLM-only, SageMaker: If you want to use a SageMaker Endpoint named `my-sm-endpoint`, then the `modelName` value should be `sagemaker/my-sm-endpoint`. +- `modelType`: The type of model, such as text generation (textgen). +- `streaming`: Whether the model supports streaming inference. +- `instanceType`: The type of EC2 instance to be used (only applicable for ECS models). +- `containerConfig`: Details about the Docker container, memory allocation, and environment variables. +- `autoScalingConfig`: Configuration related to ECS autoscaling. +- `loadBalancerConfig`: Health check configuration for load balancers. + +### Deleting a Model (Admin API) + +Admins can delete a model using the following endpoint. Deleting a model removes the infrastructure (ECS) or disconnects from LiteLLM. 
+
+#### Request Example:
+
+```
+DELETE https:///models/{modelId}
+```
+
+#### Response Example:
+
+```json
+{
+  "status": "success",
+  "message": "Model mistral-vllm has been deleted successfully."
+}
+```
+
+### Updating a Model
+
+LISA offers basic updating functionality for both LISA-hosted and LiteLLM-only models. For both types, the model type and streaming support can be updated
+in cases where the models were originally created with the wrong parameters. For example, if an embedding model was accidentally created as a `textgen`
+model, the UpdateModel API can be used to set it to the intended `embedding` value. Additionally, for LISA-hosted models, users may update the AutoScaling
+configuration to increase or decrease capacity usage for each model. Users may use this API to completely shut down all instances behind a model until
+they want to add capacity back to the model later. This feature can help users manage costs effectively so that instances do not have to stay
+running during periods of little or no expected usage.
+
+The UpdateModel API has mutually exclusive payload fields to avoid conflicting requests. The API does not allow for shutting off a model at the same time
+as updating its AutoScaling configuration, as these would introduce ambiguous intent. The API does not allow for setting AutoScaling limits to 0 and instead
+requires the use of the enable/disable functionality to allow models to fully scale down or turn back on. Metadata updates, such as changing the model type
+or streaming compatibility, can happen in either type of update or simply by themselves.
+
+#### Request Example
+
+```
+PUT https:///models/{modelId}
+```
+
+#### Example Payloads
+
+##### Update Model Metadata
+
+This payload will simply update the model metadata, which will complete within seconds of invoking. If setting a model as an `embedding` model, then the
+`streaming` option must be set to `false` or omitted, as LISA does not support streaming with embedding models. Both the `streaming` and `modelType` options
+may be included in any other update request.
+
+```json
+{
+  "streaming": true,
+  "modelType": "textgen"
+}
+```
+
+##### Update AutoScaling Configuration
+
+This payload will update the AutoScaling configuration for the minimum, maximum, and desired number of instances. The desired number must be between the
+minimum and maximum numbers, inclusive, and all the numbers must be strictly greater than 0. If the model currently has fewer than the minimum number, then
+the desired count will automatically be raised to the minimum if a desired count is not specified. Despite setting a desired capacity, the model will scale down
+to the minimum number over time if you are not hitting the scaling thresholds set when creating the model in the first place.
+
+The AutoScaling configuration **can** be updated while the model is in the Stopped state, but it won't be applied immediately. Instead, the configuration will
+be saved until the model is started again, at which point it will use the most recently updated AutoScaling configuration.
+
+The request will fail if the `autoScalingInstanceConfig` is defined at the same time as the `enabled` field. These options are mutually exclusive and must be
+handled as separate operations. Any or all of the options within the `autoScalingInstanceConfig` may be set as needed, so if you only wish to change the `desiredCapacity`,
+then that is the only option that you need to specify in the request object within the `autoScalingInstanceConfig`.
+ +```json +{ + "autoScalingInstanceConfig": { + "minCapacity": 2, + "maxCapacity": 4, + "desiredCapacity": 3 + } +} +``` + +##### Stop Model - Scale Down to 0 Instances + +This payload will stop all model EC2 instances and remove the model reference from LiteLLM so that users are unable to make inference requests against a model +with no capacity. This option is useful for users who wish to manage costs and turn off instances when the model is not currently needed but will be used again +in the future. + +The request will fail if the `enabled` field is defined at the same time as the `autoScalingInstanceConfig` field. These options are mutually exclusive and must be +handled as separate operations. + +```json +{ + "enabled": false +} +``` + +##### Start Model - Restore Previous AutoScaling Configuration + +After stopping a model, this payload will turn the model back on by spinning up instances, waiting for the expected spin-up time to allow models to initialize, and then +adding the reference back to LiteLLM so that users may query the model again. This is expected to be a much faster operation than creating the model through the CreateModel +API, so as long as the model details don't have to change, this in combination with the Stop payload will help to manage costs while still providing model availability as +quickly as the system can spin it up again. + +The request will fail if the `enabled` field is defined at the same time as the `autoScalingInstanceConfig` field. These options are mutually exclusive and must be +handled as separate operations. + +```json +{ + "enabled": true +} +``` diff --git a/lib/docs/admin/overview.md b/lib/docs/admin/overview.md new file mode 100644 index 00000000..223205ba --- /dev/null +++ b/lib/docs/admin/overview.md @@ -0,0 +1,41 @@ +# What is LISA? + +LISA is an infrastructure-as-code solution providing scalable, low latency access to customers’ generative LLMs and +embedding language models. LISA accelerates and supports customers’ GenAI experimentation and adoption, particularly in +regions where Amazon Bedrock is not available. LISA allows customers to move quickly rather than independently solve the +undifferentiated heavy lifting of hosting and inference architecture. Customers deploy LISA into a single AWS account +and integrate it with an identity provider. Customers bring their own models to LISA for self-hosting and inference +supported by Amazon Elastic Container Service (ECS). Model configuration is managed through LISA’s model management +APIs. + +As use cases and model requirements grow, customers can configure LISA with external model providers. Through OpenAI's +API spec via the LiteLLM proxy, LISA is compatible with 100+ models from various providers, including Amazon Bedrock and +Amazon Jumpstart. LISA customers can centralize communication across many model providers via LiteLLM, leveraging LISA +for model orchestration. Using LISA as a model orchestration layer allows customers to standardize integrations with +externally hosted models in a single place. Without an orchestration layer, customers must individually manage unique +API integrations with each provider. + +## Key Features + +* Self Host Models: Bring your own text generation and embedding models to LISA for hosting and inference. +* Model Orchestration: Centralize and standardize configuration with 100+ models from model providers via LiteLLM, + including Amazon Bedrock models. 
+* Chatbot User Interface: Through the chatbot user interface, users can prompt LLMs, receive responses, modify prompt
+  templates, change model arguments, and manage their session history. Administrators can control available features via
+  the configuration page.
+* Retrieval-augmented generation (RAG): RAG reduces the need for fine-tuning, an expensive and time-consuming
+  undertaking, and delivers more contextually relevant outputs. LISA offers RAG through Amazon OpenSearch or
+  PostgreSQL’s PGVector extension on Amazon RDS.
+* Non-RAG Model Context: Users can upload documents to their chat sessions to enhance responses or support use cases
+  like document summarization.
+* Model Management: Administrators can add, remove, and update models configured with LISA through the model management
+  configuration page or APIs.
+* OpenAI API spec: LISA can be configured with compatible tooling. For example, customers can configure LISA as the
+  model provider for the Continue plugin, an open-source AI code assistant for the JetBrains and Visual Studio Code
+  integrated development environments (IDEs). This allows users to select from any LISA-configured model to support LLM
+  prompting directly in their IDE.
+* Libraries: If your workflow includes libraries such as [LangChain](https://python.langchain.com/) or [OpenAI](https://github.com/openai/openai-python), then you
+  can place LISA in your application by changing only the endpoint and headers for the client objects.
+* FedRAMP: The AWS services that LISA leverages are FedRAMP High compliant.
+* Ongoing Releases: We offer ongoing releases with new functionality. LISA’s roadmap is customer-driven.
diff --git a/lib/docs/admin/security.md b/lib/docs/admin/security.md
deleted file mode 100644
index 46409041..00000000
--- a/lib/docs/admin/security.md
+++ /dev/null
@@ -1 +0,0 @@
-# TODO
diff --git a/lib/docs/admin/ui-configuration.md b/lib/docs/admin/ui-configuration.md
new file mode 100644
index 00000000..9c427086
--- /dev/null
+++ b/lib/docs/admin/ui-configuration.md
@@ -0,0 +1,32 @@
+
+# Chat UI Configuration
+
+The release of LISA 3.2.0 introduces enhanced administrative controls for the Chat UI, allowing for granular customization of user interfaces. System administrators now have the ability to activate, deactivate, or configure specific components for all users through the application's configuration panel.
+
+The following features can be managed:
+
+1. Session History Management
+   - Activate or deactivate the option to delete session history
+
+2. Message Information
+   - Control visibility of message metadata
+
+3. Chat Parameters
+   - Configure chat Kwargs
+   - Customize prompt templates
+   - Adjust chat history buffer settings
+
+4. Retrieval-Augmented Generation (RAG) Settings
+   - Modify the number of RAG documents to be included in the retrieval process (TopK)
+   - Activate or deactivate RAG document uploads
+
+5. Contextual Document Management
+   - Control the ability to upload in-context documents
+
+6. System Banner Customization
+   - Toggle banner visibility
+   - Edit banner text
+   - Customize text color
+   - Adjust background color
+
+These new configuration options provide administrators with greater flexibility in tailoring the Chat UI to organizational needs, enhancing both security and user experience across the platform.
diff --git a/lib/docs/config/api-tokens.md b/lib/docs/config/api-tokens.md deleted file mode 100644 index 1a0afa75..00000000 --- a/lib/docs/config/api-tokens.md +++ /dev/null @@ -1,77 +0,0 @@ -## Programmatic API Tokens - -The LISA Serve ALB can be used for programmatic access outside the example Chat application. -An example use case would be for allowing LISA to serve LLM requests that originate from the [Continue VSCode Plugin](https://www.continue.dev/). -To facilitate communication directly with the LISA Serve ALB, a user with sufficient DynamoDB PutItem permissions may add -API keys to the APITokenTable, and once created, a user may make requests by including the `Authorization: Bearer ${token}` -header or the `Api-Key: ${token}` header with that token. If using any OpenAI-compatible library, the `api_key` fields -will use the `Authorization: Bearer ${token}` format automatically, so there is no need to include additional headers -when using those libraries. - -### Adding a Token - -An account owner may create a long-lived API Token using the following AWS CLI command. - -```bash -AWS_REGION="us-east-1" # change to your deployment region -token_string="YOUR_STRING_HERE" # change to a unique string for a user -aws --region $AWS_REGION dynamodb put-item --table-name $DEPLOYMENT_NAME-LISAApiTokenTable \ - --item '{"token": {"S": "'${token_string}'"}}' -``` - -If an account owner wants the API Token to be temporary and expire after a specific date, LISA will allow for this too. -In addition to the `token` field, the owner may specify the `tokenExpiration` field, which accepts a UNIX timestamp, -in seconds. The following command shows an example of how to do this. - -```bash -AWS_REGION="us-east-1" # change to your deployment region -token_string="YOUR_STRING_HERE" -token_expiration=$(echo $(date +%s) + 3600 | bc) # token that expires in one hour, 3600 seconds -aws --region $AWS_REGION dynamodb put-item --table-name $DEPLOYMENT_NAME-LISAApiTokenTable \ - --item '{ - "token": {"S": "'${token_string}'"}, - "tokenExpiration": {"N": "'${token_expiration}'"} - }' -``` - -Once the token is inserted into the DynamoDB Table, a user may use the token in the `Authorization` request header like -in the following snippet. - -```bash -lisa_serve_rest_url="https://" -token_string="YOUR_STRING_HERE" -curl ${lisa_serve_rest_url}/v2/serve/models \ - -H 'accept: application/json' \ - -H 'Content-Type: application/json' \ - -H "Authorization: Bearer ${token_string}" -``` - -### Updating a Token - -In the case that an owner wishes to change an existing expiration time or add one to a key that did not previously have -an expiration, this can be accomplished by editing the existing item. The following commands can be used as an example -for updating an existing token. Setting the expiration time to a time in the past will effectively remove access for -that key. - -```bash -AWS_REGION="us-east-1" # change to your deployment region -token_string="YOUR_STRING_HERE" -token_expiration=$(echo $(date +%s) + 600 | bc) # token that expires in 10 minutes from now -aws --region $AWS_REGION dynamodb update-item --table-name $DEPLOYMENT_NAME-LISAApiTokenTable \ - --key '{"token": {"S": "'${token_string}'"}}' \ - --update-expression 'SET tokenExpiration=:t' \ - --expression-attribute-values '{":t": {"N": "'${token_expiration}'"}}' -``` - -### Removing a Token - -Tokens will not be automatically removed even if they are no longer valid. 
An owner may remove an key, expired or not, -from the database to fully revoke the key, by deleting the item. As an example, the following commands can be used to -remove a token. - -```bash -AWS_REGION="us-east-1" # change to your deployment region -token_string="YOUR_STRING_HERE" # change to the token to remove -aws --region $AWS_REGION dynamodb delete-item --table-name $DEPLOYMENT_NAME-LISAApiTokenTable \ - --key '{"token": {"S": "'${token_string}'"}}' -``` diff --git a/lib/docs/config/branding.md b/lib/docs/config/branding.md deleted file mode 100644 index 46409041..00000000 --- a/lib/docs/config/branding.md +++ /dev/null @@ -1 +0,0 @@ -# TODO diff --git a/lib/docs/config/configuration.md b/lib/docs/config/configuration.md new file mode 100644 index 00000000..520e9ab3 --- /dev/null +++ b/lib/docs/config/configuration.md @@ -0,0 +1,19 @@ +# Minimal Configuration + +Configurations for LISA are split into 2 configuration files, base and custom. The base configuration contains the +recommended properties that can be overridden with the custom properties file. The custom configuration should contain +the minimal properties required to deploy LISA, and any optional properties or overrides. This file should be created +at the root of your project (./config-custom.yaml) and needs to contain the following properties: + +```yaml +accountNumber: +region: +s3BucketModels: +authConfig: + authority: + clientId: + adminGroup: + jwtGroupsProperty: +``` + + diff --git a/lib/docs/config/features.md b/lib/docs/config/features.md deleted file mode 100644 index 46409041..00000000 --- a/lib/docs/config/features.md +++ /dev/null @@ -1 +0,0 @@ -# TODO diff --git a/lib/docs/config/hiding-chat-components.md b/lib/docs/config/hiding-chat-components.md deleted file mode 100644 index 46409041..00000000 --- a/lib/docs/config/hiding-chat-components.md +++ /dev/null @@ -1 +0,0 @@ -# TODO diff --git a/lib/docs/config/idp.md b/lib/docs/config/idp.md new file mode 100644 index 00000000..b0771014 --- /dev/null +++ b/lib/docs/config/idp.md @@ -0,0 +1,35 @@ +# IDP Configuration Examples + +## AWS Cognito Example: + +In Cognito, the `authority` will be the URL to your User Pool. As an example, if your User Pool ID, not the name, is +`us-east-1_example`, and if it is +running in `us-east-1`, then the URL to put in the `authority` field would be +`https://cognito-idp.us-east-1.amazonaws.com/us-east-1_example`. The `clientId` +can be found in your User Pool's "App integration" tab from within the AWS Management Console, and at the bottom of the +page, you will see the list of clients +and their associated Client IDs. The ID here is what we will need for the `clientId` field. + +```yaml +authConfig: + authority: https://cognito-idp.us-east-1.amazonaws.com/us-east-1_example + clientId: your-client-id + adminGroup: AdminGroup + jwtGroupsProperty: cognito:groups +``` + +## Keycloak Example: + +In Keycloak, the `authority` will be the URL to your Keycloak server. The `clientId` is likely not a random string like +in the Cognito clients, and instead +will be a string configured by your Keycloak administrator. Your administrator will be able to give you a client name or +create a client for you to use for +this application. Once you have this string, use that as the `clientId` within the `authConfig` block. 
+ +```yaml +authConfig: + authority: https://your-keycloak-server.com + clientId: your-client-name + adminGroup: AdminGroup + jwtGroupsProperty: realm_access.roles +``` diff --git a/lib/docs/admin/lite-llm.md b/lib/docs/config/lite-llm.md similarity index 100% rename from lib/docs/admin/lite-llm.md rename to lib/docs/config/lite-llm.md diff --git a/lib/docs/config/model-compatibility.md b/lib/docs/config/model-compatibility.md index 1f28b61e..8f7ea9b4 100644 --- a/lib/docs/config/model-compatibility.md +++ b/lib/docs/config/model-compatibility.md @@ -16,11 +16,15 @@ AutoModelForSeq2SeqLM.from_pretrained(, device_map="auto") ### HuggingFace Embedding Models -Embedding models often utilize custom codebases and are not as uniform as generation models. For this reason you will likely need to create a new `inferenceContainer`. Follow the [example](https://github.com/awslabs/LISA/tree/develop/lib/serve/ecs-model/embedding/instructor) provided for the `instructor` model. +Embedding models often utilize custom codebases and are not as uniform as generation models. For this reason you will +likely need to create a new `inferenceContainer`. Follow +the [example](https://github.com/awslabs/LISA/blob/develop/lib/serve/ecs-model/embedding/instructor) provided for the +`instructor` model. ### vLLM Models In addition to the support we have for the TGI and TEI containers, we support hosting models using the [vLLM container](https://docs.vllm.ai/en/latest/). vLLM abides by the OpenAI specification, and as such allows both text generation and embedding on the models that vLLM supports. -See the [deployment](#deployment) section for details on how to set up the vLLM container for your models. Similar to how the HuggingFace containers will serve safetensor weights downloaded from the +See the [deployment](/admin/deploy) section for details on how to set up the vLLM container for your models. Similar to +how the HuggingFace containers will serve safetensor weights downloaded from the HuggingFace website, vLLM will do the same, and our configuration will allow you to serve these artifacts automatically. vLLM does not have many supported models for embeddings, but as they become available, LISA will support them as long as the vLLM container version is updated in the config.yaml file and as long as the model's safetensors can be found in S3. diff --git a/lib/docs/config/model-management-api.md b/lib/docs/config/model-management-api.md deleted file mode 100644 index 46409041..00000000 --- a/lib/docs/config/model-management-api.md +++ /dev/null @@ -1 +0,0 @@ -# TODO diff --git a/lib/docs/package.json b/lib/docs/package.json index 7441aa18..f794a384 100644 --- a/lib/docs/package.json +++ b/lib/docs/package.json @@ -4,7 +4,8 @@ "version": "1.0.0", "description": "Documentation of LISA", "scripts": { - "build": "vitepress build .", + "prebuild": "(cd ../../ && npm run generateSchemaDocs)", + "build": "npm run docs:build", "docs:dev": "vitepress dev .", "docs:build": "vitepress build .", "docs:preview": "vitepress preview ." diff --git a/lib/docs/user/breaking-changes.md b/lib/docs/user/breaking-changes.md new file mode 100644 index 00000000..b71fc184 --- /dev/null +++ b/lib/docs/user/breaking-changes.md @@ -0,0 +1,80 @@ +# Breaking Changes + +## Migrating to v3.2.0 + +With the release of LISA v3.2.0, we have implemented a significant update to the configuration file schema to streamline +the deployment process. 
The previous single config.yaml file has been deprecated in favor of a more flexible two-file +system: config-base.yaml and config-custom.yaml. + +The config-base.yaml file now contains default properties, which can be selectively overridden using the +config-custom.yaml file. This new structure allows for greater customization while maintaining a standardized base +configuration. + +To facilitate the transition to this new configuration system, we have developed a migration utility. Users can execute +the command `npm run migrate-properties` to automatically convert their existing config.yaml file into the new +config-custom.yaml format. + +This update enhances the overall flexibility and maintainability of LISA configurations, providing a more robust +foundation for future developments and easier customization for end-users. + +## v2 to v3 Migration + +With the release of LISA v3.0.0, we have introduced several architectural changes that are incompatible with previous +versions. Although these changes may cause some friction for existing users, they aim to simplify the deployment +experience and enhance long-term scalability. The following breaking changes are critical for existing users planning to +upgrade: + +1. Model Deletion Upon Upgrade: Models deployed via EC2 and ECS using the config.yaml file’s ecsModels list will be + deleted during the upgrade process. LISA has migrated to a new model deployment system that manages models + internally, rendering the ecsModels list obsolete. We recommend backing up your model settings to facilitate their + redeployment through the new Model Management API with minimal downtime. +1. Networking Changes and Full Teardown: Core networking changes require a complete teardown of the existing LISA + installation using the make destroy command before upgrading. Cross-stack dependencies have been modified, + necessitating this full teardown to ensure proper application of the v3 infrastructure changes. Additionally, users + may need to manually delete some resources, such as ECR repositories or S3 buckets, if they were populated before + CloudFormation began deleting the stack. This operation is destructive and irreversible, so it is crucial to back up + any critical configurations and data (e.g., S3 RAG bucket contents, DynamoDB token tables) before proceeding with the + upgrade. +1. New LiteLLM Admin Key Requirement: The new Model Management API requires an "admin" key for LiteLLM to track models + for inference requests. This key, while transparent to users, must be present and conform to the required format ( + starting with sk-). The key is defined in the config.yaml file, and the LISA schema validator will prompt an error if + it is missing or incorrectly formatted. + +## v3.0.0 to v3.1.0 + +In preparation of the v3.1.0 release, there are several changes that we needed to make in order to ensure the stability +of the LISA system. + +1. The CreateModel API `containerConfig` object has been changed so that the Docker Image repository is listed in + `containerConfig.image.baseImage` instead of + its previous location at `containerConfig.baseImage.baseImage`. This change makes the configuration consistent with + the config.yaml file in LISA v2.0 and prior. +2. The CreateModel API `containerConfig.image` object no longer requires the `path` option. 
We identified that this was
+   a confusing and redundant option to set, considering
+   that the path was based on the LISA code repository structure, and that we already had an option to specify if a
+   model was using TGI, TEI, or vLLM. Specifying the `inferenceContainer`
+   is sufficient for the system to infer which files to use so that the user does not have to provide this information.
+3. The ApiDeployment stack now follows the same naming convention as the rest of the stacks that we deploy, utilizing
+   the deployment name and the deploymentStage names. This allows users
+   to have multiple LISA installations with different parameters in the same account without needing to change region or
+   account entirely. After successful deployment, you may safely delete the
+   previous `${deploymentStage}-LisaApiDeployment` stack, as it is no longer in use.
+4. If you have installed v3.0.0 or v3.0.1, you will need to **delete** the Models API stack so that the model deployer
+   function will deploy again. The function was converted to a Docker Image
+   Function so that the growing Function size would fit within the Lambda constraints. We recommend that you take the
+   following actions to avoid leaked resources:
+   1. Use the Model Management UI to **delete all models** from LISA. This is needed so that we delete any
+      CloudFormation stacks that track GPU instances. Failure to do this will require manual
+      resource cleanup to rid the account of inaccessible EC2 instances. Once the Models DynamoDB Table is deleted, we
+      do not have a programmatic way to re-reference deployed models, so that is
+      why we recommend deleting them first.
+   2. **Only after deleting all models through the Model Management UI**, manually delete the Model Management API
+      stack in CloudFormation. This will take at least 45 minutes due to Lambda's use
+      of Elastic Network Interfaces for VPC access. The stack name will look like:
+      `${deployment}-lisa-models-${deploymentStage}`.
+   3. After the stack has been deleted, deploy LISA v3.1.0, which will recreate the Models API stack, along with the
+      Docker Lambda Function.
+5. The `ecsModels` section of `config.yaml` has been stripped down to only 3 fields per model: `modelName`,
+   `inferenceContainer`, and `baseImage`. Just as before, the system will check to see if the models
+   defined here exist in your models S3 bucket prior to LISA deployment. These values will be needed later when invoking
+   the Model Management API to create a model.
diff --git a/lib/docs/user/chat.md b/lib/docs/user/chat.md
index 30e82fab..35425f65 100644
--- a/lib/docs/user/chat.md
+++ b/lib/docs/user/chat.md
@@ -73,7 +73,8 @@ npm run dev
 The LISA Serve endpoint can be used independently of the Chat UI, and the following shows a few examples of how to do
 that. The Serve endpoint will still validate user auth, so if you have a Bearer token from the IdP configured with LISA,
 we will honor it, or if you've set up an API
-token using the [DynamoDB instructions](#programmatic-api-tokens), we will also accept that. This diagram shows the LISA Serve components that
+token using the [DynamoDB instructions](/admin/api-tokens), we will also accept that. This diagram shows the LISA Serve
+components that
 would be utilized during direct REST API requests.
 
 ## OpenAI Specification Compatibility
@@ -89,7 +90,8 @@ routes as long as your underlying models can also respond to them. 
- /embeddings By supporting the OpenAI spec, we can more easily allow users to integrate their collection of models into their LLM applications and workflows. In LISA, users can authenticate -using their OpenID Connect Identity Provider, or with an API token created through the DynamoDB token workflow as described [here](#programmatic-api-tokens). Once the token +using their OpenID Connect Identity Provider, or with an API token created through the DynamoDB token workflow as +described [here](/admin/api-tokens). Once the token is retrieved, users can use that in direct requests to the LISA Serve REST API. If using the IdP, users must set the 'Authorization' header, otherwise if using the API token, either the 'Api-Key' header or the 'Authorization' header. After that, requests to `https://${lisa_serve_alb}/v2/serve` will handle the OpenAI API calls. As an example, the following call can list all models that LISA is aware of, assuming usage of the API token. If you are using a self-signed cert, you must also provide the `--cacert $path` option to specify a CA bundle to trust for SSL verification. @@ -112,7 +114,8 @@ automatically be added to the base URL, just as we appended `/models` to the `/v For developers that desire an LLM assistant to help with programming tasks, we support adding LISA as an LLM provider for the [Continue plugin](https://www.continue.dev). To add LISA as a provider, open up the Continue plugin's `config.json` file and locate the `models` list. In this list, add the following block, replacing the placeholder URL -with your own REST API domain or ALB. The `/v2/serve` is required at the end of the `apiBase`. This configuration requires an API token as created through the [DynamoDB workflow](#programmatic-api-tokens). +with your own REST API domain or ALB. The `/v2/serve` is required at the end of the `apiBase`. This configuration +requires an API token as created through the [DynamoDB workflow](/admin/api-tokens). ```json { @@ -147,7 +150,7 @@ client.models.list() To use the models being served by LISA, the client needs only a few changes: 1. Specify the `base_url` as the LISA Serve ALB, using the /v2/serve route at the end, similar to the apiBase in the [Continue example](#continue-jetbrains-and-vs-code-plugin) -2. Add the API key that you generated from the [token generation steps](#programmatic-api-tokens) as your `api_key` field. +2. Add the API key that you generated from the [token generation steps](/admin/api-tokens) as your `api_key` field. 3. If using a self-signed cert, you must provide a certificate path for validating SSL. If you're using an ACM or public cert, then this may be omitted. 1. We provide a convenience function in the `lisa-sdk` for generating a cert path from an IAM certificate ARN if one is provided in the `RESTAPI_SSL_CERT_ARN` environment variable. 
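+
+As a minimal sketch (not taken from the existing docs), the snippet below shows one way to point the OpenAI Python
+client (v1.x) at LISA Serve. The ALB domain and the `LISA_API_TOKEN` environment variable are placeholders; substitute
+your own endpoint and API token.
+
+```python
+import os
+
+from openai import OpenAI
+
+client = OpenAI(
+    api_key=os.environ["LISA_API_TOKEN"],          # API token created through the token generation steps
+    base_url="https://<lisa_serve_alb>/v2/serve",  # the /v2/serve suffix is required
+)
+
+# For a self-signed cert, pass an httpx client that trusts your CA bundle, e.g.:
+# http_client=httpx.Client(verify="/path/to/ca-bundle.pem")
+
+print([model.id for model in client.models.list()])
+```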
diff --git a/lib/docs/config/model-management-ui.md b/lib/docs/user/model-management-ui.md similarity index 100% rename from lib/docs/config/model-management-ui.md rename to lib/docs/user/model-management-ui.md diff --git a/lib/models/docker-image-builder.ts b/lib/models/docker-image-builder.ts index da2174d9..d7b2cc3d 100644 --- a/lib/models/docker-image-builder.ts +++ b/lib/models/docker-image-builder.ts @@ -16,7 +16,14 @@ import { Construct } from 'constructs'; import { Code, Function, Runtime } from 'aws-cdk-lib/aws-lambda'; -import { Role, InstanceProfile, ServicePrincipal, ManagedPolicy, Policy, PolicyStatement } from 'aws-cdk-lib/aws-iam'; +import { + Role, + InstanceProfile, + ServicePrincipal, + ManagedPolicy, + Policy, + PolicyStatement +} from 'aws-cdk-lib/aws-iam'; import { Stack, Duration } from 'aws-cdk-lib'; import { Bucket } from 'aws-cdk-lib/aws-s3'; import { BucketDeployment, Source } from 'aws-cdk-lib/aws-s3-deployment'; @@ -91,7 +98,13 @@ export class DockerImageBuilder extends Construct { new PolicyStatement({ actions: [ 'ec2:RunInstances', - 'ec2:CreateTags' + 'ec2:CreateTags', + 'ec2:CreateNetworkInterface', + 'ec2:DescribeNetworkInterfaces', + 'ec2:DescribeSubnets', + 'ec2:DeleteNetworkInterface', + 'ec2:AssignPrivateIpAddresses', + 'ec2:UnassignPrivateIpAddresses' ], resources: ['*'] }), @@ -134,7 +147,8 @@ export class DockerImageBuilder extends Construct { 'LISA_DOCKER_BUCKET': ec2DockerBucket.bucketName, 'LISA_ECR_URI': props.ecrUri, 'LISA_INSTANCE_PROFILE': ec2InstanceProfile.instanceProfileArn, - 'LISA_MOUNTS3_DEB_URL': props.mountS3DebUrl + 'LISA_MOUNTS3_DEB_URL': props.mountS3DebUrl, + ...(props.config?.subnets && {'LISA_SUBNET_ID': props.config.subnets[0].subnetId}) }, vpc: props.vpc?.subnetSelection ? props.vpc?.vpc : undefined, vpcSubnets: props.vpc?.subnetSelection, diff --git a/lib/models/ecs-model-deployer.ts b/lib/models/ecs-model-deployer.ts index 09c06174..9cf1b7e3 100644 --- a/lib/models/ecs-model-deployer.ts +++ b/lib/models/ecs-model-deployer.ts @@ -16,7 +16,14 @@ import { Construct } from 'constructs'; import { DockerImageCode, DockerImageFunction, IFunction } from 'aws-cdk-lib/aws-lambda'; -import { Role, ServicePrincipal, ManagedPolicy, Policy, PolicyStatement } from 'aws-cdk-lib/aws-iam'; +import { + Role, + ServicePrincipal, + ManagedPolicy, + PolicyStatement, + Effect, + PolicyDocument +} from 'aws-cdk-lib/aws-iam'; import { Stack, Duration, Size } from 'aws-cdk-lib'; import { createCdkId } from '../core/utils'; @@ -35,20 +42,34 @@ export class ECSModelDeployer extends Construct { super(scope, id); const stackName = Stack.of(scope).stackName; const role = new Role(this, createCdkId([stackName, 'ecs-model-deployer-role']), { - assumedBy: new ServicePrincipal('lambda.amazonaws.com') - }); - - const assumeCdkPolicy = new Policy(this, createCdkId([stackName, 'ecs-model-deployer-policy']), { - statements: [ - new PolicyStatement({ - actions: ['sts:AssumeRole'], - resources: ['arn:*:iam::*:role/cdk-*'] + assumedBy: new ServicePrincipal('lambda.amazonaws.com'), + managedPolicies: [ + ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSLambdaVPCAccessExecutionRole'), + ], + inlinePolicies: { + lambdaPermissions: new PolicyDocument({ + statements: [ + new PolicyStatement({ + actions: ['sts:AssumeRole'], + resources: ['arn:*:iam::*:role/cdk-*'] + }), + new PolicyStatement({ + effect: Effect.ALLOW, + actions: [ + 'ec2:CreateNetworkInterface', + 'ec2:DescribeNetworkInterfaces', + 'ec2:DescribeSubnets', + 'ec2:DeleteNetworkInterface', + 
'ec2:AssignPrivateIpAddresses', + 'ec2:UnassignPrivateIpAddresses' + ], + resources: ['*'], + }) + ] }) - ] - }); - role.attachInlinePolicy(assumeCdkPolicy); - role.addManagedPolicy(ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSLambdaBasicExecutionRole')); + } + }); const stripped_config = { 'appName': props.config.appName, @@ -59,7 +80,7 @@ export class ECSModelDeployer extends Construct { 's3BucketModels': props.config.s3BucketModels, 'mountS3DebUrl': props.config.mountS3DebUrl, 'permissionsBoundaryAspect': props.config.permissionsBoundaryAspect, - 'subnetIds': props.config.subnetIds + 'subnets': props.config.subnets }; const functionId = createCdkId([stackName, 'ecs_model_deployer']); diff --git a/lib/models/model-api.ts b/lib/models/model-api.ts index d3c41c56..cdb2224e 100644 --- a/lib/models/model-api.ts +++ b/lib/models/model-api.ts @@ -179,7 +179,6 @@ export class ModelsApi extends Construct { new PolicyStatement({ effect: Effect.ALLOW, actions: [ - 'ec2:TerminateInstances', 'ec2:CreateNetworkInterface', 'ec2:DescribeNetworkInterfaces', 'ec2:DescribeSubnets', @@ -188,6 +187,13 @@ export class ModelsApi extends Construct { 'ec2:UnassignPrivateIpAddresses' ], resources: ['*'], + }), + new PolicyStatement({ + effect: Effect.ALLOW, + actions: [ + 'ec2:TerminateInstances' + ], + resources: ['*'], conditions: { 'StringEquals': {'aws:ResourceTag/lisa_temporary_instance': 'true'} } diff --git a/lib/networking/vpc/index.ts b/lib/networking/vpc/index.ts index 72abd2f1..89b608ce 100644 --- a/lib/networking/vpc/index.ts +++ b/lib/networking/vpc/index.ts @@ -69,9 +69,9 @@ export class Vpc extends Construct { // Checks if SubnetIds are provided in the config, if so we import them for use. // A VPC must be supplied if Subnets are being used. - if (config.subnetIds && config.subnetIds.length > 0) { + if (config.subnets && config.subnets.length > 0) { this.subnetSelection = { - subnets: props.config.subnetIds?.map((subnet, index) => Subnet.fromSubnetId(this, index.toString(), subnet)) + subnets: props.config.subnets?.map((subnet, index) => Subnet.fromSubnetId(this, index.toString(), subnet.subnetId)) }; this.subnetGroup = new SubnetGroup( diff --git a/lib/rag/index.ts b/lib/rag/index.ts index cb3bc006..cbfd831b 100644 --- a/lib/rag/index.ts +++ b/lib/rag/index.ts @@ -39,7 +39,6 @@ import { Layer } from '../core/layers'; import { createCdkId } from '../core/utils'; import { Vpc } from '../networking/vpc'; import { BaseProps, RagRepositoryType } from '../schema'; -import { getSubnetCidrRange, isSubnetPublic } from '../api-base/utils'; import { IngestPipelineStateMachine } from './state_machine/ingest-pipeline'; @@ -159,17 +158,13 @@ export class LisaRagStack extends Stack { description: 'Security group for RAG OpenSearch domain', }); // Allow communication from private subnets to ECS cluster - const subNets = config.subnetIds && config.vpcId ? vpc.subnetSelection?.subnets : vpc.vpc.isolatedSubnets.concat(vpc.vpc.privateSubnets); - subNets?.filter((subnet) => !isSubnetPublic(subnet)).forEach((subnet) => { - getSubnetCidrRange(subnet.subnetId).then((cidrRange) => { - if (cidrRange){ - openSearchSg.connections.allowFrom( - Peer.ipv4(cidrRange), - Port.tcp(config.restApiConfig.rdsConfig.dbPort), - 'Allow REST API private subnets to communicate with LiteLLM database', - ); - } - }); + const subNets = config.subnets && config.vpcId ? 
vpc.subnetSelection?.subnets : vpc.vpc.isolatedSubnets.concat(vpc.vpc.privateSubnets); + subNets?.forEach((subnet) => { + openSearchSg.connections.allowFrom( + Peer.ipv4(config.subnets ? config.subnets.filter((filteredSubnet) => filteredSubnet.subnetId === subnet.subnetId)?.[0]?.ipv4CidrBlock : subnet.ipv4CidrBlock), + Port.tcp(config.restApiConfig.rdsConfig.dbPort), + 'Allow REST API private subnets to communicate with LiteLLM database', + ); }); new CfnOutput(this, 'openSearchSg', { value: openSearchSg.securityGroupId }); @@ -286,17 +281,13 @@ export class LisaRagStack extends Stack { description: 'Security group for RAG PGVector database', }); - const subNets = config.subnetIds && config.vpcId ? vpc.subnetSelection?.subnets : vpc.vpc.isolatedSubnets.concat(vpc.vpc.privateSubnets); - subNets?.filter((subnet) => !isSubnetPublic(subnet)).forEach((subnet) => { - getSubnetCidrRange(subnet.subnetId).then((cidrRange) => { - if (cidrRange){ - pgvectorSg.connections.allowFrom( - Peer.ipv4(cidrRange), - Port.tcp(config.restApiConfig.rdsConfig.dbPort), - 'Allow REST API private subnets to communicate with LiteLLM database', - ); - } - }); + const subNets = config.subnets && config.vpcId ? vpc.subnetSelection?.subnets : vpc.vpc.isolatedSubnets.concat(vpc.vpc.privateSubnets); + subNets?.forEach((subnet) => { + pgvectorSg.connections.allowFrom( + Peer.ipv4(config.subnets ? config.subnets.filter((filteredSubnet) => filteredSubnet.subnetId === subnet.subnetId)?.[0]?.ipv4CidrBlock : subnet.ipv4CidrBlock), + Port.tcp(config.restApiConfig.rdsConfig.dbPort), + 'Allow REST API private subnets to communicate with LiteLLM database', + ); }); const username = ragConfig.rdsConfig.username; diff --git a/lib/schema.ts b/lib/schema.ts index e7537588..ad76342e 100644 --- a/lib/schema.ts +++ b/lib/schema.ts @@ -1,18 +1,18 @@ /** - Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"). - You may not use this file except in compliance with the License. - You may obtain a copy of the License at + Licensed under the Apache License, Version 2.0 (the "License"). + You may not use this file except in compliance with the License. + You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ // Models for schema validation. import * as fs from 'fs'; @@ -32,14 +32,6 @@ const REMOVAL_POLICIES: Record = { retain: cdk.RemovalPolicy.RETAIN, }; -/** - * Enum for different types of models. - */ -export enum ModelType { - TEXTGEN = 'textgen', - EMBEDDING = 'embedding', -} - /** * Enum for different types of ECS container image sources. */ @@ -50,31 +42,12 @@ export enum EcsSourceType { TARBALL = 'tarball', } -/** - * Details and configurations of a registered model. 
- * - * @property {string} provider - Model provider, of the form .. - * @property {string} modelName - The unique name that identifies the model. - * @property {string} modelId - The unique user-provided name for the model. - * @property {ModelType} modelType - Specifies the type of model (e.g., 'textgen', 'embedding'). - * @property {string} endpointUrl - The URL endpoint where the model can be accessed or invoked. - * @property {boolean} streaming - Indicates whether the model supports streaming capabilities. - */ -export type RegisteredModel = { - provider: string; - modelId: string; - modelName: string; - modelType: ModelType; - endpointUrl: string; - streaming?: boolean; -}; - /** * Custom security groups for application. * - * @property {ec2.SecurityGroup} ecsModelAlbSg - ECS model application load balancer security group. - * @property {ec2.SecurityGroup} restApiAlbSg - REST API application load balancer security group. - * @property {ec2.SecurityGroup} lambdaSecurityGroup - Lambda security group. + * @property {ec2.SecurityGroup} ecsModelAlbSg - .describe('ECS model application load balancer security group.') + * @property {ec2.SecurityGroup} restApiAlbSg - .describe('REST API application load balancer security group.') + * @property {ec2.SecurityGroup} lambdaSecurityGroup - .describe('Lambda security group.') */ export type SecurityGroups = { ecsModelAlbSg: ec2.SecurityGroup; @@ -82,22 +55,13 @@ export type SecurityGroups = { lambdaSecurityGroup: ec2.SecurityGroup; }; -/** - * Metadata for a specific EC2 instance type. - * - * @property {number} memory - Memory in megabytes (MB). - * @property {number} gpuCount - Number of GPUs. - * @property {string} nvmePath - Path to NVMe drive to mount. - * @property {number} maxThroughput - Maximum network throughput in gigabits per second (Gbps). - * @property {number} vCpus - Number of virtual CPUs (vCPUs). - */ const Ec2TypeSchema = z.object({ - memory: z.number(), - gpuCount: z.number().min(0), - nvmePath: z.string().optional().default(''), - maxThroughput: z.number(), - vCpus: z.number(), -}); + memory: z.number().describe('Memory in megabytes (MB)'), + gpuCount: z.number().min(0).describe('Number of GPUs'), + nvmePath: z.string().default('').describe('Path to NVMe drive to mount'), + maxThroughput: z.number().describe('Maximum network throughput in gigabits per second (Gbps)'), + vCpus: z.number().describe('Number of virtual CPUs (vCPUs)'), +}).describe('Metadata for a specific EC2 instance type.'); type Ec2Type = z.infer; @@ -258,12 +222,12 @@ export class Ec2Metadata { }; /** - * Getter method to access EC2 metadata. Retrieves the metadata for a specific EC2 instance type. - * - * @param {string} key - The key representing the EC2 instance type (e.g., 'g4dn.xlarge'). - * @throws {Error} Throws an error if no metadata is found for the specified EC2 instance type. - * @returns {Ec2Type} The metadata for the specified EC2 instance type. - */ + * Getter method to access EC2 metadata. Retrieves the metadata for a specific EC2 instance type. + * + * @param {string} key - .describe('The key representing the EC2 instance type (e.g., 'g4dn.xlarge').') + * @throws {Error} Throws an error if no metadata is found for the specified EC2 instance type. + * @returns {Ec2Type} The metadata for the specified EC2 instance type. + */ static get (key: string): Ec2Type { const instance = this.instances[key]; if (!instance) { @@ -273,10 +237,10 @@ export class Ec2Metadata { } /** - * Get EC2 instances defined with metadata. 
- * - * @returns {string[]} Array of EC2 instances. - */ + * Get EC2 instances defined with metadata. + * + * @returns {string[]} Array of EC2 instances. + */ static getValidInstanceKeys (): string[] { return Object.keys(this.instances); } @@ -284,68 +248,43 @@ export class Ec2Metadata { const VALID_INSTANCE_KEYS = Ec2Metadata.getValidInstanceKeys() as [string, ...string[]]; -/** - * Configuration for container health checks. - * - * @property {string[]} [command=['CMD-SHELL', 'exit 0']] - The command to run for health checks. - * @property {number} [interval=10] - The time interval between health checks, in seconds. - * @property {number} [startPeriod=30] - The time to wait before starting the first health check, in seconds. - * @property {number} [timeout=5] - The maximum time allowed for each health check to complete, in seconds. - * @property {number} [retries=2] - The number of times to retry a failed health check before considering the container - * as unhealthy. - */ const ContainerHealthCheckConfigSchema = z.object({ - command: z.array(z.string()).default(['CMD-SHELL', 'exit 0']), - interval: z.number().default(10), - startPeriod: z.number().default(30), - timeout: z.number().default(5), - retries: z.number().default(2), -}); + command: z.array(z.string()).default(['CMD-SHELL', 'exit 0']).describe('The command to run for health checks'), + interval: z.number().default(10).describe('The time interval between health checks, in seconds.'), + startPeriod: z.number().default(30).describe('The time to wait before starting the first health check, in seconds.'), + timeout: z.number().default(5).describe('The maximum time allowed for each health check to complete, in seconds'), + retries: z.number().default(2).describe('The number of times to retry a failed health check before considering the container as unhealthy.'), +}) + .describe('Configuration for container health checks'); -/** - * Container image that will use tarball on disk - */ const ImageTarballAsset = z.object({ path: z.string(), type: z.literal(EcsSourceType.TARBALL), -}); +}) + .describe('Container image that will use tarball on disk'); -/** - * Container image that will be built based on Dockerfile and assets at the supplied path - */ const ImageSourceAsset = z.object({ baseImage: z.string(), path: z.string(), type: z.literal(EcsSourceType.ASSET), -}); +}) + .describe('Container image that will be built based on Dockerfile and assets at the supplied path'); -/** - * Container image that will be pulled from the specified ECR repository - */ const ImageECRAsset = z.object({ repositoryArn: z.string(), tag: z.string().optional(), type: z.literal(EcsSourceType.ECR), -}); +}) + .describe('Container image that will be pulled from the specified ECR repository'); -/** - * Container image that will be pulled from the specified public registry - */ const ImageRegistryAsset = z.object({ registry: z.string(), type: z.literal(EcsSourceType.REGISTRY), -}); +}) + .describe('Container image that will be pulled from the specified public registry'); -/** - * Configuration for a container. - * - * @property {string} baseImage - Base image for the container. - * @property {Record} [environment={}] - Environment variables for the container. - * @property {ContainerHealthCheckConfig} [healthCheckConfig={}] - Health check configuration for the container. - * @property {number} [sharedMemorySize=0] - The value for the size of the /dev/shm volume. 
- */ const ContainerConfigSchema = z.object({ - image: z.union([ImageTarballAsset, ImageSourceAsset, ImageECRAsset, ImageRegistryAsset]), + image: z.union([ImageTarballAsset, ImageSourceAsset, ImageECRAsset, ImageRegistryAsset]).describe('Base image for the container.'), environment: z .record(z.any()) .transform((obj) => { @@ -357,109 +296,62 @@ const ContainerConfigSchema = z.object({ {} as Record, ); }) - .default({}), - sharedMemorySize: z.number().min(0).optional().default(0), + .default({}) + .describe('Environment variables for the container.'), + sharedMemorySize: z.number().min(0).default(0).describe('The value for the size of the /dev/shm volume.'), healthCheckConfig: ContainerHealthCheckConfigSchema.default({}), -}); +}).describe('Configuration for the container.'); -/** - * Configuration schema for health checks in load balancer settings. - * - * @property {string} path - Path for the health check. - * @property {number} [interval=30] - Interval in seconds between health checks. - * @property {number} [timeout=10] - Timeout in seconds for each health check. - * @property {number} [healthyThresholdCount=2] - Number of consecutive successful health checks required to consider - * the target healthy. - * @property {number} [unhealthyThresholdCount=2] - Number of consecutive failed health checks required to consider the - * target unhealthy. - */ const HealthCheckConfigSchema = z.object({ - path: z.string(), - interval: z.number().default(30), - timeout: z.number().default(10), - healthyThresholdCount: z.number().default(2), - unhealthyThresholdCount: z.number().default(2), -}); + path: z.string().describe('Path for the health check.'), + interval: z.number().default(30).describe('Interval in seconds between health checks.'), + timeout: z.number().default(10).describe('Timeout in seconds for each health check.'), + healthyThresholdCount: z.number().default(2).describe('Number of consecutive successful health checks required to consider the target healthy.'), + unhealthyThresholdCount: z.number().default(2).describe('Number of consecutive failed health checks required to consider the target unhealthy.'), +}) + .describe('Health check configuration for the load balancer.'); -/** - * Configuration schema for the load balancer. - * - * @property {string} [sslCertIamArn=null] - SSL certificate IAM ARN for load balancer. - * @property {HealthCheckConfig} healthCheckConfig - Health check configuration for the load balancer. - * @property {string} domainName - Domain name to use instead of the load balancer's default DNS name. - */ const LoadBalancerConfigSchema = z.object({ - sslCertIamArn: z.string().optional().nullable().default(null), + sslCertIamArn: z.string().nullish().default(null).describe('SSL certificate IAM ARN for load balancer.'), healthCheckConfig: HealthCheckConfigSchema, - domainName: z.string().optional().nullable().default(null), -}); + domainName: z.string().nullish().default(null).describe('Domain name to use instead of the load balancer\'s default DNS name.'), +}) + .describe('Configuration for load balancer settings.'); -/** - * Configuration schema for ECS auto scaling metrics. - * - * @property {string} AlbMetricName - Name of the ALB metric. - * @property {number} targetValue - Target value for the metric. - * @property {number} [duration=60] - Duration in seconds for metric evaluation. - * @property {number} [estimatedInstanceWarmup=180] - Estimated warm-up time in seconds until a newly launched instance - * can send metrics to CloudWatch. 
- * - */ const MetricConfigSchema = z.object({ - AlbMetricName: z.string(), - targetValue: z.number(), - duration: z.number().default(60), - estimatedInstanceWarmup: z.number().min(0).default(180), -}); + AlbMetricName: z.string().describe('Name of the ALB metric.'), + targetValue: z.number().describe('Target value for the metric.'), + duration: z.number().default(60).describe('Duration in seconds for metric evaluation.'), + estimatedInstanceWarmup: z.number().min(0).default(180).describe('Estimated warm-up time in seconds until a newly launched instance can send metrics to CloudWatch.'), +}) + .describe('Metric configuration for ECS auto scaling.'); -/** - * Configuration schema for ECS auto scaling settings. -* -* @property {number} [minCapacity=1] - Minimum capacity for auto scaling. Must be at least 1. -* @property {number} [maxCapacity=2] - Maximum capacity for auto scaling. Must be at least 1. -* @property {number} [cooldown=420] - Cool down period in seconds between scaling activities. -* @property {number} [defaultInstanceWarmup=180] - Default warm-up time in seconds until a newly launched instance can - send metrics to CloudWatch. -* @property {MetricConfig} metricConfig - Metric configuration for auto scaling. -*/ const AutoScalingConfigSchema = z.object({ blockDeviceVolumeSize: z.number().min(30).default(30), - minCapacity: z.number().min(1).default(1), - maxCapacity: z.number().min(1).default(2), - defaultInstanceWarmup: z.number().default(180), - cooldown: z.number().min(1).default(420), + minCapacity: z.number().min(1).default(1).describe('Minimum capacity for auto scaling. Must be at least 1.'), + maxCapacity: z.number().min(1).default(2).describe('Maximum capacity for auto scaling. Must be at least 1.'), + defaultInstanceWarmup: z.number().default(180).describe('Default warm-up time in seconds until a newly launched instance can'), + cooldown: z.number().min(1).default(420).describe('Cool down period in seconds between scaling activities.'), metricConfig: MetricConfigSchema, -}); +}) + .describe('Configuration for auto scaling settings.'); -/** - * Configuration schema for an ECS model. - * - * @property {AmiHardwareType} amiHardwareType - Name of the model. - * @property {AutoScalingConfigSchema} autoScalingConfig - Configuration for auto scaling settings. - * @property {Record} buildArgs - Optional build args to be applied when creating the - * task container if containerConfig.image.type is ASSET - * @property {ContainerConfig} containerConfig - Configuration for the container. - * @property {number} [containerMemoryBuffer=2048] - This is the amount of memory to buffer (or subtract off) - * from the total instance memory, if we don't include this, - * the container can have a hard time finding available RAM - * resources to start and the tasks will fail deployment - * @property {Record} environment - Environment variables set on the task container - * @property {identifier} modelType - Unique identifier for the cluster which will be used when naming resources - * @property {string} instanceType - EC2 instance type for running the model. - * @property {boolean} [internetFacing=false] - Whether or not the cluster will be configured as internet facing - * @property {LoadBalancerConfig} loadBalancerConfig - Configuration for load balancer settings. 
- */ const EcsBaseConfigSchema = z.object({ - amiHardwareType: z.nativeEnum(AmiHardwareType), - autoScalingConfig: AutoScalingConfigSchema, - buildArgs: z.record(z.string()).optional(), + amiHardwareType: z.nativeEnum(AmiHardwareType).describe('Name of the model.'), + autoScalingConfig: AutoScalingConfigSchema.describe('Configuration for auto scaling settings.'), + buildArgs: z.record(z.string()).optional() + .describe('Optional build args to be applied when creating the task container if containerConfig.image.type is ASSET'), containerConfig: ContainerConfigSchema, - containerMemoryBuffer: z.number().default(1024 * 2), - environment: z.record(z.string()), + containerMemoryBuffer: z.number().default(1024 * 2) + .describe('This is the amount of memory to buffer (or subtract off) from the total instance memory, ' + + 'if we don\'t include this, the container can have a hard time finding available RAM resources to start and the tasks will fail deployment'), + environment: z.record(z.string()).describe('Environment variables set on the task container'), identifier: z.string(), - instanceType: z.enum(VALID_INSTANCE_KEYS), - internetFacing: z.boolean().default(false), + instanceType: z.enum(VALID_INSTANCE_KEYS).describe('EC2 instance type for running the model.'), + internetFacing: z.boolean().default(false).describe('Whether or not the cluster will be configured as internet facing'), loadBalancerConfig: LoadBalancerConfigSchema, -}); +}) + .describe('Configuration schema for an ECS model'); /** * Type representing configuration for an ECS model. @@ -471,23 +363,18 @@ type EcsBaseConfig = z.infer; */ export type ECSConfig = EcsBaseConfig; -/** - * Configuration schema for an ECS model. - * - * @property {string} modelName - Name of the model. - * @property {string} baseImage - Base image for the container. - * @property {string} inferenceContainer - Prebuilt inference container for serving model. - */ const EcsModelConfigSchema = z .object({ - modelName: z.string(), - baseImage: z.string(), + modelName: z.string().describe('Name of the model.'), + baseImage: z.string().describe('Base image for the container.'), inferenceContainer: z .union([z.literal('tgi'), z.literal('tei'), z.literal('instructor'), z.literal('vllm')]) .refine((data) => { return !data.includes('.'); // string cannot contain a period }) - }); + .describe('Prebuilt inference container for serving model.'), + }) + .describe('Configuration schema for an ECS model.'); /** * Type representing configuration for an ECS model. @@ -499,15 +386,6 @@ type EcsModelConfig = z.infer; */ export type ModelConfig = EcsModelConfig; -/** - * Configuration schema for authorization. - * - * @property {string} [authority=null] - URL of OIDC authority. - * @property {string} [clientId=null] - Client ID for OIDC IDP . - * @property {string} [adminGroup=null] - Name of the admin group. - * @property {string} [jwtGroupsProperty=null] - Name of the JWT groups property. - * @property {string[]} [additionalScopes=null] - Additional JWT scopes to request. 
- */ const AuthConfigSchema = z.object({ authority: z.string().transform((value) => { if (value.endsWith('/')) { @@ -515,48 +393,32 @@ const AuthConfigSchema = z.object({ } else { return value; } - }), - clientId: z.string(), - adminGroup: z.string().optional().default(''), - jwtGroupsProperty: z.string().optional().default(''), - additionalScopes: z.array(z.string()).optional().default([]), -}); + }) + .describe('URL of OIDC authority.'), + clientId: z.string().describe('Client ID for OIDC IDP .'), + adminGroup: z.string().default('').describe('Name of the admin group.'), + jwtGroupsProperty: z.string().default('').describe('Name of the JWT groups property.'), + additionalScopes: z.array(z.string()).default([]).describe('Additional JWT scopes to request.'), +}).describe('Configuration schema for authorization.'); -/** - * Configuration schema for RDS Instances needed for LiteLLM scaling or PGVector RAG operations. - * - * The optional fields can be omitted to create a new database instance, otherwise fill in all fields to use - * an existing database instance. - * - * @property {string} username - Database username. - * @property {string} passwordSecretId - SecretsManager Secret ID that stores an existing database password. - * @property {string} dbHost - Database hostname for existing database instance. - * @property {string} dbName - Database name for existing database instance. - * @property {number} dbPort - Port to open on the database instance. - */ const RdsInstanceConfig = z.object({ - username: z.string().optional().default('postgres'), - passwordSecretId: z.string().optional(), - dbHost: z.string().optional(), - dbName: z.string().optional().default('postgres'), - dbPort: z.number().optional().default(5432), -}); + username: z.string().default('postgres').describe('Database username.'), + passwordSecretId: z.string().optional().describe('SecretsManager Secret ID that stores an existing database password.'), + dbHost: z.string().optional().describe('Database hostname for existing database instance.'), + dbName: z.string().default('postgres').describe('Database name for existing database instance.'), + dbPort: z.number().default(5432).describe('Port to open on the database instance.'), +}).describe('Configuration schema for RDS Instances needed for LiteLLM scaling or PGVector RAG operations.\n \n ' + + 'The optional fields can be omitted to create a new database instance, otherwise fill in all fields to use an existing database instance.'); -/** - * Configuration schema for REST API. - * - * @property {boolean} [internetFacing=true] - Whether the REST API ALB will be configured as internet facing. 
- * @property {string} sslCertIamArn - ARN of the self-signed cert to be used throughout the system - */ const FastApiContainerConfigSchema = z.object({ - internetFacing: z.boolean().default(true), - domainName: z.string().optional().nullable().default(null), - sslCertIamArn: z.string().optional().nullable().default(null), - rdsConfig: RdsInstanceConfig.optional() + internetFacing: z.boolean().default(true).describe('Whether the REST API ALB will be configured as internet facing.'), + domainName: z.string().nullish().default(null), + sslCertIamArn: z.string().nullish().default(null).describe('ARN of the self-signed cert to be used throughout the system'), + rdsConfig: RdsInstanceConfig .default({ dbName: 'postgres', username: 'postgres', - dbPort: 5432 + dbPort: 5432, }) .refine( (config) => { @@ -564,11 +426,11 @@ const FastApiContainerConfigSchema = z.object({ }, { message: - 'We do not allow using an existing DB for LiteLLM because of its requirement in internal model management ' + - 'APIs. Please do not define the dbHost or passwordSecretId fields for the FastAPI container DB config.', + 'We do not allow using an existing DB for LiteLLM because of its requirement in internal model management ' + + 'APIs. Please do not define the dbHost or passwordSecretId fields for the FastAPI container DB config.', }, ), -}); +}).describe('Configuration schema for REST API.'); /** * Enum for different types of RAG repositories available @@ -591,51 +453,39 @@ const OpenSearchExistingClusterConfig = z.object({ endpoint: z.string(), }); -/** - * Configuration schema for RAG repository. Defines settings for OpenSearch. - */ -const RagRepositoryConfigSchema = z.object({ - repositoryId: z.string(), - type: z.nativeEnum(RagRepositoryType), - opensearchConfig: z.union([OpenSearchExistingClusterConfig, OpenSearchNewClusterConfig]).optional(), - rdsConfig: RdsInstanceConfig.optional(), - pipelines: z.array(z.object({ - chunkOverlap: z.number(), - chunkSize: z.number(), - embeddingModel: z.string(), - s3Bucket: z.string(), - s3Prefix: z.string(), - trigger: z.union([z.literal('daily'), z.literal('event')]), - collectionName: z.string() - })).optional() -}).refine((input) => { - if ( - (input.type === RagRepositoryType.OPENSEARCH && input.opensearchConfig === undefined) || - (input.type === RagRepositoryType.PGVECTOR && input.rdsConfig === undefined) - ) { - return false; - } - return true; -}); +const RagRepositoryConfigSchema = z + .object({ + repositoryId: z.string(), + type: z.nativeEnum(RagRepositoryType), + opensearchConfig: z.union([OpenSearchExistingClusterConfig, OpenSearchNewClusterConfig]).optional(), + rdsConfig: RdsInstanceConfig.optional(), + pipelines: z.array(z.object({ + chunkOverlap: z.number(), + chunkSize: z.number(), + embeddingModel: z.string(), + s3Bucket: z.string(), + s3Prefix: z.string(), + trigger: z.union([z.literal('daily'), z.literal('event')]), + collectionName: z.string() + })).optional(), + }) + .refine((input) => { + return !((input.type === RagRepositoryType.OPENSEARCH && input.opensearchConfig === undefined) || + (input.type === RagRepositoryType.PGVECTOR && input.rdsConfig === undefined)); + }) + .describe('Configuration schema for RAG repository. Defines settings for OpenSearch.'); -/** - * Configuration schema for RAG file processing. Determines the chunk size and chunk overlap when processing documents. 
- */ const RagFileProcessingConfigSchema = z.object({ chunkSize: z.number().min(100).max(10000), chunkOverlap: z.number().min(0), -}); +}) + .describe('Configuration schema for RAG file processing. Determines the chunk size and chunk overlap when processing documents.'); -/** - * Configuration schema for pypi. - * - * @property {string} [indexUrl=''] - URL for the pypi index. - * @property {string} [trustedHost=''] - Trusted host for pypi. - */ const PypiConfigSchema = z.object({ - indexUrl: z.string().optional().default(''), - trustedHost: z.string().optional().default(''), -}); + indexUrl: z.string().default('').describe('URL for the pypi index.'), + trustedHost: z.string().default('').describe('Trusted host for pypi.'), +}) + .describe('Configuration schema for pypi'); /** * Enum for different types of stack synthesizers @@ -646,113 +496,83 @@ export enum stackSynthesizerType { LegacyStackSynthesizer = 'LegacyStackSynthesizer', } -/** - * Configuration schema for API Gateway Endpoint - * - * @property {string} domainName - Custom domain name for API Gateway Endpoint - */ const ApiGatewayConfigSchema = z .object({ - domainName: z.string().optional().nullable().default(null), + domainName: z.string().nullish().default(null).describe('Custom domain name for API Gateway Endpoint'), }) - .optional(); + .optional() + .describe('Configuration schema for API Gateway Endpoint'); -/** - * Core LiteLLM configuration. - * See https://litellm.vercel.app/docs/proxy/configs#all-settings for more details about each field. - */ const LiteLLMConfig = z.object({ db_key: z.string().refine( (key) => key.startsWith('sk-'), // key needed for model management actions 'Key string must be defined for model management operations, and it must start with "sk-".' + - 'This can be any string, and a random UUID is recommended. Example: sk-f132c7cc-059c-481b-b5ca-a42e191672aa', + 'This can be any string, and a random UUID is recommended. Example: sk-f132c7cc-059c-481b-b5ca-a42e191672aa', ), -}); +}) + .describe('Core LiteLLM configuration - see https://litellm.vercel.app/docs/proxy/configs#all-settings for more details about each field.'); -/** - * Raw application configuration schema. - * - * @property {string} [appName='lisa'] - Name of the application. - * @property {string} [profile=null] - AWS CLI profile for deployment. - * @property {string} deploymentName - Name of the deployment. - * @property {string} accountNumber - AWS account number for deployment. Must be 12 digits. - * @property {string} region - AWS region for deployment. - * @property {string} deploymentStage - Deployment stage for the application. - * @property {string} removalPolicy - Removal policy for resources (destroy or retain). - * @property {boolean} [runCdkNag=false] - Whether to run CDK Nag checks. - * @property {string} [lambdaSourcePath='./lambda'] - Path to Lambda source code dir. - * @property {string} s3BucketModels - S3 bucket for models. - * @property {string} mountS3DebUrl - URL for S3-mounted Debian package. - * @property {string[]} [accountNumbersEcr=null] - List of AWS account numbers for ECR repositories. - * @property {boolean} [deployRag=false] - Whether to deploy RAG stacks. - * @property {boolean} [deployChat=true] - Whether to deploy chat stacks. - * @property {boolean} [deployDocs=true] - Whether to deploy docs stacks. - * @property {boolean} [deployUi=true] - Whether to deploy UI stacks. - * @property {string} logLevel - Log level for application. - * @property {AuthConfigSchema} authConfig - Authorization configuration. 
- * @property {RagRepositoryConfigSchema} ragRepositoryConfig - Rag Repository configuration. - * @property {RagFileProcessingConfigSchema} ragFileProcessingConfig - Rag file processing configuration. - * @property {EcsModelConfigSchema[]} ecsModels - Array of ECS model configurations. - * @property {ApiGatewayConfigSchema} apiGatewayConfig - API Gateway Endpoint configuration. - * @property {string} [nvmeHostMountPath='/nvme'] - Host path for NVMe drives. - * @property {string} [nvmeContainerMountPath='/nvme'] - Container path for NVMe drives. - * @property {Array<{ Key: string, Value: string }>} [tags=null] - Array of key-value pairs for tagging. - * @property {string} [deploymentPrefix=null] - Prefix for deployment resources. - * @property {string} [webAppAssetsPath=null] - Optional path to precompiled webapp assets. If not - * specified the web application will be built at deploy - * time. - */ const RawConfigSchema = z .object({ - appName: z.string().default('lisa'), + appName: z.string().default('lisa').describe('Name of the application.'), profile: z .string() - .optional() - .nullable() - .transform((value) => value ?? ''), - deploymentName: z.string().default('prod'), + .nullish() + .transform((value) => value ?? '') + .describe('AWS CLI profile for deployment.'), + deploymentName: z.string().default('prod').describe('Name of the deployment.'), accountNumber: z .number() .or(z.string()) .transform((value) => value.toString()) .refine((value) => value.length === 12, { message: 'AWS account number should be 12 digits. If your account ID starts with 0, then please surround the ID with quotation marks.', - }), - region: z.string(), + }) + .describe('AWS account number for deployment. Must be 12 digits.'), + region: z.string().describe('AWS region for deployment.'), restApiConfig: FastApiContainerConfigSchema, - vpcId: z.string().optional(), - subnetIds: z.array(z.string().startsWith('subnet-')).optional(), - deploymentStage: z.string().default('prod'), - removalPolicy: z.union([z.literal('destroy'), z.literal('retain')]).transform((value) => REMOVAL_POLICIES[value]).default('destroy'), - runCdkNag: z.boolean().default(false), - privateEndpoints: z.boolean().optional().default(false), - s3BucketModels: z.string(), - mountS3DebUrl: z.string().optional(), + vpcId: z.string().optional().describe('VPC ID for the application. (e.g. vpc-0123456789abcdef)'), + subnets: z.array(z.object({ + subnetId: z.string().startsWith('subnet-'), + ipv4CidrBlock: z.string() + })).optional().describe('Array of subnet objects for the application. These contain a subnetId(e.g. [subnet-fedcba9876543210] and ipv4CidrBlock'), + deploymentStage: z.string().default('prod').describe('Deployment stage for the application.'), + removalPolicy: z.union([z.literal('destroy'), z.literal('retain')]) + .transform((value) => REMOVAL_POLICIES[value]) + .default('destroy') + .describe('Removal policy for resources (destroy or retain).'), + runCdkNag: z.boolean().default(false).describe('Whether to run CDK Nag checks.'), + privateEndpoints: z.boolean().default(false).describe('Whether to use privateEndpoints for REST API.'), + s3BucketModels: z.string().describe('S3 bucket for models.'), + mountS3DebUrl: z.string().describe('URL for S3-mounted Debian package.'), accountNumbersEcr: z .array(z.union([z.number(), z.string()])) .transform((arr) => arr.map(String)) .refine((value) => value.every((num) => num.length === 12), { message: 'AWS account number should be 12 digits. 
If your account ID starts with 0, then please surround the ID with quotation marks.', }) - .optional(), - deployRag: z.boolean().optional().default(true), - deployChat: z.boolean().optional().default(true), - deployDocs: z.boolean().optional().default(true), - deployUi: z.boolean().optional().default(true), - logLevel: z.union([z.literal('DEBUG'), z.literal('INFO'), z.literal('WARNING'), z.literal('ERROR')]).default('DEBUG'), - authConfig: AuthConfigSchema.optional(), - pypiConfig: PypiConfigSchema.optional().default({ + .optional() + .describe('List of AWS account numbers for ECR repositories.'), + deployRag: z.boolean().default(true).describe('Whether to deploy RAG stacks.'), + deployChat: z.boolean().default(true).describe('Whether to deploy chat stacks.'), + deployDocs: z.boolean().default(true).describe('Whether to deploy docs stacks.'), + deployUi: z.boolean().default(true).describe('Whether to deploy UI stacks.'), + logLevel: z.union([z.literal('DEBUG'), z.literal('INFO'), z.literal('WARNING'), z.literal('ERROR')]) + .default('DEBUG') + .describe('Log level for application.'), + authConfig: AuthConfigSchema.optional().describe('Authorization configuration.'), + pypiConfig: PypiConfigSchema.default({ indexUrl: '', trustedHost: '', - }), - condaUrl: z.string().optional().default(''), - certificateAuthorityBundle: z.string().optional().default(''), - ragRepositories: z.array(RagRepositoryConfigSchema).default([]), - ragFileProcessingConfig: RagFileProcessingConfigSchema.optional(), - ecsModels: z.array(EcsModelConfigSchema).optional(), - apiGatewayConfig: ApiGatewayConfigSchema.optional(), - nvmeHostMountPath: z.string().default('/nvme'), - nvmeContainerMountPath: z.string().default('/nvme'), + }).describe('Pypi configuration.'), + condaUrl: z.string().default('').describe('Conda URL configuration'), + certificateAuthorityBundle: z.string().default('').describe('Certificate Authority Bundle file'), + ragRepositories: z.array(RagRepositoryConfigSchema).default([]).describe('Rag Repository configuration.'), + ragFileProcessingConfig: RagFileProcessingConfigSchema.optional().describe('Rag file processing configuration.'), + ecsModels: z.array(EcsModelConfigSchema).optional().describe('Array of ECS model configurations.'), + apiGatewayConfig: ApiGatewayConfigSchema, + nvmeHostMountPath: z.string().default('/nvme').describe('Host path for NVMe drives.'), + nvmeContainerMountPath: z.string().default('/nvme').describe('Container path for NVMe drives.'), tags: z .array( z.object({ @@ -760,18 +580,20 @@ const RawConfigSchema = z Value: z.string(), }), ) - .optional(), - deploymentPrefix: z.string().optional(), - webAppAssetsPath: z.string().optional(), + .optional() + .describe('Array of key-value pairs for tagging.'), + deploymentPrefix: z.string().optional().describe('Prefix for deployment resources.'), + webAppAssetsPath: z.string().optional().describe('Optional path to precompiled webapp assets. 
If not specified the web application will be built at deploy time.'), lambdaLayerAssets: z .object({ - authorizerLayerPath: z.string().optional(), - commonLayerPath: z.string().optional(), - fastapiLayerPath: z.string().optional(), - ragLayerPath: z.string().optional(), - sdkLayerPath: z.string().optional(), + authorizerLayerPath: z.string().optional().describe('Lambda Authorizer code path'), + commonLayerPath: z.string().optional().describe('Lambda common layer code path'), + fastapiLayerPath: z.string().optional().describe('Lambda API code path'), + ragLayerPath: z.string().optional().describe('Lambda RAG layer code path'), + sdkLayerPath: z.string().optional().describe('Lambda SDK layer code path'), }) - .optional(), + .optional() + .describe('Configuration for local Lambda layer code'), permissionsBoundaryAspect: z .object({ permissionsBoundaryPolicyName: z.string(), @@ -779,8 +601,9 @@ const RawConfigSchema = z policyPrefix: z.string().max(20).optional(), instanceProfilePrefix: z.string().optional(), }) - .optional(), - stackSynthesizer: z.nativeEnum(stackSynthesizerType).optional(), + .optional() + .describe('Aspect CDK injector for permissions. Ref: https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.aws_iam.PermissionsBoundary.html'), + stackSynthesizer: z.nativeEnum(stackSynthesizerType).optional().describe('Set the stack synthesize type. Ref: https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.StackSynthesizer.html'), litellmConfig: LiteLLMConfig, }) .refine((config) => (config.pypiConfig.indexUrl && config.region.includes('iso')) || !config.region.includes('iso'), { @@ -806,21 +629,22 @@ const RawConfigSchema = z (config) => { return ( !(config.deployChat || config.deployRag || config.deployUi) || - config.authConfig + config.authConfig ); }, { message: - 'An auth config must be provided when deploying the chat, RAG, or UI stacks or when deploying an internet ' + - 'facing ALB. Check that `deployChat`, `deployRag`, `deployUi`, and `restApiConfig.internetFacing` are all ' + - 'false or that an `authConfig` is provided.', + 'An auth config must be provided when deploying the chat, RAG, or UI stacks or when deploying an internet ' + + 'facing ALB. Check that `deployChat`, `deployRag`, `deployUi`, and `restApiConfig.internetFacing` are all ' + + 'false or that an `authConfig` is provided.', }, - ); + ) + .describe('Raw application configuration schema.'); /** * Apply transformations to the raw application configuration schema. * - * @param {Object} rawConfig - The raw application configuration. + * @param {Object} rawConfig - .describe('The raw application configuration.') * @returns {Object} The transformed application configuration. */ export const ConfigSchema = RawConfigSchema.transform((rawConfig) => { @@ -866,12 +690,10 @@ export const ConfigSchema = RawConfigSchema.transform((rawConfig) => { */ export type Config = z.infer; -export type FastApiContainerConfig = z.infer; - /** * Basic properties required for a stack definition in CDK. * - * @property {Config} config - The application configuration. 
+ * @property {Config} config - .describe('The application configuration.') */ export type BaseProps = { config: Config; diff --git a/lib/serve/index.ts b/lib/serve/index.ts index 20ba95fc..5d35f6fd 100644 --- a/lib/serve/index.ts +++ b/lib/serve/index.ts @@ -30,7 +30,6 @@ import { Vpc } from '../networking/vpc'; import { BaseProps } from '../schema'; import { Effect, Policy, PolicyStatement } from 'aws-cdk-lib/aws-iam'; import { Secret } from 'aws-cdk-lib/aws-secretsmanager'; -import { getSubnetCidrRange, isSubnetPublic } from '../api-base/utils'; const HERE = path.resolve(__dirname); @@ -149,17 +148,13 @@ export class LisaServeApplicationStack extends Stack { description: 'Security group for LiteLLM dynamic model management database.', }); - const subNets = config.subnetIds && config.vpcId ? vpc.subnetSelection?.subnets : vpc.vpc.isolatedSubnets.concat(vpc.vpc.privateSubnets); - subNets?.filter((subnet) => !isSubnetPublic(subnet)).forEach((subnet) => { - getSubnetCidrRange(subnet.subnetId).then((cidrRange) => { - if (cidrRange){ - litellmDbSg.connections.allowFrom( - Peer.ipv4(cidrRange), - Port.tcp(config.restApiConfig.rdsConfig.dbPort), - 'Allow REST API private subnets to communicate with LiteLLM database', - ); - } - }); + const subNets = config.subnets && config.vpcId ? vpc.subnetSelection?.subnets : vpc.vpc.isolatedSubnets.concat(vpc.vpc.privateSubnets); + subNets?.forEach((subnet) => { + litellmDbSg.connections.allowFrom( + Peer.ipv4(config.subnets ? config.subnets.filter((filteredSubnet) => filteredSubnet.subnetId === subnet.subnetId)?.[0]?.ipv4CidrBlock : subnet.ipv4CidrBlock), + Port.tcp(config.restApiConfig.rdsConfig.dbPort), + 'Allow REST API private subnets to communicate with LiteLLM database', + ); }); const username = config.restApiConfig.rdsConfig.username; diff --git a/lib/serve/rest-api/src/requirements.txt b/lib/serve/rest-api/src/requirements.txt index 2931cbc8..60339093 100644 --- a/lib/serve/rest-api/src/requirements.txt +++ b/lib/serve/rest-api/src/requirements.txt @@ -1,6 +1,6 @@ aioboto3==13.1.1 aiobotocore==2.13.1 -aiohttp==3.10.2 +aiohttp==3.10.11 boto3==1.34.131 click==8.1.7 cryptography==42.0.8 diff --git a/lib/stages.ts b/lib/stages.ts index 19ffce02..e1edff73 100644 --- a/lib/stages.ts +++ b/lib/stages.ts @@ -102,7 +102,7 @@ export class LisaServeApplicationStage extends Stage { baseStackProps.synthesizer = new DefaultStackSynthesizer(); break; default: - throw Error('Unrecognized config value: "stackSyntehsizer"'); + throw Error('Unrecognized config value: "stackSynthesizer"'); } } diff --git a/lib/user-interface/react/package.json b/lib/user-interface/react/package.json index 19759667..61069de6 100644 --- a/lib/user-interface/react/package.json +++ b/lib/user-interface/react/package.json @@ -1,7 +1,7 @@ { "name": "lisa-web", "private": true, - "version": "3.1.0", + "version": "3.2.0", "type": "module", "scripts": { "dev": "vite", diff --git a/lib/zod2md.config.ts b/lib/zod2md.config.ts new file mode 100644 index 00000000..84e70bbd --- /dev/null +++ b/lib/zod2md.config.ts @@ -0,0 +1,24 @@ +/** + Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"). + You may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +import type { Config } from 'zod2md'; + +export default { + title: 'LISA Configuration Schema', + entry: './lib/schema.ts', + output: './lib/docs/config/schema.md', + tsconfig: 'tsconfig.json', +} satisfies Config; diff --git a/lisa-sdk/pyproject.toml b/lisa-sdk/pyproject.toml index 4bd844ef..20dcfd30 100644 --- a/lisa-sdk/pyproject.toml +++ b/lisa-sdk/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "lisapy" -version = "3.1.0" +version = "3.2.0" description = "A simple SDK to help you interact with LISA. LISA is an LLM hosting solution for AWS dedicated clouds or ADCs." authors = ["Steve Goley "] readme = "README.md" diff --git a/package-lock.json b/package-lock.json index 0ac5490a..4fca840a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,7 +11,6 @@ "license": "Apache-2.0", "dependencies": { "aws-cdk-lib": "2.125.0", - "aws-sdk": "^2.0.0", "cdk-nag": "^2.27.198", "constructs": "^10.0.0", "js-yaml": "^4.1.0", @@ -45,7 +44,8 @@ "lint-staged": "^15.2.10", "ts-jest": "^29.1.1", "ts-node": "^10.9.1", - "typescript": "~5.1.6" + "typescript": "~5.1.6", + "zod2md": "^0.1.4" } }, "node_modules/@ampproject/remapping": { @@ -1738,6 +1738,15 @@ "node": ">=4.0" } }, + "node_modules/@commander-js/extra-typings": { + "version": "12.1.0", + "resolved": "https://registry.npmjs.org/@commander-js/extra-typings/-/extra-typings-12.1.0.tgz", + "integrity": "sha512-wf/lwQvWAA0goIghcb91dQYpkLBcyhOhQNqG/VgWhnKzgt+UOMvra7EX/2fv70arm5RW+PUHoQHHDa6/p77Eqg==", + "dev": true, + "peerDependencies": { + "commander": "~12.1.0" + } + }, "node_modules/@cspotcode/source-map-support": { "version": "0.8.1", "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", @@ -1760,6 +1769,374 @@ "@jridgewell/sourcemap-codec": "^1.4.10" } }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.19.12.tgz", + "integrity": "sha512-bmoCYyWdEL3wDQIVbcyzRyeKLgk2WtWLTWz1ZIAZF/EGbNOwSA6ew3PftJ1PqMiOOGu0OyFMzG53L0zqIpPeNA==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.19.12.tgz", + "integrity": "sha512-qg/Lj1mu3CdQlDEEiWrlC4eaPZ1KztwGJ9B6J+/6G+/4ewxJg7gqj8eVYWvao1bXrqGiW2rsBZFSX3q2lcW05w==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.19.12.tgz", + "integrity": "sha512-P0UVNGIienjZv3f5zq0DP3Nt2IE/3plFzuaS96vihvD0Hd6H/q4WXUGpCxD/E8YrSXfNyRPbpTq+T8ZQioSuPA==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.19.12.tgz", + 
"integrity": "sha512-3k7ZoUW6Q6YqhdhIaq/WZ7HwBpnFBlW905Fa4s4qWJyiNOgT1dOqDiVAQFwBH7gBRZr17gLrlFCRzF6jFh7Kew==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.19.12.tgz", + "integrity": "sha512-B6IeSgZgtEzGC42jsI+YYu9Z3HKRxp8ZT3cqhvliEHovq8HSX2YX8lNocDn79gCKJXOSaEot9MVYky7AKjCs8g==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.19.12.tgz", + "integrity": "sha512-hKoVkKzFiToTgn+41qGhsUJXFlIjxI/jSYeZf3ugemDYZldIXIxhvwN6erJGlX4t5h417iFuheZ7l+YVn05N3A==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.19.12.tgz", + "integrity": "sha512-4aRvFIXmwAcDBw9AueDQ2YnGmz5L6obe5kmPT8Vd+/+x/JMVKCgdcRwH6APrbpNXsPz+K653Qg8HB/oXvXVukA==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.19.12.tgz", + "integrity": "sha512-EYoXZ4d8xtBoVN7CEwWY2IN4ho76xjYXqSXMNccFSx2lgqOG/1TBPW0yPx1bJZk94qu3tX0fycJeeQsKovA8gg==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-arm": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.19.12.tgz", + "integrity": "sha512-J5jPms//KhSNv+LO1S1TX1UWp1ucM6N6XuL6ITdKWElCu8wXP72l9MM0zDTzzeikVyqFE6U8YAV9/tFyj0ti+w==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.19.12.tgz", + "integrity": "sha512-EoTjyYyLuVPfdPLsGVVVC8a0p1BFFvtpQDB/YLEhaXyf/5bczaGeN15QkR+O4S5LeJ92Tqotve7i1jn35qwvdA==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.19.12.tgz", + "integrity": "sha512-Thsa42rrP1+UIGaWz47uydHSBOgTUnwBwNq59khgIwktK6x60Hivfbux9iNR0eHCHzOLjLMLfUMLCypBkZXMHA==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.19.12.tgz", + "integrity": "sha512-LiXdXA0s3IqRRjm6rV6XaWATScKAXjI4R4LoDlvO7+yQqFdlr1Bax62sRwkVvRIrwXxvtYEHHI4dm50jAXkuAA==", + "cpu": [ + "loong64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.19.12", + "resolved": 
"https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.19.12.tgz", + "integrity": "sha512-fEnAuj5VGTanfJ07ff0gOA6IPsvrVHLVb6Lyd1g2/ed67oU1eFzL0r9WL7ZzscD+/N6i3dWumGE1Un4f7Amf+w==", + "cpu": [ + "mips64el" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.19.12.tgz", + "integrity": "sha512-nYJA2/QPimDQOh1rKWedNOe3Gfc8PabU7HT3iXWtNUbRzXS9+vgB0Fjaqr//XNbd82mCxHzik2qotuI89cfixg==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.19.12.tgz", + "integrity": "sha512-2MueBrlPQCw5dVJJpQdUYgeqIzDQgw3QtiAHUC4RBz9FXPrskyyU3VI1hw7C0BSKB9OduwSJ79FTCqtGMWqJHg==", + "cpu": [ + "riscv64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.19.12.tgz", + "integrity": "sha512-+Pil1Nv3Umes4m3AZKqA2anfhJiVmNCYkPchwFJNEJN5QxmTs1uzyy4TvmDrCRNT2ApwSari7ZIgrPeUx4UZDg==", + "cpu": [ + "s390x" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.19.12.tgz", + "integrity": "sha512-B71g1QpxfwBvNrfyJdVDexenDIt1CiDN1TIXLbhOw0KhJzE78KIFGX6OJ9MrtC0oOqMWf+0xop4qEU8JrJTwCg==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.19.12.tgz", + "integrity": "sha512-3ltjQ7n1owJgFbuC61Oj++XhtzmymoCihNFgT84UAmJnxJfm4sYCiSLTXZtE00VWYpPMYc+ZQmB6xbSdVh0JWA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/openbsd-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.19.12.tgz", + "integrity": "sha512-RbrfTB9SWsr0kWmb9srfF+L933uMDdu9BIzdA7os2t0TXhCRjrQyCeOt6wVxr79CKD4c+p+YhCj31HBkYcXebw==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.19.12.tgz", + "integrity": "sha512-HKjJwRrW8uWtCQnQOz9qcU3mUZhTUQvi56Q8DPTLLB+DawoiQdjsYq+j+D3s9I8VFtDr+F9CjgXKKC4ss89IeA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.19.12.tgz", + "integrity": "sha512-URgtR1dJnmGvX864pn1B2YUYNzjmXkuJOIqG2HdU62MVS4EHpU2946OZoTMnRUHklGtJdJZ33QfzdjGACXhn1A==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.19.12", 
+ "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.19.12.tgz", + "integrity": "sha512-+ZOE6pUkMOJfmxmBZElNOx72NKpIa/HFOMGzu8fqzQJ5kgf6aTGrcJaFsNiVMH4JKpMipyK+7k0n2UXN7a8YKQ==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.19.12.tgz", + "integrity": "sha512-T1QyPSDCyMXaO3pzBkF96E8xMkiRYbUEZADd29SyPGabqxMViNoii+NcK7eWJAEoU6RZyEm5lVSIjTmcdoB9HA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, "node_modules/@eslint-community/eslint-utils": { "version": "4.4.0", "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.4.0.tgz", @@ -4236,6 +4613,7 @@ "version": "1.0.7", "resolved": "https://registry.npmjs.org/available-typed-arrays/-/available-typed-arrays-1.0.7.tgz", "integrity": "sha512-wvUjBtSGN7+7SjNpq/9M2Tg350UZD3q62IFZLbRAR1bSMlCo1ZaeW+BJ+D090e4hIIZLBcTDWe4Mh4jvUDajzQ==", + "dev": true, "dependencies": { "possible-typed-array-names": "^1.0.0" }, @@ -4616,37 +4994,6 @@ "node": "^8.16.0 || ^10.6.0 || >=11.0.0" } }, - "node_modules/aws-sdk": { - "version": "2.1692.0", - "resolved": "https://registry.npmjs.org/aws-sdk/-/aws-sdk-2.1692.0.tgz", - "integrity": "sha512-x511uiJ/57FIsbgUe5csJ13k3uzu25uWQE+XqfBis/sB0SFoiElJWXRkgEAUh0U6n40eT3ay5Ue4oPkRMu1LYw==", - "hasInstallScript": true, - "license": "Apache-2.0", - "dependencies": { - "buffer": "4.9.2", - "events": "1.1.1", - "ieee754": "1.1.13", - "jmespath": "0.16.0", - "querystring": "0.2.0", - "sax": "1.2.1", - "url": "0.10.3", - "util": "^0.12.4", - "uuid": "8.0.0", - "xml2js": "0.6.2" - }, - "engines": { - "node": ">= 10.0.0" - } - }, - "node_modules/aws-sdk/node_modules/uuid": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.0.0.tgz", - "integrity": "sha512-jOXGuXZAWdsTH7eZLtyXMqUb9EcWMGZNbL9YcGBJl4MH4nrxHmZJhEHvyLFrkxo+28uLb/NYRcStH48fnD0Vzw==", - "license": "MIT", - "bin": { - "uuid": "dist/bin/uuid" - } - }, "node_modules/babel-jest": { "version": "29.7.0", "resolved": "https://registry.npmjs.org/babel-jest/-/babel-jest-29.7.0.tgz", @@ -4842,26 +5189,6 @@ "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", "dev": true }, - "node_modules/base64-js": { - "version": "1.5.1", - "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", - "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT" - }, "node_modules/bowser": { "version": "2.11.0", "resolved": "https://registry.npmjs.org/bowser/-/bowser-2.11.0.tgz", @@ -4942,32 +5269,31 @@ "node-int64": "^0.4.0" } }, - "node_modules/buffer": { - "version": "4.9.2", - "resolved": "https://registry.npmjs.org/buffer/-/buffer-4.9.2.tgz", - "integrity": "sha512-xq+q3SRMOxGivLhBNaUdC64hDTQwejJ+H0T/NB1XMtTVEwNTrfFF3gAxiyW0Bu/xWEGhjVKgUcMhCrUy2+uCWg==", - "license": "MIT", - "dependencies": { - "base64-js": "^1.0.2", - "ieee754": "^1.1.4", - "isarray": "^1.0.0" - } - }, "node_modules/buffer-from": { "version": "1.1.2", "resolved": 
"https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==" }, - "node_modules/buffer/node_modules/isarray": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", - "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==", - "license": "MIT" + "node_modules/bundle-require": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/bundle-require/-/bundle-require-4.2.1.tgz", + "integrity": "sha512-7Q/6vkyYAwOmQNRw75x+4yRtZCZJXUDmHHlFdkiV0wgv/reNjtJwpu1jPJ0w2kbEpIM0uoKI3S4/f39dU7AjSA==", + "dev": true, + "dependencies": { + "load-tsconfig": "^0.2.3" + }, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "peerDependencies": { + "esbuild": ">=0.17" + } }, "node_modules/call-bind": { "version": "1.0.7", "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.7.tgz", "integrity": "sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w==", + "dev": true, "dependencies": { "es-define-property": "^1.0.0", "es-errors": "^1.3.0", @@ -5504,6 +5830,7 @@ "version": "1.1.4", "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==", + "dev": true, "dependencies": { "es-define-property": "^1.0.0", "es-errors": "^1.3.0", @@ -5811,6 +6138,7 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.0.tgz", "integrity": "sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ==", + "dev": true, "dependencies": { "get-intrinsic": "^1.2.4" }, @@ -5822,6 +6150,7 @@ "version": "1.3.0", "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "dev": true, "engines": { "node": ">= 0.4" } @@ -5878,6 +6207,44 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/esbuild": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.19.12.tgz", + "integrity": "sha512-aARqgq8roFBj054KvQr5f1sFu0D65G+miZRCuJyJ0G13Zwx7vRar5Zhn2tkQNzIXcBrNVsv/8stehpj+GAjgbg==", + "dev": true, + "hasInstallScript": true, + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=12" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.19.12", + "@esbuild/android-arm": "0.19.12", + "@esbuild/android-arm64": "0.19.12", + "@esbuild/android-x64": "0.19.12", + "@esbuild/darwin-arm64": "0.19.12", + "@esbuild/darwin-x64": "0.19.12", + "@esbuild/freebsd-arm64": "0.19.12", + "@esbuild/freebsd-x64": "0.19.12", + "@esbuild/linux-arm": "0.19.12", + "@esbuild/linux-arm64": "0.19.12", + "@esbuild/linux-ia32": "0.19.12", + "@esbuild/linux-loong64": "0.19.12", + "@esbuild/linux-mips64el": "0.19.12", + "@esbuild/linux-ppc64": "0.19.12", + "@esbuild/linux-riscv64": "0.19.12", + "@esbuild/linux-s390x": "0.19.12", + "@esbuild/linux-x64": "0.19.12", + "@esbuild/netbsd-x64": "0.19.12", + "@esbuild/openbsd-x64": "0.19.12", + "@esbuild/sunos-x64": "0.19.12", + "@esbuild/win32-arm64": "0.19.12", + "@esbuild/win32-ia32": "0.19.12", + "@esbuild/win32-x64": "0.19.12" + } + }, "node_modules/escalade": { "version": "3.1.2", "resolved": 
"https://registry.npmjs.org/escalade/-/escalade-3.1.2.tgz", @@ -6345,15 +6712,6 @@ "dev": true, "license": "MIT" }, - "node_modules/events": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/events/-/events-1.1.1.tgz", - "integrity": "sha512-kEcvvCBByWXGnZy6JUlgAp2gBIUjfCAV6P6TgT1/aaQKcmuAEC4OZTV1I4EWQLz2gxZw76atuVyvHhTxvi0Flw==", - "license": "MIT", - "engines": { - "node": ">=0.4.x" - } - }, "node_modules/execa": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/execa/-/execa-5.1.1.tgz", @@ -6600,6 +6958,7 @@ "version": "0.3.3", "resolved": "https://registry.npmjs.org/for-each/-/for-each-0.3.3.tgz", "integrity": "sha512-jqYfLp7mo9vIyQf8ykW2v7A+2N4QjeCeI5+Dz9XraiO1ign81wjiH7Fb9vSOWvQfNtmSa4H2RoQTrrXivdUZmw==", + "dev": true, "dependencies": { "is-callable": "^1.1.3" } @@ -6629,6 +6988,7 @@ "version": "1.1.2", "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "dev": true, "funding": { "url": "https://github.com/sponsors/ljharb" } @@ -6695,6 +7055,7 @@ "version": "1.2.4", "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.4.tgz", "integrity": "sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ==", + "dev": true, "dependencies": { "es-errors": "^1.3.0", "function-bind": "^1.1.2", @@ -6893,6 +7254,7 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.0.1.tgz", "integrity": "sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA==", + "dev": true, "dependencies": { "get-intrinsic": "^1.1.3" }, @@ -6934,6 +7296,7 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz", "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==", + "dev": true, "dependencies": { "es-define-property": "^1.0.0" }, @@ -6945,6 +7308,7 @@ "version": "1.0.3", "resolved": "https://registry.npmjs.org/has-proto/-/has-proto-1.0.3.tgz", "integrity": "sha512-SJ1amZAJUiZS+PhsVLf5tGydlaVB8EdFpaSO4gmiUKUOxk8qzn5AIy4ZeJUmh22znIdk/uMAUT2pl3FxzVUH+Q==", + "dev": true, "engines": { "node": ">= 0.4" }, @@ -6956,6 +7320,7 @@ "version": "1.0.3", "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.3.tgz", "integrity": "sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A==", + "dev": true, "engines": { "node": ">= 0.4" }, @@ -6967,6 +7332,7 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "dev": true, "dependencies": { "has-symbols": "^1.0.3" }, @@ -6981,6 +7347,7 @@ "version": "2.0.2", "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "dev": true, "dependencies": { "function-bind": "^1.1.2" }, @@ -7030,12 +7397,6 @@ "url": "https://github.com/sponsors/typicode" } }, - "node_modules/ieee754": { - "version": "1.1.13", - "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.1.13.tgz", - "integrity": "sha512-4vf7I2LYV/HaWerSo3XmlMkp5eZ83i+/CDluXi/IGTs/O1sejBNhTtnxzmRZfvOUqj7lZjqHkeTvpgSFDlWZTg==", - "license": "BSD-3-Clause" - }, "node_modules/ignore": { 
"version": "5.3.2", "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz", @@ -7112,7 +7473,8 @@ "node_modules/inherits": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", - "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "dev": true }, "node_modules/ini": { "version": "1.3.8", @@ -7134,22 +7496,6 @@ "node": ">= 0.4" } }, - "node_modules/is-arguments": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/is-arguments/-/is-arguments-1.1.1.tgz", - "integrity": "sha512-8Q7EARjzEnKpt/PCD7e1cgUS0a6X8u5tdSiMqXhojOdoV9TsMsiO+9VLC5vAmO8N7/GmXn7yjR8qnA6bVAEzfA==", - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.2", - "has-tostringtag": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, "node_modules/is-array-buffer": { "version": "3.0.4", "resolved": "https://registry.npmjs.org/is-array-buffer/-/is-array-buffer-3.0.4.tgz", @@ -7204,6 +7550,7 @@ "version": "1.2.7", "resolved": "https://registry.npmjs.org/is-callable/-/is-callable-1.2.7.tgz", "integrity": "sha512-1BC0BVFhS/p0qtw6enp8e+8OD0UrK0oFLztSjNzhcKA3WDuJxxAPXzPuPtKkjEY9UUoEWlX/8fgKeu2S8i9JTA==", + "dev": true, "engines": { "node": ">= 0.4" }, @@ -7287,21 +7634,6 @@ "node": ">=6" } }, - "node_modules/is-generator-function": { - "version": "1.0.10", - "resolved": "https://registry.npmjs.org/is-generator-function/-/is-generator-function-1.0.10.tgz", - "integrity": "sha512-jsEjy9l3yiXEQ+PsXdmBwEPcOxaXWLspKdplFUVI9vq1iZgIekeC0L167qeu86czQaxed3q/Uzuw0swL0irL8A==", - "license": "MIT", - "dependencies": { - "has-tostringtag": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, "node_modules/is-glob": { "version": "4.0.3", "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", @@ -7436,6 +7768,7 @@ "version": "1.1.13", "resolved": "https://registry.npmjs.org/is-typed-array/-/is-typed-array-1.1.13.tgz", "integrity": "sha512-uZ25/bUAlUY5fR4OKT4rZQEBrzQWYV9ZJYGGsUmEJ6thodVJ1HX64ePQ6Z0qPWP+m+Uq6e9UugrE38jeYsDSMw==", + "dev": true, "dependencies": { "which-typed-array": "^1.1.14" }, @@ -9296,15 +9629,6 @@ "url": "https://github.com/chalk/supports-color?sponsor=1" } }, - "node_modules/jmespath": { - "version": "0.16.0", - "resolved": "https://registry.npmjs.org/jmespath/-/jmespath-0.16.0.tgz", - "integrity": "sha512-9FzQjJ7MATs1tSpnco1K6ayiYE3figslrXA72G2HQ/n76RzvYlofyi5QM+iX4YRs/pu3yzxlVQSST23+dMDknw==", - "license": "Apache-2.0", - "engines": { - "node": ">= 0.6.0" - } - }, "node_modules/js-tokens": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", @@ -9644,6 +9968,15 @@ "node": ">=18.0.0" } }, + "node_modules/load-tsconfig": { + "version": "0.2.5", + "resolved": "https://registry.npmjs.org/load-tsconfig/-/load-tsconfig-0.2.5.tgz", + "integrity": "sha512-IXO6OCs9yg8tMKzfPZ1YmheJbZCiEsnBdcB03l0OcfK9prKnJb96siuHCr5Fl37/yo9DnKU+TLpxzTUspw9shg==", + "dev": true, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + } + }, "node_modules/locate-path": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", @@ -10379,6 +10712,7 @@ "version": "1.0.0", "resolved": 
"https://registry.npmjs.org/possible-typed-array-names/-/possible-typed-array-names-1.0.0.tgz", "integrity": "sha512-d7Uw+eZoloe0EHDIYoe+bQ5WXnGMOpmiZFTuMWCwpjzzkL2nTjcKiAk4hh8TjnGye2TwWOk3UXucZ+3rbmBa8Q==", + "dev": true, "engines": { "node": ">= 0.4" } @@ -10484,15 +10818,6 @@ } ] }, - "node_modules/querystring": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/querystring/-/querystring-0.2.0.tgz", - "integrity": "sha512-X/xY82scca2tau62i9mDyU9K+I+djTMUsvwf7xnUX5GLvVzgJybOJf4Y6o9Zx3oJK/LSXg5tTZBjwzqVPaPO2g==", - "deprecated": "The querystring API is considered Legacy. new code should use the URLSearchParams API instead.", - "engines": { - "node": ">=0.4.x" - } - }, "node_modules/queue-microtask": { "version": "1.2.3", "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", @@ -10761,12 +11086,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/sax": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/sax/-/sax-1.2.1.tgz", - "integrity": "sha512-8I2a3LovHTOpm7NV5yOyO8IHqgVsfK4+UuySrXU8YXkSRX7k6hCV9b3HrkKCr3nMpgj+0bmocaJJWpvp1oc7ZA==", - "license": "ISC" - }, "node_modules/semver": { "version": "7.6.3", "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz", @@ -10789,6 +11108,7 @@ "version": "1.2.2", "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz", "integrity": "sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==", + "dev": true, "dependencies": { "define-data-property": "^1.1.4", "es-errors": "^1.3.0", @@ -11533,35 +11853,6 @@ "punycode": "^2.1.0" } }, - "node_modules/url": { - "version": "0.10.3", - "resolved": "https://registry.npmjs.org/url/-/url-0.10.3.tgz", - "integrity": "sha512-hzSUW2q06EqL1gKM/a+obYHLIO6ct2hwPuviqTTOcfFVc61UbfJ2Q32+uGL/HCPxKqrdGB5QUwIe7UqlDgwsOQ==", - "license": "MIT", - "dependencies": { - "punycode": "1.3.2", - "querystring": "0.2.0" - } - }, - "node_modules/url/node_modules/punycode": { - "version": "1.3.2", - "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.3.2.tgz", - "integrity": "sha512-RofWgt/7fL5wP1Y7fxE7/EmTLzQVnB0ycyibJ0OOHIlJqTNzglYFxVwETOcIoJqJmpDXJ9xImDv+Fq34F/d4Dw==", - "license": "MIT" - }, - "node_modules/util": { - "version": "0.12.5", - "resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz", - "integrity": "sha512-kZf/K6hEIrWHI6XqOFUiiMa+79wE/D8Q+NCNAWclkyg3b4d2k7s0QGepNjiABc+aR3N1PAyHL7p6UcLY6LmrnA==", - "license": "MIT", - "dependencies": { - "inherits": "^2.0.3", - "is-arguments": "^1.0.4", - "is-generator-function": "^1.0.7", - "is-typed-array": "^1.1.3", - "which-typed-array": "^1.1.2" - } - }, "node_modules/uuid": { "version": "9.0.1", "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", @@ -11639,6 +11930,7 @@ "version": "1.1.15", "resolved": "https://registry.npmjs.org/which-typed-array/-/which-typed-array-1.1.15.tgz", "integrity": "sha512-oV0jmFtUky6CXfkqehVvBP/LSWJ2sy4vWMioiENyJLePrBO/yKyV9OyJySfAKosh+RYkIl5zJCNZ8/4JncrpdA==", + "dev": true, "dependencies": { "available-typed-arrays": "^1.0.7", "call-bind": "^1.0.7", @@ -11741,28 +12033,6 @@ "node": "^12.13.0 || ^14.15.0 || >=16.0.0" } }, - "node_modules/xml2js": { - "version": "0.6.2", - "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.6.2.tgz", - "integrity": "sha512-T4rieHaC1EXcES0Kxxj4JWgaUQHDk+qwHcYOCFHfiwKz7tOVPLq7Hjq9dM1WCMhylqMEfP7hMcOIChvotiZegA==", - "license": "MIT", - "dependencies": { - "sax": ">=0.6.0", - "xmlbuilder": "~11.0.0" - }, - "engines": { - 
"node": ">=4.0.0" - } - }, - "node_modules/xmlbuilder": { - "version": "11.0.1", - "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-11.0.1.tgz", - "integrity": "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==", - "license": "MIT", - "engines": { - "node": ">=4.0" - } - }, "node_modules/y18n": { "version": "5.0.8", "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", @@ -11880,6 +12150,24 @@ "funding": { "url": "https://github.com/sponsors/colinhacks" } + }, + "node_modules/zod2md": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/zod2md/-/zod2md-0.1.4.tgz", + "integrity": "sha512-ZEW9TZd4M9PHB/UeZcLXIjlCbzPUESGvzEN+Ttye18quh4Afap8DYd/zpIPfw+DrVsSSWoNU40HVnfE9UcpmPw==", + "dev": true, + "dependencies": { + "@commander-js/extra-typings": "^12.0.0", + "bundle-require": "^4.0.2", + "commander": "^12.0.0", + "esbuild": "^0.19.11" + }, + "bin": { + "zod2md": "dist/bin.js" + }, + "peerDependencies": { + "zod": "^3.22.0" + } } } } diff --git a/package.json b/package.json index 6bc33d90..5f8721bd 100644 --- a/package.json +++ b/package.json @@ -36,7 +36,8 @@ "lint-staged": "^15.2.10", "ts-jest": "^29.1.1", "ts-node": "^10.9.1", - "typescript": "~5.1.6" + "typescript": "~5.1.6", + "zod2md": "^0.1.4" }, "dependencies": { "aws-cdk-lib": "2.125.0",