diff --git a/.github/workflows/e2e_tests.yaml b/.github/workflows/e2e_tests.yaml index d46d0778e..a80de744a 100644 --- a/.github/workflows/e2e_tests.yaml +++ b/.github/workflows/e2e_tests.yaml @@ -10,7 +10,7 @@ jobs: fail-fast: false matrix: mode: ["server", "library"] - environment: ["ci", "azure"] + environment: ["ci", "azure", "vertexai"] name: "E2E: ${{ matrix.mode }} mode / ${{ matrix.environment }}" @@ -52,8 +52,7 @@ jobs: - name: Load lightspeed-stack.yaml configuration run: | MODE="${{ matrix.mode }}" - CONFIG_FILE="tests/e2e/configuration/lightspeed-stack-${MODE}-mode.yaml" - + CONFIG_FILE="tests/e2e/configuration/${MODE}-mode/lightspeed-stack.yaml" echo "Loading configuration for ${MODE} mode" echo "Source: ${CONFIG_FILE}" @@ -91,6 +90,45 @@ jobs: echo "✅ Successfully obtained Azure access token." echo "AZURE_API_KEY=$ACCESS_TOKEN" >> $GITHUB_ENV + - name: Save VertexAI service account key to file + if: matrix.environment == 'vertexai' + env: + GOOGLE_SA_KEY: ${{ secrets.GOOGLE_SA_KEY }} + run: | + echo "Setting up Google Cloud service account credentials..." + + if [ -z "$GOOGLE_SA_KEY" ]; then + echo "❌ GOOGLE_SA_KEY is not set. Please configure the secret in GitHub repository settings." + exit 1 + fi + + GCP_KEYS_PATH=./tmp/.gcp-keys + echo "GCP_KEYS_PATH=$GCP_KEYS_PATH" >> $GITHUB_ENV + + mkdir -p $GCP_KEYS_PATH + + echo "Writing service account key to file..." + + # Decode from base64, needed because GH changes the key if using the raw key + printf '%s' "$GOOGLE_SA_KEY" | base64 -d > $GCP_KEYS_PATH/gcp-key.json + + # Verify the file was created and is valid JSON + if [ ! -f "$GCP_KEYS_PATH/gcp-key.json" ]; then + echo "❌ Failed to create gcp-key.json file" + exit 1 + fi + + if ! 
jq empty "$GCP_KEYS_PATH/gcp-key.json" 2>/dev/null; then + echo "❌ gcp-key.json is not valid JSON" + exit 1 + fi + echo "✅ gcp-key.json is valid JSON" + + # Set proper permissions (readable by all, needed for container user 1001) + chmod 644 $GCP_KEYS_PATH/gcp-key.json + + echo "GOOGLE_APPLICATION_CREDENTIALS=/opt/app-root/.gcp-keys/gcp-key.json" >> $GITHUB_ENV + - name: Select and configure run.yaml env: CONFIG_ENVIRONMENT: ${{ matrix.environment || 'ci' }} @@ -146,19 +184,30 @@ jobs: run: | echo $QUAY_ROBOT_TOKEN | docker login quay.io -u=$QUAY_ROBOT_USERNAME --password-stdin + - name: Create dummy GCP keys directory + if: matrix.environment != 'vertexai' + run: | + echo "Creating dummy GCP keys directory for non-VertexAI environment..." + mkdir -p ./tmp/.gcp-keys-dummy + echo "✅ Dummy directory created." + - name: Run services (Server Mode) if: matrix.mode == 'server' - env: + env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} AZURE_API_KEY: ${{ env.AZURE_API_KEY }} + VERTEX_AI_LOCATION: ${{ secrets.VERTEX_AI_LOCATION }} + VERTEX_AI_PROJECT: ${{ secrets.VERTEX_AI_PROJECT }} + GOOGLE_APPLICATION_CREDENTIALS: ${{ env.GOOGLE_APPLICATION_CREDENTIALS }} + GCP_KEYS_PATH: ${{ env.GCP_KEYS_PATH }} run: | # Debug: Check if environment variable is available for docker-compose echo "OPENAI_API_KEY is set: $([ -n "$OPENAI_API_KEY" ] && echo 'YES' || echo 'NO')" echo "OPENAI_API_KEY length: ${#OPENAI_API_KEY}" - + docker compose version docker compose up -d - + # Check for errors and show logs if any services failed if docker compose ps | grep -E 'Exit|exited|stopped'; then echo "Some services failed to start - showing logs:" @@ -173,10 +222,14 @@ jobs: env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} AZURE_API_KEY: ${{ env.AZURE_API_KEY }} + VERTEX_AI_LOCATION: ${{ secrets.VERTEX_AI_LOCATION }} + VERTEX_AI_PROJECT: ${{ secrets.VERTEX_AI_PROJECT }} + GOOGLE_APPLICATION_CREDENTIALS: ${{ env.GOOGLE_APPLICATION_CREDENTIALS }} + GCP_KEYS_PATH: ${{ env.GCP_KEYS_PATH }} run: | 
echo "Starting service in library mode (1 container)" docker compose -f docker-compose-library.yaml up -d - + if docker compose -f docker-compose-library.yaml ps | grep -E 'Exit|exited|stopped'; then echo "Service failed to start - showing logs:" docker compose -f docker-compose-library.yaml logs diff --git a/README.md b/README.md index b0c9de235..c7a84a1b5 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,7 @@ Lightspeed Core Stack is based on the FastAPI framework (Uvicorn). The service i |----------------|-----------------------------------------------------------------------| | OpenAI | https://platform.openai.com | | Azure OpenAI | https://azure.microsoft.com/en-us/products/ai-services/openai-service | + | Google VertexAI| https://cloud.google.com/vertex-ai | | RHOAI (vLLM) | See tests/e2e-prow/rhoai/configs/run.yaml | | RHEL AI (vLLM) | See tests/e2e/configs/run-rhelai.yaml | @@ -175,6 +176,9 @@ __Note__: Support for individual models is dependent on the specific inference p | RHEL AI (vLLM)| meta-llama/Llama-3.1-8B-Instruct | Yes | remote::vllm | [1](tests/e2e/configs/run-rhelai.yaml) | | Azure | gpt-5, gpt-5-mini, gpt-5-nano, gpt-5-chat, gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, o3-mini, o4-mini | Yes | remote::azure | [1](examples/azure-run.yaml) | | Azure | o1, o1-mini | No | remote::azure | | +| VertexAI | google/gemini-2.0-flash, google/gemini-2.5-flash, google/gemini-2.5-pro [^1] | Yes | remote::vertexai | [1](examples/vertexai-run.yaml) | + +[^1]: The list of models is limited by design in llama-stack; future versions will probably allow more models to be used (see [here](https://github.com/llamastack/llama-stack/blob/release-0.3.x/llama_stack/providers/remote/inference/vertexai/vertexai.py#L54)) The "provider_type" is used in the llama stack configuration file when refering to the provider. 
diff --git a/docker-compose-library.yaml b/docker-compose-library.yaml index e61eda9d9..4733d5d6c 100644 --- a/docker-compose-library.yaml +++ b/docker-compose-library.yaml @@ -12,20 +12,28 @@ services: # Mount both config files - lightspeed-stack.yaml should have library mode enabled - ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:Z - ./run.yaml:/app-root/run.yaml:Z + - ${GCP_KEYS_PATH:-./tmp/.gcp-keys-dummy}:/opt/app-root/.gcp-keys:ro environment: - # LLM Provider API Keys + - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-} + - TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-} + # OpenAI - OPENAI_API_KEY=${OPENAI_API_KEY} - E2E_OPENAI_MODEL=${E2E_OPENAI_MODEL:-gpt-4-turbo} + # Azure - AZURE_API_KEY=${AZURE_API_KEY:-} - - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-} - - TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-} + # RHAIIS - RHAIIS_URL=${RHAIIS_URL:-} - RHAIIS_API_KEY=${RHAIIS_API_KEY:-} - RHAIIS_MODEL=${RHAIIS_MODEL:-} + # RHEL AI - RHEL_AI_URL=${RHEL_AI_URL:-} - RHEL_AI_PORT=${RHEL_AI_PORT:-} - RHEL_AI_API_KEY=${RHEL_AI_API_KEY:-} - RHEL_AI_MODEL=${RHEL_AI_MODEL:-} + # VertexAI + - GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS:-} + - VERTEX_AI_PROJECT=${VERTEX_AI_PROJECT:-} + - VERTEX_AI_LOCATION=${VERTEX_AI_LOCATION:-} # Enable debug logging if needed - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-} healthcheck: diff --git a/docker-compose.yaml b/docker-compose.yaml index 8262e314e..3b00c3815 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -10,22 +10,30 @@ services: - "8321:8321" # Expose llama-stack on 8321 (adjust if needed) volumes: - ./run.yaml:/opt/app-root/run.yaml:Z + - ${GCP_KEYS_PATH:-./tmp/.gcp-keys-dummy}:/opt/app-root/.gcp-keys:ro environment: + - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-} + - TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-} + # OpenAI - OPENAI_API_KEY=${OPENAI_API_KEY} - E2E_OPENAI_MODEL=${E2E_OPENAI_MODEL} + # Azure - AZURE_API_KEY=${AZURE_API_KEY} - - 
BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-} - - TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-} + # RHAIIS - RHAIIS_URL=${RHAIIS_URL} - RHAIIS_API_KEY=${RHAIIS_API_KEY} - RHAIIS_MODEL=${RHAIIS_MODEL} + # RHEL AI - RHEL_AI_URL=${RHEL_AI_URL} - RHEL_AI_PORT=${RHEL_AI_PORT} - RHEL_AI_API_KEY=${RHEL_AI_API_KEY} - RHEL_AI_MODEL=${RHEL_AI_MODEL} + # VertexAI + - GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS:-} + - VERTEX_AI_PROJECT=${VERTEX_AI_PROJECT:-} + - VERTEX_AI_LOCATION=${VERTEX_AI_LOCATION:-} # Enable debug logging if needed - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-} - networks: - lightspeednet healthcheck: diff --git a/docs/providers.md b/docs/providers.md index efd2453fd..32f320dcb 100644 --- a/docs/providers.md +++ b/docs/providers.md @@ -54,7 +54,7 @@ The tables below summarize each provider category, containing the following atri | sambanova | remote | `litellm` | ❌ | | tgi | remote | `huggingface_hub`, `aiohttp` | ❌ | | together | remote | `together` | ❌ | -| vertexai | remote | `litellm`, `google-cloud-aiplatform` | ❌ | +| vertexai | remote | `google-auth` | ✅ | | watsonx | remote | `ibm_watsonx_ai` | ❌ | Red Hat providers: diff --git a/examples/vertexai-run.yaml b/examples/vertexai-run.yaml index 410560486..37e083b8f 100644 --- a/examples/vertexai-run.yaml +++ b/examples/vertexai-run.yaml @@ -1,91 +1,143 @@ -# Example llama-stack configuration for VertexAI inference -# -# Contributed by @eloycoto (2025-08). See https://github.com/rhdhorchestrator/LS-core-test/blob/master/run-llama-stack.yaml -# This file shows how to integrate VertexAI with LCS. -# -# Notes: -# - You will need to configure Gemini inference on VertexAI. 
-# -version: '3' -image_name: ollama-llama-stack-config +version: 2 + apis: - - agents - - inference - - safety - - telemetry - - tool_runtime - - vector_io -logging: - level: DEBUG # Set root logger to DEBUG - category_levels: - llama_stack: DEBUG # Enable DEBUG for all llama_stack modules - llama_stack.providers.remote.inference.vllm: DEBUG - llama_stack.providers.inline.agents.meta_reference: DEBUG - llama_stack.providers.inline.agents.meta_reference.agent_instance: DEBUG - llama_stack.providers.inline.vector_io.faiss: DEBUG - llama_stack.providers.inline.telemetry.meta_reference: DEBUG - llama_stack.core: DEBUG - llama_stack.apis: DEBUG - uvicorn: DEBUG - uvicorn.access: INFO # Keep HTTP requests at INFO to reduce noise - fastapi: DEBUG +- agents +- batches +- datasetio +- eval +- files +- inference +- safety +- scoring +- tool_runtime +- vector_io + +benchmarks: [] +conversations_store: + db_path: ~/.llama/storage/conversations.db + type: sqlite +datasets: [] +image_name: starter +# external_providers_dir: /opt/app-root/src/.llama/providers.d +inference_store: + db_path: ~/.llama/storage/inference-store.db + type: sqlite +metadata_store: + db_path: ~/.llama/storage/registry.db + type: sqlite providers: - vector_io: - - config: - kvstore: - db_path: /tmp/faiss_store.db - type: sqlite - provider_id: faiss - provider_type: inline::faiss - agents: - config: - persistence_store: - db_path: /tmp/agents_store.db - namespace: null - type: sqlite - responses_store: - db_path: /tmp/responses_store.db - type: sqlite + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default provider_id: meta-reference provider_type: inline::meta-reference - - + batches: + - config: + kvstore: + namespace: batches_store + backend: kv_default + provider_id: reference + provider_type: inline::reference + datasetio: + - config: + kvstore: + namespace: huggingface_datasetio + backend: kv_default + 
provider_id: huggingface + provider_type: remote::huggingface + - config: + kvstore: + namespace: localfs_datasetio + backend: kv_default + provider_id: localfs + provider_type: inline::localfs + eval: + - config: + kvstore: + namespace: eval_store + backend: kv_default + provider_id: meta-reference + provider_type: inline::meta-reference + files: + - config: + metadata_store: + table_name: files_metadata + backend: sql_default + storage_dir: ~/.llama/storage + provider_id: meta-reference-files + provider_type: inline::localfs inference: - - provider_id: vllm-inference - provider_type: remote::vllm - config: - url: ${env.VLLM_URL:=http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:=4096} - api_token: ${env.VLLM_API_TOKEN:=fake} - tls_verify: ${env.VLLM_TLS_VERIFY:=false} - - - provider_id: google-vertex - provider_type: remote::vertexai - config: - project: ${env.VERTEXAI_PROJECT} - region: ${env.VERTEXAI_REGION:=us-east5} - + - provider_id: google-vertex + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT} + location: ${env.VERTEX_AI_LOCATION} + - config: {} + provider_id: sentence-transformers + provider_type: inline::sentence-transformers + safety: + - config: + excluded_categories: [] + provider_id: llama-guard + provider_type: inline::llama-guard + scoring: + - config: {} + provider_id: basic + provider_type: inline::basic + - config: {} + provider_id: llm-as-judge + provider_type: inline::llm-as-judge tool_runtime: - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - module: null - - telemetry: - - config: - service_name: 'llama-stack' - sinks: console,sqlite - sqlite_db_path: /tmp/trace_store.db - provider_id: meta-reference - provider_type: inline::meta-reference - -metadata_store: - type: sqlite - db_path: /tmp/registry.db - namespace: null - -inference_store: - type: sqlite - db_path: /tmp/inference_store.db \ No newline at end of file + - config: {} + provider_id: 
rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: + persistence: + namespace: faiss_store + backend: kv_default + provider_id: faiss + provider_type: inline::faiss +scoring_fns: [] +server: + port: 8321 +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ~/.llama/storage/kv_store.db + sql_default: + type: sql_sqlite + db_path: ~/.llama/storage/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: [] + shields: [] + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag + provider_id: rag-runtime diff --git a/tests/e2e/configs/run-vertexai.yaml b/tests/e2e/configs/run-vertexai.yaml new file mode 100644 index 000000000..37e083b8f --- /dev/null +++ b/tests/e2e/configs/run-vertexai.yaml @@ -0,0 +1,143 @@ +version: 2 + +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- safety +- scoring +- tool_runtime +- vector_io + +benchmarks: [] +conversations_store: + db_path: ~/.llama/storage/conversations.db + type: sqlite +datasets: [] +image_name: starter +# external_providers_dir: /opt/app-root/src/.llama/providers.d +inference_store: + db_path: ~/.llama/storage/inference-store.db + type: sqlite +metadata_store: + db_path: ~/.llama/storage/registry.db + type: sqlite + +providers: + agents: + - config: + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default + provider_id: meta-reference + provider_type: inline::meta-reference + batches: + - config: + kvstore: + namespace: batches_store + backend: kv_default + provider_id: reference + provider_type: inline::reference + datasetio: + - 
config: + kvstore: + namespace: huggingface_datasetio + backend: kv_default + provider_id: huggingface + provider_type: remote::huggingface + - config: + kvstore: + namespace: localfs_datasetio + backend: kv_default + provider_id: localfs + provider_type: inline::localfs + eval: + - config: + kvstore: + namespace: eval_store + backend: kv_default + provider_id: meta-reference + provider_type: inline::meta-reference + files: + - config: + metadata_store: + table_name: files_metadata + backend: sql_default + storage_dir: ~/.llama/storage + provider_id: meta-reference-files + provider_type: inline::localfs + inference: + - provider_id: google-vertex + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT} + location: ${env.VERTEX_AI_LOCATION} + - config: {} + provider_id: sentence-transformers + provider_type: inline::sentence-transformers + safety: + - config: + excluded_categories: [] + provider_id: llama-guard + provider_type: inline::llama-guard + scoring: + - config: {} + provider_id: basic + provider_type: inline::basic + - config: {} + provider_id: llm-as-judge + provider_type: inline::llm-as-judge + tool_runtime: + - config: {} + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: + persistence: + namespace: faiss_store + backend: kv_default + provider_id: faiss + provider_type: inline::faiss +scoring_fns: [] +server: + port: 8321 +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ~/.llama/storage/kv_store.db + sql_default: + type: sql_sqlite + db_path: ~/.llama/storage/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: [] + shields: [] + vector_dbs: [] + datasets: [] + scoring_fns: [] + 
benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag + provider_id: rag-runtime diff --git a/tests/e2e/configuration/lightspeed-stack-library-mode.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack.yaml similarity index 96% rename from tests/e2e/configuration/lightspeed-stack-library-mode.yaml rename to tests/e2e/configuration/library-mode/lightspeed-stack.yaml index 47257bfb1..e6d02d3a6 100644 --- a/tests/e2e/configuration/lightspeed-stack-library-mode.yaml +++ b/tests/e2e/configuration/library-mode/lightspeed-stack.yaml @@ -16,4 +16,4 @@ user_data_collection: transcripts_enabled: true transcripts_storage: "/tmp/data/transcripts" authentication: - module: "noop" + module: "noop" \ No newline at end of file diff --git a/tests/e2e/configuration/lightspeed-stack-server-mode.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack.yaml similarity index 96% rename from tests/e2e/configuration/lightspeed-stack-server-mode.yaml rename to tests/e2e/configuration/server-mode/lightspeed-stack.yaml index cc699ba89..adc5b4829 100644 --- a/tests/e2e/configuration/lightspeed-stack-server-mode.yaml +++ b/tests/e2e/configuration/server-mode/lightspeed-stack.yaml @@ -17,4 +17,4 @@ user_data_collection: transcripts_enabled: true transcripts_storage: "/tmp/data/transcripts" authentication: - module: "noop" + module: "noop" \ No newline at end of file diff --git a/tests/e2e/features/environment.py b/tests/e2e/features/environment.py index f7f366998..09b7feeff 100644 --- a/tests/e2e/features/environment.py +++ b/tests/e2e/features/environment.py @@ -55,9 +55,9 @@ def before_all(context: Context) -> None: context.deployment_mode = os.getenv("E2E_DEPLOYMENT_MODE", "server").lower() context.is_library_mode = context.deployment_mode == "library" + # Get first LLM model from running service print(f"Running tests in {context.deployment_mode} mode") - # Get first LLM model from running service llm_model = _fetch_models_from_service() if llm_model: @@ -68,9 +68,11 @@ def 
before_all(context: Context) -> None: ) else: # Fallback for development - context.default_model = "gpt-4-turbo" + context.default_model = "gpt-4o-mini" context.default_provider = "openai" - print("⚠ Could not detect models, using fallback: gpt-4-turbo/openai") + print( + f"⚠ Could not detect models, using fallback: {context.default_provider}/{context.default_model}" + ) def before_scenario(context: Context, scenario: Scenario) -> None: