diff --git a/.github/workflows/pr-smoke-test.yml b/.github/workflows/pr-smoke-test.yml
index 096459dc0900..8c5099b59880 100644
--- a/.github/workflows/pr-smoke-test.yml
+++ b/.github/workflows/pr-smoke-test.yml
@@ -161,3 +161,42 @@ jobs:
           SKIP_BUILD: 1
         run: |
           bash scripts/test_compaction.sh
+
+  # Runs scripts/test_providers.sh with --code-exec against the goose-binary artifact (job waits on build-binary).
+  smoke-tests-code-exec:
+    name: Smoke Tests (Code Execution)
+    runs-on: ubuntu-latest
+    needs: build-binary
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # pin@v4
+        with:
+          ref: ${{ github.event.inputs.branch || github.ref }}
+
+      - name: Download Binary
+        uses: actions/download-artifact@v4
+        with:
+          name: goose-binary
+          path: target/release
+
+      - name: Make Binary Executable
+        run: chmod +x target/release/goose
+
+      - name: Run Provider Tests (Code Execution Mode)
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+          DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
+          DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
+          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
+          XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
+          TETRATE_API_KEY: ${{ secrets.TETRATE_API_KEY }}
+          # Scratch HOME; the run step below creates the goose session/config directories under it.
+          HOME: /tmp/goose-home
+          GOOSE_DISABLE_KEYRING: 1
+          SKIP_BUILD: 1
+        run: |
+          mkdir -p "$HOME/.local/share/goose/sessions"
+          mkdir -p "$HOME/.config/goose"
+          bash scripts/test_providers.sh --code-exec
diff --git a/scripts/test_providers.sh b/scripts/test_providers.sh
index 13e5e7431a52..17c6e5941c3c 100755
--- a/scripts/test_providers.sh
+++ b/scripts/test_providers.sh
@@ -1,4 +1,24 @@
 #!/bin/bash
+# Test providers with optional code_execution mode
+# Usage:
+#   ./test_providers.sh              # Normal mode (direct tool calls)
+#   ./test_providers.sh --code-exec  # Code execution mode (JS batching)
+
+CODE_EXEC_MODE=false
+for arg in "$@"; do
+  case "$arg" in
+    --code-exec)
+      CODE_EXEC_MODE=true
+      ;;
+    *)
+      # Fail fast: a mistyped flag would otherwise silently fall back to normal mode.
+      echo "Unknown argument: $arg" >&2
+      echo "Usage: $0 [--code-exec]" >&2
+      exit 2
+      ;;
+  esac
+done
+
 if [ -f .env ]; then
   export $(grep -v '^#' .env | xargs)
 fi
@@ -37,6 +57,24 @@ else
   PROVIDERS+=("databricks:databricks-claude-sonnet-4:gemini-2-5-flash:gpt-4o")
 fi
 
+# Configure mode-specific settings
+# SUCCESS_PATTERN is matched with grep -E below, so "\|" stands for a literal "|" in the output.
+if [ "$CODE_EXEC_MODE" = true ]; then
+  echo "Mode: code_execution (JS batching)"
+  BUILTINS="developer,code_execution"
+  # Match "execute_code | code_execution" or "read_module | code_execution" in output
+  SUCCESS_PATTERN="(execute_code \| code_execution)|(read_module \| code_execution)"
+  SUCCESS_MSG="code_execution tool called"
+  FAILURE_MSG="no code_execution tools called"
+else
+  echo "Mode: normal (direct tool calls)"
+  BUILTINS="developer,autovisualiser,computercontroller,tutorial,todo,extensionmanager"
+  SUCCESS_PATTERN="shell \| developer"
+  SUCCESS_MSG="developer tool called"
+  FAILURE_MSG="no developer tools called"
+fi
+echo ""
+
 RESULTS=()
 
 for provider_config in "${PROVIDERS[@]}"; do
@@ -52,13 +90,13 @@ for provider_config in "${PROVIDERS[@]}"; do
     echo "Model: ${MODEL}"
     echo ""
     TMPFILE=$(mktemp)
-    (cd "$TESTDIR" && "$SCRIPT_DIR/target/release/goose" run --text "please list files in the current directory" --with-builtin developer,autovisualiser,computercontroller,tutorial,todo,extensionmanager 2>&1) | tee "$TMPFILE"
+    (cd "$TESTDIR" && "$SCRIPT_DIR/target/release/goose" run --text "please list files in the current directory" --with-builtin "$BUILTINS" 2>&1) | tee "$TMPFILE"
     echo ""
-    if grep -q "shell | developer" "$TMPFILE"; then
-      echo "✓ SUCCESS: Test passed - developer tool called"
+    if grep -qE "$SUCCESS_PATTERN" "$TMPFILE"; then
+      echo "✓ SUCCESS: Test passed - $SUCCESS_MSG"
       RESULTS+=("✓ ${PROVIDER}: ${MODEL}")
     else
-      echo "✗ FAILED: Test failed - no developer tools called"
+      echo "✗ FAILED: Test failed - $FAILURE_MSG"
       RESULTS+=("✗ ${PROVIDER}: ${MODEL}")
     fi
     rm "$TMPFILE"