diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index cb0dc47075..e36652aa36 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -25,16 +25,6 @@
"pull-requests": "write",
"workflows": "write"
}
- },
- "github/agentic-workflow-workbench": {
- "permissions": {
- "actions": "write",
- "contents": "write",
- "discussions": "read",
- "issues": "read",
- "pull-requests": "write",
- "workflows": "write"
- }
}
}
}
diff --git a/.github/workflows/daily-cli-tools-tester.lock.yml b/.github/workflows/daily-cli-tools-tester.lock.yml
new file mode 100644
index 0000000000..242c462d35
--- /dev/null
+++ b/.github/workflows/daily-cli-tools-tester.lock.yml
@@ -0,0 +1,1110 @@
+#
+# ___ _ _
+# / _ \ | | (_)
+# | |_| | __ _ ___ _ __ | |_ _ ___
+# | _ |/ _` |/ _ \ '_ \| __| |/ __|
+# | | | | (_| | __/ | | | |_| | (__
+# \_| |_/\__, |\___|_| |_|\__|_|\___|
+# __/ |
+# _ _ |___/
+# | | | | / _| |
+# | | | | ___ _ __ _ __| |_| | _____ ____
+# | |/\| |/ _ \ '__| |/ /| _| |/ _ \ \ /\ / / ___|
+# \ /\ / (_) | | | | ( | | | | (_) \ V V /\__ \
+# \/ \/ \___/|_| |_|\_\|_| |_|\___/ \_/\_/ |___/
+#
+# This file was automatically generated by gh-aw. DO NOT EDIT.
+#
+# To update this file, edit the corresponding .md file and run:
+# gh aw compile
+# For more information: https://github.com/github/gh-aw/blob/main/.github/aw/github-agentic-workflows.md
+#
+# Daily exploratory testing of audit, logs, and compile tools in gh-aw CLI
+#
+# frontmatter-hash: fc352f2d89c4737ca090591c135242a8e29e14c751026cfcb1905ff1445c7617
+
+name: "Daily CLI Tools Exploratory Tester"
+"on":
+ schedule:
+ - cron: "44 17 * * *"
+ # Friendly format: daily (scattered)
+ workflow_dispatch:
+
+permissions: {}
+
+concurrency:
+ group: "gh-aw-${{ github.workflow }}"
+
+run-name: "Daily CLI Tools Exploratory Tester"
+
+jobs:
+ activation:
+ runs-on: ubuntu-slim
+ permissions:
+ contents: read
+ outputs:
+ comment_id: ""
+ comment_repo: ""
+ steps:
+ - name: Checkout actions folder
+ uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
+ with:
+ sparse-checkout: |
+ actions
+ persist-credentials: false
+ - name: Setup Scripts
+ uses: ./actions/setup
+ with:
+ destination: /opt/gh-aw/actions
+ - name: Check workflow file timestamps
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+ env:
+ GH_AW_WORKFLOW_FILE: "daily-cli-tools-tester.lock.yml"
+ with:
+ script: |
+ const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('/opt/gh-aw/actions/check_workflow_timestamp_api.cjs');
+ await main();
+
+ agent:
+ needs: activation
+ runs-on: ubuntu-latest
+ permissions:
+ actions: read
+ contents: read
+ issues: read
+ pull-requests: read
+ concurrency:
+ group: "gh-aw-copilot-${{ github.workflow }}"
+ env:
+ DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
+ GH_AW_ASSETS_ALLOWED_EXTS: ""
+ GH_AW_ASSETS_BRANCH: ""
+ GH_AW_ASSETS_MAX_SIZE_KB: 0
+ GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs
+ GH_AW_SAFE_OUTPUTS: /opt/gh-aw/safeoutputs/outputs.jsonl
+ GH_AW_SAFE_OUTPUTS_CONFIG_PATH: /opt/gh-aw/safeoutputs/config.json
+ GH_AW_SAFE_OUTPUTS_TOOLS_PATH: /opt/gh-aw/safeoutputs/tools.json
+ outputs:
+ checkout_pr_success: ${{ steps.checkout-pr.outputs.checkout_pr_success || 'true' }}
+ has_patch: ${{ steps.collect_output.outputs.has_patch }}
+ model: ${{ steps.generate_aw_info.outputs.model }}
+ output: ${{ steps.collect_output.outputs.output }}
+ output_types: ${{ steps.collect_output.outputs.output_types }}
+ secret_verification_result: ${{ steps.validate-secret.outputs.verification_result }}
+ steps:
+ - name: Checkout actions folder
+ uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
+ with:
+ sparse-checkout: |
+ actions
+ persist-credentials: false
+ - name: Setup Scripts
+ uses: ./actions/setup
+ with:
+ destination: /opt/gh-aw/actions
+ - name: Checkout repository
+ uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
+ with:
+ persist-credentials: false
+ - name: Setup Go for CLI build
+ uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # v6.1.0
+ with:
+ go-version-file: go.mod
+ cache: true
+ - name: Build gh-aw CLI
+ run: |
+ echo "Building gh-aw CLI for linux/amd64..."
+ mkdir -p dist
+ VERSION=$(git describe --tags --always --dirty)
+ CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \
+ -ldflags "-s -w -X main.version=${VERSION}" \
+ -o dist/gh-aw-linux-amd64 \
+ ./cmd/gh-aw
+ # Copy binary to root for direct execution in user-defined steps
+ cp dist/gh-aw-linux-amd64 ./gh-aw
+ chmod +x ./gh-aw
+ echo "✓ Built gh-aw CLI successfully"
+ - name: Setup Docker Buildx
+ uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
+ - name: Build gh-aw Docker image
+ uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6
+ with:
+ context: .
+ platforms: linux/amd64
+ push: false
+ load: true
+ tags: localhost/gh-aw:dev
+ build-args: |
+ BINARY=dist/gh-aw-linux-amd64
+ - name: Create gh-aw temp directory
+ run: bash /opt/gh-aw/actions/create_gh_aw_tmp_dir.sh
+ - name: Configure Git credentials
+ env:
+ REPO_NAME: ${{ github.repository }}
+ SERVER_URL: ${{ github.server_url }}
+ run: |
+ git config --global user.email "github-actions[bot]@users.noreply.github.com"
+ git config --global user.name "github-actions[bot]"
+ # Re-authenticate git with GitHub token
+ SERVER_URL_STRIPPED="${SERVER_URL#https://}"
+ git remote set-url origin "https://x-access-token:${{ github.token }}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git"
+ echo "Git configured with standard GitHub Actions identity"
+ - name: Checkout PR branch
+ id: checkout-pr
+ if: |
+ github.event.pull_request
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+ env:
+ GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
+ with:
+ github-token: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
+ script: |
+ const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('/opt/gh-aw/actions/checkout_pr_branch.cjs');
+ await main();
+ - name: Validate COPILOT_GITHUB_TOKEN secret
+ id: validate-secret
+ run: /opt/gh-aw/actions/validate_multi_secret.sh COPILOT_GITHUB_TOKEN 'GitHub Copilot CLI' https://github.github.com/gh-aw/reference/engines/#github-copilot-default
+ env:
+ COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
+ - name: Install GitHub Copilot CLI
+ run: /opt/gh-aw/actions/install_copilot_cli.sh 0.0.405
+ - name: Install awf binary
+ run: bash /opt/gh-aw/actions/install_awf_binary.sh v0.13.7
+ - name: Determine automatic lockdown mode for GitHub MCP server
+ id: determine-automatic-lockdown
+ env:
+ TOKEN_CHECK: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }}
+ if: env.TOKEN_CHECK != ''
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
+ with:
+ script: |
+ const determineAutomaticLockdown = require('/opt/gh-aw/actions/determine_automatic_lockdown.cjs');
+ await determineAutomaticLockdown(github, context, core);
+ - name: Download container images
+ run: bash /opt/gh-aw/actions/download_docker_images.sh ghcr.io/github/gh-aw-firewall/agent:0.13.7 ghcr.io/github/gh-aw-firewall/squid:0.13.7 ghcr.io/github/gh-aw-mcpg:v0.0.103 ghcr.io/github/github-mcp-server:v0.30.3 node:lts-alpine
+ - name: Install gh-aw extension
+ env:
+ GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
+ run: |
+ # Check if gh-aw extension is already installed
+ if gh extension list | grep -q "github/gh-aw"; then
+ echo "gh-aw extension already installed, upgrading..."
+ gh extension upgrade gh-aw || true
+ else
+ echo "Installing gh-aw extension..."
+ gh extension install github/gh-aw
+ fi
+ gh aw --version
+ # Copy the gh-aw binary to /opt/gh-aw for MCP server containerization
+ mkdir -p /opt/gh-aw
+ GH_AW_BIN=$(which gh-aw 2>/dev/null || find ~/.local/share/gh/extensions/gh-aw -name 'gh-aw' -type f 2>/dev/null | head -1)
+ if [ -n "$GH_AW_BIN" ] && [ -f "$GH_AW_BIN" ]; then
+ cp "$GH_AW_BIN" /opt/gh-aw/gh-aw
+ chmod +x /opt/gh-aw/gh-aw
+ echo "Copied gh-aw binary to /opt/gh-aw/gh-aw"
+ else
+ echo "::error::Failed to find gh-aw binary for MCP server"
+ exit 1
+ fi
+ - name: Write Safe Outputs Config
+ run: |
+ mkdir -p /opt/gh-aw/safeoutputs
+ mkdir -p /tmp/gh-aw/safeoutputs
+ mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs
+ cat > /opt/gh-aw/safeoutputs/config.json << 'EOF'
+ {"create_issue":{"expires":168,"max":5},"missing_data":{},"missing_tool":{},"noop":{"max":1}}
+ EOF
+ cat > /opt/gh-aw/safeoutputs/tools.json << 'EOF'
+ [
+ {
+ "description": "Create a new GitHub issue for tracking bugs, feature requests, or tasks. Use this for actionable work items that need assignment, labeling, and status tracking. For reports, announcements, or status updates that don't require task tracking, use create_discussion instead. CONSTRAINTS: Maximum 5 issue(s) can be created. Title will be prefixed with \"[cli-tools-test] \". Labels [testing automation cli-tools] will be automatically added.",
+ "inputSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "body": {
+ "description": "Detailed issue description in Markdown. Do NOT repeat the title as a heading since it already appears as the issue's h1. Include context, reproduction steps, or acceptance criteria as appropriate.",
+ "type": "string"
+ },
+ "labels": {
+ "description": "Labels to categorize the issue (e.g., 'bug', 'enhancement'). Labels must exist in the repository.",
+ "items": {
+ "type": "string"
+ },
+ "type": "array"
+ },
+ "parent": {
+ "description": "Parent issue number for creating sub-issues. This is the numeric ID from the GitHub URL (e.g., 42 in github.com/owner/repo/issues/42). Can also be a temporary_id (e.g., 'aw_abc123def456') from a previously created issue in the same workflow run.",
+ "type": [
+ "number",
+ "string"
+ ]
+ },
+ "temporary_id": {
+ "description": "Unique temporary identifier for referencing this issue before it's created. Format: 'aw_' followed by 12 hex characters (e.g., 'aw_abc123def456'). Use '#aw_ID' in body text to reference other issues by their temporary_id; these are replaced with actual issue numbers after creation.",
+ "type": "string"
+ },
+ "title": {
+ "description": "Concise issue title summarizing the bug, feature, or task. The title appears as the main heading, so keep it brief and descriptive.",
+ "type": "string"
+ }
+ },
+ "required": [
+ "title",
+ "body"
+ ],
+ "type": "object"
+ },
+ "name": "create_issue"
+ },
+ {
+ "description": "Report that a tool or capability needed to complete the task is not available, or share any information you deem important about missing functionality or limitations. Use this when you cannot accomplish what was requested because the required functionality is missing or access is restricted.",
+ "inputSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "alternatives": {
+ "description": "Any workarounds, manual steps, or alternative approaches the user could take (max 256 characters).",
+ "type": "string"
+ },
+ "reason": {
+ "description": "Explanation of why this tool is needed or what information you want to share about the limitation (max 256 characters).",
+ "type": "string"
+ },
+ "tool": {
+ "description": "Optional: Name or description of the missing tool or capability (max 128 characters). Be specific about what functionality is needed.",
+ "type": "string"
+ }
+ },
+ "required": [
+ "reason"
+ ],
+ "type": "object"
+ },
+ "name": "missing_tool"
+ },
+ {
+ "description": "Log a transparency message when no significant actions are needed. Use this to confirm workflow completion and provide visibility when analysis is complete but no changes or outputs are required (e.g., 'No issues found', 'All checks passed'). This ensures the workflow produces human-visible output even when no other actions are taken.",
+ "inputSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "message": {
+ "description": "Status or completion message to log. Should explain what was analyzed and the outcome (e.g., 'Code review complete - no issues found', 'Analysis complete - all tests passing').",
+ "type": "string"
+ }
+ },
+ "required": [
+ "message"
+ ],
+ "type": "object"
+ },
+ "name": "noop"
+ },
+ {
+ "description": "Report that data or information needed to complete the task is not available. Use this when you cannot accomplish what was requested because required data, context, or information is missing.",
+ "inputSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "alternatives": {
+ "description": "Any workarounds, manual steps, or alternative approaches the user could take (max 256 characters).",
+ "type": "string"
+ },
+ "context": {
+ "description": "Additional context about the missing data or where it should come from (max 256 characters).",
+ "type": "string"
+ },
+ "data_type": {
+ "description": "Type or description of the missing data or information (max 128 characters). Be specific about what data is needed.",
+ "type": "string"
+ },
+ "reason": {
+ "description": "Explanation of why this data is needed to complete the task (max 256 characters).",
+ "type": "string"
+ }
+ },
+ "required": [],
+ "type": "object"
+ },
+ "name": "missing_data"
+ }
+ ]
+ EOF
+ cat > /opt/gh-aw/safeoutputs/validation.json << 'EOF'
+ {
+ "create_issue": {
+ "defaultMax": 1,
+ "fields": {
+ "body": {
+ "required": true,
+ "type": "string",
+ "sanitize": true,
+ "maxLength": 65000
+ },
+ "labels": {
+ "type": "array",
+ "itemType": "string",
+ "itemSanitize": true,
+ "itemMaxLength": 128
+ },
+ "parent": {
+ "issueOrPRNumber": true
+ },
+ "repo": {
+ "type": "string",
+ "maxLength": 256
+ },
+ "temporary_id": {
+ "type": "string"
+ },
+ "title": {
+ "required": true,
+ "type": "string",
+ "sanitize": true,
+ "maxLength": 128
+ }
+ }
+ },
+ "missing_tool": {
+ "defaultMax": 20,
+ "fields": {
+ "alternatives": {
+ "type": "string",
+ "sanitize": true,
+ "maxLength": 512
+ },
+ "reason": {
+ "required": true,
+ "type": "string",
+ "sanitize": true,
+ "maxLength": 256
+ },
+ "tool": {
+ "type": "string",
+ "sanitize": true,
+ "maxLength": 128
+ }
+ }
+ },
+ "noop": {
+ "defaultMax": 1,
+ "fields": {
+ "message": {
+ "required": true,
+ "type": "string",
+ "sanitize": true,
+ "maxLength": 65000
+ }
+ }
+ }
+ }
+ EOF
+ - name: Generate Safe Outputs MCP Server Config
+ id: safe-outputs-config
+ run: |
+ # Generate a secure random API key (360 bits of entropy, 40+ chars)
+ API_KEY=""
+ API_KEY=$(openssl rand -base64 45 | tr -d '/+=')
+ PORT=3001
+
+ # Register API key as secret to mask it from logs
+ echo "::add-mask::${API_KEY}"
+
+ # Set outputs for next steps
+ {
+ echo "safe_outputs_api_key=${API_KEY}"
+ echo "safe_outputs_port=${PORT}"
+ } >> "$GITHUB_OUTPUT"
+
+ echo "Safe Outputs MCP server will run on port ${PORT}"
+
+ - name: Start Safe Outputs MCP HTTP Server
+ id: safe-outputs-start
+ env:
+ DEBUG: '*'
+ GH_AW_SAFE_OUTPUTS_PORT: ${{ steps.safe-outputs-config.outputs.safe_outputs_port }}
+ GH_AW_SAFE_OUTPUTS_API_KEY: ${{ steps.safe-outputs-config.outputs.safe_outputs_api_key }}
+ GH_AW_SAFE_OUTPUTS_TOOLS_PATH: /opt/gh-aw/safeoutputs/tools.json
+ GH_AW_SAFE_OUTPUTS_CONFIG_PATH: /opt/gh-aw/safeoutputs/config.json
+ GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs
+ run: |
+ # Environment variables are set above to prevent template injection
+ export DEBUG
+ export GH_AW_SAFE_OUTPUTS_PORT
+ export GH_AW_SAFE_OUTPUTS_API_KEY
+ export GH_AW_SAFE_OUTPUTS_TOOLS_PATH
+ export GH_AW_SAFE_OUTPUTS_CONFIG_PATH
+ export GH_AW_MCP_LOG_DIR
+
+ bash /opt/gh-aw/actions/start_safe_outputs_server.sh
+
+ - name: Start MCP gateway
+ id: start-mcp-gateway
+ env:
+ GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }}
+ GH_AW_SAFE_OUTPUTS_API_KEY: ${{ steps.safe-outputs-start.outputs.api_key }}
+ GH_AW_SAFE_OUTPUTS_PORT: ${{ steps.safe-outputs-start.outputs.port }}
+ GITHUB_MCP_LOCKDOWN: ${{ steps.determine-automatic-lockdown.outputs.lockdown == 'true' && '1' || '0' }}
+ GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ run: |
+ set -eo pipefail
+ mkdir -p /tmp/gh-aw/mcp-config
+
+ # Export gateway environment variables for MCP config and gateway script
+ export MCP_GATEWAY_PORT="80"
+ export MCP_GATEWAY_DOMAIN="host.docker.internal"
+ MCP_GATEWAY_API_KEY=""
+ MCP_GATEWAY_API_KEY=$(openssl rand -base64 45 | tr -d '/+=')
+ export MCP_GATEWAY_API_KEY
+ export MCP_GATEWAY_PAYLOAD_DIR="/tmp/gh-aw/mcp-payloads"
+ mkdir -p "${MCP_GATEWAY_PAYLOAD_DIR}"
+ export DEBUG="*"
+
+ # Register API key as secret to mask it from logs
+ echo "::add-mask::${MCP_GATEWAY_API_KEY}"
+ export GH_AW_ENGINE="copilot"
+ export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_LOCKDOWN -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.0.103'
+
+ mkdir -p /home/runner/.copilot
+ cat << MCPCONFIG_EOF | bash /opt/gh-aw/actions/start_mcp_gateway.sh
+ {
+ "mcpServers": {
+ "agentic_workflows": {
+ "type": "stdio",
+ "container": "localhost/gh-aw:dev",
+ "mounts": ["${{ github.workspace }}:${{ github.workspace }}:rw", "/tmp/gh-aw:/tmp/gh-aw:rw"],
+ "args": ["-w", "${{ github.workspace }}"],
+ "env": {
+ "DEBUG": "*",
+ "GITHUB_TOKEN": "\${GITHUB_TOKEN}"
+ }
+ },
+ "github": {
+ "type": "stdio",
+ "container": "ghcr.io/github/github-mcp-server:v0.30.3",
+ "env": {
+ "GITHUB_LOCKDOWN_MODE": "$GITHUB_MCP_LOCKDOWN",
+ "GITHUB_PERSONAL_ACCESS_TOKEN": "\${GITHUB_MCP_SERVER_TOKEN}",
+ "GITHUB_READ_ONLY": "1",
+ "GITHUB_TOOLSETS": "context,repos,issues,pull_requests"
+ }
+ },
+ "safeoutputs": {
+ "type": "http",
+ "url": "http://host.docker.internal:$GH_AW_SAFE_OUTPUTS_PORT",
+ "headers": {
+ "Authorization": "\${GH_AW_SAFE_OUTPUTS_API_KEY}"
+ }
+ }
+ },
+ "gateway": {
+ "port": $MCP_GATEWAY_PORT,
+ "domain": "${MCP_GATEWAY_DOMAIN}",
+ "apiKey": "${MCP_GATEWAY_API_KEY}",
+ "payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}"
+ }
+ }
+ MCPCONFIG_EOF
+ - name: Generate agentic run info
+ id: generate_aw_info
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+ with:
+ script: |
+ const fs = require('fs');
+
+ const awInfo = {
+ engine_id: "copilot",
+ engine_name: "GitHub Copilot CLI",
+ model: process.env.GH_AW_MODEL_AGENT_COPILOT || "",
+ version: "",
+ agent_version: "0.0.405",
+ workflow_name: "Daily CLI Tools Exploratory Tester",
+ experimental: false,
+ supports_tools_allowlist: true,
+ supports_http_transport: true,
+ run_id: context.runId,
+ run_number: context.runNumber,
+ run_attempt: process.env.GITHUB_RUN_ATTEMPT,
+ repository: context.repo.owner + '/' + context.repo.repo,
+ ref: context.ref,
+ sha: context.sha,
+ actor: context.actor,
+ event_name: context.eventName,
+ staged: false,
+ allowed_domains: ["defaults"],
+ firewall_enabled: true,
+ awf_version: "v0.13.7",
+ awmg_version: "v0.0.103",
+ steps: {
+ firewall: "squid"
+ },
+ created_at: new Date().toISOString()
+ };
+
+ // Write to /tmp/gh-aw directory to avoid inclusion in PR
+ const tmpPath = '/tmp/gh-aw/aw_info.json';
+ fs.writeFileSync(tmpPath, JSON.stringify(awInfo, null, 2));
+ console.log('Generated aw_info.json at:', tmpPath);
+ console.log(JSON.stringify(awInfo, null, 2));
+
+ // Set model as output for reuse in other steps/jobs
+ core.setOutput('model', awInfo.model);
+ - name: Generate workflow overview
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+ with:
+ script: |
+ const { generateWorkflowOverview } = require('/opt/gh-aw/actions/generate_workflow_overview.cjs');
+ await generateWorkflowOverview(core);
+ - name: Create prompt with built-in context
+ env:
+ GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
+ GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }}
+ GH_AW_GITHUB_ACTOR: ${{ github.actor }}
+ GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }}
+ GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }}
+ GH_AW_GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number }}
+ GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }}
+ GH_AW_GITHUB_REPOSITORY: ${{ github.repository }}
+ GH_AW_GITHUB_RUN_ID: ${{ github.run_id }}
+ GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }}
+ run: |
+ bash /opt/gh-aw/actions/create_prompt_first.sh
+ cat << 'PROMPT_EOF' > "$GH_AW_PROMPT"
+
+ PROMPT_EOF
+ cat "/opt/gh-aw/prompts/temp_folder_prompt.md" >> "$GH_AW_PROMPT"
+ cat "/opt/gh-aw/prompts/markdown.md" >> "$GH_AW_PROMPT"
+ cat << 'PROMPT_EOF' >> "$GH_AW_PROMPT"
+
+ GitHub API Access Instructions
+
+ The gh CLI is NOT authenticated. Do NOT use gh commands for GitHub operations.
+
+
+ To create or modify GitHub resources (issues, discussions, pull requests, etc.), you MUST call the appropriate safe output tool. Simply writing content will NOT work - the workflow requires actual tool calls.
+
+ Discover available tools from the safeoutputs MCP server.
+
+ **Critical**: Tool calls write structured data that downstream jobs process. Without tool calls, follow-up actions will be skipped.
+
+ **Note**: If you made no other safe output tool calls during this workflow execution, call the "noop" tool to provide a status message indicating completion or that no actions were needed.
+
+
+
+ The following GitHub context information is available for this workflow:
+ {{#if __GH_AW_GITHUB_ACTOR__ }}
+ - **actor**: __GH_AW_GITHUB_ACTOR__
+ {{/if}}
+ {{#if __GH_AW_GITHUB_REPOSITORY__ }}
+ - **repository**: __GH_AW_GITHUB_REPOSITORY__
+ {{/if}}
+ {{#if __GH_AW_GITHUB_WORKSPACE__ }}
+ - **workspace**: __GH_AW_GITHUB_WORKSPACE__
+ {{/if}}
+ {{#if __GH_AW_GITHUB_EVENT_ISSUE_NUMBER__ }}
+ - **issue-number**: #__GH_AW_GITHUB_EVENT_ISSUE_NUMBER__
+ {{/if}}
+ {{#if __GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER__ }}
+ - **discussion-number**: #__GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER__
+ {{/if}}
+ {{#if __GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER__ }}
+ - **pull-request-number**: #__GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER__
+ {{/if}}
+ {{#if __GH_AW_GITHUB_EVENT_COMMENT_ID__ }}
+ - **comment-id**: __GH_AW_GITHUB_EVENT_COMMENT_ID__
+ {{/if}}
+ {{#if __GH_AW_GITHUB_RUN_ID__ }}
+ - **workflow-run-id**: __GH_AW_GITHUB_RUN_ID__
+ {{/if}}
+
+
+ PROMPT_EOF
+ cat << 'PROMPT_EOF' >> "$GH_AW_PROMPT"
+
+ PROMPT_EOF
+ cat << 'PROMPT_EOF' >> "$GH_AW_PROMPT"
+ {{#runtime-import .github/workflows/daily-cli-tools-tester.md}}
+ PROMPT_EOF
+ - name: Substitute placeholders
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+ env:
+ GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
+ GH_AW_GITHUB_ACTOR: ${{ github.actor }}
+ GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }}
+ GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }}
+ GH_AW_GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number }}
+ GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }}
+ GH_AW_GITHUB_REPOSITORY: ${{ github.repository }}
+ GH_AW_GITHUB_RUN_ID: ${{ github.run_id }}
+ GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }}
+ with:
+ script: |
+ const substitutePlaceholders = require('/opt/gh-aw/actions/substitute_placeholders.cjs');
+
+ // Call the substitution function
+ return await substitutePlaceholders({
+ file: process.env.GH_AW_PROMPT,
+ substitutions: {
+ GH_AW_GITHUB_ACTOR: process.env.GH_AW_GITHUB_ACTOR,
+ GH_AW_GITHUB_EVENT_COMMENT_ID: process.env.GH_AW_GITHUB_EVENT_COMMENT_ID,
+ GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: process.env.GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER,
+ GH_AW_GITHUB_EVENT_ISSUE_NUMBER: process.env.GH_AW_GITHUB_EVENT_ISSUE_NUMBER,
+ GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: process.env.GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER,
+ GH_AW_GITHUB_REPOSITORY: process.env.GH_AW_GITHUB_REPOSITORY,
+ GH_AW_GITHUB_RUN_ID: process.env.GH_AW_GITHUB_RUN_ID,
+ GH_AW_GITHUB_WORKSPACE: process.env.GH_AW_GITHUB_WORKSPACE
+ }
+ });
+ - name: Interpolate variables and render templates
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+ env:
+ GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
+ with:
+ script: |
+ const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('/opt/gh-aw/actions/interpolate_prompt.cjs');
+ await main();
+ - name: Validate prompt placeholders
+ env:
+ GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
+ run: bash /opt/gh-aw/actions/validate_prompt_placeholders.sh
+ - name: Print prompt
+ env:
+ GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
+ run: bash /opt/gh-aw/actions/print_prompt_summary.sh
+ - name: Execute GitHub Copilot CLI
+ id: agentic_execution
+ # Copilot CLI tool arguments (sorted):
+ # --allow-tool github
+ # --allow-tool safeoutputs
+ # --allow-tool shell(cat)
+ # --allow-tool shell(date)
+ # --allow-tool shell(echo)
+ # --allow-tool shell(grep)
+ # --allow-tool shell(head)
+ # --allow-tool shell(ls)
+ # --allow-tool shell(pwd)
+ # --allow-tool shell(sort)
+ # --allow-tool shell(tail)
+ # --allow-tool shell(uniq)
+ # --allow-tool shell(wc)
+ # --allow-tool shell(yq)
+ # --allow-tool write
+ timeout-minutes: 60
+ run: |
+ set -o pipefail
+ sudo -E awf --enable-chroot --env-all --container-workdir "${GITHUB_WORKSPACE}" --allow-domains api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --enable-host-access --image-tag 0.13.7 --skip-pull \
+ -- '/usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --add-dir "${GITHUB_WORKSPACE}" --disable-builtin-mcps --allow-tool github --allow-tool safeoutputs --allow-tool '\''shell(cat)'\'' --allow-tool '\''shell(date)'\'' --allow-tool '\''shell(echo)'\'' --allow-tool '\''shell(grep)'\'' --allow-tool '\''shell(head)'\'' --allow-tool '\''shell(ls)'\'' --allow-tool '\''shell(pwd)'\'' --allow-tool '\''shell(sort)'\'' --allow-tool '\''shell(tail)'\'' --allow-tool '\''shell(uniq)'\'' --allow-tool '\''shell(wc)'\'' --allow-tool '\''shell(yq)'\'' --allow-tool write --allow-all-paths --share /tmp/gh-aw/sandbox/agent/logs/conversation.md --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"${GH_AW_MODEL_AGENT_COPILOT:+ --model "$GH_AW_MODEL_AGENT_COPILOT"}' \
+ 2>&1 | tee /tmp/gh-aw/agent-stdio.log
+ env:
+ COPILOT_AGENT_RUNNER_TYPE: STANDALONE
+ COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
+ GH_AW_MCP_CONFIG: /home/runner/.copilot/mcp-config.json
+ GH_AW_MODEL_AGENT_COPILOT: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || '' }}
+ GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
+ GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }}
+ GITHUB_HEAD_REF: ${{ github.head_ref }}
+ GITHUB_REF_NAME: ${{ github.ref_name }}
+ GITHUB_STEP_SUMMARY: ${{ env.GITHUB_STEP_SUMMARY }}
+ GITHUB_WORKSPACE: ${{ github.workspace }}
+ XDG_CONFIG_HOME: /home/runner
+ - name: Copy Copilot session state files to logs
+ if: always()
+ continue-on-error: true
+ run: |
+ # Copy Copilot session state files to logs folder for artifact collection
+ # This ensures they are in /tmp/gh-aw/ where secret redaction can scan them
+ SESSION_STATE_DIR="$HOME/.copilot/session-state"
+ LOGS_DIR="/tmp/gh-aw/sandbox/agent/logs"
+
+ if [ -d "$SESSION_STATE_DIR" ]; then
+ echo "Copying Copilot session state files from $SESSION_STATE_DIR to $LOGS_DIR"
+ mkdir -p "$LOGS_DIR"
+ cp -v "$SESSION_STATE_DIR"/*.jsonl "$LOGS_DIR/" 2>/dev/null || true
+ echo "Session state files copied successfully"
+ else
+ echo "No session-state directory found at $SESSION_STATE_DIR"
+ fi
+ - name: Stop MCP gateway
+ if: always()
+ continue-on-error: true
+ env:
+ MCP_GATEWAY_PORT: ${{ steps.start-mcp-gateway.outputs.gateway-port }}
+ MCP_GATEWAY_API_KEY: ${{ steps.start-mcp-gateway.outputs.gateway-api-key }}
+ GATEWAY_PID: ${{ steps.start-mcp-gateway.outputs.gateway-pid }}
+ run: |
+ bash /opt/gh-aw/actions/stop_mcp_gateway.sh "$GATEWAY_PID"
+ - name: Redact secrets in logs
+ if: always()
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+ with:
+ script: |
+ const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('/opt/gh-aw/actions/redact_secrets.cjs');
+ await main();
+ env:
+ GH_AW_SECRET_NAMES: 'COPILOT_GITHUB_TOKEN,GH_AW_GITHUB_MCP_SERVER_TOKEN,GH_AW_GITHUB_TOKEN,GITHUB_TOKEN'
+ SECRET_COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
+ SECRET_GH_AW_GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }}
+ SECRET_GH_AW_GITHUB_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN }}
+ SECRET_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ - name: Upload Safe Outputs
+ if: always()
+ uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
+ with:
+ name: safe-output
+ path: ${{ env.GH_AW_SAFE_OUTPUTS }}
+ if-no-files-found: warn
+ - name: Ingest agent output
+ id: collect_output
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+ env:
+ GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }}
+ GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com"
+ GITHUB_SERVER_URL: ${{ github.server_url }}
+ GITHUB_API_URL: ${{ github.api_url }}
+ with:
+ script: |
+ const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('/opt/gh-aw/actions/collect_ndjson_output.cjs');
+ await main();
+ - name: Upload sanitized agent output
+ if: always() && env.GH_AW_AGENT_OUTPUT
+ uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
+ with:
+ name: agent-output
+ path: ${{ env.GH_AW_AGENT_OUTPUT }}
+ if-no-files-found: warn
+ - name: Upload engine output files
+ uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
+ with:
+ name: agent_outputs
+ path: |
+ /tmp/gh-aw/sandbox/agent/logs/
+ /tmp/gh-aw/redacted-urls.log
+ if-no-files-found: ignore
+ - name: Parse agent logs for step summary
+ if: always()
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+ env:
+ GH_AW_AGENT_OUTPUT: /tmp/gh-aw/sandbox/agent/logs/
+ with:
+ script: |
+ const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('/opt/gh-aw/actions/parse_copilot_log.cjs');
+ await main();
+ - name: Parse MCP gateway logs for step summary
+ if: always()
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+ with:
+ script: |
+ const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('/opt/gh-aw/actions/parse_mcp_gateway_log.cjs');
+ await main();
+ - name: Print firewall logs
+ if: always()
+ continue-on-error: true
+ env:
+ AWF_LOGS_DIR: /tmp/gh-aw/sandbox/firewall/logs
+ run: |
+ # Fix permissions on firewall logs so they can be uploaded as artifacts
+ # AWF runs with sudo, creating files owned by root
+ sudo chmod -R a+r /tmp/gh-aw/sandbox/firewall/logs 2>/dev/null || true
+ awf logs summary | tee -a "$GITHUB_STEP_SUMMARY"
+ - name: Upload agent artifacts
+ if: always()
+ continue-on-error: true
+ uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
+ with:
+ name: agent-artifacts
+ path: |
+ /tmp/gh-aw/aw-prompts/prompt.txt
+ /tmp/gh-aw/aw_info.json
+ /tmp/gh-aw/mcp-logs/
+ /tmp/gh-aw/sandbox/firewall/logs/
+ /tmp/gh-aw/agent-stdio.log
+ /tmp/gh-aw/agent/
+ if-no-files-found: ignore
+
+ conclusion:
+ needs:
+ - activation
+ - agent
+ - detection
+ - safe_outputs
+ if: (always()) && (needs.agent.result != 'skipped')
+ runs-on: ubuntu-slim
+ permissions:
+ contents: read
+ discussions: write
+ issues: write
+ pull-requests: write
+ outputs:
+ noop_message: ${{ steps.noop.outputs.noop_message }}
+ tools_reported: ${{ steps.missing_tool.outputs.tools_reported }}
+ total_count: ${{ steps.missing_tool.outputs.total_count }}
+ steps:
+ - name: Checkout actions folder
+ uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
+ with:
+ sparse-checkout: |
+ actions
+ persist-credentials: false
+ - name: Setup Scripts
+ uses: ./actions/setup
+ with:
+ destination: /opt/gh-aw/actions
+ - name: Debug job inputs
+ env:
+ COMMENT_ID: ${{ needs.activation.outputs.comment_id }}
+ COMMENT_REPO: ${{ needs.activation.outputs.comment_repo }}
+ AGENT_OUTPUT_TYPES: ${{ needs.agent.outputs.output_types }}
+ AGENT_CONCLUSION: ${{ needs.agent.result }}
+ run: |
+ echo "Comment ID: $COMMENT_ID"
+ echo "Comment Repo: $COMMENT_REPO"
+ echo "Agent Output Types: $AGENT_OUTPUT_TYPES"
+ echo "Agent Conclusion: $AGENT_CONCLUSION"
+ - name: Download agent output artifact
+ continue-on-error: true
+ uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
+ with:
+ name: agent-output
+ path: /tmp/gh-aw/safeoutputs/
+ - name: Setup agent output environment variable
+ run: |
+ mkdir -p /tmp/gh-aw/safeoutputs/
+ find "/tmp/gh-aw/safeoutputs/" -type f -print
+ echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/safeoutputs/agent_output.json" >> "$GITHUB_ENV"
+ - name: Process No-Op Messages
+ id: noop
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+ env:
+ GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }}
+ GH_AW_NOOP_MAX: 1
+ GH_AW_WORKFLOW_NAME: "Daily CLI Tools Exploratory Tester"
+ with:
+ github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
+ script: |
+ const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('/opt/gh-aw/actions/noop.cjs');
+ await main();
+ - name: Record Missing Tool
+ id: missing_tool
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+ env:
+ GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }}
+ GH_AW_WORKFLOW_NAME: "Daily CLI Tools Exploratory Tester"
+ with:
+ github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
+ script: |
+ const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('/opt/gh-aw/actions/missing_tool.cjs');
+ await main();
+ - name: Handle Agent Failure
+ id: handle_agent_failure
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+ env:
+ GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }}
+ GH_AW_WORKFLOW_NAME: "Daily CLI Tools Exploratory Tester"
+ GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+ GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }}
+ GH_AW_SECRET_VERIFICATION_RESULT: ${{ needs.agent.outputs.secret_verification_result }}
+ GH_AW_CHECKOUT_PR_SUCCESS: ${{ needs.agent.outputs.checkout_pr_success }}
+ with:
+ github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
+ script: |
+ const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('/opt/gh-aw/actions/handle_agent_failure.cjs');
+ await main();
+ - name: Update reaction comment with completion status
+ id: conclusion
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+ env:
+ GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }}
+ GH_AW_COMMENT_ID: ${{ needs.activation.outputs.comment_id }}
+ GH_AW_COMMENT_REPO: ${{ needs.activation.outputs.comment_repo }}
+ GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+ GH_AW_WORKFLOW_NAME: "Daily CLI Tools Exploratory Tester"
+ GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }}
+ GH_AW_DETECTION_CONCLUSION: ${{ needs.detection.result }}
+ with:
+ github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
+ script: |
+ const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('/opt/gh-aw/actions/notify_comment_error.cjs');
+ await main();
+
+ detection:
+ needs: agent
+ if: needs.agent.outputs.output_types != '' || needs.agent.outputs.has_patch == 'true'
+ runs-on: ubuntu-latest
+ permissions: {}
+ concurrency:
+ group: "gh-aw-copilot-${{ github.workflow }}"
+ timeout-minutes: 10
+ outputs:
+ success: ${{ steps.parse_results.outputs.success }}
+ steps:
+ - name: Checkout actions folder
+ uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
+ with:
+ sparse-checkout: |
+ actions
+ persist-credentials: false
+ - name: Setup Scripts
+ uses: ./actions/setup
+ with:
+ destination: /opt/gh-aw/actions
+ - name: Download agent artifacts
+ continue-on-error: true
+ uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
+ with:
+ name: agent-artifacts
+ path: /tmp/gh-aw/threat-detection/
+ - name: Download agent output artifact
+ continue-on-error: true
+ uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
+ with:
+ name: agent-output
+ path: /tmp/gh-aw/threat-detection/
+ - name: Echo agent output types
+ env:
+ AGENT_OUTPUT_TYPES: ${{ needs.agent.outputs.output_types }}
+ run: |
+ echo "Agent output-types: $AGENT_OUTPUT_TYPES"
+ - name: Setup threat detection
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+ env:
+ WORKFLOW_NAME: "Daily CLI Tools Exploratory Tester"
+ WORKFLOW_DESCRIPTION: "Daily exploratory testing of audit, logs, and compile tools in gh-aw CLI"
+ HAS_PATCH: ${{ needs.agent.outputs.has_patch }}
+ with:
+ script: |
+ const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('/opt/gh-aw/actions/setup_threat_detection.cjs');
+ await main();
+ - name: Ensure threat-detection directory and log
+ run: |
+ mkdir -p /tmp/gh-aw/threat-detection
+ touch /tmp/gh-aw/threat-detection/detection.log
+ - name: Validate COPILOT_GITHUB_TOKEN secret
+ id: validate-secret
+ run: /opt/gh-aw/actions/validate_multi_secret.sh COPILOT_GITHUB_TOKEN 'GitHub Copilot CLI' https://github.github.com/gh-aw/reference/engines/#github-copilot-default
+ env:
+ COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
+ - name: Install GitHub Copilot CLI
+ run: /opt/gh-aw/actions/install_copilot_cli.sh 0.0.405
+ - name: Execute GitHub Copilot CLI
+ id: agentic_execution
+ # Copilot CLI tool arguments (sorted):
+ # --allow-tool shell(cat)
+ # --allow-tool shell(grep)
+ # --allow-tool shell(head)
+ # --allow-tool shell(jq)
+ # --allow-tool shell(ls)
+ # --allow-tool shell(tail)
+ # --allow-tool shell(wc)
+ timeout-minutes: 20
+ run: |
+ set -o pipefail
+ COPILOT_CLI_INSTRUCTION="$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"
+ mkdir -p /tmp/
+ mkdir -p /tmp/gh-aw/
+ mkdir -p /tmp/gh-aw/agent/
+ mkdir -p /tmp/gh-aw/sandbox/agent/logs/
+ copilot --add-dir /tmp/ --add-dir /tmp/gh-aw/ --add-dir /tmp/gh-aw/agent/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --allow-tool 'shell(cat)' --allow-tool 'shell(grep)' --allow-tool 'shell(head)' --allow-tool 'shell(jq)' --allow-tool 'shell(ls)' --allow-tool 'shell(tail)' --allow-tool 'shell(wc)' --share /tmp/gh-aw/sandbox/agent/logs/conversation.md --prompt "$COPILOT_CLI_INSTRUCTION"${GH_AW_MODEL_DETECTION_COPILOT:+ --model "$GH_AW_MODEL_DETECTION_COPILOT"} 2>&1 | tee /tmp/gh-aw/threat-detection/detection.log
+ env:
+ COPILOT_AGENT_RUNNER_TYPE: STANDALONE
+ COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
+ GH_AW_MODEL_DETECTION_COPILOT: ${{ vars.GH_AW_MODEL_DETECTION_COPILOT || '' }}
+ GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
+ GITHUB_HEAD_REF: ${{ github.head_ref }}
+ GITHUB_REF_NAME: ${{ github.ref_name }}
+ GITHUB_STEP_SUMMARY: ${{ env.GITHUB_STEP_SUMMARY }}
+ GITHUB_WORKSPACE: ${{ github.workspace }}
+ XDG_CONFIG_HOME: /home/runner
+ - name: Parse threat detection results
+ id: parse_results
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+ with:
+ script: |
+ const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('/opt/gh-aw/actions/parse_threat_detection_results.cjs');
+ await main();
+ - name: Upload threat detection log
+ if: always()
+ uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
+ with:
+ name: threat-detection.log
+ path: /tmp/gh-aw/threat-detection/detection.log
+ if-no-files-found: ignore
+
+ safe_outputs:
+ needs:
+ - agent
+ - detection
+ if: ((!cancelled()) && (needs.agent.result != 'skipped')) && (needs.detection.outputs.success == 'true')
+ runs-on: ubuntu-slim
+ permissions:
+ contents: read
+ issues: write
+ timeout-minutes: 15
+ env:
+ GH_AW_ENGINE_ID: "copilot"
+ GH_AW_WORKFLOW_ID: "daily-cli-tools-tester"
+ GH_AW_WORKFLOW_NAME: "Daily CLI Tools Exploratory Tester"
+ outputs:
+ create_discussion_error_count: ${{ steps.process_safe_outputs.outputs.create_discussion_error_count }}
+ create_discussion_errors: ${{ steps.process_safe_outputs.outputs.create_discussion_errors }}
+ process_safe_outputs_processed_count: ${{ steps.process_safe_outputs.outputs.processed_count }}
+ process_safe_outputs_temporary_id_map: ${{ steps.process_safe_outputs.outputs.temporary_id_map }}
+ steps:
+ - name: Checkout actions folder
+ uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
+ with:
+ sparse-checkout: |
+ actions
+ persist-credentials: false
+ - name: Setup Scripts
+ uses: ./actions/setup
+ with:
+ destination: /opt/gh-aw/actions
+ - name: Download agent output artifact
+ continue-on-error: true
+ uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
+ with:
+ name: agent-output
+ path: /tmp/gh-aw/safeoutputs/
+ - name: Setup agent output environment variable
+ run: |
+ mkdir -p /tmp/gh-aw/safeoutputs/
+ find "/tmp/gh-aw/safeoutputs/" -type f -print
+ echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/safeoutputs/agent_output.json" >> "$GITHUB_ENV"
+ - name: Process Safe Outputs
+ id: process_safe_outputs
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+ env:
+ GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }}
+ GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_issue\":{\"expires\":168,\"labels\":[\"testing\",\"automation\",\"cli-tools\"],\"max\":5,\"title_prefix\":\"[cli-tools-test] \"},\"missing_data\":{},\"missing_tool\":{}}"
+ with:
+ github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
+ script: |
+ const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('/opt/gh-aw/actions/safe_output_handler_manager.cjs');
+ await main();
+
diff --git a/.github/workflows/daily-cli-tools-tester.md b/.github/workflows/daily-cli-tools-tester.md
new file mode 100644
index 0000000000..b160680868
--- /dev/null
+++ b/.github/workflows/daily-cli-tools-tester.md
@@ -0,0 +1,669 @@
+---
+description: Daily exploratory testing of audit, logs, and compile tools in gh-aw CLI
+on:
+ schedule: daily
+ workflow_dispatch:
+permissions:
+ contents: read
+ issues: read
+ pull-requests: read
+ actions: read
+tools:
+ agentic-workflows:
+ bash:
+safe-outputs:
+ create-issue:
+ expires: 7d
+ title-prefix: "[cli-tools-test] "
+ labels: [testing, automation, cli-tools]
+ max: 5
+ noop:
+timeout-minutes: 60
+strict: true
+---
+
+# Daily CLI Tools Exploratory Tester
+
+You are the Daily CLI Tools Exploratory Tester - an expert system that performs deep exploratory testing of the `gh aw audit`, `gh aw logs`, and `gh aw compile` commands in the agentic-workflows CLI.
+
+## Mission
+
+Perform comprehensive exploratory testing of three critical gh-aw CLI tools daily:
+1. **`gh aw audit`** - Workflow run investigation and analysis
+2. **`gh aw logs`** - Workflow log download and analysis
+3. **`gh aw compile`** - Workflow compilation from markdown to YAML
+
+When problems are detected, create detailed GitHub issues with reproduction steps and diagnostics.
+
+**Repository**: ${{ github.repository }}
+**Run ID**: ${{ github.run_id }}
+**Timeout**: 60 minutes
+
+## Available Tools
+
+### Agentic Workflows MCP Server
+
+You have access to the `agentic-workflows` MCP tool which provides:
+- `audit` - Audit a workflow run and generate detailed report
+- `logs` - Download workflow logs with filtering and analysis
+- `compile` - Compile workflow markdown files to YAML
+- `list` - List all workflows in the repository
+- `status` - Get status and metadata for workflows
+
+**CRITICAL**: Use the MCP tool exclusively - do NOT try to run `gh aw` commands directly via bash as authentication is not configured for direct CLI usage.
+
+### Testing Strategy
+
+The agentic-workflows MCP server is your testing interface. Use it systematically to explore the behavior of audit, logs, and compile functionality through the MCP layer.
+
+## Phase 1: Environment Setup and Discovery
+
+### 1.1 Verify MCP Server Availability
+
+First, verify the agentic-workflows MCP server is available:
+
+```
+Use the agentic-workflows MCP tool's "status" command to verify the server is operational.
+```
+
+### 1.2 Discover Available Workflows
+
+Get a comprehensive list of workflows to test:
+
+```
+Use the agentic-workflows MCP tool's "list" command to enumerate all workflows in the repository.
+```
+
+Expected output: List of workflow markdown files with metadata
+
+**Analysis questions**:
+- Are all workflows detected correctly?
+- Are workflow names parsed correctly?
+- Is metadata extraction working?
+
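+One lightweight cross-check, assuming workflow sources live under `.github/workflows/` as in this repository, is to compare the MCP "list" output against the markdown files on disk:
+
+```bash
+# Count workflow markdown sources on disk (generated lock files end in .lock.yml, so they are excluded)
+ls .github/workflows/*.md | wc -l
+
+# Show their names for comparison with the "list" output
+ls .github/workflows/*.md
+```
+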
+### 1.3 Select Test Workflows
+
+From the list, identify workflows for testing different scenarios:
+- **Simple workflow**: A basic workflow with minimal configuration
+- **Complex workflow**: A workflow with multiple tools, MCP servers, safe outputs
+- **MCP-heavy workflow**: A workflow with multiple MCP server configurations
+- **Scheduled workflow**: A workflow triggered on a schedule
+- **Event-triggered workflow**: A workflow triggered by issues/PRs
+
+Document your selections and rationale.
+
+## Phase 2: Test `gh aw logs` Command
+
+### 2.1 Basic Log Download
+
+Test downloading logs from the last 24 hours:
+
+```
+Use the agentic-workflows "logs" tool to download logs from the last 24 hours (start-date: "-1d")
+```
+
+**Validation checks**:
+- ✅ Logs download successfully
+- ✅ Appropriate number of runs returned
+- ✅ Log files are structured correctly
+- ✅ Metadata is complete (run ID, workflow name, status, timestamps)
+
+**Document any issues**:
+- Missing logs
+- Incomplete metadata
+- Parsing errors
+- Performance problems
+
+### 2.2 Filtered Log Queries
+
+Test various filtering options:
+
+#### Test A: Filter by Workflow Name
+```
+Use the "logs" tool with a specific workflow name (select one from Phase 1.2)
+```
+
+**Expected**: Only logs for that specific workflow
+
+#### Test B: Filter by Engine
+```
+Use the "logs" tool with engine filter (e.g., engine: "copilot" or "claude")
+```
+
+**Expected**: Only workflows using the specified engine
+
+#### Test C: Filter by Date Range
+```
+Use the "logs" tool with various date ranges:
+- Last 7 days: "-7d"
+- Specific date: "2024-01-15"
+- Custom range if supported
+```
+
+**Expected**: Logs within the specified timeframe
+
+#### Test D: Limit Results Count
+```
+Use the "logs" tool with count parameter to limit results (e.g., count: 5)
+```
+
+**Expected**: No more than the specified number of runs returned
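+
+A quick sanity check, assuming the "logs" tool downloads one directory per run under `/tmp/gh-aw/aw-mcp/logs/` (the path used in Phase 2.3 below):
+
+```bash
+# The directory count should not exceed the requested count (e.g. 5)
+ls /tmp/gh-aw/aw-mcp/logs/ | wc -l
+```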
+
+### 2.3 Log Content Analysis
+
+Examine the downloaded logs:
+
+```bash
+# List downloaded logs
+ls -R /tmp/gh-aw/aw-mcp/logs/
+
+# Check log structure
+find /tmp/gh-aw/aw-mcp/logs/ -type f -name "*.txt" | head -5
+
+# Verify log content
+for logfile in $(find /tmp/gh-aw/aw-mcp/logs/ -name "agent.txt" -type f | head -3); do
+ echo "=== $logfile ==="
+ head -20 "$logfile"
+ echo ""
+done
+```
+
+**Validation checks**:
+- ✅ Agent logs contain expected content
+- ✅ Job logs are complete
+- ✅ Metadata files are properly formatted
+- ✅ Directory structure is logical
+
+### 2.4 Edge Cases for Logs
+
+Test edge cases and error conditions:
+
+#### Edge Case A: Non-existent Workflow
+```
+Try to download logs for a workflow that doesn't exist (use "logs" with workflow-name: "nonexistent-workflow-xyz")
+```
+
+**Expected**: Graceful error message, not a crash
+
+#### Edge Case B: Future Date
+```
+Try to download logs with a future date
+```
+
+**Expected**: Appropriate error or empty result
+
+#### Edge Case C: Very Old Date
+```
+Try to download logs from 1 year ago
+```
+
+**Expected**: Either no results or appropriate message about retention
+
+### 2.5 Document Logs Test Results
+
+Create a summary:
+- What worked correctly?
+- What failed or behaved unexpectedly?
+- Performance observations (speed, memory usage)
+- Usability issues (confusing output, unclear errors)
+
+## Phase 3: Test `gh aw audit` Command
+
+### 3.1 Select Workflow Runs for Auditing
+
+From the logs downloaded in Phase 2, identify interesting runs to audit:
+1. **Successful run**: A run that completed successfully
+2. **Failed run**: A run that failed (if available)
+3. **Run with safe outputs**: A run that created issues/PRs
+4. **Long-running run**: A run that took significant time
+
+Extract the run IDs from the downloaded logs.
+
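+A hedged sketch for pulling run IDs out of the download directory; it assumes run directories are named after their run IDs and/or that metadata files record a `run_id` field, neither of which is guaranteed:
+
+```bash
+# If each run was downloaded into a directory named after its run ID:
+ls /tmp/gh-aw/aw-mcp/logs/
+
+# Otherwise, look for run IDs recorded inside metadata files:
+grep -rho '"run_id": *[0-9]*' /tmp/gh-aw/aw-mcp/logs/ | sort | uniq | head
+```
+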
+### 3.2 Audit Successful Run
+
+Test auditing a successful workflow run:
+
+```
+Use the agentic-workflows "audit" tool with a successful run ID
+```
+
+**Validation checks**:
+- ✅ Audit completes successfully
+- ✅ Report includes all expected sections:
+ - Run metadata (ID, workflow, status, duration)
+ - Job execution timeline
+ - Tool usage analysis
+ - Safe output operations
+ - Network activity
+ - Error detection (should be none for successful run)
+- ✅ Timing information is accurate
+- ✅ Resource usage is reported
+
+### 3.3 Audit Failed Run (if available)
+
+If you found a failed run in Phase 3.1:
+
+```
+Use the agentic-workflows "audit" tool with a failed run ID
+```
+
+**Validation checks**:
+- ✅ Audit identifies the failure point
+- ✅ Error messages are extracted correctly
+- ✅ Root cause analysis is provided
+- ✅ Related logs are referenced
+
+### 3.4 Audit Run with Safe Outputs
+
+Test auditing a run that used safe outputs (create-issue, add-comment, etc.):
+
+```
+Use the agentic-workflows "audit" tool with a run that has safe outputs
+```
+
+**Validation checks**:
+- ✅ Safe output operations are detected
+- ✅ Created resources are identified (issue numbers, PR numbers)
+- ✅ Links to created resources are provided
+- ✅ Safe output job status is reported
+
+### 3.5 Deep Analysis Tests
+
+For each audited run, verify deep analysis features:
+
+#### Test A: Tool Usage Detection
+**Check**: Does audit correctly identify all tools used (bash, edit, github, MCP servers)?
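+
+To cross-check the audit report against the raw logs, a hedged grep over one downloaded agent log can help; the exact log wording and layout are assumptions:
+
+```bash
+# Count mentions of a few tool names in one downloaded agent log
+AGENT_LOG=$(find /tmp/gh-aw/aw-mcp/logs/ -name "agent.txt" -type f | head -1)
+for tool in bash github safeoutputs; do
+  echo "$tool: $(grep -ic "$tool" "$AGENT_LOG")"
+done
+```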
+
+#### Test B: MCP Server Analysis
+**Check**: For workflows with MCP servers, does audit show which MCP tools were called?
+
+#### Test C: Network Activity
+**Check**: For workflows with network access, does audit show network requests?
+
+#### Test D: Performance Metrics
+**Check**: Does audit report execution time, job durations, step timing?
+
+### 3.6 Edge Cases for Audit
+
+Test edge cases:
+
+#### Edge Case A: Invalid Run ID
+```
+Try to audit with an invalid or non-existent run ID
+```
+
+**Expected**: Clear error message
+
+#### Edge Case B: Very Old Run
+```
+Try to audit a run from several months ago (if available)
+```
+
+**Expected**: Either works or clear message about data availability
+
+#### Edge Case C: In-Progress Run
+```
+If possible, try to audit a currently running workflow
+```
+
+**Expected**: Partial data or appropriate message
+
+### 3.7 Document Audit Test Results
+
+Create a summary:
+- What worked correctly?
+- What analysis features are missing?
+- Are error messages helpful?
+- Is the report format useful?
+- Any crashes or unexpected behavior?
+
+## Phase 4: Test `gh aw compile` Command
+
+### 4.1 Compile All Workflows
+
+Test bulk compilation:
+
+```
+Use the agentic-workflows "compile" tool without specifying a workflow (compiles all)
+```
+
+**Validation checks**:
+- ✅ All workflows compile successfully
+- ✅ Lock files (.lock.yml) are generated
+- ✅ No compilation errors
+- ✅ Performance is reasonable (time taken)
+
+Document:
+- Number of workflows compiled
+- Time taken
+- Any warnings or errors
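+
+To help fill in the counts above, assuming sources and generated locks both live under `.github/workflows/`:
+
+```bash
+# Workflow markdown sources vs. generated lock files
+echo "markdown sources: $(ls .github/workflows/*.md | wc -l)"
+echo "lock files:       $(ls .github/workflows/*.lock.yml | wc -l)"
+```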
+
+### 4.2 Compile Specific Workflows
+
+Test targeted compilation for different workflow types:
+
+#### Test A: Simple Workflow
+```
+Select a simple workflow and compile it individually
+Use the "compile" tool with workflow-name: ""
+```
+
+#### Test B: Complex Workflow
+```
+Select a complex workflow with multiple tools/MCP servers
+Use the "compile" tool with workflow-name: ""
+```
+
+#### Test C: Workflow with Imports
+```
+Find a workflow that imports shared components
+Use the "compile" tool with workflow-name: ""
+```
+
+**For each test, validate**:
+- ✅ Compilation succeeds
+- ✅ Lock file is created at correct path
+- ✅ Generated YAML is valid GitHub Actions syntax (see the yq spot-check after this list)
+- ✅ All frontmatter fields are preserved
+- ✅ Imports are resolved correctly (if applicable)
+
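+One way to spot-check the "valid GitHub Actions syntax" item is to parse the generated lock file with `yq` (listed among this workflow's allowed shell tools); the sketch assumes the Go-based yq v4 syntax:
+
+```bash
+# A parse failure here indicates the generated YAML is not well-formed
+LOCK_FILE=$(ls .github/workflows/*.lock.yml | head -1)
+yq '.name' "$LOCK_FILE"
+yq '.jobs | keys' "$LOCK_FILE"
+```
+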
+### 4.3 Validation Mode Tests
+
+Test compilation validation:
+
+```
+Use the "compile" tool with strict validation enabled (if supported by MCP interface)
+```
+
+**Validation checks**:
+- ✅ Strict mode detects invalid configurations
+- ✅ Helpful error messages for validation failures
+- ✅ Line numbers referenced correctly in errors
+
+### 4.4 Verify Generated Lock Files
+
+After compilation, inspect generated lock files:
+
+```bash
+# Find recently compiled lock files
+find .github/workflows/ -name "*.lock.yml" -mmin -10 | head -5
+
+# Check a generated lock file structure
+for lockfile in $(find .github/workflows/ -name "*.lock.yml" -mmin -10 | head -3); do
+ echo "=== $lockfile ==="
+ head -50 "$lockfile"
+ echo ""
+done
+```
+
+**Validation checks**:
+- ✅ Lock files have correct structure
+- ✅ Jobs are configured correctly
+- ✅ Environment variables are set
+- ✅ Safe output jobs are created
+- ✅ Frontmatter hash is included
+
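+The sketch below spot-checks two of these markers (a top-level `jobs:` key and the `frontmatter-hash:` comment) in the most recently compiled lock files; it is a heuristic check, not a full structural validation.
+
+```bash
+# Spot-check structural markers in recently compiled lock files.
+for lockfile in $(find .github/workflows/ -name "*.lock.yml" -mmin -10 | head -3); do
+  echo "=== $lockfile ==="
+  grep -q "^jobs:" "$lockfile" && echo "has jobs section" || echo "MISSING jobs section"
+  grep -q "frontmatter-hash:" "$lockfile" && echo "has frontmatter hash" || echo "MISSING frontmatter hash"
+done
+```
+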
+### 4.5 Incremental Compilation Tests
+
+Test whether compilation correctly detects changes:
+
+```bash
+# Record current lock file content checksums (ls -la would also flag mere mtime changes)
+sha256sum .github/workflows/*.lock.yml > /tmp/before.txt
+
+# Compile again without changing any markdown source
+# Use the "compile" tool to recompile all workflows
+
+# Check whether lock file contents changed
+sha256sum .github/workflows/*.lock.yml > /tmp/after.txt
+diff /tmp/before.txt /tmp/after.txt
+```
+
+**Expected**: Lock file contents should not change if the markdown sources haven't changed
+
+### 4.6 Edge Cases for Compile
+
+Test error handling:
+
+#### Edge Case A: Malformed Markdown
+```
+Create a test workflow with invalid YAML frontmatter
+Attempt to compile it
+```
+
+**Expected**: Clear error message with line number
+
+#### Edge Case B: Invalid Tool Configuration
+```
+Create a test workflow with non-existent tool
+Attempt to compile it
+```
+
+**Expected**: Validation error identifying the invalid tool
+
+#### Edge Case C: Missing Imports
+```
+Create a test workflow that imports a non-existent file
+Attempt to compile it
+```
+
+**Expected**: Error indicating missing import file
+
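+For Edge Case A, a throwaway workflow with deliberately broken frontmatter can be created in the checkout; the filename below is hypothetical, and the file should be removed once the error message has been recorded.
+
+```bash
+# Hypothetical broken workflow for Edge Case A (unterminated quote in the frontmatter).
+cat > .github/workflows/test-invalid-frontmatter.md <<'EOF'
+---
+on: workflow_dispatch
+permissions:
+  contents: "read
+---
+# Deliberately broken test workflow
+EOF
+# ...attempt compilation via the "compile" tool and record the error message...
+rm .github/workflows/test-invalid-frontmatter.md   # clean up afterwards
+```
+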
+### 4.7 Document Compile Test Results
+
+Create a summary:
+- Compilation success rate?
+- Performance acceptable?
+- Error messages helpful?
+- Any crashes or hangs?
+- Lock file quality issues?
+
+## Phase 5: Cross-Command Integration Tests
+
+Test how the commands work together:
+
+### 5.1 Compile → Run → Audit Flow
+
+1. **Compile**: Compile a test workflow
+2. **Run**: Trigger it (via MCP if possible; otherwise note it for a manual trigger, as sketched below)
+3. **Audit**: Audit the run after it completes
+
+**Validation**: End-to-end workflow lifecycle works correctly
+
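+If the MCP interface cannot trigger runs, the `gh` CLI sketch below is one option; note that this job's own token has read-only access to actions, so triggering may need to be left as a manual follow-up. The workflow name is a placeholder.
+
+```bash
+# Requires an authenticated gh CLI with permission to dispatch workflows.
+gh workflow run <compiled-workflow>.lock.yml
+gh run list --workflow <compiled-workflow>.lock.yml --limit 1 --json databaseId,status
+```
+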
+### 5.2 Logs → Audit Integration
+
+1. **Download logs**: Use "logs" to find recent runs
+2. **Extract run ID**: Parse a run ID from the logs
+3. **Audit the run**: Use "audit" with that run ID
+
+**Validation**: Data consistency between logs and audit
+
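+A hedged sketch for step 2: it assumes the downloaded logs live under `/tmp/gh-aw/aw-mcp/logs/` and that run IDs show up in directory or file names, which may not match the actual layout.
+
+```bash
+# Pull candidate run IDs out of the downloaded logs directory; adjust if the layout differs.
+find /tmp/gh-aw/aw-mcp/logs/ -maxdepth 2 2>/dev/null | grep -oE '[0-9]{9,}' | sort -u | head -5
+```
+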
+### 5.3 Status → Compile Integration
+
+1. **Check status**: Use "status" to see workflow states
+2. **Identify outdated**: Find workflows needing recompilation
+3. **Compile**: Recompile those workflows
+
+**Validation**: Status correctly identifies outdated workflows
+
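+As a cross-check on what "status" reports, a simple heuristic is that a markdown workflow newer than its lock file probably needs recompiling; this relies on file timestamps only and assumes the `.md`/`.lock.yml` naming convention used in this repository.
+
+```bash
+# Flag markdown workflows that are newer than their lock files.
+for md in .github/workflows/*.md; do
+  lock="${md%.md}.lock.yml"
+  if [ -f "$lock" ] && [ "$md" -nt "$lock" ]; then
+    echo "possibly outdated: $md"
+  fi
+done
+```
+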
+## Phase 6: Performance and Reliability Testing
+
+### 6.1 Performance Benchmarks
+
+Measure and document performance:
+
+```bash
+# MCP tool calls cannot be wrapped in `time` directly, so capture timestamps
+# around each call instead: "logs", "audit" (with a recent run ID), and "compile".
+start=$(date +%s)   # record just before invoking the tool
+# ... invoke the tool via the agentic-workflows MCP interface ...
+end=$(date +%s); echo "elapsed: $((end - start))s"
+```
+
+**Document**:
+- Logs download: ___ seconds (for N runs)
+- Audit: ___ seconds per run
+- Compile: ___ seconds (for M workflows)
+
+**Expected targets**:
+- Logs: <10s for typical query
+- Audit: <30s for most runs
+- Compile: <5s per workflow
+
+### 6.2 Resource Usage
+
+Monitor resource consumption during testing:
+
+```bash
+# Check disk usage
+df -h /tmp/gh-aw/
+
+# Count log files downloaded
+find /tmp/gh-aw/aw-mcp/logs/ -type f | wc -l
+
+# Check log file sizes
+du -sh /tmp/gh-aw/aw-mcp/logs/
+```
+
+### 6.3 Reliability Assessment
+
+Track reliability metrics:
+- Commands executed successfully: ___
+- Commands that failed: ___
+- Crashes or hangs: ___
+- Unexpected behaviors: ___
+
+## Phase 7: Usability and Developer Experience
+
+### 7.1 Error Message Quality
+
+Review all error messages encountered:
+- Are they clear and actionable?
+- Do they suggest next steps?
+- Are they too technical or too vague?
+
+### 7.2 Output Format Assessment
+
+Evaluate output formats:
+- Is JSON/text output well-structured?
+- Is information easy to parse?
+- Are important details highlighted?
+
+### 7.3 Documentation Gaps
+
+Identify areas where documentation could be improved:
+- Missing command options?
+- Unclear behavior?
+- Undocumented features?
+
+## Phase 8: Issue Creation and Reporting
+
+### 8.1 Categorize Findings
+
+Group your findings into categories:
+1. **Critical bugs**: Crashes, data loss, incorrect results
+2. **Major issues**: Significant usability problems, missing features
+3. **Minor issues**: Small bugs, cosmetic issues
+4. **Enhancements**: Ideas for improvement
+
+### 8.2 Create Issues for Problems
+
+For each significant problem found, create a GitHub issue with:
+
+**Issue Template**:
+```markdown
+### Problem Description
+
+[Clear description of the issue]
+
+### Command/Tool
+
+- **Tool**: audit / logs / compile
+- **Command**: [Exact command or MCP tool usage]
+
+### Steps to Reproduce
+
+1. [Step 1]
+2. [Step 2]
+3. [Step 3]
+
+### Expected Behavior
+
+[What should happen]
+
+### Actual Behavior
+
+[What actually happened]
+
+### Environment
+
+- **Repository**: ${{ github.repository }}
+- **Run ID**: ${{ github.run_id }}
+- **Date**: [Date of testing]
+- **gh-aw version**: [From status command if available]
+
+### Impact
+
+- **Severity**: Critical / High / Medium / Low
+- **Frequency**: Always / Sometimes / Rare
+- **Workaround**: [If available]
+
+### Logs/Diagnostics
+
+[Relevant log excerpts, error messages, screenshots]
+
+### Additional Context
+
+[Any other relevant information]
+```
+
+**IMPORTANT**: Create one issue per distinct problem (max 5 issues as per safe-outputs config).
+
+### 8.3 Use Noop for Successful Testing
+
+If all tests pass and no problems are detected:
+
+```
+Use the "noop" safe output with a message like:
+"✅ Daily CLI tools testing completed successfully. All audit, logs, and compile commands functioning correctly. No issues detected."
+```
+
+## Success Criteria
+
+A successful testing session will:
+
+✅ **Phase 1**: Discover and document available workflows
+✅ **Phase 2**: Thoroughly test logs command with various filters and edge cases
+✅ **Phase 3**: Audit multiple workflow runs and verify report completeness
+✅ **Phase 4**: Compile workflows and validate generated lock files
+✅ **Phase 5**: Test integration between commands
+✅ **Phase 6**: Measure and document performance
+✅ **Phase 7**: Assess usability and developer experience
+✅ **Phase 8**: Create detailed issues for any problems found, or use noop if all tests pass
+
+## Testing Philosophy
+
+As an exploratory tester, you should:
+
+🔍 **Be curious**: Don't just test the happy path - try edge cases and unusual inputs
+🎯 **Be systematic**: Follow the phases in order to ensure comprehensive coverage
+📝 **Be thorough**: Document everything you try and observe
+🐛 **Be skeptical**: Question assumptions and verify expected behaviors
+💡 **Be creative**: Think of scenarios that might not be explicitly documented
+
+## Timeout Management
+
+You have 60 minutes to complete testing. If approaching timeout:
+
+1. **Prioritize**: Complete critical tests (logs download, basic audit, basic compile) first
+2. **Document**: Note which phases were not completed
+3. **Create issue**: If timeout is due to performance problems, create an issue about it
+
+## Begin Testing
+
+Start your exploratory testing session now. Work through each phase systematically, document your findings, and create issues for any problems discovered.
+
+Good luck! 🚀