diff --git a/.github/workflows/smoke-copilot.lock.yml b/.github/workflows/smoke-copilot.lock.yml index fe68837808..fc638892c1 100644 --- a/.github/workflows/smoke-copilot.lock.yml +++ b/.github/workflows/smoke-copilot.lock.yml @@ -20,6 +20,10 @@ # For more information: https://github.com/githubnext/gh-aw/blob/main/.github/aw/github-agentic-workflows.md # # Smoke Copilot +# +# Resolved workflow manifest: +# Imports: +# - shared/mcp/tavily.md name: "Smoke Copilot" "on": @@ -541,6 +545,19 @@ jobs: "entrypoint": "serena", "entrypointArgs": ["start-mcp-server", "--context", "codex", "--project", "${{ github.workspace }}"], "mounts": ["${{ github.workspace }}:${{ github.workspace }}:rw"] + }, + "tavily": { + "type": "http", + "url": "https://mcp.tavily.com/mcp/", + "headers": { + "Authorization": "Bearer \${TAVILY_API_KEY}" + }, + "tools": [ + "*" + ], + "env": { + "TAVILY_API_KEY": "\${TAVILY_API_KEY}" + } } }, "gateway": { @@ -610,6 +627,8 @@ jobs: run: | bash /opt/gh-aw/actions/create_prompt_first.sh cat << 'PROMPT_EOF' > "$GH_AW_PROMPT" + + # Smoke Test: Copilot Engine Validation **IMPORTANT: Keep all outputs extremely short and concise. Use single-line responses where possible. No verbose explanations.** @@ -619,8 +638,9 @@ jobs: 1. **GitHub MCP Testing**: Review the last 2 merged pull requests in __GH_AW_GITHUB_REPOSITORY__ 2. **Serena Go Testing**: Use the `serena-go` tool to run a basic go command like "go version" to verify the tool is available 3. **Playwright Testing**: Use playwright to navigate to and verify the page title contains "GitHub" - 4. **File Writing Testing**: Create a test file `/tmp/gh-aw/agent/smoke-test-copilot-__GH_AW_GITHUB_RUN_ID__.txt` with content "Smoke test passed for Copilot at $(date)" (create the directory if it doesn't exist) - 5. **Bash Tool Testing**: Execute bash commands to verify file creation was successful (use `cat` to read the file back) + 4. **Tavily Web Search Testing**: Use the Tavily MCP server to perform a web search for "GitHub Agentic Workflows" and verify that results are returned with at least one item + 5. **File Writing Testing**: Create a test file `/tmp/gh-aw/agent/smoke-test-copilot-__GH_AW_GITHUB_RUN_ID__.txt` with content "Smoke test passed for Copilot at $(date)" (create the directory if it doesn't exist) + 6. **Bash Tool Testing**: Execute bash commands to verify file creation was successful (use `cat` to read the file back) ## Output @@ -804,7 +824,7 @@ jobs: timeout-minutes: 5 run: | set -o pipefail - sudo -E awf --env-all --container-workdir "${GITHUB_WORKSPACE}" --mount /tmp:/tmp:rw --mount "${GITHUB_WORKSPACE}:${GITHUB_WORKSPACE}:rw" --mount /usr/bin/date:/usr/bin/date:ro --mount /usr/bin/gh:/usr/bin/gh:ro --mount /usr/bin/yq:/usr/bin/yq:ro --mount /usr/local/bin/copilot:/usr/local/bin/copilot:ro --mount /home/runner/.copilot:/home/runner/.copilot:rw --mount /opt/gh-aw:/opt/gh-aw:ro --allow-domains '*.githubusercontent.com,api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.npms.io,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,bun.sh,cdn.playwright.dev,codeload.github.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,deb.nodesource.com,deno.land,get.pnpm.io,github-cloud.githubusercontent.com,github-cloud.s3.amazonaws.com,github.com,github.githubassets.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,lfs.github.com,nodejs.org,npm.pkg.github.com,npmjs.com,npmjs.org,objects.githubusercontent.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,playwright.download.prss.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.bower.io,registry.npmjs.com,registry.npmjs.org,registry.yarnpkg.com,repo.yarnpkg.com,s.symcb.com,s.symcd.com,security.ubuntu.com,skimdb.npmjs.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.npmjs.com,www.npmjs.org,yarnpkg.com' --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --enable-host-access --image-tag 0.8.2 \ + sudo -E awf --env-all --container-workdir "${GITHUB_WORKSPACE}" --mount /tmp:/tmp:rw --mount "${GITHUB_WORKSPACE}:${GITHUB_WORKSPACE}:rw" --mount /usr/bin/date:/usr/bin/date:ro --mount /usr/bin/gh:/usr/bin/gh:ro --mount /usr/bin/yq:/usr/bin/yq:ro --mount /usr/local/bin/copilot:/usr/local/bin/copilot:ro --mount /home/runner/.copilot:/home/runner/.copilot:rw --mount /opt/gh-aw:/opt/gh-aw:ro --allow-domains '*.githubusercontent.com,api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.npms.io,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,bun.sh,cdn.playwright.dev,codeload.github.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,deb.nodesource.com,deno.land,get.pnpm.io,github-cloud.githubusercontent.com,github-cloud.s3.amazonaws.com,github.com,github.githubassets.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,lfs.github.com,mcp.tavily.com,nodejs.org,npm.pkg.github.com,npmjs.com,npmjs.org,objects.githubusercontent.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,playwright.download.prss.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.bower.io,registry.npmjs.com,registry.npmjs.org,registry.yarnpkg.com,repo.yarnpkg.com,s.symcb.com,s.symcd.com,security.ubuntu.com,skimdb.npmjs.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.npmjs.com,www.npmjs.org,yarnpkg.com' --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --enable-host-access --image-tag 0.8.2 \ -- /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --add-dir "${GITHUB_WORKSPACE}" --disable-builtin-mcps --allow-all-tools --add-dir /tmp/gh-aw/cache-memory/ --allow-all-paths --share /tmp/gh-aw/sandbox/agent/logs/conversation.md --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"${GH_AW_MODEL_AGENT_COPILOT:+ --model "$GH_AW_MODEL_AGENT_COPILOT"} \ 2>&1 | tee /tmp/gh-aw/agent-stdio.log env: @@ -818,6 +838,7 @@ jobs: GITHUB_REF_NAME: ${{ github.ref_name }} GITHUB_STEP_SUMMARY: ${{ env.GITHUB_STEP_SUMMARY }} GITHUB_WORKSPACE: ${{ github.workspace }} + TAVILY_API_KEY: ${{ secrets.TAVILY_API_KEY }} XDG_CONFIG_HOME: /home/runner - name: Copy Copilot session state files to logs if: always() @@ -854,11 +875,12 @@ jobs: const { main } = require('/opt/gh-aw/actions/redact_secrets.cjs'); await main(); env: - GH_AW_SECRET_NAMES: 'COPILOT_GITHUB_TOKEN,GH_AW_GITHUB_MCP_SERVER_TOKEN,GH_AW_GITHUB_TOKEN,GITHUB_TOKEN' + GH_AW_SECRET_NAMES: 'COPILOT_GITHUB_TOKEN,GH_AW_GITHUB_MCP_SERVER_TOKEN,GH_AW_GITHUB_TOKEN,GITHUB_TOKEN,TAVILY_API_KEY' SECRET_COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} SECRET_GH_AW_GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }} SECRET_GH_AW_GITHUB_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN }} SECRET_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + SECRET_TAVILY_API_KEY: ${{ secrets.TAVILY_API_KEY }} - name: Upload Safe Outputs if: always() uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 diff --git a/.github/workflows/smoke-copilot.md b/.github/workflows/smoke-copilot.md index 5e5c670af2..728fb418b8 100644 --- a/.github/workflows/smoke-copilot.md +++ b/.github/workflows/smoke-copilot.md @@ -13,6 +13,8 @@ permissions: issues: read name: Smoke Copilot engine: copilot +imports: + - shared/mcp/tavily.md network: allowed: - defaults @@ -59,8 +61,9 @@ strict: true 1. **GitHub MCP Testing**: Review the last 2 merged pull requests in ${{ github.repository }} 2. **Serena Go Testing**: Use the `serena-go` tool to run a basic go command like "go version" to verify the tool is available 3. **Playwright Testing**: Use playwright to navigate to and verify the page title contains "GitHub" -4. **File Writing Testing**: Create a test file `/tmp/gh-aw/agent/smoke-test-copilot-${{ github.run_id }}.txt` with content "Smoke test passed for Copilot at $(date)" (create the directory if it doesn't exist) -5. **Bash Tool Testing**: Execute bash commands to verify file creation was successful (use `cat` to read the file back) +4. **Tavily Web Search Testing**: Use the Tavily MCP server to perform a web search for "GitHub Agentic Workflows" and verify that results are returned with at least one item +5. **File Writing Testing**: Create a test file `/tmp/gh-aw/agent/smoke-test-copilot-${{ github.run_id }}.txt` with content "Smoke test passed for Copilot at $(date)" (create the directory if it doesn't exist) +6. **Bash Tool Testing**: Execute bash commands to verify file creation was successful (use `cat` to read the file back) ## Output diff --git a/actions/setup/sh/convert_gateway_config_copilot.sh b/actions/setup/sh/convert_gateway_config_copilot.sh index 14f471bbd6..4d4712cac8 100755 --- a/actions/setup/sh/convert_gateway_config_copilot.sh +++ b/actions/setup/sh/convert_gateway_config_copilot.sh @@ -63,7 +63,8 @@ echo "Target domain: $MCP_GATEWAY_DOMAIN:$MCP_GATEWAY_PORT" # } # # The main differences: -# 1. Copilot requires the "tools" field +# 1. Copilot requires the "tools" field, always set to ["*"] (wildcard) +# Tool filtering is handled at the gateway level # 2. URLs must use the correct domain (host.docker.internal) for container access # The gateway may output 0.0.0.0 or localhost which won't work from within containers @@ -73,8 +74,8 @@ URL_PREFIX="http://${MCP_GATEWAY_DOMAIN}:${MCP_GATEWAY_PORT}" jq --arg urlPrefix "$URL_PREFIX" ' .mcpServers |= with_entries( .value |= ( - # Add tools field if not present - (if .tools then . else . + {"tools": ["*"]} end) | + # Always use wildcard for tools (filtering done at gateway level) + (.tools = ["*"]) | # Fix the URL to use the correct domain # Replace http://anything:port/mcp/ with http://domain:port/mcp/ .url |= (. | sub("^http://[^/]+/mcp/"; $urlPrefix + "/mcp/")) diff --git a/pkg/cli/gateway_logs.go b/pkg/cli/gateway_logs.go index 469cacc1c3..857e278bf5 100644 --- a/pkg/cli/gateway_logs.go +++ b/pkg/cli/gateway_logs.go @@ -27,29 +27,33 @@ var gatewayLogsLog = logger.New("cli:gateway_logs") // GatewayLogEntry represents a single log entry from gateway.jsonl type GatewayLogEntry struct { - Timestamp string `json:"timestamp"` - Level string `json:"level"` - Type string `json:"type"` - Event string `json:"event"` - ServerName string `json:"server_name,omitempty"` - ToolName string `json:"tool_name,omitempty"` - Method string `json:"method,omitempty"` - Duration float64 `json:"duration,omitempty"` // in milliseconds - InputSize int `json:"input_size,omitempty"` - OutputSize int `json:"output_size,omitempty"` - Status string `json:"status,omitempty"` - Error string `json:"error,omitempty"` - Message string `json:"message,omitempty"` + Timestamp string `json:"timestamp"` + Level string `json:"level"` + Type string `json:"type"` + Event string `json:"event"` + ServerName string `json:"server_name,omitempty"` + ToolName string `json:"tool_name,omitempty"` + Method string `json:"method,omitempty"` + Duration float64 `json:"duration,omitempty"` // in milliseconds + InputSize int `json:"input_size,omitempty"` + OutputSize int `json:"output_size,omitempty"` + Status string `json:"status,omitempty"` + Error string `json:"error,omitempty"` + Message string `json:"message,omitempty"` + TimeoutType string `json:"timeout_type,omitempty"` // "startup" or "tool" } // GatewayServerMetrics represents usage metrics for a single MCP server type GatewayServerMetrics struct { - ServerName string - RequestCount int - ToolCallCount int - TotalDuration float64 // in milliseconds - ErrorCount int - Tools map[string]*GatewayToolMetrics + ServerName string + RequestCount int + ToolCallCount int + TotalDuration float64 // in milliseconds + ErrorCount int + TimeoutCount int + StartupTimeouts int + ToolTimeouts int + Tools map[string]*GatewayToolMetrics } // GatewayToolMetrics represents usage metrics for a specific tool @@ -61,19 +65,23 @@ type GatewayToolMetrics struct { MaxDuration float64 // in milliseconds MinDuration float64 // in milliseconds ErrorCount int + TimeoutCount int TotalInputSize int TotalOutputSize int } // GatewayMetrics represents aggregated metrics from gateway logs type GatewayMetrics struct { - TotalRequests int - TotalToolCalls int - TotalErrors int - Servers map[string]*GatewayServerMetrics - StartTime time.Time - EndTime time.Time - TotalDuration float64 // in milliseconds + TotalRequests int + TotalToolCalls int + TotalErrors int + TotalTimeouts int + StartupTimeouts int + ToolTimeouts int + Servers map[string]*GatewayServerMetrics + StartTime time.Time + EndTime time.Time + TotalDuration float64 // in milliseconds } // parseGatewayLogs parses a gateway.jsonl file and extracts metrics @@ -166,6 +174,35 @@ func processGatewayLogEntry(entry *GatewayLogEntry, metrics *GatewayMetrics, ver // Process based on event type switch entry.Event { + case "timeout": + // Track timeout events + metrics.TotalTimeouts++ + + if entry.ServerName != "" { + server := getOrCreateServer(metrics, entry.ServerName) + server.TimeoutCount++ + + // Track timeout type (startup vs tool) + switch entry.TimeoutType { + case "startup": + metrics.StartupTimeouts++ + server.StartupTimeouts++ + case "tool": + metrics.ToolTimeouts++ + server.ToolTimeouts++ + + // Track tool-specific timeout + if entry.ToolName != "" || entry.Method != "" { + toolName := entry.ToolName + if toolName == "" { + toolName = entry.Method + } + tool := getOrCreateTool(server, toolName) + tool.TimeoutCount++ + } + } + } + case "request", "tool_call", "rpc_call": metrics.TotalRequests++ @@ -266,6 +303,11 @@ func renderGatewayMetricsTable(metrics *GatewayMetrics, verbose bool) string { fmt.Fprintf(&output, "Total Requests: %d\n", metrics.TotalRequests) fmt.Fprintf(&output, "Total Tool Calls: %d\n", metrics.TotalToolCalls) fmt.Fprintf(&output, "Total Errors: %d\n", metrics.TotalErrors) + fmt.Fprintf(&output, "Total Timeouts: %d", metrics.TotalTimeouts) + if metrics.TotalTimeouts > 0 { + fmt.Fprintf(&output, " (Startup: %d, Tool: %d)", metrics.StartupTimeouts, metrics.ToolTimeouts) + } + fmt.Fprintf(&output, "\n") fmt.Fprintf(&output, "Servers: %d\n", len(metrics.Servers)) if !metrics.StartTime.IsZero() && !metrics.EndTime.IsZero() { @@ -278,9 +320,6 @@ func renderGatewayMetricsTable(metrics *GatewayMetrics, verbose bool) string { // Server metrics table if len(metrics.Servers) > 0 { output.WriteString("Server Usage:\n") - output.WriteString("┌────────────────────────────┬──────────┬────────────┬───────────┬────────┐\n") - output.WriteString("│ Server │ Requests │ Tool Calls │ Avg Time │ Errors │\n") - output.WriteString("├────────────────────────────┼──────────┼────────────┼───────────┼────────┤\n") // Sort servers by request count var serverNames []string @@ -291,6 +330,12 @@ func renderGatewayMetricsTable(metrics *GatewayMetrics, verbose bool) string { return metrics.Servers[serverNames[i]].RequestCount > metrics.Servers[serverNames[j]].RequestCount }) + // Build table config + tableConfig := console.TableConfig{ + Headers: []string{"Server", "Requests", "Tool Calls", "Avg Time", "Errors", "Timeouts"}, + Rows: make([][]string, 0, len(serverNames)), + } + for _, serverName := range serverNames { server := metrics.Servers[serverName] avgTime := 0.0 @@ -298,15 +343,18 @@ func renderGatewayMetricsTable(metrics *GatewayMetrics, verbose bool) string { avgTime = server.TotalDuration / float64(server.RequestCount) } - fmt.Fprintf(&output, "│ %-26s │ %8d │ %10d │ %7.0fms │ %6d │\n", + row := []string{ truncateString(serverName, 26), - server.RequestCount, - server.ToolCallCount, - avgTime, - server.ErrorCount) + fmt.Sprintf("%d", server.RequestCount), + fmt.Sprintf("%d", server.ToolCallCount), + fmt.Sprintf("%.0fms", avgTime), + fmt.Sprintf("%d", server.ErrorCount), + fmt.Sprintf("%d", server.TimeoutCount), + } + tableConfig.Rows = append(tableConfig.Rows, row) } - output.WriteString("└────────────────────────────┴──────────┴────────────┴───────────┴────────┘\n") + output.WriteString(console.RenderTable(tableConfig)) } // Tool metrics table (if verbose) @@ -321,9 +369,6 @@ func renderGatewayMetricsTable(metrics *GatewayMetrics, verbose bool) string { } fmt.Fprintf(&output, "\n%s:\n", serverName) - output.WriteString("┌──────────────────────────┬───────┬──────────┬──────────┬──────────┐\n") - output.WriteString("│ Tool │ Calls │ Avg Time │ Max Time │ Errors │\n") - output.WriteString("├──────────────────────────┼───────┼──────────┼──────────┼──────────┤\n") // Sort tools by call count var toolNames []string @@ -334,17 +379,26 @@ func renderGatewayMetricsTable(metrics *GatewayMetrics, verbose bool) string { return server.Tools[toolNames[i]].CallCount > server.Tools[toolNames[j]].CallCount }) + // Build table config + tableConfig := console.TableConfig{ + Headers: []string{"Tool", "Calls", "Avg Time", "Max Time", "Errors", "Timeouts"}, + Rows: make([][]string, 0, len(toolNames)), + } + for _, toolName := range toolNames { tool := server.Tools[toolName] - fmt.Fprintf(&output, "│ %-24s │ %5d │ %6.0fms │ %6.0fms │ %8d │\n", + row := []string{ truncateString(toolName, 24), - tool.CallCount, - tool.AvgDuration, - tool.MaxDuration, - tool.ErrorCount) + fmt.Sprintf("%d", tool.CallCount), + fmt.Sprintf("%.0fms", tool.AvgDuration), + fmt.Sprintf("%.0fms", tool.MaxDuration), + fmt.Sprintf("%d", tool.ErrorCount), + fmt.Sprintf("%d", tool.TimeoutCount), + } + tableConfig.Rows = append(tableConfig.Rows, row) } - output.WriteString("└──────────────────────────┴───────┴──────────┴──────────┴──────────┘\n") + output.WriteString(console.RenderTable(tableConfig)) } } @@ -401,6 +455,9 @@ func displayAggregatedGatewayMetrics(processedRuns []ProcessedRun, outputDir str aggregated.TotalRequests += runMetrics.TotalRequests aggregated.TotalToolCalls += runMetrics.TotalToolCalls aggregated.TotalErrors += runMetrics.TotalErrors + aggregated.TotalTimeouts += runMetrics.TotalTimeouts + aggregated.StartupTimeouts += runMetrics.StartupTimeouts + aggregated.ToolTimeouts += runMetrics.ToolTimeouts aggregated.TotalDuration += runMetrics.TotalDuration // Merge server metrics @@ -410,6 +467,9 @@ func displayAggregatedGatewayMetrics(processedRuns []ProcessedRun, outputDir str aggServer.ToolCallCount += serverMetrics.ToolCallCount aggServer.TotalDuration += serverMetrics.TotalDuration aggServer.ErrorCount += serverMetrics.ErrorCount + aggServer.TimeoutCount += serverMetrics.TimeoutCount + aggServer.StartupTimeouts += serverMetrics.StartupTimeouts + aggServer.ToolTimeouts += serverMetrics.ToolTimeouts // Merge tool metrics for toolName, toolMetrics := range serverMetrics.Tools { @@ -417,6 +477,7 @@ func displayAggregatedGatewayMetrics(processedRuns []ProcessedRun, outputDir str aggTool.CallCount += toolMetrics.CallCount aggTool.TotalDuration += toolMetrics.TotalDuration aggTool.ErrorCount += toolMetrics.ErrorCount + aggTool.TimeoutCount += toolMetrics.TimeoutCount aggTool.TotalInputSize += toolMetrics.TotalInputSize aggTool.TotalOutputSize += toolMetrics.TotalOutputSize diff --git a/pkg/cli/gateway_logs_test.go b/pkg/cli/gateway_logs_test.go index 2ab07de291..0a8bca663c 100644 --- a/pkg/cli/gateway_logs_test.go +++ b/pkg/cli/gateway_logs_test.go @@ -42,6 +42,18 @@ func TestParseGatewayLogs(t *testing.T) { wantErrors: 1, wantErr: false, }, + { + name: "gateway log with timeout events", + logContent: `{"timestamp":"2024-01-12T10:00:00Z","level":"error","type":"timeout","event":"timeout","server_name":"github","tool_name":"get_repository","timeout_type":"tool","error":"tool timeout exceeded"} +{"timestamp":"2024-01-12T10:00:01Z","level":"error","type":"timeout","event":"timeout","server_name":"playwright","timeout_type":"startup","error":"startup timeout exceeded"} +{"timestamp":"2024-01-12T10:00:02Z","level":"info","type":"request","event":"tool_call","server_name":"github","tool_name":"list_issues","duration":100.0,"status":"success"} +`, + wantServers: 2, + wantRequests: 1, + wantToolCalls: 1, + wantErrors: 2, + wantErr: false, + }, { name: "gateway log with multiple servers", logContent: `{"timestamp":"2024-01-12T10:00:00Z","level":"info","type":"request","event":"rpc_call","server_name":"github","method":"list_repos","duration":100.0,"status":"success"} @@ -450,3 +462,73 @@ func TestGatewayLogsParsingIntegration(t *testing.T) { assert.False(t, metrics.EndTime.IsZero()) assert.True(t, metrics.EndTime.After(metrics.StartTime)) } + +// TestGatewayTimeoutEvents tests that timeout events are properly tracked +func TestGatewayTimeoutEvents(t *testing.T) { + tmpDir := t.TempDir() + + // Create a log with timeout events + logContent := `{"timestamp":"2024-01-12T10:00:00Z","level":"error","type":"timeout","event":"timeout","server_name":"github","tool_name":"get_repository","timeout_type":"tool","error":"tool timeout exceeded","duration":60000} +{"timestamp":"2024-01-12T10:00:01Z","level":"error","type":"timeout","event":"timeout","server_name":"playwright","timeout_type":"startup","error":"startup timeout exceeded","duration":30000} +{"timestamp":"2024-01-12T10:00:02Z","level":"error","type":"timeout","event":"timeout","server_name":"github","tool_name":"list_issues","timeout_type":"tool","error":"tool timeout exceeded","duration":60000} +{"timestamp":"2024-01-12T10:00:03Z","level":"info","type":"request","event":"tool_call","server_name":"github","tool_name":"list_issues","duration":100.0,"status":"success"} +` + + gatewayLogPath := filepath.Join(tmpDir, "gateway.jsonl") + err := os.WriteFile(gatewayLogPath, []byte(logContent), 0644) + require.NoError(t, err) + + metrics, err := parseGatewayLogs(tmpDir, false) + require.NoError(t, err) + require.NotNil(t, metrics) + + // Verify timeout metrics + assert.Equal(t, 3, metrics.TotalTimeouts, "Should have 3 total timeouts") + assert.Equal(t, 1, metrics.StartupTimeouts, "Should have 1 startup timeout") + assert.Equal(t, 2, metrics.ToolTimeouts, "Should have 2 tool timeouts") + assert.Equal(t, 3, metrics.TotalErrors, "Should have 3 errors (all timeouts are errors)") + assert.Equal(t, 1, metrics.TotalRequests, "Should have 1 request") + assert.Equal(t, 1, metrics.TotalToolCalls, "Should have 1 tool call") + + // Verify GitHub server timeout metrics + githubServer := metrics.Servers["github"] + require.NotNil(t, githubServer) + assert.Equal(t, 2, githubServer.TimeoutCount, "GitHub should have 2 timeouts") + assert.Equal(t, 0, githubServer.StartupTimeouts, "GitHub should have 0 startup timeouts") + assert.Equal(t, 2, githubServer.ToolTimeouts, "GitHub should have 2 tool timeouts") + assert.Equal(t, 2, githubServer.ErrorCount, "GitHub should have 2 errors") + assert.Equal(t, 1, githubServer.RequestCount, "GitHub should have 1 request") + + // Verify Playwright server timeout metrics + playwrightServer := metrics.Servers["playwright"] + require.NotNil(t, playwrightServer) + assert.Equal(t, 1, playwrightServer.TimeoutCount, "Playwright should have 1 timeout") + assert.Equal(t, 1, playwrightServer.StartupTimeouts, "Playwright should have 1 startup timeout") + assert.Equal(t, 0, playwrightServer.ToolTimeouts, "Playwright should have 0 tool timeouts") + assert.Equal(t, 1, playwrightServer.ErrorCount, "Playwright should have 1 error") + assert.Equal(t, 0, playwrightServer.RequestCount, "Playwright should have 0 requests") + + // Verify tool-specific timeout metrics + getRepoTool := githubServer.Tools["get_repository"] + require.NotNil(t, getRepoTool) + assert.Equal(t, 1, getRepoTool.TimeoutCount, "get_repository should have 1 timeout") + assert.Equal(t, 0, getRepoTool.CallCount, "get_repository should have 0 calls") + + listIssuesTool := githubServer.Tools["list_issues"] + require.NotNil(t, listIssuesTool) + assert.Equal(t, 1, listIssuesTool.TimeoutCount, "list_issues should have 1 timeout") + assert.Equal(t, 1, listIssuesTool.CallCount, "list_issues should have 1 call") + + // Test that timeout info appears in rendered output + output := renderGatewayMetricsTable(metrics, false) + assert.NotEmpty(t, output) + assert.Contains(t, output, "Total Timeouts: 3", "Output should show total timeouts") + assert.Contains(t, output, "Startup: 1", "Output should show startup timeouts") + assert.Contains(t, output, "Tool: 2", "Output should show tool timeouts") + assert.Contains(t, output, "Timeouts", "Output should have Timeouts column") + + // Test verbose output includes timeout column + outputVerbose := renderGatewayMetricsTable(metrics, true) + assert.NotEmpty(t, outputVerbose) + assert.Contains(t, outputVerbose, "Timeouts", "Verbose output should have Timeouts column in tool details") +}