From 2a2a627db529e627db1dbdf86ab5cf608453d32d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 12 Jan 2026 22:37:12 +0000
Subject: [PATCH 1/4] Initial plan


From d557f3f23e62b65f585281be0f79d9df65ae2e55 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 12 Jan 2026 22:43:28 +0000
Subject: [PATCH 2/4] Initial plan: Add gateway.jsonl parsing to logs and audit

Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
---
 .github/workflows/smoke-copilot.lock.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/smoke-copilot.lock.yml b/.github/workflows/smoke-copilot.lock.yml
index 450429a0ce..d98612fd5f 100644
--- a/.github/workflows/smoke-copilot.lock.yml
+++ b/.github/workflows/smoke-copilot.lock.yml
@@ -618,9 +618,9 @@ jobs:
           
           1. **GitHub MCP Testing**: Review the last 2 merged pull requests in __GH_AW_GITHUB_REPOSITORY__
           2. **Serena Go Testing**: Use the `serena-go` tool to run a basic go command like "go version" to verify the tool is available
-          3. **Playwright Testing**: Use playwright to navigate to https://github.com and verify the page title contains "GitHub"
-          5. **File Writing Testing**: Create a test file `/tmp/gh-aw/agent/smoke-test-copilot-__GH_AW_GITHUB_RUN_ID__.txt` with content "Smoke test passed for Copilot at $(date)" (create the directory if it doesn't exist)
-          6. **Bash Tool Testing**: Execute bash commands to verify file creation was successful (use `cat` to read the file back)
+          3. **Playwright Testing**: Use playwright to navigate to <https://github.com> and verify the page title contains "GitHub"
+          4. **File Writing Testing**: Create a test file `/tmp/gh-aw/agent/smoke-test-copilot-__GH_AW_GITHUB_RUN_ID__.txt` with content "Smoke test passed for Copilot at $(date)" (create the directory if it doesn't exist)
+          5. **Bash Tool Testing**: Execute bash commands to verify file creation was successful (use `cat` to read the file back)
           
           ## Output
           

From eeea5fe0dd1ca4d0259825c238588b80e50f103f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 12 Jan 2026 22:48:13 +0000
Subject: [PATCH 3/4] Add gateway.jsonl parsing with comprehensive tests

Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
---
 pkg/cli/audit.go             |   7 +
 pkg/cli/gateway_logs.go      | 375 +++++++++++++++++++++++++++++
 pkg/cli/gateway_logs_test.go | 452 +++++++++++++++++++++++++++++++++++
 pkg/cli/logs_metrics.go      |  18 ++
 4 files changed, 852 insertions(+)
 create mode 100644 pkg/cli/gateway_logs.go
 create mode 100644 pkg/cli/gateway_logs_test.go

diff --git a/pkg/cli/audit.go b/pkg/cli/audit.go
index f26e10543b..02bed62aee 100644
--- a/pkg/cli/audit.go
+++ b/pkg/cli/audit.go
@@ -340,6 +340,13 @@ func AuditWorkflowRun(ctx context.Context, runID int64, owner, repo, hostname st
 		renderConsole(auditData, runOutputDir)
 	}
 
+	// Display gateway metrics if available
+	if gatewayMetrics, err := parseGatewayLogs(runOutputDir, verbose); err == nil {
+		if metricsOutput := renderGatewayMetricsTable(gatewayMetrics, verbose); metricsOutput != "" {
+			fmt.Fprint(os.Stderr, metricsOutput)
+		}
+	}
+
 	// Conditionally attempt to render agentic log (similar to `logs --parse`) if --parse flag is set
 	// This creates a log.md file in the run directory for a rich, human-readable agent session summary.
 	// We intentionally do not fail the audit on parse errors; they are reported as warnings.
diff --git a/pkg/cli/gateway_logs.go b/pkg/cli/gateway_logs.go
new file mode 100644
index 0000000000..c38c686e0f
--- /dev/null
+++ b/pkg/cli/gateway_logs.go
@@ -0,0 +1,375 @@
+// Package cli provides command-line interface functionality for gh-aw.
+// This file (gateway_logs.go) contains functions for parsing and analyzing
+// MCP gateway logs from gateway.jsonl files.
+//
+// Key responsibilities:
+//   - Parsing gateway.jsonl JSONL format logs
+//   - Extracting server and tool usage metrics
+//   - Aggregating gateway statistics
+//   - Rendering gateway metrics tables
+package cli
+
+import (
+	"bufio"
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+	"time"
+
+	"github.com/githubnext/gh-aw/pkg/console"
+	"github.com/githubnext/gh-aw/pkg/logger"
+)
+
+var gatewayLogsLog = logger.New("cli:gateway_logs")
+
+// GatewayLogEntry represents a single log entry from gateway.jsonl
+type GatewayLogEntry struct {
+	Timestamp  string  `json:"timestamp"`
+	Level      string  `json:"level"`
+	Type       string  `json:"type"`
+	Event      string  `json:"event"`
+	ServerName string  `json:"server_name,omitempty"`
+	ToolName   string  `json:"tool_name,omitempty"`
+	Method     string  `json:"method,omitempty"`
+	Duration   float64 `json:"duration,omitempty"` // in milliseconds
+	InputSize  int     `json:"input_size,omitempty"`
+	OutputSize int     `json:"output_size,omitempty"`
+	Status     string  `json:"status,omitempty"`
+	Error      string  `json:"error,omitempty"`
+	Message    string  `json:"message,omitempty"`
+}
+
+// GatewayServerMetrics represents usage metrics for a single MCP server
+type GatewayServerMetrics struct {
+	ServerName    string
+	RequestCount  int
+	ToolCallCount int
+	TotalDuration float64 // in milliseconds
+	ErrorCount    int
+	Tools         map[string]*GatewayToolMetrics
+}
+
+// GatewayToolMetrics represents usage metrics for a specific tool
+type GatewayToolMetrics struct {
+	ToolName      string
+	CallCount     int
+	TotalDuration float64 // in milliseconds
+	AvgDuration   float64 // in milliseconds
+	MaxDuration   float64 // in milliseconds
+	MinDuration   float64 // in milliseconds
+	ErrorCount    int
+	TotalInputSize  int
+	TotalOutputSize int
+}
+
+// GatewayMetrics represents aggregated metrics from gateway logs
+type GatewayMetrics struct {
+	TotalRequests    int
+	TotalToolCalls   int
+	TotalErrors      int
+	Servers          map[string]*GatewayServerMetrics
+	StartTime        time.Time
+	EndTime          time.Time
+	TotalDuration    float64 // in milliseconds
+}
+
+// parseGatewayLogs reads <logDir>/gateway.jsonl line by line and aggregates it into GatewayMetrics; blank and unparseable lines are skipped (reported on stderr when verbose), and a missing file yields an error that wraps os.ErrNotExist.
+func parseGatewayLogs(logDir string, verbose bool) (*GatewayMetrics, error) {
+	gatewayLogPath := filepath.Join(logDir, "gateway.jsonl")
+
+	// Check if gateway.jsonl exists; wrap the stat error so callers can use errors.Is(err, os.ErrNotExist)
+	if _, err := os.Stat(gatewayLogPath); os.IsNotExist(err) {
+		gatewayLogsLog.Printf("gateway.jsonl not found at: %s", gatewayLogPath)
+		return nil, fmt.Errorf("gateway.jsonl not found: %w", err)
+	}
+
+	gatewayLogsLog.Printf("Parsing gateway.jsonl from: %s", gatewayLogPath)
+
+	file, err := os.Open(gatewayLogPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to open gateway.jsonl: %w", err)
+	}
+	defer file.Close()
+
+	metrics := &GatewayMetrics{
+		Servers: make(map[string]*GatewayServerMetrics),
+	}
+
+	scanner := bufio.NewScanner(file)
+	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) // allow lines up to 1 MiB; the 64 KiB default aborts the whole parse on one large entry
+	lineNum := 0
+	for scanner.Scan() {
+		lineNum++
+		line := strings.TrimSpace(scanner.Text())
+
+		// Skip empty lines
+		if line == "" {
+			continue
+		}
+
+		var entry GatewayLogEntry
+		if err := json.Unmarshal([]byte(line), &entry); err != nil {
+			gatewayLogsLog.Printf("Failed to parse line %d: %v", lineNum, err)
+			if verbose {
+				fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Failed to parse gateway.jsonl line %d: %v", lineNum, err)))
+			}
+			continue
+		}
+
+		// Process the entry based on its type/event
+		processGatewayLogEntry(&entry, metrics, verbose)
+	}
+
+	if err := scanner.Err(); err != nil {
+		return nil, fmt.Errorf("error reading gateway.jsonl: %w", err)
+	}
+
+	// Calculate aggregate statistics
+	calculateGatewayAggregates(metrics)
+
+	gatewayLogsLog.Printf("Successfully parsed gateway.jsonl: %d servers, %d total requests",
+		len(metrics.Servers), metrics.TotalRequests)
+
+	return metrics, nil
+}
+
+// processGatewayLogEntry folds one entry into metrics: it widens the time range, bumps error counters, and for request/tool_call/rpc_call events updates request, tool-call, duration and size counters; verbose is currently unused.
+func processGatewayLogEntry(entry *GatewayLogEntry, metrics *GatewayMetrics, verbose bool) {
+	// Parse timestamp for time range
+	if entry.Timestamp != "" {
+		if t, err := time.Parse(time.RFC3339, entry.Timestamp); err == nil {
+			if metrics.StartTime.IsZero() || t.Before(metrics.StartTime) {
+				metrics.StartTime = t
+			}
+			if metrics.EndTime.IsZero() || t.After(metrics.EndTime) {
+				metrics.EndTime = t
+			}
+		}
+	}
+
+	// Entries without tool_name are attributed to their method, for errors and calls alike
+	toolName := entry.ToolName
+	if toolName == "" {
+		toolName = entry.Method
+	}
+
+	// Track errors
+	if entry.Status == "error" || entry.Error != "" {
+		metrics.TotalErrors++
+		if entry.ServerName != "" {
+			server := getOrCreateServer(metrics, entry.ServerName)
+			server.ErrorCount++
+			if toolName != "" {
+				tool := getOrCreateTool(server, toolName)
+				tool.ErrorCount++
+			}
+		}
+	}
+
+	// Process based on event type
+	switch entry.Event {
+	case "request", "tool_call", "rpc_call":
+		metrics.TotalRequests++
+
+		if entry.ServerName != "" {
+			server := getOrCreateServer(metrics, entry.ServerName)
+			server.RequestCount++
+
+			if entry.Duration > 0 {
+				server.TotalDuration += entry.Duration
+				metrics.TotalDuration += entry.Duration
+			}
+
+			// Track tool calls
+			if toolName != "" {
+				metrics.TotalToolCalls++
+				server.ToolCallCount++
+
+				tool := getOrCreateTool(server, toolName)
+				tool.CallCount++
+
+				if entry.Duration > 0 {
+					tool.TotalDuration += entry.Duration
+					if tool.MaxDuration == 0 || entry.Duration > tool.MaxDuration {
+						tool.MaxDuration = entry.Duration
+					}
+					if tool.MinDuration == 0 || entry.Duration < tool.MinDuration {
+						tool.MinDuration = entry.Duration
+					}
+				}
+
+				if entry.InputSize > 0 {
+					tool.TotalInputSize += entry.InputSize
+				}
+				if entry.OutputSize > 0 {
+					tool.TotalOutputSize += entry.OutputSize
+				}
+			}
+		}
+	}
+}
+
+// getOrCreateServer returns the metrics entry stored under serverName in metrics.Servers, inserting a zeroed entry with an initialized Tools map on first use.
+func getOrCreateServer(metrics *GatewayMetrics, serverName string) *GatewayServerMetrics {
+	if server, exists := metrics.Servers[serverName]; exists {
+		return server
+	}
+	
+	server := &GatewayServerMetrics{
+		ServerName: serverName,
+		Tools:      make(map[string]*GatewayToolMetrics),
+	}
+	metrics.Servers[serverName] = server
+	return server
+}
+
+// getOrCreateTool returns the metrics entry stored under toolName in server.Tools, inserting a zeroed entry on first use.
+func getOrCreateTool(server *GatewayServerMetrics, toolName string) *GatewayToolMetrics {
+	if tool, exists := server.Tools[toolName]; exists {
+		return tool
+	}
+	
+	tool := &GatewayToolMetrics{
+		ToolName: toolName,
+	}
+	server.Tools[toolName] = tool
+	return tool
+}
+
+// calculateGatewayAggregates sets AvgDuration = TotalDuration / CallCount on every tool with at least one call; tools with no calls keep AvgDuration at zero.
+func calculateGatewayAggregates(metrics *GatewayMetrics) {
+	for _, server := range metrics.Servers {
+		for _, tool := range server.Tools {
+			if tool.CallCount > 0 {
+				tool.AvgDuration = tool.TotalDuration / float64(tool.CallCount)
+			}
+		}
+	}
+}
+
+// renderGatewayMetricsTable renders gateway metrics as a console table
+func renderGatewayMetricsTable(metrics *GatewayMetrics, verbose bool) string {
+	if metrics == nil || len(metrics.Servers) == 0 {
+		return ""
+	}
+
+	var output strings.Builder
+	
+	output.WriteString("\n")
+	output.WriteString(console.FormatInfoMessage("MCP Gateway Metrics"))
+	output.WriteString("\n\n")
+
+	// Summary statistics
+	output.WriteString(fmt.Sprintf("Total Requests: %d\n", metrics.TotalRequests))
+	output.WriteString(fmt.Sprintf("Total Tool Calls: %d\n", metrics.TotalToolCalls))
+	output.WriteString(fmt.Sprintf("Total Errors: %d\n", metrics.TotalErrors))
+	output.WriteString(fmt.Sprintf("Servers: %d\n", len(metrics.Servers)))
+	
+	if !metrics.StartTime.IsZero() && !metrics.EndTime.IsZero() {
+		duration := metrics.EndTime.Sub(metrics.StartTime)
+		output.WriteString(fmt.Sprintf("Time Range: %s\n", duration.Round(time.Second)))
+	}
+	
+	output.WriteString("\n")
+
+	// Server metrics table
+	if len(metrics.Servers) > 0 {
+		output.WriteString("Server Usage:\n")
+		output.WriteString("┌────────────────────────────┬──────────┬────────────┬───────────┬────────┐\n")
+		output.WriteString("│ Server                     │ Requests │ Tool Calls │ Avg Time  │ Errors │\n")
+		output.WriteString("├────────────────────────────┼──────────┼────────────┼───────────┼────────┤\n")
+
+		// Sort servers by request count
+		var serverNames []string
+		for name := range metrics.Servers {
+			serverNames = append(serverNames, name)
+		}
+		sort.Slice(serverNames, func(i, j int) bool {
+			return metrics.Servers[serverNames[i]].RequestCount > metrics.Servers[serverNames[j]].RequestCount
+		})
+
+		for _, serverName := range serverNames {
+			server := metrics.Servers[serverName]
+			avgTime := 0.0
+			if server.RequestCount > 0 {
+				avgTime = server.TotalDuration / float64(server.RequestCount)
+			}
+			
+			output.WriteString(fmt.Sprintf("│ %-26s │ %8d │ %10d │ %7.0fms │ %6d │\n",
+				truncateString(serverName, 26),
+				server.RequestCount,
+				server.ToolCallCount,
+				avgTime,
+				server.ErrorCount))
+		}
+		
+		output.WriteString("└────────────────────────────┴──────────┴────────────┴───────────┴────────┘\n")
+	}
+
+	// Tool metrics table (if verbose)
+	if verbose {
+		output.WriteString("\n")
+		output.WriteString("Tool Usage Details:\n")
+		
+		for _, serverName := range getSortedServerNames(metrics) {
+			server := metrics.Servers[serverName]
+			if len(server.Tools) == 0 {
+				continue
+			}
+			
+			output.WriteString(fmt.Sprintf("\n%s:\n", serverName))
+			output.WriteString("┌──────────────────────────┬───────┬──────────┬──────────┬──────────┐\n")
+			output.WriteString("│ Tool                     │ Calls │ Avg Time │ Max Time │ Errors   │\n")
+			output.WriteString("├──────────────────────────┼───────┼──────────┼──────────┼──────────┤\n")
+
+			// Sort tools by call count
+			var toolNames []string
+			for name := range server.Tools {
+				toolNames = append(toolNames, name)
+			}
+			sort.Slice(toolNames, func(i, j int) bool {
+				return server.Tools[toolNames[i]].CallCount > server.Tools[toolNames[j]].CallCount
+			})
+
+			for _, toolName := range toolNames {
+				tool := server.Tools[toolName]
+				output.WriteString(fmt.Sprintf("│ %-24s │ %5d │ %6.0fms │ %6.0fms │ %8d │\n",
+					truncateString(toolName, 24),
+					tool.CallCount,
+					tool.AvgDuration,
+					tool.MaxDuration,
+					tool.ErrorCount))
+			}
+			
+			output.WriteString("└──────────────────────────┴───────┴──────────┴──────────┴──────────┘\n")
+		}
+	}
+
+	return output.String()
+}
+
+// getSortedServerNames returns the keys of metrics.Servers ordered by descending RequestCount; the relative order of servers with equal counts is unspecified (map iteration order plus the non-stable sort.Slice).
+func getSortedServerNames(metrics *GatewayMetrics) []string {
+	var names []string
+	for name := range metrics.Servers {
+		names = append(names, name)
+	}
+	sort.Slice(names, func(i, j int) bool {
+		return metrics.Servers[names[i]].RequestCount > metrics.Servers[names[j]].RequestCount
+	})
+	return names
+}
+
+// truncateString shortens s to at most maxLen characters (runes), replacing the tail with "..." when maxLen > 3; it slices on runes so a multi-byte UTF-8 character is never cut in half.
+func truncateString(s string, maxLen int) string {
+	if len([]rune(s)) <= maxLen {
+		return s
+	}
+	if maxLen <= 3 {
+		return string([]rune(s)[:maxLen])
+	}
+	return string([]rune(s)[:maxLen-3]) + "..."
+}
diff --git a/pkg/cli/gateway_logs_test.go b/pkg/cli/gateway_logs_test.go
new file mode 100644
index 0000000000..f3a4b4bbd7
--- /dev/null
+++ b/pkg/cli/gateway_logs_test.go
@@ -0,0 +1,452 @@
+package cli
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestParseGatewayLogs(t *testing.T) {
+	tests := []struct {
+		name          string
+		logContent    string
+		wantServers   int
+		wantRequests  int
+		wantToolCalls int
+		wantErrors    int
+		wantErr       bool
+	}{
+		{
+			name: "valid gateway log with tool calls",
+			logContent: `{"timestamp":"2024-01-12T10:00:00Z","level":"info","type":"request","event":"tool_call","server_name":"github","tool_name":"get_repository","method":"get_repository","duration":150.5,"input_size":100,"output_size":500,"status":"success"}
+{"timestamp":"2024-01-12T10:00:01Z","level":"info","type":"request","event":"tool_call","server_name":"github","tool_name":"list_issues","method":"list_issues","duration":250.3,"input_size":50,"output_size":1000,"status":"success"}
+{"timestamp":"2024-01-12T10:00:02Z","level":"info","type":"request","event":"tool_call","server_name":"playwright","tool_name":"navigate","method":"navigate","duration":500.0,"input_size":200,"output_size":300,"status":"success"}
+`,
+			wantServers:   2,
+			wantRequests:  3,
+			wantToolCalls: 3,
+			wantErrors:    0,
+			wantErr:       false,
+		},
+		{
+			name: "gateway log with errors",
+			logContent: `{"timestamp":"2024-01-12T10:00:00Z","level":"error","type":"request","event":"tool_call","server_name":"github","tool_name":"get_repository","duration":50.0,"status":"error","error":"connection timeout"}
+{"timestamp":"2024-01-12T10:00:01Z","level":"info","type":"request","event":"tool_call","server_name":"github","tool_name":"list_issues","duration":100.0,"status":"success"}
+`,
+			wantServers:   1,
+			wantRequests:  2,
+			wantToolCalls: 2,
+			wantErrors:    1,
+			wantErr:       false,
+		},
+		{
+			name: "gateway log with multiple servers",
+			logContent: `{"timestamp":"2024-01-12T10:00:00Z","level":"info","type":"request","event":"rpc_call","server_name":"github","method":"list_repos","duration":100.0,"status":"success"}
+{"timestamp":"2024-01-12T10:00:01Z","level":"info","type":"request","event":"rpc_call","server_name":"playwright","method":"screenshot","duration":200.0,"status":"success"}
+{"timestamp":"2024-01-12T10:00:02Z","level":"info","type":"request","event":"rpc_call","server_name":"tavily","method":"search","duration":300.0,"status":"success"}
+`,
+			wantServers:   3,
+			wantRequests:  3,
+			wantToolCalls: 3,
+			wantErrors:    0,
+			wantErr:       false,
+		},
+		{
+			name:         "empty log file",
+			logContent:   "",
+			wantServers:  0,
+			wantRequests: 0,
+			wantErrors:   0,
+			wantErr:      false,
+		},
+		{
+			name: "log with invalid JSON line",
+			logContent: `{"timestamp":"2024-01-12T10:00:00Z","level":"info","type":"request","event":"tool_call","server_name":"github","tool_name":"get_repository","duration":150.5,"status":"success"}
+invalid json line
+{"timestamp":"2024-01-12T10:00:02Z","level":"info","type":"request","event":"tool_call","server_name":"github","tool_name":"list_issues","duration":250.3,"status":"success"}
+`,
+			wantServers:   1,
+			wantRequests:  2,
+			wantToolCalls: 2,
+			wantErrors:    0,
+			wantErr:       false, // Should continue parsing after invalid line
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Create a temporary directory
+			tmpDir := t.TempDir()
+
+			// Write the test log content
+			gatewayLogPath := filepath.Join(tmpDir, "gateway.jsonl")
+			err := os.WriteFile(gatewayLogPath, []byte(tt.logContent), 0644)
+			require.NoError(t, err, "Failed to write test gateway.jsonl")
+
+			// Parse the gateway logs
+			metrics, err := parseGatewayLogs(tmpDir, false)
+
+			if tt.wantErr {
+				assert.Error(t, err)
+				return
+			}
+
+			require.NoError(t, err)
+			require.NotNil(t, metrics)
+
+			// Verify metrics
+			assert.Equal(t, tt.wantServers, len(metrics.Servers), "Server count mismatch")
+			assert.Equal(t, tt.wantRequests, metrics.TotalRequests, "Total requests mismatch")
+			assert.Equal(t, tt.wantToolCalls, metrics.TotalToolCalls, "Total tool calls mismatch")
+			assert.Equal(t, tt.wantErrors, metrics.TotalErrors, "Total errors mismatch")
+		})
+	}
+}
+
+func TestParseGatewayLogsFileNotFound(t *testing.T) {
+	tmpDir := t.TempDir()
+	
+	metrics, err := parseGatewayLogs(tmpDir, false)
+	
+	assert.Error(t, err)
+	assert.Nil(t, metrics)
+	assert.Contains(t, err.Error(), "gateway.jsonl not found")
+}
+
+func TestGatewayToolMetrics(t *testing.T) {
+	tmpDir := t.TempDir()
+
+	// Create a log with multiple calls to the same tool
+	logContent := `{"timestamp":"2024-01-12T10:00:00Z","level":"info","type":"request","event":"tool_call","server_name":"github","tool_name":"get_repository","duration":100.0,"status":"success"}
+{"timestamp":"2024-01-12T10:00:01Z","level":"info","type":"request","event":"tool_call","server_name":"github","tool_name":"get_repository","duration":200.0,"status":"success"}
+{"timestamp":"2024-01-12T10:00:02Z","level":"info","type":"request","event":"tool_call","server_name":"github","tool_name":"get_repository","duration":300.0,"status":"success"}
+`
+
+	gatewayLogPath := filepath.Join(tmpDir, "gateway.jsonl")
+	err := os.WriteFile(gatewayLogPath, []byte(logContent), 0644)
+	require.NoError(t, err)
+
+	metrics, err := parseGatewayLogs(tmpDir, false)
+	require.NoError(t, err)
+	require.NotNil(t, metrics)
+
+	// Verify server metrics
+	require.Equal(t, 1, len(metrics.Servers))
+	server := metrics.Servers["github"]
+	require.NotNil(t, server)
+	assert.Equal(t, "github", server.ServerName)
+	assert.Equal(t, 3, server.RequestCount)
+
+	// Verify tool metrics
+	require.Equal(t, 1, len(server.Tools))
+	tool := server.Tools["get_repository"]
+	require.NotNil(t, tool)
+	assert.Equal(t, "get_repository", tool.ToolName)
+	assert.Equal(t, 3, tool.CallCount)
+	assert.Equal(t, 600.0, tool.TotalDuration)
+	assert.Equal(t, 200.0, tool.AvgDuration)
+	assert.Equal(t, 300.0, tool.MaxDuration)
+	assert.Equal(t, 100.0, tool.MinDuration)
+}
+
+func TestRenderGatewayMetricsTable(t *testing.T) {
+	// Create metrics with some data
+	metrics := &GatewayMetrics{
+		TotalRequests:  10,
+		TotalToolCalls: 8,
+		TotalErrors:    2,
+		Servers: map[string]*GatewayServerMetrics{
+			"github": {
+				ServerName:    "github",
+				RequestCount:  6,
+				ToolCallCount: 5,
+				TotalDuration: 600.0,
+				ErrorCount:    1,
+				Tools: map[string]*GatewayToolMetrics{
+					"get_repository": {
+						ToolName:      "get_repository",
+						CallCount:     3,
+						TotalDuration: 300.0,
+						AvgDuration:   100.0,
+						MaxDuration:   150.0,
+						MinDuration:   50.0,
+						ErrorCount:    0,
+					},
+				},
+			},
+			"playwright": {
+				ServerName:    "playwright",
+				RequestCount:  4,
+				ToolCallCount: 3,
+				TotalDuration: 400.0,
+				ErrorCount:    1,
+				Tools: map[string]*GatewayToolMetrics{
+					"navigate": {
+						ToolName:      "navigate",
+						CallCount:     2,
+						TotalDuration: 200.0,
+						AvgDuration:   100.0,
+						MaxDuration:   120.0,
+						MinDuration:   80.0,
+						ErrorCount:    0,
+					},
+				},
+			},
+		},
+	}
+
+	// Test non-verbose output
+	output := renderGatewayMetricsTable(metrics, false)
+	assert.NotEmpty(t, output)
+	assert.Contains(t, output, "MCP Gateway Metrics")
+	assert.Contains(t, output, "Total Requests: 10")
+	assert.Contains(t, output, "Total Tool Calls: 8")
+	assert.Contains(t, output, "Total Errors: 2")
+	assert.Contains(t, output, "Servers: 2")
+	assert.Contains(t, output, "github")
+	assert.Contains(t, output, "playwright")
+
+	// Test verbose output
+	verboseOutput := renderGatewayMetricsTable(metrics, true)
+	assert.NotEmpty(t, verboseOutput)
+	assert.Contains(t, verboseOutput, "Tool Usage Details")
+	assert.Contains(t, verboseOutput, "get_repository")
+	assert.Contains(t, verboseOutput, "navigate")
+}
+
+func TestRenderGatewayMetricsTableEmpty(t *testing.T) {
+	// Test with nil metrics
+	output := renderGatewayMetricsTable(nil, false)
+	assert.Empty(t, output)
+
+	// Test with empty metrics
+	emptyMetrics := &GatewayMetrics{
+		Servers: make(map[string]*GatewayServerMetrics),
+	}
+	output = renderGatewayMetricsTable(emptyMetrics, false)
+	assert.Empty(t, output)
+}
+
+func TestGatewayTruncateString(t *testing.T) {
+	tests := []struct {
+		name   string
+		input  string
+		maxLen int
+		want   string
+	}{
+		{
+			name:   "string shorter than max",
+			input:  "short",
+			maxLen: 10,
+			want:   "short",
+		},
+		{
+			name:   "string equal to max",
+			input:  "exactlyten",
+			maxLen: 10,
+			want:   "exactlyten",
+		},
+		{
+			name:   "string longer than max",
+			input:  "this is a very long string",
+			maxLen: 10,
+			want:   "this is...",
+		},
+		{
+			name:   "max length very small",
+			input:  "test",
+			maxLen: 2,
+			want:   "te",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := truncateString(tt.input, tt.maxLen)
+			assert.Equal(t, tt.want, result)
+			assert.LessOrEqual(t, len(result), tt.maxLen)
+		})
+	}
+}
+
+func TestProcessGatewayLogEntry(t *testing.T) {
+	metrics := &GatewayMetrics{
+		Servers: make(map[string]*GatewayServerMetrics),
+	}
+
+	// Test request entry
+	entry := &GatewayLogEntry{
+		Timestamp:  "2024-01-12T10:00:00Z",
+		Event:      "tool_call",
+		ServerName: "github",
+		ToolName:   "get_repository",
+		Duration:   150.5,
+		InputSize:  100,
+		OutputSize: 500,
+		Status:     "success",
+	}
+
+	processGatewayLogEntry(entry, metrics, false)
+
+	assert.Equal(t, 1, metrics.TotalRequests)
+	assert.Equal(t, 1, metrics.TotalToolCalls)
+	assert.Equal(t, 0, metrics.TotalErrors)
+	assert.Equal(t, 1, len(metrics.Servers))
+
+	server := metrics.Servers["github"]
+	require.NotNil(t, server)
+	assert.Equal(t, 1, server.RequestCount)
+	assert.Equal(t, 1, server.ToolCallCount)
+	assert.Equal(t, 150.5, server.TotalDuration)
+
+	// Test error entry
+	errorEntry := &GatewayLogEntry{
+		Timestamp:  "2024-01-12T10:00:01Z",
+		Event:      "tool_call",
+		ServerName: "github",
+		ToolName:   "list_issues",
+		Status:     "error",
+		Error:      "connection timeout",
+	}
+
+	processGatewayLogEntry(errorEntry, metrics, false)
+
+	assert.Equal(t, 2, metrics.TotalRequests)
+	assert.Equal(t, 1, metrics.TotalErrors)
+	assert.Equal(t, 1, server.ErrorCount)
+}
+
+func TestGetSortedServerNames(t *testing.T) {
+	metrics := &GatewayMetrics{
+		Servers: map[string]*GatewayServerMetrics{
+			"github": {
+				ServerName:   "github",
+				RequestCount: 10,
+			},
+			"playwright": {
+				ServerName:   "playwright",
+				RequestCount: 5,
+			},
+			"tavily": {
+				ServerName:   "tavily",
+				RequestCount: 15,
+			},
+		},
+	}
+
+	names := getSortedServerNames(metrics)
+	require.Len(t, names, 3)
+	
+	// Should be sorted by request count (descending)
+	assert.Equal(t, "tavily", names[0])
+	assert.Equal(t, "github", names[1])
+	assert.Equal(t, "playwright", names[2])
+}
+
+func TestGatewayLogsWithMethodField(t *testing.T) {
+	tmpDir := t.TempDir()
+
+	// Test with method field instead of tool_name
+	logContent := `{"timestamp":"2024-01-12T10:00:00Z","level":"info","type":"request","event":"rpc_call","server_name":"github","method":"tools/list","duration":100.0,"status":"success"}
+{"timestamp":"2024-01-12T10:00:01Z","level":"info","type":"request","event":"rpc_call","server_name":"github","method":"tools/call","duration":200.0,"status":"success"}
+`
+
+	gatewayLogPath := filepath.Join(tmpDir, "gateway.jsonl")
+	err := os.WriteFile(gatewayLogPath, []byte(logContent), 0644)
+	require.NoError(t, err)
+
+	metrics, err := parseGatewayLogs(tmpDir, false)
+	require.NoError(t, err)
+	require.NotNil(t, metrics)
+
+	assert.Equal(t, 1, len(metrics.Servers))
+	assert.Equal(t, 2, metrics.TotalRequests)
+	assert.Equal(t, 2, metrics.TotalToolCalls)
+
+	server := metrics.Servers["github"]
+	require.NotNil(t, server)
+	assert.Equal(t, 2, len(server.Tools))
+	
+	// Check that methods were tracked as tools
+	assert.Contains(t, server.Tools, "tools/list")
+	assert.Contains(t, server.Tools, "tools/call")
+}
+
+func TestGatewayLogsParsingIntegration(t *testing.T) {
+	tmpDir := t.TempDir()
+
+	// Create a comprehensive test log
+	logContent := `{"timestamp":"2024-01-12T10:00:00.000Z","level":"info","type":"gateway","event":"startup","message":"Gateway started"}
+{"timestamp":"2024-01-12T10:00:01.123Z","level":"info","type":"request","event":"tool_call","server_name":"github","tool_name":"get_repository","method":"get_repository","duration":150.5,"input_size":100,"output_size":500,"status":"success"}
+{"timestamp":"2024-01-12T10:00:02.456Z","level":"info","type":"request","event":"tool_call","server_name":"github","tool_name":"list_issues","method":"list_issues","duration":250.3,"input_size":50,"output_size":1000,"status":"success"}
+{"timestamp":"2024-01-12T10:00:03.789Z","level":"error","type":"request","event":"tool_call","server_name":"github","tool_name":"get_repository","duration":50.0,"status":"error","error":"rate limit exceeded"}
+{"timestamp":"2024-01-12T10:00:04.012Z","level":"info","type":"request","event":"tool_call","server_name":"playwright","tool_name":"navigate","method":"navigate","duration":500.0,"input_size":200,"output_size":300,"status":"success"}
+{"timestamp":"2024-01-12T10:00:05.345Z","level":"info","type":"request","event":"tool_call","server_name":"playwright","tool_name":"screenshot","method":"screenshot","duration":300.0,"input_size":50,"output_size":2000,"status":"success"}
+{"timestamp":"2024-01-12T10:00:06.678Z","level":"info","type":"gateway","event":"shutdown","message":"Gateway shutting down"}
+`
+
+	gatewayLogPath := filepath.Join(tmpDir, "gateway.jsonl")
+	err := os.WriteFile(gatewayLogPath, []byte(logContent), 0644)
+	require.NoError(t, err)
+
+	metrics, err := parseGatewayLogs(tmpDir, false)
+	require.NoError(t, err)
+	require.NotNil(t, metrics)
+
+	// Verify overall metrics
+	assert.Equal(t, 2, len(metrics.Servers), "Should have 2 servers")
+	assert.Equal(t, 5, metrics.TotalRequests, "Should have 5 requests")
+	assert.Equal(t, 5, metrics.TotalToolCalls, "Should have 5 tool calls")
+	assert.Equal(t, 1, metrics.TotalErrors, "Should have 1 error")
+
+	// Verify GitHub server metrics
+	githubServer := metrics.Servers["github"]
+	require.NotNil(t, githubServer)
+	assert.Equal(t, 3, githubServer.RequestCount)
+	assert.Equal(t, 3, githubServer.ToolCallCount)
+	assert.Equal(t, 1, githubServer.ErrorCount)
+
+	// Verify Playwright server metrics
+	playwrightServer := metrics.Servers["playwright"]
+	require.NotNil(t, playwrightServer)
+	assert.Equal(t, 2, playwrightServer.RequestCount)
+	assert.Equal(t, 2, playwrightServer.ToolCallCount)
+	assert.Equal(t, 0, playwrightServer.ErrorCount)
+
+	// Verify tool metrics
+	assert.Equal(t, 2, len(githubServer.Tools))
+	assert.Equal(t, 2, len(playwrightServer.Tools))
+
+	// Verify GitHub tools
+	getRepoTool := githubServer.Tools["get_repository"]
+	require.NotNil(t, getRepoTool)
+	assert.Equal(t, 2, getRepoTool.CallCount)
+	assert.Equal(t, 1, getRepoTool.ErrorCount)
+
+	listIssuesTool := githubServer.Tools["list_issues"]
+	require.NotNil(t, listIssuesTool)
+	assert.Equal(t, 1, listIssuesTool.CallCount)
+	assert.Equal(t, 0, listIssuesTool.ErrorCount)
+
+	// Test rendering
+	output := renderGatewayMetricsTable(metrics, false)
+	assert.NotEmpty(t, output)
+	assert.Contains(t, output, "github")
+	assert.Contains(t, output, "playwright")
+
+	// Test verbose rendering
+	verboseOutput := renderGatewayMetricsTable(metrics, true)
+	assert.Contains(t, verboseOutput, "Tool Usage Details")
+	assert.Contains(t, verboseOutput, "get_repository")
+	assert.Contains(t, verboseOutput, "list_issues")
+	assert.Contains(t, verboseOutput, "navigate")
+	assert.Contains(t, verboseOutput, "screenshot")
+
+	// Verify time range was captured
+	assert.False(t, metrics.StartTime.IsZero())
+	assert.False(t, metrics.EndTime.IsZero())
+	assert.True(t, metrics.EndTime.After(metrics.StartTime))
+}
diff --git a/pkg/cli/logs_metrics.go b/pkg/cli/logs_metrics.go
index 1d2d97baf6..5c79c1fa1a 100644
--- a/pkg/cli/logs_metrics.go
+++ b/pkg/cli/logs_metrics.go
@@ -165,6 +165,24 @@ func extractLogMetrics(logDir string, verbose bool, workflowPath ...string) (Log
 		return nil
 	})
 
+	// Try to parse gateway.jsonl if it exists
+	gatewayMetrics, gatewayErr := parseGatewayLogs(logDir, verbose)
+	if gatewayErr == nil && gatewayMetrics != nil {
+		if verbose {
+			fmt.Fprintln(os.Stderr, console.FormatSuccessMessage("Successfully parsed gateway.jsonl"))
+		}
+		// We've successfully parsed gateway metrics, but we don't add them to the main metrics
+		// structure since they're tracked separately and displayed in their own table
+		logsMetricsLog.Printf("Parsed gateway.jsonl: %d servers, %d requests", 
+			len(gatewayMetrics.Servers), gatewayMetrics.TotalRequests)
+	} else if gatewayErr != nil && !strings.Contains(gatewayErr.Error(), "not found") {
+		// Only log if it's an error other than "not found"
+		logsMetricsLog.Printf("Failed to parse gateway.jsonl: %v", gatewayErr)
+		if verbose {
+			fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Failed to parse gateway.jsonl: %v", gatewayErr)))
+		}
+	}
+
 	if logsMetricsLog.Enabled() {
 		logsMetricsLog.Printf("Metrics extraction completed: tokens=%d, cost=%.4f, turns=%d",
 			metrics.TokenUsage, metrics.EstimatedCost, metrics.Turns)

From 8da02e2315b61a089a44859ce2367d75441c8236 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 12 Jan 2026 22:56:11 +0000
Subject: [PATCH 4/4] Integrate gateway metrics display into logs and audit
 commands

Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
---
 pkg/cli/gateway_logs.go      | 173 ++++++++++++++++++++++++++---------
 pkg/cli/gateway_logs_test.go |  40 ++++----
 pkg/cli/logs_metrics.go      |   2 +-
 pkg/cli/logs_orchestrator.go |   3 +
 4 files changed, 152 insertions(+), 66 deletions(-)

diff --git a/pkg/cli/gateway_logs.go b/pkg/cli/gateway_logs.go
index c38c686e0f..469cacc1c3 100644
--- a/pkg/cli/gateway_logs.go
+++ b/pkg/cli/gateway_logs.go
@@ -54,32 +54,32 @@ type GatewayServerMetrics struct {
 
 // GatewayToolMetrics represents usage metrics for a specific tool
 type GatewayToolMetrics struct {
-	ToolName      string
-	CallCount     int
-	TotalDuration float64 // in milliseconds
-	AvgDuration   float64 // in milliseconds
-	MaxDuration   float64 // in milliseconds
-	MinDuration   float64 // in milliseconds
-	ErrorCount    int
+	ToolName        string  // tool name, or the RPC method when the entry carried no tool name
+	CallCount       int     // number of tool calls attributed to this tool
+	TotalDuration   float64 // in milliseconds
+	AvgDuration     float64 // in milliseconds
+	MaxDuration     float64 // in milliseconds
+	MinDuration     float64 // in milliseconds
+	ErrorCount      int     // errors recorded against this tool
 	TotalInputSize  int
 	TotalOutputSize int
 }
 
 // GatewayMetrics represents aggregated metrics from gateway logs
 type GatewayMetrics struct {
-	TotalRequests    int
-	TotalToolCalls   int
-	TotalErrors      int
-	Servers          map[string]*GatewayServerMetrics
-	StartTime        time.Time
-	EndTime          time.Time
-	TotalDuration    float64 // in milliseconds
+	TotalRequests  int
+	TotalToolCalls int
+	TotalErrors    int
+	Servers        map[string]*GatewayServerMetrics // keyed by server name
+	StartTime      time.Time
+	EndTime        time.Time
+	TotalDuration  float64 // in milliseconds; summed over requests with a server name and a positive duration
 }
 
 // parseGatewayLogs parses a gateway.jsonl file and extracts metrics
 func parseGatewayLogs(logDir string, verbose bool) (*GatewayMetrics, error) {
 	gatewayLogPath := filepath.Join(logDir, "gateway.jsonl")
-	
+
 	// Check if gateway.jsonl exists
 	if _, err := os.Stat(gatewayLogPath); os.IsNotExist(err) {
 		gatewayLogsLog.Printf("gateway.jsonl not found at: %s", gatewayLogPath)
@@ -100,11 +100,11 @@ func parseGatewayLogs(logDir string, verbose bool) (*GatewayMetrics, error) {
 
 	scanner := bufio.NewScanner(file)
 	lineNum := 0
-	
+
 	for scanner.Scan() {
 		lineNum++
 		line := strings.TrimSpace(scanner.Text())
-		
+
 		// Skip empty lines
 		if line == "" {
 			continue
@@ -130,7 +130,7 @@ func parseGatewayLogs(logDir string, verbose bool) (*GatewayMetrics, error) {
 	// Calculate aggregate statistics
 	calculateGatewayAggregates(metrics)
 
-	gatewayLogsLog.Printf("Successfully parsed gateway.jsonl: %d servers, %d total requests", 
+	gatewayLogsLog.Printf("Successfully parsed gateway.jsonl: %d servers, %d total requests",
 		len(metrics.Servers), metrics.TotalRequests)
 
 	return metrics, nil
@@ -156,7 +156,7 @@ func processGatewayLogEntry(entry *GatewayLogEntry, metrics *GatewayMetrics, ver
 		if entry.ServerName != "" {
 			server := getOrCreateServer(metrics, entry.ServerName)
 			server.ErrorCount++
-			
+
 			if entry.ToolName != "" {
 				tool := getOrCreateTool(server, entry.ToolName)
 				tool.ErrorCount++
@@ -168,11 +168,11 @@ func processGatewayLogEntry(entry *GatewayLogEntry, metrics *GatewayMetrics, ver
 	switch entry.Event {
 	case "request", "tool_call", "rpc_call":
 		metrics.TotalRequests++
-		
+
 		if entry.ServerName != "" {
 			server := getOrCreateServer(metrics, entry.ServerName)
 			server.RequestCount++
-			
+
 			if entry.Duration > 0 {
 				server.TotalDuration += entry.Duration
 				metrics.TotalDuration += entry.Duration
@@ -184,13 +184,13 @@ func processGatewayLogEntry(entry *GatewayLogEntry, metrics *GatewayMetrics, ver
 				if toolName == "" {
 					toolName = entry.Method
 				}
-				
+
 				metrics.TotalToolCalls++
 				server.ToolCallCount++
-				
+
 				tool := getOrCreateTool(server, toolName)
 				tool.CallCount++
-				
+
 				if entry.Duration > 0 {
 					tool.TotalDuration += entry.Duration
 					if tool.MaxDuration == 0 || entry.Duration > tool.MaxDuration {
@@ -200,7 +200,7 @@ func processGatewayLogEntry(entry *GatewayLogEntry, metrics *GatewayMetrics, ver
 						tool.MinDuration = entry.Duration
 					}
 				}
-				
+
 				if entry.InputSize > 0 {
 					tool.TotalInputSize += entry.InputSize
 				}
@@ -217,7 +217,7 @@ func getOrCreateServer(metrics *GatewayMetrics, serverName string) *GatewayServe
 	if server, exists := metrics.Servers[serverName]; exists {
 		return server
 	}
-	
+
 	server := &GatewayServerMetrics{
 		ServerName: serverName,
 		Tools:      make(map[string]*GatewayToolMetrics),
@@ -231,7 +231,7 @@ func getOrCreateTool(server *GatewayServerMetrics, toolName string) *GatewayTool
 	if tool, exists := server.Tools[toolName]; exists {
 		return tool
 	}
-	
+
 	tool := &GatewayToolMetrics{
 		ToolName: toolName,
 	}
@@ -257,22 +257,22 @@ func renderGatewayMetricsTable(metrics *GatewayMetrics, verbose bool) string {
 	}
 
 	var output strings.Builder
-	
+
 	output.WriteString("\n")
 	output.WriteString(console.FormatInfoMessage("MCP Gateway Metrics"))
 	output.WriteString("\n\n")
 
 	// Summary statistics
-	output.WriteString(fmt.Sprintf("Total Requests: %d\n", metrics.TotalRequests))
-	output.WriteString(fmt.Sprintf("Total Tool Calls: %d\n", metrics.TotalToolCalls))
-	output.WriteString(fmt.Sprintf("Total Errors: %d\n", metrics.TotalErrors))
-	output.WriteString(fmt.Sprintf("Servers: %d\n", len(metrics.Servers)))
-	
+	fmt.Fprintf(&output, "Total Requests: %d\n", metrics.TotalRequests)
+	fmt.Fprintf(&output, "Total Tool Calls: %d\n", metrics.TotalToolCalls)
+	fmt.Fprintf(&output, "Total Errors: %d\n", metrics.TotalErrors)
+	fmt.Fprintf(&output, "Servers: %d\n", len(metrics.Servers))
+
 	if !metrics.StartTime.IsZero() && !metrics.EndTime.IsZero() {
 		duration := metrics.EndTime.Sub(metrics.StartTime)
-		output.WriteString(fmt.Sprintf("Time Range: %s\n", duration.Round(time.Second)))
+		fmt.Fprintf(&output, "Time Range: %s\n", duration.Round(time.Second))
 	}
-	
+
 	output.WriteString("\n")
 
 	// Server metrics table
@@ -297,15 +297,15 @@ func renderGatewayMetricsTable(metrics *GatewayMetrics, verbose bool) string {
 			if server.RequestCount > 0 {
 				avgTime = server.TotalDuration / float64(server.RequestCount)
 			}
-			
-			output.WriteString(fmt.Sprintf("│ %-26s │ %8d │ %10d │ %7.0fms │ %6d │\n",
+
+			fmt.Fprintf(&output, "│ %-26s │ %8d │ %10d │ %7.0fms │ %6d │\n",
 				truncateString(serverName, 26),
 				server.RequestCount,
 				server.ToolCallCount,
 				avgTime,
-				server.ErrorCount))
+				server.ErrorCount)
 		}
-		
+
 		output.WriteString("└────────────────────────────┴──────────┴────────────┴───────────┴────────┘\n")
 	}
 
@@ -313,14 +313,14 @@ func renderGatewayMetricsTable(metrics *GatewayMetrics, verbose bool) string {
 	if verbose {
 		output.WriteString("\n")
 		output.WriteString("Tool Usage Details:\n")
-		
+
 		for _, serverName := range getSortedServerNames(metrics) {
 			server := metrics.Servers[serverName]
 			if len(server.Tools) == 0 {
 				continue
 			}
-			
-			output.WriteString(fmt.Sprintf("\n%s:\n", serverName))
+
+			fmt.Fprintf(&output, "\n%s:\n", serverName)
 			output.WriteString("┌──────────────────────────┬───────┬──────────┬──────────┬──────────┐\n")
 			output.WriteString("│ Tool                     │ Calls │ Avg Time │ Max Time │ Errors   │\n")
 			output.WriteString("├──────────────────────────┼───────┼──────────┼──────────┼──────────┤\n")
@@ -336,14 +336,14 @@ func renderGatewayMetricsTable(metrics *GatewayMetrics, verbose bool) string {
 
 			for _, toolName := range toolNames {
 				tool := server.Tools[toolName]
-				output.WriteString(fmt.Sprintf("│ %-24s │ %5d │ %6.0fms │ %6.0fms │ %8d │\n",
+				fmt.Fprintf(&output, "│ %-24s │ %5d │ %6.0fms │ %6.0fms │ %8d │\n",
 					truncateString(toolName, 24),
 					tool.CallCount,
 					tool.AvgDuration,
 					tool.MaxDuration,
-					tool.ErrorCount))
+					tool.ErrorCount)
 			}
-			
+
 			output.WriteString("└──────────────────────────┴───────┴──────────┴──────────┴──────────┘\n")
 		}
 	}
@@ -373,3 +373,103 @@ func truncateString(s string, maxLen int) string {
 	}
 	return s[:maxLen-3] + "..."
 }
+
+// displayAggregatedGatewayMetrics merges the gateway.jsonl metrics of every
+// processed run into one GatewayMetrics value and prints the rendered table
+// to stderr.
+//
+// Runs with an empty LogsPath or without a gateway.jsonl file are skipped,
+// which is normal for runs that did not use the MCP gateway. Any other parse
+// failure is written to the debug log and the run is left out of the totals.
+// Nothing is printed when no run contributed server metrics.
+//
+// outputDir is currently unused; each run is located via pr.Run.LogsPath.
+// verbose is forwarded to renderGatewayMetricsTable, which then also renders
+// the per-tool details.
+func displayAggregatedGatewayMetrics(processedRuns []ProcessedRun, outputDir string, verbose bool) {
+	aggregated := &GatewayMetrics{
+		Servers: make(map[string]*GatewayServerMetrics),
+	}
+
+	runCount := 0
+	for _, pr := range processedRuns {
+		runDir := pr.Run.LogsPath
+		if runDir == "" {
+			continue
+		}
+
+		runMetrics, err := parseGatewayLogs(runDir, false)
+		if err != nil {
+			// A missing gateway.jsonl is expected. Anything else gets a debug
+			// trace so a run dropped from the totals can still be diagnosed.
+			if !strings.Contains(err.Error(), "not found") {
+				gatewayLogsLog.Printf("Skipping gateway metrics for %s: %v", runDir, err)
+			}
+			continue
+		}
+		if runMetrics == nil {
+			// Defensive: never dereference a nil result, even without an error.
+			continue
+		}
+
+		runCount++
+
+		// Fold this run's totals into the aggregate.
+		aggregated.TotalRequests += runMetrics.TotalRequests
+		aggregated.TotalToolCalls += runMetrics.TotalToolCalls
+		aggregated.TotalErrors += runMetrics.TotalErrors
+		aggregated.TotalDuration += runMetrics.TotalDuration
+
+		for serverName, serverMetrics := range runMetrics.Servers {
+			aggServer := getOrCreateServer(aggregated, serverName)
+			aggServer.RequestCount += serverMetrics.RequestCount
+			aggServer.ToolCallCount += serverMetrics.ToolCallCount
+			aggServer.TotalDuration += serverMetrics.TotalDuration
+			aggServer.ErrorCount += serverMetrics.ErrorCount
+
+			for toolName, toolMetrics := range serverMetrics.Tools {
+				aggTool := getOrCreateTool(aggServer, toolName)
+				aggTool.CallCount += toolMetrics.CallCount
+				aggTool.TotalDuration += toolMetrics.TotalDuration
+				aggTool.ErrorCount += toolMetrics.ErrorCount
+				aggTool.TotalInputSize += toolMetrics.TotalInputSize
+				aggTool.TotalOutputSize += toolMetrics.TotalOutputSize
+
+				if toolMetrics.MaxDuration > aggTool.MaxDuration {
+					aggTool.MaxDuration = toolMetrics.MaxDuration
+				}
+				// A zero minimum means no duration was recorded, so it never
+				// replaces a real minimum.
+				if toolMetrics.MinDuration > 0 &&
+					(aggTool.MinDuration == 0 || toolMetrics.MinDuration < aggTool.MinDuration) {
+					aggTool.MinDuration = toolMetrics.MinDuration
+				}
+			}
+		}
+
+		// Widen the time range; a zero timestamp carries no information.
+		if !runMetrics.StartTime.IsZero() &&
+			(aggregated.StartTime.IsZero() || runMetrics.StartTime.Before(aggregated.StartTime)) {
+			aggregated.StartTime = runMetrics.StartTime
+		}
+		if !runMetrics.EndTime.IsZero() &&
+			(aggregated.EndTime.IsZero() || runMetrics.EndTime.After(aggregated.EndTime)) {
+			aggregated.EndTime = runMetrics.EndTime
+		}
+	}
+
+	if runCount == 0 || len(aggregated.Servers) == 0 {
+		return
+	}
+
+	// Averages are derived values, so recompute them from the merged sums.
+	calculateGatewayAggregates(aggregated)
+
+	if metricsOutput := renderGatewayMetricsTable(aggregated, verbose); metricsOutput != "" {
+		fmt.Fprint(os.Stderr, metricsOutput)
+		if runCount > 1 {
+			fmt.Fprintf(os.Stderr, "\n%s\n",
+				console.FormatInfoMessage(fmt.Sprintf("Gateway metrics aggregated from %d runs", runCount)))
+		}
+	}
+}
diff --git a/pkg/cli/gateway_logs_test.go b/pkg/cli/gateway_logs_test.go
index f3a4b4bbd7..2ab07de291 100644
--- a/pkg/cli/gateway_logs_test.go
+++ b/pkg/cli/gateway_logs_test.go
@@ -90,7 +90,7 @@ invalid json line
 			metrics, err := parseGatewayLogs(tmpDir, false)
 
 			if tt.wantErr {
-				assert.Error(t, err)
+				require.Error(t, err)
 				return
 			}
 
@@ -98,7 +98,7 @@ invalid json line
 			require.NotNil(t, metrics)
 
 			// Verify metrics
-			assert.Equal(t, tt.wantServers, len(metrics.Servers), "Server count mismatch")
+			assert.Len(t, metrics.Servers, tt.wantServers, "Server count mismatch")
 			assert.Equal(t, tt.wantRequests, metrics.TotalRequests, "Total requests mismatch")
 			assert.Equal(t, tt.wantToolCalls, metrics.TotalToolCalls, "Total tool calls mismatch")
 			assert.Equal(t, tt.wantErrors, metrics.TotalErrors, "Total errors mismatch")
@@ -108,10 +108,10 @@ invalid json line
 
 func TestParseGatewayLogsFileNotFound(t *testing.T) {
 	tmpDir := t.TempDir()
-	
+
 	metrics, err := parseGatewayLogs(tmpDir, false)
-	
-	assert.Error(t, err)
+
+	require.Error(t, err)
 	assert.Nil(t, metrics)
 	assert.Contains(t, err.Error(), "gateway.jsonl not found")
 }
@@ -134,22 +134,22 @@ func TestGatewayToolMetrics(t *testing.T) {
 	require.NotNil(t, metrics)
 
 	// Verify server metrics
-	require.Equal(t, 1, len(metrics.Servers))
+	require.Len(t, metrics.Servers, 1)
 	server := metrics.Servers["github"]
 	require.NotNil(t, server)
 	assert.Equal(t, "github", server.ServerName)
 	assert.Equal(t, 3, server.RequestCount)
 
 	// Verify tool metrics
-	require.Equal(t, 1, len(server.Tools))
+	require.Len(t, server.Tools, 1)
 	tool := server.Tools["get_repository"]
 	require.NotNil(t, tool)
 	assert.Equal(t, "get_repository", tool.ToolName)
 	assert.Equal(t, 3, tool.CallCount)
-	assert.Equal(t, 600.0, tool.TotalDuration)
-	assert.Equal(t, 200.0, tool.AvgDuration)
-	assert.Equal(t, 300.0, tool.MaxDuration)
-	assert.Equal(t, 100.0, tool.MinDuration)
+	assert.InDelta(t, 600.0, tool.TotalDuration, 0.001)
+	assert.InDelta(t, 200.0, tool.AvgDuration, 0.001)
+	assert.InDelta(t, 300.0, tool.MaxDuration, 0.001)
+	assert.InDelta(t, 100.0, tool.MinDuration, 0.001)
 }
 
 func TestRenderGatewayMetricsTable(t *testing.T) {
@@ -294,13 +294,13 @@ func TestProcessGatewayLogEntry(t *testing.T) {
 	assert.Equal(t, 1, metrics.TotalRequests)
 	assert.Equal(t, 1, metrics.TotalToolCalls)
 	assert.Equal(t, 0, metrics.TotalErrors)
-	assert.Equal(t, 1, len(metrics.Servers))
+	assert.Len(t, metrics.Servers, 1)
 
 	server := metrics.Servers["github"]
 	require.NotNil(t, server)
 	assert.Equal(t, 1, server.RequestCount)
 	assert.Equal(t, 1, server.ToolCallCount)
-	assert.Equal(t, 150.5, server.TotalDuration)
+	assert.InDelta(t, 150.5, server.TotalDuration, 0.001)
 
 	// Test error entry
 	errorEntry := &GatewayLogEntry{
@@ -339,7 +339,7 @@ func TestGetSortedServerNames(t *testing.T) {
 
 	names := getSortedServerNames(metrics)
 	require.Len(t, names, 3)
-	
+
 	// Should be sorted by request count (descending)
 	assert.Equal(t, "tavily", names[0])
 	assert.Equal(t, "github", names[1])
@@ -362,14 +362,14 @@ func TestGatewayLogsWithMethodField(t *testing.T) {
 	require.NoError(t, err)
 	require.NotNil(t, metrics)
 
-	assert.Equal(t, 1, len(metrics.Servers))
+	assert.Len(t, metrics.Servers, 1)
 	assert.Equal(t, 2, metrics.TotalRequests)
 	assert.Equal(t, 2, metrics.TotalToolCalls)
 
 	server := metrics.Servers["github"]
 	require.NotNil(t, server)
-	assert.Equal(t, 2, len(server.Tools))
-	
+	assert.Len(t, server.Tools, 2)
+
 	// Check that methods were tracked as tools
 	assert.Contains(t, server.Tools, "tools/list")
 	assert.Contains(t, server.Tools, "tools/call")
@@ -397,7 +397,7 @@ func TestGatewayLogsParsingIntegration(t *testing.T) {
 	require.NotNil(t, metrics)
 
 	// Verify overall metrics
-	assert.Equal(t, 2, len(metrics.Servers), "Should have 2 servers")
+	assert.Len(t, metrics.Servers, 2, "Should have 2 servers")
 	assert.Equal(t, 5, metrics.TotalRequests, "Should have 5 requests")
 	assert.Equal(t, 5, metrics.TotalToolCalls, "Should have 5 tool calls")
 	assert.Equal(t, 1, metrics.TotalErrors, "Should have 1 error")
@@ -417,8 +417,8 @@ func TestGatewayLogsParsingIntegration(t *testing.T) {
 	assert.Equal(t, 0, playwrightServer.ErrorCount)
 
 	// Verify tool metrics
-	assert.Equal(t, 2, len(githubServer.Tools))
-	assert.Equal(t, 2, len(playwrightServer.Tools))
+	assert.Len(t, githubServer.Tools, 2)
+	assert.Len(t, playwrightServer.Tools, 2)
 
 	// Verify GitHub tools
 	getRepoTool := githubServer.Tools["get_repository"]
diff --git a/pkg/cli/logs_metrics.go b/pkg/cli/logs_metrics.go
index 5c79c1fa1a..b51e29c2fe 100644
--- a/pkg/cli/logs_metrics.go
+++ b/pkg/cli/logs_metrics.go
@@ -173,7 +173,7 @@ func extractLogMetrics(logDir string, verbose bool, workflowPath ...string) (Log
 		}
 		// We've successfully parsed gateway metrics, but we don't add them to the main metrics
 		// structure since they're tracked separately and displayed in their own table
-		logsMetricsLog.Printf("Parsed gateway.jsonl: %d servers, %d requests", 
+		logsMetricsLog.Printf("Parsed gateway.jsonl: %d servers, %d requests",
 			len(gatewayMetrics.Servers), gatewayMetrics.TotalRequests)
 	} else if gatewayErr != nil && !strings.Contains(gatewayErr.Error(), "not found") {
 		// Only log if it's an error other than "not found"
diff --git a/pkg/cli/logs_orchestrator.go b/pkg/cli/logs_orchestrator.go
index 526ce58c2a..c729da6644 100644
--- a/pkg/cli/logs_orchestrator.go
+++ b/pkg/cli/logs_orchestrator.go
@@ -499,6 +499,9 @@ func DownloadWorkflowLogs(ctx context.Context, workflowName string, count int, s
 	} else {
 		renderLogsConsole(logsData)
 
+		// Display aggregated gateway metrics if any runs have gateway.jsonl files
+		displayAggregatedGatewayMetrics(processedRuns, outputDir, verbose)
+
 		// Generate tool sequence graph if requested (console output only)
 		if toolGraph {
 			generateToolGraph(processedRuns, verbose)