diff --git a/.gitignore b/.gitignore index 91550c18..4bbf4544 100644 --- a/.gitignore +++ b/.gitignore @@ -90,3 +90,6 @@ dist/ .adk/ **/.rogue bin/ + +# Go TUI +packages/tui/cmd/rogue/__debug* diff --git a/.vscode/launch.json b/.vscode/launch.json index 3765647c..96d9bb38 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -63,6 +63,14 @@ ], "envFile": "${workspaceFolder}/.env" }, + { + "name": "Rogue TUI", + "type": "go", + "request": "launch", + "mode": "auto", + "program": "${workspaceFolder}/packages/tui/cmd/rogue/main.go", + "console": "integratedTerminal" + }, { "name": "Rogue AIO", "type": "debugpy", diff --git a/examples/mcp/tshirt_store_langgraph_mcp/__main__.py b/examples/mcp/tshirt_store_langgraph_mcp/__main__.py index f0cb66a1..fcb7a894 100644 --- a/examples/mcp/tshirt_store_langgraph_mcp/__main__.py +++ b/examples/mcp/tshirt_store_langgraph_mcp/__main__.py @@ -15,7 +15,7 @@ "--transport", "transport", default="streamable-http", - choices=["streamable-http", "sse"], + type=click.Choice(["streamable-http", "sse"]), help="Transport to use for the mcp server", ) def main(host: str, port: int, transport: Literal["streamable-http", "sse"]) -> None: diff --git a/packages/tui/internal/tui/common_controller.go b/packages/tui/internal/tui/common_controller.go index 1d947a3c..4ad264be 100644 --- a/packages/tui/internal/tui/common_controller.go +++ b/packages/tui/internal/tui/common_controller.go @@ -45,19 +45,17 @@ func (m Model) handlePasteMsg(msg tea.PasteMsg) (Model, tea.Cmd) { } // Only paste into text fields (Agent URL and Judge Model) - if m.evalState.currentField <= 1 { - switch m.evalState.currentField { - case 0: // Agent URL - // Insert at cursor position - runes := []rune(m.evalState.AgentURL) - m.evalState.AgentURL = string(runes[:m.evalState.cursorPos]) + cleanText + string(runes[m.evalState.cursorPos:]) - m.evalState.cursorPos += len([]rune(cleanText)) - case 1: // Judge Model - // Insert at cursor position - runes := 
[]rune(m.evalState.JudgeModel) - m.evalState.JudgeModel = string(runes[:m.evalState.cursorPos]) + cleanText + string(runes[m.evalState.cursorPos:]) - m.evalState.cursorPos += len([]rune(cleanText)) - } + switch m.evalState.currentField { + case 0: // Agent URL + // Insert at cursor position + runes := []rune(m.evalState.AgentURL) + m.evalState.AgentURL = string(runes[:m.evalState.cursorPos]) + cleanText + string(runes[m.evalState.cursorPos:]) + m.evalState.cursorPos += len([]rune(cleanText)) + case 3: // Judge Model + // Insert at cursor position + runes := []rune(m.evalState.JudgeModel) + m.evalState.JudgeModel = string(runes[:m.evalState.cursorPos]) + cleanText + string(runes[m.evalState.cursorPos:]) + m.evalState.cursorPos += len([]rune(cleanText)) } return m, nil } @@ -202,14 +200,17 @@ func (m Model) handleCommandSelectedMsg(msg components.CommandSelectedMsg) (Mode // Use the configured model in provider/model format judgeModel = m.config.SelectedProvider + "/" + m.config.SelectedModel } + // TODO: read the agent URL and protocol from .rogue/user_config.json instead of hard-coding the defaults below m.evalState = &EvaluationViewState{ - ServerURL: m.config.ServerURL, - AgentURL: "http://localhost:10001", - JudgeModel: judgeModel, - ParallelRuns: 1, - DeepTest: false, - Scenarios: loadScenariosFromWorkdir(), - cursorPos: len([]rune("http://localhost:10001")), // Set cursor to end of Agent URL + ServerURL: m.config.ServerURL, + AgentURL: "http://localhost:10001", + AgentProtocol: ProtocolA2A, + AgentTransport: TransportHTTP, + JudgeModel: judgeModel, + ParallelRuns: 1, + DeepTest: false, + Scenarios: loadScenariosFromWorkdir(), + cursorPos: len([]rune("http://localhost:10001")), // Set cursor to end of Agent URL } case "configure_models": // Open LLM configuration dialog diff --git a/packages/tui/internal/tui/eval_form.go b/packages/tui/internal/tui/eval_form.go index d52bdb11..760f96d2 100644 --- a/packages/tui/internal/tui/eval_form.go +++ b/packages/tui/internal/tui/eval_form.go @@ -37,8 +37,8 @@ func (m 
Model) renderNewEvaluation() string { title := titleStyle.Render("🧪 New Evaluation") - // Helper function to render a field with inline label and value - renderField := func(fieldIndex int, label, value string) string { + // Helper function to render a text field with inline label and value + renderTextField := func(fieldIndex int, label, value string) string { active := m.evalState.currentField == fieldIndex labelStyle := lipgloss.NewStyle(). @@ -83,8 +83,86 @@ func (m Model) renderNewEvaluation() string { return fieldContainer.Render(fieldContent) } + // Helper function to render a dropdown field with indicators + renderDropdownField := func(fieldIndex int, label, value string) string { + active := m.evalState.currentField == fieldIndex + + labelStyle := lipgloss.NewStyle(). + Foreground(t.TextMuted()). + Background(t.Background()). + Width(20). + Align(lipgloss.Right) + + valueStyle := lipgloss.NewStyle(). + Foreground(t.Text()). + Background(t.Background()). + Padding(0, 1) + + if active { + labelStyle = labelStyle.Foreground(t.Primary()).Bold(true) + valueStyle = valueStyle. + Foreground(t.Primary()). + Background(t.Background()). + Bold(true) + // Add dropdown indicators (shown only while focused, hinting that left/right cycles the value) + value = "◀ " + value + " ▶" + } + + // Create a full-width container for the field + fieldContainer := lipgloss.NewStyle(). + Width(m.width-4). + Background(t.Background()). + Padding(0, 2) + + fieldContent := lipgloss.JoinHorizontal(lipgloss.Left, + labelStyle.Render(label), + valueStyle.Render(value), + ) + + return fieldContainer.Render(fieldContent) + } + + // Helper function to render a toggle field + renderToggleField := func(fieldIndex int, label, value string) string { + active := m.evalState.currentField == fieldIndex + + labelStyle := lipgloss.NewStyle(). + Foreground(t.TextMuted()). + Background(t.Background()). + Width(20). + Align(lipgloss.Right) + + valueStyle := lipgloss.NewStyle(). + Foreground(t.Text()). + Background(t.Background()). 
+ Padding(0, 1) + + if active { + labelStyle = labelStyle.Foreground(t.Primary()).Bold(true) + valueStyle = valueStyle. + Foreground(t.Primary()). + Background(t.Background()). + Bold(true) + } + + // Create a full-width container for the field + fieldContainer := lipgloss.NewStyle(). + Width(m.width-4). + Background(t.Background()). + Padding(0, 2) + + fieldContent := lipgloss.JoinHorizontal(lipgloss.Left, + labelStyle.Render(label), + valueStyle.Render(value), + ) + + return fieldContainer.Render(fieldContent) + } + // Prepare field values agent := m.evalState.AgentURL + protocol := string(m.evalState.AgentProtocol) + transport := string(m.evalState.AgentTransport) judge := m.evalState.JudgeModel deep := "❌" if m.evalState.DeepTest { @@ -93,7 +171,7 @@ func (m Model) renderNewEvaluation() string { // Helper function to render the start button renderStartButton := func() string { - active := m.evalState.currentField == 3 + active := m.evalState.currentField == 5 var buttonText string if m.evalSpinner.IsActive() { @@ -151,9 +229,11 @@ func (m Model) renderNewEvaluation() string { // Build the content sections formSection := lipgloss.JoinVertical(lipgloss.Left, - renderField(0, "Agent URL:", agent), - renderField(1, "Judge LLM:", judge), - renderField(2, "Deep Test:", deep), + renderTextField(0, "Agent URL:", agent), + renderDropdownField(1, "Protocol:", protocol), + renderDropdownField(2, "Transport:", transport), + renderTextField(3, "Judge LLM:", judge), + renderToggleField(4, "Deep Test:", deep), ) var infoLines []string @@ -168,7 +248,7 @@ func (m Model) renderNewEvaluation() string { buttonSection := renderStartButton() - helpText := helpStyle.Render("t Test Server ↑/↓ switch fields ←/→ move cursor Space toggle Enter activate Esc Back") + helpText := helpStyle.Render("t Test Server ↑/↓ switch fields ←/→ move cursor/cycle dropdown Space toggle Enter activate Esc Back") // Calculate content area height (excluding title and help) contentHeight 
:= m.height - 6 // title(3) + help(1) + margins(2) diff --git a/packages/tui/internal/tui/eval_form_controller.go b/packages/tui/internal/tui/eval_form_controller.go index 558e274b..e5684055 100644 --- a/packages/tui/internal/tui/eval_form_controller.go +++ b/packages/tui/internal/tui/eval_form_controller.go @@ -26,7 +26,7 @@ func (m Model) handleEvalFormInput(msg tea.KeyMsg) (Model, tea.Cmd) { switch m.evalState.currentField { case 0: m.evalState.cursorPos = len([]rune(m.evalState.AgentURL)) - case 1: + case 3: m.evalState.cursorPos = len([]rune(m.evalState.JudgeModel)) default: m.evalState.cursorPos = 0 @@ -35,13 +35,13 @@ func (m Model) handleEvalFormInput(msg tea.KeyMsg) (Model, tea.Cmd) { return m, nil case "down": - if m.evalState.currentField < 3 { // Now includes start button (0-3) + if m.evalState.currentField < 5 { // 0-5 fields (AgentURL, Protocol, Transport, JudgeModel, DeepTest, StartButton) m.evalState.currentField++ // Set cursor to end of field content when switching fields switch m.evalState.currentField { case 0: m.evalState.cursorPos = len([]rune(m.evalState.AgentURL)) - case 1: + case 3: m.evalState.cursorPos = len([]rune(m.evalState.JudgeModel)) default: m.evalState.cursorPos = 0 @@ -50,23 +50,31 @@ func (m Model) handleEvalFormInput(msg tea.KeyMsg) (Model, tea.Cmd) { return m, nil case "left": - if m.evalState.currentField <= 1 && m.evalState.cursorPos > 0 { // Text fields 0-2 - m.evalState.cursorPos-- + switch m.evalState.currentField { + case 0, 3: // Text fields: AgentURL, JudgeModel + if m.evalState.cursorPos > 0 { + m.evalState.cursorPos-- + } + case 1: // Protocol dropdown + m.evalState.cycleProtocol(true) // cycle backwards + case 2: // Transport dropdown + m.evalState.cycleTransport(true) // cycle backwards } return m, nil case "right": - if m.evalState.currentField <= 1 { // Text fields 0-1 - // Get current field length to limit cursor - var fieldLen int - switch m.evalState.currentField { - case 0: - fieldLen = 
len(m.evalState.AgentURL) - case 1: - fieldLen = len(m.evalState.JudgeModel) - case 2: - fieldLen = len(fmt.Sprintf("%d", m.evalState.ParallelRuns)) + switch m.evalState.currentField { + case 0: // AgentURL text field + fieldLen := len([]rune(m.evalState.AgentURL)) // cursorPos is a rune index, so bound it by rune count, not byte length + if m.evalState.cursorPos < fieldLen { + m.evalState.cursorPos++ } + case 1: // Protocol dropdown + m.evalState.cycleProtocol(false) // cycle forwards + case 2: // Transport dropdown + m.evalState.cycleTransport(false) // cycle forwards + case 3: // JudgeModel text field + fieldLen := len([]rune(m.evalState.JudgeModel)) // rune count, matching the up/down handlers if m.evalState.cursorPos < fieldLen { m.evalState.cursorPos++ } @@ -74,14 +82,14 @@ func (m Model) handleEvalFormInput(msg tea.KeyMsg) (Model, tea.Cmd) { return m, nil case "space": - if m.evalState.currentField == 2 { // DeepTest field is now index 2 + if m.evalState.currentField == 4 { // DeepTest field is now index 4 m.evalState.DeepTest = !m.evalState.DeepTest return m, nil } case "tab": // Open LLM config dialog when on Judge Model field - if m.evalState.currentField == 1 { // JudgeModel field + if m.evalState.currentField == 3 { // JudgeModel field llmDialog := components.NewLLMConfigDialog(m.config.APIKeys, m.config.SelectedProvider, m.config.SelectedModel) m.llmDialog = &llmDialog return m, nil @@ -89,7 +97,7 @@ func (m Model) handleEvalFormInput(msg tea.KeyMsg) (Model, tea.Cmd) { case "backspace": // Handle backspace for text fields - if m.evalState.currentField <= 1 && m.evalState.cursorPos > 0 { + if m.evalState.cursorPos > 0 { // nothing to delete at position 0 (avoids runes[:-1]); the switch below selects the text field, including field 0 switch m.evalState.currentField { case 0: // AgentURL runes := []rune(m.evalState.AgentURL) @@ -97,13 +105,13 @@ func (m Model) handleEvalFormInput(msg tea.KeyMsg) (Model, tea.Cmd) { m.evalState.AgentURL = string(runes[:m.evalState.cursorPos-1]) + string(runes[m.evalState.cursorPos:]) m.evalState.cursorPos-- } - case 1: // JudgeModel + case 3: // JudgeModel runes := []rune(m.evalState.JudgeModel) if m.evalState.cursorPos <= len(runes) { m.evalState.JudgeModel = 
string(runes[:m.evalState.cursorPos-1]) + string(runes[m.evalState.cursorPos:]) m.evalState.cursorPos-- } - case 2: // ParallelRuns (special handling for numbers) + case 6: // ParallelRuns (special handling for numbers) if m.evalState.ParallelRuns >= 10 { m.evalState.ParallelRuns /= 10 m.evalState.cursorPos-- @@ -118,17 +126,17 @@ func (m Model) handleEvalFormInput(msg tea.KeyMsg) (Model, tea.Cmd) { default: // insert character into text fields s := msg.String() - if len(s) == 1 && m.evalState.currentField <= 2 { // Text fields 0-2 + if len(s) == 1 { switch m.evalState.currentField { case 0: // AgentURL runes := []rune(m.evalState.AgentURL) m.evalState.AgentURL = string(runes[:m.evalState.cursorPos]) + s + string(runes[m.evalState.cursorPos:]) m.evalState.cursorPos++ - case 1: // JudgeModel + case 3: // JudgeModel runes := []rune(m.evalState.JudgeModel) m.evalState.JudgeModel = string(runes[:m.evalState.cursorPos]) + s + string(runes[m.evalState.cursorPos:]) m.evalState.cursorPos++ - case 2: // ParallelRuns (numeric only) + case 6: // ParallelRuns (numeric only) if s[0] >= '0' && s[0] <= '9' { numStr := fmt.Sprintf("%d", m.evalState.ParallelRuns) runes := []rune(numStr) diff --git a/packages/tui/internal/tui/eval_ui.go b/packages/tui/internal/tui/eval_ui.go index 76a553be..bbadd423 100644 --- a/packages/tui/internal/tui/eval_ui.go +++ b/packages/tui/internal/tui/eval_ui.go @@ -7,14 +7,34 @@ import ( "path/filepath" ) +type Protocol string + +const ( + ProtocolA2A Protocol = "a2a" + ProtocolMCP Protocol = "mcp" +) + +type Transport string + +const ( + // mcp transports + TransportSSE Transport = "sse" + TransportStreamableHTTP Transport = "streamable_http" + + // a2a transports + TransportHTTP Transport = "http" +) + // Minimal state for eval screens type EvaluationViewState struct { - ServerURL string // Used from config, not editable in form - AgentURL string - JudgeModel string - ParallelRuns int - DeepTest bool - Scenarios []EvalScenario + ServerURL string // 
Used from config, not editable in form + AgentURL string + AgentProtocol Protocol + AgentTransport Transport + JudgeModel string + ParallelRuns int + DeepTest bool + Scenarios []EvalScenario // Runtime Running bool @@ -31,7 +51,7 @@ type EvaluationViewState struct { StructuredSummary StructuredSummary // Editing state for New Evaluation - currentField int // 0: AgentURL, 1: JudgeModel, 2: DeepTest, 3: StartButton + currentField int // 0: AgentURL, 1: Protocol, 2: Transport, 3: JudgeModel, 4: DeepTest, 5: StartButton cursorPos int // rune index in current text field } @@ -74,7 +94,7 @@ func loadScenariosFromWorkdir() []EvalScenario { // startEval kicks off evaluation and consumes events into state func (m *Model) startEval(ctx context.Context, st *EvaluationViewState) { - ch, cancel, err := m.StartEvaluation(ctx, st.ServerURL, st.AgentURL, st.Scenarios, st.JudgeModel, st.ParallelRuns, st.DeepTest) + ch, cancel, err := m.StartEvaluation(ctx, st.ServerURL, st.AgentURL, st.AgentProtocol, st.AgentTransport, st.Scenarios, st.JudgeModel, st.ParallelRuns, st.DeepTest) if err != nil { st.Running = false st.Status = "error" @@ -123,3 +143,81 @@ func (m *Model) triggerSummaryGeneration() { // Start spinner for automatic summary generation m.summarySpinner.SetActive(true) } + +// getAllProtocols returns all available protocol options +func getAllProtocols() []Protocol { + return []Protocol{ProtocolA2A, ProtocolMCP} +} + +// getTransportsForProtocol returns valid transport options for a given protocol +func getTransportsForProtocol(protocol Protocol) []Transport { + switch protocol { + case ProtocolMCP: + return []Transport{TransportStreamableHTTP, TransportSSE} + case ProtocolA2A: + return []Transport{TransportHTTP} + default: + return []Transport{} + } +} + +// cycleProtocol cycles to the next protocol option +func (st *EvaluationViewState) cycleProtocol(reverse bool) { + protocols := getAllProtocols() + currentIdx := -1 + for i, p := range protocols { + if p == 
st.AgentProtocol { + currentIdx = i + break + } + } + + if reverse { + currentIdx-- + if currentIdx < 0 { + currentIdx = len(protocols) - 1 + } + } else { + currentIdx++ + if currentIdx >= len(protocols) { + currentIdx = 0 + } + } + + st.AgentProtocol = protocols[currentIdx] + // Reset transport to first valid option for new protocol + validTransports := getTransportsForProtocol(st.AgentProtocol) + if len(validTransports) > 0 { + st.AgentTransport = validTransports[0] + } +} + +// cycleTransport cycles to the next transport option for the current protocol +func (st *EvaluationViewState) cycleTransport(reverse bool) { + transports := getTransportsForProtocol(st.AgentProtocol) + if len(transports) == 0 { + return + } + + currentIdx := -1 + for i, t := range transports { + if t == st.AgentTransport { + currentIdx = i + break + } + } + + if reverse { + currentIdx-- + if currentIdx < 0 { + currentIdx = len(transports) - 1 + } + } else { + currentIdx++ + if currentIdx >= len(transports) { + currentIdx = 0 + } + } + + st.AgentTransport = transports[currentIdx] +} diff --git a/packages/tui/internal/tui/evaluation.go b/packages/tui/internal/tui/evaluation.go index 6b021191..952b62eb 100644 --- a/packages/tui/internal/tui/evaluation.go +++ b/packages/tui/internal/tui/evaluation.go @@ -32,13 +32,15 @@ const ( ) type AgentConfig struct { - EvaluatedAgentURL string `json:"evaluated_agent_url"` - EvaluatedAgentAuthType AuthType `json:"evaluated_agent_auth_type"` - EvaluatedAgentCredentials string `json:"evaluated_agent_credentials,omitempty"` - JudgeLLMModel string `json:"judge_llm"` - InterviewMode bool `json:"interview_mode"` - DeepTestMode bool `json:"deep_test_mode"` - ParallelRuns int `json:"parallel_runs"` + EvaluatedAgentURL string `json:"evaluated_agent_url"` + EvaluatedAgentProtocol Protocol `json:"protocol"` + EvaluatedAgentTransport Transport `json:"transport"` + EvaluatedAgentAuthType AuthType `json:"evaluated_agent_auth_type"` + EvaluatedAgentCredentials string 
`json:"evaluated_agent_credentials,omitempty"` + JudgeLLMModel string `json:"judge_llm"` + InterviewMode bool `json:"interview_mode"` + DeepTestMode bool `json:"deep_test_mode"` + ParallelRuns int `json:"parallel_runs"` } type EvalScenario struct { @@ -421,6 +423,8 @@ func (m *Model) StartEvaluation( ctx context.Context, serverURL string, agentURL string, + agentProtocol Protocol, + agentTransport Transport, scenarios []EvalScenario, judgeModel string, parallelRuns int, @@ -439,12 +443,14 @@ func (m *Model) StartEvaluation( // Build evaluation request request := EvaluationRequest{ AgentConfig: AgentConfig{ - EvaluatedAgentURL: agentURL, - EvaluatedAgentAuthType: AuthTypeNoAuth, - JudgeLLMModel: judgeModel, - InterviewMode: true, - DeepTestMode: deepTest, - ParallelRuns: parallelRuns, + EvaluatedAgentURL: agentURL, + EvaluatedAgentProtocol: agentProtocol, + EvaluatedAgentTransport: agentTransport, + EvaluatedAgentAuthType: AuthTypeNoAuth, + JudgeLLMModel: judgeModel, + InterviewMode: true, + DeepTestMode: deepTest, + ParallelRuns: parallelRuns, }, MaxRetries: 3, TimeoutSeconds: 600, diff --git a/packages/tui/internal/tui/keyboard_controller.go b/packages/tui/internal/tui/keyboard_controller.go index 41d5528b..c38b93a9 100644 --- a/packages/tui/internal/tui/keyboard_controller.go +++ b/packages/tui/internal/tui/keyboard_controller.go @@ -77,14 +77,17 @@ func (m Model) handleGlobalCtrlN() (Model, tea.Cmd) { // Use the configured model in provider/model format judgeModel = m.config.SelectedProvider + "/" + m.config.SelectedModel } + // TODO: read the agent URL and protocol from .rogue/user_config.json instead of hard-coding the defaults below m.evalState = &EvaluationViewState{ - ServerURL: m.config.ServerURL, - AgentURL: "http://localhost:10001", - JudgeModel: judgeModel, - ParallelRuns: 1, - DeepTest: false, - Scenarios: loadScenariosFromWorkdir(), - cursorPos: len([]rune("http://localhost:10001")), // Set cursor to end of Agent URL + ServerURL: m.config.ServerURL, + AgentURL: "http://localhost:10001", + 
AgentProtocol: ProtocolA2A, + AgentTransport: TransportHTTP, + JudgeModel: judgeModel, + ParallelRuns: 1, + DeepTest: false, + Scenarios: loadScenariosFromWorkdir(), + cursorPos: len([]rune("http://localhost:10001")), // Set cursor to end of Agent URL } m.currentScreen = NewEvaluationScreen return m, nil @@ -139,7 +142,7 @@ func (m Model) handleGlobalSlash(msg tea.KeyMsg) (Model, tea.Cmd) { runes := []rune(m.evalState.AgentURL) m.evalState.AgentURL = string(runes[:m.evalState.cursorPos]) + s + string(runes[m.evalState.cursorPos:]) m.evalState.cursorPos++ - case 1: // JudgeModel + case 3: // JudgeModel runes := []rune(m.evalState.JudgeModel) m.evalState.JudgeModel = string(runes[:m.evalState.cursorPos]) + s + string(runes[m.evalState.cursorPos:]) m.evalState.cursorPos++ @@ -266,11 +269,11 @@ func (m Model) handleGlobalEnter(msg tea.KeyMsg) (Model, tea.Cmd) { } // Handle NewEvaluationScreen enter for start button and LLM config if m.currentScreen == NewEvaluationScreen && m.evalState != nil { - if m.evalState.currentField == 3 { // Start button field + if m.evalState.currentField == 5 { // Start button field m.handleNewEvalEnter() // Return command to start evaluation after showing spinner return m, tea.Batch(m.evalSpinner.Start(), startEvaluationCmd()) - } else if m.evalState.currentField == 1 { // Judge LLM field + } else if m.evalState.currentField == 3 { // Judge LLM field // Open LLM config dialog when Enter is pressed on Judge LLM field llmDialog := components.NewLLMConfigDialog(m.config.APIKeys, m.config.SelectedProvider, m.config.SelectedModel) m.llmDialog = &llmDialog diff --git a/rogue/evaluator_agent/base_evaluator_agent.py b/rogue/evaluator_agent/base_evaluator_agent.py index 86f1d09e..4afdc5a0 100644 --- a/rogue/evaluator_agent/base_evaluator_agent.py +++ b/rogue/evaluator_agent/base_evaluator_agent.py @@ -366,6 +366,7 @@ def _log_evaluation( evaluation_passed: bool, reason: str, scenario_type: Optional[str], + **kwargs, ) -> None: """ Logs the 
evaluation of the given scenario and test case.