Merge branch 'main' into eval2-base2

jahooma · jahooma · commit 68ed8a147f38 · 2025-10-07T12:18:23.000-07:00
diff --git a/.agents/base2/base2.ts b/.agents/base2/base2.ts
@@ -76,7 +76,7 @@ Use this workflow to solve a medium or complex coding task:
 3. Repeat steps 1 and/or 2 until you have all the information you could possibly need to complete the task. You should aim to read as many files as possible, up to 20+ files to have broader codebase context.
 4. Spawn a decomposing planner to come up with a plan.
 5. Spawn an editor to implement the plan. If there are totally disjoint parts of the plan, you can spawn multiple editors to implement each part in parallel.
-6. Spawn a reviewer to review the code. If changes are needed, go back to step 5, but no more than once.
+6. Spawn a reviewer to review the changes made by the editor. If more changes are needed, go back to step 5, but no more than once.
 7. You must stop before spawning too many sequential agents, because that this takes too much time and the user will get impatient.
 
 Feel free to modify this workflow as needed. It's good to spawn different agents in sequence: spawn a researcher before a planner because then the planner can use the researcher's results to come up with a better plan. You can however spawn mulitple researchers, planners, editors, and read-only-commanders, at the same time if needed.
@@ -91,6 +91,8 @@ Feel free to modify this workflow as needed. It's good to spawn different agents
 - Be careful about instructing subagents to run terminal commands that could be destructive or have effects that are hard to undo (e.g. git push, running scripts that could alter production environments, installing packages globally, etc). Don't do any of these unless the user explicitly asks you to.
 `,
 
+  stepPrompt: `Don't forget to spawn agents that could help, especially: the researcher-file-explorer to get codebase context, the decomposing-planner to craft a great plan, and the reviewer-max to review code changes made by the editor.`,
+
   handleSteps: function* ({ prompt, params }) {
     let steps = 0
     while (true) {
diff --git a/backend/src/__tests__/loop-agent-steps.test.ts b/backend/src/__tests__/loop-agent-steps.test.ts
@@ -97,17 +97,6 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
     mockModule('@codebuff/backend/get-file-reading-updates', () => ({
       getFileReadingUpdates: async () => [],
     }))
-
-    // Mock async agent manager
-    mockModule('@codebuff/backend/async-agent-manager', () => ({
-      asyncAgentManager: {
-        getAgent: () => null,
-        registerAgent: () => {},
-        updateAgentState: () => {},
-        getAndClearMessages: () => [],
-        getMessages: () => [],
-      },
-    }))
   })
 
   beforeEach(() => {
@@ -541,55 +530,6 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
     expect(llmCallCount).toBe(1) // LLM called once after STEP
     expect(result.agentState).toBeDefined()
   })
-
-  it('should respect async agent messages and continue appropriately', async () => {
-    // Test async agent message handling during loopAgentSteps
-
-    const mockGeneratorFunction = function* () {
-      yield { toolName: 'read_files', input: { paths: ['async-test.txt'] } }
-      yield 'STEP'
-    } as () => StepGenerator
-
-    mockTemplate.handleSteps = mockGeneratorFunction
-
-    const localAgentTemplates = {
-      'test-agent': mockTemplate,
-    }
-
-    // Mock async agent manager to simulate pending messages
-    const mockAsyncAgentManager = require('@codebuff/backend/async-agent-manager')
-    let getMessagesCallCount = 0
-    spyOn(
-      mockAsyncAgentManager.asyncAgentManager,
-      'getMessages',
-    ).mockImplementation(() => {
-      getMessagesCallCount++
-      // Return messages on second call to simulate async agent activity
-      return getMessagesCallCount === 2 ? ['async message'] : []
-    })
-
-    const result = await runLoopAgentStepsWithContext(
-      new MockWebSocket() as unknown as WebSocket,
-      {
-        userInputId: 'test-user-input',
-        agentType: 'test-agent',
-        agentState: mockAgentState,
-        prompt: 'Test async agent messages',
-        params: undefined,
-        fingerprintId: 'test-fingerprint',
-        fileContext: mockFileContext,
-        localAgentTemplates,
-        userId: TEST_USER_ID,
-        clientSessionId: 'test-session',
-        onResponseChunk: () => {},
-      },
-    )
-
-    // Should continue when async messages are present
-    expect(result.agentState).toBeDefined()
-    expect(getMessagesCallCount).toBeGreaterThan(0)
-  })
-
   it('should pass shouldEndTurn: true as stepsComplete when end_turn tool is called', async () => {
     // Test that when LLM calls end_turn, shouldEndTurn is correctly passed to runProgrammaticStep
 
@@ -619,19 +559,22 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
       'test-agent': mockTemplate,
     }
 
-    await runLoopAgentStepsWithContext(new MockWebSocket() as unknown as WebSocket, {
-      userInputId: 'test-user-input',
-      agentType: 'test-agent',
-      agentState: mockAgentState,
-      prompt: 'Test shouldEndTurn to stepsComplete flow',
-      params: undefined,
-      fingerprintId: 'test-fingerprint',
-      fileContext: mockFileContext,
-      localAgentTemplates,
-      userId: TEST_USER_ID,
-      clientSessionId: 'test-session',
-      onResponseChunk: () => {},
-    })
+    await runLoopAgentStepsWithContext(
+      new MockWebSocket() as unknown as WebSocket,
+      {
+        userInputId: 'test-user-input',
+        agentType: 'test-agent',
+        agentState: mockAgentState,
+        prompt: 'Test shouldEndTurn to stepsComplete flow',
+        params: undefined,
+        fingerprintId: 'test-fingerprint',
+        fileContext: mockFileContext,
+        localAgentTemplates,
+        userId: TEST_USER_ID,
+        clientSessionId: 'test-session',
+        onResponseChunk: () => {},
+      },
+    )
 
     mockedRunProgrammaticStep.clear()
 
diff --git a/backend/src/__tests__/read-docs-tool.test.ts b/backend/src/__tests__/read-docs-tool.test.ts
@@ -183,15 +183,6 @@ describe('read_docs tool with researcher agent', () => {
       spy: startUserInputSpy,
     })
 
-    const endUserInputSpy = spyOn(
-      liveUserInputs,
-      'endUserInput',
-    ).mockImplementation(() => {})
-    mockedFunctions.push({
-      name: 'liveUserInputs.endUserInput',
-      spy: endUserInputSpy,
-    })
-
     const cancelUserInputSpy = spyOn(
       liveUserInputs,
       'cancelUserInput',
diff --git a/backend/src/__tests__/run-agent-step-tools.test.ts b/backend/src/__tests__/run-agent-step-tools.test.ts
@@ -89,7 +89,6 @@ describe('runAgentStep - set_output tool', () => {
     // Mock live user inputs to always return true (simulating active session)
     spyOn(liveUserInputs, 'checkLiveUserInput').mockImplementation(() => true)
     spyOn(liveUserInputs, 'startUserInput').mockImplementation(() => {})
-    spyOn(liveUserInputs, 'endUserInput').mockImplementation(() => {})
     spyOn(liveUserInputs, 'setSessionConnected').mockImplementation(() => {})
 
     spyOn(websocketAction, 'requestFiles').mockImplementation(
diff --git a/backend/src/templates/strings.ts b/backend/src/templates/strings.ts
@@ -162,7 +162,7 @@ export async function getAgentPrompt<T extends StringField>({
     return undefined
   }
 
-  const prompt = await formatPrompt({
+  let prompt = await formatPrompt({
     prompt: promptValue,
     fileContext,
     agentState,
@@ -174,6 +174,11 @@ export async function getAgentPrompt<T extends StringField>({
 
   let addendum = ''
 
+  if (promptType.type === 'stepPrompt' && agentState.agentType) {
+    // Put step prompt within a system-reminder tag so agent doesn't think the user just spoke again.
+    prompt = `<system-reminder>${prompt}</system-reminder>`
+  }
+
   // Add tool instructions, spawnable agents, and output schema prompts to instructionsPrompt
   if (promptType.type === 'instructionsPrompt' && agentState.agentType) {
     const toolsInstructions = agentTemplate.inheritParentSystemPrompt
diff --git a/npm-app/src/client.ts b/npm-app/src/client.ts
@@ -1348,6 +1348,8 @@ export class Client {
       this.cancelCurrentInput()
       this.currentOnChunk = undefined
 
+      xmlStreamParser.destroy()
+
       const additionalMessages = prompt
         ? [
             { role: 'user' as const, content: prompt },
diff --git a/npm-app/src/display/markdown-renderer.ts b/npm-app/src/display/markdown-renderer.ts
@@ -101,7 +101,7 @@ export class MarkdownStreamRenderer {
         this.width = process.stdout.columns || this.width
       }
       // Use .once with bound handler tracker to avoid duplication
-      process.stdout.addListener('resize', this.resizeHandler)
+      process.stdout.on('resize', this.resizeHandler)
     }
   }
 

Original file line number	Diff line number	Diff line change
`@@ -101,7 +101,7 @@ export class MarkdownStreamRenderer {`
`101`	`101`	`this.width = process.stdout.columns \|\| this.width`
`102`	`102`	`}`
`103`	`103`	`// Use .once with bound handler tracker to avoid duplication`
`104`		`- process.stdout.addListener('resize', this.resizeHandler)`
	`104`	`+ process.stdout.on('resize', this.resizeHandler)`
`105`	`105`	`}`
`106`	`106`	`}`
`107`	`107`