- 
                Notifications
    
You must be signed in to change notification settings  - Fork 11
 
🤖 Add OpenAI web_search integration tests #102
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
          
     Open
      
      
            ammar-agent
  wants to merge
  4
  commits into
  main
  
    
      
        
          
  
    
      Choose a base branch
      
     
    
      
        
      
      
        
          
          
        
        
          
            
              
              
              
  
           
        
        
          
            
              
              
           
        
       
     
  
        
          
            
          
            
          
        
       
    
      
from
openai-web-search-test
  
      
      
   
  
    
  
  
  
 
  
      
    base: main
Could not load branches
            
              
  
    Branch not found: {{ refName }}
  
            
                
      Loading
              
            Could not load tags
            
            
              Nothing to show
            
              
  
            
                
      Loading
              
            Are you sure you want to change the base?
            Some commits from the old base branch may be removed from the timeline,
            and old review comments may become outdated.
          
          
      
        
          +90
        
        
          −0
        
        
          
        
      
    
  
  
     Open
                    Changes from 1 commit
      Commits
    
    
            Show all changes
          
          
            4 commits
          
        
        Select commit
          Hold shift + click to select a range
      
      8cb4353
              
                🤖 Add OpenAI web_search integration tests
              
              
                ammar-agent dfa2118
              
                Simplify OpenAI web_search test to prevent timeouts
              
              
                ammar-agent 3b8c8a8
              
                Merge branch 'main' into openai-web-search-test
              
              
                ammario 7acc72e
              
                Update test to reliably reproduce reasoning + web_search bug
              
              
                ammar-agent File filter
Filter by extension
Conversations
          Failed to load comments.   
        
        
          
      Loading
        
  Jump to
        
          Jump to file
        
      
      
          Failed to load files.   
        
        
          
      Loading
        
  Diff view
Diff view
There are no files selected for viewing
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
              | Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,194 @@ | ||
| import { | ||
| setupWorkspace, | ||
| shouldRunIntegrationTests, | ||
| validateApiKeys, | ||
| type TestEnvironment, | ||
| } from "./setup"; | ||
| import { sendMessageWithModel, createEventCollector, assertStreamSuccess } from "./helpers"; | ||
| 
     | 
||
| // Skip all tests if TEST_INTEGRATION is not set | ||
| const describeIntegration = shouldRunIntegrationTests() ? describe : describe.skip; | ||
| 
     | 
||
| // Validate API keys before running tests | ||
| if (shouldRunIntegrationTests()) { | ||
| validateApiKeys(["OPENAI_API_KEY"]); | ||
| } | ||
| 
     | 
||
| describeIntegration("OpenAI web_search integration tests", () => { | ||
| // Enable retries in CI for flaky API tests | ||
| if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) { | ||
| jest.retryTimes(3, { logErrorsBeforeRetry: true }); | ||
| } | ||
| 
     | 
||
| test.concurrent( | ||
| "should successfully execute web_search tool call", | ||
| async () => { | ||
| // Setup test environment with OpenAI | ||
| const { env, workspaceId, cleanup } = await setupWorkspace("openai"); | ||
| try { | ||
| // Send a message that should trigger web search | ||
| // Use a query that requires current information to encourage web search | ||
| const result = await sendMessageWithModel( | ||
| env.mockIpcRenderer, | ||
| workspaceId, | ||
| "Search the web for current weather in San Francisco", | ||
| "openai", | ||
| "gpt-5-codex" | ||
| ); | ||
| 
     | 
||
| // Verify the IPC call succeeded | ||
| expect(result.success).toBe(true); | ||
| 
     | 
||
| // Collect and verify stream events | ||
| const collector = createEventCollector(env.sentEvents, workspaceId); | ||
| 
     | 
||
| // Wait for stream to start | ||
| const streamStart = await collector.waitForEvent("stream-start", 10000); | ||
| expect(streamStart).toBeDefined(); | ||
| 
     | 
||
| // Wait for tool-call-start (indicates web_search is being executed) | ||
| const toolCallStart = await collector.waitForEvent("tool-call-start", 15000); | ||
| expect(toolCallStart).toBeDefined(); | ||
| 
     | 
||
| // Verify it's a web_search tool call | ||
| if (toolCallStart && "toolName" in toolCallStart) { | ||
| expect(toolCallStart.toolName).toBe("web_search"); | ||
| } | ||
| 
     | 
||
| // Wait for stream to complete | ||
| const streamEnd = await collector.waitForEvent("stream-end", 30000); | ||
| expect(streamEnd).toBeDefined(); | ||
| 
     | 
||
| // Verify no errors occurred | ||
| assertStreamSuccess(collector); | ||
| 
     | 
||
| // Verify we received text deltas (the assistant's response) | ||
| const deltas = collector.getDeltas(); | ||
| expect(deltas.length).toBeGreaterThan(0); | ||
| 
     | 
||
| // Collect all events and verify web_search was executed | ||
| collector.collect(); | ||
| const events = collector.getEvents(); | ||
| 
     | 
||
| const hasWebSearchCall = events.some( | ||
| (e) => | ||
| "type" in e && | ||
| e.type === "tool-call-start" && | ||
| "toolName" in e && | ||
| e.toolName === "web_search" | ||
| ); | ||
| expect(hasWebSearchCall).toBe(true); | ||
| } finally { | ||
| await cleanup(); | ||
| } | ||
| }, | ||
| 45000 // 45 second timeout - web search can take time | ||
| ); | ||
| 
     | 
||
| test.concurrent( | ||
| "should handle multiple web_search calls in sequence", | ||
| async () => { | ||
| // Setup test environment with OpenAI | ||
| const { env, workspaceId, cleanup } = await setupWorkspace("openai"); | ||
| try { | ||
| // Send a message that might trigger multiple web searches | ||
| const result = await sendMessageWithModel( | ||
| env.mockIpcRenderer, | ||
| workspaceId, | ||
| "Search for the latest news about TypeScript and then search for React updates", | ||
| "openai", | ||
| "gpt-5-codex" | ||
| ); | ||
| 
     | 
||
| // Verify the IPC call succeeded | ||
| expect(result.success).toBe(true); | ||
| 
     | 
||
| // Collect and verify stream events | ||
| const collector = createEventCollector(env.sentEvents, workspaceId); | ||
| 
     | 
||
| // Wait for stream to complete (may take longer with multiple searches) | ||
| const streamEnd = await collector.waitForEvent("stream-end", 60000); | ||
| expect(streamEnd).toBeDefined(); | ||
| 
     | 
||
| // Verify no errors occurred | ||
| assertStreamSuccess(collector); | ||
| 
     | 
||
| // Collect all events | ||
| collector.collect(); | ||
| const events = collector.getEvents(); | ||
| 
     | 
||
| // Count web_search tool calls | ||
| const webSearchCalls = events.filter( | ||
| (e) => | ||
| "type" in e && | ||
| e.type === "tool-call-start" && | ||
| "toolName" in e && | ||
| e.toolName === "web_search" | ||
| ); | ||
| 
     | 
||
| // Should have at least one web_search call | ||
| // (Model may decide to combine searches or use multiple - either is valid) | ||
| expect(webSearchCalls.length).toBeGreaterThan(0); | ||
| } finally { | ||
| await cleanup(); | ||
| } | ||
| }, | ||
| 75000 // 75 second timeout - multiple searches take time | ||
| ); | ||
| 
     | 
||
| test.concurrent( | ||
| "should correctly handle reasoning with web_search", | ||
| async () => { | ||
| // Setup test environment with OpenAI | ||
| const { env, workspaceId, cleanup } = await setupWorkspace("openai"); | ||
| try { | ||
| // Send a message that should trigger reasoning + web search | ||
| const result = await sendMessageWithModel( | ||
| env.mockIpcRenderer, | ||
| workspaceId, | ||
| "Find recent information about quantum computing breakthroughs", | ||
| "openai", | ||
| "gpt-5-codex" | ||
| ); | ||
| 
     | 
||
| // Verify the IPC call succeeded | ||
| expect(result.success).toBe(true); | ||
| 
     | 
||
| // Collect and verify stream events | ||
| const collector = createEventCollector(env.sentEvents, workspaceId); | ||
| 
     | 
||
| // Wait for stream to complete | ||
| const streamEnd = await collector.waitForEvent("stream-end", 45000); | ||
| expect(streamEnd).toBeDefined(); | ||
| 
     | 
||
| // Verify no errors occurred (this is the key test - ensuring no reasoning-related errors) | ||
| assertStreamSuccess(collector); | ||
| 
     | 
||
| // Collect all events | ||
| collector.collect(); | ||
| const events = collector.getEvents(); | ||
| 
     | 
||
| // Check whether the stream produced any reasoning deltas (expected from reasoning-capable models; not asserted on its own) | ||
| const hasReasoning = events.some((e) => "type" in e && e.type === "reasoning-delta"); | ||
| 
     | 
||
| // Check whether a web_search tool call was started (asserted below only when reasoning was observed) | ||
| const hasWebSearch = events.some( | ||
| (e) => | ||
| "type" in e && | ||
| e.type === "tool-call-start" && | ||
| "toolName" in e && | ||
| e.toolName === "web_search" | ||
| ); | ||
| 
     | 
||
| // web_search is only required when reasoning deltas were observed: | ||
| // if reasoning is present, a web_search tool call must also have started; otherwise this check is skipped | ||
| if (hasReasoning) { | ||
| expect(hasWebSearch).toBe(true); | ||
| } | ||
| } finally { | ||
| await cleanup(); | ||
| } | ||
| }, | ||
| 60000 // 60 second timeout | ||
| ); | ||
| }); | ||
      
      Oops, something went wrong.
        
    
  
  Add this suggestion to a batch that can be applied as a single commit.
  This suggestion is invalid because no changes were made to the code.
  Suggestions cannot be applied while the pull request is closed.
  Suggestions cannot be applied while viewing a subset of changes.
  Only one suggestion per line can be applied in a batch.
  Add this suggestion to a batch that can be applied as a single commit.
  Applying suggestions on deleted lines is not supported.
  You must change the existing code in this line in order to create a valid suggestion.
  Outdated suggestions cannot be applied.
  This suggestion has been applied or marked resolved.
  Suggestions cannot be applied from pending reviews.
  Suggestions cannot be applied on multi-line comments.
  Suggestions cannot be applied while the pull request is queued to merge.
  Suggestion cannot be applied right now. Please check back later.
  
    
  
    
Uh oh!
There was an error while loading. Please reload this page.