diff --git a/coding_agent_dataset.py b/coding_agent_dataset.py index 3f4c703..625923b 100644 --- a/coding_agent_dataset.py +++ b/coding_agent_dataset.py @@ -7,7 +7,7 @@ import asyncio import json -import os +import rich import re from datetime import datetime from pathlib import Path @@ -30,23 +30,34 @@ TRACE_DIR, TRACE_TRUNCATE_TOOL_RESULTS ) -from example_tools import bash_execute, edit_file, read_file, write_file +from rich.console import Console +from rich.panel import Panel +from rich.markdown import Markdown +from example_tools import bash_execute, edit_file, glob_tool, read_file, ripgrep_search, write_file from example_with_todo import SimpleTodoManager +def pretty_print_markdown(markup: str, title: str): + """Pretty print a payload to the console.""" + console = Console() + console.print( + Panel(Markdown(markup), title=title) + ) + + async def run_dataset_coding_agent(task_index=0): """Run coding agent on a task from the dataset. Always saves traces.""" - print(f"Using model: {MODEL}") - print(f"API Base: {API_BASE}\n") + rich.print(f"Using model: {MODEL}") + rich.print(f"API Base: {API_BASE}\n") # Load dataset - print(f"Loading {DATASET_NAME} dataset from Hugging Face...") + rich.print(f"Loading {DATASET_NAME} dataset from Hugging Face...") dataset = load_dataset(DATASET_NAME) # Get task task = dataset['train'][task_index] - print(f"Loaded task {task_index}: {task['instance_id']}\n") + rich.print(f"Loaded task {task_index}: {task['instance_id']}\n") # Repository setup - USE REPO POOL FOR CONCURRENCY OR EXISTING REPOS # Try to find existing repos first (hyperswitch_task_X or hyperswitch_pool_X) @@ -74,13 +85,13 @@ async def run_dataset_coding_agent(task_index=0): for dir_path in possible_dirs: if Path(dir_path).exists(): WORK_DIR = dir_path - print(f"Reusing existing repo: {WORK_DIR}") + rich.print(f"Reusing existing repo: {WORK_DIR}") break # If no existing repo found, create new one if WORK_DIR is None: WORK_DIR = f"./repos/{REPO_NAME}_pool_{repo_pool_id}" - print(f"Cloning repository to {WORK_DIR}...") + rich.print(f"Cloning repository to {WORK_DIR}...") # Create repos directory bash_execute(command="mkdir -p repos", working_dir=".") @@ -88,13 +99,13 @@ async def run_dataset_coding_agent(task_index=0): command=f"git clone https://github.com/{task['repo']}.git {REPO_NAME}_pool_{repo_pool_id}", working_dir="./repos" ) - print(f" Clone result: {json.loads(clone_result).get('success', False)}") + rich.print(f" Clone result: {json.loads(clone_result).get('success', False)}") # Checkout the base commit so the diff isn't applied yet - print(f"Checking out base commit: {task['base_commit'][:8]}...") + rich.print(f"Checking out base commit: {task['base_commit'][:8]}...") # Checkout base commit (with fetch if needed) - print(f" Fetching commit if needed...") + rich.print(f" Fetching commit if needed...") fetch_result = bash_execute( command=f"git fetch origin {task['base_commit']} || true", working_dir=WORK_DIR, @@ -107,9 +118,9 @@ async def run_dataset_coding_agent(task_index=0): ) checkout_data = json.loads(checkout_result) if checkout_data.get('success'): - print(f"Checked out commit {task['base_commit'][:8]}\n") + rich.print(f"Checked out commit {task['base_commit'][:8]}\n") else: - print(f"WARNING: Using current HEAD (commit not found): {checkout_data.get('stderr', 'unknown')[:100]}\n") + rich.print(f"WARNING: Using current HEAD (commit not found): {checkout_data.get('stderr', 'unknown')[:100]}\n") # Create enhanced prompt with dataset task initial_prompt = f"""You are working on 
the hyperswitch repository (Rust payment processing system). @@ -159,13 +170,8 @@ async def run_dataset_coding_agent(task_index=0): EFFICIENCY IS CRITICAL. Make substantial progress every turn. Start by creating your complete todo list NOW.""" - # Print the initial prompt - print("="*80) - print("INITIAL PROMPT:") - print("="*80) - print(initial_prompt) - print("="*80) - print() + pretty_print_markdown(task['problem_statement'], "Problem Statement") + pretty_print_markdown(task['hints_text'], "Hints Text") messages = [ { @@ -322,6 +328,90 @@ async def run_dataset_coding_agent(task_index=0): }, }, }, + { + "type": "function", + "function": { + "name": "ripgrep_search", + "description": "FAST search powered by ripgrep. PREFERRED over bash grep for better performance and automatic output limiting (max 20k matches). Supports regex patterns and file filtering.", + "parameters": { + "type": "object", + "properties": { + "pattern": { + "type": "string", + "description": "The pattern to search for. Treated as regex by default. Use '\\b' for word boundaries (e.g., '\\bMatchMe\\b')." + }, + "dir_path": { + "type": "string", + "description": "Directory or file to search. Defaults to current directory ('.'). Use './hyperswitch' for repo searches.", + "default": "." + }, + "include": { + "type": "string", + "description": "Glob pattern to filter files (e.g., '*.rs', '*.{toml,yaml}'). Recommended for large repos to reduce noise." + }, + "case_sensitive": { + "type": "boolean", + "description": "If true, search is case-sensitive. Defaults to false (case-insensitive).", + "default": False + }, + "fixed_strings": { + "type": "boolean", + "description": "If true, treats pattern as literal string instead of regex. Defaults to false.", + "default": False + }, + "context": { + "type": "integer", + "description": "Show this many lines of context around each match (like grep -C)." + }, + "after": { + "type": "integer", + "description": "Show this many lines after each match (like grep -A)." + }, + "before": { + "type": "integer", + "description": "Show this many lines before each match (like grep -B)." + }, + "no_ignore": { + "type": "boolean", + "description": "If true, search all files including those in .gitignore, build/, dist/, etc. Defaults to false.", + "default": False + } + }, + "required": ["pattern"] + } + } + }, + { + "type": "function", + "function": { + "name": "glob_tool", + "description": "Efficiently finds files matching specific glob patterns (e.g., '**/*.py', '*.md', 'src/**/*.rs'), returning absolute paths sorted by modification time (newest first). Ideal for quickly locating files based on their name or path structure, especially in large codebases.", + "parameters": { + "type": "object", + "properties": { + "pattern": { + "type": "string", + "description": "The glob pattern to match against (e.g., '**/*.py', 'docs/*.md', '*.{rs,toml}'). Use '**/' for recursive directory matching." + }, + "dir_path": { + "type": "string", + "description": "Optional: The directory path to search within. If omitted, searches the current directory. Use './hyperswitch' for repo searches." + }, + "case_sensitive": { + "type": "boolean", + "description": "Optional: Whether the search should be case-sensitive. Defaults to false.", + "default": False + }, + "respect_git_ignore": { + "type": "boolean", + "description": "Optional: Whether to respect .gitignore patterns when finding files. 
Defaults to true.", + "default": True + } + }, + "required": ["pattern"] + } + } + }, ] # Initialize todo manager @@ -333,6 +423,8 @@ async def run_dataset_coding_agent(task_index=0): "read_file": read_file, "write_file": write_file, "edit_file": edit_file, + "ripgrep_search": ripgrep_search, + "glob_tool": glob_tool, } # Trace collection @@ -354,9 +446,9 @@ async def run_dataset_coding_agent(task_index=0): async with httpx.AsyncClient(timeout=API_TIMEOUT) as client: for turn in range(MAX_TURNS): - print(f"\n{'='*80}") - print(f"Turn {turn + 1}/{MAX_TURNS}") - print(f"{'='*80}\n") + rich.print(f"\n{'='*80}") + rich.print(f"Turn {turn + 1}/{MAX_TURNS}") + rich.print(f"{'='*80}\n") try: # Make API call to OpenAI-compatible endpoint @@ -377,8 +469,8 @@ async def run_dataset_coding_agent(task_index=0): ) if response.status_code != 200: - print(f"API Error: {response.status_code}") - print(response.text) + rich.print(f"API Error: {response.status_code}") + rich.print(response.text) trace_data["error"] = {"turn": turn, "status": response.status_code, "text": response.text} break @@ -395,18 +487,18 @@ async def run_dataset_coding_agent(task_index=0): # Debug: Show raw response on first turn if turn == 0: - print(f"šŸ” DEBUG - First response:") - print(f" Content: {assistant_message.get('content', 'None')[:200]}") - print(f" Tool calls: {assistant_message.get('tool_calls', 'None')}") - print() + rich.print(f"šŸ” DEBUG - First response:") + rich.print(f" Content: {assistant_message.get('content', 'None')[:200]}") + rich.print(f" Tool calls: {assistant_message.get('tool_calls', 'None')}") + rich.print() # Display assistant response if assistant_message.get("content"): content = assistant_message['content'] # Check if content contains XML-style tool calls (malformed response) if '' in content: - print(f"WARNING - ASSISTANT (with malformed tool calls):\n{content[:500]}\n") - print("ERROR: Model returned XML-style tool calls instead of JSON. This model may not support OpenAI tool format correctly.") + rich.print(f"WARNING - ASSISTANT (with malformed tool calls):\n{content[:500]}\n") + rich.print("ERROR: Model returned XML-style tool calls instead of JSON. 
This model may not support OpenAI tool format correctly.") trace_data["error"] = {"turn": turn, "type": "malformed_tool_calls"} break else: # Highlight thinking sections if '<think>' in content: # Extract and highlight thinking think_pattern = r'<think>(.*?)</think>' matches = re.findall(think_pattern, content, re.DOTALL) if matches: # Display thinking sections prominently for i, think_content in enumerate(matches, 1): - print(f"\n{'='*80}") - print(f"THINKING (Section {i}):") - print(f"{'='*80}") - print(think_content.strip()) - print(f"{'='*80}\n") + rich.print(f"\n{'='*80}") + rich.print(f"THINKING (Section {i}):") + rich.print(f"{'='*80}") + rich.print(think_content.strip()) + rich.print(f"{'='*80}\n") # Remove think tags and show the rest rest_content = re.sub(think_pattern, '', content, flags=re.DOTALL).strip() if rest_content: - print(f"ASSISTANT:\n{rest_content}\n") + rich.print(f"ASSISTANT:\n{rest_content}\n") else: - print(f"ASSISTANT:\n{content}\n") + rich.print(f"ASSISTANT:\n{content}\n") else: - print(f"ASSISTANT:\n{content}\n") + rich.print(f"ASSISTANT:\n{content}\n") messages.append(assistant_message) @@ -440,17 +532,17 @@ async def run_dataset_coding_agent(task_index=0): tool_calls = assistant_message.get("tool_calls", []) if not tool_calls: - print("āœ“ Agent completed (no more tool calls)!\n") + rich.print("āœ“ Agent completed (no more tool calls)!\n") break # Execute tools - print(f"šŸ”§ Executing {len(tool_calls)} tool call(s)...\n") + rich.print(f"šŸ”§ Executing {len(tool_calls)} tool call(s)...\n") for idx, tool_call in enumerate(tool_calls, 1): function_name = tool_call["function"]["name"] function_args = json.loads(tool_call["function"]["arguments"]) - print(f"[{idx}] Tool: {function_name}") + rich.print(f"[{idx}] Tool: {function_name}") # Collect tool call for trace tool_call_data = { @@ -460,17 +552,21 @@ async def run_dataset_coding_agent(task_index=0): # Show file path if present if "file_path" in function_args: - print(f" šŸ“ File: {function_args['file_path']}") + rich.print(f" šŸ“ File: {function_args['file_path']}") if "working_dir" in function_args and function_args["working_dir"] != ".": - print(f" šŸ“‚ Working Dir: {function_args['working_dir']}") + rich.print(f" šŸ“‚ Working Dir: {function_args['working_dir']}") if "command" in function_args: - print(f" šŸ’» Command: {function_args['command'][:100]}") + rich.print(f" šŸ’» Command: {function_args['command'][:100]}") + if "pattern" in function_args: + rich.print(f" šŸ” Pattern: {function_args['pattern'][:100]}") + if "dir_path" in function_args and function_args["dir_path"] != ".": + rich.print(f" šŸ“‚ Search Dir: {function_args['dir_path']}") # Show other args other_args = {k: v for k, v in function_args.items() - if k not in ["file_path", "working_dir", "command", "content", "old_text", "new_text"]} + if k not in ["file_path", "working_dir", "command", "content", "old_text", "new_text", "pattern", "dir_path"]} if other_args: - print(f" āš™ļø Args: {json.dumps(other_args)}") + rich.print(f" āš™ļø Args: {json.dumps(other_args)}") if function_name in tool_map: try: @@ -491,39 +587,47 @@ async def run_dataset_coding_agent(task_index=0): success = result_json.get('success', False) if success: - print(f" āœ… Success!") + rich.print(f" āœ… Success!") else: - print(f" āŒ Failed!") + rich.print(f" āŒ Failed!") if 'stdout' in result_json and result_json['stdout']: stdout_preview = result_json['stdout'][:300].replace('\n', '\n ') - print(f" šŸ“¤ Output:\n {stdout_preview}") + rich.print(f" šŸ“¤ Output:\n {stdout_preview}") if 'stderr' in result_json and result_json['stderr']: stderr_preview = 
result_json['stderr'][:200] - print(f" āš ļø Stderr: {stderr_preview}") + rich.print(f" āš ļø Stderr: {stderr_preview}") if 'num_lines' in result_json: - print(f" šŸ“Š Lines read: {result_json['num_lines']}") + rich.print(f" šŸ“Š Lines read: {result_json['num_lines']}") if 'bytes_written' in result_json: - print(f" šŸ’¾ Bytes written: {result_json['bytes_written']}") + rich.print(f" šŸ’¾ Bytes written: {result_json['bytes_written']}") if 'occurrences_replaced' in result_json: - print(f" šŸ”„ Replacements: {result_json['occurrences_replaced']}") + rich.print(f" šŸ”„ Replacements: {result_json['occurrences_replaced']}") + + if 'total_matches' in result_json: + rich.print(f" šŸ” Matches: {result_json['total_matches']}") + if result_json.get('truncated'): + rich.print(f" āš ļø Results truncated to limit") + + if 'file_count' in result_json: + rich.print(f" šŸ“ Files found: {result_json['file_count']}") if 'error' in result_json: - print(f" āŒ Error: {result_json['error']}") + rich.print(f" āŒ Error: {result_json['error']}") except: # For todo_manager which returns plain text if function_name == "todo_manager": preview = result_text[:400].replace('\n', '\n ') - print(f" šŸ“‹ Result:\n {preview}") + rich.print(f" šŸ“‹ Result:\n {preview}") else: - print(f" Result: {result_text[:300]}") + rich.print(f" Result: {result_text[:300]}") - print() + rich.print() messages.append({ "role": "tool", @@ -533,7 +637,7 @@ async def run_dataset_coding_agent(task_index=0): }) except Exception as e: error_msg = f"Error: {str(e)}" - print(f" āŒ {error_msg}\n") + rich.print(f" āŒ {error_msg}\n") turn_data["tool_calls"].append(tool_call_data) turn_data["tool_results"].append({ @@ -553,22 +657,22 @@ async def run_dataset_coding_agent(task_index=0): trace_data["turns"].append(turn_data) except Exception as e: - print(f"āŒ Error: {e}") + rich.print(f"āŒ Error: {e}") import traceback traceback.print_exc() trace_data["error"] = {"turn": turn, "exception": str(e)} break - print(f"\n{'='*80}") - print("šŸŽ‰ Agent run completed!") - print(f"{'='*80}\n") + rich.print(f"\n{'='*80}") + rich.print("šŸŽ‰ Agent run completed!") + rich.print(f"{'='*80}\n") # Show final todo list - print("\nšŸ“‹ Final Todo List Status:") - print("="*80) + rich.print("\nšŸ“‹ Final Todo List Status:") + rich.print("="*80) final_list = todo_manager._list_items() - print(final_list) - print("="*80 + "\n") + rich.print(final_list) + rich.print("="*80 + "\n") # Save final todo state in trace trace_data["final_todo_state"] = final_list @@ -584,13 +688,13 @@ async def run_dataset_coding_agent(task_index=0): with open(trace_path, "w") as f: json.dump(trace_data, f, indent=2) - print(f"Trace saved to: {trace_path}\n") + rich.print(f"Trace saved to: {trace_path}\n") return trace_data def main(): - print(""" + rich.print(""" ╔══════════════════════════════════════════════════════════════════════════════╗ ā•‘ Hyperswitch Dataset Coding Agent (Local OpenAI-compatible) ā•‘ ā•šā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā• @@ -605,14 +709,14 @@ def main(): """) # Get task index from config - print(f"Running task index: {TASK_INDEX}\n") + rich.print(f"Running task index: {TASK_INDEX}\n") try: asyncio.run(run_dataset_coding_agent(task_index=TASK_INDEX)) except KeyboardInterrupt: - print("\nāš ļø Interrupted by user") + rich.print("\nāš ļø Interrupted by user") 
except Exception as e: - print(f"\nāŒ Error: {e}") + rich.print(f"\nāŒ Error: {e}") raise diff --git a/coding_agent_dataset_sync.py b/coding_agent_dataset_sync.py new file mode 100644 index 0000000..5b3f78d --- /dev/null +++ b/coding_agent_dataset_sync.py @@ -0,0 +1,734 @@ +#!/usr/bin/env python3 +""" +Coding agent that works on tasks from the hyperswitch dataset. +Uses local OpenAI-compatible API server. +Generates traces with todo list usage for analysis. +""" + +import json +import rich +import re +from datetime import datetime +from pathlib import Path + +import httpx +import weave +from datasets import load_dataset + +from config import ( + API_BASE, + API_KEY, + API_MAX_TOKENS, + API_TEMPERATURE, + API_TIMEOUT, + DATASET_NAME, + MAX_CONCURRENT_REPOS, + MAX_TURNS, + MODEL, + REPO_NAME, + TASK_INDEX, + TRACE_DIR, + TRACE_TRUNCATE_TOOL_RESULTS +) +from rich.console import Console +from rich.panel import Panel +from rich.markdown import Markdown +from example_tools import bash_execute, edit_file, glob_tool, read_file, ripgrep_search, write_file +from example_with_todo import SimpleTodoManager + + +bash_execute = weave.op(bash_execute) +edit_file = weave.op(edit_file) +glob_tool = weave.op(glob_tool) +read_file = weave.op(read_file) +ripgrep_search = weave.op(ripgrep_search) +write_file = weave.op(write_file) + + +def pretty_print_markdown(markup: str, title: str): + """Pretty print a payload to the console.""" + console = Console() + console.print( + Panel(Markdown(markup), title=title) + ) + + +@weave.op() +def run_dataset_coding_agent(task_index=0): + """Run coding agent on a task from the dataset. Always saves traces.""" + + rich.print(f"Using model: {MODEL}") + rich.print(f"API Base: {API_BASE}\n") + + # Load dataset + rich.print(f"Loading {DATASET_NAME} dataset from Hugging Face...") + dataset = load_dataset(DATASET_NAME) + + # Get task + task = dataset['train'][task_index] + rich.print(f"Loaded task {task_index}: {task['instance_id']}\n") + + # Repository setup - USE REPO POOL FOR CONCURRENCY OR EXISTING REPOS + # Try to find existing repos first (hyperswitch_task_X or hyperswitch_pool_X) + repo_pool_id = task_index % MAX_CONCURRENT_REPOS # Reuse repos in a pool + + # Check for existing repos in order of preference + WORK_DIR = None + possible_dirs = [ + f"./repos/{REPO_NAME}_task_{task_index}", # Task-specific repo + f"./repos/{REPO_NAME}_pool_{repo_pool_id}", # Pool repo + f"./{REPO_NAME}", # Legacy single repo + ] + + # Also check for any existing task repos we can reuse + repos_path = Path("./repos") + if repos_path.exists(): + existing_repos = [d.name for d in repos_path.iterdir() if d.is_dir() and d.name.startswith(REPO_NAME)] + if existing_repos: + # Prefer repos that match our pool ID + for repo_dir in existing_repos: + if f"task_{repo_pool_id}" in repo_dir or f"pool_{repo_pool_id}" in repo_dir: + possible_dirs.insert(0, f"./repos/{repo_dir}") + + # Find first existing directory + for dir_path in possible_dirs: + if Path(dir_path).exists(): + WORK_DIR = dir_path + rich.print(f"Reusing existing repo: {WORK_DIR}") + break + + # If no existing repo found, create new one + if WORK_DIR is None: + WORK_DIR = f"./repos/{REPO_NAME}_pool_{repo_pool_id}" + rich.print(f"Cloning repository to {WORK_DIR}...") + # Create repos directory + bash_execute(command="mkdir -p repos", working_dir=".") + + clone_result = bash_execute( + command=f"git clone https://github.com/{task['repo']}.git {REPO_NAME}_pool_{repo_pool_id}", + working_dir="./repos" + ) + rich.print(f" Clone result: 
{json.loads(clone_result).get('success', False)}") + + # Checkout the base commit so the diff isn't applied yet + rich.print(f"Checking out base commit: {task['base_commit'][:8]}...") + + # Checkout base commit (with fetch if needed) + rich.print(f" Fetching commit if needed...") + fetch_result = bash_execute( + command=f"git fetch origin {task['base_commit']} || true", + working_dir=WORK_DIR, + timeout=60 + ) + + checkout_result = bash_execute( + command=f"git checkout {task['base_commit']} 2>&1 || git checkout -b task-{task_index} HEAD", + working_dir=WORK_DIR + ) + checkout_data = json.loads(checkout_result) + if checkout_data.get('success'): + rich.print(f"Checked out commit {task['base_commit'][:8]}\n") + else: + rich.print(f"WARNING: Using current HEAD (commit not found): {checkout_data.get('stderr', 'unknown')[:100]}\n") + + # Create enhanced prompt with dataset task + initial_prompt = f"""You are working on the hyperswitch repository (Rust payment processing system). + +REPOSITORY SETUP: +- Repository: {task['repo']} +- Working directory: {WORK_DIR} (already cloned) +- Base commit: {task['base_commit']} (checked out or using nearby commit) +- Task ID: {task['instance_id']} +- Version: {task['version']} + +TASK DESCRIPTION: +{task['problem_statement']} + +DETAILED CONTEXT & HINTS: +{task['hints_text']} + +CRITICAL INSTRUCTIONS - WORK FAST AND EFFICIENTLY: + +1. **Create DENSE, ACTION-FOCUSED todos IMMEDIATELY** + - Use items_text to add ALL todos at once (not one by one) + - Make todos specific and actionable (e.g., "Read payment.rs lines 100-200 for PaymentIntent struct" not "understand payments") + - Focus on HIGH-IMPACT actions only - skip exploratory/understanding todos + - 5-8 todos maximum - be surgical, not exhaustive + +2. **Execute aggressively:** + - Use bash commands to search efficiently (grep, find, rg if available) + - Read multiple related files in quick succession + - Make ALL related changes in one edit_file call when possible + - Mark todos in_progress → complete rapidly + - Skip unnecessary exploration - use hints to go directly to relevant code + +3. **File operations:** + - All repo files: '{WORK_DIR}/' + - Use bash_execute with working_dir='{WORK_DIR}' for git/cargo/grep commands + - Prefer targeted edits over whole file rewrites + +4. **NO TESTING UNLESS CRITICAL:** + - Focus on making the actual code changes + - Only test if explicitly required or to verify compilation + - Skip test exploration - implement the feature first + +5. **Maximize information density:** + - Each turn should accomplish significant work + - Batch file reads when examining related code + - Use search commands to locate code quickly instead of exploring + +EFFICIENCY IS CRITICAL. Make substantial progress every turn. Start by creating your complete todo list NOW.""" + + pretty_print_markdown(task['problem_statement'], "Problem Statement") + pretty_print_markdown(task['hints_text'], "Hints Text") + + messages = [ + { + "role": "system", + "content": """You are an expert coding assistant that works FAST and EFFICIENTLY. + +MANDATORY RESPONSE FORMAT: + +<think> +- What's the current state and what do I know? +- What's the most DIRECT path to complete this task? +- What specific actions will I take RIGHT NOW? +- Can I batch multiple operations together? +</think> + +[1-2 sentence action summary] + +Then make your tool calls. 
+ +CRITICAL RULES: +- ALWAYS include <think> tags before every response +- Think strategically about the FASTEST path forward +- Batch operations whenever possible (read multiple files, make multiple edits) +- Skip unnecessary exploration - be laser-focused on the task +- Each turn must make SUBSTANTIAL progress +- Prioritize implementation over exploration +- Use dense, specific todos (5-8 max) +- Complete tasks rapidly - mark in_progress → complete quickly""" + }, + { + "role": "user", + "content": initial_prompt + } + ] + + # Define tools + tools = [ + { + "type": "function", + "function": { + "name": "todo_manager", + "description": "Manage a todo list with add, complete, list, remove, and search functionality. CRITICAL: Use this extensively to track all your work. Can add multiple items at once.", + "parameters": { + "type": "object", + "properties": { + "action": { + "type": "string", + "description": "Action to perform", + "enum": ["add", "complete", "incomplete", "in_progress", "list", "remove", "search"] + }, + "item_text": {"type": "string", "description": "Text of a single todo item (for 'add' action)"}, + "items_text": { + "type": "array", + "items": {"type": "string"}, + "description": "List of multiple todo items to add at once (for 'add' action). Use this to add all your initial todos." + }, + "item_id": { + "type": "string", + "description": "ID of the todo item (for 'complete', 'incomplete', 'in_progress', 'remove' actions)" + }, + "priority": { + "type": "string", + "enum": ["low", "medium", "high"], + "description": "Priority level (default: medium)" + }, + "search_query": {"type": "string", "description": "Search query (for 'search' action)"} + }, + "required": ["action"] + } + } + }, + { + "type": "function", + "function": { + "name": "bash_execute", + "description": "Execute a bash/shell command and return the output. Use working_dir='./hyperswitch' for repository operations. Default timeout is 300s.", + "parameters": { + "type": "object", + "properties": { + "command": {"type": "string", "description": "Shell command to execute"}, + "working_dir": { + "type": "string", + "description": "Working directory for command execution. Use './hyperswitch' for repo operations.", + "default": "." + }, + "timeout": { + "type": "integer", + "description": "Command timeout in seconds (default: 300)", + "default": 300 + } + }, + "required": ["command"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "read_file", + "description": "Read the contents of a file. Use full path like './hyperswitch/crates/router/src/core/payments.rs' for repo files.", + "parameters": { + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "Full path to the file to read (e.g., './hyperswitch/README.md')" + }, + "max_lines": {"type": "integer", "description": "Maximum lines to read", "default": 1000}, + }, + "required": ["file_path"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "write_file", + "description": "Write or append content to a file. 
Use full path like './hyperswitch/new_file.rs' for repo files.", + "parameters": { + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "Full path to the file to write (e.g., './hyperswitch/analysis.md')" + }, + "content": {"type": "string", "description": "Content to write to the file"}, + "mode": { + "type": "string", + "enum": ["write", "append"], + "description": "Write mode: 'write' (overwrite) or 'append'", + "default": "write", + }, + }, + "required": ["file_path", "content"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "edit_file", + "description": "Edit a file by replacing old_text with new_text. Use full path like './hyperswitch/config.toml' for repo files.", + "parameters": { + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "Full path to the file to edit (e.g., './hyperswitch/Cargo.toml')" + }, + "old_text": {"type": "string", "description": "Text to search for and replace"}, + "new_text": {"type": "string", "description": "Text to replace with"}, + }, + "required": ["file_path", "old_text", "new_text"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "ripgrep_search", + "description": "FAST search powered by ripgrep. PREFERRED over bash grep for better performance and automatic output limiting (max 20k matches). Supports regex patterns and file filtering.", + "parameters": { + "type": "object", + "properties": { + "pattern": { + "type": "string", + "description": "The pattern to search for. Treated as regex by default. Use '\\b' for word boundaries (e.g., '\\bMatchMe\\b')." + }, + "dir_path": { + "type": "string", + "description": "Directory or file to search. Defaults to current directory ('.'). Use './hyperswitch' for repo searches.", + "default": "." + }, + "include": { + "type": "string", + "description": "Glob pattern to filter files (e.g., '*.rs', '*.{toml,yaml}'). Recommended for large repos to reduce noise." + }, + "case_sensitive": { + "type": "boolean", + "description": "If true, search is case-sensitive. Defaults to false (case-insensitive).", + "default": False + }, + "fixed_strings": { + "type": "boolean", + "description": "If true, treats pattern as literal string instead of regex. Defaults to false.", + "default": False + }, + "context": { + "type": "integer", + "description": "Show this many lines of context around each match (like grep -C)." + }, + "after": { + "type": "integer", + "description": "Show this many lines after each match (like grep -A)." + }, + "before": { + "type": "integer", + "description": "Show this many lines before each match (like grep -B)." + }, + "no_ignore": { + "type": "boolean", + "description": "If true, search all files including those in .gitignore, build/, dist/, etc. Defaults to false.", + "default": False + } + }, + "required": ["pattern"] + } + } + }, + { + "type": "function", + "function": { + "name": "glob_tool", + "description": "Efficiently finds files matching specific glob patterns (e.g., '**/*.py', '*.md', 'src/**/*.rs'), returning absolute paths sorted by modification time (newest first). Ideal for quickly locating files based on their name or path structure, especially in large codebases.", + "parameters": { + "type": "object", + "properties": { + "pattern": { + "type": "string", + "description": "The glob pattern to match against (e.g., '**/*.py', 'docs/*.md', '*.{rs,toml}'). Use '**/' for recursive directory matching." 
+ }, + "dir_path": { + "type": "string", + "description": "Optional: The directory path to search within. If omitted, searches the current directory. Use './hyperswitch' for repo searches." + }, + "case_sensitive": { + "type": "boolean", + "description": "Optional: Whether the search should be case-sensitive. Defaults to false.", + "default": False + }, + "respect_git_ignore": { + "type": "boolean", + "description": "Optional: Whether to respect .gitignore patterns when finding files. Defaults to true.", + "default": True + } + }, + "required": ["pattern"] + } + } + }, + ] + + # Initialize todo manager + todo_manager = SimpleTodoManager() + + tool_map = { + "todo_manager": lambda **kwargs: todo_manager.execute(**kwargs), + "bash_execute": bash_execute, + "read_file": read_file, + "write_file": write_file, + "edit_file": edit_file, + "ripgrep_search": ripgrep_search, + "glob_tool": glob_tool, + } + + # Trace collection + trace_data = { + "task_info": { + "instance_id": task['instance_id'], + "task_index": task_index, + "base_commit": task['base_commit'], + "problem_statement": task['problem_statement'], + "version": task['version'], + "created_at": task['created_at'], + }, + "model": MODEL, + "api_base": API_BASE, + "timestamp": datetime.now().isoformat(), + "turns": [], + "final_todo_state": None, + } + + with httpx.Client(timeout=API_TIMEOUT) as client: + for turn in range(MAX_TURNS): + rich.print(f"\n{'='*80}") + rich.print(f"Turn {turn + 1}/{MAX_TURNS}") + rich.print(f"{'='*80}\n") + + try: + # Make API call to OpenAI-compatible endpoint + response = weave.op(client.post)( + f"{API_BASE}/chat/completions", + headers={ + "Authorization": f"Bearer {API_KEY}", + "Content-Type": "application/json" + }, + json={ + "model": MODEL, + "messages": messages, + "tools": tools, + "tool_choice": "auto", + "temperature": API_TEMPERATURE, + "max_tokens": API_MAX_TOKENS, + } + ) + + if response.status_code != 200: + rich.print(f"API Error: {response.status_code}") + rich.print(response.text) + trace_data["error"] = {"turn": turn, "status": response.status_code, "text": response.text} + break + + result = response.json() + assistant_message = result["choices"][0]["message"] + + # Collect turn data for trace + turn_data = { + "turn": turn + 1, + "assistant_content": assistant_message.get('content'), + "tool_calls": [], + "tool_results": [] + } + + # Debug: Show raw response on first turn + if turn == 0: + rich.print(f"šŸ” DEBUG - First response:") + rich.print(f" Content: {assistant_message.get('content', 'None')[:200]}") + rich.print(f" Tool calls: {assistant_message.get('tool_calls', 'None')}") + rich.print() + + # Display assistant response + if assistant_message.get("content"): + content = assistant_message['content'] + # Check if content contains XML-style tool calls (malformed response) + if '' in content: + rich.print(f"WARNING - ASSISTANT (with malformed tool calls):\n{content[:500]}\n") + rich.print("ERROR: Model returned XML-style tool calls instead of JSON. 
This model may not support OpenAI tool format correctly.") + trace_data["error"] = {"turn": turn, "type": "malformed_tool_calls"} + break + else: + # Highlight thinking sections + if '<think>' in content: + # Extract and highlight thinking + think_pattern = r'<think>(.*?)</think>' + matches = re.findall(think_pattern, content, re.DOTALL) + + if matches: + # Display thinking sections prominently + for i, think_content in enumerate(matches, 1): + rich.print(f"\n{'='*80}") + rich.print(f"THINKING (Section {i}):") + rich.print(f"{'='*80}") + rich.print(think_content.strip()) + rich.print(f"{'='*80}\n") + + # Remove think tags and show the rest + rest_content = re.sub(think_pattern, '', content, flags=re.DOTALL).strip() + if rest_content: + rich.print(f"ASSISTANT:\n{rest_content}\n") + else: + rich.print(f"ASSISTANT:\n{content}\n") + else: + rich.print(f"ASSISTANT:\n{content}\n") + + messages.append(assistant_message) + + # Handle tool calls + tool_calls = assistant_message.get("tool_calls", []) + + if not tool_calls: + rich.print("āœ“ Agent completed (no more tool calls)!\n") + break + + # Execute tools + rich.print(f"šŸ”§ Executing {len(tool_calls)} tool call(s)...\n") + + for idx, tool_call in enumerate(tool_calls, 1): + function_name = tool_call["function"]["name"] + function_args = json.loads(tool_call["function"]["arguments"]) + + rich.print(f"[{idx}] Tool: {function_name}") + + # Collect tool call for trace + tool_call_data = { + "tool": function_name, + "args": function_args + } + + # Show file path if present + if "file_path" in function_args: + rich.print(f" šŸ“ File: {function_args['file_path']}") + if "working_dir" in function_args and function_args["working_dir"] != ".": + rich.print(f" šŸ“‚ Working Dir: {function_args['working_dir']}") + if "command" in function_args: + rich.print(f" šŸ’» Command: {function_args['command'][:100]}") + if "pattern" in function_args: + rich.print(f" šŸ” Pattern: {function_args['pattern'][:100]}") + if "dir_path" in function_args and function_args["dir_path"] != ".": + rich.print(f" šŸ“‚ Search Dir: {function_args['dir_path']}") + + # Show other args + other_args = {k: v for k, v in function_args.items() + if k not in ["file_path", "working_dir", "command", "content", "old_text", "new_text", "pattern", "dir_path"]} + if other_args: + rich.print(f" āš™ļø Args: {json.dumps(other_args)}") + + if function_name in tool_map: + try: + result_text = tool_map[function_name](**function_args) + + # Collect tool result for trace + turn_data["tool_calls"].append(tool_call_data) + truncate_len = TRACE_TRUNCATE_TOOL_RESULTS if TRACE_TRUNCATE_TOOL_RESULTS > 0 else None + turn_data["tool_results"].append({ + "tool": function_name, + "result": result_text[:truncate_len] if truncate_len else result_text, + "success": True + }) + + # Parse and display result nicely + try: + result_json = json.loads(result_text) + success = result_json.get('success', False) + + if success: + rich.print(f" āœ… Success!") + else: + rich.print(f" āŒ Failed!") + + if 'stdout' in result_json and result_json['stdout']: + stdout_preview = result_json['stdout'][:300].replace('\n', '\n ') + rich.print(f" šŸ“¤ Output:\n {stdout_preview}") + + if 'stderr' in result_json and result_json['stderr']: + stderr_preview = result_json['stderr'][:200] + rich.print(f" āš ļø Stderr: {stderr_preview}") + + if 'num_lines' in result_json: + rich.print(f" šŸ“Š Lines read: {result_json['num_lines']}") + + if 'bytes_written' in result_json: + rich.print(f" šŸ’¾ Bytes written: {result_json['bytes_written']}") + + if 
'occurrences_replaced' in result_json: + rich.print(f" šŸ”„ Replacements: {result_json['occurrences_replaced']}") + + if 'total_matches' in result_json: + rich.print(f" šŸ” Matches: {result_json['total_matches']}") + if result_json.get('truncated'): + rich.print(f" āš ļø Results truncated to limit") + + if 'file_count' in result_json: + rich.print(f" šŸ“ Files found: {result_json['file_count']}") + + if 'error' in result_json: + rich.print(f" āŒ Error: {result_json['error']}") + + except: + # For todo_manager which returns plain text + if function_name == "todo_manager": + preview = result_text[:400].replace('\n', '\n ') + rich.print(f" šŸ“‹ Result:\n {preview}") + else: + rich.print(f" Result: {result_text[:300]}") + + rich.print() + + messages.append({ + "role": "tool", + "tool_call_id": tool_call["id"], + "name": function_name, + "content": result_text + }) + except Exception as e: + error_msg = f"Error: {str(e)}" + rich.print(f" āŒ {error_msg}\n") + + turn_data["tool_calls"].append(tool_call_data) + turn_data["tool_results"].append({ + "tool": function_name, + "error": error_msg, + "success": False + }) + + messages.append({ + "role": "tool", + "tool_call_id": tool_call["id"], + "name": function_name, + "content": error_msg + }) + + # Save turn data + trace_data["turns"].append(turn_data) + + except Exception as e: + rich.print(f"āŒ Error: {e}") + import traceback + traceback.print_exc() + trace_data["error"] = {"turn": turn, "exception": str(e)} + break + + rich.print(f"\n{'='*80}") + rich.print("šŸŽ‰ Agent run completed!") + rich.print(f"{'='*80}\n") + + # Show final todo list + rich.print("\nšŸ“‹ Final Todo List Status:") + rich.print("="*80) + final_list = todo_manager._list_items() + rich.print(final_list) + rich.print("="*80 + "\n") + + # Save final todo state in trace + trace_data["final_todo_state"] = final_list + + # Save trace to file (always) + traces_dir = Path(TRACE_DIR) + traces_dir.mkdir(exist_ok=True) + + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + trace_filename = f"trace_{task['instance_id']}_{timestamp}.json" + trace_path = traces_dir / trace_filename + + with open(trace_path, "w") as f: + json.dump(trace_data, f, indent=2) + + rich.print(f"Trace saved to: {trace_path}\n") + + return trace_data + + +def main(): + rich.print(""" +╔══════════════════════════════════════════════════════════════════════════════╗ +ā•‘ Hyperswitch Dataset Coding Agent (Local OpenAI-compatible) ā•‘ +ā•šā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā• + +This agent: +- āœ… Loads tasks from the geekyrakshit/hyperswitch dataset +- šŸ“‹ Creates and manages detailed todo lists for each task +- šŸ”§ Works through tasks systematically using file read/write/edit tools +- šŸ’¾ Collects detailed traces of all actions and todo list states +- šŸŽÆ Uses local OpenAI-compatible model server + +""") + + # Get task index from config + rich.print(f"Running task index: {TASK_INDEX}\n") + + try: + run_dataset_coding_agent(task_index=TASK_INDEX) + except KeyboardInterrupt: + rich.print("\nāš ļø Interrupted by user") + except Exception as e: + rich.print(f"\nāŒ Error: {e}") + raise + + +if __name__ == "__main__": + weave.init(project_name="hyperswitch-traces") + main() diff --git a/config.py b/config.py index 0b2deb3..9a21dbd 100644 --- a/config.py +++ b/config.py @@ -10,7 +10,7 @@ MODEL = 
os.getenv("OPENAI_MODEL", "claude-sonnet-4-20250514") # Task Configuration -MAX_TURNS = int(os.getenv("MAX_TURNS", "40")) +MAX_TURNS = int(os.getenv("MAX_TURNS", "100")) MAX_CONCURRENT_REPOS = int(os.getenv("MAX_CONCURRENT", "10")) TASK_INDEX = int(os.getenv("TASK_INDEX", "0")) diff --git a/example_tools.py b/example_tools.py index cc9fb66..67268cd 100644 --- a/example_tools.py +++ b/example_tools.py @@ -330,6 +330,376 @@ def edit_file( }) +def ripgrep_search( + pattern: str, + dir_path: str = ".", + include: Optional[str] = None, + case_sensitive: bool = False, + fixed_strings: bool = False, + context: Optional[int] = None, + after: Optional[int] = None, + before: Optional[int] = None, + no_ignore: bool = False, + max_results: int = 20000 +) -> str: + """ + Fast search using ripgrep (rg) with regex support. + + Args: + pattern: The regex pattern to search for (or literal string if fixed_strings=True) + dir_path: Directory or file to search (default: current directory) + include: Glob pattern to filter files (e.g., "*.py", "*.{rs,toml}") + case_sensitive: If True, search case-sensitively (default: False) + fixed_strings: If True, treat pattern as literal string (default: False) + context: Number of lines of context around each match + after: Number of lines after each match + before: Number of lines before each match + no_ignore: If True, don't respect .gitignore (default: False) + max_results: Maximum number of results to return (default: 20000) + + Returns: + JSON string with success status, matches grouped by file, and metadata + """ + try: + # Check if ripgrep is available + rg_check = subprocess.run( + ["which", "rg"], + capture_output=True, + text=True + ) + + if rg_check.returncode != 0: + return json.dumps({ + "success": False, + "error": "ripgrep (rg) not found. 
Install it with: brew install ripgrep (macOS) or apt install ripgrep (Linux)", + "matches": [], + "total_matches": 0 + }) + + # Build ripgrep command + rg_args = ["rg", "--json"] + + # Case sensitivity + if not case_sensitive: + rg_args.append("--ignore-case") + + # Fixed strings vs regex + if fixed_strings: + rg_args.extend(["--fixed-strings", pattern]) + else: + rg_args.extend(["--regexp", pattern]) + + # Context options + if context is not None: + rg_args.extend(["--context", str(context)]) + if after is not None: + rg_args.extend(["--after-context", str(after)]) + if before is not None: + rg_args.extend(["--before-context", str(before)]) + + # Ignore options + if no_ignore: + rg_args.append("--no-ignore") + else: + # Add common excludes + common_excludes = [ + "*.log", "*.tmp", "**/node_modules/**", "**/target/**", + "**/build/**", "**/dist/**", "**/.git/**", "**/venv/**", + "**/__pycache__/**", "**/.pytest_cache/**" + ] + for exclude in common_excludes: + rg_args.extend(["--glob", f"!{exclude}"]) + + # Include pattern + if include: + rg_args.extend(["--glob", include]) + + # Performance: limit threads + rg_args.extend(["--threads", "4"]) + + # Add search path + search_path = Path(dir_path).resolve() + if not search_path.exists(): + return json.dumps({ + "success": False, + "error": f"Search path does not exist: {dir_path}", + "matches": [], + "total_matches": 0 + }) + + rg_args.append(str(search_path)) + + # Execute ripgrep + result = subprocess.run( + rg_args, + capture_output=True, + text=True, + timeout=60 + ) + + # Exit code 0 = matches found, 1 = no matches, 2+ = error + if result.returncode > 1: + return json.dumps({ + "success": False, + "error": f"ripgrep error (exit {result.returncode}): {result.stderr}", + "matches": [], + "total_matches": 0 + }) + + if result.returncode == 1: + # No matches found + return json.dumps({ + "success": True, + "message": f"No matches found for pattern '{pattern}' in {dir_path}", + "matches": {}, + "total_matches": 0, + "pattern": pattern, + "search_path": str(search_path) + }) + + # Parse JSON output + matches_by_file = {} + total_matches = 0 + + for line in result.stdout.strip().split('\n'): + if not line: + continue + + try: + entry = json.loads(line) + + # Only process match entries (not begin/end/context) + if entry.get("type") == "match": + data = entry.get("data", {}) + path_info = data.get("path", {}) + lines_info = data.get("lines", {}) + + if path_info.get("text") and lines_info.get("text"): + file_path = path_info["text"] + line_number = data.get("line_number", 0) + line_text = lines_info["text"].rstrip() + + if file_path not in matches_by_file: + matches_by_file[file_path] = [] + + matches_by_file[file_path].append({ + "line_number": line_number, + "line": line_text + }) + + total_matches += 1 + + # Respect max_results limit + if total_matches >= max_results: + break + + except json.JSONDecodeError: + # Skip malformed lines + continue + + # Check if results were truncated + was_truncated = total_matches >= max_results + + # Format output message + if total_matches == 0: + message = f"No matches found for pattern '{pattern}'" + else: + match_term = "match" if total_matches == 1 else "matches" + message = f"Found {total_matches} {match_term} for pattern '{pattern}'" + if include: + message += f" (filter: {include})" + if was_truncated: + message += f" (limited to {max_results} results)" + + return json.dumps({ + "success": True, + "message": message, + "matches": matches_by_file, + "total_matches": total_matches, + "truncated": 
was_truncated, + "pattern": pattern, + "search_path": str(search_path) + }) + + except subprocess.TimeoutExpired: + return json.dumps({ + "success": False, + "error": "ripgrep search timed out after 60 seconds", + "matches": [], + "total_matches": 0 + }) + except Exception as e: + return json.dumps({ + "success": False, + "error": f"ripgrep search failed: {str(e)}", + "matches": [], + "total_matches": 0 + }) + + +def glob_tool( + pattern: str, + dir_path: Optional[str] = None, + case_sensitive: bool = False, + respect_git_ignore: bool = True +) -> str: + """ + Find files matching a glob pattern, sorted by modification time. + + Args: + pattern: Glob pattern to match (e.g., "*.py", "**/*.rs", "src/**/*.toml") + dir_path: Directory to search in (default: current directory) + case_sensitive: Whether to perform case-sensitive matching (default: False) + respect_git_ignore: Whether to respect .gitignore patterns (default: True) + + Returns: + JSON string with success status, list of matching files sorted by recency + """ + try: + import glob as glob_module + import fnmatch + import time + + # Determine search directory + search_dir = Path(dir_path).resolve() if dir_path else Path.cwd() + + if not search_dir.exists(): + return json.dumps({ + "success": False, + "error": f"Search directory does not exist: {dir_path or '.'}", + "files": [], + "file_count": 0 + }) + + if not search_dir.is_dir(): + return json.dumps({ + "success": False, + "error": f"Path is not a directory: {dir_path or '.'}", + "files": [], + "file_count": 0 + }) + + # Load .gitignore patterns if requested + gitignore_patterns = [] + if respect_git_ignore: + gitignore_path = search_dir / ".gitignore" + if gitignore_path.exists(): + try: + with open(gitignore_path, 'r') as f: + for line in f: + line = line.strip() + # Skip comments and empty lines + if line and not line.startswith('#'): + gitignore_patterns.append(line) + except Exception: + pass # If we can't read .gitignore, just continue + + # Common patterns to ignore + default_ignores = [ + '*.log', '*.tmp', '**/node_modules/**', '**/target/**', + '**/build/**', '**/dist/**', '**/.git/**', '**/venv/**', + '**/__pycache__/**', '**/.pytest_cache/**', '**/.mypy_cache/**', + '**/.tox/**', '**/coverage/**', '**/.coverage/**' + ] + + # Perform glob search + # Use glob.glob with recursive support + glob_pattern = str(search_dir / pattern) + + # Get all matching files + matched_files = [] + for file_path in glob_module.glob(glob_pattern, recursive=True): + path_obj = Path(file_path) + + # Skip directories + if path_obj.is_dir(): + continue + + # Apply case sensitivity + if not case_sensitive: + # For case-insensitive matching, we need to check manually + # since glob is case-sensitive on Unix-like systems + pass # glob.glob is already case-sensitive by default on Unix + + # Check against ignore patterns + relative_path = str(path_obj.relative_to(search_dir)) + should_ignore = False + + # Check default ignores + for ignore_pattern in default_ignores: + # Convert glob pattern to simple match + if fnmatch.fnmatch(relative_path, ignore_pattern.replace('**/', '')): + should_ignore = True + break + + # Check gitignore patterns + if respect_git_ignore and not should_ignore: + for ignore_pattern in gitignore_patterns: + # Simple pattern matching (not full gitignore spec) + if ignore_pattern.endswith('/'): + # Directory pattern + if ignore_pattern.rstrip('/') in relative_path: + should_ignore = True + break + elif fnmatch.fnmatch(relative_path, ignore_pattern): + should_ignore = True + break + 
+ if not should_ignore: + matched_files.append(path_obj) + + if not matched_files: + return json.dumps({ + "success": True, + "message": f"No files found matching pattern '{pattern}' in {search_dir}", + "files": [], + "file_count": 0, + "pattern": pattern, + "search_dir": str(search_dir) + }) + + # Sort by modification time (newest first for recent files, then alphabetically) + # Files modified within last 24 hours shown first + ONE_DAY_MS = 24 * 60 * 60 * 1000 + now_timestamp = time.time() * 1000 # Convert to milliseconds + + def sort_key(file_path): + try: + mtime_ms = file_path.stat().st_mtime * 1000 + is_recent = (now_timestamp - mtime_ms) < ONE_DAY_MS + if is_recent: + # Recent files: sort by mtime descending (newest first) + return (0, -mtime_ms, str(file_path)) + else: + # Older files: sort alphabetically + return (1, 0, str(file_path)) + except Exception: + # If we can't get mtime, treat as old file + return (1, 0, str(file_path)) + + sorted_files = sorted(matched_files, key=sort_key) + + # Convert to absolute paths as strings + file_paths = [str(f.resolve()) for f in sorted_files] + + return json.dumps({ + "success": True, + "message": f"Found {len(file_paths)} file(s) matching '{pattern}', sorted by modification time (newest first)", + "files": file_paths, + "file_count": len(file_paths), + "pattern": pattern, + "search_dir": str(search_dir) + }) + + except Exception as e: + return json.dumps({ + "success": False, + "error": f"Glob search failed: {str(e)}", + "files": [], + "file_count": 0 + }) + + def parse_tool_result(result_str: str) -> dict: """ Parse a tool result string into a dictionary.