OpenHands · xingyaoww · Jan 12, 2026 · Jan 13, 2026 · Jan 13, 2026 · Jan 20, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -255,4 +255,11 @@ For examples that use the critic model (e.g., `34_critic_example.py`), the criti
 - Ruff ignores `ARG` (unused arguments) under `tests/**/*.py` to allow pytest fixtures.
 - Repository guidance lives in `AGENTS.md` (loaded as a third-party skill file).
 </REPO_CONFIG_NOTES>
+
+<EXAMPLES_STYLE>
+- Examples in `examples/01_standalone_sdk/` should be written as direct scripts, NOT wrapped in a `main()` function.
+- The script code should run at module level (after imports and helper function definitions).
+- Keep examples concise: avoid excessive print statements that make the code long and less readable.
+- See `examples/01_standalone_sdk/01_hello_world.py` for the canonical pattern.
+</EXAMPLES_STYLE>
 </REPO>
diff --git a/examples/01_standalone_sdk/34_critic_example.py b/examples/01_standalone_sdk/34_critic_example.py
@@ -7,33 +7,57 @@
 feedback that can trigger follow-up prompts when the agent hasn't completed the
 task successfully.
 
+Two critic modes are supported:
+
+1. **API-based Critic** (CRITIC_MODE=api): Uses an external critic API endpoint.
+   Auto-configures for All-Hands LLM proxy, or uses explicit env vars.
+
+2. **Agent Review Critic** (CRITIC_MODE=agent_review): Spawns a separate OpenHands
+   agent to do a PR-style review of the git diff.
+
 Key concepts demonstrated:
 1. Setting up a critic with IterativeRefinementConfig for automatic retry
 2. Conversation.run() automatically handles retries based on critic scores
 3. Custom follow-up prompt generation via critic.get_followup_prompt()
 4. Iterating until the task is completed successfully or max iterations reached
 
-For All-Hands LLM proxy (llm-proxy.*.all-hands.dev), the critic is auto-configured
-using the same base_url with /vllm suffix and "critic" as the model name.
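+Requirements:
+- export LLM_API_KEY=...
+- optional: CRITIC_MODE (api|agent_review), LLM_MODEL, LLM_BASE_URL,
+  CRITIC_SUCCESS_THRESHOLD (default 0.7), MAX_ITERATIONS (default 3)
+- API mode without the All-Hands LLM proxy also requires CRITIC_SERVER_URL,
+  CRITIC_API_KEY and CRITIC_MODEL_NAME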
+
+Run:
+  # API-based critic (default)
+  python examples/01_standalone_sdk/34_critic_example.py
+
+  # Agent review critic
+  CRITIC_MODE=agent_review python examples/01_standalone_sdk/34_critic_example.py
 """
 
 import os
 import re
+import signal
+import subprocess
 import tempfile
 from pathlib import Path
 
+from pydantic import SecretStr
+
 from openhands.sdk import LLM, Agent, Conversation, Tool
 from openhands.sdk.critic import APIBasedCritic, IterativeRefinementConfig
 from openhands.sdk.critic.base import CriticBase
+from openhands.sdk.critic.impl.agent_review import AgentReviewCritic
 from openhands.tools.file_editor import FileEditorTool
+from openhands.tools.preset.critic import get_critic_agent
+from openhands.tools.preset.default import get_default_agent
 from openhands.tools.task_tracker import TaskTrackerTool
 from openhands.tools.terminal import TerminalTool
 
 
+# Make Ctrl+C raise KeyboardInterrupt in the main thread, replacing whatever
+# SIGINT handler is currently installed. signal.default_int_handler is the
+# standard-library handler that does exactly this, so no lambda or
+# generator-throw trick is needed.
+signal.signal(signal.SIGINT, signal.default_int_handler)
+
+
 # Configuration
-# Higher threshold (70%) makes it more likely the agent needs multiple iterations,
-# which better demonstrates how iterative refinement works.
-# Adjust as needed to see different behaviors.
+CRITIC_MODE = os.getenv("CRITIC_MODE", "api")  # "api" or "agent_review"
 SUCCESS_THRESHOLD = float(os.getenv("CRITIC_SUCCESS_THRESHOLD", "0.7"))
 MAX_ITERATIONS = int(os.getenv("MAX_ITERATIONS", "3"))
 
@@ -48,36 +72,16 @@ def get_required_env(name: str) -> str:
     )
 
 
-def get_default_critic(llm: LLM) -> CriticBase | None:
-    """Auto-configure critic for All-Hands LLM proxy.
+def get_api_critic(llm: LLM) -> CriticBase | None:
+    """Auto-configure API-based critic for All-Hands LLM proxy.
 
     When the LLM base_url matches `llm-proxy.*.all-hands.dev`, returns an
     APIBasedCritic configured with:
     - server_url: {base_url}/vllm
     - api_key: same as LLM
     - model_name: "critic"
 
-    Args:
-        llm: The LLM instance to derive critic configuration from.
-
-    Returns:
-        An APIBasedCritic if the LLM is configured for All-Hands proxy,
-        None otherwise.
-
-    Example:
-        llm = LLM(
-            model="anthropic/claude-sonnet-4-5",
-            api_key=api_key,
-            base_url="https://llm-proxy.eval.all-hands.dev",
-        )
-        critic = get_default_critic(llm)
-        if critic is None:
-            # Fall back to explicit configuration
-            critic = APIBasedCritic(
-                server_url="https://my-critic-server.com",
-                api_key="my-api-key",
-                model_name="my-critic-model",
-            )
+    Returns None if not using All-Hands proxy.
     """
     base_url = llm.base_url
     api_key = llm.api_key
@@ -96,10 +100,22 @@ def get_default_critic(llm: LLM) -> CriticBase | None:
     )
 
 
-# Task prompt designed to be moderately complex with subtle requirements.
-# The task is simple enough to complete in 1-2 iterations, but has specific
-# requirements that are easy to miss - triggering critic feedback.
-INITIAL_TASK_PROMPT = """\
+def _git(workspace: Path, *args: str) -> None:
+    """Run ``git`` with ``args`` inside ``workspace``.
+
+    Output is captured rather than printed, and a non-zero exit status raises
+    ``subprocess.CalledProcessError``.
+    """
+    command = ["git", *args]
+    subprocess.run(command, cwd=workspace, check=True, capture_output=True)
+
+
+def _git_patch(workspace: Path) -> str:
+    """Return the text output of ``git diff`` run inside ``workspace``.
+
+    Plain ``git diff`` compares the working tree with the index, so staged
+    changes and untracked files do not appear in the result.
+    """
+    diff_command = ["git", "diff"]
+    return subprocess.check_output(diff_command, cwd=workspace, text=True)
+
+
+# Task prompts for different modes
+AGENT_REVIEW_TASK = (
+    "Edit calc.py to add a new function multiply(a, b) that "
+    "multiplies two numbers. Add proper type hints and a docstring. "
+    "Then finish."
+)
+
+API_CRITIC_TASK = """\
 Create a Python word statistics tool called `wordstats` that analyzes text files.
 
 ## Structure
@@ -168,76 +184,123 @@ def get_default_critic(llm: LLM) -> CriticBase | None:
 """
 
 
+# Setup LLM
 llm_api_key = get_required_env("LLM_API_KEY")
 llm = LLM(
-    # Use a weaker model to increase likelihood of needing multiple iterations
-    model="anthropic/claude-haiku-4-5",
-    api_key=llm_api_key,
+    usage_id="agent",
+    model=os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929"),
+    api_key=SecretStr(llm_api_key),
     top_p=0.95,
     base_url=os.getenv("LLM_BASE_URL", None),
 )
 
-# Setup critic with iterative refinement config
-# The IterativeRefinementConfig tells Conversation.run() to automatically
-# retry the task if the critic score is below the threshold
+# Setup iterative refinement config
 iterative_config = IterativeRefinementConfig(
     success_threshold=SUCCESS_THRESHOLD,
     max_iterations=MAX_ITERATIONS,
 )
 
-# Auto-configure critic for All-Hands proxy or use explicit env vars
-critic = get_default_critic(llm)
-if critic is None:
-    print("⚠️  No All-Hands LLM proxy detected, trying explicit env vars...")
-    critic = APIBasedCritic(
-        server_url=get_required_env("CRITIC_SERVER_URL"),
-        api_key=get_required_env("CRITIC_API_KEY"),
-        model_name=get_required_env("CRITIC_MODEL_NAME"),
-        iterative_refinement=iterative_config,
-    )
-else:
-    # Add iterative refinement config to the auto-configured critic
-    critic = critic.model_copy(update={"iterative_refinement": iterative_config})
-
-# Create agent with critic (iterative refinement is built into the critic)
-agent = Agent(
-    llm=llm,
-    tools=[
-        Tool(name=TerminalTool.name),
-        Tool(name=FileEditorTool.name),
-        Tool(name=TaskTrackerTool.name),
-    ],
-    critic=critic,
-)
-
 # Create workspace
 workspace = Path(tempfile.mkdtemp(prefix="critic_demo_"))
 print(f"📁 Created workspace: {workspace}")
 
-# Create conversation - iterative refinement is handled automatically
-# by Conversation.run() based on the critic's config
+# Setup critic based on mode
+if CRITIC_MODE == "agent_review":
+    # Initialize git repo for agent review mode
+    _git(workspace, "init", "-q")
+    # Set a repository-local identity, presumably so the commit below does not
+    # depend on the machine's global git configuration.
+    _git(workspace, "config", "user.email", "example@example.com")
+    _git(workspace, "config", "user.name", "Example")
+
+    # Create initial file for the task
+    (workspace / "calc.py").write_text(
+        """def add(a, b):
+    return a + b
+
+
+if __name__ == "__main__":
+    print(add(1, 2))
+"""
+    )
+    # Commit the starting file before the agent runs, so the `git diff`
+    # printed at the end of the script is relative to this baseline.
+    _git(workspace, "add", "calc.py")
+    _git(workspace, "commit", "-m", "init", "-q")
+
+    # The critic gets the same LLM and workspace as the working agent, plus the
+    # shared iterative-refinement settings (threshold and max iterations).
+    critic: CriticBase = AgentReviewCritic(
+        llm=llm,
+        agent_factory=get_critic_agent,
+        review_style="roasted",
+        workspace_dir=str(workspace),
+        iterative_refinement=iterative_config,
+    )
+    task_prompt = AGENT_REVIEW_TASK
+    mode_description = "Agent Review Critic (PR-style code review)"
+
+    # Use default agent preset for agent review mode (cli_mode=True disables browser)
+    base_agent = get_default_agent(llm=llm, cli_mode=True)
+    # Attach the critic to a copy of the preset agent.
+    agent = base_agent.model_copy(update={"critic": critic})
+
+else:  # API mode
+    # NOTE: any CRITIC_MODE value other than "agent_review" lands in this branch.
+    # Auto-configure critic for All-Hands proxy or use explicit env vars
+    api_critic = get_api_critic(llm)
+    if api_critic is None:
+        # No proxy-derived critic: all three CRITIC_* variables are read via
+        # get_required_env (presumably failing when one is unset -- confirm
+        # against that helper).
+        print("⚠️  No All-Hands LLM proxy detected, trying explicit env vars...")
+        critic = APIBasedCritic(
+            server_url=get_required_env("CRITIC_SERVER_URL"),
+            api_key=get_required_env("CRITIC_API_KEY"),
+            model_name=get_required_env("CRITIC_MODEL_NAME"),
+            iterative_refinement=iterative_config,
+        )
+    else:
+        # Add the iterative-refinement settings to a copy of the
+        # auto-configured critic.
+        critic = api_critic.model_copy(
+            update={"iterative_refinement": iterative_config}
+        )
+    task_prompt = API_CRITIC_TASK
+    mode_description = "API-based Critic"
+
+    # Create agent with tools for API mode
+    agent = Agent(
+        llm=llm,
+        tools=[
+            Tool(name=TerminalTool.name),
+            Tool(name=FileEditorTool.name),
+            Tool(name=TaskTrackerTool.name),
+        ],
+        critic=critic,
+    )
+
+# Create conversation
 conversation = Conversation(
     agent=agent,
     workspace=str(workspace),
 )
 
 print("\n" + "=" * 70)
-print("🚀 Starting Iterative Refinement with Critic Model")
+print(f"🚀 Starting Iterative Refinement with {mode_description}")
 print("=" * 70)
 print(f"Success threshold: {SUCCESS_THRESHOLD:.0%}")
 print(f"Max iterations: {MAX_ITERATIONS}")
+print("\nThe agent will work on the task, and the critic will evaluate progress.")
+print("If the critic finds issues, it will provide feedback for improvement.\n")
 
-# Send the task and run - Conversation.run() handles retries automatically
-conversation.send_message(INITIAL_TASK_PROMPT)
+# Send the task and run
+conversation.send_message(task_prompt)
 conversation.run()
 
-# Print additional info about created files
-print("\nCreated files:")
-for path in sorted(workspace.rglob("*")):
-    if path.is_file():
-        relative = path.relative_to(workspace)
-        print(f"  - {relative}")
+# Report what the run produced: a (possibly truncated) git diff in agent
+# review mode, otherwise a listing of every file in the workspace.
+if CRITIC_MODE == "agent_review":
+    patch = _git_patch(workspace)
+    if patch:
+        print("\n[Current git diff]")
+        # Cap the preview at 500 characters and mark the cut with an ellipsis.
+        truncated = len(patch) > 500
+        print(patch[:500] + "..." if truncated else patch)
+else:
+    print("\nCreated files:")
+    files = [entry for entry in sorted(workspace.rglob("*")) if entry.is_file()]
+    for entry in files:
+        relative = entry.relative_to(workspace)
+        print(f"  - {relative}")
+
+print("\n" + "=" * 70)
+print("Example Complete!")
+print("=" * 70)
 
-# Report cost
-cost = llm.metrics.accumulated_cost
+cost = conversation.conversation_stats.get_combined_metrics().accumulated_cost
 print(f"\nEXAMPLE_COST: {cost:.4f}")
diff --git a/openhands-sdk/openhands/sdk/__init__.py b/openhands-sdk/openhands/sdk/__init__.py
@@ -67,6 +67,18 @@
 except PackageNotFoundError:
     __version__ = "0.0.0"  # fallback for editable/unbuilt environments
 
+
+# Rebuild models that have forward references now that all imports are done
+def _rebuild_forward_refs() -> None:
+    """Resolve the ``Agent`` forward reference on ``AgentReviewCritic``.
+
+    Rebuilds the Pydantic model with ``Agent`` supplied explicitly in the
+    types namespace used for resolution.
+    """
+    # Function-local import -- presumably to sidestep a circular import while
+    # the package is still initialising; confirm before hoisting it.
+    from openhands.sdk.critic.impl.agent_review import AgentReviewCritic
+
+    forward_ref_types = {"Agent": Agent}
+    AgentReviewCritic.model_rebuild(_types_namespace=forward_ref_types)
+
+
+_rebuild_forward_refs()
+
 __all__ = [
     "LLM",
     "LLMRegistry",

diff --git a/openhands-sdk/openhands/sdk/critic/__init__.py b/openhands-sdk/openhands/sdk/critic/__init__.py
@@ -1,6 +1,7 @@
 from openhands.sdk.critic.base import CriticBase, IterativeRefinementConfig
 from openhands.sdk.critic.impl import (
     AgentFinishedCritic,
+    AgentReviewCritic,
     APIBasedCritic,
     EmptyPatchCritic,
     PassCritic,
@@ -15,6 +16,7 @@
     "IterativeRefinementConfig",
     # Critic implementations
     "AgentFinishedCritic",
+    "AgentReviewCritic",
     "APIBasedCritic",
     "EmptyPatchCritic",
     "PassCritic",

diff --git a/openhands-sdk/openhands/sdk/critic/impl/__init__.py b/openhands-sdk/openhands/sdk/critic/impl/__init__.py
@@ -1,13 +1,15 @@
 """Critic implementations module."""
 
 from openhands.sdk.critic.impl.agent_finished import AgentFinishedCritic
+from openhands.sdk.critic.impl.agent_review import AgentReviewCritic
 from openhands.sdk.critic.impl.api import APIBasedCritic
 from openhands.sdk.critic.impl.empty_patch import EmptyPatchCritic
 from openhands.sdk.critic.impl.pass_critic import PassCritic
 
 
 __all__ = [
     "AgentFinishedCritic",
+    "AgentReviewCritic",
     "APIBasedCritic",
     "EmptyPatchCritic",
     "PassCritic",