diff --git a/openhands-sdk/openhands/sdk/llm/llm.py b/openhands-sdk/openhands/sdk/llm/llm.py
index f84109ffc7..71434a23b7 100644
--- a/openhands-sdk/openhands/sdk/llm/llm.py
+++ b/openhands-sdk/openhands/sdk/llm/llm.py
@@ -92,7 +92,7 @@
     TokenCallbackType,
 )
 from openhands.sdk.llm.utils.metrics import Metrics, MetricsSnapshot
-from openhands.sdk.llm.utils.model_features import get_default_temperature, get_features
+from openhands.sdk.llm.utils.model_features import get_features
 from openhands.sdk.llm.utils.retry_mixin import RetryMixin
 from openhands.sdk.llm.utils.telemetry import Telemetry
 from openhands.sdk.logger import ENV_LOG_DIR, get_logger
@@ -181,10 +181,21 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
         ge=0,
         description=(
             "Sampling temperature for response generation. "
-            "Defaults to 0 for most models and provider default for reasoning models."
+            "Defaults to None (uses provider default temperature). "
+            "Set to 0.0 for deterministic outputs, "
+            "or higher values (0.7-1.0) for more creative responses."
+        ),
+    )
+    top_p: float | None = Field(
+        default=None,
+        ge=0,
+        le=1,
+        description=(
+            "Nucleus sampling parameter. "
+            "Defaults to None (uses provider default). "
+            "Set to a value between 0 and 1 to control diversity of outputs."
         ),
     )
-    top_p: float | None = Field(default=1.0, ge=0, le=1)
     top_k: float | None = Field(default=None, ge=0)
 
     max_input_tokens: int | None = Field(
@@ -427,11 +438,6 @@ def _coerce_inputs(cls, data):
             # Use `or` instead of dict.get() to handle explicit None values
             d["base_url"] = d.get("base_url") or "https://llm-proxy.app.all-hands.dev/"
 
-        # HF doesn't support the OpenAI default value for top_p (1)
-        if model_val.startswith("huggingface"):
-            if d.get("top_p", 1.0) == 1.0:
-                d["top_p"] = 0.9
-
         return d
 
     @model_validator(mode="after")
@@ -471,9 +477,6 @@ def _set_env_side_effects(self):
         # Capabilities + model info
         self._init_model_info_and_caps()
 
-        if self.temperature is None:
-            self.temperature = get_default_temperature(self.model)
-
         logger.debug(
             f"LLM ready: model={self.model} base_url={self.base_url} "
             f"reasoning_effort={self.reasoning_effort} "
diff --git a/openhands-sdk/openhands/sdk/llm/utils/model_features.py b/openhands-sdk/openhands/sdk/llm/utils/model_features.py
index d454f5aa88..98af640779 100644
--- a/openhands-sdk/openhands/sdk/llm/utils/model_features.py
+++ b/openhands-sdk/openhands/sdk/llm/utils/model_features.py
@@ -178,22 +178,3 @@ def get_features(model: str) -> ModelFeatures:
             model, PROMPT_CACHE_RETENTION_MODELS
         ),
     )
-
-
-# Default temperature mapping.
-# Each entry: (pattern, default_temperature)
-DEFAULT_TEMPERATURE_MODELS: list[tuple[str, float]] = [
-    ("kimi-k2-thinking", 1.0),
-    ("kimi-k2.5", 1.0),
-]
-
-
-def get_default_temperature(model: str) -> float:
-    """Return the default temperature for a given model pattern.
-
-    Uses case-insensitive substring matching via model_matches.
-    """
-    for pattern, value in DEFAULT_TEMPERATURE_MODELS:
-        if model_matches(model, [pattern]):
-            return value
-    return 0.0
diff --git a/tests/sdk/config/test_llm_config.py b/tests/sdk/config/test_llm_config.py
index 21d8e699ed..280c4e3e18 100644
--- a/tests/sdk/config/test_llm_config.py
+++ b/tests/sdk/config/test_llm_config.py
@@ -21,8 +21,8 @@ def test_llm_config_defaults():
     assert config.retry_max_wait == 64
     assert config.timeout == 300  # Default timeout is 5 minutes
     assert config.max_message_chars == 30_000
-    assert config.temperature == 0.0
-    assert config.top_p == 1.0
+    assert config.temperature is None  # None to use provider defaults
+    assert config.top_p is None  # None to use provider defaults
     assert config.top_k is None
     assert config.max_input_tokens == 128000  # Auto-populated from model info
     assert config.max_output_tokens == 16384  # Auto-populated from model info
diff --git a/tests/sdk/llm/test_llm.py b/tests/sdk/llm/test_llm.py
index 173a1c65b1..6cb220546a 100644
--- a/tests/sdk/llm/test_llm.py
+++ b/tests/sdk/llm/test_llm.py
@@ -78,6 +78,32 @@ def test_base_url_for_openhands_provider_with_explicit_none(mock_get):
     # The important assertion is that base_url is set correctly
 
 
+@patch("openhands.sdk.llm.utils.model_info.httpx.get")
+def test_kimi_k2_5_uses_provider_defaults(mock_get):
+    """Test that kimi-k2.5 uses provider defaults (None) for temperature and top_p."""
+    mock_get.return_value = Mock(json=lambda: {"data": []})
+
+    llm = LLM(
+        model="moonshot/kimi-k2.5",
+        api_key=SecretStr("test-key"),
+        usage_id="test-kimi-llm",
+    )
+    # Both temperature and top_p should be None (use provider defaults)
+    assert llm.temperature is None
+    assert llm.top_p is None
+
+    # Explicit values should still be respected
+    llm_explicit = LLM(
+        model="moonshot/kimi-k2.5",
+        api_key=SecretStr("test-key"),
+        usage_id="test-kimi-llm-explicit",
+        top_p=0.8,
+        temperature=0.5,
+    )
+    assert llm_explicit.top_p == 0.8
+    assert llm_explicit.temperature == 0.5
+
+
 @patch("openhands.sdk.llm.utils.model_info.httpx.get")
 def test_base_url_for_openhands_provider_with_custom_url(mock_get):
     """Test that openhands/ provider respects custom base_url when provided."""
@@ -637,7 +663,7 @@ def test_llm_local_detection_based_on_model_name(default_llm):
 
     # Test basic model configuration
     assert llm.model == "gpt-4o"
-    assert llm.temperature == 0.0
+    assert llm.temperature is None  # Uses provider default
 
     # Test with localhost base_url
     local_llm = default_llm.model_copy(update={"base_url": "http://localhost:8000"})
diff --git a/tests/sdk/llm/test_model_features.py b/tests/sdk/llm/test_model_features.py
index 304448a728..bdfe1c44ad 100644
--- a/tests/sdk/llm/test_model_features.py
+++ b/tests/sdk/llm/test_model_features.py
@@ -1,7 +1,6 @@
 import pytest
 
 from openhands.sdk.llm.utils.model_features import (
-    get_default_temperature,
     get_features,
     model_matches,
 )
@@ -312,54 +311,3 @@ def test_send_reasoning_content_support(model, expected_send_reasoning):
     """Test that models like kimi-k2-thinking require send_reasoning_content."""
     features = get_features(model)
     assert features.send_reasoning_content is expected_send_reasoning
-
-
-@pytest.mark.parametrize(
-    "model,expected_temperature",
-    [
-        # kimi-k2-thinking models should default to 1.0
-        ("kimi-k2-thinking", 1.0),
-        ("kimi-k2-thinking-0905", 1.0),
-        ("Kimi-K2-Thinking", 1.0),  # Case insensitive
-        ("moonshot/kimi-k2-thinking", 1.0),  # With provider prefix
-        ("litellm_proxy/kimi-k2-thinking", 1.0),  # With litellm proxy prefix
-        # kimi-k2.5 models should also default to 1.0
-        ("kimi-k2.5", 1.0),
-        ("Kimi-K2.5", 1.0),  # Case insensitive
-        # All other models should default to 0.0
-        ("kimi-k2-instruct", 0.0),  # Different kimi variant
-        ("gpt-4", 0.0),
-        ("gpt-4o", 0.0),
-        ("gpt-4o-mini", 0.0),
-        ("claude-3-5-sonnet", 0.0),
-        ("claude-3-7-sonnet", 0.0),
-        ("gemini-1.5-pro", 0.0),
-        ("gemini-2.5-pro-experimental", 0.0),
-        ("o1", 0.0),
-        ("o1-mini", 0.0),
-        ("o3", 0.0),
-        ("deepseek-chat", 0.0),
-        ("llama-3.1-70b", 0.0),
-        ("azure/gpt-4o-mini", 0.0),
-        ("openai/gpt-4o", 0.0),
-        ("anthropic/claude-3-5-sonnet", 0.0),
-        ("unknown-model", 0.0),
-    ],
-)
-def test_get_default_temperature(model, expected_temperature):
-    """Test that get_default_temperature returns correct values for different models."""
-    assert get_default_temperature(model) == expected_temperature
-
-
-def test_get_default_temperature_fallback():
-    """Test that get_default_temperature returns 0.0 for unknown models."""
-    assert get_default_temperature("completely-unknown-model-12345") == 0.0
-    assert get_default_temperature("some-random-model") == 0.0
-
-
-def test_get_default_temperature_case_insensitive():
-    """Test that get_default_temperature is case insensitive."""
-    assert get_default_temperature("kimi-k2-thinking") == 1.0
-    assert get_default_temperature("KIMI-K2-THINKING") == 1.0
-    assert get_default_temperature("Kimi-K2-Thinking") == 1.0
-    assert get_default_temperature("KiMi-k2-ThInKiNg") == 1.0
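
Usage note (not part of the patch): a minimal sketch of the caller-visible effect of this change, assuming the `LLM` constructor arguments shown in the tests above and the `from openhands.sdk.llm import LLM` import used there; depending on the model/provider, constructing `LLM` may also need the same `httpx.get` mock the tests use for model-info lookup.

```python
from pydantic import SecretStr

from openhands.sdk.llm import LLM  # import path assumed from the SDK tests above

# With this change, temperature and top_p default to None, so the provider's own
# sampling defaults apply (no more forced temperature=0.0 or top_p=1.0).
llm = LLM(
    model="moonshot/kimi-k2.5",
    api_key=SecretStr("dummy-key"),
    usage_id="example-llm",
)
assert llm.temperature is None and llm.top_p is None

# Callers that relied on the old implicit deterministic default now opt in explicitly.
deterministic_llm = LLM(
    model="gpt-4o",
    api_key=SecretStr("dummy-key"),
    usage_id="example-llm-deterministic",
    temperature=0.0,
    top_p=1.0,
)
```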