Skip to content
Merged
16 changes: 8 additions & 8 deletions altk/pre_tool/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@
from enum import Enum
from altk.core.toolkit import ComponentConfig
from altk.pre_tool.core.consts import (
METRIC_GENERAL_CONVERSATION_GROUNDED_CORRECTNESS,
METRIC_GENERAL_HALLUCINATION_CHECK,
METRIC_GENERAL_VALUE_FORMAT_ALIGNMENT,
METRIC_FUNCTION_SELECTION_APPROPRIATENESS,
METRIC_AGENTIC_CONSTRAINTS_SATISFACTION,
METRIC_PARAMETER_VALUE_FORMAT_ALIGNMENT,
METRIC_PARAMETER_HALLUCINATION_CHECK,
)


Expand All @@ -23,6 +22,7 @@ class Track(str, Enum):
"""Predefined configuration tracks for the reflection pipeline."""

SYNTAX = "syntax"
SPEC_FREE = "spec_free"
FAST_TRACK = "fast_track"
SLOW_TRACK = "slow_track"
TRANSFORMATIONS_ONLY = "transformations_only"
Expand Down Expand Up @@ -70,22 +70,17 @@ class SPARCReflectionConfig(BaseModel):
general_metrics: Optional[List[str]] = Field(
default=[
METRIC_GENERAL_HALLUCINATION_CHECK,
METRIC_GENERAL_VALUE_FORMAT_ALIGNMENT,
],
description="List of general metrics to evaluate",
)
function_metrics: Optional[List[str]] = Field(
default=[
METRIC_FUNCTION_SELECTION_APPROPRIATENESS,
METRIC_AGENTIC_CONSTRAINTS_SATISFACTION,
],
description="List of function-specific metrics to evaluate",
)
parameter_metrics: Optional[List[str]] = Field(
default=[
METRIC_PARAMETER_HALLUCINATION_CHECK,
METRIC_PARAMETER_VALUE_FORMAT_ALIGNMENT,
],
default=[],
description="List of parameter-specific metrics to evaluate",
)

Expand Down Expand Up @@ -113,6 +108,11 @@ class Config:
function_metrics=None,
parameter_metrics=None,
),
"spec_free": SPARCReflectionConfig(
general_metrics=[METRIC_GENERAL_CONVERSATION_GROUNDED_CORRECTNESS],
function_metrics=None,
parameter_metrics=None,
),
"fast_track": SPARCReflectionConfig(
general_metrics=[METRIC_GENERAL_HALLUCINATION_CHECK],
function_metrics=[METRIC_FUNCTION_SELECTION_APPROPRIATENESS],
Expand Down
8 changes: 8 additions & 0 deletions altk/pre_tool/core/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
## General metrics
METRIC_GENERAL_HALLUCINATION_CHECK = "general_hallucination_check"
METRIC_GENERAL_VALUE_FORMAT_ALIGNMENT = "general_value_format_alignment"
METRIC_GENERAL_CONVERSATION_GROUNDED_CORRECTNESS = (
"general_conversation_grounded_correctness"
)

## Function selection metrics
METRIC_FUNCTION_SELECTION_APPROPRIATENESS = "function_selection_appropriateness"
Expand All @@ -17,6 +20,11 @@
METRIC_GENERAL_VALUE_FORMAT_ALIGNMENT,
]

## Tool-spec-free metrics (can run without tool specifications)
TOOL_SPEC_FREE_METRICS = [
METRIC_GENERAL_CONVERSATION_GROUNDED_CORRECTNESS,
]

FUNCTION_SELECTION_METRICS = [
METRIC_FUNCTION_SELECTION_APPROPRIATENESS,
METRIC_AGENTIC_CONSTRAINTS_SATISFACTION,
Expand Down
26 changes: 0 additions & 26 deletions altk/pre_tool/core/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,6 @@

from altk.core.llm import LLMClient
from altk.core.toolkit import ComponentInput, ComponentOutput
from altk.pre_tool.refraction.src.schemas.results import (
DebuggingResult,
)
from altk.pre_tool.refraction.src.schemas.mappings import Mapping

try:
from nestful.schemas.api import Catalog
except ImportError:
Catalog = None


class SPARCReflectionDecision(str, Enum):
Expand Down Expand Up @@ -102,23 +93,6 @@ class SPARCReflectionRunOutput(PreToolReflectionRunOutput):
)


class RefractionRunInput(PreToolReflectionRunInput):
mappings: Optional[list[Mapping]] = None
memory_objects: Optional[dict[str, Any]] = None
use_given_operators_only: bool = False


class RefractionBuildInput(PreToolReflectionBuildInput):
tool_specs: list[dict[str, Any]] | Catalog
top_k: int = 5
threshold: float = 0.8
compute_maps: bool = True


class RefractionRunOutput(PreToolReflectionRunOutput):
result: Optional[DebuggingResult] = None


class ToolGuardBuildInputMetaData(BaseModel):
model_config = ConfigDict(arbitrary_types_allowed=True)
policy_text: str = Field(description="Text of the policy document file")
Expand Down
12 changes: 7 additions & 5 deletions altk/pre_tool/refraction/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
Refraction is a low-cost (no LLMs!), low-latency, domain-agnostic, data-agnostic, model-agnostic approach towards validation and repair for a sequence of tool calls, based on classical AI planning techniques. We have seen as much as 48% error correction in certain scenarios.

## Table of Contents
- [When it is recommended to use this component](#when-it-is-recommended-to-use-this-component)
- [Quick Start](#quick-start)
- [License](#license)
- [Under the Hood](#under-the-hood)
- [Refraction - Syntactic Validation of Tool Calls](#refraction---syntactic-validation-of-tool-calls)
- [Table of Contents](#table-of-contents)
- [When it is recommended to use this component](#when-it-is-recommended-to-use-this-component)
- [Quick Start](#quick-start)
- [License](#license)
- [Under the Hood](#under-the-hood)

## When it is recommended to use this component

Expand All @@ -22,7 +24,7 @@ Make sure the dependencies for Refraction are included by running `pip install "
```python
import os
from altk.pre_tool.refraction.refraction import RefractionComponent
from altk.pre_tool.core.types import RefractionBuildInput, RefractionRunInput
from altk.pre_tool.refraction.types import RefractionBuildInput, RefractionRunInput
from altk.pre_tool.core.config import RefractionConfig, RefractionMode
from altk.core.toolkit import AgentPhase

Expand Down
2 changes: 1 addition & 1 deletion altk/pre_tool/refraction/refraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
RefractionConfig,
RefractionMode,
)
from altk.pre_tool.core.types import (
from altk.pre_tool.refraction.types import (
RefractionBuildInput,
RefractionRunInput,
RefractionRunOutput,
Expand Down
46 changes: 46 additions & 0 deletions altk/pre_tool/refraction/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from typing import Optional, Any

from altk.pre_tool.core.types import (
PreToolReflectionRunInput,
PreToolReflectionRunOutput,
PreToolReflectionBuildInput,
)
from altk.pre_tool.refraction.src.schemas.results import (
DebuggingResult,
)
from altk.pre_tool.refraction.src.schemas.mappings import Mapping

try:
from nestful.schemas.api import Catalog
except ImportError:
Catalog = None


class RefractionRunInput(PreToolReflectionRunInput):
    """Input for running Refraction reflection.

    Extends the generic pre-tool reflection run input with
    refraction-specific options; every field has a default, so the
    base-class input alone is sufficient to run.
    """

    # Pre-computed parameter mappings to reuse; when None the component
    # presumably derives them itself — TODO confirm against RefractionComponent.
    mappings: Optional[list[Mapping]] = None
    # Objects already available in memory, keyed by name; assumed to be
    # referenced by the tool-call sequence being validated — TODO confirm.
    memory_objects: Optional[dict[str, Any]] = None
    # When True, restrict validation/repair to the operators explicitly
    # provided rather than the full catalog — NOTE(review): confirm semantics.
    use_given_operators_only: bool = False


class RefractionBuildInput(PreToolReflectionBuildInput):
    """Input for building Refraction component."""

    # Tool specifications: either raw spec dicts or a nestful Catalog
    # (Catalog is None when the optional nestful dependency is absent).
    tool_specs: list[dict[str, Any]] | Catalog
    # presumably the number of candidate matches retained per lookup — TODO confirm
    top_k: int = 5
    # presumably a similarity threshold in [0, 1] for matching — TODO confirm
    threshold: float = 0.8
    # Whether to pre-compute mappings at build time.
    compute_maps: bool = True


class RefractionRunOutput(PreToolReflectionRunOutput):
    """Output from running Refraction reflection."""

    # Detailed debugging result of the validation/repair pass;
    # None when no result was produced.
    result: Optional[DebuggingResult] = None


# Public API of this module.
__all__ = [
    "RefractionRunInput",
    "RefractionRunOutput",
    "RefractionBuildInput",
]
48 changes: 37 additions & 11 deletions altk/pre_tool/sparc/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ from altk.pre_tool.core import (
)
from altk.pre_tool.sparc.sparc import SPARCReflectionComponent
from altk.core.toolkit import AgentPhase, ComponentConfig
from langchain_core.messages import HumanMessage, AIMessage
from altk.core.llm import get_llm


Expand Down Expand Up @@ -115,8 +114,14 @@ tool_specs = [{

# Prepare conversation context
messages = [
HumanMessage(content="Send an email to team@company.com about the meeting"),
AIMessage(content="I'll send that email for you.")
{
"role": "user",
"content": "Send an email to team@company.com about the meeting"
},
{
"role": "assistant",
"content": "I'll send that email for you."
}
]

# Tool call to validate (OpenAI format)
Expand Down Expand Up @@ -155,13 +160,15 @@ The component expects three main inputs in OpenAI-compatible formats:
List of messages representing the conversation context:

```python
from langchain_core.messages import HumanMessage, AIMessage

messages = [
HumanMessage(content="What's the weather in New York?"),
AIMessage(content="I'll check the weather for you."),
HumanMessage(content="Make sure to use Fahrenheit please"),
AIMessage(content="I'll get the weather in New York using Fahrenheit.")
{
"role": "user",
"content": "What's the weather in New York?"
},
{
"role": "assistant",
"content": "I'll check the weather for you."
},
]
```

Expand Down Expand Up @@ -314,6 +321,22 @@ sparc = SPARCReflectionComponent(
)
```

#### `Track.SPEC_FREE` - Semantic Validation without Tool Specifications
- **LLM Calls**: 1
- **Validates**: General correctness check (spec-free)
- **Use Case**: Single-turn or multi-turn conversations, performance-sensitive applications
- **Performance**: Very fast
- **Model Required**: Yes

```python
config = build_config() # ValidatingLLMClient required
sparc = SPARCReflectionComponent(
config=config,
track=Track.SPEC_FREE,
execution_mode=SPARCExecutionMode.ASYNC,
)
```

#### `Track.TRANSFORMATIONS_ONLY` - Unit/Format Conversion Focus
- **LLM Calls**: 1 + N (where N = parameters needing transformation, executed in parallel)
- **Validates**: Units conversion, format transformations
Expand Down Expand Up @@ -352,6 +375,9 @@ Each track includes specific validation metrics optimized for different use case
- **METRIC_AGENTIC_CONSTRAINTS_SATISFACTION**: Validates adherence to agentic conversation constraints and context
- **Transform enabled**: Unit/format conversions when needed

#### `Track.SPEC_FREE`
- **METRIC_GENERAL_CONVERSATION_GROUNDED_CORRECTNESS**: Detects incorrect tool calls

#### `Track.TRANSFORMATIONS_ONLY`
- **Transform enabled**: Focus on unit/format conversions
- **METRIC_TRANSFORMATION_DETECTION**: Identifies parameters needing transformation
Expand Down Expand Up @@ -396,6 +422,7 @@ For advanced users who need specific combinations of validation metrics, you can
from llmevalkit.function_calling.consts import (
METRIC_GENERAL_HALLUCINATION_CHECK, # Detects hallucinated parameter values
METRIC_GENERAL_VALUE_FORMAT_ALIGNMENT, # Validates parameter format requirements
METRIC_GENERAL_CONVERSATION_GROUNDED_CORRECTNESS, # Validates tool call correctness (spec-free)
METRIC_FUNCTION_SELECTION_APPROPRIATENESS, # Validates function choice matches intent
METRIC_AGENTIC_CONSTRAINTS_SATISFACTION, # Validates agentic conversation constraints
METRIC_PARAMETER_VALUE_FORMAT_ALIGNMENT, # Validates parameter format requirements
Expand Down Expand Up @@ -491,6 +518,7 @@ sparc = SPARCReflectionComponent(
- **General Metrics**: Applied to the overall tool call context
- `METRIC_GENERAL_HALLUCINATION_CHECK`: Detects fabricated or hallucinated information
- `METRIC_GENERAL_VALUE_FORMAT_ALIGNMENT`: Validates parameter format requirements
- `METRIC_GENERAL_CONVERSATION_GROUNDED_CORRECTNESS`: Validates tool call correctness (spec-free)

- **Function Metrics**: Applied to function selection and appropriateness
- `METRIC_FUNCTION_SELECTION_APPROPRIATENESS`: Validates function choice matches user intent
Expand Down Expand Up @@ -787,8 +815,6 @@ uv run pytest tests/pre_tool/sparc/units_conversion_test.py
- **Semantic Validation Tests**: Intent alignment, parameter grounding, hallucination detection
- **Units Conversion Tests**: Temperature, distance, and format transformation validation



## License
Apache 2.0 - see LICENSE file for details.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import json
from dotenv import load_dotenv
from typing import cast
from langchain_core.messages import HumanMessage, AIMessage
from altk.pre_tool.core.types import SPARCReflectionRunOutput

# Import middleware components
Expand Down Expand Up @@ -69,8 +68,14 @@ def run_custom_config_examples():

# Test with function selection misalignment
conversation_context = [
HumanMessage(content="What's the weather like in New York today?"),
AIMessage(content="I'll check the weather for you."),
{
"role": "user",
"content": "What's the weather like in New York today?",
},
{
"role": "assistant",
"content": "I'll check the weather for you.",
},
]

tool_specs = [
Expand Down
Loading
Loading