Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

llm judges updates #1259

Merged
merged 6 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/evidently/calculation_engine/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from evidently.calculation_engine.metric_implementation import MetricImplementation
from evidently.features.generated_features import FeatureResult
from evidently.features.generated_features import GeneratedFeatures
from evidently.options.base import Options
from evidently.pydantic_utils import Fingerprint
from evidently.utils.data_preprocessing import DataDefinition

Expand Down Expand Up @@ -60,7 +61,7 @@ def execute_metrics(self, context: "Context", data: GenericInputData):
converted_data = self.convert_input_data(data)

features_list = self.get_additional_features(converted_data.data_definition)
features = self.calculate_additional_features(converted_data, features_list)
features = self.calculate_additional_features(converted_data, features_list, context.options)
context.set_features(features)
self.inject_additional_features(converted_data, features)
context.data = converted_data
Expand Down Expand Up @@ -91,7 +92,7 @@ def get_data_definition(

@abc.abstractmethod
def calculate_additional_features(
self, data: TInputData, features: List[GeneratedFeatures]
self, data: TInputData, features: List[GeneratedFeatures], options: Options
) -> Dict[GeneratedFeatures, FeatureResult[TEngineDataType]]:
raise NotImplementedError

Expand Down
7 changes: 4 additions & 3 deletions src/evidently/calculation_engine/python_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from evidently.calculation_engine.metric_implementation import MetricImplementation
from evidently.features.generated_features import FeatureResult
from evidently.features.generated_features import GeneratedFeatures
from evidently.options.base import Options
from evidently.utils.data_preprocessing import DataDefinition
from evidently.utils.data_preprocessing import create_data_definition

Expand Down Expand Up @@ -53,13 +54,13 @@ def get_data_definition(
return create_data_definition(reference_data, current_data, column_mapping, categorical_features_cardinality)

def calculate_additional_features(
self, data: TInputData, features: List[GeneratedFeatures]
self, data: TInputData, features: List[GeneratedFeatures], options: Options
) -> Dict[GeneratedFeatures, FeatureResult[pd.DataFrame]]:
result: Dict[GeneratedFeatures, FeatureResult[pd.DataFrame]] = {}
for feature in features:
current = feature.generate_features_renamed(data.current_data, data.data_definition)
current = feature.generate_features_renamed(data.current_data, data.data_definition, options)
reference = (
feature.generate_features_renamed(data.reference_data, data.data_definition)
feature.generate_features_renamed(data.reference_data, data.data_definition, options)
if data.reference_data is not None
else None
)
Expand Down
2 changes: 2 additions & 0 deletions src/evidently/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class EvidentlyError(Exception):
    """Root exception type for evidently.

    Other evidently exception classes (for example the LLM errors and the UI
    service errors) derive from this class, so callers can catch it to handle
    any of them.
    """
4 changes: 3 additions & 1 deletion src/evidently/features/feature_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ def run(
additional_data=additional_data or {},
)
converted_data = context_engine.convert_input_data(data)
result = context_engine.calculate_additional_features(converted_data, self.features)
result = context_engine.calculate_additional_features(
converted_data, self.features, self._inner_suite.context.options
)
self._inner_suite.context.features = result

def get_features(self, feature: Optional[GeneratedFeatures] = None) -> EngineDatasets[Any]:
Expand Down
11 changes: 7 additions & 4 deletions src/evidently/features/generated_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from evidently.base_metric import DatasetType
from evidently.base_metric import TEngineDataType
from evidently.core import ColumnType
from evidently.options.base import Options
from evidently.pydantic_utils import EvidentlyBaseModel
from evidently.utils.data_preprocessing import DataDefinition

Expand All @@ -37,7 +38,7 @@ def get_type(self, subcolumn: Optional[str] = None) -> ColumnType:
raise NotImplementedError

@abc.abstractmethod
def generate_features(self, data: pd.DataFrame, data_definition: DataDefinition) -> pd.DataFrame:
def generate_features(self, data: pd.DataFrame, data_definition: DataDefinition, options: Options) -> pd.DataFrame:
"""
generate DataFrame with new features from source data.

Expand All @@ -46,8 +47,10 @@ def generate_features(self, data: pd.DataFrame, data_definition: DataDefinition)
"""
raise NotImplementedError

def generate_features_renamed(self, data: pd.DataFrame, data_definition: DataDefinition) -> pd.DataFrame:
features = self.generate_features(data, data_definition)
def generate_features_renamed(
self, data: pd.DataFrame, data_definition: DataDefinition, options: Options
) -> pd.DataFrame:
features = self.generate_features(data, data_definition, options)
return features.rename(columns={col: self._create_column_name(col) for col in features.columns}).set_index(
data.index
)
Expand Down Expand Up @@ -124,7 +127,7 @@ def generate_feature(self, data: pd.DataFrame, data_definition: DataDefinition)
"""
raise NotImplementedError

def generate_features(self, data: pd.DataFrame, data_definition: DataDefinition) -> pd.DataFrame:
def generate_features(self, data: pd.DataFrame, data_definition: DataDefinition, options: Options) -> pd.DataFrame:
feature = self.generate_feature(data, data_definition)
assert len(feature.columns) == 1
return feature
Expand Down
64 changes: 51 additions & 13 deletions src/evidently/features/llm_judge.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,26 @@
from abc import abstractmethod
from enum import Enum
from typing import Callable
from typing import ClassVar
from typing import Dict
from typing import Iterator
from typing import List
from typing import Optional
from typing import Tuple
from typing import Type
from typing import Union

import pandas as pd

from evidently import ColumnType
from evidently._pydantic_compat import Field
from evidently._pydantic_compat import PrivateAttr
from evidently._pydantic_compat import SecretStr
from evidently.base_metric import ColumnName
from evidently.errors import EvidentlyError
from evidently.features.generated_features import GeneratedFeatures
from evidently.options.base import Options
from evidently.options.option import Option
from evidently.pydantic_utils import EnumValueMixin
from evidently.pydantic_utils import EvidentlyBaseModel
from evidently.utils.data_preprocessing import DataDefinition
Expand All @@ -25,19 +31,32 @@
LLMResponse = Dict[str, Union[str, float]]


class LLMResponseParseError(ValueError):
class EvidentlyLLMError(EvidentlyError):
    """Common base class for the LLM-related exceptions defined in this module."""


class LLMResponseParseError(EvidentlyLLMError):
    """LLM error for a response that could not be parsed.

    NOTE(review): the places that raise this are not visible in this diff;
    the description follows the class name — confirm against the parser code.
    """


class LLMRequestError(EvidentlyLLMError):
    """LLM error raised when a request to the LLM provider's API fails."""


class LLMWrapper(ABC):
    """Abstract interface for a client that completes a list of LLM messages.

    Concrete wrappers implement :meth:`complete` and may override
    ``__used_options__`` to declare which ``Option`` classes they use.
    """

    # Option classes used by this wrapper; the base class declares none.
    __used_options__: ClassVar[List[Type[Option]]] = []

    @abstractmethod
    def complete(self, messages: List[LLMMessage]) -> str:
        """Return the model's completion for ``messages`` as a string."""
        raise NotImplementedError

    def get_used_options(self) -> List[Type[Option]]:
        """Return the option classes declared in ``__used_options__``."""
        return self.__used_options__


LLMProvider = str
LLMModel = str
LLMWrapperProvider = Callable[[LLMModel], LLMWrapper]
LLMWrapperProvider = Callable[[LLMModel, Options], LLMWrapper]
_wrappers: Dict[Tuple[LLMProvider, Optional[LLMModel]], LLMWrapperProvider] = {}


Expand All @@ -49,13 +68,13 @@ def dec(f: LLMWrapperProvider):
return dec


def get_llm_wrapper(provider: LLMProvider, model: LLMModel) -> LLMWrapper:
def get_llm_wrapper(provider: LLMProvider, model: LLMModel, options: Options) -> LLMWrapper:
key: Tuple[str, Optional[str]] = (provider, model)
if key in _wrappers:
return _wrappers[key](model)
return _wrappers[key](model, options)
key = (provider, None)
if key in _wrappers:
return _wrappers[key](model)
return _wrappers[key](model, options)
raise ValueError(f"LLM wrapper for provider {provider} model {model} not found")


Expand Down Expand Up @@ -236,10 +255,9 @@ class LLMJudge(GeneratedFeatures):

_llm_wrapper: Optional[LLMWrapper] = PrivateAttr(None)

@property
def llm_wrapper(self) -> LLMWrapper:
def get_llm_wrapper(self, options: Options) -> LLMWrapper:
if self._llm_wrapper is None:
self._llm_wrapper = get_llm_wrapper(self.provider, self.model)
self._llm_wrapper = get_llm_wrapper(self.provider, self.model, options)
return self._llm_wrapper

def get_input_columns(self):
Expand All @@ -249,12 +267,12 @@ def get_input_columns(self):

return {self.input_column: "input"}

def generate_features(self, data: pd.DataFrame, data_definition: DataDefinition) -> pd.DataFrame:
def generate_features(self, data: pd.DataFrame, data_definition: DataDefinition, options: Options) -> pd.DataFrame:
result: List[Dict[str, Union[str, float]]] = []

for message in self.template.iterate_messages(data, self.get_input_columns()):
messages: List[LLMMessage] = [*self.template.get_system_prompts(), message]
response = self.llm_wrapper.complete(messages)
response = self.get_llm_wrapper(options).complete(messages)
result.append(self.template.parse_response(response))
return pd.DataFrame(result)

Expand All @@ -271,17 +289,37 @@ def get_type(self, subcolumn: Optional[str] = None) -> ColumnType:
return self.template.get_type(subcolumn)


class OpenAIKey(Option):
    """Option holding an optional OpenAI API key, stored as a ``SecretStr``."""

    api_key: Optional[SecretStr] = None

    def __init__(self, api_key: Optional[str] = None):
        """Create the option from a plain-text key (or ``None`` for no key).

        :param api_key: the API key as a plain string; wrapped in ``SecretStr``
            before being stored.
        """
        # Pass the value through the model constructor rather than assigning
        # the attribute before ``super().__init__()`` and then initialising
        # with no arguments, which would not carry the key into the model.
        # NOTE(review): assumes Option is a pydantic model that accepts field
        # values as keyword arguments — confirm against Option's definition.
        secret = SecretStr(api_key) if api_key is not None else None
        super().__init__(api_key=secret)

    def get_value(self) -> Optional[str]:
        """Return the stored key as plain text, or ``None`` when no key is set."""
        if self.api_key is None:
            return None
        return self.api_key.get_secret_value()


@llm_provider("openai", None)
class OpenAIWrapper(LLMWrapper):
def __init__(self, model: str):
__used_options__: ClassVar = [OpenAIKey]

def __init__(self, model: str, options: Options):
import openai

self.model = model
self.client = openai.OpenAI()
self.client = openai.OpenAI(api_key=options.get(OpenAIKey).get_value())

def complete(self, messages: List[LLMMessage]) -> str:
import openai

messages = [{"role": user, "content": msg} for user, msg in messages]
response = self.client.chat.completions.create(model=self.model, messages=messages) # type: ignore[arg-type]
try:
response = self.client.chat.completions.create(model=self.model, messages=messages) # type: ignore[arg-type]
except openai.OpenAIError as e:
raise LLMRequestError("Failed to call OpenAI complete API") from e
content = response.choices[0].message.content
assert content is not None # todo: better error
return content
Expand Down
35 changes: 35 additions & 0 deletions src/evidently/options/base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from typing import TYPE_CHECKING
from typing import Dict
from typing import List
from typing import Optional
Expand All @@ -11,6 +12,10 @@
from evidently.options.agg_data import RenderOptions
from evidently.options.option import Option

if TYPE_CHECKING:
from evidently._pydantic_compat import AbstractSetIntStr
from evidently._pydantic_compat import DictStrAny
from evidently._pydantic_compat import MappingIntStrAny
TypeParam = TypeVar("TypeParam", bound=Option)


Expand Down Expand Up @@ -86,6 +91,36 @@ def __hash__(self):
value_pairs.extend(sorted(list(self.custom.items())))
return hash((type(self),) + tuple(value_pairs))

def dict(
    self,
    *,
    include: Optional[Union["AbstractSetIntStr", "MappingIntStrAny"]] = None,
    exclude: Optional[Union["AbstractSetIntStr", "MappingIntStrAny"]] = None,
    by_alias: bool = False,
    skip_defaults: Optional[bool] = None,
    exclude_unset: bool = False,
    exclude_defaults: bool = False,
    exclude_none: bool = False,
) -> "DictStrAny":
    """Export the options as a dict, always leaving out the ``custom`` field.

    All parameters are forwarded to the parent ``dict()`` implementation;
    ``exclude`` is extended so that it also covers ``"custom"``. The caller's
    ``exclude`` object is never modified: a new set/dict is built instead.
    """
    from collections.abc import Mapping

    # todo
    # for now custom options will not be saved at all
    # if we want them to be saved, custom field needs to be Dict[str, Option] so it is json-able
    effective_exclude: Union["AbstractSetIntStr", "MappingIntStrAny"]
    if exclude is None:
        effective_exclude = {"custom"}
    elif isinstance(exclude, Mapping):
        # In the mapping form a key is dropped only when its value is True
        # (or ...); a False value would keep "custom" in the output.
        effective_exclude = {**exclude, "custom": True}
    else:
        # Any set-like value (set, frozenset, dict keys view, ...).
        effective_exclude = set(exclude) | {"custom"}
    return super().dict(
        include=include,
        exclude=effective_exclude,
        by_alias=by_alias,
        skip_defaults=skip_defaults,
        exclude_unset=exclude_unset,
        exclude_defaults=exclude_defaults,
        exclude_none=exclude_none,
    )


_option_cls_mapping = {field.type_: name for name, field in Options.__fields__.items()}

Expand Down
3 changes: 2 additions & 1 deletion src/evidently/spark/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from evidently.core import ColumnType
from evidently.features.generated_features import FeatureResult
from evidently.features.generated_features import GeneratedFeatures
from evidently.options.base import Options
from evidently.spark.base import SparkDataFrame
from evidently.spark.base import SparkSeries
from evidently.spark.base import create_data_definition_spark
Expand Down Expand Up @@ -125,7 +126,7 @@ def get_data_definition(
return create_data_definition_spark(current_data, reference_data, column_mapping)

def calculate_additional_features(
self, data: TInputData, features: List[GeneratedFeatures]
self, data: TInputData, features: List[GeneratedFeatures], options: Options
) -> Dict[GeneratedFeatures, FeatureResult[TEngineDataType]]:
if len(features) > 0:
raise NotImplementedError("SparkEngine does not support generated features yet")
Expand Down
4 changes: 3 additions & 1 deletion src/evidently/ui/errors.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from litestar import Response

from evidently.errors import EvidentlyError

class EvidentlyServiceError(Exception):

class EvidentlyServiceError(EvidentlyError):
def to_response(self) -> Response:
raise NotImplementedError

Expand Down
7 changes: 4 additions & 3 deletions tests/features/test_llm_judge.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from evidently.features.llm_judge import LLMResponseParseError
from evidently.features.llm_judge import LLMWrapper
from evidently.features.llm_judge import llm_provider
from evidently.options.base import Options
from evidently.utils.data_preprocessing import DataDefinition


Expand Down Expand Up @@ -85,7 +86,7 @@ def test_parse_response(

@llm_provider("mock", None)
class MockLLMWrapper(LLMWrapper):
def __init__(self, model: str):
def __init__(self, model: str, options: Options):
self.model = model

def complete(self, messages: List[LLMMessage]) -> str:
Expand All @@ -105,7 +106,7 @@ def test_llm_judge():
data = pd.DataFrame({"text": ["A", "B"]})

dd = DataDefinition(columns={}, reference_present=False)
fts = llm_judge.generate_features(data, dd)
fts = llm_judge.generate_features(data, dd, Options())
pd.testing.assert_frame_equal(fts, pd.DataFrame({"category": ["A", "B"]}))


Expand All @@ -120,5 +121,5 @@ def test_multicol_llm_judge():
data = pd.DataFrame({"text": ["A", "B"], "text2": ["C", "D"]})

dd = DataDefinition(columns={}, reference_present=False)
fts = llm_judge.generate_features(data, dd)
fts = llm_judge.generate_features(data, dd, Options())
pd.testing.assert_frame_equal(fts, pd.DataFrame({"category": ["A", "B"]}))
3 changes: 2 additions & 1 deletion tests/features/test_multicolumn.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from evidently.features.feature_generator import FeatureGenerator
from evidently.features.generated_features import GeneratedFeatures
from evidently.metrics import ColumnSummaryMetric
from evidently.options.base import Options
from evidently.report import Report
from evidently.utils.data_preprocessing import DataDefinition

Expand All @@ -17,7 +18,7 @@ class MultiColumnFeature(GeneratedFeatures):
source_column: str
_called_count: int = PrivateAttr(0)

def generate_features(self, data: pd.DataFrame, data_definition: DataDefinition) -> pd.DataFrame:
def generate_features(self, data: pd.DataFrame, data_definition: DataDefinition, options: Options) -> pd.DataFrame:
self._called_count += 1
col = data[self.source_column]
return pd.DataFrame({"+1": col + 1, "+5": col + 5})
Expand Down
Loading