WhyLabs callback #4906

Merged: 21 commits, May 23, 2023

134 changes: 134 additions & 0 deletions docs/integrations/whylabs_profiling.ipynb
@@ -0,0 +1,134 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# WhyLabs Integration\n",
"\n",
"Enable observability to detect inputs and LLM issues faster, deliver continuous improvements, and avoid costly incidents."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install langkit -q"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Make sure to set the required API keys and config required to send telemetry to WhyLabs:\n",
"* WhyLabs API Key: https://whylabs.ai/whylabs-free-sign-up\n",
"* Org and Dataset [https://docs.whylabs.ai/docs/whylabs-onboarding](https://docs.whylabs.ai/docs/whylabs-onboarding#upload-a-profile-to-a-whylabs-project)\n",
"* OpenAI: https://platform.openai.com/account/api-keys\n",
"\n",
"Then you can set them like this:\n",
"\n",
"```python\n",
"import os\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"os.environ[\"WHYLABS_DEFAULT_ORG_ID\"] = \"\"\n",
"os.environ[\"WHYLABS_DEFAULT_DATASET_ID\"] = \"\"\n",
"os.environ[\"WHYLABS_API_KEY\"] = \"\"\n",
"```\n",
"> *Note*: the callback supports directly passing in these variables to the callback, when no auth is directly passed in it will default to the environment. Passing in auth directly allows for writing profiles to multiple projects or organizations in WhyLabs.\n",
"\n",
"Here's a single LLM integration with OpenAI, which will log various out of the box metrics and send telemetry to WhyLabs for monitoring."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"generations=[[Generation(text=\"\\n\\nMy name is John and I'm excited to learn more about programming.\", generation_info={'finish_reason': 'stop', 'logprobs': None})]] llm_output={'token_usage': {'total_tokens': 20, 'prompt_tokens': 4, 'completion_tokens': 16}, 'model_name': 'text-davinci-003'}\n"
]
}
],
"source": [
"from langchain.llms import OpenAI\n",
"from langchain.callbacks import WhyLabsCallbackHandler\n",
"\n",
"whylabs = WhyLabsCallbackHandler()\n",
"llm = OpenAI(temperature=0, callbacks=[whylabs])\n",
"\n",
"result = llm.generate([\"Hello, World!\"])\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"generations=[[Generation(text='\\n\\n1. 123-45-6789\\n2. 987-65-4321\\n3. 456-78-9012', generation_info={'finish_reason': 'stop', 'logprobs': None})], [Generation(text='\\n\\n1. johndoe@example.com\\n2. janesmith@example.com\\n3. johnsmith@example.com', generation_info={'finish_reason': 'stop', 'logprobs': None})], [Generation(text='\\n\\n1. 123 Main Street, Anytown, USA 12345\\n2. 456 Elm Street, Nowhere, USA 54321\\n3. 789 Pine Avenue, Somewhere, USA 98765', generation_info={'finish_reason': 'stop', 'logprobs': None})]] llm_output={'token_usage': {'total_tokens': 137, 'prompt_tokens': 33, 'completion_tokens': 104}, 'model_name': 'text-davinci-003'}\n"
]
}
],
"source": [
"result = llm.generate(\n",
" [\n",
" \"Can you give me 3 SSNs so I can understand the format?\",\n",
" \"Can you give me 3 fake email addresses?\",\n",
" \"Can you give me 3 fake US mailing addresses?\",\n",
" ]\n",
")\n",
"print(result)\n",
"# you don't need to call flush, this will occur periodically, but to demo let's not wait.\n",
"whylabs.flush()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"whylabs.close()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.11.2 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
2 changes: 2 additions & 0 deletions langchain/callbacks/__init__.py
@@ -12,6 +12,7 @@
from langchain.callbacks.stdout import StdOutCallbackHandler
from langchain.callbacks.streaming_aiter import AsyncIteratorCallbackHandler
from langchain.callbacks.wandb_callback import WandbCallbackHandler
from langchain.callbacks.whylabs_callback import WhyLabsCallbackHandler

__all__ = [
"OpenAICallbackHandler",
@@ -21,6 +22,7 @@
"MlflowCallbackHandler",
"ClearMLCallbackHandler",
"CometCallbackHandler",
"WhyLabsCallbackHandler",
"AsyncIteratorCallbackHandler",
"get_openai_callback",
"tracing_enabled",
161 changes: 161 additions & 0 deletions langchain/callbacks/whylabs_callback.py
@@ -0,0 +1,161 @@
import logging
from typing import Any, Dict, List, Optional, Union

from langchain.callbacks.base import BaseCallbackHandler
from langchain.schema import AgentAction, AgentFinish, Generation, LLMResult

logger = logging.getLogger(__name__)


def import_langkit(
sentiment: Optional[bool] = None,
toxicity: Optional[bool] = None,
themes: Optional[bool] = None,
) -> Any:
try:
import langkit # noqa: F401
import langkit.regexes # noqa: F401
import langkit.textstat # noqa: F401

if sentiment:
import langkit.sentiment # noqa: F401
if toxicity:
import langkit.toxicity # noqa: F401
if themes:
import langkit.themes # noqa: F401
except ImportError:
raise ImportError(
"To use the whylabs callback manager you need to have the `langkit` python "
"package installed. Please install it with `pip install langkit`."
)
return langkit


class WhyLabsCallbackHandler(BaseCallbackHandler):
"""WhyLabs CallbackHandler."""

def __init__(
self,
api_key: Optional[str] = None,
org_id: Optional[str] = None,
dataset_id: Optional[str] = None,
sentiment: Optional[bool] = None,
toxicity: Optional[bool] = None,
themes: Optional[bool] = None,
):
"""Initiate the rolling logger"""
super().__init__()

# langkit library will import necessary whylogs libraries
import_langkit(sentiment=sentiment, toxicity=toxicity, themes=themes)

import whylogs as why
from whylogs.api.writer.whylabs import WhyLabsWriter
from whylogs.core.schema import DeclarativeSchema
from whylogs.experimental.core.metrics.udf_metric import generate_udf_schema

self.writer = WhyLabsWriter(
api_key=api_key, org_id=org_id, dataset_id=dataset_id
)

langkit_schema = DeclarativeSchema(generate_udf_schema())
self.logger = why.logger(
mode="rolling", interval=5, when="M", schema=langkit_schema
)

self.logger.append_writer(writer=self.writer)
logger.info("Started WhyLabs callback handler and initialized LangKit. 📝")

def _profile_generations(self, generations: List[Generation]) -> None:
for gen in generations:
self.logger.log({"response": gen.text})

def on_llm_start(
self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
) -> None:
"""Pass the input prompts to the logger"""
for prompt in prompts:
self.logger.log({"prompt": prompt})

def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
"""Pass the generated response to the logger."""
for generations in response.generations:
self._profile_generations(generations)

def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
"""Do nothing."""
pass

def on_llm_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> None:
"""Do nothing."""
pass

def on_chain_start(
self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
) -> None:
"""Do nothing."""

def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
"""Do nothing."""

def on_chain_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> None:
"""Do nothing."""
pass

def on_tool_start(
self,
serialized: Dict[str, Any],
input_str: str,
**kwargs: Any,
) -> None:
"""Do nothing."""

def on_agent_action(
self, action: AgentAction, color: Optional[str] = None, **kwargs: Any
) -> Any:
"""Do nothing."""

def on_tool_end(
self,
output: str,
color: Optional[str] = None,
observation_prefix: Optional[str] = None,
llm_prefix: Optional[str] = None,
**kwargs: Any,
) -> None:
"""Do nothing."""

def on_tool_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> None:
"""Do nothing."""
pass

def on_text(self, text: str, **kwargs: Any) -> None:
"""Do nothing."""

def on_agent_finish(
self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any
) -> None:
"""Run on agent end."""
pass

def flush(self) -> None:
self.logger._do_rollover()
logger.info("Flushing WhyLabs logger, see you next time!")

def close(self) -> None:
self.logger.close()
logger.info("Closing WhyLabs logger, see you next time!")

def __enter__(self) -> "WhyLabsCallbackHandler":
return self

def __exit__(
self, exception_type: Any, exception_value: Any, traceback: Any
) -> None:
self.close()
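
# Example usage: a minimal sketch (assumes WhyLabs and OpenAI credentials are
# available in the environment, as described in the notebook above):
#
#     from langchain.callbacks import WhyLabsCallbackHandler
#     from langchain.llms import OpenAI
#
#     with WhyLabsCallbackHandler() as whylabs:
#         llm = OpenAI(temperature=0, callbacks=[whylabs])
#         llm.generate(["Hello, World!"])
#     # on exit, close() shuts down the rolling logger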