-
Notifications
You must be signed in to change notification settings - Fork 93
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(datasets): Add limited
langchain
support for Anthropic, Cohere…
…, and OpenAI models (#434) * Add openai datasets. * Add anthropic and cohere Signed-off-by: Ian Whalen <ianpatrickwhalen@gmail.com> * Add python API examples to docstrings. Signed-off-by: Ian Whalen <ianpatrickwhalen@gmail.com> * Clean up python example. Signed-off-by: Ian Whalen <ianpatrickwhalen@gmail.com> * Remove setup.py and move lanchain reqs to pyproject.toml Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com> * Move lanchain datasets to experimental Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com> * Try get antrophic dataset running. Looks like API URL is not necessary? Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com> * Update cohere package and imports Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com> * Update openai dependency + allow for url in antrophic Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com> * Improve Cohere dataset Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com> * Make credentials consistent + fix openai examples Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com> * Turn cohere dataset into chatcohere dataset Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com> * Clean up cohere dataset Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com> * Update release notes + init Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com> * Apply suggestions from code review Co-authored-by: ElenaKhaustova <157851531+ElenaKhaustova@users.noreply.github.com> Signed-off-by: Merel Theisen <49397448+merelcht@users.noreply.github.com> * Add version pins for langchain dependencies Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com> * Update kedro-datasets/kedro_datasets_experimental/langchain/_anthropic.py Co-authored-by: ElenaKhaustova <157851531+ElenaKhaustova@users.noreply.github.com> Signed-off-by: Merel Theisen <49397448+merelcht@users.noreply.github.com> * Try loosen pin on langchain-cohere Signed-off-by: Merel Theisen 
<merel.theisen@quantumblack.com> * Only pin dependencies of dataset def in pyproject.toml Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com> --------- Signed-off-by: Ian Whalen <ianpatrickwhalen@gmail.com> Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com> Signed-off-by: Merel Theisen <49397448+merelcht@users.noreply.github.com> Co-authored-by: Merel Theisen <merel.theisen@quantumblack.com> Co-authored-by: Merel Theisen <49397448+merelcht@users.noreply.github.com> Co-authored-by: ElenaKhaustova <157851531+ElenaKhaustova@users.noreply.github.com>
- Loading branch information
1 parent
064c5d9
commit 7f3f3ec
Showing
6 changed files
with
327 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
19 changes: 19 additions & 0 deletions
19
kedro-datasets/kedro_datasets_experimental/langchain/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
"""Provides interface to langchain model API objects.""" | ||
from typing import Any | ||
|
||
import lazy_loader as lazy | ||
|
||
# The bare annotations below declare the lazily attached names up front; they
# exist to work around the pylint issue discussed at
# https://github.com/pylint-dev/pylint/issues/4300#issuecomment-1043601901
ChatAnthropicDataset: Any
ChatCohereDataset: Any
ChatOpenAIDataset: Any
OpenAIEmbeddingsDataset: Any

# ``lazy.attach`` supplies this package's ``__getattr__``, ``__dir__`` and
# ``__all__``. ``submod_attrs`` maps each private submodule to the dataset
# classes it exposes at package level.
__getattr__, __dir__, __all__ = lazy.attach(
    __name__,
    submod_attrs={
        "_openai": ["ChatOpenAIDataset", "OpenAIEmbeddingsDataset"],
        "_anthropic": ["ChatAnthropicDataset"],
        "_cohere": ["ChatCohereDataset"],
    },
)
75 changes: 75 additions & 0 deletions
75
kedro-datasets/kedro_datasets_experimental/langchain/_anthropic.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
"""Defines an interface to common Anthropic models.""" | ||
|
||
from typing import Any, NoReturn | ||
|
||
from kedro.io import AbstractDataset, DatasetError | ||
from langchain_anthropic import ChatAnthropic | ||
|
||
|
||
class ChatAnthropicDataset(AbstractDataset[None, ChatAnthropic]):
    """``ChatAnthropicDataset`` loads a ChatAnthropic `langchain <https://python.langchain.com/>`_ model.

    Example usage for the :doc:`YAML API <kedro:data/data_catalog_yaml_examples>`:

    catalog.yml:

    .. code-block:: yaml

        claude_instant_1:
          type: langchain.ChatAnthropicDataset
          kwargs:
            model: "claude-instant-1"
            temperature: 0.0
          credentials: anthropic

    credentials.yml:

    .. code-block:: yaml

        anthropic:
          anthropic_api_url: <anthropic-api-base>
          anthropic_api_key: <anthropic-api-key>

    Example usage for the
    `Python API <https://kedro.readthedocs.io/en/stable/data/\
    advanced_data_catalog_usage.html>`_:

    .. code-block:: python

        >>> from kedro_datasets_experimental.langchain import ChatAnthropicDataset
        >>> llm = ChatAnthropicDataset(
        ...     credentials={
        ...         "anthropic_api_url": "xxx",
        ...         "anthropic_api_key": "xxx",
        ...     },
        ...     kwargs={
        ...         "model": "claude-instant-1",
        ...         "temperature": 0.0,
        ...     }
        ... ).load()
        >>>
        >>> # See: https://python.langchain.com/docs/integrations/chat/anthropic
        >>> llm.invoke("Hello world!")
    """

    def __init__(self, credentials: dict[str, str], kwargs: dict[str, Any] = None):
        """Constructor.

        Args:
            credentials: must contain `anthropic_api_url` and `anthropic_api_key`.
            kwargs: keyword arguments passed to the ChatAnthropic constructor.

        Raises:
            KeyError: if either required key is missing from ``credentials``.
        """
        self.anthropic_api_url = credentials["anthropic_api_url"]
        self.anthropic_api_key = credentials["anthropic_api_key"]
        # A missing or empty ``kwargs`` is normalised to an empty dict so that
        # it can always be unpacked in ``_load``.
        self.kwargs = kwargs or {}

    def _describe(self) -> dict[str, Any]:
        """Return a shallow copy of the model kwargs; credentials are not included."""
        return {**self.kwargs}

    def _save(self, data: None) -> NoReturn:
        """Always raise ``DatasetError``: this dataset cannot be saved to."""
        raise DatasetError(f"{self.__class__.__name__} is a read only data set type")

    def _load(self) -> ChatAnthropic:
        """Build a ``ChatAnthropic`` from the stored credentials and kwargs."""
        return ChatAnthropic(
            anthropic_api_url=self.anthropic_api_url,
            anthropic_api_key=self.anthropic_api_key,
            **self.kwargs,
        )
73 changes: 73 additions & 0 deletions
73
kedro-datasets/kedro_datasets_experimental/langchain/_cohere.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
""" | ||
Cohere dataset definition. | ||
""" | ||
|
||
from typing import Any, NoReturn | ||
|
||
from kedro.io import AbstractDataset, DatasetError | ||
from langchain_cohere import ChatCohere | ||
|
||
|
||
class ChatCohereDataset(AbstractDataset[None, ChatCohere]):
    """``ChatCohereDataset`` loads a ChatCohere `langchain <https://python.langchain.com/>`_ model.

    Example usage for the :doc:`YAML API <kedro:data/data_catalog_yaml_examples>`:

    catalog.yml:

    .. code-block:: yaml

        command:
          type: langchain.ChatCohereDataset
          kwargs:
            model: "command"
            temperature: 0.0
          credentials: cohere

    credentials.yml:

    .. code-block:: yaml

        cohere:
          cohere_api_url: <cohere-api-base>
          cohere_api_key: <cohere-api-key>

    Example usage for the
    `Python API <https://kedro.readthedocs.io/en/stable/data/\
    advanced_data_catalog_usage.html>`_:

    .. code-block:: python

        >>> from kedro_datasets_experimental.langchain import ChatCohereDataset
        >>> llm = ChatCohereDataset(
        ...     credentials={
        ...         "cohere_api_key": "xxx",
        ...         "cohere_api_url": "xxx",
        ...     },
        ...     kwargs={
        ...         "model": "command",
        ...         "temperature": 0,
        ...     }
        ... ).load()
        >>>
        >>> # See: https://python.langchain.com/v0.1/docs/integrations/chat/cohere/
        >>> llm.invoke("Hello world!")
    """

    def __init__(self, credentials: dict[str, str], kwargs: dict[str, Any] = None):
        """Store the Cohere credentials and model options.

        Args:
            credentials: must contain `cohere_api_url` and `cohere_api_key`.
            kwargs: keyword arguments passed to the underlying constructor.
        """
        self.cohere_api_url = credentials["cohere_api_url"]
        self.cohere_api_key = credentials["cohere_api_key"]
        # Fall back to an empty dict when no (or empty) kwargs are supplied.
        self.kwargs = kwargs if kwargs else {}

    def _describe(self) -> dict[str, Any]:
        """Return a shallow copy of the model kwargs; credentials are not included."""
        return dict(self.kwargs)

    def _save(self, data: None) -> NoReturn:
        """Always raise ``DatasetError``: this dataset cannot be saved to."""
        dataset_name = self.__class__.__name__
        raise DatasetError(f"{dataset_name} is a read only data set type")

    def _load(self) -> ChatCohere:
        """Build a ``ChatCohere``; the stored API URL is passed as ``base_url``."""
        return ChatCohere(
            cohere_api_key=self.cohere_api_key,
            base_url=self.cohere_api_url,
            **self.kwargs,
        )
139 changes: 139 additions & 0 deletions
139
kedro-datasets/kedro_datasets_experimental/langchain/_openai.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
"""Defines an interface to common OpenAI models.""" | ||
|
||
from abc import abstractmethod | ||
from typing import Any, Generic, NoReturn, TypeVar | ||
|
||
from kedro.io import AbstractDataset, DatasetError | ||
from langchain_openai import ChatOpenAI, OpenAIEmbeddings | ||
|
||
OPENAI_TYPE = TypeVar("OPENAI_TYPE") | ||
|
||
|
||
class OpenAIDataset(AbstractDataset[None, OPENAI_TYPE], Generic[OPENAI_TYPE]):
    """OpenAI dataset used to access credentials at runtime.

    Concrete subclasses only need to implement ``constructor``; loading,
    describing and the read-only ``_save`` are handled here.
    """

    @property
    @abstractmethod
    def constructor(self) -> type[OPENAI_TYPE]:
        """Return the OpenAI class to construct in the _load method."""

    def __init__(self, credentials: dict[str, str], kwargs: dict[str, Any] = None):
        """Constructor.

        Args:
            credentials: must contain `openai_api_base` and `openai_api_key`.
            kwargs: keyword arguments passed to the underlying constructor.

        Raises:
            KeyError: if either required key is missing from ``credentials``.
        """
        self.openai_api_base = credentials["openai_api_base"]
        self.openai_api_key = credentials["openai_api_key"]
        # A missing or empty ``kwargs`` is normalised to an empty dict so that
        # it can always be unpacked in ``_load``.
        self.kwargs = kwargs or {}

    def _describe(self) -> dict[str, Any]:
        """Return a shallow copy of the model kwargs; credentials are not included."""
        return {**self.kwargs}

    def _save(self, data: None) -> NoReturn:
        """Always raise ``DatasetError``: this dataset cannot be saved to."""
        raise DatasetError(f"{self.__class__.__name__} is a read only data set type")

    def _load(self) -> OPENAI_TYPE:
        """Instantiate ``self.constructor`` with the stored credentials and kwargs."""
        return self.constructor(
            openai_api_base=self.openai_api_base,
            openai_api_key=self.openai_api_key,
            **self.kwargs,
        )
|
||
|
||
class OpenAIEmbeddingsDataset(OpenAIDataset[OpenAIEmbeddings]):
    """``OpenAIEmbeddingsDataset`` loads a OpenAIEmbeddings `langchain <https://python.langchain.com/>`_ model.

    Example usage for the :doc:`YAML API <kedro:data/data_catalog_yaml_examples>`:

    catalog.yml:

    .. code-block:: yaml

        text_embedding_ada_002:
          type: langchain.OpenAIEmbeddingsDataset
          kwargs:
            model: "text-embedding-ada-002"
          credentials: openai

    credentials.yml:

    .. code-block:: yaml

        openai:
          openai_api_base: <openai-api-base>
          openai_api_key: <openai-api-key>

    Example usage for the
    `Python API <https://kedro.readthedocs.io/en/stable/data/\
    advanced_data_catalog_usage.html>`_:

    .. code-block:: python

        >>> from kedro_datasets_experimental.langchain import OpenAIEmbeddingsDataset
        >>>
        >>> embeddings = OpenAIEmbeddingsDataset(
        ...     credentials={
        ...         "openai_api_base": "<openai-api-base>",
        ...         "openai_api_key": "<openai-api-key>",
        ...     },
        ...     kwargs={
        ...         "model": "text-embedding-ada-002",
        ...     },
        ... ).load()
        >>>
        >>> # See: https://python.langchain.com/docs/integrations/text_embedding/openai
        >>> embeddings.embed_query("Hello world!")
    """

    @property
    def constructor(self) -> type[OpenAIEmbeddings]:
        """Return the ``OpenAIEmbeddings`` class itself, not an instance."""
        embeddings_cls = OpenAIEmbeddings
        return embeddings_cls
|
||
|
||
class ChatOpenAIDataset(OpenAIDataset[ChatOpenAI]):
    """``ChatOpenAIDataset`` loads a ChatOpenAI `langchain <https://python.langchain.com/>`_ model.

    Example usage for the :doc:`YAML API <kedro:data/data_catalog_yaml_examples>`:

    catalog.yml:

    .. code-block:: yaml

        gpt_3_5_turbo:
          type: langchain.ChatOpenAIDataset
          kwargs:
            model: "gpt-3.5-turbo"
            temperature: 0.0
          credentials: openai

    credentials.yml:

    .. code-block:: yaml

        openai:
          openai_api_base: <openai-api-base>
          openai_api_key: <openai-api-key>

    Example usage for the
    `Python API <https://kedro.readthedocs.io/en/stable/data/\
    advanced_data_catalog_usage.html>`_:

    .. code-block:: python

        >>> from kedro_datasets_experimental.langchain import ChatOpenAIDataset
        >>>
        >>> llm = ChatOpenAIDataset(
        ...     credentials={
        ...         "openai_api_base": "<openai-api-base>",
        ...         "openai_api_key": "<openai-api-key>",
        ...     },
        ...     kwargs={
        ...         "model": "gpt-3.5-turbo",
        ...         "temperature": 0,
        ...     },
        ... ).load()
        >>>
        >>> # See: https://python.langchain.com/docs/integrations/chat/openai
        >>> llm.invoke("Hello world!")
    """

    @property
    def constructor(self) -> type[ChatOpenAI]:
        """Return the ``ChatOpenAI`` class itself, not an instance."""
        chat_cls = ChatOpenAI
        return chat_cls
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters