diff --git a/docs/book/user-guide/advanced-guide/artifact-management/handle-custom-data-types.md b/docs/book/user-guide/advanced-guide/artifact-management/handle-custom-data-types.md index 72ec26ba069..12090637eac 100644 --- a/docs/book/user-guide/advanced-guide/artifact-management/handle-custom-data-types.md +++ b/docs/book/user-guide/advanced-guide/artifact-management/handle-custom-data-types.md @@ -30,7 +30,7 @@ In addition to the built-in materializers, ZenML also provides several integration-specific materializers that can be activated by installing the respective [integration](../../component-guide/integration-overview.md): -
IntegrationMaterializerHandled Data TypesStorage Format
bentomlBentoMaterializerbentoml.Bento.bento
deepchecksDeepchecksResultMateriailzerdeepchecks.CheckResult, deepchecks.SuiteResult.json
evidentlyEvidentlyProfileMaterializerevidently.Profile.json
great_expectationsGreatExpectationsMaterializergreat_expectations.ExpectationSuite, great_expectations.CheckpointResult.json
huggingfaceHFDatasetMaterializerdatasets.Dataset, datasets.DatasetDictDirectory
huggingfaceHFPTModelMaterializertransformers.PreTrainedModelDirectory
huggingfaceHFTFModelMaterializertransformers.TFPreTrainedModelDirectory
huggingfaceHFTokenizerMaterializertransformers.PreTrainedTokenizerBaseDirectory
lightgbmLightGBMBoosterMaterializerlgbm.Booster.txt
lightgbmLightGBMDatasetMaterializerlgbm.Dataset.binary
llama_indexLlamaIndexGPTFaissIndexMaterializerllama_index.GPTFaissIndex.json
neural_prophetNeuralProphetMaterializerNeuralProphet.pt
pillowPillowImageMaterializerPillow.Image.PNG
pycaretPyCaretMaterializerAny sklearn, xgboost, lightgbm or catboost model.pkl
pytorchPyTorchDataLoaderMaterializertorch.Dataset, torch.DataLoader.pt
pytorchPyTorchModuleMaterializertorch.Module.pt
scipySparseMaterializerscipy.spmatrix.npz
sparkSparkDataFrameMaterializerpyspark.DataFrame.parquet
sparkSparkModelMaterializerpyspark.Transformerpyspark.Estimator
tensorflowKerasMaterializertf.keras.ModelDirectory
tensorflowTensorflowDatasetMaterializertf.DatasetDirectory
whylogsWhylogsMaterializerwhylogs.DatasetProfileView.pb
xgboostXgboostBoosterMaterializerxgb.Booster.json
xgboostXgboostDMatrixMaterializerxgb.DMatrix.binary
+
IntegrationMaterializerHandled Data TypesStorage Format
bentomlBentoMaterializerbentoml.Bento.bento
deepchecksDeepchecksResultMaterializerdeepchecks.CheckResult, deepchecks.SuiteResult.json
evidentlyEvidentlyProfileMaterializerevidently.Profile.json
great_expectationsGreatExpectationsMaterializergreat_expectations.ExpectationSuite, great_expectations.CheckpointResult.json
huggingfaceHFDatasetMaterializerdatasets.Dataset, datasets.DatasetDictDirectory
huggingfaceHFPTModelMaterializertransformers.PreTrainedModelDirectory
huggingfaceHFTFModelMaterializertransformers.TFPreTrainedModelDirectory
huggingfaceHFTokenizerMaterializertransformers.PreTrainedTokenizerBaseDirectory
lightgbmLightGBMBoosterMaterializerlgbm.Booster.txt
lightgbmLightGBMDatasetMaterializerlgbm.Dataset.binary
neural_prophetNeuralProphetMaterializerNeuralProphet.pt
pillowPillowImageMaterializerPillow.Image.PNG
pycaretPyCaretMaterializerAny sklearn, xgboost, lightgbm or catboost model.pkl
pytorchPyTorchDataLoaderMaterializertorch.Dataset, torch.DataLoader.pt
pytorchPyTorchModuleMaterializertorch.Module.pt
scipySparseMaterializerscipy.spmatrix.npz
sparkSparkDataFrameMaterializerpyspark.DataFrame.parquet
sparkSparkModelMaterializerpyspark.Transformerpyspark.Estimator
tensorflowKerasMaterializertf.keras.ModelDirectory
tensorflowTensorflowDatasetMaterializertf.DatasetDirectory
whylogsWhylogsMaterializerwhylogs.DatasetProfileView.pb
xgboostXgboostBoosterMaterializerxgb.Booster.json
xgboostXgboostDMatrixMaterializerxgb.DMatrix.binary
{% hint style="warning" %} If you are running pipelines with a Docker-based diff --git a/scripts/install-zenml-dev.sh b/scripts/install-zenml-dev.sh index abad610e523..89afa61d505 100755 --- a/scripts/install-zenml-dev.sh +++ b/scripts/install-zenml-dev.sh @@ -31,7 +31,7 @@ install_integrations() { # figure out the python version python_version=$(python -c "import sys; print('.'.join(map(str, sys.version_info[:2])))") - ignore_integrations="feast label_studio bentoml seldon kserve langchain llama_index pycaret skypilot_aws skypilot_gcp skypilot_azure" + ignore_integrations="feast label_studio bentoml seldon kserve pycaret skypilot_aws skypilot_gcp skypilot_azure" # if python version is 3.11, exclude all integrations depending on kfp # because they are not yet compatible with python 3.11 if [ "$python_version" = "3.11" ]; then diff --git a/src/zenml/integrations/__init__.py b/src/zenml/integrations/__init__.py index cc5bb823184..f1c1e3e7504 100644 --- a/src/zenml/integrations/__init__.py +++ b/src/zenml/integrations/__init__.py @@ -69,5 +69,5 @@ from zenml.integrations.xgboost import XgboostIntegration # noqa if sys.version_info > (3, 7): - from zenml.integrations.llama_index import LlamaIndexIntegration # noqa + # from zenml.integrations.llama_index import LlamaIndexIntegration # noqa from zenml.integrations.langchain import LangchainIntegration # noqa diff --git a/src/zenml/integrations/constants.py b/src/zenml/integrations/constants.py index d11445c954b..54dba7c205f 100644 --- a/src/zenml/integrations/constants.py +++ b/src/zenml/integrations/constants.py @@ -37,7 +37,7 @@ LABEL_STUDIO = "label_studio" LANGCHAIN = "langchain" LIGHTGBM = "lightgbm" -LLAMA_INDEX = "llama_index" +# LLAMA_INDEX = "llama_index" MLFLOW = "mlflow" NEPTUNE = "neptune" NEURAL_PROPHET = "neural_prophet" diff --git a/src/zenml/integrations/gitlab/code_repositories/gitlab_code_repository.py b/src/zenml/integrations/gitlab/code_repositories/gitlab_code_repository.py index 
399b69cad43..984f10087ed 100644 --- a/src/zenml/integrations/gitlab/code_repositories/gitlab_code_repository.py +++ b/src/zenml/integrations/gitlab/code_repositories/gitlab_code_repository.py @@ -16,8 +16,8 @@ import re from typing import Optional -from gitlab import Gitlab -from gitlab.v4.objects import Project +from gitlab import Gitlab # type: ignore +from gitlab.v4.objects import Project # type: ignore from zenml.code_repositories import ( BaseCodeRepository, diff --git a/src/zenml/integrations/langchain/__init__.py b/src/zenml/integrations/langchain/__init__.py index 7e3d6217b7e..d19b11e17cc 100644 --- a/src/zenml/integrations/langchain/__init__.py +++ b/src/zenml/integrations/langchain/__init__.py @@ -25,7 +25,7 @@ class LangchainIntegration(Integration): """Definition of langchain integration for ZenML.""" NAME = LANGCHAIN - REQUIREMENTS = ["langchain>=0.0.116"] + REQUIREMENTS = ["langchain>=0.0.325"] @classmethod def activate(cls) -> None: diff --git a/src/zenml/integrations/langchain/materializers/vector_store_materializer.py b/src/zenml/integrations/langchain/materializers/vector_store_materializer.py index 6627150d78b..481f9873d2f 100644 --- a/src/zenml/integrations/langchain/materializers/vector_store_materializer.py +++ b/src/zenml/integrations/langchain/materializers/vector_store_materializer.py @@ -24,7 +24,7 @@ if TYPE_CHECKING and sys.version_info < (3, 8): VectorStore = Any else: - from langchain.vectorstores import VectorStore + from langchain.vectorstores.base import VectorStore class LangchainVectorStoreMaterializer(CloudpickleMaterializer): diff --git a/src/zenml/integrations/llama_index/__init__.py b/src/zenml/integrations/llama_index/__init__.py index 2d547f69af9..cd0247389d0 100644 --- a/src/zenml/integrations/llama_index/__init__.py +++ b/src/zenml/integrations/llama_index/__init__.py @@ -12,25 +12,25 @@ # or implied. See the License for the specific language governing # permissions and limitations under the License. 
"""Initialization of the Llama Index integration.""" -from zenml.integrations.integration import Integration +# from zenml.integrations.integration import Integration -from zenml.logger import get_logger -from zenml.integrations.constants import LLAMA_INDEX -from zenml.integrations.integration import Integration +# from zenml.logger import get_logger +# from zenml.integrations.constants import LLAMA_INDEX +# from zenml.integrations.integration import Integration -logger = get_logger(__name__) +# logger = get_logger(__name__) -class LlamaIndexIntegration(Integration): - """Definition of Llama Index integration for ZenML.""" +# class LlamaIndexIntegration(Integration): +# """Definition of Llama Index integration for ZenML.""" - NAME = LLAMA_INDEX - REQUIREMENTS = ["llama_index>=0.4.28,<0.6.0"] +# NAME = LLAMA_INDEX +# REQUIREMENTS = ["llama_index>=0.4.28,<0.6.0"] - @classmethod - def activate(cls) -> None: - """Activates the integration.""" - from zenml.integrations.llama_index import materializers # noqa +# @classmethod +# def activate(cls) -> None: +# """Activates the integration.""" +# from zenml.integrations.llama_index import materializers # noqa -LlamaIndexIntegration.check_installation() +# LlamaIndexIntegration.check_installation() diff --git a/src/zenml/integrations/llama_index/materializers/__init__.py b/src/zenml/integrations/llama_index/materializers/__init__.py index f46949b2bb7..a8bd5dae14a 100644 --- a/src/zenml/integrations/llama_index/materializers/__init__.py +++ b/src/zenml/integrations/llama_index/materializers/__init__.py @@ -13,10 +13,10 @@ # permissions and limitations under the License. 
"""Initialization of the Llama Index materializers.""" -from zenml.integrations.llama_index.materializers.gpt_index_materializer import ( - LlamaIndexGPTIndexMaterializer, - LlamaIndexGPTFaissIndexMaterializer, -) -from zenml.integrations.llama_index.materializers.document_materializer import ( - LlamaIndexDocumentMaterializer, -) +# from zenml.integrations.llama_index.materializers.gpt_index_materializer import ( +# LlamaIndexGPTIndexMaterializer, +# LlamaIndexGPTFaissIndexMaterializer, +# ) +# from zenml.integrations.llama_index.materializers.document_materializer import ( +# LlamaIndexDocumentMaterializer, +# ) diff --git a/src/zenml/integrations/llama_index/materializers/document_materializer.py b/src/zenml/integrations/llama_index/materializers/document_materializer.py index 9829232a501..4090888518a 100644 --- a/src/zenml/integrations/llama_index/materializers/document_materializer.py +++ b/src/zenml/integrations/llama_index/materializers/document_materializer.py @@ -13,58 +13,58 @@ # permissions and limitations under the License. 
"""Implementation of the llama-index document materializer.""" -import sys -from typing import TYPE_CHECKING, Any, ClassVar, Dict, Tuple, Type +# import sys +# from typing import TYPE_CHECKING, Any, ClassVar, Dict, Tuple, Type -from zenml.enums import ArtifactType -from zenml.integrations.langchain.materializers.document_materializer import ( - LangchainDocumentMaterializer, -) +# from zenml.enums import ArtifactType +# from zenml.integrations.langchain.materializers.document_materializer import ( +# LangchainDocumentMaterializer, +# ) -if TYPE_CHECKING: - from zenml.metadata.metadata_types import MetadataType +# if TYPE_CHECKING: +# from zenml.metadata.metadata_types import MetadataType -if TYPE_CHECKING and sys.version_info < (3, 8): - Document = Any - LCDocument = Any -else: - from langchain.docstore.document import Document as LCDocument - from llama_index.readers.schema.base import Document +# if TYPE_CHECKING and sys.version_info < (3, 8): +# Document = Any +# LCDocument = Any +# else: +# from langchain.docstore.document import Document as LCDocument +# from llama_index.readers.schema.base import Document -class LlamaIndexDocumentMaterializer(LangchainDocumentMaterializer): - """Handle serialization and deserialization of llama-index documents.""" +# class LlamaIndexDocumentMaterializer(LangchainDocumentMaterializer): +# """Handle serialization and deserialization of llama-index documents.""" - ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.DATA - ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (Document,) +# ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.DATA +# ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (Document,) - def load(self, data_type: Type[Any]) -> Any: - """Reads a llama-index document from JSON. +# def load(self, data_type: Type[Any]) -> Any: +# """Reads a llama-index document from JSON. - Args: - data_type: The type of the data to read. +# Args: +# data_type: The type of the data to read. 
- Returns: - The data read. - """ - return Document.from_langchain_format(super().load(LCDocument)) +# Returns: +# The data read. +# """ +# return Document.from_langchain_format(super().load(LCDocument)) - def save(self, data: Any) -> None: - """Serialize a llama-index document as a Langchain document. +# def save(self, data: Any) -> None: +# """Serialize a llama-index document as a Langchain document. - Args: - data: The data to store. - """ - super().save(data.to_langchain_format()) +# Args: +# data: The data to store. +# """ +# super().save(data.to_langchain_format()) - def extract_metadata(self, data: Any) -> Dict[str, "MetadataType"]: - """Extract metadata from the given Llama Index document. +# def extract_metadata(self, data: Any) -> Dict[str, "MetadataType"]: +# """Extract metadata from the given Llama Index document. - Args: - data: The BaseModel object to extract metadata from. +# Args: +# data: The BaseModel object to extract metadata from. - Returns: - The extracted metadata as a dictionary. - """ - return super().extract_metadata(data.to_langchain_format()) +# Returns: +# The extracted metadata as a dictionary. +# """ +# return super().extract_metadata(data.to_langchain_format()) diff --git a/src/zenml/integrations/llama_index/materializers/gpt_index_materializer.py b/src/zenml/integrations/llama_index/materializers/gpt_index_materializer.py index 51d70709267..02062a1dfb5 100644 --- a/src/zenml/integrations/llama_index/materializers/gpt_index_materializer.py +++ b/src/zenml/integrations/llama_index/materializers/gpt_index_materializer.py @@ -13,140 +13,140 @@ # permissions and limitations under the License. 
"""Implementation of the llama-index GPT index materializer.""" -import os -import sys -import tempfile -from typing import ( - TYPE_CHECKING, - Any, - ClassVar, - Generic, - Tuple, - Type, - TypeVar, - cast, -) +# import os +# import sys +# import tempfile +# from typing import ( +# TYPE_CHECKING, +# Any, +# ClassVar, +# Generic, +# Tuple, +# Type, +# TypeVar, +# cast, +# ) -from zenml.enums import ArtifactType -from zenml.io import fileio -from zenml.materializers.base_materializer import BaseMaterializer +# from zenml.enums import ArtifactType +# from zenml.io import fileio +# from zenml.materializers.base_materializer import BaseMaterializer -DEFAULT_FILENAME = "index.json" -DEFAULT_FAISS_FILENAME = "faiss_index.json" +# DEFAULT_FILENAME = "index.json" +# DEFAULT_FAISS_FILENAME = "faiss_index.json" -if TYPE_CHECKING and sys.version_info < (3, 8): - BaseGPTIndex = Any - GPTFaissIndex = Any - T = TypeVar("T", bound=Any) -else: - from llama_index.indices.base import BaseGPTIndex - from llama_index.indices.vector_store import GPTFaissIndex - - T = TypeVar("T", bound=BaseGPTIndex[Any]) - - -class LlamaIndexGPTIndexMaterializer(Generic[T], BaseMaterializer): - """Materializer for llama_index GPT indices.""" - - ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.MODEL - ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (BaseGPTIndex,) - - def load(self, data_type: Type[T]) -> T: - """Loads a llama-index GPT index from disk. - - Args: - data_type: The type of the index. - - Returns: - The index. 
- """ - filepath = os.path.join(self.uri, DEFAULT_FILENAME) - - # Create a temporary folder - temp_dir = tempfile.mkdtemp(prefix="zenml-temp-") - temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME) - - # Copy from artifact store to temporary file - fileio.copy(filepath, temp_file) - - index = data_type.load_from_disk(save_path=filepath) - assert isinstance(index, data_type) - - # Cleanup and return - fileio.rmtree(temp_dir) - return index - - def save(self, index: T) -> None: - """Save a llama-index GPT index to disk. - - Args: - index: The index to save. - """ - filepath = os.path.join(self.uri, DEFAULT_FILENAME) - - with tempfile.NamedTemporaryFile( - mode="w", suffix=".json", delete=False - ) as f: - index.save_to_disk(save_path=f.name) - # Copy it into artifact store - fileio.copy(f.name, filepath) - - # Close and remove the temporary file - f.close() - fileio.remove(f.name) - - -class LlamaIndexGPTFaissIndexMaterializer(BaseMaterializer): - """Materializer for llama_index GPT faiss indices.""" - - ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.MODEL - ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (GPTFaissIndex,) - - def load(self, data_type: Type[GPTFaissIndex]) -> GPTFaissIndex: - """Load a llama-index GPT faiss index from disk. - - Args: - data_type: The type of the index. - - Returns: - The index. 
- """ - filepath = os.path.join(self.uri, DEFAULT_FILENAME) - faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME) - - # Create a temporary folder - temp_dir = tempfile.mkdtemp(prefix="zenml-temp-") - temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME) - - # Copy from artifact store to temporary file - fileio.copy(filepath, temp_file) +# if TYPE_CHECKING and sys.version_info < (3, 8): +# BaseGPTIndex = Any +# GPTFaissIndex = Any +# T = TypeVar("T", bound=Any) +# else: +# from llama_index.indices.base import BaseGPTIndex +# from llama_index.indices.vector_store import GPTFaissIndex + +# T = TypeVar("T", bound=BaseGPTIndex[Any]) + + +# class LlamaIndexGPTIndexMaterializer(Generic[T], BaseMaterializer): +# """Materializer for llama_index GPT indices.""" + +# ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.MODEL +# ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (BaseGPTIndex,) + +# def load(self, data_type: Type[T]) -> T: +# """Loads a llama-index GPT index from disk. + +# Args: +# data_type: The type of the index. + +# Returns: +# The index. +# """ +# filepath = os.path.join(self.uri, DEFAULT_FILENAME) + +# # Create a temporary folder +# temp_dir = tempfile.mkdtemp(prefix="zenml-temp-") +# temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME) + +# # Copy from artifact store to temporary file +# fileio.copy(filepath, temp_file) + +# index = data_type.load_from_disk(save_path=filepath) +# assert isinstance(index, data_type) + +# # Cleanup and return +# fileio.rmtree(temp_dir) +# return index + +# def save(self, index: T) -> None: +# """Save a llama-index GPT index to disk. + +# Args: +# index: The index to save. 
+# """ +# filepath = os.path.join(self.uri, DEFAULT_FILENAME) + +# with tempfile.NamedTemporaryFile( +# mode="w", suffix=".json", delete=False +# ) as f: +# index.save_to_disk(save_path=f.name) +# # Copy it into artifact store +# fileio.copy(f.name, filepath) + +# # Close and remove the temporary file +# f.close() +# fileio.remove(f.name) + + +# class LlamaIndexGPTFaissIndexMaterializer(BaseMaterializer): +# """Materializer for llama_index GPT faiss indices.""" + +# ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.MODEL +# ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (GPTFaissIndex,) + +# def load(self, data_type: Type[GPTFaissIndex]) -> GPTFaissIndex: +# """Load a llama-index GPT faiss index from disk. + +# Args: +# data_type: The type of the index. + +# Returns: +# The index. +# """ +# filepath = os.path.join(self.uri, DEFAULT_FILENAME) +# faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME) + +# # Create a temporary folder +# temp_dir = tempfile.mkdtemp(prefix="zenml-temp-") +# temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME) + +# # Copy from artifact store to temporary file +# fileio.copy(filepath, temp_file) - index = data_type.load_from_disk( - save_path=filepath, faiss_index_save_path=faiss_filepath - ) +# index = data_type.load_from_disk( +# save_path=filepath, faiss_index_save_path=faiss_filepath +# ) - # Cleanup and return - fileio.rmtree(temp_dir) - return cast(GPTFaissIndex, index) +# # Cleanup and return +# fileio.rmtree(temp_dir) +# return cast(GPTFaissIndex, index) - def save(self, index: GPTFaissIndex) -> None: - """Save a llama-index GPT faiss index to disk. +# def save(self, index: GPTFaissIndex) -> None: +# """Save a llama-index GPT faiss index to disk. - Args: - index: The index to save. - """ - filepath = os.path.join(self.uri, DEFAULT_FILENAME) - faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME) +# Args: +# index: The index to save. 
+# """ +# filepath = os.path.join(self.uri, DEFAULT_FILENAME) +# faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME) - with tempfile.NamedTemporaryFile( - mode="w", suffix=".json", delete=False - ) as f: - index.save_to_disk( - save_path=f.name, faiss_index_save_path=faiss_filepath - ) - # Copy it into artifact store - fileio.copy(f.name, filepath) +# with tempfile.NamedTemporaryFile( +# mode="w", suffix=".json", delete=False +# ) as f: +# index.save_to_disk( +# save_path=f.name, faiss_index_save_path=faiss_filepath +# ) +# # Copy it into artifact store +# fileio.copy(f.name, filepath) - # Close and remove the temporary file - f.close() - fileio.remove(f.name) +# # Close and remove the temporary file +# f.close() +# fileio.remove(f.name) diff --git a/tests/integration/functional/zen_stores/test_zen_store.py b/tests/integration/functional/zen_stores/test_zen_store.py index 7a647ec58fe..fc0427b3586 100644 --- a/tests/integration/functional/zen_stores/test_zen_store.py +++ b/tests/integration/functional/zen_stores/test_zen_store.py @@ -2452,6 +2452,7 @@ def test_latest_version_properly_fetched(self): ) models = zs.list_models(ModelFilterModel()) assert models[0].latest_version == mv.name + time.sleep(1) # thanks to MySQL again! 
class TestModelVersion: diff --git a/tests/unit/integrations/langchain/materializers/test_openai_embedding_materializer_materializer.py b/tests/unit/integrations/langchain/materializers/test_openai_embedding_materializer_materializer.py index 50b759c461b..a10cbd3477a 100644 --- a/tests/unit/integrations/langchain/materializers/test_openai_embedding_materializer_materializer.py +++ b/tests/unit/integrations/langchain/materializers/test_openai_embedding_materializer_materializer.py @@ -26,18 +26,15 @@ def test_langchain_openai_embedding_materializer(clean_client): fake_key = "aria_and_blupus" fake_chunk_size = 1234 - fake_model_name = "zenml_best_model" embeddings = _test_materializer( step_output=OpenAIEmbeddings( chunk_size=fake_chunk_size, openai_api_key=fake_key, - document_model_name=fake_model_name, ), materializer_class=LangchainOpenaiEmbeddingMaterializer, expected_metadata_size=1, ) - assert embeddings.document_model_name == fake_model_name assert embeddings.openai_api_key == fake_key assert embeddings.chunk_size == fake_chunk_size diff --git a/tests/unit/integrations/langchain/materializers/test_vector_store_materializer.py b/tests/unit/integrations/langchain/materializers/test_vector_store_materializer.py new file mode 100644 index 00000000000..bf903cdaa1f --- /dev/null +++ b/tests/unit/integrations/langchain/materializers/test_vector_store_materializer.py @@ -0,0 +1,36 @@ +# Copyright (c) ZenML GmbH 2023. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. 
See the License for the specific language governing +# permissions and limitations under the License. + + +from tests.unit.test_general import _test_materializer + + +def test_langchain_vectorstore_materializer(clean_client): + """Tests the Langchain Vector Store materializer.""" + from langchain.embeddings import FakeEmbeddings + from langchain.vectorstores import SKLearnVectorStore + + from zenml.integrations.langchain.materializers.vector_store_materializer import ( + LangchainVectorStoreMaterializer, + ) + + embeddings = FakeEmbeddings(size=1352) + + langchain_vector_store = _test_materializer( + step_output=SKLearnVectorStore(embedding=embeddings), + materializer_class=LangchainVectorStoreMaterializer, + expected_metadata_size=1, + ) + + assert langchain_vector_store.embeddings diff --git a/tests/unit/integrations/llama_index/materializers/test_llama_index_document_materializer.py b/tests/unit/integrations/llama_index/materializers/test_llama_index_document_materializer.py index b6b06d44d5c..f05615b9730 100644 --- a/tests/unit/integrations/llama_index/materializers/test_llama_index_document_materializer.py +++ b/tests/unit/integrations/llama_index/materializers/test_llama_index_document_materializer.py @@ -13,28 +13,28 @@ # permissions and limitations under the License. 
-from tests.unit.test_general import _test_materializer +# from tests.unit.test_general import _test_materializer +# TODO: turn this back on when we are able to upgrade llama_index integration +# def test_llama_index_document_materializer(clean_client): +# """Tests whether the steps work for the Llama Index Document +# materializer.""" +# from langchain.docstore.document import Document as LCDocument +# from llama_index.readers.schema.base import Document -def test_llama_index_document_materializer(clean_client): - """Tests whether the steps work for the Llama Index Document - materializer.""" - from langchain.docstore.document import Document as LCDocument - from llama_index.readers.schema.base import Document +# from zenml.integrations.llama_index.materializers.document_materializer import ( +# LlamaIndexDocumentMaterializer, +# ) - from zenml.integrations.llama_index.materializers.document_materializer import ( - LlamaIndexDocumentMaterializer, - ) +# page_content = ( +# "Axl, Aria and Blupus were very cold during the winter months." +# ) +# langchain_document = _test_materializer( +# step_output=Document(text=page_content), +# materializer_class=LlamaIndexDocumentMaterializer, +# expected_metadata_size=2, +# ) - page_content = ( - "Axl, Aria and Blupus were very cold during the winter months." - ) - langchain_document = _test_materializer( - step_output=Document(text=page_content), - materializer_class=LlamaIndexDocumentMaterializer, - expected_metadata_size=2, - ) - - assert langchain_document.get_type() == "Document" - assert langchain_document.text == page_content - assert isinstance(langchain_document.to_langchain_format(), LCDocument) +# assert langchain_document.get_type() == "Document" +# assert langchain_document.text == page_content +# assert isinstance(langchain_document.to_langchain_format(), LCDocument)