diff --git a/src/datachain/lib/dc.py b/src/datachain/lib/dc.py
index a99c4f96d..e3f92a7ad 100644
--- a/src/datachain/lib/dc.py
+++ b/src/datachain/lib/dc.py
@@ -415,7 +415,7 @@ def from_storage(
             .save(list_dataset_name, listing=True)
         )

-        dc = cls.from_dataset(list_dataset_name, session=session)
+        dc = cls.from_dataset(list_dataset_name, session=session, settings=settings)
         dc.signals_schema = dc.signals_schema.mutate({f"{object_name}": file_type})

         return ls(dc, list_path, recursive=recursive, object_name=object_name)
@@ -426,6 +426,7 @@ def from_dataset(
         name: str,
         version: Optional[int] = None,
         session: Optional[Session] = None,
+        settings: Optional[dict] = None,
     ) -> "DataChain":
         """Get data from a saved Dataset. It returns the chain itself.

@@ -438,7 +439,7 @@ def from_dataset(
             chain = DataChain.from_dataset("my_cats")
             ```
         """
-        return DataChain(name=name, version=version, session=session)
+        return DataChain(name=name, version=version, session=session, settings=settings)

     @classmethod
     def from_json(
@@ -1622,6 +1623,8 @@ def from_csv(
         model_name: str = "",
         source: bool = True,
         nrows=None,
+        session: Optional[Session] = None,
+        settings: Optional[dict] = None,
         **kwargs,
     ) -> "DataChain":
         """Generate chain from csv files.
@@ -1638,6 +1641,8 @@ def from_csv(
             model_name : Generated model name.
             source : Whether to include info about the source file.
             nrows : Optional row limit.
+            session : Session to use for the chain.
+            settings : Settings to use for the chain.
         Example:
             Reading a csv file:
@@ -1654,7 +1659,9 @@ def from_csv(
         from pyarrow.csv import ConvertOptions, ParseOptions, ReadOptions
         from pyarrow.dataset import CsvFileFormat

-        chain = DataChain.from_storage(path, **kwargs)
+        chain = DataChain.from_storage(
+            path, session=session, settings=settings, **kwargs
+        )

         column_names = None
         if not header:
@@ -1701,6 +1708,8 @@ def from_parquet(
         object_name: str = "",
         model_name: str = "",
         source: bool = True,
+        session: Optional[Session] = None,
+        settings: Optional[dict] = None,
         **kwargs,
     ) -> "DataChain":
         """Generate chain from parquet files.
@@ -1713,6 +1722,8 @@ def from_parquet(
             object_name : Created object column name.
             model_name : Generated model name.
             source : Whether to include info about the source file.
+            session : Session to use for the chain.
+            settings : Settings to use for the chain.

         Example:
             Reading a single file:
@@ -1725,7 +1736,9 @@ def from_parquet(
             dc = DataChain.from_parquet("s3://mybucket/dir")
             ```
         """
-        chain = DataChain.from_storage(path, **kwargs)
+        chain = DataChain.from_storage(
+            path, session=session, settings=settings, **kwargs
+        )
         return chain.parse_tabular(
             output=output,
             object_name=object_name,
diff --git a/src/datachain/lib/model_store.py b/src/datachain/lib/model_store.py
index 9713e9fd2..ce54f6cf4 100644
--- a/src/datachain/lib/model_store.py
+++ b/src/datachain/lib/model_store.py
@@ -1,3 +1,4 @@
+import inspect
 import logging
 from typing import ClassVar, Optional

@@ -69,7 +70,11 @@ def remove(cls, fr: type) -> None:

     @staticmethod
     def is_pydantic(val):
-        return not hasattr(val, "__origin__") and issubclass(val, BaseModel)
+        return (
+            not hasattr(val, "__origin__")
+            and inspect.isclass(val)
+            and issubclass(val, BaseModel)
+        )

     @staticmethod
     def to_pydantic(val) -> Optional[type[BaseModel]]: