77
88from datachain .lib .convert .python_to_sql import python_to_sql
99from datachain .lib .convert .sql_to_python import sql_to_python
10+ from datachain .lib .model_store import ModelStore
1011from datachain .lib .utils import DataChainColumnError , DataChainParamsError
1112from datachain .query .schema import Column , ColumnMeta
1213from datachain .sql .functions import numeric
@@ -415,6 +416,20 @@ def get_column(
415416 label : str | None = None ,
416417 table : "TableClause | None" = None ,
417418 ) -> Column :
419+ # Guard against using complex (pydantic) object columns in SQL funcs
420+ if signals_schema and self ._db_cols :
421+ for arg in self ._db_cols :
422+ # _db_cols normalizes known columns to strings; skip non-string args
423+ if not isinstance (arg , str ):
424+ continue
425+ t_with_sub = signals_schema .get_column_type (arg , with_subtree = True )
426+ if ModelStore .is_pydantic (t_with_sub ):
427+ raise DataChainParamsError (
428+ f"Function { self .name } doesn't support complex object "
429+ f"columns like '{ arg } '. Use a leaf field (e.g., "
430+ f"'{ arg } .path') or use UDFs to operate on complex objects."
431+ )
432+
418433 col_type = self .get_result_type (signals_schema )
419434 sql_type = python_to_sql (col_type )
420435
@@ -434,6 +449,7 @@ def get_col(col: ColT, string_as_literal=False) -> ColT:
434449 return col
435450
436451 cols = [get_col (col ) for col in self ._db_cols ]
452+
437453 kwargs = {k : get_col (v , string_as_literal = True ) for k , v in self .kwargs .items ()}
438454 func_col = self .inner (* cols , * self .args , ** kwargs )
439455
@@ -470,9 +486,8 @@ def get_db_col_type(signals_schema: "SignalSchema", col: ColT) -> "DataType":
470486 if isinstance (col , ColumnElement ) and not hasattr (col , "name" ):
471487 return sql_to_python (col )
472488
473- return signals_schema .get_column_type (
474- col .name if isinstance (col , ColumnElement ) else col # type: ignore[arg-type]
475- )
489+ name = col .name if isinstance (col , ColumnElement ) else col # type: ignore[assignment]
490+ return signals_schema .get_column_type (name ) # type: ignore[arg-type]
476491
477492
478493def _truediv (a , b ):
0 commit comments