# Summary: using Python's `in` operator against a column now raises ValueError.
#
# python/pyspark/sql/column.py
#   `__contains__` is no longer bound to `_bin_op("contains")`. It becomes an
#   explicit method that always raises ValueError; the message directs callers
#   to 'contains' for a string column or the 'array_contains' function for an
#   array column. The `item` argument is accepted but not used.
#
# python/pyspark/sql/tests.py
#   `test_column_operators` additionally asserts that evaluating `1 in cs`
#   raises ValueError whose message matches
#   "Cannot apply 'in' operator against a column".
#
# NOTE(review): this patch was recovered from a copy whose line breaks and
# indentation had been collapsed. The leading whitespace inside the hunks below
# is reconstructed (4-space class body, 8-space test-method body, aligned
# continuation lines) -- confirm against the target files before applying.

diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index c10ab9638a21..ec05c18d4f06 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -180,7 +180,9 @@ def __init__(self, jc):
     __ror__ = _bin_op("or")
 
     # container operators
-    __contains__ = _bin_op("contains")
+    def __contains__(self, item):
+        raise ValueError("Cannot apply 'in' operator against a column: please use 'contains' "
+                         "in a string column or 'array_contains' function for an array column.")
 
     # bitwise operators
     bitwiseOR = _bin_op("bitwiseOR")
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index e943f8da3db1..81f3d1d36a34 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -967,6 +967,9 @@ def test_column_operators(self):
             cs.startswith('a'), cs.endswith('a')
         self.assertTrue(all(isinstance(c, Column) for c in css))
         self.assertTrue(isinstance(ci.cast(LongType()), Column))
+        self.assertRaisesRegexp(ValueError,
+                                "Cannot apply 'in' operator against a column",
+                                lambda: 1 in cs)
 
     def test_column_getitem(self):
         from pyspark.sql.functions import col