-
Notifications
You must be signed in to change notification settings - Fork 29.1k
[SPARK-40533] [CONNECT] [PYTHON] Support most built-in literal types for Python in Spark Connect #38462
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-40533] [CONNECT] [PYTHON] Support most built-in literal types for Python in Spark Connect #38462
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,9 +14,11 @@ | |
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| # | ||
|
|
||
| import uuid | ||
| from typing import cast, get_args, TYPE_CHECKING, Optional, Callable, Any | ||
|
|
||
| import decimal | ||
| import datetime | ||
|
|
||
| import pyspark.sql.connect.proto as proto | ||
| from pyspark.sql.connect._typing import PrimitiveType | ||
|
|
@@ -87,7 +89,7 @@ class LiteralExpression(Expression): | |
| The Python types are converted best effort into the relevant proto types. On the Spark Connect | ||
| server side, the proto types are converted to the Catalyst equivalents.""" | ||
|
|
||
| def __init__(self, value: PrimitiveType) -> None: | ||
| def __init__(self, value: Any) -> None: | ||
| super().__init__() | ||
| self._value = value | ||
|
|
||
|
|
@@ -99,11 +101,59 @@ def to_plan(self, session: Optional["RemoteSparkSession"]) -> "proto.Expression" | |
| value_type = type(self._value) | ||
| exp = proto.Expression() | ||
| if value_type is int: | ||
| exp.literal.i32 = cast(int, self._value) | ||
| exp.literal.i64 = cast(int, self._value) | ||
| elif value_type is bool: | ||
| exp.literal.boolean = cast(bool, self._value) | ||
| elif value_type is str: | ||
| exp.literal.string = cast(str, self._value) | ||
| elif value_type is float: | ||
| exp.literal.fp64 = cast(float, self._value) | ||
| elif value_type is decimal.Decimal: | ||
| d_v = cast(decimal.Decimal, self._value) | ||
| v_tuple = d_v.as_tuple() | ||
| exp.literal.decimal.scale = abs(v_tuple.exponent) | ||
| exp.literal.decimal.precision = len(v_tuple.digits) - abs(v_tuple.exponent) | ||
| # Two complement yeah... | ||
| raise ValueError("Python Decimal not supported.") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we remove if this is not implemented yet? |
||
| elif value_type is bytes: | ||
| exp.literal.binary = self._value | ||
| elif value_type is datetime.datetime: | ||
| # Microseconds since epoch. | ||
| dt = cast(datetime.datetime, self._value) | ||
| v = dt - datetime.datetime(1970, 1, 1, 0, 0, 0, 0) | ||
| exp.literal.timestamp = int(v / datetime.timedelta(microseconds=1)) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we maybe match the implementation in PySpark's: https://github.com/apache/spark/blob/master/python/pyspark/sql/types.py#L254-L260 ? |
||
| elif value_type is datetime.time: | ||
| # Nanoseconds of the day. | ||
| tv = cast(datetime.time, self._value) | ||
| offset = (tv.second + tv.minute * 60 + tv.hour * 3600) * 1000 + tv.microsecond | ||
| exp.literal.time = int(offset * 1000) | ||
| elif value_type is datetime.date: | ||
| # Days since epoch. | ||
| days_since_epoch = (cast(datetime.date, self._value) - datetime.date(1970, 1, 1)).days | ||
| exp.literal.date = days_since_epoch | ||
| elif value_type is uuid.UUID: | ||
grundprinzip marked this conversation as resolved.
Show resolved
Hide resolved
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we could remove |
||
| raise ValueError("Python UUID type not supported.") | ||
| elif value_type is list: | ||
| lv = cast(list, self._value) | ||
| for k in lv: | ||
| if type(k) is LiteralExpression: | ||
| exp.literal.list.values.append(k.to_plan(session).literal) | ||
| else: | ||
| exp.literal.list.values.append(LiteralExpression(k).to_plan(session).literal) | ||
| elif value_type is dict: | ||
| mv = cast(dict, self._value) | ||
| for k in mv: | ||
| kv = proto.Expression.Literal.Map.KeyValue() | ||
| if type(k) is LiteralExpression: | ||
| kv.key.CopyFrom(k.to_plan(session).literal) | ||
| else: | ||
| kv.key.CopyFrom(LiteralExpression(k).to_plan(session).literal) | ||
|
|
||
| if type(mv[k]) is LiteralExpression: | ||
| kv.value.CopyFrom(mv[k].to_plan(session).literal) | ||
| else: | ||
| kv.value.CopyFrom(LiteralExpression(mv[k]).to_plan(session).literal) | ||
| exp.literal.map.key_values.append(kv) | ||
| else: | ||
| raise ValueError(f"Could not convert literal for type {type(self._value)}") | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.