From e275d0c33b0c765304f15975e54b1ddf7174c5ca Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 4 Oct 2023 16:08:23 +0800 Subject: [PATCH] Revert "[SPARK-45088][PYTHON][CONNECT] Make `getitem` work with duplicated columns" This reverts commit 73d3c49c97ae1be3f9f96fbc86be1c91cd17a656. --- .../protobuf/spark/connect/expressions.proto | 10 -- .../connect/planner/SparkConnectPlanner.scala | 14 +- python/pyspark/sql/connect/dataframe.py | 21 +-- python/pyspark/sql/connect/expressions.py | 34 ----- .../sql/connect/proto/expressions_pb2.py | 126 +++++++++--------- .../sql/connect/proto/expressions_pb2.pyi | 40 ------ python/pyspark/sql/dataframe.py | 5 +- python/pyspark/sql/tests/test_dataframe.py | 47 +------ python/pyspark/sql/tests/test_group.py | 27 ---- .../scala/org/apache/spark/sql/Dataset.scala | 12 -- 10 files changed, 71 insertions(+), 265 deletions(-) diff --git a/connector/connect/common/src/main/protobuf/spark/connect/expressions.proto b/connector/connect/common/src/main/protobuf/spark/connect/expressions.proto index 782bcc5d1fcd3..4aac2bcc612bf 100644 --- a/connector/connect/common/src/main/protobuf/spark/connect/expressions.proto +++ b/connector/connect/common/src/main/protobuf/spark/connect/expressions.proto @@ -48,7 +48,6 @@ message Expression { CommonInlineUserDefinedFunction common_inline_user_defined_function = 15; CallFunction call_function = 16; NamedArgumentExpression named_argument_expression = 17; - GetColumnByOrdinal get_column_by_ordinal = 18; // This field is used to mark extensions to the protocol. When plugins generate arbitrary // relations they can add them here. During the planning the correct resolution is done. @@ -229,15 +228,6 @@ message Expression { optional bool is_metadata_column = 3; } - // An unresolved attribute that is represented by its column index. - message GetColumnByOrdinal { - // (Required) 0-based column index. - int32 ordinal = 1; - - // (Optional) The id of corresponding connect plan. - optional int64 plan_id = 2; - } - // An unresolved function is not explicitly bound to one explicit function, but the function // is resolved during analysis following Sparks name resolution rules. message UnresolvedFunction { diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index 3dd2be9c54cf6..52c61c2cbe98c 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -45,7 +45,7 @@ import org.apache.spark.ml.{functions => MLFunctions} import org.apache.spark.sql.{Column, Dataset, Encoders, ForeachWriter, RelationalGroupedDataset, SparkSession} import org.apache.spark.sql.avro.{AvroDataToCatalyst, CatalystDataToAvro} import org.apache.spark.sql.catalyst.{expressions, AliasIdentifier, FunctionIdentifier} -import org.apache.spark.sql.catalyst.analysis.{GetColumnByOrdinal, GlobalTempView, LocalTempView, MultiAlias, NameParameterizedQuery, PosParameterizedQuery, UnresolvedAlias, UnresolvedAttribute, UnresolvedDeserializer, UnresolvedExtractValue, UnresolvedFunction, UnresolvedRegex, UnresolvedRelation, UnresolvedStar} +import org.apache.spark.sql.catalyst.analysis.{GlobalTempView, LocalTempView, MultiAlias, NameParameterizedQuery, PosParameterizedQuery, UnresolvedAlias, UnresolvedAttribute, UnresolvedDeserializer, UnresolvedExtractValue, UnresolvedFunction, UnresolvedRegex, UnresolvedRelation, UnresolvedStar} import org.apache.spark.sql.catalyst.encoders.{AgnosticEncoder, ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.UnboundRowEncoder import org.apache.spark.sql.catalyst.expressions._ @@ -1353,8 +1353,6 @@ class SparkConnectPlanner(val sessionHolder: SessionHolder) extends Logging { case proto.Expression.ExprTypeCase.LITERAL => transformLiteral(exp.getLiteral) case proto.Expression.ExprTypeCase.UNRESOLVED_ATTRIBUTE => transformUnresolvedAttribute(exp.getUnresolvedAttribute) - case proto.Expression.ExprTypeCase.GET_COLUMN_BY_ORDINAL => - transformGetColumnByOrdinal(exp.getGetColumnByOrdinal) case proto.Expression.ExprTypeCase.UNRESOLVED_FUNCTION => transformUnregisteredFunction(exp.getUnresolvedFunction) .getOrElse(transformUnresolvedFunction(exp.getUnresolvedFunction)) @@ -1408,16 +1406,6 @@ class SparkConnectPlanner(val sessionHolder: SessionHolder) extends Logging { expr } - private def transformGetColumnByOrdinal( - attr: proto.Expression.GetColumnByOrdinal): GetColumnByOrdinal = { - // always set dataType field null, since it is not used in Analyzer - val expr = GetColumnByOrdinal(attr.getOrdinal, null) - if (attr.hasPlanId) { - expr.setTagValue(LogicalPlan.PLAN_ID_TAG, attr.getPlanId) - } - expr - } - private def transformExpressionPlugin(extension: ProtoAny): Expression = { SparkConnectPluginRegistry.expressionRegistry // Lazily traverse the collection. diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py index b0f229a8a4397..e4e6613f9976b 100644 --- a/python/pyspark/sql/connect/dataframe.py +++ b/python/pyspark/sql/connect/dataframe.py @@ -68,10 +68,7 @@ from pyspark.sql.connect.readwriter import DataFrameWriter, DataFrameWriterV2 from pyspark.sql.connect.streaming.readwriter import DataStreamWriter from pyspark.sql.connect.column import Column -from pyspark.sql.connect.expressions import ( - UnresolvedRegex, - GetColumnByOrdinal, -) +from pyspark.sql.connect.expressions import UnresolvedRegex from pyspark.sql.connect.functions import ( _to_col_with_plan_id, _to_col, @@ -1691,10 +1688,10 @@ def __getitem__(self, item: Union[Column, List, Tuple]) -> "DataFrame": ... def __getitem__(self, item: Union[int, str, Column, List, Tuple]) -> Union[Column, "DataFrame"]: - if self._plan is None: - raise SparkConnectException("Cannot analyze on empty plan.") - if isinstance(item, str): + if self._plan is None: + raise SparkConnectException("Cannot analyze on empty plan.") + # validate the column name if not hasattr(self._session, "is_mock_session"): self.select(item).isLocal() @@ -1708,15 +1705,7 @@ def __getitem__(self, item: Union[int, str, Column, List, Tuple]) -> Union[Colum elif isinstance(item, (list, tuple)): return self.select(*item) elif isinstance(item, int): - n = len(self.columns) - # 1, convert bool; 2, covert negative index; 3, validate index - item = range(0, n)[int(item)] - return Column( - GetColumnByOrdinal( - ordinal=item, - plan_id=self._plan._plan_id, - ) - ) + return col(self.columns[item]) else: raise PySparkTypeError( error_class="NOT_COLUMN_OR_INT_OR_LIST_OR_STR_OR_TUPLE", diff --git a/python/pyspark/sql/connect/expressions.py b/python/pyspark/sql/connect/expressions.py index 797910eba0bb8..88c4f4d267b39 100644 --- a/python/pyspark/sql/connect/expressions.py +++ b/python/pyspark/sql/connect/expressions.py @@ -488,40 +488,6 @@ def __eq__(self, other: Any) -> bool: ) -class GetColumnByOrdinal(Expression): - """Represents a column index (0-based). There is no guarantee that this column - actually exists. In the context of this project, we refer by its index and - treat it as an unresolved GetColumnByOrdinal""" - - def __init__(self, ordinal: int, plan_id: Optional[int] = None): - super().__init__() - - assert isinstance(ordinal, int) and ordinal >= 0 - self._ordinal = ordinal - - assert plan_id is None or isinstance(plan_id, int) - self._plan_id = plan_id - - def to_plan(self, session: "SparkConnectClient") -> proto.Expression: - """Returns the Proto representation of the expression.""" - expr = proto.Expression() - expr.get_column_by_ordinal.ordinal = self._ordinal - if self._plan_id is not None: - expr.get_column_by_ordinal.plan_id = self._plan_id - return expr - - def __repr__(self) -> str: - return f"getcolumnbyordinal({self._ordinal})" - - def __eq__(self, other: Any) -> bool: - return ( - other is not None - and isinstance(other, GetColumnByOrdinal) - and other._ordinal == self._ordinal - and other._plan_id == self._plan_id - ) - - class UnresolvedStar(Expression): def __init__(self, unparsed_target: Optional[str]): super().__init__() diff --git a/python/pyspark/sql/connect/proto/expressions_pb2.py b/python/pyspark/sql/connect/proto/expressions_pb2.py index 13f7f2c75de0a..1e943b8978c27 100644 --- a/python/pyspark/sql/connect/proto/expressions_pb2.py +++ b/python/pyspark/sql/connect/proto/expressions_pb2.py @@ -33,7 +33,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x1fspark/connect/expressions.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x19spark/connect/types.proto"\xc7.\n\nExpression\x12=\n\x07literal\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00R\x07literal\x12\x62\n\x14unresolved_attribute\x18\x02 \x01(\x0b\x32-.spark.connect.Expression.UnresolvedAttributeH\x00R\x13unresolvedAttribute\x12_\n\x13unresolved_function\x18\x03 \x01(\x0b\x32,.spark.connect.Expression.UnresolvedFunctionH\x00R\x12unresolvedFunction\x12Y\n\x11\x65xpression_string\x18\x04 \x01(\x0b\x32*.spark.connect.Expression.ExpressionStringH\x00R\x10\x65xpressionString\x12S\n\x0funresolved_star\x18\x05 \x01(\x0b\x32(.spark.connect.Expression.UnresolvedStarH\x00R\x0eunresolvedStar\x12\x37\n\x05\x61lias\x18\x06 \x01(\x0b\x32\x1f.spark.connect.Expression.AliasH\x00R\x05\x61lias\x12\x34\n\x04\x63\x61st\x18\x07 \x01(\x0b\x32\x1e.spark.connect.Expression.CastH\x00R\x04\x63\x61st\x12V\n\x10unresolved_regex\x18\x08 \x01(\x0b\x32).spark.connect.Expression.UnresolvedRegexH\x00R\x0funresolvedRegex\x12\x44\n\nsort_order\x18\t \x01(\x0b\x32#.spark.connect.Expression.SortOrderH\x00R\tsortOrder\x12S\n\x0flambda_function\x18\n \x01(\x0b\x32(.spark.connect.Expression.LambdaFunctionH\x00R\x0elambdaFunction\x12:\n\x06window\x18\x0b \x01(\x0b\x32 .spark.connect.Expression.WindowH\x00R\x06window\x12l\n\x18unresolved_extract_value\x18\x0c \x01(\x0b\x32\x30.spark.connect.Expression.UnresolvedExtractValueH\x00R\x16unresolvedExtractValue\x12M\n\rupdate_fields\x18\r \x01(\x0b\x32&.spark.connect.Expression.UpdateFieldsH\x00R\x0cupdateFields\x12\x82\x01\n unresolved_named_lambda_variable\x18\x0e \x01(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableH\x00R\x1dunresolvedNamedLambdaVariable\x12~\n#common_inline_user_defined_function\x18\x0f \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionH\x00R\x1f\x63ommonInlineUserDefinedFunction\x12\x42\n\rcall_function\x18\x10 \x01(\x0b\x32\x1b.spark.connect.CallFunctionH\x00R\x0c\x63\x61llFunction\x12\x64\n\x19named_argument_expression\x18\x11 \x01(\x0b\x32&.spark.connect.NamedArgumentExpressionH\x00R\x17namedArgumentExpression\x12\x61\n\x15get_column_by_ordinal\x18\x12 \x01(\x0b\x32,.spark.connect.Expression.GetColumnByOrdinalH\x00R\x12getColumnByOrdinal\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x1a\x8f\x06\n\x06Window\x12\x42\n\x0fwindow_function\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0ewindowFunction\x12@\n\x0epartition_spec\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\rpartitionSpec\x12\x42\n\norder_spec\x18\x03 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\torderSpec\x12K\n\nframe_spec\x18\x04 \x01(\x0b\x32,.spark.connect.Expression.Window.WindowFrameR\tframeSpec\x1a\xed\x03\n\x0bWindowFrame\x12U\n\nframe_type\x18\x01 \x01(\x0e\x32\x36.spark.connect.Expression.Window.WindowFrame.FrameTypeR\tframeType\x12P\n\x05lower\x18\x02 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05lower\x12P\n\x05upper\x18\x03 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05upper\x1a\x91\x01\n\rFrameBoundary\x12!\n\x0b\x63urrent_row\x18\x01 \x01(\x08H\x00R\ncurrentRow\x12\x1e\n\tunbounded\x18\x02 \x01(\x08H\x00R\tunbounded\x12\x31\n\x05value\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\x05valueB\n\n\x08\x62oundary"O\n\tFrameType\x12\x18\n\x14\x46RAME_TYPE_UNDEFINED\x10\x00\x12\x12\n\x0e\x46RAME_TYPE_ROW\x10\x01\x12\x14\n\x10\x46RAME_TYPE_RANGE\x10\x02\x1a\xa9\x03\n\tSortOrder\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12O\n\tdirection\x18\x02 \x01(\x0e\x32\x31.spark.connect.Expression.SortOrder.SortDirectionR\tdirection\x12U\n\rnull_ordering\x18\x03 \x01(\x0e\x32\x30.spark.connect.Expression.SortOrder.NullOrderingR\x0cnullOrdering"l\n\rSortDirection\x12\x1e\n\x1aSORT_DIRECTION_UNSPECIFIED\x10\x00\x12\x1c\n\x18SORT_DIRECTION_ASCENDING\x10\x01\x12\x1d\n\x19SORT_DIRECTION_DESCENDING\x10\x02"U\n\x0cNullOrdering\x12\x1a\n\x16SORT_NULLS_UNSPECIFIED\x10\x00\x12\x14\n\x10SORT_NULLS_FIRST\x10\x01\x12\x13\n\x0fSORT_NULLS_LAST\x10\x02\x1a\x91\x01\n\x04\x43\x61st\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12-\n\x04type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04type\x12\x1b\n\x08type_str\x18\x03 \x01(\tH\x00R\x07typeStrB\x0e\n\x0c\x63\x61st_to_type\x1a\x9b\x0c\n\x07Literal\x12-\n\x04null\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04null\x12\x18\n\x06\x62inary\x18\x02 \x01(\x0cH\x00R\x06\x62inary\x12\x1a\n\x07\x62oolean\x18\x03 \x01(\x08H\x00R\x07\x62oolean\x12\x14\n\x04\x62yte\x18\x04 \x01(\x05H\x00R\x04\x62yte\x12\x16\n\x05short\x18\x05 \x01(\x05H\x00R\x05short\x12\x1a\n\x07integer\x18\x06 \x01(\x05H\x00R\x07integer\x12\x14\n\x04long\x18\x07 \x01(\x03H\x00R\x04long\x12\x16\n\x05\x66loat\x18\n \x01(\x02H\x00R\x05\x66loat\x12\x18\n\x06\x64ouble\x18\x0b \x01(\x01H\x00R\x06\x64ouble\x12\x45\n\x07\x64\x65\x63imal\x18\x0c \x01(\x0b\x32).spark.connect.Expression.Literal.DecimalH\x00R\x07\x64\x65\x63imal\x12\x18\n\x06string\x18\r \x01(\tH\x00R\x06string\x12\x14\n\x04\x64\x61te\x18\x10 \x01(\x05H\x00R\x04\x64\x61te\x12\x1e\n\ttimestamp\x18\x11 \x01(\x03H\x00R\ttimestamp\x12%\n\rtimestamp_ntz\x18\x12 \x01(\x03H\x00R\x0ctimestampNtz\x12\x61\n\x11\x63\x61lendar_interval\x18\x13 \x01(\x0b\x32\x32.spark.connect.Expression.Literal.CalendarIntervalH\x00R\x10\x63\x61lendarInterval\x12\x30\n\x13year_month_interval\x18\x14 \x01(\x05H\x00R\x11yearMonthInterval\x12,\n\x11\x64\x61y_time_interval\x18\x15 \x01(\x03H\x00R\x0f\x64\x61yTimeInterval\x12?\n\x05\x61rray\x18\x16 \x01(\x0b\x32\'.spark.connect.Expression.Literal.ArrayH\x00R\x05\x61rray\x12\x39\n\x03map\x18\x17 \x01(\x0b\x32%.spark.connect.Expression.Literal.MapH\x00R\x03map\x12\x42\n\x06struct\x18\x18 \x01(\x0b\x32(.spark.connect.Expression.Literal.StructH\x00R\x06struct\x1au\n\x07\x44\x65\x63imal\x12\x14\n\x05value\x18\x01 \x01(\tR\x05value\x12!\n\tprecision\x18\x02 \x01(\x05H\x00R\tprecision\x88\x01\x01\x12\x19\n\x05scale\x18\x03 \x01(\x05H\x01R\x05scale\x88\x01\x01\x42\x0c\n\n_precisionB\x08\n\x06_scale\x1a\x62\n\x10\x43\x61lendarInterval\x12\x16\n\x06months\x18\x01 \x01(\x05R\x06months\x12\x12\n\x04\x64\x61ys\x18\x02 \x01(\x05R\x04\x64\x61ys\x12"\n\x0cmicroseconds\x18\x03 \x01(\x03R\x0cmicroseconds\x1a\x82\x01\n\x05\x41rray\x12:\n\x0c\x65lement_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x0b\x65lementType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lements\x1a\xe3\x01\n\x03Map\x12\x32\n\x08key_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x07keyType\x12\x36\n\nvalue_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\tvalueType\x12\x35\n\x04keys\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x04keys\x12\x39\n\x06values\x18\x04 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x1a\x81\x01\n\x06Struct\x12\x38\n\x0bstruct_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\nstructType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lementsB\x0e\n\x0cliteral_type\x1a\xba\x01\n\x13UnresolvedAttribute\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x12\x31\n\x12is_metadata_column\x18\x03 \x01(\x08H\x01R\x10isMetadataColumn\x88\x01\x01\x42\n\n\x08_plan_idB\x15\n\x13_is_metadata_column\x1aX\n\x12GetColumnByOrdinal\x12\x18\n\x07ordinal\x18\x01 \x01(\x05R\x07ordinal\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x42\n\n\x08_plan_id\x1a\xcc\x01\n\x12UnresolvedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x1f\n\x0bis_distinct\x18\x03 \x01(\x08R\nisDistinct\x12\x37\n\x18is_user_defined_function\x18\x04 \x01(\x08R\x15isUserDefinedFunction\x1a\x32\n\x10\x45xpressionString\x12\x1e\n\nexpression\x18\x01 \x01(\tR\nexpression\x1aR\n\x0eUnresolvedStar\x12,\n\x0funparsed_target\x18\x01 \x01(\tH\x00R\x0eunparsedTarget\x88\x01\x01\x42\x12\n\x10_unparsed_target\x1aV\n\x0fUnresolvedRegex\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x42\n\n\x08_plan_id\x1a\x84\x01\n\x16UnresolvedExtractValue\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12\x39\n\nextraction\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\nextraction\x1a\xbb\x01\n\x0cUpdateFields\x12\x46\n\x11struct_expression\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x10structExpression\x12\x1d\n\nfield_name\x18\x02 \x01(\tR\tfieldName\x12\x44\n\x10value_expression\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0fvalueExpression\x1ax\n\x05\x41lias\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12\x12\n\x04name\x18\x02 \x03(\tR\x04name\x12\x1f\n\x08metadata\x18\x03 \x01(\tH\x00R\x08metadata\x88\x01\x01\x42\x0b\n\t_metadata\x1a\x9e\x01\n\x0eLambdaFunction\x12\x35\n\x08\x66unction\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08\x66unction\x12U\n\targuments\x18\x02 \x03(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableR\targuments\x1a>\n\x1dUnresolvedNamedLambdaVariable\x12\x1d\n\nname_parts\x18\x01 \x03(\tR\tnamePartsB\x0b\n\texpr_type"\xec\x02\n\x1f\x43ommonInlineUserDefinedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x39\n\npython_udf\x18\x04 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\tpythonUdf\x12I\n\x10scalar_scala_udf\x18\x05 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFH\x00R\x0escalarScalaUdf\x12\x33\n\x08java_udf\x18\x06 \x01(\x0b\x32\x16.spark.connect.JavaUDFH\x00R\x07javaUdfB\n\n\x08\x66unction"\x9b\x01\n\tPythonUDF\x12\x38\n\x0boutput_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVer"\xb8\x01\n\x0eScalarScalaUDF\x12\x18\n\x07payload\x18\x01 \x01(\x0cR\x07payload\x12\x37\n\ninputTypes\x18\x02 \x03(\x0b\x32\x17.spark.connect.DataTypeR\ninputTypes\x12\x37\n\noutputType\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1a\n\x08nullable\x18\x04 \x01(\x08R\x08nullable"\x95\x01\n\x07JavaUDF\x12\x1d\n\nclass_name\x18\x01 \x01(\tR\tclassName\x12=\n\x0boutput_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\noutputType\x88\x01\x01\x12\x1c\n\taggregate\x18\x03 \x01(\x08R\taggregateB\x0e\n\x0c_output_type"l\n\x0c\x43\x61llFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"\\\n\x17NamedArgumentExpression\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05valueB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b'\n\x1fspark/connect/expressions.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x19spark/connect/types.proto"\x8a-\n\nExpression\x12=\n\x07literal\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00R\x07literal\x12\x62\n\x14unresolved_attribute\x18\x02 \x01(\x0b\x32-.spark.connect.Expression.UnresolvedAttributeH\x00R\x13unresolvedAttribute\x12_\n\x13unresolved_function\x18\x03 \x01(\x0b\x32,.spark.connect.Expression.UnresolvedFunctionH\x00R\x12unresolvedFunction\x12Y\n\x11\x65xpression_string\x18\x04 \x01(\x0b\x32*.spark.connect.Expression.ExpressionStringH\x00R\x10\x65xpressionString\x12S\n\x0funresolved_star\x18\x05 \x01(\x0b\x32(.spark.connect.Expression.UnresolvedStarH\x00R\x0eunresolvedStar\x12\x37\n\x05\x61lias\x18\x06 \x01(\x0b\x32\x1f.spark.connect.Expression.AliasH\x00R\x05\x61lias\x12\x34\n\x04\x63\x61st\x18\x07 \x01(\x0b\x32\x1e.spark.connect.Expression.CastH\x00R\x04\x63\x61st\x12V\n\x10unresolved_regex\x18\x08 \x01(\x0b\x32).spark.connect.Expression.UnresolvedRegexH\x00R\x0funresolvedRegex\x12\x44\n\nsort_order\x18\t \x01(\x0b\x32#.spark.connect.Expression.SortOrderH\x00R\tsortOrder\x12S\n\x0flambda_function\x18\n \x01(\x0b\x32(.spark.connect.Expression.LambdaFunctionH\x00R\x0elambdaFunction\x12:\n\x06window\x18\x0b \x01(\x0b\x32 .spark.connect.Expression.WindowH\x00R\x06window\x12l\n\x18unresolved_extract_value\x18\x0c \x01(\x0b\x32\x30.spark.connect.Expression.UnresolvedExtractValueH\x00R\x16unresolvedExtractValue\x12M\n\rupdate_fields\x18\r \x01(\x0b\x32&.spark.connect.Expression.UpdateFieldsH\x00R\x0cupdateFields\x12\x82\x01\n unresolved_named_lambda_variable\x18\x0e \x01(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableH\x00R\x1dunresolvedNamedLambdaVariable\x12~\n#common_inline_user_defined_function\x18\x0f \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionH\x00R\x1f\x63ommonInlineUserDefinedFunction\x12\x42\n\rcall_function\x18\x10 \x01(\x0b\x32\x1b.spark.connect.CallFunctionH\x00R\x0c\x63\x61llFunction\x12\x64\n\x19named_argument_expression\x18\x11 \x01(\x0b\x32&.spark.connect.NamedArgumentExpressionH\x00R\x17namedArgumentExpression\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x1a\x8f\x06\n\x06Window\x12\x42\n\x0fwindow_function\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0ewindowFunction\x12@\n\x0epartition_spec\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\rpartitionSpec\x12\x42\n\norder_spec\x18\x03 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\torderSpec\x12K\n\nframe_spec\x18\x04 \x01(\x0b\x32,.spark.connect.Expression.Window.WindowFrameR\tframeSpec\x1a\xed\x03\n\x0bWindowFrame\x12U\n\nframe_type\x18\x01 \x01(\x0e\x32\x36.spark.connect.Expression.Window.WindowFrame.FrameTypeR\tframeType\x12P\n\x05lower\x18\x02 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05lower\x12P\n\x05upper\x18\x03 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05upper\x1a\x91\x01\n\rFrameBoundary\x12!\n\x0b\x63urrent_row\x18\x01 \x01(\x08H\x00R\ncurrentRow\x12\x1e\n\tunbounded\x18\x02 \x01(\x08H\x00R\tunbounded\x12\x31\n\x05value\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\x05valueB\n\n\x08\x62oundary"O\n\tFrameType\x12\x18\n\x14\x46RAME_TYPE_UNDEFINED\x10\x00\x12\x12\n\x0e\x46RAME_TYPE_ROW\x10\x01\x12\x14\n\x10\x46RAME_TYPE_RANGE\x10\x02\x1a\xa9\x03\n\tSortOrder\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12O\n\tdirection\x18\x02 \x01(\x0e\x32\x31.spark.connect.Expression.SortOrder.SortDirectionR\tdirection\x12U\n\rnull_ordering\x18\x03 \x01(\x0e\x32\x30.spark.connect.Expression.SortOrder.NullOrderingR\x0cnullOrdering"l\n\rSortDirection\x12\x1e\n\x1aSORT_DIRECTION_UNSPECIFIED\x10\x00\x12\x1c\n\x18SORT_DIRECTION_ASCENDING\x10\x01\x12\x1d\n\x19SORT_DIRECTION_DESCENDING\x10\x02"U\n\x0cNullOrdering\x12\x1a\n\x16SORT_NULLS_UNSPECIFIED\x10\x00\x12\x14\n\x10SORT_NULLS_FIRST\x10\x01\x12\x13\n\x0fSORT_NULLS_LAST\x10\x02\x1a\x91\x01\n\x04\x43\x61st\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12-\n\x04type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04type\x12\x1b\n\x08type_str\x18\x03 \x01(\tH\x00R\x07typeStrB\x0e\n\x0c\x63\x61st_to_type\x1a\x9b\x0c\n\x07Literal\x12-\n\x04null\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04null\x12\x18\n\x06\x62inary\x18\x02 \x01(\x0cH\x00R\x06\x62inary\x12\x1a\n\x07\x62oolean\x18\x03 \x01(\x08H\x00R\x07\x62oolean\x12\x14\n\x04\x62yte\x18\x04 \x01(\x05H\x00R\x04\x62yte\x12\x16\n\x05short\x18\x05 \x01(\x05H\x00R\x05short\x12\x1a\n\x07integer\x18\x06 \x01(\x05H\x00R\x07integer\x12\x14\n\x04long\x18\x07 \x01(\x03H\x00R\x04long\x12\x16\n\x05\x66loat\x18\n \x01(\x02H\x00R\x05\x66loat\x12\x18\n\x06\x64ouble\x18\x0b \x01(\x01H\x00R\x06\x64ouble\x12\x45\n\x07\x64\x65\x63imal\x18\x0c \x01(\x0b\x32).spark.connect.Expression.Literal.DecimalH\x00R\x07\x64\x65\x63imal\x12\x18\n\x06string\x18\r \x01(\tH\x00R\x06string\x12\x14\n\x04\x64\x61te\x18\x10 \x01(\x05H\x00R\x04\x64\x61te\x12\x1e\n\ttimestamp\x18\x11 \x01(\x03H\x00R\ttimestamp\x12%\n\rtimestamp_ntz\x18\x12 \x01(\x03H\x00R\x0ctimestampNtz\x12\x61\n\x11\x63\x61lendar_interval\x18\x13 \x01(\x0b\x32\x32.spark.connect.Expression.Literal.CalendarIntervalH\x00R\x10\x63\x61lendarInterval\x12\x30\n\x13year_month_interval\x18\x14 \x01(\x05H\x00R\x11yearMonthInterval\x12,\n\x11\x64\x61y_time_interval\x18\x15 \x01(\x03H\x00R\x0f\x64\x61yTimeInterval\x12?\n\x05\x61rray\x18\x16 \x01(\x0b\x32\'.spark.connect.Expression.Literal.ArrayH\x00R\x05\x61rray\x12\x39\n\x03map\x18\x17 \x01(\x0b\x32%.spark.connect.Expression.Literal.MapH\x00R\x03map\x12\x42\n\x06struct\x18\x18 \x01(\x0b\x32(.spark.connect.Expression.Literal.StructH\x00R\x06struct\x1au\n\x07\x44\x65\x63imal\x12\x14\n\x05value\x18\x01 \x01(\tR\x05value\x12!\n\tprecision\x18\x02 \x01(\x05H\x00R\tprecision\x88\x01\x01\x12\x19\n\x05scale\x18\x03 \x01(\x05H\x01R\x05scale\x88\x01\x01\x42\x0c\n\n_precisionB\x08\n\x06_scale\x1a\x62\n\x10\x43\x61lendarInterval\x12\x16\n\x06months\x18\x01 \x01(\x05R\x06months\x12\x12\n\x04\x64\x61ys\x18\x02 \x01(\x05R\x04\x64\x61ys\x12"\n\x0cmicroseconds\x18\x03 \x01(\x03R\x0cmicroseconds\x1a\x82\x01\n\x05\x41rray\x12:\n\x0c\x65lement_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x0b\x65lementType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lements\x1a\xe3\x01\n\x03Map\x12\x32\n\x08key_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x07keyType\x12\x36\n\nvalue_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\tvalueType\x12\x35\n\x04keys\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x04keys\x12\x39\n\x06values\x18\x04 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x1a\x81\x01\n\x06Struct\x12\x38\n\x0bstruct_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\nstructType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lementsB\x0e\n\x0cliteral_type\x1a\xba\x01\n\x13UnresolvedAttribute\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x12\x31\n\x12is_metadata_column\x18\x03 \x01(\x08H\x01R\x10isMetadataColumn\x88\x01\x01\x42\n\n\x08_plan_idB\x15\n\x13_is_metadata_column\x1a\xcc\x01\n\x12UnresolvedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x1f\n\x0bis_distinct\x18\x03 \x01(\x08R\nisDistinct\x12\x37\n\x18is_user_defined_function\x18\x04 \x01(\x08R\x15isUserDefinedFunction\x1a\x32\n\x10\x45xpressionString\x12\x1e\n\nexpression\x18\x01 \x01(\tR\nexpression\x1aR\n\x0eUnresolvedStar\x12,\n\x0funparsed_target\x18\x01 \x01(\tH\x00R\x0eunparsedTarget\x88\x01\x01\x42\x12\n\x10_unparsed_target\x1aV\n\x0fUnresolvedRegex\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x42\n\n\x08_plan_id\x1a\x84\x01\n\x16UnresolvedExtractValue\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12\x39\n\nextraction\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\nextraction\x1a\xbb\x01\n\x0cUpdateFields\x12\x46\n\x11struct_expression\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x10structExpression\x12\x1d\n\nfield_name\x18\x02 \x01(\tR\tfieldName\x12\x44\n\x10value_expression\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0fvalueExpression\x1ax\n\x05\x41lias\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12\x12\n\x04name\x18\x02 \x03(\tR\x04name\x12\x1f\n\x08metadata\x18\x03 \x01(\tH\x00R\x08metadata\x88\x01\x01\x42\x0b\n\t_metadata\x1a\x9e\x01\n\x0eLambdaFunction\x12\x35\n\x08\x66unction\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08\x66unction\x12U\n\targuments\x18\x02 \x03(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableR\targuments\x1a>\n\x1dUnresolvedNamedLambdaVariable\x12\x1d\n\nname_parts\x18\x01 \x03(\tR\tnamePartsB\x0b\n\texpr_type"\xec\x02\n\x1f\x43ommonInlineUserDefinedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x39\n\npython_udf\x18\x04 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\tpythonUdf\x12I\n\x10scalar_scala_udf\x18\x05 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFH\x00R\x0escalarScalaUdf\x12\x33\n\x08java_udf\x18\x06 \x01(\x0b\x32\x16.spark.connect.JavaUDFH\x00R\x07javaUdfB\n\n\x08\x66unction"\x9b\x01\n\tPythonUDF\x12\x38\n\x0boutput_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVer"\xb8\x01\n\x0eScalarScalaUDF\x12\x18\n\x07payload\x18\x01 \x01(\x0cR\x07payload\x12\x37\n\ninputTypes\x18\x02 \x03(\x0b\x32\x17.spark.connect.DataTypeR\ninputTypes\x12\x37\n\noutputType\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1a\n\x08nullable\x18\x04 \x01(\x08R\x08nullable"\x95\x01\n\x07JavaUDF\x12\x1d\n\nclass_name\x18\x01 \x01(\tR\tclassName\x12=\n\x0boutput_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\noutputType\x88\x01\x01\x12\x1c\n\taggregate\x18\x03 \x01(\x08R\taggregateB\x0e\n\x0c_output_type"l\n\x0c\x43\x61llFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"\\\n\x17NamedArgumentExpression\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05valueB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) @@ -44,67 +44,65 @@ b"\n\036org.apache.spark.connect.protoP\001Z\022internal/generated" ) _EXPRESSION._serialized_start = 105 - _EXPRESSION._serialized_end = 6064 - _EXPRESSION_WINDOW._serialized_start = 1744 - _EXPRESSION_WINDOW._serialized_end = 2527 - _EXPRESSION_WINDOW_WINDOWFRAME._serialized_start = 2034 - _EXPRESSION_WINDOW_WINDOWFRAME._serialized_end = 2527 - _EXPRESSION_WINDOW_WINDOWFRAME_FRAMEBOUNDARY._serialized_start = 2301 - _EXPRESSION_WINDOW_WINDOWFRAME_FRAMEBOUNDARY._serialized_end = 2446 - _EXPRESSION_WINDOW_WINDOWFRAME_FRAMETYPE._serialized_start = 2448 - _EXPRESSION_WINDOW_WINDOWFRAME_FRAMETYPE._serialized_end = 2527 - _EXPRESSION_SORTORDER._serialized_start = 2530 - _EXPRESSION_SORTORDER._serialized_end = 2955 - _EXPRESSION_SORTORDER_SORTDIRECTION._serialized_start = 2760 - _EXPRESSION_SORTORDER_SORTDIRECTION._serialized_end = 2868 - _EXPRESSION_SORTORDER_NULLORDERING._serialized_start = 2870 - _EXPRESSION_SORTORDER_NULLORDERING._serialized_end = 2955 - _EXPRESSION_CAST._serialized_start = 2958 - _EXPRESSION_CAST._serialized_end = 3103 - _EXPRESSION_LITERAL._serialized_start = 3106 - _EXPRESSION_LITERAL._serialized_end = 4669 - _EXPRESSION_LITERAL_DECIMAL._serialized_start = 3941 - _EXPRESSION_LITERAL_DECIMAL._serialized_end = 4058 - _EXPRESSION_LITERAL_CALENDARINTERVAL._serialized_start = 4060 - _EXPRESSION_LITERAL_CALENDARINTERVAL._serialized_end = 4158 - _EXPRESSION_LITERAL_ARRAY._serialized_start = 4161 - _EXPRESSION_LITERAL_ARRAY._serialized_end = 4291 - _EXPRESSION_LITERAL_MAP._serialized_start = 4294 - _EXPRESSION_LITERAL_MAP._serialized_end = 4521 - _EXPRESSION_LITERAL_STRUCT._serialized_start = 4524 - _EXPRESSION_LITERAL_STRUCT._serialized_end = 4653 - _EXPRESSION_UNRESOLVEDATTRIBUTE._serialized_start = 4672 - _EXPRESSION_UNRESOLVEDATTRIBUTE._serialized_end = 4858 - _EXPRESSION_GETCOLUMNBYORDINAL._serialized_start = 4860 - _EXPRESSION_GETCOLUMNBYORDINAL._serialized_end = 4948 - _EXPRESSION_UNRESOLVEDFUNCTION._serialized_start = 4951 - _EXPRESSION_UNRESOLVEDFUNCTION._serialized_end = 5155 - _EXPRESSION_EXPRESSIONSTRING._serialized_start = 5157 - _EXPRESSION_EXPRESSIONSTRING._serialized_end = 5207 - _EXPRESSION_UNRESOLVEDSTAR._serialized_start = 5209 - _EXPRESSION_UNRESOLVEDSTAR._serialized_end = 5291 - _EXPRESSION_UNRESOLVEDREGEX._serialized_start = 5293 - _EXPRESSION_UNRESOLVEDREGEX._serialized_end = 5379 - _EXPRESSION_UNRESOLVEDEXTRACTVALUE._serialized_start = 5382 - _EXPRESSION_UNRESOLVEDEXTRACTVALUE._serialized_end = 5514 - _EXPRESSION_UPDATEFIELDS._serialized_start = 5517 - _EXPRESSION_UPDATEFIELDS._serialized_end = 5704 - _EXPRESSION_ALIAS._serialized_start = 5706 - _EXPRESSION_ALIAS._serialized_end = 5826 - _EXPRESSION_LAMBDAFUNCTION._serialized_start = 5829 - _EXPRESSION_LAMBDAFUNCTION._serialized_end = 5987 - _EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE._serialized_start = 5989 - _EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE._serialized_end = 6051 - _COMMONINLINEUSERDEFINEDFUNCTION._serialized_start = 6067 - _COMMONINLINEUSERDEFINEDFUNCTION._serialized_end = 6431 - _PYTHONUDF._serialized_start = 6434 - _PYTHONUDF._serialized_end = 6589 - _SCALARSCALAUDF._serialized_start = 6592 - _SCALARSCALAUDF._serialized_end = 6776 - _JAVAUDF._serialized_start = 6779 - _JAVAUDF._serialized_end = 6928 - _CALLFUNCTION._serialized_start = 6930 - _CALLFUNCTION._serialized_end = 7038 - _NAMEDARGUMENTEXPRESSION._serialized_start = 7040 - _NAMEDARGUMENTEXPRESSION._serialized_end = 7132 + _EXPRESSION._serialized_end = 5875 + _EXPRESSION_WINDOW._serialized_start = 1645 + _EXPRESSION_WINDOW._serialized_end = 2428 + _EXPRESSION_WINDOW_WINDOWFRAME._serialized_start = 1935 + _EXPRESSION_WINDOW_WINDOWFRAME._serialized_end = 2428 + _EXPRESSION_WINDOW_WINDOWFRAME_FRAMEBOUNDARY._serialized_start = 2202 + _EXPRESSION_WINDOW_WINDOWFRAME_FRAMEBOUNDARY._serialized_end = 2347 + _EXPRESSION_WINDOW_WINDOWFRAME_FRAMETYPE._serialized_start = 2349 + _EXPRESSION_WINDOW_WINDOWFRAME_FRAMETYPE._serialized_end = 2428 + _EXPRESSION_SORTORDER._serialized_start = 2431 + _EXPRESSION_SORTORDER._serialized_end = 2856 + _EXPRESSION_SORTORDER_SORTDIRECTION._serialized_start = 2661 + _EXPRESSION_SORTORDER_SORTDIRECTION._serialized_end = 2769 + _EXPRESSION_SORTORDER_NULLORDERING._serialized_start = 2771 + _EXPRESSION_SORTORDER_NULLORDERING._serialized_end = 2856 + _EXPRESSION_CAST._serialized_start = 2859 + _EXPRESSION_CAST._serialized_end = 3004 + _EXPRESSION_LITERAL._serialized_start = 3007 + _EXPRESSION_LITERAL._serialized_end = 4570 + _EXPRESSION_LITERAL_DECIMAL._serialized_start = 3842 + _EXPRESSION_LITERAL_DECIMAL._serialized_end = 3959 + _EXPRESSION_LITERAL_CALENDARINTERVAL._serialized_start = 3961 + _EXPRESSION_LITERAL_CALENDARINTERVAL._serialized_end = 4059 + _EXPRESSION_LITERAL_ARRAY._serialized_start = 4062 + _EXPRESSION_LITERAL_ARRAY._serialized_end = 4192 + _EXPRESSION_LITERAL_MAP._serialized_start = 4195 + _EXPRESSION_LITERAL_MAP._serialized_end = 4422 + _EXPRESSION_LITERAL_STRUCT._serialized_start = 4425 + _EXPRESSION_LITERAL_STRUCT._serialized_end = 4554 + _EXPRESSION_UNRESOLVEDATTRIBUTE._serialized_start = 4573 + _EXPRESSION_UNRESOLVEDATTRIBUTE._serialized_end = 4759 + _EXPRESSION_UNRESOLVEDFUNCTION._serialized_start = 4762 + _EXPRESSION_UNRESOLVEDFUNCTION._serialized_end = 4966 + _EXPRESSION_EXPRESSIONSTRING._serialized_start = 4968 + _EXPRESSION_EXPRESSIONSTRING._serialized_end = 5018 + _EXPRESSION_UNRESOLVEDSTAR._serialized_start = 5020 + _EXPRESSION_UNRESOLVEDSTAR._serialized_end = 5102 + _EXPRESSION_UNRESOLVEDREGEX._serialized_start = 5104 + _EXPRESSION_UNRESOLVEDREGEX._serialized_end = 5190 + _EXPRESSION_UNRESOLVEDEXTRACTVALUE._serialized_start = 5193 + _EXPRESSION_UNRESOLVEDEXTRACTVALUE._serialized_end = 5325 + _EXPRESSION_UPDATEFIELDS._serialized_start = 5328 + _EXPRESSION_UPDATEFIELDS._serialized_end = 5515 + _EXPRESSION_ALIAS._serialized_start = 5517 + _EXPRESSION_ALIAS._serialized_end = 5637 + _EXPRESSION_LAMBDAFUNCTION._serialized_start = 5640 + _EXPRESSION_LAMBDAFUNCTION._serialized_end = 5798 + _EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE._serialized_start = 5800 + _EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE._serialized_end = 5862 + _COMMONINLINEUSERDEFINEDFUNCTION._serialized_start = 5878 + _COMMONINLINEUSERDEFINEDFUNCTION._serialized_end = 6242 + _PYTHONUDF._serialized_start = 6245 + _PYTHONUDF._serialized_end = 6400 + _SCALARSCALAUDF._serialized_start = 6403 + _SCALARSCALAUDF._serialized_end = 6587 + _JAVAUDF._serialized_start = 6590 + _JAVAUDF._serialized_end = 6739 + _CALLFUNCTION._serialized_start = 6741 + _CALLFUNCTION._serialized_end = 6849 + _NAMEDARGUMENTEXPRESSION._serialized_start = 6851 + _NAMEDARGUMENTEXPRESSION._serialized_end = 6943 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/expressions_pb2.pyi b/python/pyspark/sql/connect/proto/expressions_pb2.pyi index ba24da263cf33..93a431dcc8607 100644 --- a/python/pyspark/sql/connect/proto/expressions_pb2.pyi +++ b/python/pyspark/sql/connect/proto/expressions_pb2.pyi @@ -807,37 +807,6 @@ class Expression(google.protobuf.message.Message): self, oneof_group: typing_extensions.Literal["_plan_id", b"_plan_id"] ) -> typing_extensions.Literal["plan_id"] | None: ... - class GetColumnByOrdinal(google.protobuf.message.Message): - """An unresolved attribute that is represented by its column index.""" - - DESCRIPTOR: google.protobuf.descriptor.Descriptor - - ORDINAL_FIELD_NUMBER: builtins.int - PLAN_ID_FIELD_NUMBER: builtins.int - ordinal: builtins.int - """(Required) 0-based column index.""" - plan_id: builtins.int - """(Optional) The id of corresponding connect plan.""" - def __init__( - self, - *, - ordinal: builtins.int = ..., - plan_id: builtins.int | None = ..., - ) -> None: ... - def HasField( - self, - field_name: typing_extensions.Literal["_plan_id", b"_plan_id", "plan_id", b"plan_id"], - ) -> builtins.bool: ... - def ClearField( - self, - field_name: typing_extensions.Literal[ - "_plan_id", b"_plan_id", "ordinal", b"ordinal", "plan_id", b"plan_id" - ], - ) -> None: ... - def WhichOneof( - self, oneof_group: typing_extensions.Literal["_plan_id", b"_plan_id"] - ) -> typing_extensions.Literal["plan_id"] | None: ... - class UnresolvedFunction(google.protobuf.message.Message): """An unresolved function is not explicitly bound to one explicit function, but the function is resolved during analysis following Sparks name resolution rules. @@ -1160,7 +1129,6 @@ class Expression(google.protobuf.message.Message): COMMON_INLINE_USER_DEFINED_FUNCTION_FIELD_NUMBER: builtins.int CALL_FUNCTION_FIELD_NUMBER: builtins.int NAMED_ARGUMENT_EXPRESSION_FIELD_NUMBER: builtins.int - GET_COLUMN_BY_ORDINAL_FIELD_NUMBER: builtins.int EXTENSION_FIELD_NUMBER: builtins.int @property def literal(self) -> global___Expression.Literal: ... @@ -1199,8 +1167,6 @@ class Expression(google.protobuf.message.Message): @property def named_argument_expression(self) -> global___NamedArgumentExpression: ... @property - def get_column_by_ordinal(self) -> global___Expression.GetColumnByOrdinal: ... - @property def extension(self) -> google.protobuf.any_pb2.Any: """This field is used to mark extensions to the protocol. When plugins generate arbitrary relations they can add them here. During the planning the correct resolution is done. @@ -1226,7 +1192,6 @@ class Expression(google.protobuf.message.Message): common_inline_user_defined_function: global___CommonInlineUserDefinedFunction | None = ..., call_function: global___CallFunction | None = ..., named_argument_expression: global___NamedArgumentExpression | None = ..., - get_column_by_ordinal: global___Expression.GetColumnByOrdinal | None = ..., extension: google.protobuf.any_pb2.Any | None = ..., ) -> None: ... def HasField( @@ -1246,8 +1211,6 @@ class Expression(google.protobuf.message.Message): b"expression_string", "extension", b"extension", - "get_column_by_ordinal", - b"get_column_by_ordinal", "lambda_function", b"lambda_function", "literal", @@ -1291,8 +1254,6 @@ class Expression(google.protobuf.message.Message): b"expression_string", "extension", b"extension", - "get_column_by_ordinal", - b"get_column_by_ordinal", "lambda_function", b"lambda_function", "literal", @@ -1340,7 +1301,6 @@ class Expression(google.protobuf.message.Message): "common_inline_user_defined_function", "call_function", "named_argument_expression", - "get_column_by_ordinal", "extension", ] | None diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 51f18e7b6f35f..07a672d40637a 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -3458,10 +3458,7 @@ def __getitem__(self, item: Union[int, str, Column, List, Tuple]) -> Union[Colum elif isinstance(item, (list, tuple)): return self.select(*item) elif isinstance(item, int): - n = len(self.columns) - # 1, convert bool; 2, covert negative index; 3, validate index - item = range(0, n)[int(item)] - jc = self._jdf.apply(item) + jc = self._jdf.apply(self.columns[item]) return Column(jc) else: raise PySparkTypeError( diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py index 150130f3c1a1a..3c493a8ae3ace 100644 --- a/python/pyspark/sql/tests/test_dataframe.py +++ b/python/pyspark/sql/tests/test_dataframe.py @@ -26,6 +26,7 @@ import io from contextlib import redirect_stdout +from pyspark import StorageLevel from pyspark.sql import SparkSession, Row, functions from pyspark.sql.functions import col, lit, count, sum, mean, struct from pyspark.sql.types import ( @@ -61,51 +62,6 @@ class DataFrameTestsMixin: - def test_getitem_invalid_indices(self): - df = self.spark.sql( - "SELECT * FROM VALUES " - "(1, 1.1, 'a'), " - "(2, 2.2, 'b'), " - "(4, 4.4, 'c') " - "AS TAB(a, b, c)" - ) - - # accepted type and values - for index in [False, True, 0, 1, 2, -1, -2, -3]: - df[index] - - # negative cases: ordinal out of range - for index in [-10, -4, 3, 10, 100]: - with self.assertRaises(IndexError): - df[index] - - # negative cases: unsupported types - for index in [None, 1.0, Decimal(1)]: - with self.assertRaises(PySparkTypeError): - df[index] - - def test_getitem_duplicated_column(self): - df = self.spark.sql( - "SELECT * FROM VALUES " - "(1, 1.1, 'a'), " - "(2, 2.2, 'b'), " - "(4, 4.4, 'c') " - "AS TAB(a, a, a)" - ) - - self.assertEqual( - df.select(df[0]).schema.simpleString(), - "struct", - ) - self.assertEqual( - df.select(df[1]).schema.simpleString(), - "struct", - ) - self.assertEqual( - df.select(df[2]).schema.simpleString(), - "struct", - ) - def test_range(self): self.assertEqual(self.spark.range(1, 1).count(), 0) self.assertEqual(self.spark.range(1, 0, -1).count(), 1) @@ -120,6 +76,7 @@ def test_duplicated_column_names(self): self.assertEqual(2, row[1]) self.assertEqual("Row(c=1, c=2)", str(row)) # Cannot access columns + self.assertRaises(AnalysisException, lambda: df.select(df[0]).first()) self.assertRaises(AnalysisException, lambda: df.select(df.c).first()) self.assertRaises(AnalysisException, lambda: df.select(df["c"]).first()) diff --git a/python/pyspark/sql/tests/test_group.py b/python/pyspark/sql/tests/test_group.py index 85d2535774143..6981601cb1292 100644 --- a/python/pyspark/sql/tests/test_group.py +++ b/python/pyspark/sql/tests/test_group.py @@ -149,33 +149,6 @@ def test_order_by_ordinal(self): with self.assertRaises(IndexError): df.orderBy(-3) - def test_order_by_ordinal_duplicated_column(self): - spark = self.spark - df = spark.createDataFrame( - [ - (1, 1), - (1, 2), - (2, 1), - (2, 2), - (3, 1), - (3, 2), - ], - ["a", "a"], - ) - - with self.tempView("v"): - df.createOrReplaceTempView("v") - - df1 = spark.sql("select * from v order by 2, 1;") - df2 = df.orderBy(2, 1) - assertSchemaEqual(df1.schema, df2.schema) - assertDataFrameEqual(df1, df2) - - df1 = spark.sql("select * from v order by 1 desc, 2;") - df2 = df.orderBy(-1, 2) - assertSchemaEqual(df1.schema, df2.schema) - assertDataFrameEqual(df1, df2) - class GroupTests(GroupTestsMixin, ReusedSQLTestCase): pass diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index a3f8a2cf4a05d..0cc037b157e07 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1393,11 +1393,6 @@ class Dataset[T] private[sql]( */ def apply(colName: String): Column = col(colName) - /** - * Selects column based on the column index (0-based) and returns it as a [[Column]]. - */ - private[sql] def apply(index: Int): Column = col(index) - /** * Specifies some hint on the current Dataset. As an example, the following code specifies * that one of the plan can be broadcasted: @@ -1450,13 +1445,6 @@ class Dataset[T] private[sql]( } } - /** - * Selects column based on the column index (0-based) and returns it as a [[Column]]. - */ - private[sql] def col(index: Int): Column = { - Column(addDataFrameIdToCol(queryExecution.analyzed.output(index))) - } - /** * Selects a metadata column based on its logical column name, and returns it as a [[Column]]. *