From 8f2d8baf27106923fa97c34f121d55d9c8388856 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 6 Jan 2023 02:03:19 -0800 Subject: [PATCH 1/3] [SPARK-41874][CONNECT][PYTHON] Implement `DataFrame.sameSemantics` --- .../protobuf/spark/connect/relations.proto | 9 + .../connect/planner/SparkConnectPlanner.scala | 14 ++ python/pyspark/sql/connect/dataframe.py | 11 +- python/pyspark/sql/connect/plan.py | 14 ++ .../sql/connect/proto/relations_pb2.py | 214 ++++++++++-------- .../sql/connect/proto/relations_pb2.pyi | 83 +++++++ python/pyspark/sql/dataframe.py | 3 + .../sql/tests/connect/test_connect_basic.py | 8 +- 8 files changed, 253 insertions(+), 103 deletions(-) diff --git a/connector/connect/common/src/main/protobuf/spark/connect/relations.proto b/connector/connect/common/src/main/protobuf/spark/connect/relations.proto index c0f22dd457641..06ffeab1ac6d3 100644 --- a/connector/connect/common/src/main/protobuf/spark/connect/relations.proto +++ b/connector/connect/common/src/main/protobuf/spark/connect/relations.proto @@ -60,6 +60,7 @@ message Relation { Unpivot unpivot = 25; ToSchema to_schema = 26; RepartitionByExpression repartition_by_expression = 27; + SameSemantics same_semantics = 29; // NA functions NAFill fill_na = 90; @@ -748,3 +749,11 @@ message ToSchema { // (Optional) number of partitions, must be positive. optional int32 num_partitions = 3; } + +message SameSemantics { + // (Required) The input relation. + Relation input = 1; + + // (Required) The other Relation to compare against. + Relation other = 2; +} diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index b4c882541e08f..4029f64d8702e 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -110,6 +110,8 @@ class SparkConnectPlanner(session: SparkSession) { case proto.Relation.RelTypeCase.UNPIVOT => transformUnpivot(rel.getUnpivot) case proto.Relation.RelTypeCase.REPARTITION_BY_EXPRESSION => transformRepartitionByExpression(rel.getRepartitionByExpression) + case proto.Relation.RelTypeCase.SAME_SEMANTICS => + transformSameSemantics(rel.getSameSemantics) case proto.Relation.RelTypeCase.RELTYPE_NOT_SET => throw new IndexOutOfBoundsException("Expected Relation to be set, but is empty.") @@ -535,6 +537,18 @@ class SparkConnectPlanner(session: SparkSession) { numPartitionsOpt) } + private def transformSameSemantics(rel: proto.SameSemantics): LogicalPlan = { + val otherDS = Dataset + .ofRows(session, transformRelation(rel.getOther)) + val sameSemantics = Dataset + .ofRows(session, transformRelation(rel.getInput)) + .sameSemantics(otherDS) + LocalRelation.fromProduct( + output = AttributeReference("same_semantics", BooleanType, false)() :: Nil, + data = Tuple1.apply(sameSemantics) :: Nil) + } + + private def transformDeduplicate(rel: proto.Deduplicate): LogicalPlan = { if (!rel.hasInput) { throw InvalidPlanInput("Deduplicate needs a plan input") diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py index 67f3c6e929218..3e61ed9b97d9d 100644 --- a/python/pyspark/sql/connect/dataframe.py +++ b/python/pyspark/sql/connect/dataframe.py @@ -1361,8 +1361,15 @@ def _repr_html_(self, *args: Any, **kwargs: Any) -> None: def semanticHash(self, *args: Any, **kwargs: Any) -> None: raise NotImplementedError("semanticHash() is not implemented.") - def sameSemantics(self, *args: Any, **kwargs: Any) -> None: - raise NotImplementedError("sameSemantics() is not implemented.") + def sameSemantics(self, other: "DataFrame") -> bool: + pdf = DataFrame.withPlan( + plan.SameSemantics(child=self._plan, other=other._plan), + session=self._session, + ).toPandas() + assert pdf is not None + return pdf["same_semantics"][0] + + sameSemantics.__doc__ = PySparkDataFrame.sameSemantics.__doc__ # SparkConnect specific API def offset(self, n: int) -> "DataFrame": diff --git a/python/pyspark/sql/connect/plan.py b/python/pyspark/sql/connect/plan.py index 1973755be274e..fbd0208be3ea4 100644 --- a/python/pyspark/sql/connect/plan.py +++ b/python/pyspark/sql/connect/plan.py @@ -891,6 +891,20 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: return plan +class SameSemantics(LogicalPlan): + def __init__(self, child: Optional["LogicalPlan"], other: Optional["LogicalPlan"]) -> None: + super().__init__(child) + self.other = other + + def plan(self, session: "SparkConnectClient") -> proto.Relation: + assert self._child is not None and self.other is not None + plan = proto.Relation() + plan.same_semantics.input.CopyFrom(self._child.plan(session)) + plan.same_semantics.other.CopyFrom(self.other.plan(session)) + + return plan + + class Unpivot(LogicalPlan): """Logical plan object for a unpivot operation.""" diff --git a/python/pyspark/sql/connect/proto/relations_pb2.py b/python/pyspark/sql/connect/proto/relations_pb2.py index 9e230c3d2395d..d94b2f5c03968 100644 --- a/python/pyspark/sql/connect/proto/relations_pb2.py +++ b/python/pyspark/sql/connect/proto/relations_pb2.py @@ -36,7 +36,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fspark/connect/expressions.proto\x1a\x19spark/connect/types.proto\x1a\x1bspark/connect/catalog.proto"\xed\x12\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66ilter\x18\x04 \x01(\x0b\x32\x15.spark.connect.FilterH\x00R\x06\x66ilter\x12)\n\x04join\x18\x05 \x01(\x0b\x32\x13.spark.connect.JoinH\x00R\x04join\x12\x34\n\x06set_op\x18\x06 \x01(\x0b\x32\x1b.spark.connect.SetOperationH\x00R\x05setOp\x12)\n\x04sort\x18\x07 \x01(\x0b\x32\x13.spark.connect.SortH\x00R\x04sort\x12,\n\x05limit\x18\x08 \x01(\x0b\x32\x14.spark.connect.LimitH\x00R\x05limit\x12\x38\n\taggregate\x18\t \x01(\x0b\x32\x18.spark.connect.AggregateH\x00R\taggregate\x12&\n\x03sql\x18\n \x01(\x0b\x32\x12.spark.connect.SQLH\x00R\x03sql\x12\x45\n\x0elocal_relation\x18\x0b \x01(\x0b\x32\x1c.spark.connect.LocalRelationH\x00R\rlocalRelation\x12/\n\x06sample\x18\x0c \x01(\x0b\x32\x15.spark.connect.SampleH\x00R\x06sample\x12/\n\x06offset\x18\r \x01(\x0b\x32\x15.spark.connect.OffsetH\x00R\x06offset\x12>\n\x0b\x64\x65\x64uplicate\x18\x0e \x01(\x0b\x32\x1a.spark.connect.DeduplicateH\x00R\x0b\x64\x65\x64uplicate\x12,\n\x05range\x18\x0f \x01(\x0b\x32\x14.spark.connect.RangeH\x00R\x05range\x12\x45\n\x0esubquery_alias\x18\x10 \x01(\x0b\x32\x1c.spark.connect.SubqueryAliasH\x00R\rsubqueryAlias\x12>\n\x0brepartition\x18\x11 \x01(\x0b\x32\x1a.spark.connect.RepartitionH\x00R\x0brepartition\x12|\n#rename_columns_by_same_length_names\x18\x12 \x01(\x0b\x32-.spark.connect.RenameColumnsBySameLengthNamesH\x00R\x1erenameColumnsBySameLengthNames\x12w\n"rename_columns_by_name_to_name_map\x18\x13 \x01(\x0b\x32+.spark.connect.RenameColumnsByNameToNameMapH\x00R\x1crenameColumnsByNameToNameMap\x12<\n\x0bshow_string\x18\x14 \x01(\x0b\x32\x19.spark.connect.ShowStringH\x00R\nshowString\x12)\n\x04\x64rop\x18\x15 \x01(\x0b\x32\x13.spark.connect.DropH\x00R\x04\x64rop\x12)\n\x04tail\x18\x16 \x01(\x0b\x32\x13.spark.connect.TailH\x00R\x04tail\x12?\n\x0cwith_columns\x18\x17 \x01(\x0b\x32\x1a.spark.connect.WithColumnsH\x00R\x0bwithColumns\x12)\n\x04hint\x18\x18 \x01(\x0b\x32\x13.spark.connect.HintH\x00R\x04hint\x12\x32\n\x07unpivot\x18\x19 \x01(\x0b\x32\x16.spark.connect.UnpivotH\x00R\x07unpivot\x12\x36\n\tto_schema\x18\x1a \x01(\x0b\x32\x17.spark.connect.ToSchemaH\x00R\x08toSchema\x12\x64\n\x19repartition_by_expression\x18\x1b \x01(\x0b\x32&.spark.connect.RepartitionByExpressionH\x00R\x17repartitionByExpression\x12\x30\n\x07\x66ill_na\x18Z \x01(\x0b\x32\x15.spark.connect.NAFillH\x00R\x06\x66illNa\x12\x30\n\x07\x64rop_na\x18[ \x01(\x0b\x32\x15.spark.connect.NADropH\x00R\x06\x64ropNa\x12\x34\n\x07replace\x18\\ \x01(\x0b\x32\x18.spark.connect.NAReplaceH\x00R\x07replace\x12\x36\n\x07summary\x18\x64 \x01(\x0b\x32\x1a.spark.connect.StatSummaryH\x00R\x07summary\x12\x39\n\x08\x63rosstab\x18\x65 \x01(\x0b\x32\x1b.spark.connect.StatCrosstabH\x00R\x08\x63rosstab\x12\x39\n\x08\x64\x65scribe\x18\x66 \x01(\x0b\x32\x1b.spark.connect.StatDescribeH\x00R\x08\x64\x65scribe\x12*\n\x03\x63ov\x18g \x01(\x0b\x32\x16.spark.connect.StatCovH\x00R\x03\x63ov\x12-\n\x04\x63orr\x18h \x01(\x0b\x32\x17.spark.connect.StatCorrH\x00R\x04\x63orr\x12L\n\x0f\x61pprox_quantile\x18i \x01(\x0b\x32!.spark.connect.StatApproxQuantileH\x00R\x0e\x61pproxQuantile\x12=\n\nfreq_items\x18j \x01(\x0b\x32\x1c.spark.connect.StatFreqItemsH\x00R\tfreqItems\x12:\n\tsample_by\x18k \x01(\x0b\x32\x1b.spark.connect.StatSampleByH\x00R\x08sampleBy\x12\x33\n\x07\x63\x61talog\x18\xc8\x01 \x01(\x0b\x32\x16.spark.connect.CatalogH\x00R\x07\x63\x61talog\x12\x35\n\textension\x18\xe6\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x33\n\x07unknown\x18\xe7\x07 \x01(\x0b\x32\x16.spark.connect.UnknownH\x00R\x07unknownB\n\n\x08rel_type"\t\n\x07Unknown"1\n\x0eRelationCommon\x12\x1f\n\x0bsource_info\x18\x01 \x01(\tR\nsourceInfo"\x1b\n\x03SQL\x12\x14\n\x05query\x18\x01 \x01(\tR\x05query"\xaa\x03\n\x04Read\x12\x41\n\x0bnamed_table\x18\x01 \x01(\x0b\x32\x1e.spark.connect.Read.NamedTableH\x00R\nnamedTable\x12\x41\n\x0b\x64\x61ta_source\x18\x02 \x01(\x0b\x32\x1e.spark.connect.Read.DataSourceH\x00R\ndataSource\x1a=\n\nNamedTable\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x1a\xcf\x01\n\nDataSource\x12\x16\n\x06\x66ormat\x18\x01 \x01(\tR\x06\x66ormat\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x00R\x06schema\x88\x01\x01\x12\x45\n\x07options\x18\x03 \x03(\x0b\x32+.spark.connect.Read.DataSource.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\t\n\x07_schemaB\x0b\n\tread_type"u\n\x07Project\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12;\n\x0b\x65xpressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0b\x65xpressions"p\n\x06\x46ilter\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x37\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\tcondition"\xd7\x03\n\x04Join\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12@\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\rjoinCondition\x12\x39\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeR\x08joinType\x12#\n\rusing_columns\x18\x05 \x03(\tR\x0cusingColumns"\xd0\x01\n\x08JoinType\x12\x19\n\x15JOIN_TYPE_UNSPECIFIED\x10\x00\x12\x13\n\x0fJOIN_TYPE_INNER\x10\x01\x12\x18\n\x14JOIN_TYPE_FULL_OUTER\x10\x02\x12\x18\n\x14JOIN_TYPE_LEFT_OUTER\x10\x03\x12\x19\n\x15JOIN_TYPE_RIGHT_OUTER\x10\x04\x12\x17\n\x13JOIN_TYPE_LEFT_ANTI\x10\x05\x12\x17\n\x13JOIN_TYPE_LEFT_SEMI\x10\x06\x12\x13\n\x0fJOIN_TYPE_CROSS\x10\x07"\x8c\x03\n\x0cSetOperation\x12\x36\n\nleft_input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\tleftInput\x12\x38\n\x0bright_input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\nrightInput\x12\x45\n\x0bset_op_type\x18\x03 \x01(\x0e\x32%.spark.connect.SetOperation.SetOpTypeR\tsetOpType\x12\x1a\n\x06is_all\x18\x04 \x01(\x08H\x00R\x05isAll\x88\x01\x01\x12\x1c\n\x07\x62y_name\x18\x05 \x01(\x08H\x01R\x06\x62yName\x88\x01\x01"r\n\tSetOpType\x12\x1b\n\x17SET_OP_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15SET_OP_TYPE_INTERSECT\x10\x01\x12\x15\n\x11SET_OP_TYPE_UNION\x10\x02\x12\x16\n\x12SET_OP_TYPE_EXCEPT\x10\x03\x42\t\n\x07_is_allB\n\n\x08_by_name"L\n\x05Limit\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"O\n\x06Offset\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x16\n\x06offset\x18\x02 \x01(\x05R\x06offset"K\n\x04Tail\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"\xc6\x04\n\tAggregate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x41\n\ngroup_type\x18\x02 \x01(\x0e\x32".spark.connect.Aggregate.GroupTypeR\tgroupType\x12L\n\x14grouping_expressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12N\n\x15\x61ggregate_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x14\x61ggregateExpressions\x12\x34\n\x05pivot\x18\x05 \x01(\x0b\x32\x1e.spark.connect.Aggregate.PivotR\x05pivot\x1ao\n\x05Pivot\x12+\n\x03\x63ol\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values"\x81\x01\n\tGroupType\x12\x1a\n\x16GROUP_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12GROUP_TYPE_GROUPBY\x10\x01\x12\x15\n\x11GROUP_TYPE_ROLLUP\x10\x02\x12\x13\n\x0fGROUP_TYPE_CUBE\x10\x03\x12\x14\n\x10GROUP_TYPE_PIVOT\x10\x04"\xa0\x01\n\x04Sort\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x05order\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\x05order\x12 \n\tis_global\x18\x03 \x01(\x08H\x00R\x08isGlobal\x88\x01\x01\x42\x0c\n\n_is_global"d\n\x04\x44rop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12-\n\x04\x63ols\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x04\x63ols"\xab\x01\n\x0b\x44\x65\x64uplicate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames\x12\x32\n\x13\x61ll_columns_as_keys\x18\x03 \x01(\x08H\x00R\x10\x61llColumnsAsKeys\x88\x01\x01\x42\x16\n\x14_all_columns_as_keys"Y\n\rLocalRelation\x12\x17\n\x04\x64\x61ta\x18\x01 \x01(\x0cH\x00R\x04\x64\x61ta\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x42\x07\n\x05_dataB\t\n\x07_schema"\x91\x02\n\x06Sample\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1f\n\x0blower_bound\x18\x02 \x01(\x01R\nlowerBound\x12\x1f\n\x0bupper_bound\x18\x03 \x01(\x01R\nupperBound\x12.\n\x10with_replacement\x18\x04 \x01(\x08H\x00R\x0fwithReplacement\x88\x01\x01\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x01R\x04seed\x88\x01\x01\x12/\n\x13\x64\x65terministic_order\x18\x06 \x01(\x08R\x12\x64\x65terministicOrderB\x13\n\x11_with_replacementB\x07\n\x05_seed"\x91\x01\n\x05Range\x12\x19\n\x05start\x18\x01 \x01(\x03H\x00R\x05start\x88\x01\x01\x12\x10\n\x03\x65nd\x18\x02 \x01(\x03R\x03\x65nd\x12\x12\n\x04step\x18\x03 \x01(\x03R\x04step\x12*\n\x0enum_partitions\x18\x04 \x01(\x05H\x01R\rnumPartitions\x88\x01\x01\x42\x08\n\x06_startB\x11\n\x0f_num_partitions"r\n\rSubqueryAlias\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05\x61lias\x18\x02 \x01(\tR\x05\x61lias\x12\x1c\n\tqualifier\x18\x03 \x03(\tR\tqualifier"\x8e\x01\n\x0bRepartition\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12%\n\x0enum_partitions\x18\x02 \x01(\x05R\rnumPartitions\x12\x1d\n\x07shuffle\x18\x03 \x01(\x08H\x00R\x07shuffle\x88\x01\x01\x42\n\n\x08_shuffle"\x8e\x01\n\nShowString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate\x12\x1a\n\x08vertical\x18\x04 \x01(\x08R\x08vertical"\\\n\x0bStatSummary\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1e\n\nstatistics\x18\x02 \x03(\tR\nstatistics"Q\n\x0cStatDescribe\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols"e\n\x0cStatCrosstab\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"`\n\x07StatCov\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"\x89\x01\n\x08StatCorr\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2\x12\x1b\n\x06method\x18\x04 \x01(\tH\x00R\x06method\x88\x01\x01\x42\t\n\x07_method"\xa4\x01\n\x12StatApproxQuantile\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12$\n\rprobabilities\x18\x03 \x03(\x01R\rprobabilities\x12%\n\x0erelative_error\x18\x04 \x01(\x01R\rrelativeError"}\n\rStatFreqItems\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x1d\n\x07support\x18\x03 \x01(\x01H\x00R\x07support\x88\x01\x01\x42\n\n\x08_support"\xb5\x02\n\x0cStatSampleBy\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03\x63ol\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x42\n\tfractions\x18\x03 \x03(\x0b\x32$.spark.connect.StatSampleBy.FractionR\tfractions\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x00R\x04seed\x88\x01\x01\x1a\x63\n\x08\x46raction\x12;\n\x07stratum\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x07stratum\x12\x1a\n\x08\x66raction\x18\x02 \x01(\x01R\x08\x66ractionB\x07\n\x05_seed"\x86\x01\n\x06NAFill\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x39\n\x06values\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values"\x86\x01\n\x06NADrop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\'\n\rmin_non_nulls\x18\x03 \x01(\x05H\x00R\x0bminNonNulls\x88\x01\x01\x42\x10\n\x0e_min_non_nulls"\xa8\x02\n\tNAReplace\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12H\n\x0creplacements\x18\x03 \x03(\x0b\x32$.spark.connect.NAReplace.ReplacementR\x0creplacements\x1a\x8d\x01\n\x0bReplacement\x12>\n\told_value\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08oldValue\x12>\n\tnew_value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08newValue"r\n\x1eRenameColumnsBySameLengthNames\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames"\x83\x02\n\x1cRenameColumnsByNameToNameMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12o\n\x12rename_columns_map\x18\x02 \x03(\x0b\x32\x41.spark.connect.RenameColumnsByNameToNameMap.RenameColumnsMapEntryR\x10renameColumnsMap\x1a\x43\n\x15RenameColumnsMapEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"\x83\x01\n\x0bWithColumns\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x45\n\x0ename_expr_list\x18\x02 \x03(\x0b\x32\x1f.spark.connect.Expression.AliasR\x0cnameExprList"\x8c\x01\n\x04Hint\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x41\n\nparameters\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\nparameters"\xf6\x01\n\x07Unpivot\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03ids\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x03ids\x12\x31\n\x06values\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x06values\x12\x30\n\x14variable_column_name\x18\x04 \x01(\tR\x12variableColumnName\x12*\n\x11value_column_name\x18\x05 \x01(\tR\x0fvalueColumnName"j\n\x08ToSchema\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12/\n\x06schema\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema"\xcb\x01\n\x17RepartitionByExpression\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x0fpartition_exprs\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0epartitionExprs\x12*\n\x0enum_partitions\x18\x03 \x01(\x05H\x00R\rnumPartitions\x88\x01\x01\x42\x11\n\x0f_num_partitionsB"\n\x1eorg.apache.spark.connect.protoP\x01\x62\x06proto3' + b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fspark/connect/expressions.proto\x1a\x19spark/connect/types.proto\x1a\x1bspark/connect/catalog.proto"\xb4\x13\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66ilter\x18\x04 \x01(\x0b\x32\x15.spark.connect.FilterH\x00R\x06\x66ilter\x12)\n\x04join\x18\x05 \x01(\x0b\x32\x13.spark.connect.JoinH\x00R\x04join\x12\x34\n\x06set_op\x18\x06 \x01(\x0b\x32\x1b.spark.connect.SetOperationH\x00R\x05setOp\x12)\n\x04sort\x18\x07 \x01(\x0b\x32\x13.spark.connect.SortH\x00R\x04sort\x12,\n\x05limit\x18\x08 \x01(\x0b\x32\x14.spark.connect.LimitH\x00R\x05limit\x12\x38\n\taggregate\x18\t \x01(\x0b\x32\x18.spark.connect.AggregateH\x00R\taggregate\x12&\n\x03sql\x18\n \x01(\x0b\x32\x12.spark.connect.SQLH\x00R\x03sql\x12\x45\n\x0elocal_relation\x18\x0b \x01(\x0b\x32\x1c.spark.connect.LocalRelationH\x00R\rlocalRelation\x12/\n\x06sample\x18\x0c \x01(\x0b\x32\x15.spark.connect.SampleH\x00R\x06sample\x12/\n\x06offset\x18\r \x01(\x0b\x32\x15.spark.connect.OffsetH\x00R\x06offset\x12>\n\x0b\x64\x65\x64uplicate\x18\x0e \x01(\x0b\x32\x1a.spark.connect.DeduplicateH\x00R\x0b\x64\x65\x64uplicate\x12,\n\x05range\x18\x0f \x01(\x0b\x32\x14.spark.connect.RangeH\x00R\x05range\x12\x45\n\x0esubquery_alias\x18\x10 \x01(\x0b\x32\x1c.spark.connect.SubqueryAliasH\x00R\rsubqueryAlias\x12>\n\x0brepartition\x18\x11 \x01(\x0b\x32\x1a.spark.connect.RepartitionH\x00R\x0brepartition\x12|\n#rename_columns_by_same_length_names\x18\x12 \x01(\x0b\x32-.spark.connect.RenameColumnsBySameLengthNamesH\x00R\x1erenameColumnsBySameLengthNames\x12w\n"rename_columns_by_name_to_name_map\x18\x13 \x01(\x0b\x32+.spark.connect.RenameColumnsByNameToNameMapH\x00R\x1crenameColumnsByNameToNameMap\x12<\n\x0bshow_string\x18\x14 \x01(\x0b\x32\x19.spark.connect.ShowStringH\x00R\nshowString\x12)\n\x04\x64rop\x18\x15 \x01(\x0b\x32\x13.spark.connect.DropH\x00R\x04\x64rop\x12)\n\x04tail\x18\x16 \x01(\x0b\x32\x13.spark.connect.TailH\x00R\x04tail\x12?\n\x0cwith_columns\x18\x17 \x01(\x0b\x32\x1a.spark.connect.WithColumnsH\x00R\x0bwithColumns\x12)\n\x04hint\x18\x18 \x01(\x0b\x32\x13.spark.connect.HintH\x00R\x04hint\x12\x32\n\x07unpivot\x18\x19 \x01(\x0b\x32\x16.spark.connect.UnpivotH\x00R\x07unpivot\x12\x36\n\tto_schema\x18\x1a \x01(\x0b\x32\x17.spark.connect.ToSchemaH\x00R\x08toSchema\x12\x64\n\x19repartition_by_expression\x18\x1b \x01(\x0b\x32&.spark.connect.RepartitionByExpressionH\x00R\x17repartitionByExpression\x12\x45\n\x0esame_semantics\x18\x1d \x01(\x0b\x32\x1c.spark.connect.SameSemanticsH\x00R\rsameSemantics\x12\x30\n\x07\x66ill_na\x18Z \x01(\x0b\x32\x15.spark.connect.NAFillH\x00R\x06\x66illNa\x12\x30\n\x07\x64rop_na\x18[ \x01(\x0b\x32\x15.spark.connect.NADropH\x00R\x06\x64ropNa\x12\x34\n\x07replace\x18\\ \x01(\x0b\x32\x18.spark.connect.NAReplaceH\x00R\x07replace\x12\x36\n\x07summary\x18\x64 \x01(\x0b\x32\x1a.spark.connect.StatSummaryH\x00R\x07summary\x12\x39\n\x08\x63rosstab\x18\x65 \x01(\x0b\x32\x1b.spark.connect.StatCrosstabH\x00R\x08\x63rosstab\x12\x39\n\x08\x64\x65scribe\x18\x66 \x01(\x0b\x32\x1b.spark.connect.StatDescribeH\x00R\x08\x64\x65scribe\x12*\n\x03\x63ov\x18g \x01(\x0b\x32\x16.spark.connect.StatCovH\x00R\x03\x63ov\x12-\n\x04\x63orr\x18h \x01(\x0b\x32\x17.spark.connect.StatCorrH\x00R\x04\x63orr\x12L\n\x0f\x61pprox_quantile\x18i \x01(\x0b\x32!.spark.connect.StatApproxQuantileH\x00R\x0e\x61pproxQuantile\x12=\n\nfreq_items\x18j \x01(\x0b\x32\x1c.spark.connect.StatFreqItemsH\x00R\tfreqItems\x12:\n\tsample_by\x18k \x01(\x0b\x32\x1b.spark.connect.StatSampleByH\x00R\x08sampleBy\x12\x33\n\x07\x63\x61talog\x18\xc8\x01 \x01(\x0b\x32\x16.spark.connect.CatalogH\x00R\x07\x63\x61talog\x12\x35\n\textension\x18\xe6\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x33\n\x07unknown\x18\xe7\x07 \x01(\x0b\x32\x16.spark.connect.UnknownH\x00R\x07unknownB\n\n\x08rel_type"\t\n\x07Unknown"1\n\x0eRelationCommon\x12\x1f\n\x0bsource_info\x18\x01 \x01(\tR\nsourceInfo"\x1b\n\x03SQL\x12\x14\n\x05query\x18\x01 \x01(\tR\x05query"\xaa\x03\n\x04Read\x12\x41\n\x0bnamed_table\x18\x01 \x01(\x0b\x32\x1e.spark.connect.Read.NamedTableH\x00R\nnamedTable\x12\x41\n\x0b\x64\x61ta_source\x18\x02 \x01(\x0b\x32\x1e.spark.connect.Read.DataSourceH\x00R\ndataSource\x1a=\n\nNamedTable\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x1a\xcf\x01\n\nDataSource\x12\x16\n\x06\x66ormat\x18\x01 \x01(\tR\x06\x66ormat\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x00R\x06schema\x88\x01\x01\x12\x45\n\x07options\x18\x03 \x03(\x0b\x32+.spark.connect.Read.DataSource.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\t\n\x07_schemaB\x0b\n\tread_type"u\n\x07Project\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12;\n\x0b\x65xpressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0b\x65xpressions"p\n\x06\x46ilter\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x37\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\tcondition"\xd7\x03\n\x04Join\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12@\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\rjoinCondition\x12\x39\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeR\x08joinType\x12#\n\rusing_columns\x18\x05 \x03(\tR\x0cusingColumns"\xd0\x01\n\x08JoinType\x12\x19\n\x15JOIN_TYPE_UNSPECIFIED\x10\x00\x12\x13\n\x0fJOIN_TYPE_INNER\x10\x01\x12\x18\n\x14JOIN_TYPE_FULL_OUTER\x10\x02\x12\x18\n\x14JOIN_TYPE_LEFT_OUTER\x10\x03\x12\x19\n\x15JOIN_TYPE_RIGHT_OUTER\x10\x04\x12\x17\n\x13JOIN_TYPE_LEFT_ANTI\x10\x05\x12\x17\n\x13JOIN_TYPE_LEFT_SEMI\x10\x06\x12\x13\n\x0fJOIN_TYPE_CROSS\x10\x07"\x8c\x03\n\x0cSetOperation\x12\x36\n\nleft_input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\tleftInput\x12\x38\n\x0bright_input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\nrightInput\x12\x45\n\x0bset_op_type\x18\x03 \x01(\x0e\x32%.spark.connect.SetOperation.SetOpTypeR\tsetOpType\x12\x1a\n\x06is_all\x18\x04 \x01(\x08H\x00R\x05isAll\x88\x01\x01\x12\x1c\n\x07\x62y_name\x18\x05 \x01(\x08H\x01R\x06\x62yName\x88\x01\x01"r\n\tSetOpType\x12\x1b\n\x17SET_OP_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15SET_OP_TYPE_INTERSECT\x10\x01\x12\x15\n\x11SET_OP_TYPE_UNION\x10\x02\x12\x16\n\x12SET_OP_TYPE_EXCEPT\x10\x03\x42\t\n\x07_is_allB\n\n\x08_by_name"L\n\x05Limit\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"O\n\x06Offset\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x16\n\x06offset\x18\x02 \x01(\x05R\x06offset"K\n\x04Tail\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"\xc6\x04\n\tAggregate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x41\n\ngroup_type\x18\x02 \x01(\x0e\x32".spark.connect.Aggregate.GroupTypeR\tgroupType\x12L\n\x14grouping_expressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12N\n\x15\x61ggregate_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x14\x61ggregateExpressions\x12\x34\n\x05pivot\x18\x05 \x01(\x0b\x32\x1e.spark.connect.Aggregate.PivotR\x05pivot\x1ao\n\x05Pivot\x12+\n\x03\x63ol\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values"\x81\x01\n\tGroupType\x12\x1a\n\x16GROUP_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12GROUP_TYPE_GROUPBY\x10\x01\x12\x15\n\x11GROUP_TYPE_ROLLUP\x10\x02\x12\x13\n\x0fGROUP_TYPE_CUBE\x10\x03\x12\x14\n\x10GROUP_TYPE_PIVOT\x10\x04"\xa0\x01\n\x04Sort\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x05order\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\x05order\x12 \n\tis_global\x18\x03 \x01(\x08H\x00R\x08isGlobal\x88\x01\x01\x42\x0c\n\n_is_global"d\n\x04\x44rop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12-\n\x04\x63ols\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x04\x63ols"\xab\x01\n\x0b\x44\x65\x64uplicate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames\x12\x32\n\x13\x61ll_columns_as_keys\x18\x03 \x01(\x08H\x00R\x10\x61llColumnsAsKeys\x88\x01\x01\x42\x16\n\x14_all_columns_as_keys"Y\n\rLocalRelation\x12\x17\n\x04\x64\x61ta\x18\x01 \x01(\x0cH\x00R\x04\x64\x61ta\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x42\x07\n\x05_dataB\t\n\x07_schema"\x91\x02\n\x06Sample\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1f\n\x0blower_bound\x18\x02 \x01(\x01R\nlowerBound\x12\x1f\n\x0bupper_bound\x18\x03 \x01(\x01R\nupperBound\x12.\n\x10with_replacement\x18\x04 \x01(\x08H\x00R\x0fwithReplacement\x88\x01\x01\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x01R\x04seed\x88\x01\x01\x12/\n\x13\x64\x65terministic_order\x18\x06 \x01(\x08R\x12\x64\x65terministicOrderB\x13\n\x11_with_replacementB\x07\n\x05_seed"\x91\x01\n\x05Range\x12\x19\n\x05start\x18\x01 \x01(\x03H\x00R\x05start\x88\x01\x01\x12\x10\n\x03\x65nd\x18\x02 \x01(\x03R\x03\x65nd\x12\x12\n\x04step\x18\x03 \x01(\x03R\x04step\x12*\n\x0enum_partitions\x18\x04 \x01(\x05H\x01R\rnumPartitions\x88\x01\x01\x42\x08\n\x06_startB\x11\n\x0f_num_partitions"r\n\rSubqueryAlias\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05\x61lias\x18\x02 \x01(\tR\x05\x61lias\x12\x1c\n\tqualifier\x18\x03 \x03(\tR\tqualifier"\x8e\x01\n\x0bRepartition\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12%\n\x0enum_partitions\x18\x02 \x01(\x05R\rnumPartitions\x12\x1d\n\x07shuffle\x18\x03 \x01(\x08H\x00R\x07shuffle\x88\x01\x01\x42\n\n\x08_shuffle"\x8e\x01\n\nShowString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate\x12\x1a\n\x08vertical\x18\x04 \x01(\x08R\x08vertical"\\\n\x0bStatSummary\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1e\n\nstatistics\x18\x02 \x03(\tR\nstatistics"Q\n\x0cStatDescribe\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols"e\n\x0cStatCrosstab\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"`\n\x07StatCov\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"\x89\x01\n\x08StatCorr\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2\x12\x1b\n\x06method\x18\x04 \x01(\tH\x00R\x06method\x88\x01\x01\x42\t\n\x07_method"\xa4\x01\n\x12StatApproxQuantile\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12$\n\rprobabilities\x18\x03 \x03(\x01R\rprobabilities\x12%\n\x0erelative_error\x18\x04 \x01(\x01R\rrelativeError"}\n\rStatFreqItems\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x1d\n\x07support\x18\x03 \x01(\x01H\x00R\x07support\x88\x01\x01\x42\n\n\x08_support"\xb5\x02\n\x0cStatSampleBy\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03\x63ol\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x42\n\tfractions\x18\x03 \x03(\x0b\x32$.spark.connect.StatSampleBy.FractionR\tfractions\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x00R\x04seed\x88\x01\x01\x1a\x63\n\x08\x46raction\x12;\n\x07stratum\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x07stratum\x12\x1a\n\x08\x66raction\x18\x02 \x01(\x01R\x08\x66ractionB\x07\n\x05_seed"\x86\x01\n\x06NAFill\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x39\n\x06values\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values"\x86\x01\n\x06NADrop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\'\n\rmin_non_nulls\x18\x03 \x01(\x05H\x00R\x0bminNonNulls\x88\x01\x01\x42\x10\n\x0e_min_non_nulls"\xa8\x02\n\tNAReplace\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12H\n\x0creplacements\x18\x03 \x03(\x0b\x32$.spark.connect.NAReplace.ReplacementR\x0creplacements\x1a\x8d\x01\n\x0bReplacement\x12>\n\told_value\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08oldValue\x12>\n\tnew_value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08newValue"r\n\x1eRenameColumnsBySameLengthNames\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames"\x83\x02\n\x1cRenameColumnsByNameToNameMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12o\n\x12rename_columns_map\x18\x02 \x03(\x0b\x32\x41.spark.connect.RenameColumnsByNameToNameMap.RenameColumnsMapEntryR\x10renameColumnsMap\x1a\x43\n\x15RenameColumnsMapEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"\x83\x01\n\x0bWithColumns\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x45\n\x0ename_expr_list\x18\x02 \x03(\x0b\x32\x1f.spark.connect.Expression.AliasR\x0cnameExprList"\x8c\x01\n\x04Hint\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x41\n\nparameters\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\nparameters"\xf6\x01\n\x07Unpivot\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03ids\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x03ids\x12\x31\n\x06values\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x06values\x12\x30\n\x14variable_column_name\x18\x04 \x01(\tR\x12variableColumnName\x12*\n\x11value_column_name\x18\x05 \x01(\tR\x0fvalueColumnName"j\n\x08ToSchema\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12/\n\x06schema\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema"\xcb\x01\n\x17RepartitionByExpression\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x0fpartition_exprs\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0epartitionExprs\x12*\n\x0enum_partitions\x18\x03 \x01(\x05H\x00R\rnumPartitions\x88\x01\x01\x42\x11\n\x0f_num_partitions"m\n\rSameSemantics\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12-\n\x05other\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05otherB"\n\x1eorg.apache.spark.connect.protoP\x01\x62\x06proto3' ) @@ -89,6 +89,7 @@ _UNPIVOT = DESCRIPTOR.message_types_by_name["Unpivot"] _TOSCHEMA = DESCRIPTOR.message_types_by_name["ToSchema"] _REPARTITIONBYEXPRESSION = DESCRIPTOR.message_types_by_name["RepartitionByExpression"] +_SAMESEMANTICS = DESCRIPTOR.message_types_by_name["SameSemantics"] _JOIN_JOINTYPE = _JOIN.enum_types_by_name["JoinType"] _SETOPERATION_SETOPTYPE = _SETOPERATION.enum_types_by_name["SetOpType"] _AGGREGATE_GROUPTYPE = _AGGREGATE.enum_types_by_name["GroupType"] @@ -602,6 +603,17 @@ ) _sym_db.RegisterMessage(RepartitionByExpression) +SameSemantics = _reflection.GeneratedProtocolMessageType( + "SameSemantics", + (_message.Message,), + { + "DESCRIPTOR": _SAMESEMANTICS, + "__module__": "spark.connect.relations_pb2" + # @@protoc_insertion_point(class_scope:spark.connect.SameSemantics) + }, +) +_sym_db.RegisterMessage(SameSemantics) + if _descriptor._USE_C_DESCRIPTORS == False: DESCRIPTOR._options = None @@ -611,103 +623,105 @@ _RENAMECOLUMNSBYNAMETONAMEMAP_RENAMECOLUMNSMAPENTRY._options = None _RENAMECOLUMNSBYNAMETONAMEMAP_RENAMECOLUMNSMAPENTRY._serialized_options = b"8\001" _RELATION._serialized_start = 165 - _RELATION._serialized_end = 2578 - _UNKNOWN._serialized_start = 2580 - _UNKNOWN._serialized_end = 2589 - _RELATIONCOMMON._serialized_start = 2591 - _RELATIONCOMMON._serialized_end = 2640 - _SQL._serialized_start = 2642 - _SQL._serialized_end = 2669 - _READ._serialized_start = 2672 - _READ._serialized_end = 3098 - _READ_NAMEDTABLE._serialized_start = 2814 - _READ_NAMEDTABLE._serialized_end = 2875 - _READ_DATASOURCE._serialized_start = 2878 - _READ_DATASOURCE._serialized_end = 3085 - _READ_DATASOURCE_OPTIONSENTRY._serialized_start = 3016 - _READ_DATASOURCE_OPTIONSENTRY._serialized_end = 3074 - _PROJECT._serialized_start = 3100 - _PROJECT._serialized_end = 3217 - _FILTER._serialized_start = 3219 - _FILTER._serialized_end = 3331 - _JOIN._serialized_start = 3334 - _JOIN._serialized_end = 3805 - _JOIN_JOINTYPE._serialized_start = 3597 - _JOIN_JOINTYPE._serialized_end = 3805 - _SETOPERATION._serialized_start = 3808 - _SETOPERATION._serialized_end = 4204 - _SETOPERATION_SETOPTYPE._serialized_start = 4067 - _SETOPERATION_SETOPTYPE._serialized_end = 4181 - _LIMIT._serialized_start = 4206 - _LIMIT._serialized_end = 4282 - _OFFSET._serialized_start = 4284 - _OFFSET._serialized_end = 4363 - _TAIL._serialized_start = 4365 - _TAIL._serialized_end = 4440 - _AGGREGATE._serialized_start = 4443 - _AGGREGATE._serialized_end = 5025 - _AGGREGATE_PIVOT._serialized_start = 4782 - _AGGREGATE_PIVOT._serialized_end = 4893 - _AGGREGATE_GROUPTYPE._serialized_start = 4896 - _AGGREGATE_GROUPTYPE._serialized_end = 5025 - _SORT._serialized_start = 5028 - _SORT._serialized_end = 5188 - _DROP._serialized_start = 5190 - _DROP._serialized_end = 5290 - _DEDUPLICATE._serialized_start = 5293 - _DEDUPLICATE._serialized_end = 5464 - _LOCALRELATION._serialized_start = 5466 - _LOCALRELATION._serialized_end = 5555 - _SAMPLE._serialized_start = 5558 - _SAMPLE._serialized_end = 5831 - _RANGE._serialized_start = 5834 - _RANGE._serialized_end = 5979 - _SUBQUERYALIAS._serialized_start = 5981 - _SUBQUERYALIAS._serialized_end = 6095 - _REPARTITION._serialized_start = 6098 - _REPARTITION._serialized_end = 6240 - _SHOWSTRING._serialized_start = 6243 - _SHOWSTRING._serialized_end = 6385 - _STATSUMMARY._serialized_start = 6387 - _STATSUMMARY._serialized_end = 6479 - _STATDESCRIBE._serialized_start = 6481 - _STATDESCRIBE._serialized_end = 6562 - _STATCROSSTAB._serialized_start = 6564 - _STATCROSSTAB._serialized_end = 6665 - _STATCOV._serialized_start = 6667 - _STATCOV._serialized_end = 6763 - _STATCORR._serialized_start = 6766 - _STATCORR._serialized_end = 6903 - _STATAPPROXQUANTILE._serialized_start = 6906 - _STATAPPROXQUANTILE._serialized_end = 7070 - _STATFREQITEMS._serialized_start = 7072 - _STATFREQITEMS._serialized_end = 7197 - _STATSAMPLEBY._serialized_start = 7200 - _STATSAMPLEBY._serialized_end = 7509 - _STATSAMPLEBY_FRACTION._serialized_start = 7401 - _STATSAMPLEBY_FRACTION._serialized_end = 7500 - _NAFILL._serialized_start = 7512 - _NAFILL._serialized_end = 7646 - _NADROP._serialized_start = 7649 - _NADROP._serialized_end = 7783 - _NAREPLACE._serialized_start = 7786 - _NAREPLACE._serialized_end = 8082 - _NAREPLACE_REPLACEMENT._serialized_start = 7941 - _NAREPLACE_REPLACEMENT._serialized_end = 8082 - _RENAMECOLUMNSBYSAMELENGTHNAMES._serialized_start = 8084 - _RENAMECOLUMNSBYSAMELENGTHNAMES._serialized_end = 8198 - _RENAMECOLUMNSBYNAMETONAMEMAP._serialized_start = 8201 - _RENAMECOLUMNSBYNAMETONAMEMAP._serialized_end = 8460 - _RENAMECOLUMNSBYNAMETONAMEMAP_RENAMECOLUMNSMAPENTRY._serialized_start = 8393 - _RENAMECOLUMNSBYNAMETONAMEMAP_RENAMECOLUMNSMAPENTRY._serialized_end = 8460 - _WITHCOLUMNS._serialized_start = 8463 - _WITHCOLUMNS._serialized_end = 8594 - _HINT._serialized_start = 8597 - _HINT._serialized_end = 8737 - _UNPIVOT._serialized_start = 8740 - _UNPIVOT._serialized_end = 8986 - _TOSCHEMA._serialized_start = 8988 - _TOSCHEMA._serialized_end = 9094 - _REPARTITIONBYEXPRESSION._serialized_start = 9097 - _REPARTITIONBYEXPRESSION._serialized_end = 9300 + _RELATION._serialized_end = 2649 + _UNKNOWN._serialized_start = 2651 + _UNKNOWN._serialized_end = 2660 + _RELATIONCOMMON._serialized_start = 2662 + _RELATIONCOMMON._serialized_end = 2711 + _SQL._serialized_start = 2713 + _SQL._serialized_end = 2740 + _READ._serialized_start = 2743 + _READ._serialized_end = 3169 + _READ_NAMEDTABLE._serialized_start = 2885 + _READ_NAMEDTABLE._serialized_end = 2946 + _READ_DATASOURCE._serialized_start = 2949 + _READ_DATASOURCE._serialized_end = 3156 + _READ_DATASOURCE_OPTIONSENTRY._serialized_start = 3087 + _READ_DATASOURCE_OPTIONSENTRY._serialized_end = 3145 + _PROJECT._serialized_start = 3171 + _PROJECT._serialized_end = 3288 + _FILTER._serialized_start = 3290 + _FILTER._serialized_end = 3402 + _JOIN._serialized_start = 3405 + _JOIN._serialized_end = 3876 + _JOIN_JOINTYPE._serialized_start = 3668 + _JOIN_JOINTYPE._serialized_end = 3876 + _SETOPERATION._serialized_start = 3879 + _SETOPERATION._serialized_end = 4275 + _SETOPERATION_SETOPTYPE._serialized_start = 4138 + _SETOPERATION_SETOPTYPE._serialized_end = 4252 + _LIMIT._serialized_start = 4277 + _LIMIT._serialized_end = 4353 + _OFFSET._serialized_start = 4355 + _OFFSET._serialized_end = 4434 + _TAIL._serialized_start = 4436 + _TAIL._serialized_end = 4511 + _AGGREGATE._serialized_start = 4514 + _AGGREGATE._serialized_end = 5096 + _AGGREGATE_PIVOT._serialized_start = 4853 + _AGGREGATE_PIVOT._serialized_end = 4964 + _AGGREGATE_GROUPTYPE._serialized_start = 4967 + _AGGREGATE_GROUPTYPE._serialized_end = 5096 + _SORT._serialized_start = 5099 + _SORT._serialized_end = 5259 + _DROP._serialized_start = 5261 + _DROP._serialized_end = 5361 + _DEDUPLICATE._serialized_start = 5364 + _DEDUPLICATE._serialized_end = 5535 + _LOCALRELATION._serialized_start = 5537 + _LOCALRELATION._serialized_end = 5626 + _SAMPLE._serialized_start = 5629 + _SAMPLE._serialized_end = 5902 + _RANGE._serialized_start = 5905 + _RANGE._serialized_end = 6050 + _SUBQUERYALIAS._serialized_start = 6052 + _SUBQUERYALIAS._serialized_end = 6166 + _REPARTITION._serialized_start = 6169 + _REPARTITION._serialized_end = 6311 + _SHOWSTRING._serialized_start = 6314 + _SHOWSTRING._serialized_end = 6456 + _STATSUMMARY._serialized_start = 6458 + _STATSUMMARY._serialized_end = 6550 + _STATDESCRIBE._serialized_start = 6552 + _STATDESCRIBE._serialized_end = 6633 + _STATCROSSTAB._serialized_start = 6635 + _STATCROSSTAB._serialized_end = 6736 + _STATCOV._serialized_start = 6738 + _STATCOV._serialized_end = 6834 + _STATCORR._serialized_start = 6837 + _STATCORR._serialized_end = 6974 + _STATAPPROXQUANTILE._serialized_start = 6977 + _STATAPPROXQUANTILE._serialized_end = 7141 + _STATFREQITEMS._serialized_start = 7143 + _STATFREQITEMS._serialized_end = 7268 + _STATSAMPLEBY._serialized_start = 7271 + _STATSAMPLEBY._serialized_end = 7580 + _STATSAMPLEBY_FRACTION._serialized_start = 7472 + _STATSAMPLEBY_FRACTION._serialized_end = 7571 + _NAFILL._serialized_start = 7583 + _NAFILL._serialized_end = 7717 + _NADROP._serialized_start = 7720 + _NADROP._serialized_end = 7854 + _NAREPLACE._serialized_start = 7857 + _NAREPLACE._serialized_end = 8153 + _NAREPLACE_REPLACEMENT._serialized_start = 8012 + _NAREPLACE_REPLACEMENT._serialized_end = 8153 + _RENAMECOLUMNSBYSAMELENGTHNAMES._serialized_start = 8155 + _RENAMECOLUMNSBYSAMELENGTHNAMES._serialized_end = 8269 + _RENAMECOLUMNSBYNAMETONAMEMAP._serialized_start = 8272 + _RENAMECOLUMNSBYNAMETONAMEMAP._serialized_end = 8531 + _RENAMECOLUMNSBYNAMETONAMEMAP_RENAMECOLUMNSMAPENTRY._serialized_start = 8464 + _RENAMECOLUMNSBYNAMETONAMEMAP_RENAMECOLUMNSMAPENTRY._serialized_end = 8531 + _WITHCOLUMNS._serialized_start = 8534 + _WITHCOLUMNS._serialized_end = 8665 + _HINT._serialized_start = 8668 + _HINT._serialized_end = 8808 + _UNPIVOT._serialized_start = 8811 + _UNPIVOT._serialized_end = 9057 + _TOSCHEMA._serialized_start = 9059 + _TOSCHEMA._serialized_end = 9165 + _REPARTITIONBYEXPRESSION._serialized_start = 9168 + _REPARTITIONBYEXPRESSION._serialized_end = 9371 + _SAMESEMANTICS._serialized_start = 9373 + _SAMESEMANTICS._serialized_end = 9482 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/relations_pb2.pyi b/python/pyspark/sql/connect/proto/relations_pb2.pyi index 811f005d24b73..90d72326e01b2 100644 --- a/python/pyspark/sql/connect/proto/relations_pb2.pyi +++ b/python/pyspark/sql/connect/proto/relations_pb2.pyi @@ -53,6 +53,7 @@ else: DESCRIPTOR: google.protobuf.descriptor.FileDescriptor +@typing_extensions.final class Relation(google.protobuf.message.Message): """The main [[Relation]] type. Fundamentally, a relation is a typed container that has exactly one explicit relation type set. @@ -89,6 +90,7 @@ class Relation(google.protobuf.message.Message): UNPIVOT_FIELD_NUMBER: builtins.int TO_SCHEMA_FIELD_NUMBER: builtins.int REPARTITION_BY_EXPRESSION_FIELD_NUMBER: builtins.int + SAME_SEMANTICS_FIELD_NUMBER: builtins.int FILL_NA_FIELD_NUMBER: builtins.int DROP_NA_FIELD_NUMBER: builtins.int REPLACE_FIELD_NUMBER: builtins.int @@ -158,6 +160,8 @@ class Relation(google.protobuf.message.Message): @property def repartition_by_expression(self) -> global___RepartitionByExpression: ... @property + def same_semantics(self) -> global___SameSemantics: ... + @property def fill_na(self) -> global___NAFill: """NA functions""" @property @@ -221,6 +225,7 @@ class Relation(google.protobuf.message.Message): unpivot: global___Unpivot | None = ..., to_schema: global___ToSchema | None = ..., repartition_by_expression: global___RepartitionByExpression | None = ..., + same_semantics: global___SameSemantics | None = ..., fill_na: global___NAFill | None = ..., drop_na: global___NADrop | None = ..., replace: global___NAReplace | None = ..., @@ -297,6 +302,8 @@ class Relation(google.protobuf.message.Message): b"repartition_by_expression", "replace", b"replace", + "same_semantics", + b"same_semantics", "sample", b"sample", "sample_by", @@ -386,6 +393,8 @@ class Relation(google.protobuf.message.Message): b"repartition_by_expression", "replace", b"replace", + "same_semantics", + b"same_semantics", "sample", b"sample", "sample_by", @@ -443,6 +452,7 @@ class Relation(google.protobuf.message.Message): "unpivot", "to_schema", "repartition_by_expression", + "same_semantics", "fill_na", "drop_na", "replace", @@ -461,6 +471,7 @@ class Relation(google.protobuf.message.Message): global___Relation = Relation +@typing_extensions.final class Unknown(google.protobuf.message.Message): """Used for testing purposes only.""" @@ -472,6 +483,7 @@ class Unknown(google.protobuf.message.Message): global___Unknown = Unknown +@typing_extensions.final class RelationCommon(google.protobuf.message.Message): """Common metadata of all relations.""" @@ -491,6 +503,7 @@ class RelationCommon(google.protobuf.message.Message): global___RelationCommon = RelationCommon +@typing_extensions.final class SQL(google.protobuf.message.Message): """Relation that uses a SQL query to generate the output.""" @@ -508,6 +521,7 @@ class SQL(google.protobuf.message.Message): global___SQL = SQL +@typing_extensions.final class Read(google.protobuf.message.Message): """Relation that reads from a file / table or other data source. Does not have additional inputs. @@ -515,6 +529,7 @@ class Read(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor + @typing_extensions.final class NamedTable(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -531,9 +546,11 @@ class Read(google.protobuf.message.Message): field_name: typing_extensions.Literal["unparsed_identifier", b"unparsed_identifier"], ) -> None: ... + @typing_extensions.final class DataSource(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor + @typing_extensions.final class OptionsEntry(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -623,6 +640,7 @@ class Read(google.protobuf.message.Message): global___Read = Read +@typing_extensions.final class Project(google.protobuf.message.Message): """Projection of a bag of expressions for a given input relation. @@ -664,6 +682,7 @@ class Project(google.protobuf.message.Message): global___Project = Project +@typing_extensions.final class Filter(google.protobuf.message.Message): """Relation that applies a boolean expression `condition` on each row of `input` to produce the output result. @@ -694,6 +713,7 @@ class Filter(google.protobuf.message.Message): global___Filter = Filter +@typing_extensions.final class Join(google.protobuf.message.Message): """Relation of type [[Join]]. @@ -792,6 +812,7 @@ class Join(google.protobuf.message.Message): global___Join = Join +@typing_extensions.final class SetOperation(google.protobuf.message.Message): """Relation of type [[SetOperation]]""" @@ -899,6 +920,7 @@ class SetOperation(google.protobuf.message.Message): global___SetOperation = SetOperation +@typing_extensions.final class Limit(google.protobuf.message.Message): """Relation of type [[Limit]] that is used to `limit` rows from the input relation.""" @@ -926,6 +948,7 @@ class Limit(google.protobuf.message.Message): global___Limit = Limit +@typing_extensions.final class Offset(google.protobuf.message.Message): """Relation of type [[Offset]] that is used to read rows staring from the `offset` on the input relation. @@ -955,6 +978,7 @@ class Offset(google.protobuf.message.Message): global___Offset = Offset +@typing_extensions.final class Tail(google.protobuf.message.Message): """Relation of type [[Tail]] that is used to fetch `limit` rows from the last of the input relation.""" @@ -982,6 +1006,7 @@ class Tail(google.protobuf.message.Message): global___Tail = Tail +@typing_extensions.final class Aggregate(google.protobuf.message.Message): """Relation of type [[Aggregate]].""" @@ -1009,6 +1034,7 @@ class Aggregate(google.protobuf.message.Message): GROUP_TYPE_CUBE: Aggregate.GroupType.ValueType # 3 GROUP_TYPE_PIVOT: Aggregate.GroupType.ValueType # 4 + @typing_extensions.final class Pivot(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -1107,6 +1133,7 @@ class Aggregate(google.protobuf.message.Message): global___Aggregate = Aggregate +@typing_extensions.final class Sort(google.protobuf.message.Message): """Relation of type [[Sort]].""" @@ -1162,6 +1189,7 @@ class Sort(google.protobuf.message.Message): global___Sort = Sort +@typing_extensions.final class Drop(google.protobuf.message.Message): """Drop specified columns.""" @@ -1198,6 +1226,7 @@ class Drop(google.protobuf.message.Message): global___Drop = Drop +@typing_extensions.final class Deduplicate(google.protobuf.message.Message): """Relation of type [[Deduplicate]] which have duplicate rows removed, could consider either only the subset of columns or all the columns. @@ -1262,6 +1291,7 @@ class Deduplicate(google.protobuf.message.Message): global___Deduplicate = Deduplicate +@typing_extensions.final class LocalRelation(google.protobuf.message.Message): """A relation that does not need to be qualified by name.""" @@ -1309,6 +1339,7 @@ class LocalRelation(google.protobuf.message.Message): global___LocalRelation = LocalRelation +@typing_extensions.final class Sample(google.protobuf.message.Message): """Relation of type [[Sample]] that samples a fraction of the dataset.""" @@ -1393,6 +1424,7 @@ class Sample(google.protobuf.message.Message): global___Sample = Sample +@typing_extensions.final class Range(google.protobuf.message.Message): """Relation of type [[Range]] that generates a sequence of integers.""" @@ -1461,6 +1493,7 @@ class Range(google.protobuf.message.Message): global___Range = Range +@typing_extensions.final class SubqueryAlias(google.protobuf.message.Message): """Relation alias.""" @@ -1498,6 +1531,7 @@ class SubqueryAlias(google.protobuf.message.Message): global___SubqueryAlias = SubqueryAlias +@typing_extensions.final class Repartition(google.protobuf.message.Message): """Relation repartition.""" @@ -1545,6 +1579,7 @@ class Repartition(google.protobuf.message.Message): global___Repartition = Repartition +@typing_extensions.final class ShowString(google.protobuf.message.Message): """Compose the string representing rows for output. It will invoke 'Dataset.showString' to compute the results. @@ -1594,6 +1629,7 @@ class ShowString(google.protobuf.message.Message): global___ShowString = ShowString +@typing_extensions.final class StatSummary(google.protobuf.message.Message): """Computes specified statistics for numeric and string columns. It will invoke 'Dataset.summary' (same as 'StatFunctions.summary') @@ -1641,6 +1677,7 @@ class StatSummary(google.protobuf.message.Message): global___StatSummary = StatSummary +@typing_extensions.final class StatDescribe(google.protobuf.message.Message): """Computes basic statistics for numeric and string columns, including count, mean, stddev, min, and max. If no columns are given, this function computes statistics for all numerical or @@ -1674,6 +1711,7 @@ class StatDescribe(google.protobuf.message.Message): global___StatDescribe = StatDescribe +@typing_extensions.final class StatCrosstab(google.protobuf.message.Message): """Computes a pair-wise frequency table of the given columns. Also known as a contingency table. It will invoke 'Dataset.stat.crosstab' (same as 'StatFunctions.crossTabulate') @@ -1715,6 +1753,7 @@ class StatCrosstab(google.protobuf.message.Message): global___StatCrosstab = StatCrosstab +@typing_extensions.final class StatCov(google.protobuf.message.Message): """Calculate the sample covariance of two numerical columns of a DataFrame. It will invoke 'Dataset.stat.cov' (same as 'StatFunctions.calculateCov') to compute the results. @@ -1749,6 +1788,7 @@ class StatCov(google.protobuf.message.Message): global___StatCov = StatCov +@typing_extensions.final class StatCorr(google.protobuf.message.Message): """Calculates the correlation of two columns of a DataFrame. Currently only supports the Pearson Correlation Coefficient. It will invoke 'Dataset.stat.corr' (same as @@ -1808,6 +1848,7 @@ class StatCorr(google.protobuf.message.Message): global___StatCorr = StatCorr +@typing_extensions.final class StatApproxQuantile(google.protobuf.message.Message): """Calculates the approximate quantiles of numerical columns of a DataFrame. It will invoke 'Dataset.stat.approxQuantile' (same as 'StatFunctions.approxQuantile') @@ -1870,6 +1911,7 @@ class StatApproxQuantile(google.protobuf.message.Message): global___StatApproxQuantile = StatApproxQuantile +@typing_extensions.final class StatFreqItems(google.protobuf.message.Message): """Finding frequent items for columns, possibly with false positives. It will invoke 'Dataset.stat.freqItems' (same as 'StatFunctions.freqItems') @@ -1918,6 +1960,7 @@ class StatFreqItems(google.protobuf.message.Message): global___StatFreqItems = StatFreqItems +@typing_extensions.final class StatSampleBy(google.protobuf.message.Message): """Returns a stratified sample without replacement based on the fraction given on each stratum. @@ -1927,6 +1970,7 @@ class StatSampleBy(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor + @typing_extensions.final class Fraction(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -2008,6 +2052,7 @@ class StatSampleBy(google.protobuf.message.Message): global___StatSampleBy = StatSampleBy +@typing_extensions.final class NAFill(google.protobuf.message.Message): """Replaces null values. It will invoke 'Dataset.na.fill' (same as 'DataFrameNaFunctions.fill') to compute the results. @@ -2066,6 +2111,7 @@ class NAFill(google.protobuf.message.Message): global___NAFill = NAFill +@typing_extensions.final class NADrop(google.protobuf.message.Message): """Drop rows containing null values. It will invoke 'Dataset.na.drop' (same as 'DataFrameNaFunctions.drop') to compute the results. @@ -2134,6 +2180,7 @@ class NADrop(google.protobuf.message.Message): global___NADrop = NADrop +@typing_extensions.final class NAReplace(google.protobuf.message.Message): """Replaces old values with the corresponding values. It will invoke 'Dataset.na.replace' (same as 'DataFrameNaFunctions.replace') @@ -2142,6 +2189,7 @@ class NAReplace(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor + @typing_extensions.final class Replacement(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -2218,6 +2266,7 @@ class NAReplace(google.protobuf.message.Message): global___NAReplace = NAReplace +@typing_extensions.final class RenameColumnsBySameLengthNames(google.protobuf.message.Message): """Rename columns on the input relation by the same length of names.""" @@ -2253,11 +2302,13 @@ class RenameColumnsBySameLengthNames(google.protobuf.message.Message): global___RenameColumnsBySameLengthNames = RenameColumnsBySameLengthNames +@typing_extensions.final class RenameColumnsByNameToNameMap(google.protobuf.message.Message): """Rename columns on the input relation by a map with name to name mapping.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor + @typing_extensions.final class RenameColumnsMapEntry(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -2309,6 +2360,7 @@ class RenameColumnsByNameToNameMap(google.protobuf.message.Message): global___RenameColumnsByNameToNameMap = RenameColumnsByNameToNameMap +@typing_extensions.final class WithColumns(google.protobuf.message.Message): """Adding columns or replacing the existing columns that have the same names.""" @@ -2356,6 +2408,7 @@ class WithColumns(google.protobuf.message.Message): global___WithColumns = WithColumns +@typing_extensions.final class Hint(google.protobuf.message.Message): """Specify a hint over a relation. Hint should have a name and optional parameters.""" @@ -2403,6 +2456,7 @@ class Hint(google.protobuf.message.Message): global___Hint = Hint +@typing_extensions.final class Unpivot(google.protobuf.message.Message): """Unpivot a DataFrame from wide format to long format, optionally leaving identifier columns set.""" @@ -2466,6 +2520,7 @@ class Unpivot(google.protobuf.message.Message): global___Unpivot = Unpivot +@typing_extensions.final class ToSchema(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -2495,6 +2550,7 @@ class ToSchema(google.protobuf.message.Message): global___ToSchema = ToSchema +@typing_extensions.final class RepartitionByExpression(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -2552,3 +2608,30 @@ class RepartitionByExpression(google.protobuf.message.Message): ) -> typing_extensions.Literal["num_partitions"] | None: ... global___RepartitionByExpression = RepartitionByExpression + +@typing_extensions.final +class SameSemantics(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + INPUT_FIELD_NUMBER: builtins.int + OTHER_FIELD_NUMBER: builtins.int + @property + def input(self) -> global___Relation: + """(Required) The input relation.""" + @property + def other(self) -> global___Relation: + """(Required) The other Relation to compare against.""" + def __init__( + self, + *, + input: global___Relation | None = ..., + other: global___Relation | None = ..., + ) -> None: ... + def HasField( + self, field_name: typing_extensions.Literal["input", b"input", "other", b"other"] + ) -> builtins.bool: ... + def ClearField( + self, field_name: typing_extensions.Literal["input", b"input", "other", b"other"] + ) -> None: ... + +global___SameSemantics = SameSemantics diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index e3646cd7d950c..aedd1a9f28e51 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -4864,6 +4864,9 @@ def sameSemantics(self, other: "DataFrame") -> bool: .. versionadded:: 3.1.0 + .. versionchanged:: 3.4.0 + Support Spark Connect. + Notes ----- The equality comparison here is simplified by tolerating the cosmetic differences diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py index 0feed38214020..1488a7fe30497 100644 --- a/python/pyspark/sql/tests/connect/test_connect_basic.py +++ b/python/pyspark/sql/tests/connect/test_connect_basic.py @@ -1961,11 +1961,17 @@ def test_unsupported_functions(self): "localCheckpoint", "_repr_html_", "semanticHash", - "sameSemantics", ): with self.assertRaises(NotImplementedError): getattr(df, f)() + def test_same_semantics(self): + df1 = self.connect.read.table(self.tbl_name).limit(10) + df2 = self.connect.read.table(self.tbl_name).limit(10) + df3 = self.connect.read.table(self.tbl_name).limit(1) + + self.assertTrue(df1.sameSemantics(df2)) + self.assertFalse(df1.sameSemantics(df3)) @unittest.skipIf(not should_test_connect, connect_requirement_message) class ChannelBuilderTests(ReusedPySparkTestCase): From ac2e98a8b57eed6d22bf8aa2f8ee439959630df0 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 6 Jan 2023 02:39:19 -0800 Subject: [PATCH 2/3] formatting --- .../apache/spark/sql/connect/planner/SparkConnectPlanner.scala | 1 - python/pyspark/sql/tests/connect/test_connect_basic.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index 4029f64d8702e..90e398f38271d 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -548,7 +548,6 @@ class SparkConnectPlanner(session: SparkSession) { data = Tuple1.apply(sameSemantics) :: Nil) } - private def transformDeduplicate(rel: proto.Deduplicate): LogicalPlan = { if (!rel.hasInput) { throw InvalidPlanInput("Deduplicate needs a plan input") diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py index 1488a7fe30497..6a7cf077435ba 100644 --- a/python/pyspark/sql/tests/connect/test_connect_basic.py +++ b/python/pyspark/sql/tests/connect/test_connect_basic.py @@ -1973,6 +1973,7 @@ def test_same_semantics(self): self.assertTrue(df1.sameSemantics(df2)) self.assertFalse(df1.sameSemantics(df3)) + @unittest.skipIf(not should_test_connect, connect_requirement_message) class ChannelBuilderTests(ReusedPySparkTestCase): def test_invalid_connection_strings(self): From 95affef5657008a16a94a155a9954c73b6f945bc Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 6 Jan 2023 02:45:42 -0800 Subject: [PATCH 3/3] Update relations_pb2.pyi --- .../sql/connect/proto/relations_pb2.pyi | 48 ------------------- 1 file changed, 48 deletions(-) diff --git a/python/pyspark/sql/connect/proto/relations_pb2.pyi b/python/pyspark/sql/connect/proto/relations_pb2.pyi index 90d72326e01b2..4509c5f01f713 100644 --- a/python/pyspark/sql/connect/proto/relations_pb2.pyi +++ b/python/pyspark/sql/connect/proto/relations_pb2.pyi @@ -53,7 +53,6 @@ else: DESCRIPTOR: google.protobuf.descriptor.FileDescriptor -@typing_extensions.final class Relation(google.protobuf.message.Message): """The main [[Relation]] type. Fundamentally, a relation is a typed container that has exactly one explicit relation type set. @@ -471,7 +470,6 @@ class Relation(google.protobuf.message.Message): global___Relation = Relation -@typing_extensions.final class Unknown(google.protobuf.message.Message): """Used for testing purposes only.""" @@ -483,7 +481,6 @@ class Unknown(google.protobuf.message.Message): global___Unknown = Unknown -@typing_extensions.final class RelationCommon(google.protobuf.message.Message): """Common metadata of all relations.""" @@ -503,7 +500,6 @@ class RelationCommon(google.protobuf.message.Message): global___RelationCommon = RelationCommon -@typing_extensions.final class SQL(google.protobuf.message.Message): """Relation that uses a SQL query to generate the output.""" @@ -521,7 +517,6 @@ class SQL(google.protobuf.message.Message): global___SQL = SQL -@typing_extensions.final class Read(google.protobuf.message.Message): """Relation that reads from a file / table or other data source. Does not have additional inputs. @@ -529,7 +524,6 @@ class Read(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor - @typing_extensions.final class NamedTable(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -546,11 +540,9 @@ class Read(google.protobuf.message.Message): field_name: typing_extensions.Literal["unparsed_identifier", b"unparsed_identifier"], ) -> None: ... - @typing_extensions.final class DataSource(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor - @typing_extensions.final class OptionsEntry(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -640,7 +632,6 @@ class Read(google.protobuf.message.Message): global___Read = Read -@typing_extensions.final class Project(google.protobuf.message.Message): """Projection of a bag of expressions for a given input relation. @@ -682,7 +673,6 @@ class Project(google.protobuf.message.Message): global___Project = Project -@typing_extensions.final class Filter(google.protobuf.message.Message): """Relation that applies a boolean expression `condition` on each row of `input` to produce the output result. @@ -713,7 +703,6 @@ class Filter(google.protobuf.message.Message): global___Filter = Filter -@typing_extensions.final class Join(google.protobuf.message.Message): """Relation of type [[Join]]. @@ -812,7 +801,6 @@ class Join(google.protobuf.message.Message): global___Join = Join -@typing_extensions.final class SetOperation(google.protobuf.message.Message): """Relation of type [[SetOperation]]""" @@ -920,7 +908,6 @@ class SetOperation(google.protobuf.message.Message): global___SetOperation = SetOperation -@typing_extensions.final class Limit(google.protobuf.message.Message): """Relation of type [[Limit]] that is used to `limit` rows from the input relation.""" @@ -948,7 +935,6 @@ class Limit(google.protobuf.message.Message): global___Limit = Limit -@typing_extensions.final class Offset(google.protobuf.message.Message): """Relation of type [[Offset]] that is used to read rows staring from the `offset` on the input relation. @@ -978,7 +964,6 @@ class Offset(google.protobuf.message.Message): global___Offset = Offset -@typing_extensions.final class Tail(google.protobuf.message.Message): """Relation of type [[Tail]] that is used to fetch `limit` rows from the last of the input relation.""" @@ -1006,7 +991,6 @@ class Tail(google.protobuf.message.Message): global___Tail = Tail -@typing_extensions.final class Aggregate(google.protobuf.message.Message): """Relation of type [[Aggregate]].""" @@ -1034,7 +1018,6 @@ class Aggregate(google.protobuf.message.Message): GROUP_TYPE_CUBE: Aggregate.GroupType.ValueType # 3 GROUP_TYPE_PIVOT: Aggregate.GroupType.ValueType # 4 - @typing_extensions.final class Pivot(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -1133,7 +1116,6 @@ class Aggregate(google.protobuf.message.Message): global___Aggregate = Aggregate -@typing_extensions.final class Sort(google.protobuf.message.Message): """Relation of type [[Sort]].""" @@ -1189,7 +1171,6 @@ class Sort(google.protobuf.message.Message): global___Sort = Sort -@typing_extensions.final class Drop(google.protobuf.message.Message): """Drop specified columns.""" @@ -1226,7 +1207,6 @@ class Drop(google.protobuf.message.Message): global___Drop = Drop -@typing_extensions.final class Deduplicate(google.protobuf.message.Message): """Relation of type [[Deduplicate]] which have duplicate rows removed, could consider either only the subset of columns or all the columns. @@ -1291,7 +1271,6 @@ class Deduplicate(google.protobuf.message.Message): global___Deduplicate = Deduplicate -@typing_extensions.final class LocalRelation(google.protobuf.message.Message): """A relation that does not need to be qualified by name.""" @@ -1339,7 +1318,6 @@ class LocalRelation(google.protobuf.message.Message): global___LocalRelation = LocalRelation -@typing_extensions.final class Sample(google.protobuf.message.Message): """Relation of type [[Sample]] that samples a fraction of the dataset.""" @@ -1424,7 +1402,6 @@ class Sample(google.protobuf.message.Message): global___Sample = Sample -@typing_extensions.final class Range(google.protobuf.message.Message): """Relation of type [[Range]] that generates a sequence of integers.""" @@ -1493,7 +1470,6 @@ class Range(google.protobuf.message.Message): global___Range = Range -@typing_extensions.final class SubqueryAlias(google.protobuf.message.Message): """Relation alias.""" @@ -1531,7 +1507,6 @@ class SubqueryAlias(google.protobuf.message.Message): global___SubqueryAlias = SubqueryAlias -@typing_extensions.final class Repartition(google.protobuf.message.Message): """Relation repartition.""" @@ -1579,7 +1554,6 @@ class Repartition(google.protobuf.message.Message): global___Repartition = Repartition -@typing_extensions.final class ShowString(google.protobuf.message.Message): """Compose the string representing rows for output. It will invoke 'Dataset.showString' to compute the results. @@ -1629,7 +1603,6 @@ class ShowString(google.protobuf.message.Message): global___ShowString = ShowString -@typing_extensions.final class StatSummary(google.protobuf.message.Message): """Computes specified statistics for numeric and string columns. It will invoke 'Dataset.summary' (same as 'StatFunctions.summary') @@ -1677,7 +1650,6 @@ class StatSummary(google.protobuf.message.Message): global___StatSummary = StatSummary -@typing_extensions.final class StatDescribe(google.protobuf.message.Message): """Computes basic statistics for numeric and string columns, including count, mean, stddev, min, and max. If no columns are given, this function computes statistics for all numerical or @@ -1711,7 +1683,6 @@ class StatDescribe(google.protobuf.message.Message): global___StatDescribe = StatDescribe -@typing_extensions.final class StatCrosstab(google.protobuf.message.Message): """Computes a pair-wise frequency table of the given columns. Also known as a contingency table. It will invoke 'Dataset.stat.crosstab' (same as 'StatFunctions.crossTabulate') @@ -1753,7 +1724,6 @@ class StatCrosstab(google.protobuf.message.Message): global___StatCrosstab = StatCrosstab -@typing_extensions.final class StatCov(google.protobuf.message.Message): """Calculate the sample covariance of two numerical columns of a DataFrame. It will invoke 'Dataset.stat.cov' (same as 'StatFunctions.calculateCov') to compute the results. @@ -1788,7 +1758,6 @@ class StatCov(google.protobuf.message.Message): global___StatCov = StatCov -@typing_extensions.final class StatCorr(google.protobuf.message.Message): """Calculates the correlation of two columns of a DataFrame. Currently only supports the Pearson Correlation Coefficient. It will invoke 'Dataset.stat.corr' (same as @@ -1848,7 +1817,6 @@ class StatCorr(google.protobuf.message.Message): global___StatCorr = StatCorr -@typing_extensions.final class StatApproxQuantile(google.protobuf.message.Message): """Calculates the approximate quantiles of numerical columns of a DataFrame. It will invoke 'Dataset.stat.approxQuantile' (same as 'StatFunctions.approxQuantile') @@ -1911,7 +1879,6 @@ class StatApproxQuantile(google.protobuf.message.Message): global___StatApproxQuantile = StatApproxQuantile -@typing_extensions.final class StatFreqItems(google.protobuf.message.Message): """Finding frequent items for columns, possibly with false positives. It will invoke 'Dataset.stat.freqItems' (same as 'StatFunctions.freqItems') @@ -1960,7 +1927,6 @@ class StatFreqItems(google.protobuf.message.Message): global___StatFreqItems = StatFreqItems -@typing_extensions.final class StatSampleBy(google.protobuf.message.Message): """Returns a stratified sample without replacement based on the fraction given on each stratum. @@ -1970,7 +1936,6 @@ class StatSampleBy(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor - @typing_extensions.final class Fraction(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -2052,7 +2017,6 @@ class StatSampleBy(google.protobuf.message.Message): global___StatSampleBy = StatSampleBy -@typing_extensions.final class NAFill(google.protobuf.message.Message): """Replaces null values. It will invoke 'Dataset.na.fill' (same as 'DataFrameNaFunctions.fill') to compute the results. @@ -2111,7 +2075,6 @@ class NAFill(google.protobuf.message.Message): global___NAFill = NAFill -@typing_extensions.final class NADrop(google.protobuf.message.Message): """Drop rows containing null values. It will invoke 'Dataset.na.drop' (same as 'DataFrameNaFunctions.drop') to compute the results. @@ -2180,7 +2143,6 @@ class NADrop(google.protobuf.message.Message): global___NADrop = NADrop -@typing_extensions.final class NAReplace(google.protobuf.message.Message): """Replaces old values with the corresponding values. It will invoke 'Dataset.na.replace' (same as 'DataFrameNaFunctions.replace') @@ -2189,7 +2151,6 @@ class NAReplace(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor - @typing_extensions.final class Replacement(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -2266,7 +2227,6 @@ class NAReplace(google.protobuf.message.Message): global___NAReplace = NAReplace -@typing_extensions.final class RenameColumnsBySameLengthNames(google.protobuf.message.Message): """Rename columns on the input relation by the same length of names.""" @@ -2302,13 +2262,11 @@ class RenameColumnsBySameLengthNames(google.protobuf.message.Message): global___RenameColumnsBySameLengthNames = RenameColumnsBySameLengthNames -@typing_extensions.final class RenameColumnsByNameToNameMap(google.protobuf.message.Message): """Rename columns on the input relation by a map with name to name mapping.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor - @typing_extensions.final class RenameColumnsMapEntry(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -2360,7 +2318,6 @@ class RenameColumnsByNameToNameMap(google.protobuf.message.Message): global___RenameColumnsByNameToNameMap = RenameColumnsByNameToNameMap -@typing_extensions.final class WithColumns(google.protobuf.message.Message): """Adding columns or replacing the existing columns that have the same names.""" @@ -2408,7 +2365,6 @@ class WithColumns(google.protobuf.message.Message): global___WithColumns = WithColumns -@typing_extensions.final class Hint(google.protobuf.message.Message): """Specify a hint over a relation. Hint should have a name and optional parameters.""" @@ -2456,7 +2412,6 @@ class Hint(google.protobuf.message.Message): global___Hint = Hint -@typing_extensions.final class Unpivot(google.protobuf.message.Message): """Unpivot a DataFrame from wide format to long format, optionally leaving identifier columns set.""" @@ -2520,7 +2475,6 @@ class Unpivot(google.protobuf.message.Message): global___Unpivot = Unpivot -@typing_extensions.final class ToSchema(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -2550,7 +2504,6 @@ class ToSchema(google.protobuf.message.Message): global___ToSchema = ToSchema -@typing_extensions.final class RepartitionByExpression(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -2609,7 +2562,6 @@ class RepartitionByExpression(google.protobuf.message.Message): global___RepartitionByExpression = RepartitionByExpression -@typing_extensions.final class SameSemantics(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor