diff --git a/connector/connect/src/main/protobuf/spark/connect/relations.proto b/connector/connect/src/main/protobuf/spark/connect/relations.proto
index 8421535dd8e26..bf4fb1f031ac6 100644
--- a/connector/connect/src/main/protobuf/spark/connect/relations.proto
+++ b/connector/connect/src/main/protobuf/spark/connect/relations.proto
@@ -44,6 +44,7 @@ message Relation {
     Sample sample = 12;
     Offset offset = 13;
     Deduplicate deduplicate = 14;
+    Range range = 15;
 
     Unknown unknown = 999;
   }
@@ -217,3 +218,23 @@ message Sample {
     int64 seed = 1;
   }
 }
+
+// Relation of type [[Range]] that generates a sequence of integers.
+message Range {
+  // Optional. Default value = 0
+  int32 start = 1;
+  int32 end = 2;
+  // Optional. Default value = 1
+  Step step = 3;
+  // Optional. Default value is assigned by 1) SQL conf "spark.sql.leafNodeDefaultParallelism" if
+  // it is set, or 2) spark default parallelism.
+  NumPartitions num_partitions = 4;
+
+  message Step {
+    int32 step = 1;
+  }
+
+  message NumPartitions {
+    int32 num_partitions = 1;
+  }
+}
diff --git a/connector/connect/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala b/connector/connect/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala
index 3ba773e4c04fd..4d3df49dc72d2 100644
--- a/connector/connect/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala
+++ b/connector/connect/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.connect
 import scala.collection.JavaConverters._
 import scala.language.implicitConversions
 
+import org.apache.spark.connect.proto
 import org.apache.spark.connect.proto._
 import org.apache.spark.connect.proto.Join.JoinType
 import org.apache.spark.connect.proto.SetOperation.SetOpType
@@ -34,6 +35,8 @@ import org.apache.spark.sql.connect.planner.DataTypeProtoConverter
 
 package object dsl {
 
+  class MockRemoteSession {}
+
   object expressions { // scalastyle:ignore
     implicit class DslString(val s: String) {
       def protoAttr: Expression =
@@ -175,6 +178,28 @@
   }
 
   object plans { // scalastyle:ignore
+    implicit class DslMockRemoteSession(val session: MockRemoteSession) {
+      def range(
+          start: Option[Int],
+          end: Int,
+          step: Option[Int],
+          numPartitions: Option[Int]): Relation = {
+        val range = proto.Range.newBuilder()
+        if (start.isDefined) {
+          range.setStart(start.get)
+        }
+        range.setEnd(end)
+        if (step.isDefined) {
+          range.setStep(proto.Range.Step.newBuilder().setStep(step.get))
+        }
+        if (numPartitions.isDefined) {
+          range.setNumPartitions(
+            proto.Range.NumPartitions.newBuilder().setNumPartitions(numPartitions.get))
+        }
+        Relation.newBuilder().setRange(range).build()
+      }
+    }
+
     implicit class DslLogicalPlan(val logicalPlan: Relation) {
       def select(exprs: Expression*): Relation = {
         Relation
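The two hunks above define the wire format and the client-side test DSL for it. For orientation, a minimal usage sketch of that DSL (the object name `RangeDslExample` is invented; it assumes the Spark Connect test classpath). Passing `None` leaves the corresponding submessage absent from the proto, which is what lets the server tell "not set" apart from an explicit zero:

```scala
import org.apache.spark.connect.proto
import org.apache.spark.sql.connect.dsl.MockRemoteSession
import org.apache.spark.sql.connect.dsl.plans._

object RangeDslExample extends App {
  val connect = new MockRemoteSession()

  // Only `end` is required. Unset optionals are simply absent from the
  // message, so the documented defaults (start = 0, step = 1) apply server-side.
  val withDefaults: proto.Relation = connect.range(None, 10, None, None)
  assert(!withDefaults.getRange.hasStep)
  assert(!withDefaults.getRange.hasNumPartitions)

  // Fully specified: start = 2, end = 10, step = 3, 4 partitions.
  val explicit: proto.Relation = connect.range(Some(2), 10, Some(3), Some(4))
  assert(explicit.getRange.getStep.getStep == 3)
}
```
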
diff --git a/connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
index dda5415abd5e2..481dbff10ee90 100644
--- a/connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
+++ b/connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
@@ -68,6 +68,7 @@ class SparkConnectPlanner(plan: proto.Relation, session: SparkSession) {
       case proto.Relation.RelTypeCase.LOCAL_RELATION =>
         transformLocalRelation(rel.getLocalRelation, common)
       case proto.Relation.RelTypeCase.SAMPLE => transformSample(rel.getSample)
+      case proto.Relation.RelTypeCase.RANGE => transformRange(rel.getRange)
       case proto.Relation.RelTypeCase.RELTYPE_NOT_SET =>
         throw new IndexOutOfBoundsException("Expected Relation to be set, but is empty.")
       case _ => throw InvalidPlanInput(s"${rel.getUnknown} not supported.")
@@ -93,6 +94,22 @@ class SparkConnectPlanner(plan: proto.Relation, session: SparkSession) {
       transformRelation(rel.getInput))
   }
 
+  private def transformRange(rel: proto.Range): LogicalPlan = {
+    val start = rel.getStart
+    val end = rel.getEnd
+    val step = if (rel.hasStep) {
+      rel.getStep.getStep
+    } else {
+      1
+    }
+    val numPartitions = if (rel.hasNumPartitions) {
+      rel.getNumPartitions.getNumPartitions
+    } else {
+      session.leafNodeDefaultParallelism
+    }
+    logical.Range(start, end, step, numPartitions)
+  }
+
   private def transformDeduplicate(rel: proto.Deduplicate): LogicalPlan = {
     if (!rel.hasInput) {
       throw InvalidPlanInput("Deduplicate needs a plan input")
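`transformRange` is where the wrapper submessages pay off: proto3 scalar fields cannot distinguish "unset" from 0, so `step` and `num_partitions` are wrapped in single-field messages whose presence is testable via `hasStep`/`hasNumPartitions`. The same fallback rules, factored as a standalone sketch (the `RangeDefaults` helper is hypothetical, not part of the patch):

```scala
import org.apache.spark.connect.proto

object RangeDefaults {
  // step defaults to 1 when the Step submessage is absent.
  def effectiveStep(rel: proto.Range): Int =
    if (rel.hasStep) rel.getStep.getStep else 1

  // None means "let the server decide"; transformRange answers that with
  // session.leafNodeDefaultParallelism, per the proto comment.
  def requestedPartitions(rel: proto.Range): Option[Int] =
    if (rel.hasNumPartitions) Some(rel.getNumPartitions.getNumPartitions)
    else None
}
```
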
diff --git a/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala b/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala
index 3232901ca8e09..eaf0c914d0ba3 100644
--- a/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala
+++ b/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala
@@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.expressions.AttributeReference
 import org.apache.spark.sql.catalyst.plans.{FullOuter, Inner, LeftAnti, LeftOuter, LeftSemi, PlanTest, RightOuter}
 import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.connect.dsl.MockRemoteSession
 import org.apache.spark.sql.connect.dsl.expressions._
 import org.apache.spark.sql.connect.dsl.plans._
 import org.apache.spark.sql.internal.SQLConf
@@ -35,6 +36,7 @@ import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructT
  * same as Spark dataframe's generated plan.
  */
 class SparkConnectProtoSuite extends PlanTest with SparkConnectPlanTest {
+  lazy val connect = new MockRemoteSession()
 
   lazy val connectTestRelation =
     createLocalRelationProto(
@@ -209,6 +211,15 @@
     comparePlans(connectPlan8, sparkPlan8)
   }
 
+  test("Test Range") {
+    comparePlans(connect.range(None, 10, None, None), spark.range(10).toDF())
+    comparePlans(connect.range(Some(2), 10, None, None), spark.range(2, 10).toDF())
+    comparePlans(connect.range(Some(2), 10, Some(10), None), spark.range(2, 10, 10).toDF())
+    comparePlans(
+      connect.range(Some(2), 10, Some(10), Some(100)),
+      spark.range(2, 10, 10, 100).toDF())
+  }
+
   private def createLocalRelationProtoByQualifiedAttributes(
       attrs: Seq[proto.Expression.QualifiedAttribute]): proto.Relation = {
     val localRelationBuilder = proto.LocalRelation.newBuilder()
@@ -226,6 +237,7 @@
     connectAnalyzed
   }
 
+  // Compares proto plan with DataFrame.
   private def comparePlans(connectPlan: proto.Relation, sparkPlan: DataFrame): Unit = {
     val connectAnalyzed = analyzePlan(transform(connectPlan))
     comparePlans(connectAnalyzed, sparkPlan.queryExecution.analyzed, false)
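The suite asserts that each proto `Range`, once planned, analyzes to the same logical plan as the corresponding `spark.range` call. A quick way to eyeball what is being compared, as a sketch runnable in `spark-shell` (where `spark` is the usual session):

```scala
// Inspect the analyzed plan that the Connect-produced plan must match.
import org.apache.spark.sql.catalyst.plans.logical.{Range => LogicalRange}

val analyzed = spark.range(2, 10, 10, 100).queryExecution.analyzed
analyzed.collectFirst { case r: LogicalRange =>
  (r.start, r.end, r.step, r.numSlices)
}
// => Some((2, 10, 10, Some(100)))
```
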
diff --git a/python/pyspark/sql/connect/proto/relations_pb2.py b/python/pyspark/sql/connect/proto/relations_pb2.py
index 973b322014ede..bf242a5d7cafc 100644
--- a/python/pyspark/sql/connect/proto/relations_pb2.py
+++ b/python/pyspark/sql/connect/proto/relations_pb2.py
@@ -32,7 +32,7 @@
 
 
 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
-    b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x1fspark/connect/expressions.proto"\x97\x06\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66ilter\x18\x04 \x01(\x0b\x32\x15.spark.connect.FilterH\x00R\x06\x66ilter\x12)\n\x04join\x18\x05 \x01(\x0b\x32\x13.spark.connect.JoinH\x00R\x04join\x12\x34\n\x06set_op\x18\x06 \x01(\x0b\x32\x1b.spark.connect.SetOperationH\x00R\x05setOp\x12)\n\x04sort\x18\x07 \x01(\x0b\x32\x13.spark.connect.SortH\x00R\x04sort\x12,\n\x05limit\x18\x08 \x01(\x0b\x32\x14.spark.connect.LimitH\x00R\x05limit\x12\x38\n\taggregate\x18\t \x01(\x0b\x32\x18.spark.connect.AggregateH\x00R\taggregate\x12&\n\x03sql\x18\n \x01(\x0b\x32\x12.spark.connect.SQLH\x00R\x03sql\x12\x45\n\x0elocal_relation\x18\x0b \x01(\x0b\x32\x1c.spark.connect.LocalRelationH\x00R\rlocalRelation\x12/\n\x06sample\x18\x0c \x01(\x0b\x32\x15.spark.connect.SampleH\x00R\x06sample\x12/\n\x06offset\x18\r \x01(\x0b\x32\x15.spark.connect.OffsetH\x00R\x06offset\x12>\n\x0b\x64\x65\x64uplicate\x18\x0e \x01(\x0b\x32\x1a.spark.connect.DeduplicateH\x00R\x0b\x64\x65\x64uplicate\x12\x33\n\x07unknown\x18\xe7\x07 \x01(\x0b\x32\x16.spark.connect.UnknownH\x00R\x07unknownB\n\n\x08rel_type"\t\n\x07Unknown"G\n\x0eRelationCommon\x12\x1f\n\x0bsource_info\x18\x01 \x01(\tR\nsourceInfo\x12\x14\n\x05\x61lias\x18\x02 \x01(\tR\x05\x61lias"\x1b\n\x03SQL\x12\x14\n\x05query\x18\x01 \x01(\tR\x05query"\x9a\x03\n\x04Read\x12\x41\n\x0bnamed_table\x18\x01 \x01(\x0b\x32\x1e.spark.connect.Read.NamedTableH\x00R\nnamedTable\x12\x41\n\x0b\x64\x61ta_source\x18\x02 \x01(\x0b\x32\x1e.spark.connect.Read.DataSourceH\x00R\ndataSource\x1a=\n\nNamedTable\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x1a\xbf\x01\n\nDataSource\x12\x16\n\x06\x66ormat\x18\x01 \x01(\tR\x06\x66ormat\x12\x16\n\x06schema\x18\x02 \x01(\tR\x06schema\x12\x45\n\x07options\x18\x03 \x03(\x0b\x32+.spark.connect.Read.DataSource.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\x0b\n\tread_type"u\n\x07Project\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12;\n\x0b\x65xpressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0b\x65xpressions"p\n\x06\x46ilter\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x37\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\tcondition"\xc2\x03\n\x04Join\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12@\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\rjoinCondition\x12\x39\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeR\x08joinType\x12#\n\rusing_columns\x18\x05 \x03(\tR\x0cusingColumns"\xbb\x01\n\x08JoinType\x12\x19\n\x15JOIN_TYPE_UNSPECIFIED\x10\x00\x12\x13\n\x0fJOIN_TYPE_INNER\x10\x01\x12\x18\n\x14JOIN_TYPE_FULL_OUTER\x10\x02\x12\x18\n\x14JOIN_TYPE_LEFT_OUTER\x10\x03\x12\x19\n\x15JOIN_TYPE_RIGHT_OUTER\x10\x04\x12\x17\n\x13JOIN_TYPE_LEFT_ANTI\x10\x05\x12\x17\n\x13JOIN_TYPE_LEFT_SEMI\x10\x06"\xeb\x02\n\x0cSetOperation\x12\x36\n\nleft_input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\tleftInput\x12\x38\n\x0bright_input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\nrightInput\x12\x45\n\x0bset_op_type\x18\x03 \x01(\x0e\x32%.spark.connect.SetOperation.SetOpTypeR\tsetOpType\x12\x15\n\x06is_all\x18\x04 \x01(\x08R\x05isAll\x12\x17\n\x07\x62y_name\x18\x05 \x01(\x08R\x06\x62yName"r\n\tSetOpType\x12\x1b\n\x17SET_OP_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15SET_OP_TYPE_INTERSECT\x10\x01\x12\x15\n\x11SET_OP_TYPE_UNION\x10\x02\x12\x16\n\x12SET_OP_TYPE_EXCEPT\x10\x03"L\n\x05Limit\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"O\n\x06Offset\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x16\n\x06offset\x18\x02 \x01(\x05R\x06offset"\xc5\x02\n\tAggregate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12Y\n\x12result_expressions\x18\x03 \x03(\x0b\x32*.spark.connect.Aggregate.AggregateFunctionR\x11resultExpressions\x1a`\n\x11\x41ggregateFunction\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"\xf6\x03\n\x04Sort\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12>\n\x0bsort_fields\x18\x02 \x03(\x0b\x32\x1d.spark.connect.Sort.SortFieldR\nsortFields\x1a\xbc\x01\n\tSortField\x12\x39\n\nexpression\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\nexpression\x12?\n\tdirection\x18\x02 \x01(\x0e\x32!.spark.connect.Sort.SortDirectionR\tdirection\x12\x33\n\x05nulls\x18\x03 \x01(\x0e\x32\x1d.spark.connect.Sort.SortNullsR\x05nulls"l\n\rSortDirection\x12\x1e\n\x1aSORT_DIRECTION_UNSPECIFIED\x10\x00\x12\x1c\n\x18SORT_DIRECTION_ASCENDING\x10\x01\x12\x1d\n\x19SORT_DIRECTION_DESCENDING\x10\x02"R\n\tSortNulls\x12\x1a\n\x16SORT_NULLS_UNSPECIFIED\x10\x00\x12\x14\n\x10SORT_NULLS_FIRST\x10\x01\x12\x13\n\x0fSORT_NULLS_LAST\x10\x02"\x8e\x01\n\x0b\x44\x65\x64uplicate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames\x12-\n\x13\x61ll_columns_as_keys\x18\x03 \x01(\x08R\x10\x61llColumnsAsKeys"]\n\rLocalRelation\x12L\n\nattributes\x18\x01 \x03(\x0b\x32,.spark.connect.Expression.QualifiedAttributeR\nattributes"\xf0\x01\n\x06Sample\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1f\n\x0blower_bound\x18\x02 \x01(\x01R\nlowerBound\x12\x1f\n\x0bupper_bound\x18\x03 \x01(\x01R\nupperBound\x12)\n\x10with_replacement\x18\x04 \x01(\x08R\x0fwithReplacement\x12.\n\x04seed\x18\x05 \x01(\x0b\x32\x1a.spark.connect.Sample.SeedR\x04seed\x1a\x1a\n\x04Seed\x12\x12\n\x04seed\x18\x01 \x01(\x03R\x04seedB"\n\x1eorg.apache.spark.connect.protoP\x01\x62\x06proto3'
+    b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x1fspark/connect/expressions.proto"\xc5\x06\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66ilter\x18\x04 \x01(\x0b\x32\x15.spark.connect.FilterH\x00R\x06\x66ilter\x12)\n\x04join\x18\x05 \x01(\x0b\x32\x13.spark.connect.JoinH\x00R\x04join\x12\x34\n\x06set_op\x18\x06 \x01(\x0b\x32\x1b.spark.connect.SetOperationH\x00R\x05setOp\x12)\n\x04sort\x18\x07 \x01(\x0b\x32\x13.spark.connect.SortH\x00R\x04sort\x12,\n\x05limit\x18\x08 \x01(\x0b\x32\x14.spark.connect.LimitH\x00R\x05limit\x12\x38\n\taggregate\x18\t \x01(\x0b\x32\x18.spark.connect.AggregateH\x00R\taggregate\x12&\n\x03sql\x18\n \x01(\x0b\x32\x12.spark.connect.SQLH\x00R\x03sql\x12\x45\n\x0elocal_relation\x18\x0b \x01(\x0b\x32\x1c.spark.connect.LocalRelationH\x00R\rlocalRelation\x12/\n\x06sample\x18\x0c \x01(\x0b\x32\x15.spark.connect.SampleH\x00R\x06sample\x12/\n\x06offset\x18\r \x01(\x0b\x32\x15.spark.connect.OffsetH\x00R\x06offset\x12>\n\x0b\x64\x65\x64uplicate\x18\x0e \x01(\x0b\x32\x1a.spark.connect.DeduplicateH\x00R\x0b\x64\x65\x64uplicate\x12,\n\x05range\x18\x0f \x01(\x0b\x32\x14.spark.connect.RangeH\x00R\x05range\x12\x33\n\x07unknown\x18\xe7\x07 \x01(\x0b\x32\x16.spark.connect.UnknownH\x00R\x07unknownB\n\n\x08rel_type"\t\n\x07Unknown"G\n\x0eRelationCommon\x12\x1f\n\x0bsource_info\x18\x01 \x01(\tR\nsourceInfo\x12\x14\n\x05\x61lias\x18\x02 \x01(\tR\x05\x61lias"\x1b\n\x03SQL\x12\x14\n\x05query\x18\x01 \x01(\tR\x05query"\x9a\x03\n\x04Read\x12\x41\n\x0bnamed_table\x18\x01 \x01(\x0b\x32\x1e.spark.connect.Read.NamedTableH\x00R\nnamedTable\x12\x41\n\x0b\x64\x61ta_source\x18\x02 \x01(\x0b\x32\x1e.spark.connect.Read.DataSourceH\x00R\ndataSource\x1a=\n\nNamedTable\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x1a\xbf\x01\n\nDataSource\x12\x16\n\x06\x66ormat\x18\x01 \x01(\tR\x06\x66ormat\x12\x16\n\x06schema\x18\x02 \x01(\tR\x06schema\x12\x45\n\x07options\x18\x03 \x03(\x0b\x32+.spark.connect.Read.DataSource.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\x0b\n\tread_type"u\n\x07Project\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12;\n\x0b\x65xpressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0b\x65xpressions"p\n\x06\x46ilter\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x37\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\tcondition"\xc2\x03\n\x04Join\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12@\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\rjoinCondition\x12\x39\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeR\x08joinType\x12#\n\rusing_columns\x18\x05 \x03(\tR\x0cusingColumns"\xbb\x01\n\x08JoinType\x12\x19\n\x15JOIN_TYPE_UNSPECIFIED\x10\x00\x12\x13\n\x0fJOIN_TYPE_INNER\x10\x01\x12\x18\n\x14JOIN_TYPE_FULL_OUTER\x10\x02\x12\x18\n\x14JOIN_TYPE_LEFT_OUTER\x10\x03\x12\x19\n\x15JOIN_TYPE_RIGHT_OUTER\x10\x04\x12\x17\n\x13JOIN_TYPE_LEFT_ANTI\x10\x05\x12\x17\n\x13JOIN_TYPE_LEFT_SEMI\x10\x06"\xeb\x02\n\x0cSetOperation\x12\x36\n\nleft_input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\tleftInput\x12\x38\n\x0bright_input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\nrightInput\x12\x45\n\x0bset_op_type\x18\x03 \x01(\x0e\x32%.spark.connect.SetOperation.SetOpTypeR\tsetOpType\x12\x15\n\x06is_all\x18\x04 \x01(\x08R\x05isAll\x12\x17\n\x07\x62y_name\x18\x05 \x01(\x08R\x06\x62yName"r\n\tSetOpType\x12\x1b\n\x17SET_OP_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15SET_OP_TYPE_INTERSECT\x10\x01\x12\x15\n\x11SET_OP_TYPE_UNION\x10\x02\x12\x16\n\x12SET_OP_TYPE_EXCEPT\x10\x03"L\n\x05Limit\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"O\n\x06Offset\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x16\n\x06offset\x18\x02 \x01(\x05R\x06offset"\xc5\x02\n\tAggregate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12Y\n\x12result_expressions\x18\x03 \x03(\x0b\x32*.spark.connect.Aggregate.AggregateFunctionR\x11resultExpressions\x1a`\n\x11\x41ggregateFunction\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"\xf6\x03\n\x04Sort\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12>\n\x0bsort_fields\x18\x02 \x03(\x0b\x32\x1d.spark.connect.Sort.SortFieldR\nsortFields\x1a\xbc\x01\n\tSortField\x12\x39\n\nexpression\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\nexpression\x12?\n\tdirection\x18\x02 \x01(\x0e\x32!.spark.connect.Sort.SortDirectionR\tdirection\x12\x33\n\x05nulls\x18\x03 \x01(\x0e\x32\x1d.spark.connect.Sort.SortNullsR\x05nulls"l\n\rSortDirection\x12\x1e\n\x1aSORT_DIRECTION_UNSPECIFIED\x10\x00\x12\x1c\n\x18SORT_DIRECTION_ASCENDING\x10\x01\x12\x1d\n\x19SORT_DIRECTION_DESCENDING\x10\x02"R\n\tSortNulls\x12\x1a\n\x16SORT_NULLS_UNSPECIFIED\x10\x00\x12\x14\n\x10SORT_NULLS_FIRST\x10\x01\x12\x13\n\x0fSORT_NULLS_LAST\x10\x02"\x8e\x01\n\x0b\x44\x65\x64uplicate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames\x12-\n\x13\x61ll_columns_as_keys\x18\x03 \x01(\x08R\x10\x61llColumnsAsKeys"]\n\rLocalRelation\x12L\n\nattributes\x18\x01 \x03(\x0b\x32,.spark.connect.Expression.QualifiedAttributeR\nattributes"\xf0\x01\n\x06Sample\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1f\n\x0blower_bound\x18\x02 \x01(\x01R\nlowerBound\x12\x1f\n\x0bupper_bound\x18\x03 \x01(\x01R\nupperBound\x12)\n\x10with_replacement\x18\x04 \x01(\x08R\x0fwithReplacement\x12.\n\x04seed\x18\x05 \x01(\x0b\x32\x1a.spark.connect.Sample.SeedR\x04seed\x1a\x1a\n\x04Seed\x12\x12\n\x04seed\x18\x01 \x01(\x03R\x04seed"\xfd\x01\n\x05Range\x12\x14\n\x05start\x18\x01 \x01(\x05R\x05start\x12\x10\n\x03\x65nd\x18\x02 \x01(\x05R\x03\x65nd\x12-\n\x04step\x18\x03 \x01(\x0b\x32\x19.spark.connect.Range.StepR\x04step\x12I\n\x0enum_partitions\x18\x04 \x01(\x0b\x32".spark.connect.Range.NumPartitionsR\rnumPartitions\x1a\x1a\n\x04Step\x12\x12\n\x04step\x18\x01 \x01(\x05R\x04step\x1a\x36\n\rNumPartitions\x12%\n\x0enum_partitions\x18\x01 \x01(\x05R\rnumPartitionsB"\n\x1eorg.apache.spark.connect.protoP\x01\x62\x06proto3'
 )
 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
@@ -44,55 +44,61 @@
     _READ_DATASOURCE_OPTIONSENTRY._options = None
     _READ_DATASOURCE_OPTIONSENTRY._serialized_options = b"8\001"
     _RELATION._serialized_start = 82
-    _RELATION._serialized_end = 873
-    _UNKNOWN._serialized_start = 875
-    _UNKNOWN._serialized_end = 884
-    _RELATIONCOMMON._serialized_start = 886
-    _RELATIONCOMMON._serialized_end = 957
-    _SQL._serialized_start = 959
-    _SQL._serialized_end = 986
-    _READ._serialized_start = 989
-    _READ._serialized_end = 1399
-    _READ_NAMEDTABLE._serialized_start = 1131
-    _READ_NAMEDTABLE._serialized_end = 1192
-    _READ_DATASOURCE._serialized_start = 1195
-    _READ_DATASOURCE._serialized_end = 1386
-    _READ_DATASOURCE_OPTIONSENTRY._serialized_start = 1328
-    _READ_DATASOURCE_OPTIONSENTRY._serialized_end = 1386
-    _PROJECT._serialized_start = 1401
-    _PROJECT._serialized_end = 1518
-    _FILTER._serialized_start = 1520
-    _FILTER._serialized_end = 1632
-    _JOIN._serialized_start = 1635
-    _JOIN._serialized_end = 2085
-    _JOIN_JOINTYPE._serialized_start = 1898
-    _JOIN_JOINTYPE._serialized_end = 2085
-    _SETOPERATION._serialized_start = 2088
-    _SETOPERATION._serialized_end = 2451
-    _SETOPERATION_SETOPTYPE._serialized_start = 2337
-    _SETOPERATION_SETOPTYPE._serialized_end = 2451
-    _LIMIT._serialized_start = 2453
-    _LIMIT._serialized_end = 2529
-    _OFFSET._serialized_start = 2531
-    _OFFSET._serialized_end = 2610
-    _AGGREGATE._serialized_start = 2613
-    _AGGREGATE._serialized_end = 2938
-    _AGGREGATE_AGGREGATEFUNCTION._serialized_start = 2842
-    _AGGREGATE_AGGREGATEFUNCTION._serialized_end = 2938
-    _SORT._serialized_start = 2941
-    _SORT._serialized_end = 3443
-    _SORT_SORTFIELD._serialized_start = 3061
-    _SORT_SORTFIELD._serialized_end = 3249
-    _SORT_SORTDIRECTION._serialized_start = 3251
-    _SORT_SORTDIRECTION._serialized_end = 3359
-    _SORT_SORTNULLS._serialized_start = 3361
-    _SORT_SORTNULLS._serialized_end = 3443
-    _DEDUPLICATE._serialized_start = 3446
-    _DEDUPLICATE._serialized_end = 3588
-    _LOCALRELATION._serialized_start = 3590
-    _LOCALRELATION._serialized_end = 3683
-    _SAMPLE._serialized_start = 3686
-    _SAMPLE._serialized_end = 3926
-    _SAMPLE_SEED._serialized_start = 3900
-    _SAMPLE_SEED._serialized_end = 3926
+    _RELATION._serialized_end = 919
+    _UNKNOWN._serialized_start = 921
+    _UNKNOWN._serialized_end = 930
+    _RELATIONCOMMON._serialized_start = 932
+    _RELATIONCOMMON._serialized_end = 1003
+    _SQL._serialized_start = 1005
+    _SQL._serialized_end = 1032
+    _READ._serialized_start = 1035
+    _READ._serialized_end = 1445
+    _READ_NAMEDTABLE._serialized_start = 1177
+    _READ_NAMEDTABLE._serialized_end = 1238
+    _READ_DATASOURCE._serialized_start = 1241
+    _READ_DATASOURCE._serialized_end = 1432
+    _READ_DATASOURCE_OPTIONSENTRY._serialized_start = 1374
+    _READ_DATASOURCE_OPTIONSENTRY._serialized_end = 1432
+    _PROJECT._serialized_start = 1447
+    _PROJECT._serialized_end = 1564
+    _FILTER._serialized_start = 1566
+    _FILTER._serialized_end = 1678
+    _JOIN._serialized_start = 1681
+    _JOIN._serialized_end = 2131
+    _JOIN_JOINTYPE._serialized_start = 1944
+    _JOIN_JOINTYPE._serialized_end = 2131
+    _SETOPERATION._serialized_start = 2134
+    _SETOPERATION._serialized_end = 2497
+    _SETOPERATION_SETOPTYPE._serialized_start = 2383
+    _SETOPERATION_SETOPTYPE._serialized_end = 2497
+    _LIMIT._serialized_start = 2499
+    _LIMIT._serialized_end = 2575
+    _OFFSET._serialized_start = 2577
+    _OFFSET._serialized_end = 2656
+    _AGGREGATE._serialized_start = 2659
+    _AGGREGATE._serialized_end = 2984
+    _AGGREGATE_AGGREGATEFUNCTION._serialized_start = 2888
+    _AGGREGATE_AGGREGATEFUNCTION._serialized_end = 2984
+    _SORT._serialized_start = 2987
+    _SORT._serialized_end = 3489
+    _SORT_SORTFIELD._serialized_start = 3107
+    _SORT_SORTFIELD._serialized_end = 3295
+    _SORT_SORTDIRECTION._serialized_start = 3297
+    _SORT_SORTDIRECTION._serialized_end = 3405
+    _SORT_SORTNULLS._serialized_start = 3407
+    _SORT_SORTNULLS._serialized_end = 3489
+    _DEDUPLICATE._serialized_start = 3492
+    _DEDUPLICATE._serialized_end = 3634
+    _LOCALRELATION._serialized_start = 3636
+    _LOCALRELATION._serialized_end = 3729
+    _SAMPLE._serialized_start = 3732
+    _SAMPLE._serialized_end = 3972
+    _SAMPLE_SEED._serialized_start = 3946
+    _SAMPLE_SEED._serialized_end = 3972
+    _RANGE._serialized_start = 3975
+    _RANGE._serialized_end = 4228
+    _RANGE_STEP._serialized_start = 4146
+    _RANGE_STEP._serialized_end = 4172
+    _RANGE_NUMPARTITIONS._serialized_start = 4174
+    _RANGE_NUMPARTITIONS._serialized_end = 4228
 # @@protoc_insertion_point(module_scope)
diff --git a/python/pyspark/sql/connect/proto/relations_pb2.pyi b/python/pyspark/sql/connect/proto/relations_pb2.pyi
index 2c2ce8d0711ad..7618ed230e750 100644
--- a/python/pyspark/sql/connect/proto/relations_pb2.pyi
+++ b/python/pyspark/sql/connect/proto/relations_pb2.pyi
@@ -73,6 +73,7 @@ class Relation(google.protobuf.message.Message):
     SAMPLE_FIELD_NUMBER: builtins.int
     OFFSET_FIELD_NUMBER: builtins.int
     DEDUPLICATE_FIELD_NUMBER: builtins.int
+    RANGE_FIELD_NUMBER: builtins.int
     UNKNOWN_FIELD_NUMBER: builtins.int
     @property
     def common(self) -> global___RelationCommon: ...
@@ -103,6 +104,8 @@
     @property
     def deduplicate(self) -> global___Deduplicate: ...
     @property
+    def range(self) -> global___Range: ...
+    @property
     def unknown(self) -> global___Unknown: ...
     def __init__(
         self,
@@ -121,6 +124,7 @@
         sample: global___Sample | None = ...,
         offset: global___Offset | None = ...,
         deduplicate: global___Deduplicate | None = ...,
+        range: global___Range | None = ...,
         unknown: global___Unknown | None = ...,
     ) -> None: ...
     def HasField(
@@ -144,6 +148,8 @@
             b"offset",
             "project",
             b"project",
+            "range",
+            b"range",
             "read",
             b"read",
             "rel_type",
@@ -181,6 +187,8 @@
             b"offset",
             "project",
             b"project",
+            "range",
+            b"range",
             "read",
             b"read",
             "rel_type",
@@ -213,6 +221,7 @@
         "sample",
         "offset",
         "deduplicate",
+        "range",
         "unknown",
     ]
     | None: ...
@@ -935,3 +944,70 @@
     ) -> None: ...
 
 global___Sample = Sample
+
+class Range(google.protobuf.message.Message):
+    """Relation of type [[Range]] that generates a sequence of integers."""
+
+    DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+    class Step(google.protobuf.message.Message):
+        DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+        STEP_FIELD_NUMBER: builtins.int
+        step: builtins.int
+        def __init__(
+            self,
+            *,
+            step: builtins.int = ...,
+        ) -> None: ...
+        def ClearField(self, field_name: typing_extensions.Literal["step", b"step"]) -> None: ...
+
+    class NumPartitions(google.protobuf.message.Message):
+        DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+        NUM_PARTITIONS_FIELD_NUMBER: builtins.int
+        num_partitions: builtins.int
+        def __init__(
+            self,
+            *,
+            num_partitions: builtins.int = ...,
+        ) -> None: ...
+        def ClearField(
+            self, field_name: typing_extensions.Literal["num_partitions", b"num_partitions"]
+        ) -> None: ...
+
+    START_FIELD_NUMBER: builtins.int
+    END_FIELD_NUMBER: builtins.int
+    STEP_FIELD_NUMBER: builtins.int
+    NUM_PARTITIONS_FIELD_NUMBER: builtins.int
+    start: builtins.int
+    """Optional. Default value = 0"""
+    end: builtins.int
+    @property
+    def step(self) -> global___Range.Step:
+        """Optional. Default value = 1"""
+    @property
+    def num_partitions(self) -> global___Range.NumPartitions:
+        """Optional. Default value is assigned by 1) SQL conf "spark.sql.leafNodeDefaultParallelism" if
+        it is set, or 2) spark default parallelism.
+        """
+    def __init__(
+        self,
+        *,
+        start: builtins.int = ...,
+        end: builtins.int = ...,
+        step: global___Range.Step | None = ...,
+        num_partitions: global___Range.NumPartitions | None = ...,
+    ) -> None: ...
+    def HasField(
+        self,
+        field_name: typing_extensions.Literal["num_partitions", b"num_partitions", "step", b"step"],
+    ) -> builtins.bool: ...
+    def ClearField(
+        self,
+        field_name: typing_extensions.Literal[
+            "end", b"end", "num_partitions", b"num_partitions", "start", b"start", "step", b"step"
+        ],
+    ) -> None: ...
+
+global___Range = Range
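Putting the pieces together, an end-to-end round trip looks roughly like the sketch below. It assumes `SparkConnectPlanner` exposes a `transform(): LogicalPlan` entry point, which the suite reaches through its `transform(...)` helper; treat the exact call, and the `RangeRoundTrip` object, as assumptions for illustration rather than verified API.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.connect.dsl.MockRemoteSession
import org.apache.spark.sql.connect.dsl.plans._
import org.apache.spark.sql.connect.planner.SparkConnectPlanner

object RangeRoundTrip extends App {
  val session = SparkSession.builder().master("local[1]").getOrCreate()
  val connect = new MockRemoteSession()

  // Client side: only `end` is set, so the proto carries no Step and no
  // NumPartitions submessage.
  val relation = connect.range(None, 10, None, None)

  // Server side: the planner fills in start = 0, step = 1 and the session's
  // leafNodeDefaultParallelism, mirroring spark.range(10).
  val plan = new SparkConnectPlanner(relation, session).transform()
  println(plan.treeString)

  session.stop()
}
```
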