@@ -67,11 +67,13 @@ message Unknown {}
6767
6868// Common metadata of all relations.
6969message RelationCommon {
70+ // (Required) Shared relation metadata.
7071 string source_info = 1 ;
7172}
7273
7374// Relation that uses a SQL query to generate the output.
7475message SQL {
76+ // (Required) The SQL query.
7577 string query = 1 ;
7678}
7779
@@ -84,15 +86,20 @@ message Read {
8486 }
8587
8688 message NamedTable {
89+ // (Required) Unparsed identifier for the table.
8790 string unparsed_identifier = 1 ;
8891 }
8992
9093 message DataSource {
91- // Required. Supported formats include: parquet, orc, text, json, parquet, csv, avro.
94+ // ( Required) Supported formats include: parquet, orc, text, json, parquet, csv, avro.
9295 string format = 1 ;
93- // Optional. If not set, Spark will infer the schema.
94- string schema = 2 ;
95- // The key is case insensitive.
96+
97+ // (Optional) If not set, Spark will infer the schema.
98+ optional string schema = 2 ;
99+
100+ // Options for the data source. The context of this map varies based on the
101+ // data source format. This options could be empty for valid data source format.
102+ // The map key is case insensitive.
96103 map <string , string > options = 3 ;
97104 }
98105}
@@ -106,24 +113,39 @@ message Project {
106113 //
107114 // For example, `SELECT ABS(-1)` is valid plan without an input plan.
108115 Relation input = 1 ;
116+
117+ // (Required) A Project requires at least one expression.
109118 repeated Expression expressions = 3 ;
110119}
111120
112121// Relation that applies a boolean expression `condition` on each row of `input` to produce
113122// the output result.
114123message Filter {
124+ // (Required) Input relation for a Filter.
115125 Relation input = 1 ;
126+
127+ // (Required) A Filter must have a condition expression.
116128 Expression condition = 2 ;
117129}
118130
119131// Relation of type [[Join]].
120132//
121133// `left` and `right` must be present.
122134message Join {
135+ // (Required) Left input relation for a Join.
123136 Relation left = 1 ;
137+
138+ // (Required) Right input relation for a Join.
124139 Relation right = 2 ;
140+
141+ // (Optional) The join condition. Could be unset when `using_columns` is utilized.
142+ //
143+ // This field does not co-exist with using_columns.
125144 Expression join_condition = 3 ;
145+
146+ // (Required) The join type.
126147 JoinType join_type = 4 ;
148+
127149 // Optional. using_columns provides a list of columns that should present on both sides of
128150 // the join inputs that this Join will join on. For example A JOIN B USING col_name is
129151 // equivalent to A JOIN B on A.col_name = B.col_name.
@@ -144,11 +166,25 @@ message Join {
144166
145167// Relation of type [[SetOperation]]
146168message SetOperation {
169+ // (Required) Left input relation for a Set operation.
147170 Relation left_input = 1 ;
171+
172+ // (Required) Right input relation for a Set operation.
148173 Relation right_input = 2 ;
174+
175+ // (Required) The Set operation type.
149176 SetOpType set_op_type = 3 ;
150- bool is_all = 4 ;
151- bool by_name = 5 ;
177+
178+ // (Optional) If to remove duplicate rows.
179+ //
180+ // True to preserve all results.
181+ // False to remove duplicate rows.
182+ optional bool is_all = 4 ;
183+
184+ // (Optional) If to perform the Set operation based on name resolution.
185+ //
186+ // Only UNION supports this option.
187+ optional bool by_name = 5 ;
152188
153189 enum SetOpType {
154190 SET_OP_TYPE_UNSPECIFIED = 0 ;
@@ -160,29 +196,42 @@ message SetOperation {
160196
161197// Relation of type [[Limit]] that is used to `limit` rows from the input relation.
162198message Limit {
199+ // (Required) Input relation for a Limit.
163200 Relation input = 1 ;
201+
202+ // (Required) the limit.
164203 int32 limit = 2 ;
165204}
166205
167206// Relation of type [[Offset]] that is used to read rows staring from the `offset` on
168207// the input relation.
169208message Offset {
209+ // (Required) Input relation for an Offset.
170210 Relation input = 1 ;
211+
212+ // (Required) the limit.
171213 int32 offset = 2 ;
172214}
173215
174216// Relation of type [[Aggregate]].
175217message Aggregate {
218+ // (Required) Input relation for a Aggregate.
176219 Relation input = 1 ;
220+
177221 repeated Expression grouping_expressions = 2 ;
178222 repeated Expression result_expressions = 3 ;
179223}
180224
181225// Relation of type [[Sort]].
182226message Sort {
227+ // (Required) Input relation for a Sort.
183228 Relation input = 1 ;
229+
230+ // (Required) Sort fields.
184231 repeated SortField sort_fields = 2 ;
185- bool is_global = 3 ;
232+
233+ // (Optional) if this is a global sort.
234+ optional bool is_global = 3 ;
186235
187236 message SortField {
188237 Expression expression = 1 ;
@@ -206,58 +255,83 @@ message Sort {
206255// Relation of type [[Deduplicate]] which have duplicate rows removed, could consider either only
207256// the subset of columns or all the columns.
208257message Deduplicate {
258+ // (Required) Input relation for a Deduplicate.
209259 Relation input = 1 ;
260+
261+ // (Optional) Deduplicate based on a list of column names.
262+ //
263+ // This field does not co-use with `all_columns_as_keys`.
210264 repeated string column_names = 2 ;
211- bool all_columns_as_keys = 3 ;
265+
266+ // (Optional) Deduplicate based on all the columns of the input relation.
267+ //
268+ // This field does not co-use with `column_names`.
269+ optional bool all_columns_as_keys = 3 ;
212270}
213271
272+ // A relation that does not need to be qualified by name.
214273message LocalRelation {
274+ // (Optional) A list qualified attributes.
215275 repeated Expression.QualifiedAttribute attributes = 1 ;
216276 // TODO: support local data.
217277}
218278
219279// Relation of type [[Sample]] that samples a fraction of the dataset.
220280message Sample {
281+ // (Required) Input relation for a Sample.
221282 Relation input = 1 ;
283+
284+ // (Required) lower bound.
222285 double lower_bound = 2 ;
286+
287+ // (Required) upper bound.
223288 double upper_bound = 3 ;
224- bool with_replacement = 4 ;
289+
290+ // (Optional) Whether to sample with replacement.
291+ optional bool with_replacement = 4 ;
292+
293+ // (Optional) The random seed.
225294 optional int64 seed = 5 ;
226295}
227296
228297// Relation of type [[Range]] that generates a sequence of integers.
229298message Range {
230- // Optional. Default value = 0
231- int64 start = 1 ;
232- // Required.
299+ // (Optional) Default value = 0
300+ optional int64 start = 1 ;
301+
302+ // (Required)
233303 int64 end = 2 ;
234- // Required.
304+
305+ // (Required)
235306 int64 step = 3 ;
307+
236308 // Optional. Default value is assigned by 1) SQL conf "spark.sql.leafNodeDefaultParallelism" if
237309 // it is set, or 2) spark default parallelism.
238310 optional int32 num_partitions = 4 ;
239311}
240312
241313// Relation alias.
242314message SubqueryAlias {
243- // Required. The input relation.
315+ // ( Required) The input relation of SubqueryAlias .
244316 Relation input = 1 ;
245- // Required. The alias.
317+
318+ // (Required) The alias.
246319 string alias = 2 ;
247- // Optional. Qualifier of the alias.
320+
321+ // (Optional) Qualifier of the alias.
248322 repeated string qualifier = 3 ;
249323}
250324
251325// Relation repartition.
252326message Repartition {
253- // Required. The input relation.
327+ // ( Required) The input relation of Repartition .
254328 Relation input = 1 ;
255329
256- // Required. Must be positive.
330+ // ( Required) Must be positive.
257331 int32 num_partitions = 2 ;
258332
259- // Optional. Default value is false.
260- bool shuffle = 3 ;
333+ // ( Optional) Default value is false.
334+ optional bool shuffle = 3 ;
261335}
262336
263337// Compose the string representing rows for output.
@@ -267,14 +341,14 @@ message ShowString {
267341 Relation input = 1 ;
268342
269343 // (Required) Number of rows to show.
270- optional int32 numRows = 2 ;
344+ int32 numRows = 2 ;
271345
272346 // (Required) If set to more than 0, truncates strings to
273347 // `truncate` characters and all cells will be aligned right.
274- optional int32 truncate = 3 ;
348+ int32 truncate = 3 ;
275349
276350 // (Required) If set to true, prints output rows vertically (one line per column value).
277- optional bool vertical = 4 ;
351+ bool vertical = 4 ;
278352}
279353
280354// Computes specified statistics for numeric and string columns.
@@ -344,10 +418,10 @@ message NAFill {
344418
345419// Rename columns on the input relation by the same length of names.
346420message RenameColumnsBySameLengthNames {
347- // Required. The input relation.
421+ // ( Required) The input relation of RenameColumnsBySameLengthNames .
348422 Relation input = 1 ;
349423
350- // Required.
424+ // ( Required)
351425 //
352426 // The number of columns of the input relation must be equal to the length
353427 // of this field. If this is not true, an exception will be returned.
@@ -357,11 +431,11 @@ message RenameColumnsBySameLengthNames {
357431
358432// Rename columns on the input relation by a map with name to name mapping.
359433message RenameColumnsByNameToNameMap {
360- // Required. The input relation.
434+ // ( Required) The input relation.
361435 Relation input = 1 ;
362436
363437
364- // Required.
438+ // ( Required)
365439 //
366440 // Renaming column names of input relation from A to B where A is the map key
367441 // and B is the map value. This is a no-op if schema doesn't contain any A. It
0 commit comments