From d915482c597bd231bf2bfe113fdbd1becfe5da99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E7=81=BF00244106?= <00244106@zte.intra> Date: Thu, 11 Oct 2018 17:10:05 +0800 Subject: [PATCH 1/3] update comment about RewriteDistinctAggregates --- .../sql/catalyst/optimizer/RewriteDistinctAggregates.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala index 4448ace7105a..1848c1277831 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala @@ -95,7 +95,7 @@ import org.apache.spark.sql.types.IntegerType * * This rule duplicates the input data by two or more times (# distinct groups + an optional * non-distinct group). This will put quite a bit of memory pressure of the used aggregate and - * exchange operators. Keeping the number of distinct groups as low a possible should be priority, + * exchange operators. Keeping the number of distinct groups as low as possible should be priority, * we could improve this in the current rule by applying more advanced expression canonicalization * techniques. */ From ded2c17adf1f890706b86308b31469ce4be567a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E7=81=BF00244106?= <00244106@zte.intra> Date: Fri, 12 Oct 2018 09:33:47 +0800 Subject: [PATCH 2/3] update another comment --- .../org/apache/spark/sql/hive/execution/HiveTableScanExec.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala index b3795b443040..92c6632ad786 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala @@ -182,7 +182,7 @@ case class HiveTableScanExec( protected override def doExecute(): RDD[InternalRow] = { // Using dummyCallSite, as getCallSite can turn out to be expensive with - // with multiple partitions. + // multiple partitions. val rdd = if (!relation.isPartitioned) { Utils.withDummyCallSite(sqlContext.sparkContext) { hadoopReader.makeRDDForTable(hiveQlTable) From 64c0c86356341f8f6b81c2f7bcf78d2495757987 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E7=81=BF00244106?= <00244106@zte.intra> Date: Tue, 16 Oct 2018 17:12:33 +0800 Subject: [PATCH 3/3] update a new file --- .../sql/catalyst/optimizer/RewriteDistinctAggregates.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala index 1848c1277831..b9468007cac6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala @@ -241,7 +241,7 @@ object RewriteDistinctAggregates extends Rule[LogicalPlan] { groupByAttrs ++ distinctAggChildAttrs ++ Seq(gid) ++ regularAggChildAttrMap.map(_._2), a.child) - // Construct the first aggregate operator. This de-duplicates the all the children of + // Construct the first aggregate operator. This de-duplicates all the children of // distinct operators, and applies the regular aggregate operators. val firstAggregateGroupBy = groupByAttrs ++ distinctAggChildAttrs :+ gid val firstAggregate = Aggregate(