apache · LennonChin · Sep 12, 2025 · Sep 12, 2025 · Sep 15, 2025 · wForget
diff --git a/...i-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala b/...i-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala
@@ -28,6 +28,7 @@ import org.slf4j.LoggerFactory
 import org.apache.kyuubi.plugin.spark.authz.OperationType.OperationType
 import org.apache.kyuubi.plugin.spark.authz.PrivilegeObjectActionType._
 import org.apache.kyuubi.plugin.spark.authz.rule.Authorization._
+import org.apache.kyuubi.plugin.spark.authz.rule.plan.ChildOutputHolder
 import org.apache.kyuubi.plugin.spark.authz.rule.rowfilter._
 import org.apache.kyuubi.plugin.spark.authz.serde._
 import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._
@@ -100,13 +101,18 @@ object PrivilegesBuilder {
         privilegeObjects += PrivilegeObject(table)
 
       case p =>
+        val existsChildOutputHolder = p.exists(_.isInstanceOf[ChildOutputHolder])
         for (child <- p.children) {
           // If current plan's references don't have relation to it's input, have two cases
           //   1. `MapInPandas`, `ScriptTransformation`
           //   2. `Project` output only have constant value
           if (columnPrune(p.references.toSeq ++ p.output, p.inputSet).isEmpty) {
-            // If plan is project and output don't have relation to input, can ignore.
-            if (!p.isInstanceOf[Project]) {
+            // 1. If plan is project and output don't have relation to input, can ignore.
+            // 2. If sub logic plan tree exists ChildOutputHolder node, it means that the output of
+            //    some nodes in the tree is fixed by RuleChildOutputMarker in some special
+            //    scenarios, such as the Aggregate(count(*)) child node. To avoid missing child node
+            //    permissions, we need to continue checking down.
+            if (!p.isInstanceOf[Project] || existsChildOutputHolder) {
               buildQuery(
                 child,
                 privilegeObjects,

diff --git a/...thz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtension.scala b/...thz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtension.scala
@@ -19,11 +19,12 @@ package org.apache.kyuubi.plugin.spark.authz.ranger
 
 import org.apache.spark.sql.SparkSessionExtensions
 
-import org.apache.kyuubi.plugin.spark.authz.rule.{RuleEliminateMarker, RuleEliminatePermanentViewMarker, RuleEliminateTypeOf}
+import org.apache.kyuubi.plugin.spark.authz.rule.{RuleEliminateChildOutputHolder, RuleEliminateMarker, RuleEliminatePermanentViewMarker, RuleEliminateTypeOf}
 import org.apache.kyuubi.plugin.spark.authz.rule.config.AuthzConfigurationChecker
 import org.apache.kyuubi.plugin.spark.authz.rule.datamasking.{RuleApplyDataMaskingStage0, RuleApplyDataMaskingStage1}
 import org.apache.kyuubi.plugin.spark.authz.rule.expression.RuleApplyTypeOfMarker
 import org.apache.kyuubi.plugin.spark.authz.rule.permanentview.RuleApplyPermanentViewMarker
+import org.apache.kyuubi.plugin.spark.authz.rule.plan.RuleChildOutputMarker
 import org.apache.kyuubi.plugin.spark.authz.rule.rowfilter.{FilterDataSourceV2Strategy, RuleApplyRowFilter, RuleReplaceShowObjectCommands}
 
 /**
@@ -48,13 +49,15 @@ class RangerSparkExtension extends (SparkSessionExtensions => Unit) {
     v1.injectResolutionRule(_ => RuleReplaceShowObjectCommands)
     v1.injectResolutionRule(_ => RuleApplyPermanentViewMarker)
     v1.injectResolutionRule(_ => RuleApplyTypeOfMarker)
+    v1.injectResolutionRule(_ => RuleChildOutputMarker)
     v1.injectResolutionRule(RuleApplyRowFilter)
     v1.injectResolutionRule(RuleApplyDataMaskingStage0)
     v1.injectResolutionRule(RuleApplyDataMaskingStage1)
     v1.injectOptimizerRule(_ => RuleEliminateMarker)
     v1.injectOptimizerRule(RuleAuthorization)
     v1.injectOptimizerRule(RuleEliminatePermanentViewMarker)
     v1.injectOptimizerRule(_ => RuleEliminateTypeOf)
+    v1.injectOptimizerRule(_ => RuleEliminateChildOutputHolder)
     v1.injectPlannerStrategy(FilterDataSourceV2Strategy)
   }
 }
diff --git a/...main/scala/org/apache/kyuubi/plugin/spark/authz/rule/RuleEliminateChildOutputHolder.scala b/...main/scala/org/apache/kyuubi/plugin/spark/authz/rule/RuleEliminateChildOutputHolder.scala
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.plugin.spark.authz.rule
+
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.rules.Rule
+
+import org.apache.kyuubi.plugin.spark.authz.rule.plan.ChildOutputHolder
+
+/**
+ * Transforming down [[ChildOutputHolder]]
+ */
+object RuleEliminateChildOutputHolder extends Rule[LogicalPlan] {
+  override def apply(plan: LogicalPlan): LogicalPlan = {
+    plan.transform {
+      case p @ ChildOutputHolder(child, _) => child
+    }
+  }
+}
diff --git a/...thz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/plan/ChildOutputHolder.scala b/...thz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/plan/ChildOutputHolder.scala
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.plugin.spark.authz.rule.plan
+
+import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode}
+
+import org.apache.kyuubi.plugin.spark.authz.util.WithInternalChild
+
+case class ChildOutputHolder(child: LogicalPlan, childOutput: Seq[Attribute])
+  extends UnaryNode with WithInternalChild {
+
+  val output: Seq[Attribute] = childOutput
+
+  override def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = copy(child = newChild)
+}
diff --git a/...src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/plan/RuleChildOutputMarker.scala b/...src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/plan/RuleChildOutputMarker.scala
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.plugin.spark.authz.rule.plan
+
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan}
+import org.apache.spark.sql.catalyst.rules.Rule
+
+/**
+ * Rule to add [[ChildOutputHolder]] used to fixed logic plan's child node's output,
+ * now used for following cases:
+ *
+ *  1. Aggregate(count(*)/count(1)), it's child node will be pruned in Spark optimizer
+ *     rule [[org.apache.spark.sql.catalyst.optimizer.ColumnPruning]].
+ */
+object RuleChildOutputMarker extends Rule[LogicalPlan] {
+  override def apply(plan: LogicalPlan): LogicalPlan = {
+    plan.transform {
+      case a @ Aggregate(_, _, child)
+          if !child.isInstanceOf[ChildOutputHolder] &&
+            child.outputSet.intersect(a.references).isEmpty =>
+        a.copy(child = ChildOutputHolder(child, child.output))
+    }
+  }
+}
diff --git a/...rc/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala b/...rc/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala
@@ -961,11 +961,36 @@ class HiveCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite {
                |AS
                |SELECT count(*) as cnt, sum(id) as sum_id FROM $db1.$table1
               """.stripMargin))
-        checkAnswer(someone, s"SELECT count(*) FROM $db1.$table1", Row(0) :: Nil)
 
-        checkAnswer(someone, s"SELECT count(*) FROM $db1.$view1", Row(0) :: Nil)
+        checkAnswer(admin, s"SELECT count(*) FROM $db1.$table1", Row(0) :: Nil)
 
-        checkAnswer(someone, s"SELECT count(*) FROM $db1.$view2", Row(1) :: Nil)
+        checkAnswer(admin, s"SELECT count(*) FROM $db1.$view1", Row(0) :: Nil)
+
+        checkAnswer(admin, s"SELECT count(*) FROM $db1.$view2", Row(1) :: Nil)
+
+        interceptEndsWith[AccessControlException](
+          doAs(someone, sql(s"SELECT count(*) FROM $db1.$table1").show()))(
+          s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/scope]")
+
+        interceptEndsWith[AccessControlException](
+          doAs(someone, sql(s"SELECT count(1) FROM $db1.$table1").show()))(
+          s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/scope]")
+
+        interceptEndsWith[AccessControlException](
+          doAs(someone, sql(s"SELECT count(*) FROM $db1.$view1").show()))(
+          s"does not have [select] privilege on [$db1/$view1/id,$db1/$view1/scope]")
+
+        interceptEndsWith[AccessControlException](
+          doAs(someone, sql(s"SELECT count(1) FROM $db1.$view1").show()))(
+          s"does not have [select] privilege on [$db1/$view1/id,$db1/$view1/scope]")
+
+        interceptEndsWith[AccessControlException](
+          doAs(someone, sql(s"SELECT count(*) FROM $db1.$view2").show()))(
+          s"does not have [select] privilege on [$db1/$view2/cnt,$db1/$view2/sum_id]")
+
+        interceptEndsWith[AccessControlException](
+          doAs(someone, sql(s"SELECT count(1) FROM $db1.$view2").show()))(
+          s"does not have [select] privilege on [$db1/$view2/cnt,$db1/$view2/sum_id]")
 
         interceptEndsWith[AccessControlException](
           doAs(someone, sql(s"SELECT count(id) FROM $db1.$table1 WHERE id > 10").show()))(
@@ -1500,13 +1525,33 @@ class HiveCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite {
         doAs(admin, sql(s"CREATE TABLE IF NOT EXISTS $db1.$table1 (id int, scope int)"))
         doAs(admin, sql(s"CREATE VIEW $db1.$view1 AS SELECT * FROM $db1.$table1"))
         checkAnswer(
-          someone,
+          admin,
           s"SELECT count(*) FROM $db1.$table1 WHERE id > 1",
           Row(0) :: Nil)
         checkAnswer(
-          someone,
+          admin,
           s"SELECT count(*) FROM $db1.$view1 WHERE id > 1",
           Row(0) :: Nil)
+        interceptContains[AccessControlException](
+          doAs(
+            someone,
+            sql(s"SELECT count(*) FROM $db1.$table1 WHERE id > 1").collect()))(
+          s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/scope]")
+        interceptContains[AccessControlException](
+          doAs(
+            someone,
+            sql(s"SELECT count(1) FROM $db1.$table1 WHERE id > 1").collect()))(
+          s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/scope]")
+        interceptContains[AccessControlException](
+          doAs(
+            someone,
+            sql(s"SELECT count(*) FROM $db1.$view1 WHERE id > 1").collect()))(
+          s"does not have [select] privilege on [$db1/$view1/id,$db1/$view1/scope]")
+        interceptContains[AccessControlException](
+          doAs(
+            someone,
+            sql(s"SELECT count(1) FROM $db1.$view1 WHERE id > 1").collect()))(
+          s"does not have [select] privilege on [$db1/$view1/id,$db1/$view1/scope]")
         interceptContains[AccessControlException](
           doAs(
             someone,
@@ -1542,4 +1587,57 @@ class HiveCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite {
       }
     }
   }
+
+  test("select count(*)/count(1) should not ignore privileges") {
+    val db1 = defaultDb
+    val table1 = "table1"
+    withSingleCallEnabled {
+      withCleanTmpResources(Seq((s"$db1.$table1", "table"))) {
+        doAs(
+          admin,
+          sql(
+            s"""CREATE TABLE IF NOT EXISTS $db1.$table1
+               |(id int, scope int, part string)
+               |PARTITIONED BY(part)
+               |""".stripMargin))
+
+        interceptContains[AccessControlException](
+          doAs(someone, sql(s"select count(*) from $db1.$table1").show()))(
+          s"does not have [select] privilege on " +
+            s"[$db1/$table1/id,$db1/$table1/scope,$db1/$table1/part]")
+
+        interceptContains[AccessControlException](
+          doAs(someone, sql(s"select count(id) from $db1.$table1").show()))(
+          s"does not have [select] privilege on [$db1/$table1/id]")
+
+        interceptContains[AccessControlException](
+          doAs(
+            someone,
+            sql(
+              s"""select count(1) from (
+                 |  select id, part from $db1.$table1
+                 |) t where part = 'part-1'
+                 |""".stripMargin).show()))(
+          s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/part]")
+
+        interceptContains[AccessControlException](
+          doAs(
+            someone,
+            sql(
+              s"""select cnt from (
+                 |  select count(1) as cnt from (
+                 |    select id from $db1.$table1 where part = 'part-1'
+                 |  ) t1
+                 |) t2
+                 |""".stripMargin).show()))(
+          s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/part]")
+
+        // df.count() same with select count(*)
+        interceptContains[AccessControlException](
+          doAs(someone, sql(s"select id from $db1.$table1 where part = 'part-1'").toDF().count()))(
+          s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/part]")
+      }
+
+    }
+  }
 }