33 changes: 12 additions & 21 deletions sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -1441,7 +1441,7 @@ class Dataset[T] private[sql](
     if (sqlContext.conf.supportQuotedRegexColumnName) {
       colRegex(colName)
     } else {
-      Column(addDataFrameIdToCol(resolve(colName)))
+      createColumnWithPlanId(colName)
     }
   }

@@ -1457,25 +1457,6 @@ class Dataset[T] private[sql](
   def metadataColumn(colName: String): Column =
     Column(queryExecution.analyzed.getMetadataAttributeByName(colName))
 
-  // Attach the dataset id and column position to the column reference, so that we can detect
-  // ambiguous self-join correctly. See the rule `DetectAmbiguousSelfJoin`.
-  // This must be called before we return a `Column` that contains `AttributeReference`.
-  // Note that, the metadata added here are only available in the analyzer, as the analyzer rule
-  // `DetectAmbiguousSelfJoin` will remove it.
-  private def addDataFrameIdToCol(expr: NamedExpression): NamedExpression = {
-    val newExpr = expr transform {
-      case a: AttributeReference
-          if sparkSession.conf.get(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED) =>
-        val metadata = new MetadataBuilder()
-          .withMetadata(a.metadata)
-          .putLong(Dataset.DATASET_ID_KEY, id)
-          .putLong(Dataset.COL_POS_KEY, logicalPlan.output.indexWhere(a.semanticEquals))
-          .build()
-        a.withMetadata(metadata)
-    }
-    newExpr.asInstanceOf[NamedExpression]
-  }

   /**
    * Selects column based on the column name specified as a regex and returns it as [[Column]].
    * @group untypedrel
@@ -1489,8 +1470,18 @@ class Dataset[T] private[sql](
       case ParserUtils.qualifiedEscapedIdentifier(nameParts, columnNameRegex) =>
         Column(UnresolvedRegex(columnNameRegex, Some(nameParts), caseSensitive))
       case _ =>
-        Column(addDataFrameIdToCol(resolve(colName)))
+        createColumnWithPlanId(colName)
     }
   }
 
+  private def createColumnWithPlanId(colName: String) = {
+    val expr = resolve(colName) match {
+      case attr: AttributeReference => UnresolvedAttribute(Seq(attr.name))
+      case _ => UnresolvedAttribute.quotedString(colName)
+    }
+    // reuse existing DATASET_ID_KEY as the PLAN_ID
+    expr.setTagValue(LogicalPlan.PLAN_ID_TAG, id)
+    Column(expr)
+  }

   /**
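For context, a minimal standalone sketch (not part of the change) of the tagging mechanism the new `createColumnWithPlanId` helper relies on: an unresolved attribute carries a plan id as a tree-node tag that later analysis can read back. It only uses the Catalyst APIs already referenced in the diff above (`UnresolvedAttribute`, `LogicalPlan.PLAN_ID_TAG`, `setTagValue`/`getTagValue`), and assumes the code sits inside Spark's own sql package tree, since these tags are internal; the object name, column name, and id value are made up for illustration.

    package org.apache.spark.sql

    import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
    import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

    // Hypothetical throwaway object that just hosts the sketch.
    private[sql] object PlanIdTagSketch {
      def demo(): Unit = {
        // Build an unresolved reference to a (made-up) column named "value".
        val attr = UnresolvedAttribute(Seq("value"))
        // Attach a plan id, mirroring what createColumnWithPlanId does with the Dataset's id.
        attr.setTagValue(LogicalPlan.PLAN_ID_TAG, 42L)
        // The tag travels with the expression and can be read back later, e.g. by an
        // analyzer rule that needs to locate the plan the reference came from.
        assert(attr.getTagValue(LogicalPlan.PLAN_ID_TAG).contains(42L))
      }
    }

Compared with the removed addDataFrameIdToCol, which encoded the dataset id and column position as column metadata on a resolved AttributeReference, the new helper keeps the reference unresolved and tags it with the plan id, so the analyzer can later resolve it against the plan carrying the matching id.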