Skip to content

Commit b15a2c9

Browse files
dhruvarya-dballisonport-db
authored andcommitted
Introduce InCommitTimestamp feature and write monotonically increasing timestamps in CommitInfo
Follow-up for #2532. Adds a new writer feature called `inCommitTimestamp`. When this feature is enabled, the writer will make sure that it writes `commitTimestamp` in CommitInfo which contains a monotonically increasing timestamp. This PR is an initial implementation, it does not handle timestamp retrieval efficiently. It does not try to populate the inCommitTimestamp in Snapshot even in places where it is already available, instead Snapshot has to perform an IO to read the timestamp. Closes #2596 GitOrigin-RevId: 44904e734eee74378ee55f708beb29a484cd93e6
1 parent f50bd83 commit b15a2c9

14 files changed

+753
-53
lines changed

spark/src/main/resources/error/delta-error-classes.json

+12
Original file line numberDiff line numberDiff line change
@@ -1388,6 +1388,18 @@
13881388
],
13891389
"sqlState" : "42703"
13901390
},
1391+
"DELTA_MISSING_COMMIT_INFO" : {
1392+
"message" : [
1393+
"This table has the feature <featureName> enabled which requires the presence of the CommitInfo action in every commit. However, the CommitInfo action is missing from commit version <version>."
1394+
],
1395+
"sqlState" : "KD004"
1396+
},
1397+
"DELTA_MISSING_COMMIT_TIMESTAMP" : {
1398+
"message" : [
1399+
"This table has the feature <featureName> enabled which requires the presence of commitTimestamp in the CommitInfo action. However, this field has not been set in commit version <version>."
1400+
],
1401+
"sqlState" : "KD004"
1402+
},
13911403
"DELTA_MISSING_DELTA_TABLE" : {
13921404
"message" : [
13931405
"<tableName> is not a Delta table."

spark/src/main/scala/org/apache/spark/sql/delta/ConflictChecker.scala

+48
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ private[delta] class ConflictChecker(
165165
checkForAddedFilesThatShouldHaveBeenReadByCurrentTxn()
166166
checkForDeletedFilesAgainstCurrentTxnReadFiles()
167167
checkForDeletedFilesAgainstCurrentTxnDeletedFiles()
168+
resolveTimestampOrderingConflicts()
168169

169170
logMetrics()
170171
currentTransactionInfo
@@ -554,6 +555,53 @@ private[delta] class ConflictChecker(
554555
currentTransactionInfo = currentTransactionInfo.copy(actions = newActions)
555556
}
556557

558+
/**
559+
* Adjust the current transaction's commit timestamp to account for the winning
560+
* transaction's commit timestamp. If this transaction newly enabled ICT, also update
561+
* the table properties to reflect the adjusted enablement version and timestamp.
562+
*/
563+
private def resolveTimestampOrderingConflicts(): Unit = {
564+
if (!DeltaConfigs.IN_COMMIT_TIMESTAMPS_ENABLED.fromMetaData(currentTransactionInfo.metadata)) {
565+
return
566+
}
567+
568+
val winningCommitTimestamp =
569+
if (InCommitTimestampUtils.didCurrentTransactionEnableICT(
570+
currentTransactionInfo.metadata, currentTransactionInfo.readSnapshot)) {
571+
// Since the current transaction enabled inCommitTimestamps, we should use the file
572+
// timestamp from the winning transaction as its commit timestamp.
573+
winningCommitFileStatus.getModificationTime
574+
} else {
575+
// Get the inCommitTimestamp from the winning transaction.
576+
CommitInfo.getRequiredInCommitTimestamp(
577+
winningCommitSummary.commitInfo, winningCommitVersion.toString)
578+
}
579+
val currentTransactionTimestamp = CommitInfo.getRequiredInCommitTimestamp(
580+
currentTransactionInfo.commitInfo, "NEW_COMMIT")
581+
// getRequiredInCommitTimestamp will throw an exception if commitInfo is None.
582+
val currentTransactionCommitInfo = currentTransactionInfo.commitInfo.get
583+
val updatedCommitTimestamp = Math.max(currentTransactionTimestamp, winningCommitTimestamp + 1)
584+
val updatedCommitInfo =
585+
currentTransactionCommitInfo.copy(inCommitTimestamp = Some(updatedCommitTimestamp))
586+
currentTransactionInfo = currentTransactionInfo.copy(commitInfo = Some(updatedCommitInfo))
587+
val nextAvailableVersion = winningCommitVersion + 1L
588+
val updatedMetadata =
589+
InCommitTimestampUtils.getUpdatedMetadataWithICTEnablementInfo(
590+
updatedCommitTimestamp,
591+
currentTransactionInfo.readSnapshot,
592+
currentTransactionInfo.metadata,
593+
nextAvailableVersion)
594+
updatedMetadata.foreach { updatedMetadata =>
595+
currentTransactionInfo = currentTransactionInfo.copy(
596+
metadata = updatedMetadata,
597+
actions = currentTransactionInfo.actions.map {
598+
case _: Metadata => updatedMetadata
599+
case other => other
600+
}
601+
)
602+
}
603+
}
604+
557605
/** A helper function for pretty printing a specific partition directory. */
558606
protected def getPrettyPartitionMessage(partitionValues: Map[String, String]): String = {
559607
val partitionColumns = currentTransactionInfo.partitionSchemaAtReadTime

spark/src/main/scala/org/apache/spark/sql/delta/DeltaConfig.scala

+31
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,37 @@ trait DeltaConfigsBase extends DeltaLogging {
754754
_ => true,
755755
"A string-to-string map of configuration properties for the managed commit owner.")
756756

757+
val IN_COMMIT_TIMESTAMPS_ENABLED = buildConfig[Boolean](
758+
"enableInCommitTimestamps-dev",
759+
false.toString,
760+
_.toBoolean,
761+
validationFunction = _ => true,
762+
"needs to be a boolean."
763+
)
764+
765+
/**
766+
* This table property is used to track the version of the table at which
767+
* inCommitTimestamps were enabled.
768+
*/
769+
val IN_COMMIT_TIMESTAMP_ENABLEMENT_VERSION = buildConfig[Option[Long]](
770+
"inCommitTimestampEnablementVersion-dev",
771+
null,
772+
v => Option(v).map(_.toLong),
773+
validationFunction = _ => true,
774+
"needs to be a long."
775+
)
776+
777+
/**
778+
* This table property is used to track the timestamp at which inCommitTimestamps
779+
* were enabled. More specifically, it is the inCommitTimestamp of the commit with
780+
* the version specified in [[IN_COMMIT_TIMESTAMP_ENABLEMENT_VERSION]].
781+
*/
782+
val IN_COMMIT_TIMESTAMP_ENABLEMENT_TIMESTAMP = buildConfig[Option[Long]](
783+
"inCommitTimestampEnablementTimestamp-dev",
784+
null,
785+
v => Option(v).map(_.toLong),
786+
validationFunction = _ => true,
787+
"needs to be a long.")
757788
}
758789

759790
object DeltaConfigs extends DeltaConfigsBase

spark/src/main/scala/org/apache/spark/sql/delta/DeltaErrors.scala

+12
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,18 @@ trait DeltaErrorsBase
186186
cause = cause)
187187
}
188188

189+
def missingCommitInfo(featureName: String, commitVersion: String): DeltaIllegalStateException = {
190+
new DeltaIllegalStateException(
191+
errorClass = "DELTA_MISSING_COMMIT_INFO",
192+
messageParameters = Array(featureName, commitVersion))
193+
}
194+
195+
def missingCommitTimestamp(commitVersion: String): DeltaIllegalStateException = {
196+
new DeltaIllegalStateException(
197+
errorClass = "DELTA_MISSING_COMMIT_TIMESTAMP",
198+
messageParameters = Array(InCommitTimestampTableFeature.name, commitVersion))
199+
}
200+
189201
def failOnCheckpointRename(src: Path, dest: Path): DeltaIllegalStateException = {
190202
new DeltaIllegalStateException(
191203
errorClass = "DELTA_CANNOT_RENAME_PATH",

spark/src/main/scala/org/apache/spark/sql/delta/DeltaHistoryManager.scala

+21-9
Original file line numberDiff line numberDiff line change
@@ -255,25 +255,37 @@ class DeltaHistoryManager(
255255

256256
/** Contains many utility methods that can also be executed on Spark executors. */
257257
object DeltaHistoryManager extends DeltaLogging {
258-
/** Get the persisted commit info for the given delta file. */
259-
private def getCommitInfo(
258+
/** Get the persisted commit info (if available) for the given delta file. */
259+
def getCommitInfoOpt(
260260
logStore: LogStore,
261261
basePath: Path,
262262
version: Long,
263-
hadoopConf: Configuration): CommitInfo = {
263+
hadoopConf: Configuration): Option[CommitInfo] = {
264264
val logs = logStore.readAsIterator(FileNames.deltaFile(basePath, version), hadoopConf)
265265
try {
266-
val info = logs.map(Action.fromJson).collectFirst { case c: CommitInfo => c }
267-
if (info.isEmpty) {
268-
CommitInfo.empty(Some(version))
269-
} else {
270-
info.head.copy(version = Some(version))
271-
}
266+
logs
267+
.map(Action.fromJson)
268+
.collectFirst { case c: CommitInfo => c.copy(version = Some(version)) }
272269
} finally {
273270
logs.close()
274271
}
275272
}
276273

274+
/**
275+
* Get the persisted commit info for the given delta file. If commit info
276+
* is not found in the commit, a mostly empty [[CommitInfo]] object with only
277+
* the version populated will be returned.
278+
*/
279+
private def getCommitInfo(
280+
logStore: LogStore,
281+
basePath: Path,
282+
version: Long,
283+
hadoopConf: Configuration): CommitInfo = {
284+
getCommitInfoOpt(logStore, basePath, version, hadoopConf).getOrElse {
285+
CommitInfo.empty(Some(version))
286+
}
287+
}
288+
277289
/**
278290
* Get the earliest commit available for this table. Note that this version isn't guaranteed to
279291
* exist when performing an action as a concurrent operation can delete the file during cleanup.

spark/src/main/scala/org/apache/spark/sql/delta/OptimisticTransaction.scala

+60-20
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ import org.apache.spark.sql.delta.metering.DeltaLogging
4040
import org.apache.spark.sql.delta.schema.{SchemaMergingUtils, SchemaUtils}
4141
import org.apache.spark.sql.delta.sources.DeltaSQLConf
4242
import org.apache.spark.sql.delta.stats._
43+
import org.apache.spark.sql.util.ScalaExtensions._
4344
import org.apache.hadoop.fs.{FileStatus, Path}
4445

4546
import org.apache.spark.SparkException
@@ -296,7 +297,7 @@ trait OptimisticTransactionImpl extends TransactionalWrite
296297
* Tracks the start time since we started trying to write a particular commit.
297298
* Used for logging duration of retried transactions.
298299
*/
299-
protected var commitAttemptStartTime: Long = _
300+
protected var commitAttemptStartTimeMillis: Long = _
300301

301302
/**
302303
* Tracks actions within the transaction, will commit along with the passed-in actions in the
@@ -371,6 +372,18 @@ trait OptimisticTransactionImpl extends TransactionalWrite
371372
// will be detected as a conflict and the transaction will anyway fail.
372373
private[delta] val preCommitCommitStoreOpt: Option[CommitStore] = snapshot.commitStoreOpt
373374

375+
/**
376+
* Generates a timestamp which is greater than the commit timestamp
377+
* of the last snapshot. Note that this is only needed when the
378+
* feature `inCommitTimestamps` is enabled.
379+
*/
380+
protected[delta] def generateInCommitTimestampForFirstCommitAttempt(
381+
currentTimestamp: Long): Option[Long] =
382+
Option.when(DeltaConfigs.IN_COMMIT_TIMESTAMPS_ENABLED.fromMetaData(metadata)) {
383+
val lastCommitTimestamp = snapshot.timestamp
384+
math.max(currentTimestamp, lastCommitTimestamp + 1)
385+
}
386+
374387
/** The end to end execution time of this transaction. */
375388
def txnExecutionTimeMs: Option[Long] = if (commitEndNano == -1) {
376389
None
@@ -1086,31 +1099,50 @@ trait OptimisticTransactionImpl extends TransactionalWrite
10861099
val readRowIdHighWatermark =
10871100
RowId.extractHighWatermark(snapshot).getOrElse(RowId.MISSING_HIGH_WATER_MARK)
10881101

1102+
commitAttemptStartTimeMillis = clock.getTimeMillis()
10891103
commitInfo = CommitInfo(
1090-
clock.getTimeMillis(),
1091-
op.name,
1092-
op.jsonEncodedValues,
1093-
Map.empty,
1094-
Some(readVersion).filter(_ >= 0),
1095-
Option(isolationLevelToUse.toString),
1096-
Some(isBlindAppend),
1097-
getOperationMetrics(op),
1098-
getUserMetadata(op),
1104+
time = commitAttemptStartTimeMillis,
1105+
operation = op.name,
1106+
inCommitTimestamp =
1107+
generateInCommitTimestampForFirstCommitAttempt(commitAttemptStartTimeMillis),
1108+
operationParameters = op.jsonEncodedValues,
1109+
commandContext = Map.empty,
1110+
readVersion = Some(readVersion).filter(_ >= 0),
1111+
isolationLevel = Option(isolationLevelToUse.toString),
1112+
isBlindAppend = Some(isBlindAppend),
1113+
operationMetrics = getOperationMetrics(op),
1114+
userMetadata = getUserMetadata(op),
10991115
tags = if (tags.nonEmpty) Some(tags) else None,
11001116
txnId = Some(txnId))
11011117

11021118
// Validate that the [[DeltaConfigs.MANAGED_COMMIT_PROVIDER_CONF]] is json parse-able.
11031119
DeltaConfigs.MANAGED_COMMIT_OWNER_CONF.fromMetaData(metadata)
11041120

1121+
val firstAttemptVersion = getFirstAttemptVersion
1122+
val updatedMetadataOpt = commitInfo.inCommitTimestamp.flatMap { inCommitTimestamp =>
1123+
InCommitTimestampUtils.getUpdatedMetadataWithICTEnablementInfo(
1124+
inCommitTimestamp,
1125+
snapshot,
1126+
metadata,
1127+
firstAttemptVersion)
1128+
}
1129+
val updatedActions = updatedMetadataOpt.map { updatedMetadata =>
1130+
preparedActions.map {
1131+
case _: Metadata => updatedMetadata
1132+
case other => other
1133+
}
1134+
}
1135+
.getOrElse(preparedActions)
1136+
val updatedMetadata = updatedMetadataOpt.getOrElse(metadata)
11051137
val currentTransactionInfo = CurrentTransactionInfo(
11061138
txnId = txnId,
11071139
readPredicates = readPredicates.toSeq,
11081140
readFiles = readFiles.toSet,
11091141
readWholeTable = readTheWholeTable,
11101142
readAppIds = readTxn.toSet,
1111-
metadata = metadata,
1143+
metadata = updatedMetadata,
11121144
protocol = protocol,
1113-
actions = preparedActions,
1145+
actions = updatedActions,
11141146
readSnapshot = snapshot,
11151147
commitInfo = Option(commitInfo),
11161148
readRowIdHighWatermark = readRowIdHighWatermark,
@@ -1125,15 +1157,14 @@ trait OptimisticTransactionImpl extends TransactionalWrite
11251157
registerPostCommitHook(GenerateSymlinkManifest)
11261158
}
11271159

1128-
commitAttemptStartTime = clock.getTimeMillis()
11291160
if (preparedActions.isEmpty && canSkipEmptyCommits &&
11301161
skipRecordingEmptyCommitAllowed(isolationLevelToUse)) {
11311162
return None
11321163
}
11331164

11341165
val (commitVersion, postCommitSnapshot, updatedCurrentTransactionInfo) =
11351166
doCommitRetryIteratively(
1136-
getFirstAttemptVersion, currentTransactionInfo, isolationLevelToUse)
1167+
firstAttemptVersion, currentTransactionInfo, isolationLevelToUse)
11371168
logInfo(s"Committed delta #$commitVersion to ${deltaLog.logPath}")
11381169
(commitVersion, postCommitSnapshot, updatedCurrentTransactionInfo.actions)
11391170
} catch {
@@ -1182,15 +1213,17 @@ trait OptimisticTransactionImpl extends TransactionalWrite
11821213
newProtocolOpt: Option[Protocol],
11831214
op: DeltaOperations.Operation,
11841215
context: Map[String, String],
1185-
metrics: Map[String, String]): (Long, Snapshot) = {
1216+
metrics: Map[String, String]
1217+
): (Long, Snapshot) = recordDeltaOperation(deltaLog, "delta.commit.large") {
11861218
assert(!committed, "Transaction already committed.")
11871219
commitStartNano = System.nanoTime()
11881220
val attemptVersion = getFirstAttemptVersion
11891221
try {
11901222
val tags = Map.empty[String, String]
11911223
val commitInfo = CommitInfo(
1192-
time = clock.getTimeMillis(),
1224+
NANOSECONDS.toMillis(commitStartNano),
11931225
operation = op.name,
1226+
generateInCommitTimestampForFirstCommitAttempt(NANOSECONDS.toMillis(commitStartNano)),
11941227
operationParameters = op.jsonEncodedValues,
11951228
context,
11961229
readVersion = Some(readVersion),
@@ -1215,8 +1248,15 @@ trait OptimisticTransactionImpl extends TransactionalWrite
12151248
// Initialize everything needed to maintain auto-compaction stats.
12161249
partitionsAddedToOpt = Some(new mutable.HashSet[Map[String, String]])
12171250
val acStatsCollector = createAutoCompactStatsCollector()
1251+
val updatedMetadataOpt = commitInfo.inCommitTimestamp.flatMap { inCommitTimestamp =>
1252+
InCommitTimestampUtils.getUpdatedMetadataWithICTEnablementInfo(
1253+
inCommitTimestamp,
1254+
snapshot,
1255+
metadata,
1256+
attemptVersion)
1257+
}
12181258
var allActions =
1219-
Seq(commitInfo, metadata).toIterator ++
1259+
Seq(commitInfo, updatedMetadataOpt.getOrElse(metadata)).toIterator ++
12201260
nonProtocolMetadataActions ++
12211261
newProtocolOpt.toIterator
12221262
allActions = allActions.map { action =>
@@ -1719,7 +1759,7 @@ trait OptimisticTransactionImpl extends TransactionalWrite
17191759
}
17201760
}
17211761
// retries all failed
1722-
val totalCommitAttemptTime = clock.getTimeMillis() - commitAttemptStartTime
1762+
val totalCommitAttemptTime = clock.getTimeMillis() - commitAttemptStartTimeMillis
17231763
throw DeltaErrors.maxCommitRetriesExceededException(
17241764
maxRetryAttempts + 1,
17251765
commitVersion,
@@ -1939,12 +1979,12 @@ trait OptimisticTransactionImpl extends TransactionalWrite
19391979
otherCommitFileStatus,
19401980
commitIsolationLevel)
19411981
logInfo(s"$logPrefixStr No conflicts in version $otherCommitVersion, " +
1942-
s"${clock.getTimeMillis() - commitAttemptStartTime} ms since start")
1982+
s"${clock.getTimeMillis() - commitAttemptStartTimeMillis} ms since start")
19431983
}
19441984

19451985
logInfo(s"$logPrefixStr No conflicts with versions [$checkVersion, $nextAttemptVersion) " +
19461986
s"with current txn having $txnDetailsLogStr, " +
1947-
s"${clock.getTimeMillis() - commitAttemptStartTime} ms since start")
1987+
s"${clock.getTimeMillis() - commitAttemptStartTimeMillis} ms since start")
19481988
(nextAttemptVersion, updatedCurrentTransactionInfo)
19491989
}
19501990
}

0 commit comments

Comments
 (0)