Skip to content

Commit 03759c9

Browse files
committed
[3.2][Kernel][Writes] Allow transaction retries for blind append (#3055)
Currently, Kernel throws an exception when there is a conflict (i.e., a committed file already exists at the given version). Because the current write support is only for blind appends, we should retry the transaction instead. The retry checks that there are no logical conflicts (`metadata`, `protocol` or `txn` (Set Transaction)) that affect the blind append. Adds tests for protocol, metadata and setTxn conflicts, and tests to verify that blind appends are retried and committed.
1 parent 4ae6df6 commit 03759c9

File tree

7 files changed

+528
-39
lines changed

7 files changed

+528
-39
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*
2+
* Copyright (2024) The Delta Lake Project Authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package io.delta.kernel.exceptions;
17+
18+
import io.delta.kernel.annotation.Evolving;
19+
20+
/**
21+
* Thrown when the metadata of the Delta table has changed between the time of transaction start
22+
* and the time of commit.
23+
*
24+
* @since 3.2.0
25+
*/
26+
@Evolving
27+
public class MetadataChangedException extends ConcurrentWriteException {
28+
public MetadataChangedException() {
29+
super("The metadata of the Delta table has been changed by a concurrent update. " +
30+
"Please try the operation again.");
31+
}
32+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/*
2+
* Copyright (2024) The Delta Lake Project Authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package io.delta.kernel.exceptions;
17+
18+
19+
import io.delta.kernel.annotation.Evolving;
20+
21+
/**
22+
* Thrown when the protocol of the Delta table has changed between the time of transaction start
23+
* and the time of commit.
24+
*
25+
* @since 3.2.0
26+
*/
27+
@Evolving
28+
public class ProtocolChangedException extends ConcurrentWriteException {
29+
private static final String helpfulMsgForNewTables = " This happens when multiple writers " +
30+
"are writing to an empty directory. Creating the table ahead of time will avoid this " +
31+
"conflict.";
32+
33+
public ProtocolChangedException(long attemptVersion) {
34+
super(String.format("Transaction has encountered a conflict and can not be committed. " +
35+
"Query needs to be re-executed using the latest version of the table.%s",
36+
attemptVersion == 0 ? helpfulMsgForNewTables : ""));
37+
}
38+
}

kernel/kernel-api/src/main/java/io/delta/kernel/internal/DeltaErrors.java

+8
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,14 @@ public static KernelException concurrentTransaction(
195195
return new ConcurrentTransactionException(appId, txnVersion, lastUpdated);
196196
}
197197

198+
public static KernelException metadataChangedException() {
199+
return new MetadataChangedException();
200+
}
201+
202+
public static KernelException protocolChangedException(long attemptVersion) {
203+
return new ProtocolChangedException(attemptVersion);
204+
}
205+
198206
/* ------------------------ HELPER METHODS ----------------------------- */
199207
private static String formatTimestamp(long millisSinceEpochUTC) {
200208
return new Timestamp(millisSinceEpochUTC).toInstant().toString();

kernel/kernel-api/src/main/java/io/delta/kernel/internal/TransactionImpl.java

+80-34
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020
import java.util.*;
2121
import java.util.stream.Collectors;
2222

23+
import org.slf4j.Logger;
24+
import org.slf4j.LoggerFactory;
25+
2326
import io.delta.kernel.*;
2427
import io.delta.kernel.data.Row;
2528
import io.delta.kernel.engine.Engine;
@@ -32,18 +35,30 @@
3235
import io.delta.kernel.internal.actions.*;
3336
import io.delta.kernel.internal.data.TransactionStateRow;
3437
import io.delta.kernel.internal.fs.Path;
38+
import io.delta.kernel.internal.replay.ConflictChecker;
39+
import io.delta.kernel.internal.replay.ConflictChecker.TransactionRebaseState;
3540
import io.delta.kernel.internal.util.FileNames;
3641
import io.delta.kernel.internal.util.VectorUtils;
3742
import static io.delta.kernel.internal.TableConfig.CHECKPOINT_INTERVAL;
3843
import static io.delta.kernel.internal.actions.SingleAction.*;
44+
import static io.delta.kernel.internal.util.Preconditions.checkArgument;
3945
import static io.delta.kernel.internal.util.Preconditions.checkState;
4046
import static io.delta.kernel.internal.util.Utils.toCloseableIterator;
4147

4248
public class TransactionImpl
4349
implements Transaction {
50+
private static final Logger logger = LoggerFactory.getLogger(TransactionImpl.class);
51+
4452
public static final int DEFAULT_READ_VERSION = 1;
4553
public static final int DEFAULT_WRITE_VERSION = 2;
4654

55+
/**
56+
* Number of retries for concurrent write exceptions to resolve conflicts and retry commit. In
57+
* Delta-Spark, for historical reasons the number of retries is really high (10 million). We are
58+
* starting with a lower number for now. If this is not sufficient we can update it.
59+
*/
60+
private static final int NUM_TXN_RETRIES = 200;
61+
4762
private final UUID txnId = UUID.randomUUID();
4863

4964
private final boolean isNewTable; // the transaction is creating a new table
@@ -95,13 +110,45 @@ public StructType getSchema(Engine engine) {
95110
}
96111

97112
@Override
98-
public TransactionCommitResult commit(
113+
public TransactionCommitResult commit(Engine engine, CloseableIterable<Row> dataActions)
114+
throws ConcurrentWriteException {
115+
try {
116+
checkState(!closed,
117+
"Transaction is already attempted to commit. Create a new transaction.");
118+
119+
long commitAsVersion = readSnapshot.getVersion(engine) + 1;
120+
int numRetries = 0;
121+
do {
122+
logger.info("Committing transaction as version = {}.", commitAsVersion);
123+
try {
124+
return doCommit(engine, commitAsVersion, dataActions);
125+
} catch (FileAlreadyExistsException fnfe) {
126+
logger.info("Concurrent write detected when committing as version = {}. " +
127+
"Trying to resolve conflicts and retry commit.", commitAsVersion);
128+
TransactionRebaseState rebaseState = ConflictChecker
129+
.resolveConflicts(engine, readSnapshot, commitAsVersion, this);
130+
long newCommitAsVersion = rebaseState.getLatestVersion() + 1;
131+
checkArgument(commitAsVersion < newCommitAsVersion,
132+
"New commit version %d should be greater than the previous commit " +
133+
"attempt version %d.", newCommitAsVersion, commitAsVersion);
134+
commitAsVersion = newCommitAsVersion;
135+
}
136+
numRetries++;
137+
} while (numRetries < NUM_TXN_RETRIES);
138+
} finally {
139+
closed = true;
140+
}
141+
142+
// we have exhausted the number of retries, give up.
143+
logger.info("Exhausted maximum retries ({}) for committing transaction.", NUM_TXN_RETRIES);
144+
throw new ConcurrentWriteException();
145+
}
146+
147+
private TransactionCommitResult doCommit(
99148
Engine engine,
149+
long commitAsVersion,
100150
CloseableIterable<Row> dataActions)
101-
throws ConcurrentWriteException {
102-
checkState(
103-
!closed,
104-
"Transaction is already attempted to commit. Create a new transaction.");
151+
throws FileAlreadyExistsException {
105152
List<Row> metadataActions = new ArrayList<>();
106153
metadataActions.add(createCommitInfoSingleAction(generateCommitAction()));
107154
if (isNewTable) {
@@ -117,35 +164,40 @@ public TransactionCommitResult commit(
117164
CloseableIterator<Row> dataAndMetadataActions =
118165
toCloseableIterator(metadataActions.iterator()).combine(stageDataIter);
119166

120-
try {
121-
long readVersion = readSnapshot.getVersion(engine);
122-
if (readVersion == -1) {
123-
// New table, create a delta log directory
124-
if (!engine.getFileSystemClient().mkdirs(logPath.toString())) {
125-
throw new RuntimeException(
126-
"Failed to create delta log directory: " + logPath);
127-
}
167+
if (commitAsVersion == 0) {
168+
// New table, create a delta log directory
169+
if (!engine.getFileSystemClient().mkdirs(logPath.toString())) {
170+
throw new RuntimeException(
171+
"Failed to create delta log directory: " + logPath);
128172
}
129-
130-
long newVersion = readVersion + 1;
131-
// Write the staged data to a delta file
132-
engine.getJsonHandler().writeJsonFileAtomically(
133-
FileNames.deltaFile(logPath, newVersion),
134-
dataAndMetadataActions,
135-
false /* overwrite */);
136-
137-
return new TransactionCommitResult(newVersion, isReadyForCheckpoint(newVersion));
138-
} catch (FileAlreadyExistsException e) {
139-
// TODO: Resolve conflicts and retry commit
140-
throw new ConcurrentWriteException();
141173
}
174+
175+
// Write the staged data to a delta file
176+
engine.getJsonHandler().writeJsonFileAtomically(
177+
FileNames.deltaFile(logPath, commitAsVersion),
178+
dataAndMetadataActions,
179+
false /* overwrite */);
180+
181+
return new TransactionCommitResult(
182+
commitAsVersion,
183+
isReadyForCheckpoint(commitAsVersion));
184+
} catch (FileAlreadyExistsException e) {
185+
throw e;
142186
} catch (IOException ioe) {
143187
throw new RuntimeException(ioe);
144-
} finally {
145-
closed = true;
146188
}
147189
}
148190

191+
public boolean isBlindAppend() {
192+
// For now, Kernel just supports blind append.
193+
// Change this when read-after-write is supported.
194+
return true;
195+
}
196+
197+
public Optional<SetTransaction> getSetTxnOpt() {
198+
return setTxnOpt;
199+
}
200+
149201
private Row generateCommitAction() {
150202
return new CommitInfo(
151203
System.currentTimeMillis(), /* timestamp */
@@ -162,12 +214,6 @@ private boolean isReadyForCheckpoint(long newVersion) {
162214
return newVersion > 0 && newVersion % checkpointInterval == 0;
163215
}
164216

165-
private boolean isBlindAppend() {
166-
// For now, Kernel just supports blind append.
167-
// Change this when read-after-write is supported.
168-
return true;
169-
}
170-
171217
private Map<String, String> getOperationParameters() {
172218
if (isNewTable) {
173219
List<String> partitionCols = VectorUtils.toJavaList(metadata.getPartitionColumns());
@@ -182,7 +228,7 @@ private Map<String, String> getOperationParameters() {
182228
/**
183229
* Get the part of the schema of the table that needs the statistics to be collected per file.
184230
*
185-
* @param engine {@link Engine} instance to use.
231+
* @param engine {@link Engine} instance to use.
186232
* @param transactionState State of the transaction
187233
* @return
188234
*/

kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/SingleAction.java

+14
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,20 @@ public class SingleAction {
3737
// Once we start supporting updating CDC or domain metadata enabled tables, we should add the
3838
// schema for those fields here.
3939

40+
/**
41+
* Schema to use when reading the winning commit files for conflict resolution. This schema
42+
* is just for resolving conflicts when doing a blind append. It doesn't cover the case when the
43+
* txn is reading data from the table and updating the table.
44+
*/
45+
public static StructType CONFLICT_RESOLUTION_SCHEMA = new StructType()
46+
.add("txn", SetTransaction.FULL_SCHEMA)
47+
// .add("add", AddFile.FULL_SCHEMA) // not needed for blind appends
48+
// .add("remove", RemoveFile.FULL_SCHEMA) // not needed for blind appends
49+
.add("metaData", Metadata.FULL_SCHEMA)
50+
.add("protocol", Protocol.FULL_SCHEMA);
51+
// Once we start supporting domain metadata/row tracking enabled tables, we should add the
52+
// schema for domain metadata fields here.
53+
4054
// Schema to use when writing out the single action to the Delta Log.
4155
public static StructType FULL_SCHEMA = new StructType()
4256
.add("txn", SetTransaction.FULL_SCHEMA)

0 commit comments

Comments
 (0)