Skip to content

Commit 4e42e93

Browse files
committed
working write bencmark
1 parent c532b95 commit 4e42e93

File tree

15 files changed

+536
-44
lines changed

15 files changed

+536
-44
lines changed

kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/benchmarks/AbstractBenchmarkState.java

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,14 @@ public abstract class AbstractBenchmarkState {
4040
* dynamically by JMH. The value is set in the main method.
4141
*/
4242
@Param({})
43-
private String workloadSpecJson;
43+
public String workloadSpecJson;
4444

4545
/**
4646
* The engine to use for this benchmark. Note: This parameter will be set dynamically by JMH. The
4747
* value is set in the main method.
4848
*/
4949
@Param({})
50-
private String engineName;
50+
public String engineName;
5151

5252
/** The workload runner initialized for this benchmark invocation. */
5353
private WorkloadRunner runner;
@@ -76,6 +76,17 @@ public void setupInvocation() throws Exception {
7676
runner.setup();
7777
}
7878

79+
/**
80+
* Teardown method that runs after each benchmark invocation. This calls the {@link
81+
* WorkloadRunner#cleanup()} to clean up any state created during execution.
82+
*
83+
* @throws Exception If any error occurs during cleanup.
84+
*/
85+
@TearDown(Level.Invocation)
86+
public void teardownInvocation() throws Exception {
87+
runner.cleanup();
88+
}
89+
7990
/**
8091
* Returns an instance of the desired engine based on the provided engine name.
8192
*

kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/benchmarks/BenchmarkUtils.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,11 +115,10 @@ private static List<WorkloadSpec> loadSpecsFromTable(Path tableDir) {
115115
validateTableStructure(tableDir);
116116

117117
Path tableInfoPath = tableDir.resolve(TABLE_INFO_FILE_NAME);
118-
Path deltaDir = tableDir.resolve(DELTA_DIR_NAME);
119118
Path specsDir = tableDir.resolve(SPECS_DIR_NAME);
120119

121120
TableInfo tableInfo =
122-
TableInfo.fromJsonPath(tableInfoPath.toString(), deltaDir.toAbsolutePath().toString());
121+
TableInfo.fromJsonPath(tableInfoPath.toString(), tableDir.toAbsolutePath().toString());
123122

124123
return findSpecDirectories(specsDir).stream()
125124
.map(specDir -> loadSingleSpec(specDir, tableInfo))

kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/benchmarks/WorkloadBenchmark.java

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
import org.openjdk.jmh.runner.options.Options;
3434
import org.openjdk.jmh.runner.options.OptionsBuilder;
3535
import org.openjdk.jmh.runner.options.TimeValue;
36+
import org.slf4j.Logger;
37+
import org.slf4j.LoggerFactory;
3638

3739
/**
3840
* Generic JMH benchmark for all workload types. Automatically loads and runs benchmarks based on
@@ -45,12 +47,23 @@
4547
@Measurement(iterations = 5, time = 1)
4648
public class WorkloadBenchmark<T> {
4749

50+
private static final Logger log = LoggerFactory.getLogger(WorkloadBenchmark.class);
51+
4852
/** Default implementation of BenchmarkState that supports only the "default" engine. */
4953
public static class DefaultBenchmarkState extends AbstractBenchmarkState {
5054
@Override
5155
protected Engine getEngine(String engineName) {
5256
if (engineName.equals("default")) {
53-
return DefaultEngine.create(new Configuration());
57+
return DefaultEngine.create(
58+
new Configuration() {
59+
{
60+
// Set the batch sizes to small so that we get to test the multiple batch/file
61+
// scenarios.
62+
set("delta.kernel.default.parquet.reader.batch-size", "20");
63+
set("delta.kernel.default.json.reader.batch-size", "20");
64+
set("delta.kernel.default.parquet.writer.targetMaxFileSize", "20");
65+
}
66+
});
5467
} else {
5568
throw new IllegalArgumentException("Unsupported engine: " + engineName);
5669
}
@@ -78,6 +91,7 @@ public void benchmarkWorkload(DefaultBenchmarkState state, Blackhole blackhole)
7891
public static void main(String[] args) throws RunnerException, IOException {
7992
// Get workload specs from the workloads directory
8093
List<WorkloadSpec> workloadSpecs = BenchmarkUtils.loadAllWorkloads(WORKLOAD_SPECS_DIR);
94+
System.out.println("Loaded " + workloadSpecs.size() + " workload specs");
8195
if (workloadSpecs.isEmpty()) {
8296
throw new RunnerException(
8397
"No workloads found. Please add workload specs to the workloads directory.");
@@ -87,7 +101,12 @@ public static void main(String[] args) throws RunnerException, IOException {
87101
List<WorkloadSpec> filteredSpecs = new ArrayList<>();
88102
for (WorkloadSpec spec : workloadSpecs) {
89103
// TODO: In the future, we can filter specific workloads using command line args here.
90-
filteredSpecs.addAll(spec.getWorkloadVariants());
104+
List<WorkloadSpec> variants = spec.getWorkloadVariants();
105+
for (WorkloadSpec variant : variants) {
106+
if (variant.getType().equals("write")) {
107+
filteredSpecs.add(variant);
108+
}
109+
}
91110
}
92111

93112
// Convert paths into a String array for JMH. JMH requires that parameters be of type String[].
@@ -102,13 +121,13 @@ public static void main(String[] args) throws RunnerException, IOException {
102121
// TODO: In the future, this can be extended to support multiple engines.
103122
.param("engineName", "default")
104123
.forks(1)
105-
.warmupIterations(3) // Proper warmup for production benchmarks
106-
.measurementIterations(5) // Proper measurement iterations for production benchmarks
124+
.warmupIterations(3)
125+
.measurementIterations(5)
107126
.warmupTime(TimeValue.seconds(1))
108127
.measurementTime(TimeValue.seconds(1))
109128
.addProfiler(KernelMetricsProfiler.class)
110129
.build();
111130

112-
new Runner(opt, new WorkloadOutputFormat()).run();
131+
new Runner(opt).run();
113132
}
114133
}

kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/benchmarks/models/TableInfo.java

Lines changed: 73 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@
1616

1717
package io.delta.kernel.defaults.benchmarks.models;
1818

19+
import com.fasterxml.jackson.annotation.JsonIgnore;
1920
import com.fasterxml.jackson.annotation.JsonProperty;
21+
import com.fasterxml.jackson.databind.ObjectMapper;
2022
import java.io.File;
2123
import java.io.IOException;
22-
import org.codehaus.jackson.map.ObjectMapper;
24+
import java.nio.file.Paths;
2325

2426
/**
2527
* Represents metadata about a Delta table used in benchmark workloads.
@@ -30,17 +32,26 @@
3032
*
3133
* <p>TableInfo instances are typically loaded from JSON files in the workload specifications
3234
* directory structure. Each table directory should contain a {@code table_info.json} file with the
33-
* table metadata and a {@code delta} subdirectory containing the actual table data. The table root
34-
* path is the absolute path to the root of the table and is provided separately in {@link
35-
* WorkloadSpec#fromJsonPath(String, String, TableInfo)}.
35+
* table metadata and a {@code delta} subdirectory containing the actual table data.
3636
*
37-
* <p>Example JSON structure:
37+
* <p>Example JSON structure for relative table (default):
3838
*
3939
* <pre>{@code
4040
* {
41-
* "name": "large-table",
42-
* "description": "A large Delta table with multi-part checkpoints for performance testing",
43-
* "engineInfo": "Apache-Spark/3.5.1 Delta-Lake/3.1.0"
41+
* "name": "basic_table",
42+
* "description": "A basic Delta table for benchmarking",
43+
* "engine_info": "Apache-Spark/3.5.1 Delta-Lake/3.1.0"
44+
* }
45+
* }</pre>
46+
*
47+
* <p>Example JSON structure for absolute path table:
48+
*
49+
* <pre>{@code
50+
* {
51+
* "name": "s3_table",
52+
* "description": "Table stored in S3",
53+
* "table_type": "absolute",
54+
* "table_path": "s3://my-bucket/path/to/table"
4455
* }
4556
* }</pre>
4657
*/
@@ -60,23 +71,28 @@ public class TableInfo {
6071
@JsonProperty("engine_info")
6172
public String engineInfo;
6273

63-
/** The root path where the Delta table is stored. */
64-
@JsonProperty("table_root")
65-
public String tableRoot;
74+
/**
75+
* The type of table location: "relative" (default) or "absolute".
76+
*
77+
* <p>When "relative", the table is located at {table_info_directory}/delta. When "absolute", the
78+
* table is located at the path specified in {@link #tablePath}.
79+
*/
80+
@JsonProperty("table_type")
81+
private String tableType;
6682

67-
/** @return the absolute path to the root of the table */
68-
public String getTableRoot() {
69-
return tableRoot;
70-
}
83+
/**
84+
* The absolute path to the table when {@link #tableType} is "absolute". Can be a local path
85+
* (file:///) or S3 path (s3://). Null when table_type is "relative".
86+
*/
87+
@JsonProperty("table_path")
88+
private String tablePath;
7189

7290
/**
73-
* Sets the root path of the Delta table.
74-
*
75-
* @param tableRoot the absolute path to the root of the table
91+
* The resolved absolute path to the root of the table. This is computed after deserialization
92+
* based on {@link #tableType} and {@link #tablePath}.
7693
*/
77-
public void setTableRoot(String tableRoot) {
78-
this.tableRoot = tableRoot;
79-
}
94+
@JsonProperty("table_info_path")
95+
private String tableInfoPath;
8096

8197
/**
8298
* Default constructor for Jackson deserialization.
@@ -86,23 +102,47 @@ public void setTableRoot(String tableRoot) {
86102
*/
87103
public TableInfo() {}
88104

105+
/** Resolves the table root path based on the table type and location configuration. */
106+
@JsonIgnore
107+
public String getResolvedTableRoot() {
108+
if ("absolute".equals(tableType)) {
109+
if (tablePath == null || tablePath.trim().isEmpty()) {
110+
throw new IllegalStateException(
111+
"table_path must be specified when table_type is 'absolute'");
112+
}
113+
return tablePath;
114+
} else {
115+
// Default to "relative" if tableType is null or "relative"
116+
return Paths.get(tableInfoPath, "delta").toAbsolutePath().toString();
117+
}
118+
}
119+
120+
public String getTableInfoPath() {
121+
return tableInfoPath;
122+
}
123+
124+
public void setTableInfoPath(String tableInfoDirectory) {
125+
this.tableInfoPath = tableInfoDirectory;
126+
}
127+
89128
/**
90129
* Creates a TableInfo instance by reading from a JSON file at the specified path.
91130
*
92-
* <p>This method loads table metadata from a JSON file and sets the table root path. The JSON
93-
* file should contain the table name and description, while the table root path is provided
94-
* separately with the absolute path.
131+
* <p>This method loads table metadata from a JSON file and resolves the table root path. The JSON
132+
* file should contain the table name and description, while the table root path is computed based
133+
* on the table_type and table_path fields.
95134
*
96135
* @param jsonPath the path to the JSON file containing the TableInfo metadata
97-
* @param tableRoot the absolute path to the root of the Delta table
98-
* @return a TableInfo instance populated from the JSON file and table root path
136+
* @param tableInfoPath the directory containing the table_info.json file (used for relative path
137+
* resolution)
138+
* @return a TableInfo instance populated from the JSON file with resolved table root path
99139
* @throws RuntimeException if there is an error reading or parsing the JSON file
100140
*/
101-
public static TableInfo fromJsonPath(String jsonPath, String tableRoot) {
141+
public static TableInfo fromJsonPath(String jsonPath, String tableInfoPath) {
102142
ObjectMapper mapper = new ObjectMapper();
103143
try {
104144
TableInfo info = mapper.readValue(new File(jsonPath), TableInfo.class);
105-
info.setTableRoot(tableRoot);
145+
info.setTableInfoPath(tableInfoPath);
106146
return info;
107147
} catch (IOException e) {
108148
throw new RuntimeException("Failed to read TableInfo from JSON file: " + jsonPath, e);
@@ -112,8 +152,8 @@ public static TableInfo fromJsonPath(String jsonPath, String tableRoot) {
112152
/**
113153
* Returns a string representation of this TableInfo.
114154
*
115-
* <p>The string includes the table name, description, and engine info, but excludes the table
116-
* root path for security reasons (as it may contain sensitive path information).
155+
* <p>The string includes the table name, description, engine info, and CCv2 status, but excludes
156+
* the table root path for security reasons (as it may contain sensitive path information).
117157
*
118158
* @return a string representation of this TableInfo
119159
*/
@@ -125,6 +165,8 @@ public String toString() {
125165
+ description
126166
+ "', engineInfo='"
127167
+ engineInfo
128-
+ "'}";
168+
+ "', tableType='"
169+
+ tableType
170+
+ "}";
129171
}
130172
}

kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/benchmarks/models/WorkloadSpec.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,10 @@
3333
* field in the JSON.
3434
*/
3535
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "type")
36-
@JsonSubTypes({@JsonSubTypes.Type(value = ReadSpec.class, name = "read")})
36+
@JsonSubTypes({
37+
@JsonSubTypes.Type(value = ReadSpec.class, name = "read"),
38+
@JsonSubTypes.Type(value = WriteSpec.class, name = "write")
39+
})
3740
public abstract class WorkloadSpec {
3841
/**
3942
* The type of workload (e.g., "read"). This is used by Jackson's polymorphic deserialization to

0 commit comments

Comments
 (0)