15
15
*/
16
16
package io .delta .kernel ;
17
17
18
+ import java .net .URI ;
18
19
import java .util .List ;
19
20
import java .util .Map ;
20
21
21
22
import io .delta .kernel .annotation .Evolving ;
22
- import io .delta .kernel .data .FilteredColumnarBatch ;
23
- import io .delta .kernel .data .Row ;
23
+ import io .delta .kernel .data .*;
24
24
import io .delta .kernel .engine .Engine ;
25
25
import io .delta .kernel .exceptions .ConcurrentWriteException ;
26
26
import io .delta .kernel .expressions .Literal ;
27
27
import io .delta .kernel .types .StructType ;
28
28
import io .delta .kernel .utils .*;
29
29
30
+ import io .delta .kernel .internal .DataWriteContextImpl ;
31
+ import io .delta .kernel .internal .actions .AddFile ;
32
+ import io .delta .kernel .internal .actions .SingleAction ;
33
+ import io .delta .kernel .internal .fs .Path ;
34
+ import static io .delta .kernel .internal .DeltaErrors .dataSchemaMismatch ;
35
+ import static io .delta .kernel .internal .DeltaErrors .partitionColumnMissingInData ;
36
+ import static io .delta .kernel .internal .TransactionImpl .getStatisticsColumns ;
37
+ import static io .delta .kernel .internal .data .TransactionStateRow .*;
38
+ import static io .delta .kernel .internal .util .PartitionUtils .getTargetDirectory ;
39
+ import static io .delta .kernel .internal .util .PartitionUtils .validateAndSanitizePartitionValues ;
40
+ import static io .delta .kernel .internal .util .Preconditions .checkArgument ;
41
+ import static io .delta .kernel .internal .util .SchemaUtils .findColIndex ;
42
+
30
43
/**
31
44
* Represents a transaction to mutate a Delta table.
32
45
*
@@ -104,7 +117,41 @@ static CloseableIterator<FilteredColumnarBatch> transformLogicalData(
104
117
Row transactionState ,
105
118
CloseableIterator <FilteredColumnarBatch > dataIter ,
106
119
Map <String , Literal > partitionValues ) {
107
- throw new UnsupportedOperationException ("Not implemented yet" );
120
+
121
+ // Note: `partitionValues` are not used as of now in this API, but taking the partition
122
+ // values as input forces the connector to not pass data from multiple partitions to this
123
+ // API in a single call.
124
+ StructType tableSchema = getLogicalSchema (engine , transactionState );
125
+ List <String > partitionColNames = getPartitionColumnsList (transactionState );
126
+ validateAndSanitizePartitionValues (tableSchema , partitionColNames , partitionValues );
127
+
128
+ // TODO: add support for:
129
+ // - enforcing the constraints
130
+ // - generating the default value columns
131
+ // - generating the generated columns
132
+
133
+ // Remove the partition columns from the data as they are already part of file metadata
134
+ // and are not needed in the data files. TODO: once we start supporting uniform compliant
135
+ // tables, we may conditionally skip this step.
136
+
137
+ // TODO: set the correct schema once writing into column mapping enabled table is supported.
138
+ String tablePath = getTablePath (transactionState );
139
+ return dataIter .map (
140
+ filteredBatch -> {
141
+ ColumnarBatch data = filteredBatch .getData ();
142
+ if (!data .getSchema ().equals (tableSchema )) {
143
+ throw dataSchemaMismatch (tablePath , tableSchema , data .getSchema ());
144
+ }
145
+ for (String partitionColName : partitionColNames ) {
146
+ int partitionColIndex = findColIndex (data .getSchema (), partitionColName );
147
+ if (partitionColIndex < 0 ) {
148
+ throw partitionColumnMissingInData (tablePath , partitionColName );
149
+ }
150
+ data = data .withDeletedColumnAt (partitionColIndex );
151
+ }
152
+ return new FilteredColumnarBatch (data , filteredBatch .getSelectionVector ());
153
+ }
154
+ );
108
155
}
109
156
110
157
/**
@@ -124,7 +171,21 @@ static DataWriteContext getWriteContext(
124
171
Engine engine ,
125
172
Row transactionState ,
126
173
Map <String , Literal > partitionValues ) {
127
- throw new UnsupportedOperationException ("Not implemented yet" );
174
+ StructType tableSchema = getLogicalSchema (engine , transactionState );
175
+ List <String > partitionColNames = getPartitionColumnsList (transactionState );
176
+
177
+ partitionValues =
178
+ validateAndSanitizePartitionValues (tableSchema , partitionColNames , partitionValues );
179
+
180
+ String targetDirectory = getTargetDirectory (
181
+ getTablePath (transactionState ),
182
+ partitionColNames ,
183
+ partitionValues );
184
+
185
+ return new DataWriteContextImpl (
186
+ targetDirectory ,
187
+ partitionValues ,
188
+ getStatisticsColumns (engine , transactionState ));
128
189
}
129
190
130
191
/**
@@ -146,6 +207,19 @@ static CloseableIterator<Row> generateAppendActions(
146
207
Row transactionState ,
147
208
CloseableIterator <DataFileStatus > fileStatusIter ,
148
209
DataWriteContext dataWriteContext ) {
149
- throw new UnsupportedOperationException ("Not implemented yet" );
210
+ checkArgument (dataWriteContext instanceof DataWriteContextImpl ,
211
+ "DataWriteContext is not created by the `Transaction.getWriteContext()`" );
212
+
213
+ URI tableRoot = new Path (getTablePath (transactionState )).toUri ();
214
+ return fileStatusIter .map (
215
+ dataFileStatus -> {
216
+ Row addFileRow = AddFile .convertDataFileStatus (
217
+ tableRoot ,
218
+ dataFileStatus ,
219
+ ((DataWriteContextImpl ) dataWriteContext ).getPartitionValues (),
220
+ true /* dataChange */ );
221
+ return SingleAction .createAddFileSingleAction (addFileRow );
222
+ }
223
+ );
150
224
}
151
225
}
0 commit comments