-
Notifications
You must be signed in to change notification settings - Fork 2.9k
Closed
Description
In 0.12.1, write empty dataset by Spark append mode will result in an empty snapshot created, write empty dataset by Spark overwrite mode will result in an exception below:
org.apache.spark.SparkException: Writing job aborted.
at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec.doExecute(WriteToDataSourceV2Exec.scala:92)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676)
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676)
at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:676)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:260)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:229)
at com.tencent.bk.base.datahub.iceberg.SparkUtils.writeTable(SparkUtils.java:248)
at com.tencent.bk.base.datahub.iceberg.TestSparkUtils.testWriteEmptyDs(TestSparkUtils.java:223)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
at org.junit.runner.JUnitCore.run(JUnitCore.java:137)
at com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:68)
at com.intellij.rt.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:33)
at com.intellij.rt.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:230)
at com.intellij.rt.junit.JUnitStarter.main(JUnitStarter.java:58)
Caused by: java.lang.IllegalStateException: Cannot determine partition spec: no data or delete files have been added
at org.apache.iceberg.relocated.com.google.common.base.Preconditions.checkState(Preconditions.java:508)
at org.apache.iceberg.MergingSnapshotProducer.writeSpec(MergingSnapshotProducer.java:121)
at org.apache.iceberg.BaseReplacePartitions.apply(BaseReplacePartitions.java:58)
at org.apache.iceberg.SnapshotProducer.apply(SnapshotProducer.java:164)
at org.apache.iceberg.BaseReplacePartitions.apply(BaseReplacePartitions.java:26)
at org.apache.iceberg.SnapshotProducer.lambda$commit$2(SnapshotProducer.java:283)
at org.apache.iceberg.util.Tasks$Builder.runTaskWithRetry(Tasks.java:405)
at org.apache.iceberg.util.Tasks$Builder.runSingleThreaded(Tasks.java:214)
at org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:198)
at org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:190)
at org.apache.iceberg.SnapshotProducer.commit(SnapshotProducer.java:282)
at org.apache.iceberg.BaseReplacePartitions.commit(BaseReplacePartitions.java:26)
at org.apache.iceberg.spark.source.Writer.commitOperation(Writer.java:169)
at org.apache.iceberg.spark.source.Writer.replacePartitions(Writer.java:195)
at org.apache.iceberg.spark.source.Writer.commit(Writer.java:145)
at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec.doExecute(WriteToDataSourceV2Exec.scala:76)
... 42 more
Overwrite partitions with emtpy dataset is the same as these lines of code:
Table t = loadTable(xxx);
t.newReplacePartitions().commit();
In the commit method, it calls down to
| Preconditions.checkState(spec != null, |
Metadata
Metadata
Assignees
Labels
No labels