Description
Apache Iceberg version
1.2.1
Query engine
Spark
Please describe the bug 🐞
Spark version: 3.1
When I use Spark to write to an Iceberg table in ORC format, the write fails with a Java heap space error. The full stack trace is below, and a minimal reproduction sketch follows it.
java.lang.OutOfMemoryError: Java heap space
at org.apache.iceberg.shaded.org.apache.orc.storage.ql.exec.vector.LongColumnVector.ensureSize(LongColumnVector.java:314)
at org.apache.iceberg.shaded.org.apache.orc.storage.ql.exec.vector.StructColumnVector.ensureSize(StructColumnVector.java:136)
at org.apache.iceberg.spark.data.SparkOrcValueWriters.growColumnVector(SparkOrcValueWriters.java:198)
at org.apache.iceberg.spark.data.SparkOrcValueWriters.access$300(SparkOrcValueWriters.java:39)
at org.apache.iceberg.spark.data.SparkOrcValueWriters$ListWriter.nonNullWrite(SparkOrcValueWriters.java:137)
at org.apache.iceberg.spark.data.SparkOrcValueWriters$ListWriter.nonNullWrite(SparkOrcValueWriters.java:116)
at org.apache.iceberg.orc.OrcValueWriter.write(OrcValueWriter.java:42)
at org.apache.iceberg.data.orc.GenericOrcWriters$StructWriter.write(GenericOrcWriters.java:483)
at org.apache.iceberg.data.orc.GenericOrcWriters$StructWriter.nonNullWrite(GenericOrcWriters.java:469)
at org.apache.iceberg.orc.OrcValueWriter.write(OrcValueWriter.java:42)
at org.apache.iceberg.spark.data.SparkOrcValueWriters$ListWriter.nonNullWrite(SparkOrcValueWriters.java:140)
at org.apache.iceberg.spark.data.SparkOrcValueWriters$ListWriter.nonNullWrite(SparkOrcValueWriters.java:116)
at org.apache.iceberg.orc.OrcValueWriter.write(OrcValueWriter.java:42)
at org.apache.iceberg.data.orc.GenericOrcWriters$StructWriter.write(GenericOrcWriters.java:483)
at org.apache.iceberg.data.orc.GenericOrcWriters$StructWriter.writeRow(GenericOrcWriters.java:476)
at org.apache.iceberg.spark.data.SparkOrcWriter.write(SparkOrcWriter.java:60)
at org.apache.iceberg.spark.data.SparkOrcWriter.write(SparkOrcWriter.java:46)
at org.apache.iceberg.orc.OrcFileAppender.add(OrcFileAppender.java:83)
at org.apache.iceberg.io.DataWriter.write(DataWriter.java:61)
at org.apache.iceberg.io.ClusteredWriter.write(ClusteredWriter.java:103)
at org.apache.iceberg.io.ClusteredDataWriter.write(ClusteredDataWriter.java:34)
at org.apache.iceberg.spark.source.SparkWrite$PartitionedDataWriter.write(SparkWrite.java:629)
at org.apache.iceberg.spark.source.SparkWrite$PartitionedDataWriter.write(SparkWrite.java:604)
at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.$anonfun$run$1(WriteToDataSourceV2Exec.scala:416)
at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$$$Lambda$1166/1819967781.apply(Unknown Source)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1504)
at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.run(WriteToDataSourceV2Exec.scala:452)
at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.$anonfun$writeWithV2$2(WriteToDataSourceV2Exec.scala:360)
at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec$$Lambda$716/86102097.apply(Unknown Source)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
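A minimal sketch of the write path that can trigger this, assuming a hypothetical Hadoop catalog named demo, a hypothetical warehouse path, and a hypothetical table demo.db.events; the nested array-of-struct column mirrors the ListWriter/StructWriter frames in the trace, where the ORC column vectors grow (LongColumnVector.ensureSize) while buffering a batch:

```scala
// Hedged reproduction sketch; catalog name, warehouse path, table name,
// and data shape are all illustrative assumptions, not from the report.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._

object OrcOomRepro {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("iceberg-orc-oom-repro")
      .config("spark.sql.catalog.demo", "org.apache.iceberg.spark.SparkCatalog")
      .config("spark.sql.catalog.demo.type", "hadoop")
      .config("spark.sql.catalog.demo.warehouse", "/tmp/iceberg-warehouse") // hypothetical path
      .getOrCreate()

    // Iceberg table with a nested list column, written as ORC.
    spark.sql(
      """CREATE TABLE IF NOT EXISTS demo.db.events (
        |  id BIGINT,
        |  items ARRAY<STRUCT<tags: ARRAY<BIGINT>>>
        |) USING iceberg
        |TBLPROPERTIES ('write.format.default' = 'orc')""".stripMargin)

    // Rows carrying large nested lists make the ORC list/struct writers
    // repeatedly grow their column vectors while a row batch is buffered,
    // which is where the OutOfMemoryError surfaces in the trace above.
    val df = spark.range(0, 1000000)
      .withColumn("items",
        array(struct(array((0L until 1000L).map(lit): _*).as("tags"))))

    df.writeTo("demo.db.events").append()
  }
}
```

Whether the error reproduces depends on executor heap size and the actual size of the nested lists; the sketch only illustrates the shape of data that exercises the SparkOrcValueWriters list/struct path shown in the stack trace.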