apache · liujiawinds · Jul 22, 2022 · Jul 25, 2022 · Jul 25, 2022 · Jul 25, 2022
diff --git a/flink-formats/flink-orc-nohive/pom.xml b/flink-formats/flink-orc-nohive/pom.xml
@@ -82,6 +82,18 @@ under the License.
 			</exclusions>
 		</dependency>
 
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-common</artifactId>
+			<scope>provided</scope>
+		</dependency>
+
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-hdfs</artifactId>
+			<scope>provided</scope>
+		</dependency>
+
 		<!-- Tests -->
 
 		<dependency>

diff --git a/...link-orc-nohive/src/main/java/org/apache/flink/orc/nohive/OrcNoHiveBulkWriterFactory.java b/...link-orc-nohive/src/main/java/org/apache/flink/orc/nohive/OrcNoHiveBulkWriterFactory.java
@@ -20,7 +20,7 @@
 
 import org.apache.flink.api.common.serialization.BulkWriter;
 import org.apache.flink.core.fs.FSDataOutputStream;
-import org.apache.flink.orc.nohive.writer.NoHivePhysicalWriterImpl;
+import org.apache.flink.orc.writer.HadoopNoCloseStream;
 import org.apache.flink.table.data.RowData;
 import org.apache.flink.table.types.logical.DecimalType;
 import org.apache.flink.table.types.logical.LocalZonedTimestampType;
@@ -31,7 +31,9 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.orc.OrcFile;
 import org.apache.orc.TypeDescription;
+import org.apache.orc.impl.PhysicalFsWriter;
 import org.apache.orc.impl.WriterImpl;
+import org.apache.orc.impl.writer.WriterEncryptionVariant;
 import org.apache.orc.storage.common.type.HiveDecimal;
 import org.apache.orc.storage.ql.exec.vector.BytesColumnVector;
 import org.apache.orc.storage.ql.exec.vector.ColumnVector;
@@ -65,7 +67,10 @@ public BulkWriter<RowData> create(FSDataOutputStream out) throws IOException {
         OrcFile.WriterOptions opts = OrcFile.writerOptions(new Properties(), conf);
         TypeDescription description = TypeDescription.fromString(schema);
         opts.setSchema(description);
-        opts.physicalWriter(new NoHivePhysicalWriterImpl(out, opts));
+
+        HadoopNoCloseStream hadoopOutputStream = new HadoopNoCloseStream(out, null);
+        opts.physicalWriter(
+                new PhysicalFsWriter(hadoopOutputStream, opts, new WriterEncryptionVariant[0]));
         WriterImpl writer = new WriterImpl(null, new Path("."), opts);
 
         VectorizedRowBatch rowBatch = description.createRowBatch();

diff --git a/...orc-nohive/src/main/java/org/apache/flink/orc/nohive/writer/NoHivePhysicalWriterImpl.java b/...orc-nohive/src/main/java/org/apache/flink/orc/nohive/writer/NoHivePhysicalWriterImpl.java
diff --git a/...nohive/src/test/java/org/apache/flink/orc/nohive/OrcColumnarRowSplitReaderNoHiveTest.java b/...nohive/src/test/java/org/apache/flink/orc/nohive/OrcColumnarRowSplitReaderNoHiveTest.java
@@ -47,11 +47,11 @@ protected void prepareReadFileWithTypes(String file, int rowSize) throws IOExcep
         TypeDescription schema =
                 TypeDescription.fromString(
                         "struct<"
-                                + "f0:float,"
-                                + "f1:double,"
-                                + "f2:timestamp,"
-                                + "f3:tinyint,"
-                                + "f4:smallint"
+                                + "_col0:float,"
+                                + "_col1:double,"
+                                + "_col2:timestamp,"
+                                + "_col3:tinyint,"
+                                + "_col4:smallint"
                                 + ">");
 
         org.apache.hadoop.fs.Path filePath = new org.apache.hadoop.fs.Path(file);
@@ -105,7 +105,9 @@ protected OrcColumnarRowSplitReader createReader(
             throws IOException {
         return OrcNoHiveSplitReaderUtil.genPartColumnarRowReader(
                 new Configuration(),
-                IntStream.range(0, fullTypes.length).mapToObj(i -> "f" + i).toArray(String[]::new),
+                IntStream.range(0, fullTypes.length)
+                        .mapToObj(i -> "_col" + i)
+                        .toArray(String[]::new),
                 fullTypes,
                 partitionSpec,
                 selectedFields,

diff --git a/flink-formats/flink-orc/src/main/java/org/apache/flink/orc/writer/HadoopNoCloseStream.java b/flink-formats/flink-orc/src/main/java/org/apache/flink/orc/writer/HadoopNoCloseStream.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.orc.writer;
+
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * A Hadoop {@link FSDataOutputStream} over a caller-supplied {@link OutputStream} whose
+ * {@link #close()} is a no-op, so closing this wrapper never closes the wrapped stream.
+ */
+public class HadoopNoCloseStream extends FSDataOutputStream {
+
+    public HadoopNoCloseStream(OutputStream out, FileSystem.Statistics stats) throws IOException {
+        super(out, stats);
+    }
+
+    @Override
+    public void close() throws IOException {
+        // Intentionally empty: the wrapped stream stays open, which avoids a "Stream Closed"
+        // error or a ClosedChannelException when Flink performs a checkpoint.
+        // NOTE(review): presumably the creator of the wrapped stream closes it; confirm.
+    }
+}
diff --git a/flink-formats/flink-orc/src/main/java/org/apache/flink/orc/writer/OrcBulkWriterFactory.java b/flink-formats/flink-orc/src/main/java/org/apache/flink/orc/writer/OrcBulkWriterFactory.java
@@ -27,7 +27,9 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.orc.OrcFile;
+import org.apache.orc.impl.PhysicalFsWriter;
 import org.apache.orc.impl.WriterImpl;
+import org.apache.orc.impl.writer.WriterEncryptionVariant;
 
 import java.io.IOException;
 import java.util.HashMap;
@@ -96,7 +98,9 @@ public OrcBulkWriterFactory(
     @Override
     public BulkWriter<T> create(FSDataOutputStream out) throws IOException {
         OrcFile.WriterOptions opts = getWriterOptions();
-        opts.physicalWriter(new PhysicalWriterImpl(out, opts));
+        HadoopNoCloseStream hadoopOutputStream = new HadoopNoCloseStream(out, null);
+        opts.physicalWriter(
+                new PhysicalFsWriter(hadoopOutputStream, opts, new WriterEncryptionVariant[0]));
 
         // The path of the Writer is not used to indicate the destination file
         // in this case since we have used a dedicated physical writer to write