From a2aa92bc5d504427b34fa33848b781960a06ec08 Mon Sep 17 00:00:00 2001 From: "Mingyu Chen (Rayner)" Date: Thu, 3 Apr 2025 14:55:28 +0800 Subject: [PATCH] [fix](oss) the write to hive table on oss-hdfs may fail (#49754) Problem Summary: When inserting data into a Hive table on oss-hdfs, the write may fail with the following error: ``` Failed to delete directories for files: [oss://xxx] ``` This is because, for oss-hdfs, the Hadoop filesystem must be used to perform the operation. This PR fixes it. When calling `getFSIdentity()`, we should pass the properties so that `LocationPath` can identify the right fs type. --- .../doris/common/util/LocationPath.java | 5 +-- .../datasource/hive/HiveMetaStoreCache.java | 18 +++++----- .../doris/fs/remote/SwitchingFileSystem.java | 2 +- .../doris/common/util/LocationPathTest.java | 33 +++++++++++++------ 4 files changed, 37 insertions(+), 21 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java index 4ca8f9605a06fa..7e8b357395c797 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java @@ -203,8 +203,9 @@ public static boolean isHdfsOnOssEndpoint(String location) { // Return the file system type and the file system identity. // The file system identity is the scheme and authority of the URI, eg. "hdfs://host:port" or "s3://bucket". - public static Pair getFSIdentity(String location, String bindBrokerName) { - LocationPath locationPath = new LocationPath(location, Collections.emptyMap(), true); + public static Pair getFSIdentity(String location, + Map properties, String bindBrokerName) { + LocationPath locationPath = new LocationPath(location, properties, true); FileSystemType fsType = (bindBrokerName != null) ? 
FileSystemType.BROKER : locationPath.getFileSystemType(); URI uri = locationPath.getPath().toUri(); String fsIdent = Strings.nullToEmpty(uri.getScheme()) + "://" + Strings.nullToEmpty(uri.getAuthority()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java index 29215da52054a4..2e330d51df3bf3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java @@ -349,10 +349,11 @@ private FileCacheValue getFileCache(String location, String inputFormat, List partitionValues, String bindBrokerName) throws UserException { FileCacheValue result = new FileCacheValue(); + Map properties = catalog.getCatalogProperty().getProperties(); RemoteFileSystem fs = Env.getCurrentEnv().getExtMetaCacheMgr().getFsCache().getRemoteFileSystem( new FileSystemCache.FileSystemCacheKey(LocationPath.getFSIdentity( - location, bindBrokerName), - catalog.getCatalogProperty().getProperties(), + location, properties, bindBrokerName), + properties, bindBrokerName, jobConf)); result.setSplittable(HiveUtil.isSplittable(fs, inputFormat, location)); // For Tez engine, it may generate subdirectoies for "union" query. 
@@ -743,6 +744,7 @@ public List getFilesByTransaction(List partitions boolean isFullAcid, boolean skipCheckingAcidVersionFile, long tableId, String bindBrokerName) { List fileCacheValues = Lists.newArrayList(); try { + Map properties = catalog.getCatalogProperty().getProperties(); for (HivePartition partition : partitions) { AuthenticationConfig authenticationConfig = AuthenticationConfig.getKerberosConfig(jobConf); @@ -777,8 +779,8 @@ public List getFilesByTransaction(List partitions RemoteFileSystem fs = Env.getCurrentEnv().getExtMetaCacheMgr().getFsCache().getRemoteFileSystem( new FileSystemCache.FileSystemCacheKey( LocationPath.getFSIdentity(baseOrDeltaPath.toUri().toString(), - bindBrokerName), - catalog.getCatalogProperty().getProperties(), + properties, bindBrokerName), + properties, bindBrokerName, jobConf)); Status status = fs.exists(acidVersionPath); if (status != Status.OK) { @@ -804,8 +806,8 @@ public List getFilesByTransaction(List partitions String location = delta.getPath().toString(); RemoteFileSystem fs = Env.getCurrentEnv().getExtMetaCacheMgr().getFsCache().getRemoteFileSystem( new FileSystemCache.FileSystemCacheKey( - LocationPath.getFSIdentity(location, bindBrokerName), - catalog.getCatalogProperty().getProperties(), bindBrokerName, jobConf)); + LocationPath.getFSIdentity(location, properties, bindBrokerName), + properties, bindBrokerName, jobConf)); List remoteFiles = new ArrayList<>(); Status status = fs.listFiles(location, false, remoteFiles); if (status.ok()) { @@ -832,8 +834,8 @@ public List getFilesByTransaction(List partitions String location = directory.getBaseDirectory().toString(); RemoteFileSystem fs = Env.getCurrentEnv().getExtMetaCacheMgr().getFsCache().getRemoteFileSystem( new FileSystemCache.FileSystemCacheKey( - LocationPath.getFSIdentity(location, bindBrokerName), - catalog.getCatalogProperty().getProperties(), bindBrokerName, jobConf)); + LocationPath.getFSIdentity(location, properties, bindBrokerName), + properties, 
bindBrokerName, jobConf)); List remoteFiles = new ArrayList<>(); Status status = fs.listFiles(location, false, remoteFiles); if (status.ok()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/SwitchingFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/SwitchingFileSystem.java index 00802922ef3689..ab7c91d693a34d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/SwitchingFileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/SwitchingFileSystem.java @@ -125,7 +125,7 @@ public Status listDirectories(String remotePath, Set result) { public FileSystem fileSystem(String location) { return extMetaCacheMgr.getFsCache().getRemoteFileSystem( new FileSystemCache.FileSystemCacheKey( - LocationPath.getFSIdentity(location, + LocationPath.getFSIdentity(location, properties, bindBrokerName), properties, bindBrokerName)); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java index 4457b7dd1efd60..1e9e3e1ab88437 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java @@ -25,6 +25,7 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import java.util.Collections; import java.util.HashMap; import java.util.Map; @@ -39,7 +40,8 @@ public void testHdfsLocationConvert() { String beLocation = locationPath.toStorageLocation().toString(); Assertions.assertTrue(beLocation.startsWith("hdfs://")); - Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.DFS); + Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, Collections.emptyMap(), null).first, + FileSystemType.DFS); // HA props Map props = new HashMap<>(); @@ -92,7 +94,8 @@ public void testJFSLocationConvert() { // BE loc = locationPath.toStorageLocation().toString(); 
Assertions.assertTrue(loc.startsWith("jfs://")); - Assertions.assertEquals(LocationPath.getFSIdentity(loc, null).first, FileSystemType.JFS); + Assertions.assertEquals(LocationPath.getFSIdentity(loc, Collections.emptyMap(), null).first, + FileSystemType.JFS); } @Test @@ -106,7 +109,8 @@ public void testGSLocationConvert() { // BE String beLoc = locationPath.toStorageLocation().toString(); Assertions.assertTrue(beLoc.startsWith("s3://")); - Assertions.assertEquals(LocationPath.getFSIdentity(beLoc, null).first, FileSystemType.S3); + Assertions.assertEquals(LocationPath.getFSIdentity(beLoc, Collections.emptyMap(), null).first, + FileSystemType.S3); } @Test @@ -118,17 +122,21 @@ public void testOSSLocationConvert() { // BE String beLocation = locationPath.toStorageLocation().toString(); Assertions.assertTrue(beLocation.startsWith("s3://")); - Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.S3); + Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, Collections.emptyMap(), null).first, + FileSystemType.S3); + // test oss-hdfs rangeProps.put(OssProperties.ENDPOINT, "oss-dls.aliyuncs.com"); locationPath = new LocationPath("oss://test.oss-dls.aliyuncs.com/path", rangeProps); + Assertions.assertEquals("oss://test.oss-dls.aliyuncs.com/path", locationPath.get()); + Assertions.assertEquals(LocationPath.getFSIdentity(locationPath.get(), rangeProps, null).first, + FileSystemType.DFS); // FE Assertions.assertTrue(locationPath.get().startsWith("oss://test.oss-dls.aliyuncs")); // BE beLocation = locationPath.toStorageLocation().toString(); Assertions.assertTrue(beLocation.startsWith("oss://test.oss-dls.aliyuncs")); Assertions.assertEquals(locationPath.getFileSystemType(), FileSystemType.DFS); - } @Test @@ -140,7 +148,8 @@ public void testCOSLocationConvert() { String beLocation = locationPath.toStorageLocation().toString(); // BE Assertions.assertTrue(beLocation.startsWith("s3://")); - 
Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.S3); + Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, Collections.emptyMap(), null).first, + FileSystemType.S3); locationPath = new LocationPath("cosn://test.com", rangeProps); // FE @@ -148,7 +157,8 @@ public void testCOSLocationConvert() { // BE beLocation = locationPath.toStorageLocation().toString(); Assertions.assertTrue(beLocation.startsWith("s3://")); - Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.S3); + Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, Collections.emptyMap(), null).first, + FileSystemType.S3); locationPath = new LocationPath("ofs://test.com", rangeProps); // FE @@ -156,7 +166,8 @@ public void testCOSLocationConvert() { // BE beLocation = locationPath.toStorageLocation().toString(); Assertions.assertTrue(beLocation.startsWith("ofs://")); - Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.OFS); + Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, Collections.emptyMap(), null).first, + FileSystemType.OFS); // GFS is now equals to DFS locationPath = new LocationPath("gfs://test.com", rangeProps); @@ -165,7 +176,8 @@ public void testCOSLocationConvert() { // BE beLocation = locationPath.toStorageLocation().toString(); Assertions.assertTrue(beLocation.startsWith("gfs://")); - Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.DFS); + Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, Collections.emptyMap(), null).first, + FileSystemType.DFS); } @Test @@ -177,7 +189,8 @@ public void testOBSLocationConvert() { // BE String beLocation = locationPath.toStorageLocation().toString(); Assertions.assertTrue(beLocation.startsWith("s3://")); - Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.S3); + 
Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, Collections.emptyMap(), null).first, + FileSystemType.S3); } @Test