Skip to content

Commit a4cbc7a

Browse files
adnanhemaniwilliamhyun
authored andcommitted
Remove Java URI validations for Blob Storage providers (#1604)
This is a retry at #1586, which I'll close if this PR is on the right direction instead. Java URI does not actually apply any normalization to URIs if we do not call URI.normalize() (which we currently do not). Additionally, blob storage providers like S3 and GCS can provide ".." and "." as valid fragments in URLs - which Java URI would attempt to normalize incorrectly. As a result, attempting to validate and/or normalize URIs for blob storage providers using the Java URI class is the incorrect behavior. While we may want to add location validation via regex later, removing it first should at least unblock the bug we see in #1545.
1 parent 94dfb79 commit a4cbc7a

File tree

2 files changed

+50
-5
lines changed

2 files changed

+50
-5
lines changed

polaris-core/src/main/java/org/apache/polaris/core/storage/StorageLocation.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,12 @@ public static StorageLocation of(String location) {
4040
protected StorageLocation(@Nonnull String location) {
4141
if (location == null) {
4242
this.location = null;
43-
} else if (location.startsWith("file:/") && !location.startsWith(LOCAL_PATH_PREFIX)) {
43+
} else if (location.startsWith("file:/")) {
4444
this.location = URI.create(location.replaceFirst("file:/+", LOCAL_PATH_PREFIX)).toString();
4545
} else if (location.startsWith("/")) {
4646
this.location = URI.create(location.replaceFirst("/+", LOCAL_PATH_PREFIX)).toString();
4747
} else {
48-
this.location = URI.create(location).toString();
48+
this.location = location;
4949
}
5050
}
5151

polaris-core/src/test/java/org/apache/polaris/service/storage/StorageLocationTest.java

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,63 @@
1818
*/
1919
package org.apache.polaris.service.storage;
2020

21+
import java.net.URISyntaxException;
2122
import org.apache.polaris.core.storage.StorageLocation;
23+
import org.apache.polaris.core.storage.azure.AzureLocation;
2224
import org.assertj.core.api.Assertions;
2325
import org.junit.jupiter.api.Test;
2426

2527
public class StorageLocationTest {
2628

2729
@Test
2830
public void testOfDifferentPrefixes() {
29-
StorageLocation StandardLocation = StorageLocation.of("file:///path/to/file");
31+
StorageLocation standardLocation = StorageLocation.of("file:///path/to/file");
3032
StorageLocation slashLeadingLocation = StorageLocation.of("/path/to/file");
33+
StorageLocation manySlashLeadingLocation = StorageLocation.of("////////path/to/file");
3134
StorageLocation fileSingleSlashLocation = StorageLocation.of("file:/path/to/file");
32-
Assertions.assertThat(slashLeadingLocation.equals(StandardLocation)).isTrue();
33-
Assertions.assertThat(fileSingleSlashLocation.equals(StandardLocation)).isTrue();
35+
StorageLocation fileTooManySlashesLocation = StorageLocation.of("file://///////path/to/file");
36+
Assertions.assertThat(slashLeadingLocation.equals(standardLocation)).isTrue();
37+
Assertions.assertThat(manySlashLeadingLocation.equals(standardLocation)).isTrue();
38+
Assertions.assertThat(fileSingleSlashLocation.equals(standardLocation)).isTrue();
39+
Assertions.assertThat(fileTooManySlashesLocation.equals(standardLocation)).isTrue();
40+
Assertions.assertThat(standardLocation).isExactlyInstanceOf(StorageLocation.class);
41+
Assertions.assertThat(slashLeadingLocation).isExactlyInstanceOf(StorageLocation.class);
42+
Assertions.assertThat(manySlashLeadingLocation).isExactlyInstanceOf(StorageLocation.class);
43+
Assertions.assertThat(fileSingleSlashLocation).isExactlyInstanceOf(StorageLocation.class);
44+
Assertions.assertThat(fileTooManySlashesLocation).isExactlyInstanceOf(StorageLocation.class);
45+
}
46+
47+
@Test
48+
public void testBlobStorageLocations() {
49+
StorageLocation azureLocation =
50+
StorageLocation.of("wasb://container@storageaccount.blob.core.windows.net/myfile");
51+
Assertions.assertThat(azureLocation instanceof AzureLocation).isTrue();
52+
azureLocation =
53+
StorageLocation.of("abfss://container@storageaccount.blob.core.windows.net/myfile");
54+
Assertions.assertThat(azureLocation instanceof AzureLocation).isTrue();
55+
56+
String s3LocationStr = "s3://test-bucket/mydirectory";
57+
StorageLocation s3Location = StorageLocation.of(s3LocationStr);
58+
Assertions.assertThat(s3Location instanceof AzureLocation).isFalse();
59+
60+
Assertions.assertThat(s3Location.toString()).isEqualTo(s3LocationStr);
61+
}
62+
63+
@Test
64+
public void testSpecialCharacters() {
65+
// Blob Storage does not have validations
66+
String specialCharsBlobStorage = "s3://test-bucket/quote'/equals=/period/../myfile.parquet";
67+
StorageLocation s3LocationSpecialCharacters = StorageLocation.of(specialCharsBlobStorage);
68+
69+
Assertions.assertThat(s3LocationSpecialCharacters.toString())
70+
.isEqualTo(specialCharsBlobStorage);
71+
72+
// But local filesystems do
73+
String specialCharsLocalStorage = "file:///var/tmp\"/myfile.parquet";
74+
Assertions.assertThatThrownBy(
75+
() -> {
76+
StorageLocation.of(specialCharsLocalStorage);
77+
})
78+
.hasCauseInstanceOf(URISyntaxException.class);
3479
}
3580
}

0 commit comments

Comments
 (0)