Skip to content

Commit

Permalink
Add NotificationType.VALIDATE which can serve as a dry-run of a CREAT…
Browse files Browse the repository at this point in the history
…E without a metadata file (#321)

If a remote catalog or manual caller wants to ensure that permissions, paths, etc., are configured
correctly to receive CREATE/UPDATE notifications before deciding to actually create a table in the
remote catalog, sending a VALIDATE notification with the prospective table metadata path can
pre-validate basic setup. In a VALIDATE call, no actual entities will be mutated or created.
  • Loading branch information
dennishuo authored Sep 27, 2024
1 parent afabfa6 commit 1c8ca73
Show file tree
Hide file tree
Showing 6 changed files with 306 additions and 42 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ public class BasePolarisCatalog extends BaseMetastoreViewCatalog
private CloseableGroup closeableGroup;
private Map<String, String> catalogProperties;
private Map<String, String> tableDefaultProperties;
private final FileIOFactory fileIOFactory;
private FileIOFactory fileIOFactory;
private PolarisMetaStoreManager metaStoreManager;

/**
Expand Down Expand Up @@ -1613,6 +1613,11 @@ private PolarisMetaStoreManager getMetaStoreManager() {
return metaStoreManager;
}

@VisibleForTesting
void setFileIOFactory(FileIOFactory newFactory) {
this.fileIOFactory = newFactory;
}

@VisibleForTesting
long getCatalogId() {
// TODO: Properly handle initialization
Expand Down Expand Up @@ -1873,6 +1878,51 @@ private boolean sendNotificationForTableLike(
if (notificationType == NotificationType.DROP) {
return dropTableLike(PolarisEntitySubType.TABLE, tableIdentifier, Map.of(), false /* purge */)
.isSuccess();
} else if (notificationType == NotificationType.VALIDATE) {
// In this mode we don't want to make any mutations, so we won't auto-create non-existing
// parent namespaces. This means when we want to validate allowedLocations for the proposed
// table metadata location, we must independently find the deepest non-null parent namespace
// of the TableIdentifier, which may even be the base CatalogEntity if no parent namespaces
// actually exist yet. We can then extract the right StorageInfo entity via a normal call
// to findStorageInfoFromHierarchy.
PolarisResolvedPathWrapper resolvedStorageEntity = null;
Optional<PolarisEntity> storageInfoEntity = Optional.empty();
for (int i = tableIdentifier.namespace().length(); i >= 0; i--) {
Namespace nsLevel =
Namespace.of(
Arrays.stream(tableIdentifier.namespace().levels())
.limit(i)
.toArray(String[]::new));
resolvedStorageEntity = resolvedEntityView.getResolvedPath(nsLevel);
if (resolvedStorageEntity != null) {
storageInfoEntity = findStorageInfoFromHierarchy(resolvedStorageEntity);
break;
}
}

if (resolvedStorageEntity == null || storageInfoEntity.isEmpty()) {
throw new BadRequestException(
"Failed to find StorageInfo entity for TableIdentifier %s", tableIdentifier);
}

// Validate location against the resolvedStorageEntity
String metadataLocation =
transformTableLikeLocation(request.getPayload().getMetadataLocation());
validateLocationForTableLike(tableIdentifier, metadataLocation, resolvedStorageEntity);

// Validate that we can construct a FileIO
String locationDir = metadataLocation.substring(0, metadataLocation.lastIndexOf("/"));
refreshIOWithCredentials(
tableIdentifier,
Set.of(locationDir),
resolvedStorageEntity,
new HashMap<>(tableDefaultProperties),
Set.of(PolarisStorageActions.READ));

LOGGER.debug(
"Successful VALIDATE notification for tableIdentifier {}, metadataLocation {}",
tableIdentifier,
metadataLocation);
} else if (notificationType == NotificationType.CREATE
|| notificationType == NotificationType.UPDATE) {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ public enum NotificationType {
UNKNOWN(0, "UNKNOWN"),
CREATE(1, "CREATE"),
UPDATE(2, "UPDATE"),
DROP(3, "DROP");
DROP(3, "DROP"),
VALIDATE(4, "VALIDATE");

NotificationType(int id, String displayName) {
this.id = id;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,142 @@ public void testCreateNestedNamespaceUnderMissingParent() {
.hasMessageContaining("Parent");
}

@Test
public void testValidateNotificationWhenTableAndNamespacesDontExist() {
Assumptions.assumeTrue(
requiresNamespaceCreate(),
"Only applicable if namespaces must be created before adding children");
Assumptions.assumeTrue(
supportsNestedNamespaces(), "Only applicable if nested namespaces are supported");
Assumptions.assumeTrue(
supportsNotifications(), "Only applicable if notifications are supported");

final String tableLocation = "s3://externally-owned-bucket/validate_table/";
final String tableMetadataLocation = tableLocation + "metadata/v1.metadata.json";
BasePolarisCatalog catalog = catalog();

Namespace namespace = Namespace.of("parent", "child1");
TableIdentifier table = TableIdentifier.of(namespace, "table");

// For a VALIDATE request we can pass in a full metadata JSON filename or just the table's
// metadata directory; either way the path will be validated to be under the allowed locations,
// but any actual metadata JSON file will not be accessed.
NotificationRequest request = new NotificationRequest();
request.setNotificationType(NotificationType.VALIDATE);
TableUpdateNotification update = new TableUpdateNotification();
update.setMetadataLocation(tableMetadataLocation);
update.setTableName(table.name());
update.setTableUuid(UUID.randomUUID().toString());
update.setTimestamp(230950845L);
request.setPayload(update);

// We should be able to send the notification without creating the metadata file since it's
// only validating the ability to send the CREATE/UPDATE notification possibly before actually
// creating the table at all on the remote catalog.
Assertions.assertThat(catalog.sendNotification(table, request))
.as("Notification should be sent successfully")
.isTrue();
Assertions.assertThat(catalog.namespaceExists(namespace))
.as("Intermediate namespaces should not be created")
.isFalse();
Assertions.assertThat(catalog.tableExists(table))
.as("Table should not be created for a VALIDATE notification")
.isFalse();

// Now also check that despite creating the metadata file, the validation call still doesn't
// create any namespaces or tables.
InMemoryFileIO fileIO = (InMemoryFileIO) catalog.getIo();
fileIO.addFile(
tableMetadataLocation,
TableMetadataParser.toJson(createSampleTableMetadata(tableLocation)).getBytes(UTF_8));

Assertions.assertThat(catalog.sendNotification(table, request))
.as("Notification should be sent successfully")
.isTrue();
Assertions.assertThat(catalog.namespaceExists(namespace))
.as("Intermediate namespaces should not be created")
.isFalse();
Assertions.assertThat(catalog.tableExists(table))
.as("Table should not be created for a VALIDATE notification")
.isFalse();
}

@Test
public void testValidateNotificationInDisallowedLocation() {
Assumptions.assumeTrue(
requiresNamespaceCreate(),
"Only applicable if namespaces must be created before adding children");
Assumptions.assumeTrue(
supportsNestedNamespaces(), "Only applicable if nested namespaces are supported");
Assumptions.assumeTrue(
supportsNotifications(), "Only applicable if notifications are supported");

// The location of the metadata JSON file specified in the create will be forbidden.
// For a VALIDATE call we can pass in the metadata/ prefix itself instead of a metadata JSON
// filename.
final String tableLocation = "s3://forbidden-table-location/table/";
final String tableMetadataLocation = tableLocation + "metadata/";
BasePolarisCatalog catalog = catalog();

Namespace namespace = Namespace.of("parent", "child1");
TableIdentifier table = TableIdentifier.of(namespace, "table");

NotificationRequest request = new NotificationRequest();
request.setNotificationType(NotificationType.VALIDATE);
TableUpdateNotification update = new TableUpdateNotification();
update.setMetadataLocation(tableMetadataLocation);
update.setTableName(table.name());
update.setTableUuid(UUID.randomUUID().toString());
update.setTimestamp(230950845L);
request.setPayload(update);

Assertions.assertThatThrownBy(() -> catalog.sendNotification(table, request))
.isInstanceOf(ForbiddenException.class)
.hasMessageContaining("Invalid location");
}

@Test
public void testValidateNotificationFailToCreateFileIO() {
Assumptions.assumeTrue(
requiresNamespaceCreate(),
"Only applicable if namespaces must be created before adding children");
Assumptions.assumeTrue(
supportsNestedNamespaces(), "Only applicable if nested namespaces are supported");
Assumptions.assumeTrue(
supportsNotifications(), "Only applicable if notifications are supported");

// The location of the metadata JSON file specified in the create will be allowed, but
// we'll inject a separate ForbiddenException during FileIO instantiation.
// For a VALIDATE call we can pass in the metadata/ prefix itself instead of a metadata JSON
// filename.
final String tableLocation = "s3://externally-owned-bucket/validate_table/";
final String tableMetadataLocation = tableLocation + "metadata/";
BasePolarisCatalog catalog = catalog();

Namespace namespace = Namespace.of("parent", "child1");
TableIdentifier table = TableIdentifier.of(namespace, "table");

NotificationRequest request = new NotificationRequest();
request.setNotificationType(NotificationType.VALIDATE);
TableUpdateNotification update = new TableUpdateNotification();
update.setMetadataLocation(tableMetadataLocation);
update.setTableName(table.name());
update.setTableUuid(UUID.randomUUID().toString());
update.setTimestamp(230950845L);
request.setPayload(update);

catalog.setFileIOFactory(
new FileIOFactory() {
@Override
public FileIO loadFileIO(String impl, Map<String, String> properties) {
throw new ForbiddenException("Fake failure applying downscoped credentials");
}
});
Assertions.assertThatThrownBy(() -> catalog.sendNotification(table, request))
.isInstanceOf(ForbiddenException.class)
.hasMessageContaining("Fake failure applying downscoped credentials");
}

@Test
public void testUpdateNotificationWhenTableAndNamespacesDontExist() {
Assumptions.assumeTrue(
Expand Down
Loading

0 comments on commit 1c8ca73

Please sign in to comment.