[#3403] fix(hive-catalog): add hive catalog property list-all-tables (#3703)

<!--
1. Title: [#<issue>] <type>(<scope>): <subject>
   Examples:
     - "[#123] feat(operator): support xxx"
     - "[#233] fix: check null before access result in xxx"
     - "[MINOR] refactor: fix typo in variable name"
     - "[MINOR] docs: fix typo in README"
     - "[#255] test: fix flaky test NameOfTheTest"
   Reference: https://www.conventionalcommits.org/en/v1.0.0/
2. If the PR is unfinished, please mark this PR as draft.
-->

### What changes were proposed in this pull request?

Add a Hive catalog property `list-all-tables`. This property controls whether non-Hive tables, such as Iceberg tables, are displayed in the Hive table list.
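
A minimal usage sketch for the new property; the metastore URI is a placeholder and the Guava `ImmutableMap` setup is only illustrative, since just the `list-all-tables` key (default `false`) comes from this PR:

```java
import com.google.common.collect.ImmutableMap;
import java.util.Map;

// Catalog properties for a Hive catalog that hides non-Hive tables.
// "list-all-tables" defaults to false; setting it to true also lists
// tables created by other engines, such as Iceberg.
Map<String, String> properties =
    ImmutableMap.of(
        "metastore.uris", "thrift://127.0.0.1:9083", // Hive metastore address (required)
        "list-all-tables", "false");                 // list only Hive MANAGED/EXTERNAL tables
```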

### Why are the changes needed?

A schema in the Hive catalog can contain Iceberg tables, and those tables currently show up when listing the schema's tables.

Fix: #3403

### Does this PR introduce _any_ user-facing change?

Yes. This PR adds a new Hive catalog property, `list-all-tables` (documented in `docs/apache-hive-catalog.md`).

### How was this patch tested?

1. Create a Hive catalog with the `list-all-tables` property set to `false`.
2. Create a database and an Iceberg table in that catalog through Hive Beeline.
3. Check whether the Iceberg table is displayed when listing the catalog's tables (see the sketch below).
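
A minimal sketch of what step 3 verifies, assuming the Iceberg table's Hive Metastore parameters carry the `table_type=ICEBERG` marker that the new filter checks; the variables below are illustrative, not code from this PR:

```java
import java.util.Map;

// Mirrors the new listTables filter: with list-all-tables=false,
// a table whose parameters mark it as ICEBERG must not be listed.
Map<String, String> params = Map.of("table_type", "ICEBERG");
boolean listAllTables = false;
boolean listed =
    listAllTables || !"ICEBERG".equalsIgnoreCase(params.get("table_type"));
System.out.println(listed); // prints false: the Iceberg table stays hidden
```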

---------

Co-authored-by: ericqin <ericqin@tencent.com>
mygrsun and mygrsun2 authored Jun 5, 2024
1 parent 23b794e commit 1952737
Showing 5 changed files with 69 additions and 13 deletions.

@@ -6,9 +6,12 @@

import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS;
import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.CLIENT_POOL_SIZE;
import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.LIST_ALL_TABLES;
import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.METASTORE_URIS;
import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.PRINCIPAL;
import static com.datastrato.gravitino.catalog.hive.HiveTable.ICEBERG_TABLE_TYPE_VALUE;
import static com.datastrato.gravitino.catalog.hive.HiveTable.SUPPORT_TABLE_TYPES;
import static com.datastrato.gravitino.catalog.hive.HiveTable.TABLE_TYPE_PROP;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.COMMENT;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TABLE_TYPE;
import static com.datastrato.gravitino.connector.BaseCatalog.CATALOG_BYPASS_PREFIX;
@@ -99,6 +102,7 @@ public class HiveCatalogOperations implements CatalogOperations, SupportsSchemas
private ScheduledThreadPoolExecutor checkTgtExecutor;
private String kerberosRealm;
private ProxyPlugin proxyPlugin;
boolean listAllTables = true;

// Map that maintains the mapping of keys in Gravitino to that in Hive, for example, users
// will only need to set the configuration 'METASTORE_URL' in Gravitino and Gravitino will change
@@ -150,6 +154,8 @@ public void initialize(

this.clientPool =
new CachedClientPool(getClientPoolSize(conf), hiveConf, getCacheEvictionInterval(conf));

this.listAllTables = enableListAllTables(conf);
}

private void initKerberosIfNecessary(Map<String, String> conf, Configuration hadoopConf) {
@@ -275,6 +281,10 @@ long getCacheEvictionInterval(Map<String, String> conf) {
.getOrDefault(conf, CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS);
}

boolean enableListAllTables(Map<String, String> conf) {
return (boolean)
propertiesMetadata.catalogPropertiesMetadata().getOrDefault(conf, LIST_ALL_TABLES);
}

/** Closes the Hive catalog and releases the associated client pool. */
@Override
public void close() {
@@ -534,7 +544,18 @@ public NameIdentifier[] listTables(Namespace namespace) throws NoSuchSchemaExcep
return clientPool.run(
c ->
c.getTableObjectsByName(schemaIdent.name(), allTables).stream()
.filter(tb -> SUPPORT_TABLE_TYPES.contains(tb.getTableType()))
.filter(
tb -> {
boolean isSupportTable = SUPPORT_TABLE_TYPES.contains(tb.getTableType());
if (!isSupportTable) {
return false;
}
if (!listAllTables) {
Map<String, String> parameters = tb.getParameters();
return isHiveTable(parameters);
}
return true;
})
.map(tb -> NameIdentifier.of(namespace, tb.getTableName()))
.toArray(NameIdentifier[]::new));
} catch (UnknownDBException e) {
@@ -550,6 +571,22 @@ public NameIdentifier[] listTables(Namespace namespace) throws NoSuchSchemaExcep
}
}

boolean isHiveTable(Map<String, String> tableParameters) {
    return !isIcebergTable(tableParameters);
}

boolean isIcebergTable(Map<String, String> tableParameters) {
    return tableParameters != null
        && ICEBERG_TABLE_TYPE_VALUE.equalsIgnoreCase(tableParameters.get(TABLE_TYPE_PROP));
}

/**
* Loads a table from the Hive Metastore.
*

@@ -36,6 +36,10 @@ public class HiveCatalogPropertiesMeta extends BaseCatalogPropertiesMetadata {

public static final String FETCH_TIMEOUT_SEC = "kerberos.keytab-fetch-timeout-sec";

public static final String LIST_ALL_TABLES = "list-all-tables";

public static final boolean DEFAULT_LIST_ALL_TABLES = false;

private static final Map<String, PropertyEntry<?>> HIVE_CATALOG_PROPERTY_ENTRIES =
ImmutableMap.<String, PropertyEntry<?>>builder()
.put(
@@ -88,6 +92,16 @@ public class HiveCatalogPropertiesMeta extends BaseCatalogPropertiesMetadata {
FETCH_TIMEOUT_SEC,
PropertyEntry.integerOptionalPropertyEntry(
FETCH_TIMEOUT_SEC, "The timeout to fetch key tab", true, 60, false))
.put(
LIST_ALL_TABLES,
PropertyEntry.booleanPropertyEntry(
LIST_ALL_TABLES,
"Lists all tables in a database, including non-Hive tables, such as Iceberg, etc.",
false,
false,
DEFAULT_LIST_ALL_TABLES,
false,
false))
.putAll(BASIC_CATALOG_PROPERTY_ENTRIES)
.build();


@@ -62,6 +62,8 @@ public class HiveTable extends BaseTable {
// A set of supported Hive table types.
public static final Set<String> SUPPORT_TABLE_TYPES =
Sets.newHashSet(MANAGED_TABLE.name(), EXTERNAL_TABLE.name());
public static final String ICEBERG_TABLE_TYPE_VALUE = "ICEBERG";
public static final String TABLE_TYPE_PROP = "table_type";
private String schemaName;
private CachedClientPool clientPool;
private StorageDescriptor sd;

@@ -11,6 +11,7 @@
import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.FETCH_TIMEOUT_SEC;
import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.IMPERSONATION_ENABLE;
import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.KEY_TAB_URI;
import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.LIST_ALL_TABLES;
import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.METASTORE_URIS;
import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.PRINCIPAL;
import static com.datastrato.gravitino.catalog.hive.TestHiveCatalog.HIVE_PROPERTIES_METADATA;
@@ -67,12 +68,13 @@ void testPropertyMeta() {
Map<String, PropertyEntry<?>> propertyEntryMap =
HIVE_PROPERTIES_METADATA.catalogPropertiesMetadata().propertyEntries();

Assertions.assertEquals(11, propertyEntryMap.size());
Assertions.assertEquals(12, propertyEntryMap.size());
Assertions.assertTrue(propertyEntryMap.containsKey(METASTORE_URIS));
Assertions.assertTrue(propertyEntryMap.containsKey(Catalog.PROPERTY_PACKAGE));
Assertions.assertTrue(propertyEntryMap.containsKey(BaseCatalog.CATALOG_OPERATION_IMPL));
Assertions.assertTrue(propertyEntryMap.containsKey(CLIENT_POOL_SIZE));
Assertions.assertTrue(propertyEntryMap.containsKey(IMPERSONATION_ENABLE));
Assertions.assertTrue(propertyEntryMap.containsKey(LIST_ALL_TABLES));

Assertions.assertTrue(propertyEntryMap.get(METASTORE_URIS).isRequired());
Assertions.assertFalse(propertyEntryMap.get(Catalog.PROPERTY_PACKAGE).isRequired());

23 changes: 12 additions & 11 deletions docs/apache-hive-catalog.md
@@ -28,17 +28,18 @@ The Hive catalog supports creating, updating, and deleting databases and tables

### Catalog properties

| Property Name | Description | Default Value | Required | Since Version |
|------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|------------------------------|---------------|
| `metastore.uris` | The Hive metastore service URIs, separate multiple addresses with commas. Such as `thrift://127.0.0.1:9083` | (none) | Yes | 0.2.0 |
| `client.pool-size` | The maximum number of Hive metastore clients in the pool for Gravitino. | 1 | No | 0.2.0 |
| `gravitino.bypass.` | Property name with this prefix passed down to the underlying HMS client for use. Such as `gravitino.bypass.hive.metastore.failure.retries = 3` indicate 3 times of retries upon failure of Thrift metastore calls | (none) | No | 0.2.0 |
| `client.pool-cache.eviction-interval-ms` | The cache pool eviction interval. | 300000 | No | 0.4.0 |
| `impersonation-enable` | Enable user impersonation for Hive catalog. | false | No | 0.4.0 |
| `kerberos.principal` | The Kerberos principal for the catalog. You should configure `gravitino.bypass.hadoop.security.authentication`, `gravitino.bypass.hive.metastore.kerberos.principal` and `gravitino.bypass.hive.metastore.sasl.enabled`if you want to use Kerberos. | (none) | required if you use kerberos | 0.4.0 |
| `kerberos.keytab-uri` | The uri of key tab for the catalog. Now supported protocols are `https`, `http`, `ftp`, `file`. | (none) | required if you use kerberos | 0.4.0 |
| `kerberos.check-interval-sec` | The interval to check validness of the principal | 60 | No | 0.4.0 |
| `kerberos.keytab-fetch-timeout-sec` | The timeout to fetch key tab | 60 | No | 0.4.0 |
| Property Name | Description | Default Value | Required | Since Version |
|------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|------------------------------|---------------|
| `metastore.uris` | The Hive metastore service URIs, separate multiple addresses with commas. Such as `thrift://127.0.0.1:9083` | (none) | Yes | 0.2.0 |
| `client.pool-size` | The maximum number of Hive metastore clients in the pool for Gravitino. | 1 | No | 0.2.0 |
| `gravitino.bypass.` | Property name with this prefix passed down to the underlying HMS client for use. Such as `gravitino.bypass.hive.metastore.failure.retries = 3` indicate 3 times of retries upon failure of Thrift metastore calls | (none) | No | 0.2.0 |
| `client.pool-cache.eviction-interval-ms` | The cache pool eviction interval. | 300000 | No | 0.4.0 |
| `impersonation-enable` | Enable user impersonation for Hive catalog. | false | No | 0.4.0 |
| `kerberos.principal`                      | The Kerberos principal for the catalog. You should configure `gravitino.bypass.hadoop.security.authentication`, `gravitino.bypass.hive.metastore.kerberos.principal` and `gravitino.bypass.hive.metastore.sasl.enabled` if you want to use Kerberos. | (none) | required if you use kerberos | 0.4.0 |
| `kerberos.keytab-uri` | The uri of key tab for the catalog. Now supported protocols are `https`, `http`, `ftp`, `file`. | (none) | required if you use kerberos | 0.4.0 |
| `kerberos.check-interval-sec` | The interval to check validness of the principal | 60 | No | 0.4.0 |
| `kerberos.keytab-fetch-timeout-sec` | The timeout to fetch key tab | 60 | No | 0.4.0 |
| `list-all-tables`                         | Lists all tables in a database, including non-Hive tables, such as Iceberg, etc.                                                                                                                                                                     | false         | No                           | 0.5.1         |

When you use Gravitino with Trino, you can pass the Trino Hive connector configuration using the prefix `trino.bypass.`. For example, use `trino.bypass.hive.config.resources` to pass the `hive.config.resources` configuration to the Gravitino Hive catalog in Trino runtime.
