From b18810dedf069b024af22daf41478a79253f6ac9 Mon Sep 17 00:00:00 2001
From: caican00
Date: Fri, 15 Mar 2024 15:15:39 +0800
Subject: [PATCH 01/55] [#2541] feat(spark-connector): support DDL, read and
 write operations to Iceberg catalog

---
 .../GravitinoCatalogAdaptorFactory.java       |  3 ++
 .../connector/iceberg/IcebergAdaptor.java     | 53 +++++++++++++++++++
 .../iceberg/IcebergPropertiesConverter.java   | 18 +++++++
 .../connector/iceberg/SparkIcebergTable.java  | 18 +++++++
 4 files changed, 92 insertions(+)
 create mode 100644 spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java
 create mode 100644 spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConverter.java
 create mode 100644 spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/SparkIcebergTable.java

diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/GravitinoCatalogAdaptorFactory.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/GravitinoCatalogAdaptorFactory.java
index 23a13ceeec2..b3b014839df 100644
--- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/GravitinoCatalogAdaptorFactory.java
+++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/GravitinoCatalogAdaptorFactory.java
@@ -6,6 +6,7 @@
 package com.datastrato.gravitino.spark.connector;
 
 import com.datastrato.gravitino.spark.connector.hive.HiveAdaptor;
+import com.datastrato.gravitino.spark.connector.iceberg.IcebergAdaptor;
 import java.util.Locale;
 
 /**
@@ -17,6 +18,8 @@ public static GravitinoCatalogAdaptor createGravitinoAdaptor(String provider) {
     switch (provider.toLowerCase(Locale.ROOT)) {
       case "hive":
         return new HiveAdaptor();
+      case "iceberg":
+        return new IcebergAdaptor();
       default:
         throw new RuntimeException(String.format("Provider:%s is not supported yet", provider));
     }
diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java
new file mode 100644
index 00000000000..7c866b84340
--- /dev/null
+++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java
@@ -0,0 +1,53 @@
+package com.datastrato.gravitino.spark.connector.iceberg;
+
+import com.datastrato.gravitino.rel.Table;
+import com.datastrato.gravitino.spark.connector.GravitinoCatalogAdaptor;
+import com.datastrato.gravitino.spark.connector.GravitinoSparkConfig;
+import com.datastrato.gravitino.spark.connector.PropertiesConverter;
+import com.datastrato.gravitino.spark.connector.table.SparkBaseTable;
+import com.google.common.base.Preconditions;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.iceberg.spark.SparkCatalog;
+import org.apache.spark.sql.connector.catalog.Identifier;
+import org.apache.spark.sql.connector.catalog.TableCatalog;
+import org.apache.spark.sql.util.CaseInsensitiveStringMap;
+
+/** IcebergAdaptor provides specific operations for Iceberg Catalog to adapt to GravitinoCatalog. */
+public class IcebergAdaptor implements GravitinoCatalogAdaptor {
+
+  @Override
+  public PropertiesConverter getPropertiesConverter() {
+    return new IcebergPropertiesConverter();
+  }
+
+  @Override
+  public SparkBaseTable createSparkTable(
+      Identifier identifier,
+      Table gravitinoTable,
+      TableCatalog sparkCatalog,
+      PropertiesConverter propertiesConverter) {
+    return new SparkIcebergTable(identifier, gravitinoTable, sparkCatalog, propertiesConverter);
+  }
+
+  @Override
+  public TableCatalog createAndInitSparkCatalog(
+      String name, CaseInsensitiveStringMap options, Map<String, String> properties) {
+    Preconditions.checkArgument(
+        properties != null, "Iceberg Catalog properties should not be null");
+    String metastoreUri = properties.get(GravitinoSparkConfig.GRAVITINO_HIVE_METASTORE_URI);
+    Preconditions.checkArgument(
+        StringUtils.isNotBlank(metastoreUri),
+        "Couldn't get "
+            + GravitinoSparkConfig.GRAVITINO_HIVE_METASTORE_URI
+            + " from iceberg catalog properties");
+
+    TableCatalog icebergCatalog = new SparkCatalog();
+    HashMap<String, String> all = new HashMap<>(options);
+    all.put(GravitinoSparkConfig.SPARK_HIVE_METASTORE_URI, metastoreUri);
+    icebergCatalog.initialize(name, new CaseInsensitiveStringMap(all));
+
+    return icebergCatalog;
+  }
+}
diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConverter.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConverter.java
new file mode 100644
index 00000000000..09b6cb36779
--- /dev/null
+++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConverter.java
@@ -0,0 +1,18 @@
+package com.datastrato.gravitino.spark.connector.iceberg;
+
+import com.datastrato.gravitino.spark.connector.PropertiesConverter;
+import java.util.HashMap;
+import java.util.Map;
+
+/** Transform iceberg catalog properties between Spark and Gravitino. */
+public class IcebergPropertiesConverter implements PropertiesConverter {
+  @Override
+  public Map<String, String> toGravitinoTableProperties(Map<String, String> properties) {
+    return new HashMap<>(properties);
+  }
+
+  @Override
+  public Map<String, String> toSparkTableProperties(Map<String, String> properties) {
+    return new HashMap<>(properties);
+  }
+}
diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/SparkIcebergTable.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/SparkIcebergTable.java
new file mode 100644
index 00000000000..7f95e034a40
--- /dev/null
+++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/SparkIcebergTable.java
@@ -0,0 +1,18 @@
+package com.datastrato.gravitino.spark.connector.iceberg;
+
+import com.datastrato.gravitino.rel.Table;
+import com.datastrato.gravitino.spark.connector.PropertiesConverter;
+import com.datastrato.gravitino.spark.connector.table.SparkBaseTable;
+import org.apache.spark.sql.connector.catalog.Identifier;
+import org.apache.spark.sql.connector.catalog.TableCatalog;
+
+public class SparkIcebergTable extends SparkBaseTable {
+
+  public SparkIcebergTable(
+      Identifier identifier,
+      Table gravitinoTable,
+      TableCatalog sparkCatalog,
+      PropertiesConverter propertiesConverter) {
+    super(identifier, gravitinoTable, sparkCatalog, propertiesConverter);
+  }
+}

From c4445bdc575837863aa42603c237329517aab9cc Mon Sep 17 00:00:00 2001
From: caican00
Date: Mon, 18 Mar 2024 21:30:54 +0800
Subject: [PATCH 02/55] [#2566] feat(spark-connector): Refactoring integration
 tests for spark-connector

---
 .../{SparkIT.java => SparkCommonIT.java}      | 26 ++------
 .../integration/test/spark/SparkEnvIT.java    | 19 +++++-----
 .../test/spark/SparkHiveCatalogIT.java        | 36 +++++++++++++++++++
 3 files changed, 49 insertions(+), 32 deletions(-)
 rename integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/{SparkIT.java => SparkCommonIT.java} (94%)
 create mode 100644 integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkHiveCatalogIT.java

diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java
similarity index 94%
rename from integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkIT.java
rename to integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java
index c8fbdd3ea0c..337ef10e3e9 100644
--- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkIT.java
+++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java
@@ -20,8 +20,6 @@
 import org.apache.spark.sql.types.DataType;
 import org.apache.spark.sql.types.DataTypes;
 import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Tag;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.TestInstance;
@@ -30,7 +28,7 @@
 
 @Tag("gravitino-docker-it")
 @TestInstance(Lifecycle.PER_CLASS)
-public class SparkIT extends SparkEnvIT {
+public class SparkCommonIT extends SparkEnvIT {
   private static String getSelectAllSql(String tableName) {
     return String.format("SELECT * FROM %s", tableName);
   }
@@ -43,32 +41,14 @@ private static String getInsertWithoutPartitionSql(String tableName, String valu
 
   private static final Map<DataType, String> typeConstant =
       ImmutableMap.of(DataTypes.IntegerType, "2", DataTypes.StringType, "'gravitino_it_test'");
 
-  // Use a custom database not the original default database because SparkIT couldn't read&write
-  // data to tables in default database. The main reason is default database location is
-  // determined by `hive.metastore.warehouse.dir` in hive-site.xml which is local HDFS address
-  // not real HDFS address. The location of tables created under default database is like
-  // hdfs://localhost:9000/xxx which couldn't read write data from SparkIT. Will use default
-  // database after spark connector support Alter database xx set location command.
-  @BeforeAll
-  void initDefaultDatabase() {
-    sql("USE " + hiveCatalogName);
-    createDatabaseIfNotExists(getDefaultDatabase());
-  }
-
-  @BeforeEach
-  void init() {
-    sql("USE " + hiveCatalogName);
-    sql("USE " + getDefaultDatabase());
-  }
-
-  private String getDefaultDatabase() {
+  protected String getDefaultDatabase() {
     return "default_db";
   }
 
   @Test
   void testLoadCatalogs() {
     Set<String> catalogs = getCatalogs();
-    Assertions.assertTrue(catalogs.contains(hiveCatalogName));
+    Assertions.assertTrue(catalogs.contains(catalogName));
   }
 
   @Test
diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java
index 0f7b0d385d5..6c86f4722ae 100644
--- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java
+++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java
@@ -19,16 +19,16 @@
 import org.apache.spark.sql.SparkSession;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeAll;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-/** Setup Hive, Gravitino, Spark, Metalake environment to execute SparkSQL. */
+/** Setup Datasource, Gravitino, Spark, Metalake environment to execute SparkSQL. */
 public class SparkEnvIT extends SparkUtilIT {
   private static final Logger LOG = LoggerFactory.getLogger(SparkEnvIT.class);
   private static final ContainerSuite containerSuite = ContainerSuite.getInstance();
 
-  protected final String hiveCatalogName = "hive";
+  protected String catalogName = null;
+  private String provider = null;
   private final String metalakeName = "test";
 
   private SparkSession sparkSession;
@@ -41,9 +41,10 @@ protected SparkSession getSparkSession() {
     return sparkSession;
   }
 
-  @BeforeAll
-  void startUp() {
-    initHiveEnv();
+  void startUp(String catalogName, String provider) {
+    this.catalogName = catalogName;
+    this.provider = provider;
+    initDatasourceEnv();
     initGravitinoEnv();
     initMetalakeAndCatalogs();
     initSparkEnv();
@@ -67,9 +68,9 @@ private void initMetalakeAndCatalogs() {
     properties.put(GravitinoSparkConfig.GRAVITINO_HIVE_METASTORE_URI, hiveMetastoreUri);
 
     metalake.createCatalog(
-        NameIdentifier.of(metalakeName, hiveCatalogName),
+        NameIdentifier.of(metalakeName, catalogName),
         Catalog.Type.RELATIONAL,
-        "hive",
+        provider,
         "",
         properties);
   }
@@ -80,7 +81,7 @@ private void initGravitinoEnv() {
     gravitinoUri = String.format("http://127.0.0.1:%d", gravitinoPort);
   }
 
-  private void initHiveEnv() {
+  private void initDatasourceEnv() {
     containerSuite.startHiveContainer();
     hiveMetastoreUri =
         String.format(
diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkHiveCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkHiveCatalogIT.java
new file mode 100644
index 00000000000..b73337599e9
--- /dev/null
+++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkHiveCatalogIT.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2024 Datastrato Pvt Ltd.
+ * This software is licensed under the Apache License version 2.
+ */
+package com.datastrato.gravitino.integration.test.spark;
+
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.TestInstance;
+
+@Tag("gravitino-docker-it")
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+public class SparkHiveCatalogIT extends SparkCommonIT {
+
+  // Use a custom database not the original default database because SparkIT couldn't read&write
+  // data to tables in default database. The main reason is default database location is
+  // determined by `hive.metastore.warehouse.dir` in hive-site.xml which is local HDFS address
+  // not real HDFS address. The location of tables created under default database is like
+  // hdfs://localhost:9000/xxx which couldn't read write data from SparkIT. Will use default
+  // database after spark connector support Alter database xx set location command.
+ @BeforeAll + void initHiveCatalog() { + startUp("hive", "hive"); + sql("USE " + catalogName); + createDatabaseIfNotExists(getDefaultDatabase()); + } + + @BeforeEach + void init() { + sql("USE " + catalogName); + sql("USE " + getDefaultDatabase()); + } + +} From c9ab0074b292c38647cbff898fa6f14d053caecb Mon Sep 17 00:00:00 2001 From: caican00 Date: Tue, 19 Mar 2024 09:36:36 +0800 Subject: [PATCH 03/55] [#2566] feat(spark-connector): Refactoring integration tests for spark-connector --- .../integration/test/spark/SparkCommonIT.java | 2 +- .../test/spark/SparkHiveCatalogIT.java | 36 +++++++++---------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 5627e07cf9b..7960f6045ab 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -28,7 +28,7 @@ @Tag("gravitino-docker-it") @TestInstance(Lifecycle.PER_CLASS) -public class SparkCommonIT extends SparkEnvIT { +public abstract class SparkCommonIT extends SparkEnvIT { private static String getSelectAllSql(String tableName) { return String.format("SELECT * FROM %s", tableName); } diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkHiveCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkHiveCatalogIT.java index b73337599e9..fdb60b91b38 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkHiveCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkHiveCatalogIT.java @@ -13,24 +13,22 @@ @TestInstance(TestInstance.Lifecycle.PER_CLASS) public class SparkHiveCatalogIT extends SparkCommonIT { + // Use a custom database not the original default database because SparkIT couldn't read&write + // data to tables in default database. The main reason is default database location is + // determined by `hive.metastore.warehouse.dir` in hive-site.xml which is local HDFS address + // not real HDFS address. The location of tables created under default database is like + // hdfs://localhost:9000/xxx which couldn't read write data from SparkIT. Will use default + // database after spark connector support Alter database xx set location command. + @BeforeAll + void initHiveCatalog() { + startUp("hive", "hive"); + sql("USE " + catalogName); + createDatabaseIfNotExists(getDefaultDatabase()); + } - // Use a custom database not the original default database because SparkIT couldn't read&write - // data to tables in default database. The main reason is default database location is - // determined by `hive.metastore.warehouse.dir` in hive-site.xml which is local HDFS address - // not real HDFS address. The location of tables created under default database is like - // hdfs://localhost:9000/xxx which couldn't read write data from SparkIT. Will use default - // database after spark connector support Alter database xx set location command. 
- @BeforeAll - void initHiveCatalog() { - startUp("hive", "hive"); - sql("USE " + catalogName); - createDatabaseIfNotExists(getDefaultDatabase()); - } - - @BeforeEach - void init() { - sql("USE " + catalogName); - sql("USE " + getDefaultDatabase()); - } - + @BeforeEach + void init() { + sql("USE " + catalogName); + sql("USE " + getDefaultDatabase()); + } } From b80c366a7dc207225b3965789129702377b89a8b Mon Sep 17 00:00:00 2001 From: caican00 Date: Tue, 19 Mar 2024 11:00:44 +0800 Subject: [PATCH 04/55] [#2566] feat(spark-connector): Refactoring integration tests for spark-connector --- .../integration/test/spark/SparkHiveCatalogIT.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkHiveCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkHiveCatalogIT.java index fdb60b91b38..f189893e438 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkHiveCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkHiveCatalogIT.java @@ -13,11 +13,13 @@ @TestInstance(TestInstance.Lifecycle.PER_CLASS) public class SparkHiveCatalogIT extends SparkCommonIT { - // Use a custom database not the original default database because SparkIT couldn't read&write + // Use a custom database not the original default database because SparkHiveCatalogIT couldn't + // read&write // data to tables in default database. The main reason is default database location is // determined by `hive.metastore.warehouse.dir` in hive-site.xml which is local HDFS address // not real HDFS address. The location of tables created under default database is like - // hdfs://localhost:9000/xxx which couldn't read write data from SparkIT. Will use default + // hdfs://localhost:9000/xxx which couldn't read write data from SparkHiveCatalogIT. Will use + // default // database after spark connector support Alter database xx set location command. 
@BeforeAll void initHiveCatalog() { From a7fbb0b6a014d2993b1b36d3e42f0c73223c8ac6 Mon Sep 17 00:00:00 2001 From: caican00 Date: Tue, 19 Mar 2024 11:35:04 +0800 Subject: [PATCH 05/55] [#2566] feat(spark-connector): Refactoring integration tests for spark-connector --- .../gravitino/integration/test/spark/SparkCommonIT.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 7960f6045ab..07cdd790c99 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -22,12 +22,9 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; -import org.junit.jupiter.api.TestInstance.Lifecycle; import org.junit.platform.commons.util.StringUtils; @Tag("gravitino-docker-it") -@TestInstance(Lifecycle.PER_CLASS) public abstract class SparkCommonIT extends SparkEnvIT { private static String getSelectAllSql(String tableName) { return String.format("SELECT * FROM %s", tableName); @@ -499,7 +496,8 @@ private void checkTableReadWrite(SparkTableInfo table) { Assertions.assertEquals(checkValues, queryResult.get(0)); } - private String getCreateSimpleTableString(String tableName) { + // override this method in subclass to create table for different datasources + protected String getCreateSimpleTableString(String tableName) { return String.format( "CREATE TABLE %s (id INT COMMENT 'id comment', name STRING COMMENT '', age INT)", tableName); From 2847dc434a411ceba82314632638666a350154f0 Mon Sep 17 00:00:00 2001 From: caican00 Date: Tue, 19 Mar 2024 15:52:41 +0800 Subject: [PATCH 06/55] [#2566] feat(spark-connector): Refactoring integration tests for spark-connector --- .../integration/test/spark/SparkCommonIT.java | 28 +++++++++++++++---- .../integration/test/spark/SparkEnvIT.java | 24 ++++++++-------- .../test/spark/SparkHiveCatalogIT.java | 25 ++++------------- 3 files changed, 42 insertions(+), 35 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 07cdd790c99..19e34082a42 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -20,11 +20,11 @@ import org.apache.spark.sql.types.DataType; import org.apache.spark.sql.types.DataTypes; import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.platform.commons.util.StringUtils; -@Tag("gravitino-docker-it") public abstract class SparkCommonIT extends SparkEnvIT { private static String getSelectAllSql(String tableName) { return String.format("SELECT * FROM %s", tableName); @@ -51,6 +51,25 @@ private static String getInsertWithoutPartitionSql(String tableName, String valu DataTypes.createStructField("col2", DataTypes.StringType, true))), "struct(1, 'a')"); + // Use a custom database not the original default database 
because SparkCommonIT couldn't + // read&write + // data to tables in default database. The main reason is default database location is + // determined by `hive.metastore.warehouse.dir` in hive-site.xml which is local HDFS address + // not real HDFS address. The location of tables created under default database is like + // hdfs://localhost:9000/xxx which couldn't read write data from SparkCommonIT. Will use default + // database after spark connector support Alter database xx set location command. + @BeforeAll + void initDefaultDatabase() { + sql("USE " + getCatalogName()); + createDatabaseIfNotExists(getDefaultDatabase()); + } + + @BeforeEach + void init() { + sql("USE " + getCatalogName()); + sql("USE " + getDefaultDatabase()); + } + protected String getDefaultDatabase() { return "default_db"; } @@ -58,7 +77,7 @@ protected String getDefaultDatabase() { @Test void testLoadCatalogs() { Set catalogs = getCatalogs(); - Assertions.assertTrue(catalogs.contains(catalogName)); + Assertions.assertTrue(catalogs.contains(getCatalogName())); } @Test @@ -496,8 +515,7 @@ private void checkTableReadWrite(SparkTableInfo table) { Assertions.assertEquals(checkValues, queryResult.get(0)); } - // override this method in subclass to create table for different datasources - protected String getCreateSimpleTableString(String tableName) { + private String getCreateSimpleTableString(String tableName) { return String.format( "CREATE TABLE %s (id INT COMMENT 'id comment', name STRING COMMENT '', age INT)", tableName); diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java index 6c86f4722ae..dd78613a32b 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java @@ -19,16 +19,15 @@ import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -/** Setup Datasource, Gravitino, Spark, Metalake environment to execute SparkSQL. */ -public class SparkEnvIT extends SparkUtilIT { +/** Setup Hive, Gravitino, Spark, Metalake environment to execute SparkSQL. 
*/ +public abstract class SparkEnvIT extends SparkUtilIT { private static final Logger LOG = LoggerFactory.getLogger(SparkEnvIT.class); private static final ContainerSuite containerSuite = ContainerSuite.getInstance(); - protected String catalogName = null; - private String provider = null; private final String metalakeName = "test"; private SparkSession sparkSession; @@ -41,10 +40,9 @@ protected SparkSession getSparkSession() { return sparkSession; } - void startUp(String catalogName, String provider) { - this.catalogName = catalogName; - this.provider = provider; - initDatasourceEnv(); + @BeforeAll + void startUp() { + initHiveEnv(); initGravitinoEnv(); initMetalakeAndCatalogs(); initSparkEnv(); @@ -68,9 +66,9 @@ private void initMetalakeAndCatalogs() { properties.put(GravitinoSparkConfig.GRAVITINO_HIVE_METASTORE_URI, hiveMetastoreUri); metalake.createCatalog( - NameIdentifier.of(metalakeName, catalogName), + NameIdentifier.of(metalakeName, getCatalogName()), Catalog.Type.RELATIONAL, - provider, + getProvider(), "", properties); } @@ -81,7 +79,7 @@ private void initGravitinoEnv() { gravitinoUri = String.format("http://127.0.0.1:%d", gravitinoPort); } - private void initDatasourceEnv() { + private void initHiveEnv() { containerSuite.startHiveContainer(); hiveMetastoreUri = String.format( @@ -107,4 +105,8 @@ private void initSparkEnv() { .enableHiveSupport() .getOrCreate(); } + + protected abstract String getCatalogName(); + + protected abstract String getProvider(); } diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkHiveCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkHiveCatalogIT.java index f189893e438..691986fd2bc 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkHiveCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkHiveCatalogIT.java @@ -4,8 +4,6 @@ */ package com.datastrato.gravitino.integration.test.spark; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.TestInstance; @@ -13,24 +11,13 @@ @TestInstance(TestInstance.Lifecycle.PER_CLASS) public class SparkHiveCatalogIT extends SparkCommonIT { - // Use a custom database not the original default database because SparkHiveCatalogIT couldn't - // read&write - // data to tables in default database. The main reason is default database location is - // determined by `hive.metastore.warehouse.dir` in hive-site.xml which is local HDFS address - // not real HDFS address. The location of tables created under default database is like - // hdfs://localhost:9000/xxx which couldn't read write data from SparkHiveCatalogIT. Will use - // default - // database after spark connector support Alter database xx set location command. 
- @BeforeAll - void initHiveCatalog() { - startUp("hive", "hive"); - sql("USE " + catalogName); - createDatabaseIfNotExists(getDefaultDatabase()); + @Override + protected String getCatalogName() { + return "hive"; } - @BeforeEach - void init() { - sql("USE " + catalogName); - sql("USE " + getDefaultDatabase()); + @Override + protected String getProvider() { + return "hive"; } } From b2f31e88eb8d1e1c331f973714b4704f9cb658cd Mon Sep 17 00:00:00 2001 From: caican00 Date: Tue, 19 Mar 2024 15:55:58 +0800 Subject: [PATCH 07/55] [#2566] feat(spark-connector): Refactoring integration tests for spark-connector --- .../gravitino/integration/test/spark/SparkCommonIT.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 19e34082a42..6b735affd69 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -52,8 +52,7 @@ private static String getInsertWithoutPartitionSql(String tableName, String valu "struct(1, 'a')"); // Use a custom database not the original default database because SparkCommonIT couldn't - // read&write - // data to tables in default database. The main reason is default database location is + // read&write data to tables in default database. The main reason is default database location is // determined by `hive.metastore.warehouse.dir` in hive-site.xml which is local HDFS address // not real HDFS address. The location of tables created under default database is like // hdfs://localhost:9000/xxx which couldn't read write data from SparkCommonIT. Will use default From 51b24b6f90ee5453f00c6a25f5573b8bc199d97f Mon Sep 17 00:00:00 2001 From: caican00 Date: Tue, 19 Mar 2024 16:20:24 +0800 Subject: [PATCH 08/55] updated. --- .../integration/test/spark/{ => hive}/SparkHiveCatalogIT.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) rename integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/{ => hive}/SparkHiveCatalogIT.java (78%) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkHiveCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java similarity index 78% rename from integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkHiveCatalogIT.java rename to integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java index 691986fd2bc..bce6cb212bf 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkHiveCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java @@ -2,8 +2,9 @@ * Copyright 2024 Datastrato Pvt Ltd. * This software is licensed under the Apache License version 2. 
*/ -package com.datastrato.gravitino.integration.test.spark; +package com.datastrato.gravitino.integration.test.spark.hive; +import com.datastrato.gravitino.integration.test.spark.SparkCommonIT; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.TestInstance; From e322776fbb31bce95e161158bd755a5b36f432c4 Mon Sep 17 00:00:00 2001 From: caican00 Date: Tue, 19 Mar 2024 16:23:20 +0800 Subject: [PATCH 09/55] updated. --- .../spark/iceberg/SparkIcebergCatalogIT.java | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java new file mode 100644 index 00000000000..cec445b85ad --- /dev/null +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java @@ -0,0 +1,24 @@ +/* + * Copyright 2024 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ +package com.datastrato.gravitino.integration.test.spark.iceberg; + +import com.datastrato.gravitino.integration.test.spark.SparkCommonIT; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.TestInstance; + +@Tag("gravitino-docker-it") +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +public class SparkIcebergCatalogIT extends SparkCommonIT { + + @Override + protected String getCatalogName() { + return "iceberg"; + } + + @Override + protected String getProvider() { + return "iceberg"; + } +} From bd6821c94a13195bc07dd9608c9ee43d0f4d9803 Mon Sep 17 00:00:00 2001 From: caican00 Date: Tue, 19 Mar 2024 17:39:15 +0800 Subject: [PATCH 10/55] updated. --- .../gravitino/spark/connector/iceberg/IcebergAdaptor.java | 5 +++++ .../spark/connector/iceberg/IcebergPropertiesConverter.java | 5 +++++ .../gravitino/spark/connector/iceberg/SparkIcebergTable.java | 5 +++++ 3 files changed, 15 insertions(+) diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java index 7c866b84340..5af39a38e1f 100644 --- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java +++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java @@ -1,3 +1,8 @@ +/* + * Copyright 2024 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ + package com.datastrato.gravitino.spark.connector.iceberg; import com.datastrato.gravitino.rel.Table; diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConverter.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConverter.java index 09b6cb36779..a6c8f781696 100644 --- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConverter.java +++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConverter.java @@ -1,3 +1,8 @@ +/* + * Copyright 2024 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. 
+ */ + package com.datastrato.gravitino.spark.connector.iceberg; import com.datastrato.gravitino.spark.connector.PropertiesConverter; diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/SparkIcebergTable.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/SparkIcebergTable.java index 7f95e034a40..fe085826ed0 100644 --- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/SparkIcebergTable.java +++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/SparkIcebergTable.java @@ -1,3 +1,8 @@ +/* + * Copyright 2024 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ + package com.datastrato.gravitino.spark.connector.iceberg; import com.datastrato.gravitino.rel.Table; From 2633a603a48d736bb5b1794c834e129f4d1397ee Mon Sep 17 00:00:00 2001 From: caican00 Date: Tue, 19 Mar 2024 20:18:05 +0800 Subject: [PATCH 11/55] updated. --- .../gravitino/integration/test/spark/SparkEnvIT.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java index dd78613a32b..b0b7fd895e6 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java @@ -34,6 +34,10 @@ public abstract class SparkEnvIT extends SparkUtilIT { private String hiveMetastoreUri; private String gravitinoUri; + protected abstract String getCatalogName(); + + protected abstract String getProvider(); + @Override protected SparkSession getSparkSession() { Assertions.assertNotNull(sparkSession); @@ -105,8 +109,4 @@ private void initSparkEnv() { .enableHiveSupport() .getOrCreate(); } - - protected abstract String getCatalogName(); - - protected abstract String getProvider(); } From 563b39b94fc8d4a5acbc50e39f55ae0c927808f9 Mon Sep 17 00:00:00 2001 From: caican00 Date: Wed, 20 Mar 2024 13:58:11 +0800 Subject: [PATCH 12/55] updated. 
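Use Gravitino's registered provider name "lakehouse-iceberg" instead of the
bare "iceberg", in both the adaptor factory switch and the Iceberg IT, so the
provider string used at catalog creation matches what the factory dispatches
on. A rough sketch of the two call sites this keeps in sync (the metalake and
catalog identifiers below are placeholders, not part of this change):

    // Hypothetical sketch: the provider passed when creating the catalog is
    // the same string GravitinoCatalogAdaptorFactory later switches on.
    metalake.createCatalog(
        NameIdentifier.of("test", "iceberg_catalog"), // placeholder names
        Catalog.Type.RELATIONAL,
        "lakehouse-iceberg",
        "",
        properties);
    GravitinoCatalogAdaptor adaptor =
        GravitinoCatalogAdaptorFactory.createGravitinoAdaptor("lakehouse-iceberg");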
---
 .../integration/test/spark/iceberg/SparkIcebergCatalogIT.java | 2 +-
 .../spark/connector/GravitinoCatalogAdaptorFactory.java       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java
index cec445b85ad..7a7fa5e16cc 100644
--- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java
+++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java
@@ -19,6 +19,6 @@ protected String getCatalogName() {
 
   @Override
   protected String getProvider() {
-    return "iceberg";
+    return "lakehouse-iceberg";
   }
 }
diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/GravitinoCatalogAdaptorFactory.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/GravitinoCatalogAdaptorFactory.java
index b3b014839df..0599f5cad1b 100644
--- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/GravitinoCatalogAdaptorFactory.java
+++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/GravitinoCatalogAdaptorFactory.java
@@ -18,7 +18,7 @@ public static GravitinoCatalogAdaptor createGravitinoAdaptor(String provider) {
     switch (provider.toLowerCase(Locale.ROOT)) {
       case "hive":
         return new HiveAdaptor();
-      case "iceberg":
+      case "lakehouse-iceberg":
         return new IcebergAdaptor();
       default:
         throw new RuntimeException(String.format("Provider:%s is not supported yet", provider));
     }

From 54ec3b9647ab0ea666bfda4b3c66407d93c0701f Mon Sep 17 00:00:00 2001
From: caican00
Date: Wed, 20 Mar 2024 16:09:17 +0800
Subject: [PATCH 13/55] updated.
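A Hive metastore URI alone is not enough to build an Iceberg catalog: the
Gravitino Iceberg catalog also needs its backend type, warehouse location,
and backend URI, so pass them as catalog properties when the test metalake
is set up. A minimal sketch, assuming a Hive-backed Iceberg catalog (the
warehouse path below is a placeholder; the IT derives the real one from the
Hive container):

    Map<String, String> properties = Maps.newHashMap();
    properties.put(GravitinoSparkConfig.GRAVITINO_HIVE_METASTORE_URI, hiveMetastoreUri);
    properties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_BACKEND, "hive");
    properties.put(
        GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE,
        "hdfs://localhost:9000/user/hive/warehouse"); // placeholder path
    properties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI, hiveMetastoreUri);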
--- .../integration/test/spark/SparkEnvIT.java | 15 +++++++++------ .../spark/connector/GravitinoSparkConfig.java | 3 +++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java index b0b7fd895e6..c3f1f30644f 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java @@ -29,6 +29,11 @@ public abstract class SparkEnvIT extends SparkUtilIT { private static final ContainerSuite containerSuite = ContainerSuite.getInstance(); private final String metalakeName = "test"; + private final String warehouse = + String.format( + "hdfs://%s:%d/user/hive/warehouse", + containerSuite.getHiveContainer().getContainerIpAddress(), + HiveContainer.HDFS_DEFAULTFS_PORT); private SparkSession sparkSession; private String hiveMetastoreUri; @@ -68,6 +73,9 @@ private void initMetalakeAndCatalogs() { GravitinoMetalake metalake = client.loadMetalake(NameIdentifier.of(metalakeName)); Map properties = Maps.newHashMap(); properties.put(GravitinoSparkConfig.GRAVITINO_HIVE_METASTORE_URI, hiveMetastoreUri); + properties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_BACKEND, "hive"); + properties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE, warehouse); + properties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI, hiveMetastoreUri); metalake.createCatalog( NameIdentifier.of(metalakeName, getCatalogName()), @@ -100,12 +108,7 @@ private void initSparkEnv() { .config("spark.plugins", GravitinoSparkPlugin.class.getName()) .config(GravitinoSparkConfig.GRAVITINO_URI, gravitinoUri) .config(GravitinoSparkConfig.GRAVITINO_METALAKE, metalakeName) - .config( - "spark.sql.warehouse.dir", - String.format( - "hdfs://%s:%d/user/hive/warehouse", - containerSuite.getHiveContainer().getContainerIpAddress(), - HiveContainer.HDFS_DEFAULTFS_PORT)) + .config("spark.sql.warehouse.dir", warehouse) .enableHiveSupport() .getOrCreate(); } diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/GravitinoSparkConfig.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/GravitinoSparkConfig.java index 9c5e8e66b9f..bf4d0f406d3 100644 --- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/GravitinoSparkConfig.java +++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/GravitinoSparkConfig.java @@ -12,6 +12,9 @@ public class GravitinoSparkConfig { public static final String GRAVITINO_METALAKE = GRAVITINO_PREFIX + "metalake"; public static final String GRAVITINO_HIVE_METASTORE_URI = "metastore.uris"; public static final String SPARK_HIVE_METASTORE_URI = "hive.metastore.uris"; + public static final String LAKEHOUSE_ICEBERG_CATALOG_BACKEND = "catalog-backend"; + public static final String LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE = "warehouse"; + public static final String LAKEHOUSE_ICEBERG_CATALOG_URI = "uri"; private GravitinoSparkConfig() {} } From 8969ad664027ea22eceb835b0a35094a774867cb Mon Sep 17 00:00:00 2001 From: caican00 Date: Wed, 20 Mar 2024 16:32:37 +0800 Subject: [PATCH 14/55] updated. 
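Compute the warehouse path inside initHiveEnv() instead of in a field
initializer: the initializer runs when the test instance is constructed,
before startHiveContainer() has run, so the Hive container's address is not
available yet at that point. Roughly:

    private void initHiveEnv() {
      containerSuite.startHiveContainer();
      // only now is the container address known
      warehouse =
          String.format(
              "hdfs://%s:%d/user/hive/warehouse",
              containerSuite.getHiveContainer().getContainerIpAddress(),
              HiveContainer.HDFS_DEFAULTFS_PORT);
    }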
--- .../gravitino/integration/test/spark/SparkEnvIT.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java index c3f1f30644f..5e8392f6414 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java @@ -29,15 +29,11 @@ public abstract class SparkEnvIT extends SparkUtilIT { private static final ContainerSuite containerSuite = ContainerSuite.getInstance(); private final String metalakeName = "test"; - private final String warehouse = - String.format( - "hdfs://%s:%d/user/hive/warehouse", - containerSuite.getHiveContainer().getContainerIpAddress(), - HiveContainer.HDFS_DEFAULTFS_PORT); private SparkSession sparkSession; private String hiveMetastoreUri; private String gravitinoUri; + private String warehouse; protected abstract String getCatalogName(); @@ -98,6 +94,11 @@ private void initHiveEnv() { "thrift://%s:%d", containerSuite.getHiveContainer().getContainerIpAddress(), HiveContainer.HIVE_METASTORE_PORT); + warehouse = + String.format( + "hdfs://%s:%d/user/hive/warehouse", + containerSuite.getHiveContainer().getContainerIpAddress(), + HiveContainer.HDFS_DEFAULTFS_PORT); } private void initSparkEnv() { From 2c039f9babee6cd699bee969e247360940938117 Mon Sep 17 00:00:00 2001 From: caican00 Date: Wed, 20 Mar 2024 17:25:35 +0800 Subject: [PATCH 15/55] updated. --- .../integration/test/spark/SparkEnvIT.java | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java index 5e8392f6414..3b1cc7754d2 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java @@ -102,15 +102,17 @@ private void initHiveEnv() { } private void initSparkEnv() { - sparkSession = - SparkSession.builder() - .master("local[1]") - .appName("Spark connector integration test") - .config("spark.plugins", GravitinoSparkPlugin.class.getName()) - .config(GravitinoSparkConfig.GRAVITINO_URI, gravitinoUri) - .config(GravitinoSparkConfig.GRAVITINO_METALAKE, metalakeName) - .config("spark.sql.warehouse.dir", warehouse) - .enableHiveSupport() - .getOrCreate(); + if (sparkSession == null) { + sparkSession = + SparkSession.builder() + .master("local[1]") + .appName("Spark connector integration test") + .config("spark.plugins", GravitinoSparkPlugin.class.getName()) + .config(GravitinoSparkConfig.GRAVITINO_URI, gravitinoUri) + .config(GravitinoSparkConfig.GRAVITINO_METALAKE, metalakeName) + .config("spark.sql.warehouse.dir", warehouse) + .enableHiveSupport() + .getOrCreate(); + } } } From 00dfcdf529a8ce32a57b64ac594b5ee7aa52155f Mon Sep 17 00:00:00 2001 From: caican00 Date: Wed, 20 Mar 2024 17:41:01 +0800 Subject: [PATCH 16/55] updated. 
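Close and rebuild the SparkSession instead of keeping a cached one: when
several catalog ITs run in the same JVM, getOrCreate() can hand back the
session built for the previous test class, still wired to that class's
catalog configuration. A sketch of the intended lifecycle, assuming the test
classes run sequentially in one JVM:

    // per test class: drop whatever session the previous class left behind,
    // then build a fresh one so the Gravitino plugin re-registers catalogs
    if (sparkSession != null) {
      sparkSession.close();
      sparkSession = null;
    }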
--- .../gravitino/integration/test/spark/SparkEnvIT.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java index 3b1cc7754d2..073ebeb46eb 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java @@ -102,7 +102,10 @@ private void initHiveEnv() { } private void initSparkEnv() { - if (sparkSession == null) { + if (sparkSession != null) { + sparkSession.close(); + sparkSession = null; + } sparkSession = SparkSession.builder() .master("local[1]") @@ -114,5 +117,4 @@ private void initSparkEnv() { .enableHiveSupport() .getOrCreate(); } - } } From e5538c5c1f9405442064e48c168853159ab7a5cb Mon Sep 17 00:00:00 2001 From: caican00 Date: Wed, 20 Mar 2024 17:53:34 +0800 Subject: [PATCH 17/55] updated. --- .../gravitino/integration/test/spark/SparkEnvIT.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java index 073ebeb46eb..efa7c4477bb 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java @@ -106,8 +106,8 @@ private void initSparkEnv() { sparkSession.close(); sparkSession = null; } - sparkSession = - SparkSession.builder() + sparkSession = + SparkSession.builder() .master("local[1]") .appName("Spark connector integration test") .config("spark.plugins", GravitinoSparkPlugin.class.getName()) From 7f822866e13ad996224de16844c3cc9a45dd9274 Mon Sep 17 00:00:00 2001 From: caican00 Date: Wed, 20 Mar 2024 18:00:41 +0800 Subject: [PATCH 18/55] updated. 
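Formatting only: restore the standard continuation indentation of the
SparkSession builder chain that the previous commit left misaligned; no
behavior change intended.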
--- .../integration/test/spark/SparkEnvIT.java | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java index efa7c4477bb..8635d3fcab7 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java @@ -107,14 +107,14 @@ private void initSparkEnv() { sparkSession = null; } sparkSession = - SparkSession.builder() - .master("local[1]") - .appName("Spark connector integration test") - .config("spark.plugins", GravitinoSparkPlugin.class.getName()) - .config(GravitinoSparkConfig.GRAVITINO_URI, gravitinoUri) - .config(GravitinoSparkConfig.GRAVITINO_METALAKE, metalakeName) - .config("spark.sql.warehouse.dir", warehouse) - .enableHiveSupport() - .getOrCreate(); - } + SparkSession.builder() + .master("local[1]") + .appName("Spark connector integration test") + .config("spark.plugins", GravitinoSparkPlugin.class.getName()) + .config(GravitinoSparkConfig.GRAVITINO_URI, gravitinoUri) + .config(GravitinoSparkConfig.GRAVITINO_METALAKE, metalakeName) + .config("spark.sql.warehouse.dir", warehouse) + .enableHiveSupport() + .getOrCreate(); + } } From 9d1df67b69c5c9edfb3feec44ab349bebf193a99 Mon Sep 17 00:00:00 2001 From: caican00 Date: Wed, 20 Mar 2024 19:52:24 +0800 Subject: [PATCH 19/55] updated. --- .../integration/test/spark/SparkEnvIT.java | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java index 8635d3fcab7..230963c6f15 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java @@ -27,10 +27,10 @@ public abstract class SparkEnvIT extends SparkUtilIT { private static final Logger LOG = LoggerFactory.getLogger(SparkEnvIT.class); private static final ContainerSuite containerSuite = ContainerSuite.getInstance(); + private static SparkSession sparkSession; private final String metalakeName = "test"; - private SparkSession sparkSession; private String hiveMetastoreUri; private String gravitinoUri; private String warehouse; @@ -102,19 +102,17 @@ private void initHiveEnv() { } private void initSparkEnv() { - if (sparkSession != null) { - sparkSession.close(); - sparkSession = null; + if (sparkSession == null) { + sparkSession = + SparkSession.builder() + .master("local[1]") + .appName("Spark connector integration test") + .config("spark.plugins", GravitinoSparkPlugin.class.getName()) + .config(GravitinoSparkConfig.GRAVITINO_URI, gravitinoUri) + .config(GravitinoSparkConfig.GRAVITINO_METALAKE, metalakeName) + .config("spark.sql.warehouse.dir", warehouse) + .enableHiveSupport() + .getOrCreate(); } - sparkSession = - SparkSession.builder() - .master("local[1]") - .appName("Spark connector integration test") - .config("spark.plugins", GravitinoSparkPlugin.class.getName()) - .config(GravitinoSparkConfig.GRAVITINO_URI, gravitinoUri) - .config(GravitinoSparkConfig.GRAVITINO_METALAKE, metalakeName) - .config("spark.sql.warehouse.dir", warehouse) - .enableHiveSupport() - 
.getOrCreate(); } } From 2a7e1220226c1aecdf87cf59ef1313fa78a37fbe Mon Sep 17 00:00:00 2001 From: caican00 Date: Wed, 20 Mar 2024 20:14:56 +0800 Subject: [PATCH 20/55] updated. --- .../integration/test/spark/SparkEnvIT.java | 24 +++++++++---------- .../catalog/GravitinoCatalogManager.java | 13 ++++++---- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java index 230963c6f15..5e8392f6414 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java @@ -27,10 +27,10 @@ public abstract class SparkEnvIT extends SparkUtilIT { private static final Logger LOG = LoggerFactory.getLogger(SparkEnvIT.class); private static final ContainerSuite containerSuite = ContainerSuite.getInstance(); - private static SparkSession sparkSession; private final String metalakeName = "test"; + private SparkSession sparkSession; private String hiveMetastoreUri; private String gravitinoUri; private String warehouse; @@ -102,17 +102,15 @@ private void initHiveEnv() { } private void initSparkEnv() { - if (sparkSession == null) { - sparkSession = - SparkSession.builder() - .master("local[1]") - .appName("Spark connector integration test") - .config("spark.plugins", GravitinoSparkPlugin.class.getName()) - .config(GravitinoSparkConfig.GRAVITINO_URI, gravitinoUri) - .config(GravitinoSparkConfig.GRAVITINO_METALAKE, metalakeName) - .config("spark.sql.warehouse.dir", warehouse) - .enableHiveSupport() - .getOrCreate(); - } + sparkSession = + SparkSession.builder() + .master("local[1]") + .appName("Spark connector integration test") + .config("spark.plugins", GravitinoSparkPlugin.class.getName()) + .config(GravitinoSparkConfig.GRAVITINO_URI, gravitinoUri) + .config(GravitinoSparkConfig.GRAVITINO_METALAKE, metalakeName) + .config("spark.sql.warehouse.dir", warehouse) + .enableHiveSupport() + .getOrCreate(); } } diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/catalog/GravitinoCatalogManager.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/catalog/GravitinoCatalogManager.java index c933cb6d2d7..c23df81f617 100644 --- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/catalog/GravitinoCatalogManager.java +++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/catalog/GravitinoCatalogManager.java @@ -23,7 +23,8 @@ /** GravitinoCatalogManager is used to retrieve catalogs from Gravitino server. 
*/ public class GravitinoCatalogManager { private static final Logger LOG = LoggerFactory.getLogger(GravitinoCatalogManager.class); - private static GravitinoCatalogManager gravitinoCatalogManager; + private static volatile GravitinoCatalogManager gravitinoCatalogManager; + private static final Object LOCK = new Object(); private volatile boolean isClosed = false; private final Cache gravitinoCatalogs; @@ -40,9 +41,13 @@ private GravitinoCatalogManager(String gravitinoUri, String metalakeName) { } public static GravitinoCatalogManager create(String gravitinoUrl, String metalakeName) { - Preconditions.checkState( - gravitinoCatalogManager == null, "Should not create duplicate GravitinoCatalogManager"); - gravitinoCatalogManager = new GravitinoCatalogManager(gravitinoUrl, metalakeName); + if (gravitinoCatalogManager == null) { + synchronized (LOCK) { + if (gravitinoCatalogManager == null) { + gravitinoCatalogManager = new GravitinoCatalogManager(gravitinoUrl, metalakeName); + } + } + } return gravitinoCatalogManager; } From bc884c309703a05cb6b2965e2f8d1b4f676d4d62 Mon Sep 17 00:00:00 2001 From: caican00 Date: Wed, 20 Mar 2024 23:04:12 +0800 Subject: [PATCH 21/55] updated. --- .../connector/catalog/GravitinoCatalogManager.java | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/catalog/GravitinoCatalogManager.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/catalog/GravitinoCatalogManager.java index c23df81f617..5b610201817 100644 --- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/catalog/GravitinoCatalogManager.java +++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/catalog/GravitinoCatalogManager.java @@ -23,8 +23,7 @@ /** GravitinoCatalogManager is used to retrieve catalogs from Gravitino server. */ public class GravitinoCatalogManager { private static final Logger LOG = LoggerFactory.getLogger(GravitinoCatalogManager.class); - private static volatile GravitinoCatalogManager gravitinoCatalogManager; - private static final Object LOCK = new Object(); + private static GravitinoCatalogManager gravitinoCatalogManager; private volatile boolean isClosed = false; private final Cache gravitinoCatalogs; @@ -41,13 +40,9 @@ private GravitinoCatalogManager(String gravitinoUri, String metalakeName) { } public static GravitinoCatalogManager create(String gravitinoUrl, String metalakeName) { - if (gravitinoCatalogManager == null) { - synchronized (LOCK) { - if (gravitinoCatalogManager == null) { - gravitinoCatalogManager = new GravitinoCatalogManager(gravitinoUrl, metalakeName); - } - } - } + Preconditions.checkState( + gravitinoCatalogManager == null, "Should not create duplicate GravitinoCatalogManager"); + gravitinoCatalogManager = new GravitinoCatalogManager(gravitinoUrl, metalakeName); return gravitinoCatalogManager; } @@ -63,6 +58,7 @@ public void close() { Preconditions.checkState(!isClosed, "Gravitino Catalog is already closed"); isClosed = true; gravitinoClient.close(); + gravitinoCatalogManager = null; } public Catalog getGravitinoCatalogInfo(String name) { From 69696c08edba5b239b68cf52e88e30c3311b2959 Mon Sep 17 00:00:00 2001 From: caican00 Date: Wed, 20 Mar 2024 23:50:56 +0800 Subject: [PATCH 22/55] updated. 
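Stop excluding org.apache.iceberg from the spark-connector test dependency:
the Iceberg ITs exercise the Iceberg Spark runtime (IcebergAdaptor
instantiates org.apache.iceberg.spark.SparkCatalog), so those classes now
have to be on the integration-test runtime classpath.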
--- integration-test/build.gradle.kts | 1 - 1 file changed, 1 deletion(-) diff --git a/integration-test/build.gradle.kts b/integration-test/build.gradle.kts index 0aacb8399ab..5becdd13e7f 100644 --- a/integration-test/build.gradle.kts +++ b/integration-test/build.gradle.kts @@ -29,7 +29,6 @@ dependencies { testImplementation(project(":server")) testImplementation(project(":server-common")) testImplementation(project(":spark-connector")) { - exclude("org.apache.iceberg") exclude("org.apache.hadoop", "hadoop-client-api") exclude("org.apache.hadoop", "hadoop-client-runtime") } From 68f67176846a90f902941f5240828bd041ad6712 Mon Sep 17 00:00:00 2001 From: caican00 Date: Thu, 21 Mar 2024 14:15:24 +0800 Subject: [PATCH 23/55] updated. --- .../datastrato/gravitino/integration/test/spark/SparkEnvIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java index 5e8392f6414..54c8c693116 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java @@ -26,7 +26,7 @@ /** Setup Hive, Gravitino, Spark, Metalake environment to execute SparkSQL. */ public abstract class SparkEnvIT extends SparkUtilIT { private static final Logger LOG = LoggerFactory.getLogger(SparkEnvIT.class); - private static final ContainerSuite containerSuite = ContainerSuite.getInstance(); + private final ContainerSuite containerSuite = ContainerSuite.getInstance(); private final String metalakeName = "test"; From 01ae436b943f5036f56f61da9fd57da03e683857 Mon Sep 17 00:00:00 2001 From: caican00 Date: Fri, 22 Mar 2024 00:03:32 +0800 Subject: [PATCH 24/55] fix --- .../integration/test/spark/SparkCommonIT.java | 40 ++++++++++--------- .../integration/test/spark/SparkEnvIT.java | 2 +- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 6b735affd69..6592c95bc30 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -146,7 +146,7 @@ void testDropSchema() { @Test void testCreateSimpleTable() { - String tableName = "simple_table"; + String tableName = formatTableName("simple_table"); dropTableIfExists(tableName); createSimpleTable(tableName); SparkTableInfo tableInfo = getTableInfo(tableName); @@ -165,7 +165,7 @@ void testCreateSimpleTable() { void testCreateTableWithDatabase() { // test db.table as table identifier String databaseName = "db1"; - String tableName = "table1"; + String tableName = formatTableName("table1"); createDatabaseIfNotExists(databaseName); String tableIdentifier = String.join(".", databaseName, tableName); @@ -178,7 +178,7 @@ void testCreateTableWithDatabase() { // use db then create table with table name databaseName = "db2"; - tableName = "table2"; + tableName = formatTableName("table2"); createDatabaseIfNotExists(databaseName); sql("USE " + databaseName); @@ -192,7 +192,7 @@ void testCreateTableWithDatabase() { @Test void testCreateTableWithComment() { - String tableName = "comment_table"; + String 
tableName = formatTableName("comment_table"); dropTableIfExists(tableName); String createTableSql = getCreateSimpleTableString(tableName); String tableComment = "tableComment"; @@ -212,7 +212,7 @@ void testCreateTableWithComment() { @Test void testDropTable() { - String tableName = "drop_table"; + String tableName = formatTableName("drop_table"); createSimpleTable(tableName); Assertions.assertEquals(true, tableExists(tableName)); @@ -224,8 +224,8 @@ void testDropTable() { @Test void testRenameTable() { - String tableName = "rename1"; - String newTableName = "rename2"; + String tableName = formatTableName("rename1"); + String newTableName = formatTableName("rename2"); dropTableIfExists(tableName); dropTableIfExists(newTableName); @@ -250,8 +250,8 @@ void testRenameTable() { @Test void testListTable() { - String table1 = "list1"; - String table2 = "list2"; + String table1 = formatTableName("list1"); + String table2 = formatTableName("list2"); createSimpleTable(table1); createSimpleTable(table2); Set tables = listTableNames(); @@ -260,8 +260,8 @@ void testListTable() { // show tables from not current db String database = "db_list"; - String table3 = "list3"; - String table4 = "list4"; + String table3 = formatTableName("list3"); + String table4 = formatTableName("list4"); createDatabaseIfNotExists(database); createSimpleTable(String.join(".", database, table3)); createSimpleTable(String.join(".", database, table4)); @@ -275,7 +275,7 @@ void testListTable() { @Test void testAlterTableSetAndRemoveProperty() { - String tableName = "test_property"; + String tableName = formatTableName("test_property"); dropTableIfExists(tableName); createSimpleTable(tableName); @@ -293,7 +293,7 @@ void testAlterTableSetAndRemoveProperty() { @Test void testAlterTableAddAndDeleteColumn() { - String tableName = "test_column"; + String tableName = formatTableName("test_column"); dropTableIfExists(tableName); List simpleTableColumns = getSimpleTableColumn(); @@ -312,7 +312,7 @@ void testAlterTableAddAndDeleteColumn() { @Test void testAlterTableUpdateColumnType() { - String tableName = "test_column_type"; + String tableName = formatTableName("test_column_type"); dropTableIfExists(tableName); List simpleTableColumns = getSimpleTableColumn(); @@ -329,7 +329,7 @@ void testAlterTableUpdateColumnType() { @Test void testAlterTableRenameColumn() { - String tableName = "test_rename_column"; + String tableName = formatTableName("test_rename_column"); dropTableIfExists(tableName); List simpleTableColumns = getSimpleTableColumn(); createSimpleTable(tableName); @@ -349,7 +349,7 @@ void testAlterTableRenameColumn() { @Test void testUpdateColumnPosition() { - String tableName = "test_column_position"; + String tableName = formatTableName("test_column_position"); dropTableIfExists(tableName); List simpleTableColumns = @@ -392,7 +392,7 @@ void testUpdateColumnPosition() { @Test void testAlterTableUpdateColumnComment() { - String tableName = "test_update_column_comment"; + String tableName = formatTableName("test_update_column_comment"); dropTableIfExists(tableName); List simpleTableColumns = getSimpleTableColumn(); createSimpleTable(tableName); @@ -415,7 +415,7 @@ void testAlterTableUpdateColumnComment() { @Test void testComplexType() { - String tableName = "complex_type_table"; + String tableName = formatTableName("complex_type_table"); dropTableIfExists(tableName); sql( @@ -442,6 +442,10 @@ void testComplexType() { checkTableReadWrite(tableInfo); } + private String formatTableName(String tableName) { + return String.format("%s_%s", 
tableName, getProvider()); + } + private void checkTableColumns( String tableName, List columnInfos, SparkTableInfo tableInfo) { SparkTableInfoChecker.create() diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java index 54c8c693116..5e8392f6414 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java @@ -26,7 +26,7 @@ /** Setup Hive, Gravitino, Spark, Metalake environment to execute SparkSQL. */ public abstract class SparkEnvIT extends SparkUtilIT { private static final Logger LOG = LoggerFactory.getLogger(SparkEnvIT.class); - private final ContainerSuite containerSuite = ContainerSuite.getInstance(); + private static final ContainerSuite containerSuite = ContainerSuite.getInstance(); private final String metalakeName = "test"; From 069f8a381f2b6f465b61e4002de8f070b0827ec7 Mon Sep 17 00:00:00 2001 From: caican00 Date: Fri, 22 Mar 2024 00:22:44 +0800 Subject: [PATCH 25/55] fix --- .../gravitino/integration/test/spark/SparkCommonIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 6592c95bc30..7ee245e8c1a 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -443,7 +443,7 @@ void testComplexType() { } private String formatTableName(String tableName) { - return String.format("%s_%s", tableName, getProvider()); + return String.format("%s_%s", tableName, getCatalogName()); } private void checkTableColumns( From fc95191bde8245dfbf93e4f33b70c6b60247995e Mon Sep 17 00:00:00 2001 From: caican00 Date: Fri, 22 Mar 2024 00:30:51 +0800 Subject: [PATCH 26/55] fix --- .../gravitino/integration/test/spark/SparkCommonIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 7ee245e8c1a..8691ab65f74 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -443,7 +443,7 @@ void testComplexType() { } private String formatTableName(String tableName) { - return String.format("%s_%s", tableName, getCatalogName()); + return String.format("%s_%s", getCatalogName(), tableName); } private void checkTableColumns( From c3fae3728d131e9d5acf63da22935611b9ec66f2 Mon Sep 17 00:00:00 2001 From: caican00 Date: Fri, 22 Mar 2024 11:55:38 +0800 Subject: [PATCH 27/55] fix --- .../integration/test/spark/SparkCommonIT.java | 47 ++++++++++--------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 8691ab65f74..c52cfdd73f3 100644 --- 
a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -19,6 +19,7 @@ import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; import org.apache.spark.sql.types.DataType; import org.apache.spark.sql.types.DataTypes; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; @@ -69,6 +70,12 @@ void init() { sql("USE " + getDefaultDatabase()); } + @AfterAll + void cleanUp() { + sql("USE " + getCatalogName()); + sql("DROP DATABASE IF EXISTS " + getDefaultDatabase() + " CASCADE"); + } + protected String getDefaultDatabase() { return "default_db"; } @@ -146,7 +153,7 @@ void testDropSchema() { @Test void testCreateSimpleTable() { - String tableName = formatTableName("simple_table"); + String tableName = "simple_table"; dropTableIfExists(tableName); createSimpleTable(tableName); SparkTableInfo tableInfo = getTableInfo(tableName); @@ -165,7 +172,7 @@ void testCreateSimpleTable() { void testCreateTableWithDatabase() { // test db.table as table identifier String databaseName = "db1"; - String tableName = formatTableName("table1"); + String tableName = "table1"; createDatabaseIfNotExists(databaseName); String tableIdentifier = String.join(".", databaseName, tableName); @@ -178,7 +185,7 @@ void testCreateTableWithDatabase() { // use db then create table with table name databaseName = "db2"; - tableName = formatTableName("table2"); + tableName = "table2"; createDatabaseIfNotExists(databaseName); sql("USE " + databaseName); @@ -192,7 +199,7 @@ void testCreateTableWithDatabase() { @Test void testCreateTableWithComment() { - String tableName = formatTableName("comment_table"); + String tableName = "comment_table"; dropTableIfExists(tableName); String createTableSql = getCreateSimpleTableString(tableName); String tableComment = "tableComment"; @@ -212,7 +219,7 @@ void testCreateTableWithComment() { @Test void testDropTable() { - String tableName = formatTableName("drop_table"); + String tableName = "drop_table"; createSimpleTable(tableName); Assertions.assertEquals(true, tableExists(tableName)); @@ -224,8 +231,8 @@ void testDropTable() { @Test void testRenameTable() { - String tableName = formatTableName("rename1"); - String newTableName = formatTableName("rename2"); + String tableName = "rename1"; + String newTableName = "rename2"; dropTableIfExists(tableName); dropTableIfExists(newTableName); @@ -250,8 +257,8 @@ void testRenameTable() { @Test void testListTable() { - String table1 = formatTableName("list1"); - String table2 = formatTableName("list2"); + String table1 = "list1"; + String table2 = "list2"; createSimpleTable(table1); createSimpleTable(table2); Set tables = listTableNames(); @@ -260,8 +267,8 @@ void testListTable() { // show tables from not current db String database = "db_list"; - String table3 = formatTableName("list3"); - String table4 = formatTableName("list4"); + String table3 = "list3"; + String table4 = "list4"; createDatabaseIfNotExists(database); createSimpleTable(String.join(".", database, table3)); createSimpleTable(String.join(".", database, table4)); @@ -275,7 +282,7 @@ void testListTable() { @Test void testAlterTableSetAndRemoveProperty() { - String tableName = formatTableName("test_property"); + String tableName = "test_property"; dropTableIfExists(tableName); createSimpleTable(tableName); @@ -293,7 +300,7 @@ void 
testAlterTableSetAndRemoveProperty() { @Test void testAlterTableAddAndDeleteColumn() { - String tableName = formatTableName("test_column"); + String tableName = "test_column"; dropTableIfExists(tableName); List simpleTableColumns = getSimpleTableColumn(); @@ -312,7 +319,7 @@ void testAlterTableAddAndDeleteColumn() { @Test void testAlterTableUpdateColumnType() { - String tableName = formatTableName("test_column_type"); + String tableName = "test_column_type"; dropTableIfExists(tableName); List simpleTableColumns = getSimpleTableColumn(); @@ -329,7 +336,7 @@ void testAlterTableUpdateColumnType() { @Test void testAlterTableRenameColumn() { - String tableName = formatTableName("test_rename_column"); + String tableName = "test_rename_column"; dropTableIfExists(tableName); List simpleTableColumns = getSimpleTableColumn(); createSimpleTable(tableName); @@ -349,7 +356,7 @@ void testAlterTableRenameColumn() { @Test void testUpdateColumnPosition() { - String tableName = formatTableName("test_column_position"); + String tableName = "test_column_position"; dropTableIfExists(tableName); List simpleTableColumns = @@ -392,7 +399,7 @@ void testUpdateColumnPosition() { @Test void testAlterTableUpdateColumnComment() { - String tableName = formatTableName("test_update_column_comment"); + String tableName = "test_update_column_comment"; dropTableIfExists(tableName); List simpleTableColumns = getSimpleTableColumn(); createSimpleTable(tableName); @@ -415,7 +422,7 @@ void testAlterTableUpdateColumnComment() { @Test void testComplexType() { - String tableName = formatTableName("complex_type_table"); + String tableName = "complex_type_table"; dropTableIfExists(tableName); sql( @@ -442,10 +449,6 @@ void testComplexType() { checkTableReadWrite(tableInfo); } - private String formatTableName(String tableName) { - return String.format("%s_%s", getCatalogName(), tableName); - } - private void checkTableColumns( String tableName, List columnInfos, SparkTableInfo tableInfo) { SparkTableInfoChecker.create() From 39f11f3102fd6a314427a93cbf2b96d8d525b79c Mon Sep 17 00:00:00 2001 From: caican00 Date: Fri, 22 Mar 2024 14:12:00 +0800 Subject: [PATCH 28/55] fix --- .../integration/test/spark/SparkCommonIT.java | 14 +++++++++----- .../test/spark/hive/SparkHiveCatalogIT.java | 5 +++++ .../test/spark/iceberg/SparkIcebergCatalogIT.java | 5 +++++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index c52cfdd73f3..d39bfe75aff 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -52,6 +52,8 @@ private static String getInsertWithoutPartitionSql(String tableName, String valu DataTypes.createStructField("col2", DataTypes.StringType, true))), "struct(1, 'a')"); + protected abstract String getUsingClause(); + // Use a custom database not the original default database because SparkCommonIT couldn't // read&write data to tables in default database. 
The main reason is default database location is // determined by `hive.metastore.warehouse.dir` in hive-site.xml which is local HDFS address @@ -73,7 +75,9 @@ void init() { @AfterAll void cleanUp() { sql("USE " + getCatalogName()); - sql("DROP DATABASE IF EXISTS " + getDefaultDatabase() + " CASCADE"); + getDatabases() + .forEach( + databaseName -> sql(String.format("DROP DATABASE IF EXISTS %s CASCADE", databaseName))); } protected String getDefaultDatabase() { @@ -328,9 +332,9 @@ void testAlterTableUpdateColumnType() { checkTableColumns(tableName, simpleTableColumns, getTableInfo(tableName)); sql(String.format("ALTER TABLE %S ADD COLUMNS (col1 int)", tableName)); - sql(String.format("ALTER TABLE %S CHANGE COLUMN col1 col1 string", tableName)); + sql(String.format("ALTER TABLE %S CHANGE COLUMN col1 col1 bigint", tableName)); ArrayList updateColumns = new ArrayList<>(simpleTableColumns); - updateColumns.add(SparkColumnInfo.of("col1", DataTypes.StringType, null)); + updateColumns.add(SparkColumnInfo.of("col1", DataTypes.LongType, null)); checkTableColumns(tableName, updateColumns, getTableInfo(tableName)); } @@ -367,8 +371,8 @@ void testUpdateColumnPosition() { sql( String.format( - "CREATE TABLE %s (id STRING COMMENT '', name STRING COMMENT '', age STRING COMMENT '') USING PARQUET", - tableName)); + "CREATE TABLE %s (id STRING COMMENT '', name STRING COMMENT '', age STRING COMMENT '') %s", + tableName, getUsingClause())); checkTableColumns(tableName, simpleTableColumns, getTableInfo(tableName)); sql(String.format("ALTER TABLE %S ADD COLUMNS (col1 STRING COMMENT '')", tableName)); diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java index bce6cb212bf..c14930b2d40 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java @@ -21,4 +21,9 @@ protected String getCatalogName() { protected String getProvider() { return "hive"; } + + @Override + protected String getUsingClause() { + return "USING PARQUET"; + } } diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java index 7a7fa5e16cc..04a6c487255 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java @@ -21,4 +21,9 @@ protected String getCatalogName() { protected String getProvider() { return "lakehouse-iceberg"; } + + @Override + protected String getUsingClause() { + return "USING ICEBERG"; + } } From 49181d27a28dfcbc73e5e3a537aeae2100f69964 Mon Sep 17 00:00:00 2001 From: caican00 Date: Fri, 22 Mar 2024 15:06:56 +0800 Subject: [PATCH 29/55] fix --- .../iceberg/IcebergCatalogOperations.java | 5 ++ .../integration/test/spark/SparkCommonIT.java | 48 ----------------- .../test/spark/hive/SparkHiveCatalogIT.java | 52 +++++++++++++++++++ .../spark/iceberg/SparkIcebergCatalogIT.java | 52 +++++++++++++++++++ 4 files changed, 109 insertions(+), 48 deletions(-) diff --git 
a/catalogs/catalog-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/lakehouse/iceberg/IcebergCatalogOperations.java b/catalogs/catalog-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/lakehouse/iceberg/IcebergCatalogOperations.java index 8473ede2422..6191facbcb6 100644 --- a/catalogs/catalog-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/lakehouse/iceberg/IcebergCatalogOperations.java +++ b/catalogs/catalog-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/lakehouse/iceberg/IcebergCatalogOperations.java @@ -317,6 +317,11 @@ public boolean dropSchema(NameIdentifier ident, boolean cascade) throws NonEmpty */ @Override public NameIdentifier[] listTables(Namespace namespace) throws NoSuchSchemaException { + NameIdentifier schemaIdent = NameIdentifier.of(namespace.levels()); + if (!schemaExists(schemaIdent)) { + throw new NoSuchSchemaException("Schema (database) does not exist %s", namespace); + } + try { ListTablesResponse listTablesResponse = icebergTableOps.listTable(IcebergTableOpsHelper.getIcebergNamespace(namespace)); diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index d39bfe75aff..5652efacafa 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -24,7 +24,6 @@ import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.junit.platform.commons.util.StringUtils; public abstract class SparkCommonIT extends SparkEnvIT { private static String getSelectAllSql(String tableName) { @@ -90,53 +89,6 @@ void testLoadCatalogs() { Assertions.assertTrue(catalogs.contains(getCatalogName())); } - @Test - void testCreateAndLoadSchema() { - String testDatabaseName = "t_create1"; - dropDatabaseIfExists(testDatabaseName); - sql("CREATE DATABASE " + testDatabaseName); - Map databaseMeta = getDatabaseMetadata(testDatabaseName); - Assertions.assertFalse(databaseMeta.containsKey("Comment")); - Assertions.assertTrue(databaseMeta.containsKey("Location")); - Assertions.assertEquals("datastrato", databaseMeta.get("Owner")); - String properties = databaseMeta.get("Properties"); - Assertions.assertTrue(StringUtils.isBlank(properties)); - - testDatabaseName = "t_create2"; - dropDatabaseIfExists(testDatabaseName); - String testDatabaseLocation = "/tmp/" + testDatabaseName; - sql( - String.format( - "CREATE DATABASE %s COMMENT 'comment' LOCATION '%s'\n" + " WITH DBPROPERTIES (ID=001);", - testDatabaseName, testDatabaseLocation)); - databaseMeta = getDatabaseMetadata(testDatabaseName); - String comment = databaseMeta.get("Comment"); - Assertions.assertEquals("comment", comment); - Assertions.assertEquals("datastrato", databaseMeta.get("Owner")); - // underlying catalog may change /tmp/t_create2 to file:/tmp/t_create2 - Assertions.assertTrue(databaseMeta.get("Location").contains(testDatabaseLocation)); - properties = databaseMeta.get("Properties"); - Assertions.assertEquals("((ID,001))", properties); - } - - @Test - void testAlterSchema() { - String testDatabaseName = "t_alter"; - sql("CREATE DATABASE " + testDatabaseName); - Assertions.assertTrue( - StringUtils.isBlank(getDatabaseMetadata(testDatabaseName).get("Properties"))); - - sql(String.format("ALTER DATABASE %s 
SET DBPROPERTIES ('ID'='001')", testDatabaseName)); - Assertions.assertEquals("((ID,001))", getDatabaseMetadata(testDatabaseName).get("Properties")); - - // Hive metastore doesn't support alter database location, therefore this test method - // doesn't verify ALTER DATABASE database_name SET LOCATION 'new_location'. - - Assertions.assertThrowsExactly( - NoSuchNamespaceException.class, - () -> sql("ALTER DATABASE notExists SET DBPROPERTIES ('ID'='001')")); - } - @Test void testDropSchema() { String testDatabaseName = "t_drop"; diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java index c14930b2d40..af35713ffe2 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java @@ -5,8 +5,13 @@ package com.datastrato.gravitino.integration.test.spark.hive; import com.datastrato.gravitino.integration.test.spark.SparkCommonIT; +import java.util.Map; +import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; +import org.junit.platform.commons.util.StringUtils; @Tag("gravitino-docker-it") @TestInstance(TestInstance.Lifecycle.PER_CLASS) @@ -26,4 +31,51 @@ protected String getProvider() { protected String getUsingClause() { return "USING PARQUET"; } + + @Test + void testCreateAndLoadSchema() { + String testDatabaseName = "t_create1"; + dropDatabaseIfExists(testDatabaseName); + sql("CREATE DATABASE " + testDatabaseName); + Map databaseMeta = getDatabaseMetadata(testDatabaseName); + Assertions.assertFalse(databaseMeta.containsKey("Comment")); + Assertions.assertTrue(databaseMeta.containsKey("Location")); + Assertions.assertEquals("datastrato", databaseMeta.get("Owner")); + String properties = databaseMeta.get("Properties"); + Assertions.assertTrue(StringUtils.isBlank(properties)); + + testDatabaseName = "t_create2"; + dropDatabaseIfExists(testDatabaseName); + String testDatabaseLocation = "/tmp/" + testDatabaseName; + sql( + String.format( + "CREATE DATABASE %s COMMENT 'comment' LOCATION '%s'\n" + " WITH DBPROPERTIES (ID=001);", + testDatabaseName, testDatabaseLocation)); + databaseMeta = getDatabaseMetadata(testDatabaseName); + String comment = databaseMeta.get("Comment"); + Assertions.assertEquals("comment", comment); + Assertions.assertEquals("datastrato", databaseMeta.get("Owner")); + // underlying catalog may change /tmp/t_create2 to file:/tmp/t_create2 + Assertions.assertTrue(databaseMeta.get("Location").contains(testDatabaseLocation)); + properties = databaseMeta.get("Properties"); + Assertions.assertEquals("((ID,001))", properties); + } + + @Test + void testAlterSchema() { + String testDatabaseName = "t_alter"; + sql("CREATE DATABASE " + testDatabaseName); + Assertions.assertTrue( + StringUtils.isBlank(getDatabaseMetadata(testDatabaseName).get("Properties"))); + + sql(String.format("ALTER DATABASE %s SET DBPROPERTIES ('ID'='001')", testDatabaseName)); + Assertions.assertEquals("((ID,001))", getDatabaseMetadata(testDatabaseName).get("Properties")); + + // Hive metastore doesn't support alter database location, therefore this test method + // doesn't verify ALTER DATABASE database_name SET 
LOCATION 'new_location'. + + Assertions.assertThrowsExactly( + NoSuchNamespaceException.class, + () -> sql("ALTER DATABASE notExists SET DBPROPERTIES ('ID'='001')")); + } } diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java index 04a6c487255..3cac0dac454 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java @@ -5,7 +5,11 @@ package com.datastrato.gravitino.integration.test.spark.iceberg; import com.datastrato.gravitino.integration.test.spark.SparkCommonIT; +import java.util.Map; +import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; @Tag("gravitino-docker-it") @@ -26,4 +30,52 @@ protected String getProvider() { protected String getUsingClause() { return "USING ICEBERG"; } + + @Test + void testCreateAndLoadSchema() { + String testDatabaseName = "t_create1"; + dropDatabaseIfExists(testDatabaseName); + sql("CREATE DATABASE " + testDatabaseName); + Map databaseMeta = getDatabaseMetadata(testDatabaseName); + Assertions.assertFalse(databaseMeta.containsKey("Comment")); + Assertions.assertTrue(databaseMeta.containsKey("Location")); + Assertions.assertEquals("datastrato", databaseMeta.get("Owner")); + String properties = databaseMeta.get("Properties"); + Assertions.assertEquals("((owner,datastrato))", properties); + + testDatabaseName = "t_create2"; + dropDatabaseIfExists(testDatabaseName); + String testDatabaseLocation = "/tmp/" + testDatabaseName; + sql( + String.format( + "CREATE DATABASE %s COMMENT 'comment' LOCATION '%s'\n" + " WITH DBPROPERTIES (ID=001);", + testDatabaseName, testDatabaseLocation)); + databaseMeta = getDatabaseMetadata(testDatabaseName); + String comment = databaseMeta.get("Comment"); + Assertions.assertEquals("comment", comment); + Assertions.assertEquals("datastrato", databaseMeta.get("Owner")); + // underlying catalog may change /tmp/t_create2 to file:/tmp/t_create2 + Assertions.assertTrue(databaseMeta.get("Location").contains(testDatabaseLocation)); + properties = databaseMeta.get("Properties"); + Assertions.assertEquals("((owner,datastrato),(ID,001))", properties); + } + + @Test + void testAlterSchema() { + String testDatabaseName = "t_alter"; + sql("CREATE DATABASE " + testDatabaseName); + Assertions.assertEquals( + "((owner,datastrato))", getDatabaseMetadata(testDatabaseName).get("Properties")); + + sql(String.format("ALTER DATABASE %s SET DBPROPERTIES ('ID'='001')", testDatabaseName)); + Assertions.assertEquals( + "((owner,datastrato),(ID,001))", getDatabaseMetadata(testDatabaseName).get("Properties")); + + // Hive metastore doesn't support alter database location, therefore this test method + // doesn't verify ALTER DATABASE database_name SET LOCATION 'new_location'. 
+ + Assertions.assertThrowsExactly( + NoSuchNamespaceException.class, + () -> sql("ALTER DATABASE notExists SET DBPROPERTIES ('ID'='001')")); + } } From 16ec0220695dd90d982bff90596e70b20a97a86f Mon Sep 17 00:00:00 2001 From: caican00 Date: Fri, 22 Mar 2024 15:54:14 +0800 Subject: [PATCH 30/55] fix --- .../test/spark/iceberg/SparkIcebergCatalogIT.java | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java index 3cac0dac454..529220d5b38 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java @@ -41,7 +41,9 @@ void testCreateAndLoadSchema() { Assertions.assertTrue(databaseMeta.containsKey("Location")); Assertions.assertEquals("datastrato", databaseMeta.get("Owner")); String properties = databaseMeta.get("Properties"); - Assertions.assertEquals("((owner,datastrato))", properties); + Assertions.assertEquals( + "((hive.metastore.database.owner,datastrato), (hive.metastore.database.owner-type,USER))", + properties); testDatabaseName = "t_create2"; dropDatabaseIfExists(testDatabaseName); @@ -57,7 +59,9 @@ void testCreateAndLoadSchema() { // underlying catalog may change /tmp/t_create2 to file:/tmp/t_create2 Assertions.assertTrue(databaseMeta.get("Location").contains(testDatabaseLocation)); properties = databaseMeta.get("Properties"); - Assertions.assertEquals("((owner,datastrato),(ID,001))", properties); + Assertions.assertEquals( + "((hive.metastore.database.owner,datastrato), (hive.metastore.database.owner-type,USER), (ID,001))", + properties); } @Test @@ -65,11 +69,13 @@ void testAlterSchema() { String testDatabaseName = "t_alter"; sql("CREATE DATABASE " + testDatabaseName); Assertions.assertEquals( - "((owner,datastrato))", getDatabaseMetadata(testDatabaseName).get("Properties")); + "((hive.metastore.database.owner,datastrato), (hive.metastore.database.owner-type,USER))", + getDatabaseMetadata(testDatabaseName).get("Properties")); sql(String.format("ALTER DATABASE %s SET DBPROPERTIES ('ID'='001')", testDatabaseName)); Assertions.assertEquals( - "((owner,datastrato),(ID,001))", getDatabaseMetadata(testDatabaseName).get("Properties")); + "((hive.metastore.database.owner,datastrato), (hive.metastore.database.owner-type,USER), (ID,001))", + getDatabaseMetadata(testDatabaseName).get("Properties")); // Hive metastore doesn't support alter database location, therefore this test method // doesn't verify ALTER DATABASE database_name SET LOCATION 'new_location'. 
From 2ad0a1800749a1bd0d8c8fe050482eaa81439ee6 Mon Sep 17 00:00:00 2001 From: caican00 Date: Sat, 23 Mar 2024 17:41:58 +0800 Subject: [PATCH 31/55] fix --- .../integration/test/spark/iceberg/SparkIcebergCatalogIT.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java index 529220d5b38..7082b27ac97 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java @@ -60,7 +60,7 @@ void testCreateAndLoadSchema() { Assertions.assertTrue(databaseMeta.get("Location").contains(testDatabaseLocation)); properties = databaseMeta.get("Properties"); Assertions.assertEquals( - "((hive.metastore.database.owner,datastrato), (hive.metastore.database.owner-type,USER), (ID,001))", + "((ID,001), (hive.metastore.database.owner,datastrato), (hive.metastore.database.owner-type,USER))", properties); } @@ -74,7 +74,7 @@ void testAlterSchema() { sql(String.format("ALTER DATABASE %s SET DBPROPERTIES ('ID'='001')", testDatabaseName)); Assertions.assertEquals( - "((hive.metastore.database.owner,datastrato), (hive.metastore.database.owner-type,USER), (ID,001))", + "((ID,001), (hive.metastore.database.owner,datastrato), (hive.metastore.database.owner-type,USER))", getDatabaseMetadata(testDatabaseName).get("Properties")); // Hive metastore doesn't support alter database location, therefore this test method From 99dd7254d760b43b644f8d4ae117076b44636524 Mon Sep 17 00:00:00 2001 From: caican00 Date: Sun, 24 Mar 2024 19:56:22 +0800 Subject: [PATCH 32/55] fix --- .../test/spark/hive/SparkHiveCatalogIT.java | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java index 5bce11526a1..58227c7c981 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java @@ -11,11 +11,10 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import org.apache.hadoop.fs.Path; -import org.apache.spark.sql.types.DataTypes; -import org.junit.jupiter.api.Assertions; import java.util.Map; +import org.apache.hadoop.fs.Path; import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; +import org.apache.spark.sql.types.DataTypes; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; @@ -108,10 +107,10 @@ public void testCreateHiveFormatPartitionTable() { SparkTableInfo tableInfo = getTableInfo(tableName); SparkTableInfoChecker checker = - SparkTableInfoChecker.create() - .withName(tableName) - .withColumns(columns) - .withIdentifyPartition(Arrays.asList("age_p1", "age_p2")); + SparkTableInfoChecker.create() + .withName(tableName) + .withColumns(columns) + .withIdentifyPartition(Arrays.asList("age_p1", "age_p2")); checker.check(tableInfo); // write to static partition checkTableReadWrite(tableInfo); @@ -131,8 +130,8 @@ public 
void testWriteHiveDynamicPartition() { // write data to dynamic partition String insertData = - String.format( - "INSERT OVERWRITE %s PARTITION(age_p1=1, age_p2) values(1,'a',3,'b');", tableName); + String.format( + "INSERT OVERWRITE %s PARTITION(age_p1=1, age_p2) values(1,'a',3,'b');", tableName); sql(insertData); List queryResult = getTableData(tableName); Assertions.assertTrue(queryResult.size() == 1); From e75e23198e686e247608eab3621613bf8859dbd4 Mon Sep 17 00:00:00 2001 From: caican00 Date: Sun, 24 Mar 2024 20:10:46 +0800 Subject: [PATCH 33/55] fix --- .../test/spark/iceberg/SparkIcebergCatalogIT.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java index 7082b27ac97..dba1b3cde2c 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java @@ -26,6 +26,11 @@ protected String getProvider() { return "lakehouse-iceberg"; } + @Override + protected boolean supportsSparkSQLClusteredBy() { + return false; + } + @Override protected String getUsingClause() { return "USING ICEBERG"; } From 93a764386d1c4831fccd2f14c4d116128581cad9 Mon Sep 17 00:00:00 2001 From: caican00 Date: Mon, 25 Mar 2024 14:35:48 +0800 Subject: [PATCH 34/55] test IT --- .../gravitino/integration/test/spark/SparkCommonIT.java | 1 + 1 file changed, 1 insertion(+) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 94e5c40d80f..c691ab7a6a3 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -310,6 +310,7 @@ void testAlterTableRenameColumn() { sql( String.format( "ALTER TABLE %S RENAME COLUMN %S TO %S", tableName, oldColumnName, newColumnName)); + getSparkSession().sql("desc extended test_rename_column").show(false); ArrayList renameColumns = new ArrayList<>(simpleTableColumns); renameColumns.add(SparkColumnInfo.of(newColumnName, DataTypes.IntegerType, null)); checkTableColumns(tableName, renameColumns, getTableInfo(tableName)); From a91249f3e18de4ab1c8edd0b55ec8c7f156373c8 Mon Sep 17 00:00:00 2001 From: caican00 Date: Mon, 25 Mar 2024 15:15:04 +0800 Subject: [PATCH 35/55] fix an IT --- .../gravitino/integration/test/spark/SparkCommonIT.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index c691ab7a6a3..4275d9d7ec0 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -310,7 +310,6 @@ void testAlterTableRenameColumn() { sql( String.format( "ALTER TABLE %S RENAME COLUMN %S TO %S", tableName, oldColumnName, newColumnName)); - getSparkSession().sql("desc extended test_rename_column").show(false); ArrayList
renameColumns = new ArrayList<>(simpleTableColumns); renameColumns.add(SparkColumnInfo.of(newColumnName, DataTypes.IntegerType, null)); checkTableColumns(tableName, renameColumns, getTableInfo(tableName)); @@ -417,7 +416,8 @@ void testCreateDatasourceFormatPartitionTable() { dropTableIfExists(tableName); String createTableSQL = getCreateSimpleTableString(tableName); - createTableSQL = createTableSQL + "USING PARQUET PARTITIONED BY (name, age)"; + createTableSQL = + String.format("%s %s PARTITIONED BY (name, age)", createTableSQL, getUsingClause()); sql(createTableSQL); SparkTableInfo tableInfo = getTableInfo(tableName); SparkTableInfoChecker checker = From 63a8f4094c4201dbd179cf04056c6992d3b73a45 Mon Sep 17 00:00:00 2001 From: caican00 Date: Mon, 25 Mar 2024 15:40:39 +0800 Subject: [PATCH 36/55] fix an IT --- .../gravitino/integration/test/spark/SparkCommonIT.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 4275d9d7ec0..9046e1b2021 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -85,7 +85,10 @@ void cleanUp() { sql("USE " + getCatalogName()); getDatabases() .forEach( - databaseName -> sql(String.format("DROP DATABASE IF EXISTS %s CASCADE", databaseName))); + databaseName -> { + listTableNames(databaseName).forEach(this::dropTableIfExists); + dropDatabaseIfExists(databaseName); + }); } From d84e6a3d9836601e326b65935f65b98f51925e54 Mon Sep 17 00:00:00 2001 From: caican00 Date: Mon, 25 Mar 2024 16:38:56 +0800 Subject: [PATCH 37/55] fix an IT --- .../gravitino/integration/test/spark/SparkCommonIT.java | 2 +- .../spark/connector/iceberg/IcebergPropertiesConverter.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 9046e1b2021..2253727467c 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -87,7 +87,7 @@ void cleanUp() { .forEach( databaseName -> { listTableNames(databaseName).forEach(this::dropTableIfExists); - dropDatabaseIfExists(databaseName); + sql(String.format("DROP DATABASE IF EXISTS %s CASCADE;", databaseName)); }); } diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConverter.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConverter.java index a6c8f781696..f96107c814d 100644 --- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConverter.java +++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConverter.java @@ -9,7 +9,7 @@ import java.util.HashMap; import java.util.Map; -/** Transform iceberg catalog properties between Spark and Gravitino. */ +/** Transform Iceberg catalog properties between Spark and Gravitino.
*/ public class IcebergPropertiesConverter implements PropertiesConverter { @Override public Map toGravitinoTableProperties(Map properties) { From ec35db0344536ad5c98d46f126ecfcc3a698d2bd Mon Sep 17 00:00:00 2001 From: caican00 Date: Mon, 25 Mar 2024 17:06:58 +0800 Subject: [PATCH 38/55] update --- .../gravitino/integration/test/spark/SparkCommonIT.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 2253727467c..9ce624fe874 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -86,8 +86,10 @@ void cleanUp() { getDatabases() .forEach( databaseName -> { - listTableNames(databaseName).forEach(this::dropTableIfExists); - sql(String.format("DROP DATABASE IF EXISTS %s CASCADE;", databaseName)); + listTableNames(databaseName) + .forEach( + tableName -> sql(String.format("DROP TABLE IF EXISTS %s PURGE", tableName))); + sql(String.format("DROP DATABASE IF EXISTS %s;", databaseName)); }); } From af39f835c42a4b44be018c4bdb9aa534e962f982 Mon Sep 17 00:00:00 2001 From: caican00 Date: Mon, 25 Mar 2024 18:18:15 +0800 Subject: [PATCH 39/55] update --- .../gravitino/integration/test/spark/SparkCommonIT.java | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 9ce624fe874..75d75ca1ff1 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -84,13 +84,7 @@ void init() { void cleanUp() { sql("USE " + getCatalogName()); getDatabases() - .forEach( - databaseName -> { - listTableNames(databaseName) - .forEach( - tableName -> sql(String.format("DROP TABLE IF EXISTS %s PURGE", tableName))); - sql(String.format("DROP DATABASE IF EXISTS %s;", databaseName)); - }); + .forEach(database -> sql(String.format("DROP DATABASE IF EXISTS %s CASCADE", database))); } @Test From 4d8598a3569d86d50b5cad25325de3d43091fa8e Mon Sep 17 00:00:00 2001 From: caican00 Date: Mon, 25 Mar 2024 21:34:05 +0800 Subject: [PATCH 40/55] update --- .../gravitino/integration/test/spark/SparkCommonIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 75d75ca1ff1..2f9c3c360fb 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -303,7 +303,7 @@ void testAlterTableRenameColumn() { checkTableColumns(tableName, simpleTableColumns, getTableInfo(tableName)); String oldColumnName = "col1"; - String newColumnName = "col2"; + String newColumnName = "col1_new"; sql(String.format("ALTER TABLE %S ADD COLUMNS (col1 int)", tableName)); sql( From 4d3b50fd80b2c471622b414e72a40dd80c740a53 Mon Sep 17 00:00:00 2001 From: 
caican00 Date: Tue, 26 Mar 2024 15:41:35 +0800 Subject: [PATCH 41/55] fix comment --- .../integration/test/spark/SparkCommonIT.java | 11 +-- .../integration/test/spark/SparkEnvIT.java | 17 ++-- .../test/spark/hive/SparkHiveCatalogIT.java | 5 -- .../spark/iceberg/SparkIcebergCatalogIT.java | 5 -- .../spark/connector/GravitinoSparkConfig.java | 11 +++ .../connector/iceberg/IcebergAdaptor.java | 86 +++++++++++++++++-- 6 files changed, 106 insertions(+), 29 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 2f9c3c360fb..5765921df26 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -60,8 +60,6 @@ private static String getInsertWithPartitionSql( // Whether supports [CLUSTERED BY col_name3 SORTED BY col_name INTO num_buckets BUCKETS] protected abstract boolean supportsSparkSQLClusteredBy(); - protected abstract String getUsingClause(); - // Use a custom database not the original default database because SparkIT couldn't read&write // data to tables in default database. The main reason is default database location is // determined by `hive.metastore.warehouse.dir` in hive-site.xml which is local HDFS address @@ -308,7 +306,7 @@ void testAlterTableRenameColumn() { sql(String.format("ALTER TABLE %S ADD COLUMNS (col1 int)", tableName)); sql( String.format( - "ALTER TABLE %S RENAME COLUMN %S TO %S", tableName, oldColumnName, newColumnName)); + "ALTER TABLE %s RENAME COLUMN %s TO %s", tableName, oldColumnName, newColumnName)); ArrayList renameColumns = new ArrayList<>(simpleTableColumns); renameColumns.add(SparkColumnInfo.of(newColumnName, DataTypes.IntegerType, null)); checkTableColumns(tableName, renameColumns, getTableInfo(tableName)); @@ -327,8 +325,8 @@ void testUpdateColumnPosition() { sql( String.format( - "CREATE TABLE %s (id STRING COMMENT '', name STRING COMMENT '', age STRING COMMENT '') %s", - tableName, getUsingClause())); + "CREATE TABLE %s (id STRING COMMENT '', name STRING COMMENT '', age STRING COMMENT '')", + tableName)); checkTableColumns(tableName, simpleTableColumns, getTableInfo(tableName)); sql(String.format("ALTER TABLE %S ADD COLUMNS (col1 STRING COMMENT '')", tableName)); @@ -415,8 +413,7 @@ void testCreateDatasourceFormatPartitionTable() { dropTableIfExists(tableName); String createTableSQL = getCreateSimpleTableString(tableName); - createTableSQL = - String.format("%s %s PARTITIONED BY (name, age)", createTableSQL, getUsingClause()); + createTableSQL = createTableSQL + " PARTITIONED BY (name, age)"; sql(createTableSQL); SparkTableInfo tableInfo = getTableInfo(tableName); SparkTableInfoChecker checker = diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java index a8ec1868d54..69498fd1df0 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java @@ -80,11 +80,18 @@ private void initMetalakeAndCatalogs() { client.createMetalake(NameIdentifier.of(metalakeName), "", Collections.emptyMap()); GravitinoMetalake metalake = 
client.loadMetalake(NameIdentifier.of(metalakeName)); Map properties = Maps.newHashMap(); - properties.put(GravitinoSparkConfig.GRAVITINO_HIVE_METASTORE_URI, hiveMetastoreUri); - properties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_BACKEND, "hive"); - properties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE, warehouse); - properties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI, hiveMetastoreUri); - + switch (getProvider()) { + case "hive": + properties.put(GravitinoSparkConfig.GRAVITINO_HIVE_METASTORE_URI, hiveMetastoreUri); + break; + case "lakehouse-iceberg": + properties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_BACKEND, "hive"); + properties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE, warehouse); + properties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI, hiveMetastoreUri); + break; + default: + throw new IllegalArgumentException("Unsupported provider: " + getProvider()); + } metalake.createCatalog( NameIdentifier.of(metalakeName, getCatalogName()), Catalog.Type.RELATIONAL, diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java index 58227c7c981..0b1fa83be28 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java @@ -35,11 +35,6 @@ protected String getProvider() { return "hive"; } - @Override - protected String getUsingClause() { - return "USING PARQUET"; - } - @Test void testCreateAndLoadSchema() { String testDatabaseName = "t_create1"; diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java index dba1b3cde2c..f046d9717cd 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java @@ -31,11 +31,6 @@ protected boolean supportsSparkSQLClusteredBy() { return false; } - @Override - protected String getUsingClause() { - return "USING ICEBERG"; - } - @Test void testCreateAndLoadSchema() { String testDatabaseName = "t_create1"; diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/GravitinoSparkConfig.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/GravitinoSparkConfig.java index bf4d0f406d3..32c21bb830e 100644 --- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/GravitinoSparkConfig.java +++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/GravitinoSparkConfig.java @@ -12,9 +12,20 @@ public class GravitinoSparkConfig { public static final String GRAVITINO_METALAKE = GRAVITINO_PREFIX + "metalake"; public static final String GRAVITINO_HIVE_METASTORE_URI = "metastore.uris"; public static final String SPARK_HIVE_METASTORE_URI = "hive.metastore.uris"; + public static final String LAKEHOUSE_ICEBERG_CATALOG_BACKEND = "catalog-backend"; + public static final String LAKEHOUSE_ICEBERG_CATALOG_TYPE = "type"; public static final String LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE = "warehouse"; public static final String 
LAKEHOUSE_ICEBERG_CATALOG_URI = "uri"; + public static final String GRAVITINO_JDBC_USER = "jdbc-user"; + public static final String LAKEHOUSE_ICEBERG_CATALOG_JDBC_USER = "jdbc.user"; + public static final String GRAVITINO_JDBC_PASSWORD = "jdbc-password"; + public static final String LAKEHOUSE_ICEBERG_CATALOG_JDBC_PASSWORD = "jdbc.password"; + public static final String LAKEHOUSE_ICEBERG_CATALOG_JDBC_INITIALIZE = "jdbc-initialize"; + public static final String LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER = "jdbc-driver"; + + public static final String LAKEHOUSE_ICEBERG_CATALOG_BACKEND_HIVE = "hive"; + public static final String LAKEHOUSE_ICEBERG_CATALOG_BACKEND_JDBC = "jdbc"; private GravitinoSparkConfig() {} } diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java index 5af39a38e1f..68f2ca4005b 100644 --- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java +++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java @@ -12,6 +12,7 @@ import com.datastrato.gravitino.spark.connector.table.SparkBaseTable; import com.google.common.base.Preconditions; import java.util.HashMap; +import java.util.Locale; import java.util.Map; import org.apache.commons.lang3.StringUtils; import org.apache.iceberg.spark.SparkCatalog; @@ -41,16 +42,87 @@ public TableCatalog createAndInitSparkCatalog( String name, CaseInsensitiveStringMap options, Map properties) { Preconditions.checkArgument( properties != null, "Iceberg Catalog properties should not be null"); - String metastoreUri = properties.get(GravitinoSparkConfig.GRAVITINO_HIVE_METASTORE_URI); + + String catalogBackend = properties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_BACKEND); Preconditions.checkArgument( - StringUtils.isNotBlank(metastoreUri), - "Couldn't get " - + GravitinoSparkConfig.GRAVITINO_HIVE_METASTORE_URI - + " from iceberg catalog properties"); + StringUtils.isNotBlank(catalogBackend), "Iceberg Catalog backend should not be empty."); - TableCatalog icebergCatalog = new SparkCatalog(); HashMap all = new HashMap<>(options); - all.put(GravitinoSparkConfig.SPARK_HIVE_METASTORE_URI, metastoreUri); + + switch (catalogBackend.toLowerCase(Locale.ENGLISH)) { + case GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_BACKEND_HIVE: + String metastoreUri = properties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI); + Preconditions.checkArgument( + StringUtils.isNotBlank(metastoreUri), + "Couldn't get " + + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI + + " from iceberg catalog properties"); + String hiveWarehouse = + properties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE); + Preconditions.checkArgument( + StringUtils.isNotBlank(hiveWarehouse), + "Couldn't get " + + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE + + " from iceberg catalog properties"); + all.put( + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_TYPE, + catalogBackend.toLowerCase(Locale.ENGLISH)); + all.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI, metastoreUri); + all.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE, hiveWarehouse); + break; + case GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_BACKEND_JDBC: + String jdbcUri = properties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI); + Preconditions.checkArgument( + StringUtils.isNotBlank(jdbcUri), + "Couldn't get " + + 
GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI + + " from iceberg catalog properties"); + String jdbcWarehouse = + properties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE); + Preconditions.checkArgument( + StringUtils.isNotBlank(jdbcWarehouse), + "Couldn't get " + + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE + + " from iceberg catalog properties"); + String jdbcUser = properties.get(GravitinoSparkConfig.GRAVITINO_JDBC_USER); + Preconditions.checkArgument( + StringUtils.isNotBlank(jdbcUser), + "Couldn't get " + + GravitinoSparkConfig.GRAVITINO_JDBC_USER + + " from iceberg catalog properties"); + String jdbcPassword = properties.get(GravitinoSparkConfig.GRAVITINO_JDBC_PASSWORD); + Preconditions.checkArgument( + StringUtils.isNotBlank(jdbcPassword), + "Couldn't get " + + GravitinoSparkConfig.GRAVITINO_JDBC_PASSWORD + + " from iceberg catalog properties"); + String jdbcDriver = + properties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER); + Preconditions.checkArgument( + StringUtils.isNotBlank(jdbcDriver), + "Couldn't get " + + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER + + " from iceberg catalog properties"); + all.put( + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_TYPE, + catalogBackend.toLowerCase(Locale.ENGLISH)); + all.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI, jdbcUri); + all.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE, jdbcWarehouse); + all.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_USER, jdbcUser); + all.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_PASSWORD, jdbcPassword); + all.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER, jdbcDriver); + all.put( + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_INITIALIZE, + properties.getOrDefault( + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_INITIALIZE, "true")); + break; + default: + // SparkCatalog does not support Memory type catalog + throw new IllegalArgumentException( + "Unsupported Iceberg Catalog backend: " + catalogBackend); + } + + TableCatalog icebergCatalog = new SparkCatalog(); icebergCatalog.initialize(name, new CaseInsensitiveStringMap(all)); return icebergCatalog; From 4004edc39a621c496c159c67b1390e4acdced6fe Mon Sep 17 00:00:00 2001 From: caican00 Date: Tue, 26 Mar 2024 19:27:05 +0800 Subject: [PATCH 42/55] fix IT --- .../integration/test/spark/SparkCommonIT.java | 19 ---------------- .../test/spark/hive/SparkHiveCatalogIT.java | 19 ++++++++++++++++ .../spark/iceberg/SparkIcebergCatalogIT.java | 22 +++++++++++++++++++ 3 files changed, 41 insertions(+), 19 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 5765921df26..40291c8d1f4 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -407,25 +407,6 @@ void testComplexType() { checkTableReadWrite(tableInfo); } - @Test - void testCreateDatasourceFormatPartitionTable() { - String tableName = "datasource_partition_table"; - - dropTableIfExists(tableName); - String createTableSQL = getCreateSimpleTableString(tableName); - createTableSQL = createTableSQL + " PARTITIONED BY (name, age)"; - sql(createTableSQL); - SparkTableInfo tableInfo = getTableInfo(tableName); - SparkTableInfoChecker checker = -
SparkTableInfoChecker.create() - .withName(tableName) - .withColumns(getSimpleTableColumn()) - .withIdentifyPartition(Arrays.asList("name", "age")); - checker.check(tableInfo); - checkTableReadWrite(tableInfo); - checkPartitionDirExists(tableInfo); - } - @Test @EnabledIf("supportsSparkSQLClusteredBy") void testCreateBucketTable() { diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java index 0b1fa83be28..8ce3b1b152e 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java @@ -112,6 +112,25 @@ public void testCreateHiveFormatPartitionTable() { checkPartitionDirExists(tableInfo); } + @Test + void testCreateDatasourceFormatPartitionTable() { + String tableName = "datasource_partition_table"; + + dropTableIfExists(tableName); + String createTableSQL = getCreateSimpleTableString(tableName); + createTableSQL = createTableSQL + " USING PARQUET PARTITIONED BY (name, age)"; + sql(createTableSQL); + SparkTableInfo tableInfo = getTableInfo(tableName); + SparkTableInfoChecker checker = + SparkTableInfoChecker.create() + .withName(tableName) + .withColumns(getSimpleTableColumn()) + .withIdentifyPartition(Arrays.asList("name", "age")); + checker.check(tableInfo); + checkTableReadWrite(tableInfo); + checkPartitionDirExists(tableInfo); + } + @Test public void testWriteHiveDynamicPartition() { String tableName = "hive_dynamic_partition_table"; diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java index f046d9717cd..26ff88adbea 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java @@ -5,6 +5,9 @@ package com.datastrato.gravitino.integration.test.spark.iceberg; import com.datastrato.gravitino.integration.test.spark.SparkCommonIT; +import com.datastrato.gravitino.integration.test.util.spark.SparkTableInfo; +import com.datastrato.gravitino.integration.test.util.spark.SparkTableInfoChecker; +import java.util.Arrays; import java.util.Map; import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; import org.junit.jupiter.api.Assertions; @@ -84,4 +87,23 @@ void testAlterSchema() { NoSuchNamespaceException.class, () -> sql("ALTER DATABASE notExists SET DBPROPERTIES ('ID'='001')")); } + + @Test + void testCreateIcebergDatasourceFormatPartitionTable() { + String tableName = "datasource_partition_table"; + + dropTableIfExists(tableName); + String createTableSQL = getCreateSimpleTableString(tableName); + createTableSQL = createTableSQL + " USING ICEBERG PARTITIONED BY (name, age)"; + sql(createTableSQL); + SparkTableInfo tableInfo = getTableInfo(tableName); + SparkTableInfoChecker checker = + SparkTableInfoChecker.create() + .withName(tableName) + .withColumns(getSimpleTableColumn()) + .withIdentifyPartition(Arrays.asList("name", "age")); + checker.check(tableInfo); + checkTableReadWrite(tableInfo); + checkPartitionDirExists(tableInfo); + } } From 
e7149b0e151de545f4a9659420b7824c8bda3324 Mon Sep 17 00:00:00 2001 From: caican00 Date: Wed, 27 Mar 2024 10:08:30 +0800 Subject: [PATCH 43/55] fix --- .../integration/test/spark/SparkCommonIT.java | 28 ++++ .../test/spark/hive/SparkHiveCatalogIT.java | 31 +--- .../spark/iceberg/SparkIcebergCatalogIT.java | 27 +--- .../connector/iceberg/IcebergAdaptor.java | 140 ++++++++++-------- 4 files changed, 116 insertions(+), 110 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 40291c8d1f4..9e9716ef703 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -60,6 +60,8 @@ private static String getInsertWithPartitionSql( // Whether supports [CLUSTERED BY col_name3 SORTED BY col_name INTO num_buckets BUCKETS] protected abstract boolean supportsSparkSQLClusteredBy(); + protected abstract boolean supportPartition(); + // Use a custom database not the original default database because SparkIT couldn't read&write // data to tables in default database. The main reason is default database location is // determined by `hive.metastore.warehouse.dir` in hive-site.xml which is local HDFS address @@ -407,6 +409,32 @@ void testComplexType() { checkTableReadWrite(tableInfo); } + @Test + @EnabledIf("supportPartition") + public void testWriteHiveDynamicPartition() { + String tableName = "hive_dynamic_partition_table"; + + dropTableIfExists(tableName); + String createTableSQL = getCreateSimpleTableString(tableName); + createTableSQL = createTableSQL + "PARTITIONED BY (age_p1 INT, age_p2 STRING)"; + sql(createTableSQL); + + SparkTableInfo tableInfo = getTableInfo(tableName); + + // write data to dynamic partition + String insertData = + String.format( + "INSERT OVERWRITE %s PARTITION(age_p1=1, age_p2) values(1,'a',3,'b');", tableName); + sql(insertData); + List queryResult = getTableData(tableName); + Assertions.assertTrue(queryResult.size() == 1); + Assertions.assertEquals("1,a,3,1,b", queryResult.get(0)); + String location = tableInfo.getTableLocation(); + String partitionExpression = "age_p1=1/age_p2=b"; + Path partitionPath = new Path(location, partitionExpression); + checkDirExists(partitionPath); + } + @Test @EnabledIf("supportsSparkSQLClusteredBy") void testCreateBucketTable() { diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java index 8ce3b1b152e..28d07383ab2 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java @@ -12,7 +12,6 @@ import java.util.Arrays; import java.util.List; import java.util.Map; -import org.apache.hadoop.fs.Path; import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; import org.apache.spark.sql.types.DataTypes; import org.junit.jupiter.api.Assertions; @@ -87,6 +86,11 @@ protected boolean supportsSparkSQLClusteredBy() { return true; } + @Override + protected boolean supportPartition() { + return true; + } + @Test public void testCreateHiveFormatPartitionTable() { String tableName = 
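The testWriteHiveDynamicPartition body moved in this patch mixes a static partition spec (age_p1=1) with a dynamic one (age_p2, resolved per row from the last inserted value). A self-contained sketch of the same flow, with a placeholder table name; it needs a Spark build with Hive support.

    import org.apache.spark.sql.SparkSession;

    public class DynamicPartitionSketch {
      public static void main(String[] args) {
        SparkSession spark =
            SparkSession.builder().master("local[1]").enableHiveSupport().getOrCreate();
        spark.sql(
            "CREATE TABLE t (id INT, name STRING, age INT) "
                + "PARTITIONED BY (age_p1 INT, age_p2 STRING)");
        // age_p1 is pinned to 1; age_p2 is taken from the trailing value of each
        // row, so this row lands under .../t/age_p1=1/age_p2=b, matching the
        // directory check in the test above.
        spark.sql("INSERT OVERWRITE t PARTITION(age_p1=1, age_p2) VALUES (1, 'a', 3, 'b')");
        spark.sql("SELECT * FROM t").show();
        spark.stop();
      }
    }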
"hive_partition_table"; @@ -130,29 +134,4 @@ void testCreateDatasourceFormatPartitionTable() { checkTableReadWrite(tableInfo); checkPartitionDirExists(tableInfo); } - - @Test - public void testWriteHiveDynamicPartition() { - String tableName = "hive_dynamic_partition_table"; - - dropTableIfExists(tableName); - String createTableSQL = getCreateSimpleTableString(tableName); - createTableSQL = createTableSQL + "PARTITIONED BY (age_p1 INT, age_p2 STRING)"; - sql(createTableSQL); - - SparkTableInfo tableInfo = getTableInfo(tableName); - - // write data to dynamic partition - String insertData = - String.format( - "INSERT OVERWRITE %s PARTITION(age_p1=1, age_p2) values(1,'a',3,'b');", tableName); - sql(insertData); - List queryResult = getTableData(tableName); - Assertions.assertTrue(queryResult.size() == 1); - Assertions.assertEquals("1,a,3,1,b", queryResult.get(0)); - String location = tableInfo.getTableLocation(); - String partitionExpression = "age_p1=1/age_p2=b"; - Path partitionPath = new Path(location, partitionExpression); - checkDirExists(partitionPath); - } } diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java index 26ff88adbea..47fe53f1974 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java @@ -5,9 +5,6 @@ package com.datastrato.gravitino.integration.test.spark.iceberg; import com.datastrato.gravitino.integration.test.spark.SparkCommonIT; -import com.datastrato.gravitino.integration.test.util.spark.SparkTableInfo; -import com.datastrato.gravitino.integration.test.util.spark.SparkTableInfoChecker; -import java.util.Arrays; import java.util.Map; import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; import org.junit.jupiter.api.Assertions; @@ -34,6 +31,11 @@ protected boolean supportsSparkSQLClusteredBy() { return false; } + @Override + protected boolean supportPartition() { + return false; + } + @Test void testCreateAndLoadSchema() { String testDatabaseName = "t_create1"; @@ -87,23 +89,4 @@ void testAlterSchema() { NoSuchNamespaceException.class, () -> sql("ALTER DATABASE notExists SET DBPROPERTIES ('ID'='001')")); } - - @Test - void testCreateIcebergDatasourceFormatPartitionTable() { - String tableName = "datasource_partition_table"; - - dropTableIfExists(tableName); - String createTableSQL = getCreateSimpleTableString(tableName); - createTableSQL = createTableSQL + " USING ICEBERG PARTITIONED BY (name, age)"; - sql(createTableSQL); - SparkTableInfo tableInfo = getTableInfo(tableName); - SparkTableInfoChecker checker = - SparkTableInfoChecker.create() - .withName(tableName) - .withColumns(getSimpleTableColumn()) - .withIdentifyPartition(Arrays.asList("name", "age")); - checker.check(tableInfo); - checkTableReadWrite(tableInfo); - checkPartitionDirExists(tableInfo); - } } diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java index 68f2ca4005b..2f5447bcce4 100644 --- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java +++ 
b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java @@ -23,6 +23,82 @@ /** IcebergAdaptor provides specific operations for Iceberg Catalog to adapt to GravitinoCatalog. */ public class IcebergAdaptor implements GravitinoCatalogAdaptor { + private void initHiveProperties( + String catalogBackend, + Map gravitinoProperties, + HashMap icebergProperties) { + String metastoreUri = + gravitinoProperties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI); + Preconditions.checkArgument( + StringUtils.isNotBlank(metastoreUri), + "Couldn't get " + + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI + + " from iceberg catalog properties"); + String hiveWarehouse = + gravitinoProperties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE); + Preconditions.checkArgument( + StringUtils.isNotBlank(hiveWarehouse), + "Couldn't get " + + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE + + " from iceberg catalog properties"); + icebergProperties.put( + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_TYPE, + catalogBackend.toLowerCase(Locale.ENGLISH)); + icebergProperties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI, metastoreUri); + icebergProperties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE, hiveWarehouse); + } + + private void initJdbcProperties( + String catalogBackend, + Map gravitinoProperties, + HashMap icebergProperties) { + String jdbcUri = gravitinoProperties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI); + Preconditions.checkArgument( + StringUtils.isNotBlank(jdbcUri), + "Couldn't get " + + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI + + " from iceberg catalog properties"); + String jdbcWarehouse = + gravitinoProperties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE); + Preconditions.checkArgument( + StringUtils.isNotBlank(jdbcWarehouse), + "Couldn't get " + + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE + + " from iceberg catalog properties"); + String jdbcUser = gravitinoProperties.get(GravitinoSparkConfig.GRAVITINO_JDBC_USER); + Preconditions.checkArgument( + StringUtils.isNotBlank(jdbcUser), + "Couldn't get " + + GravitinoSparkConfig.GRAVITINO_JDBC_USER + + " from iceberg catalog properties"); + String jdbcPasswrod = gravitinoProperties.get(GravitinoSparkConfig.GRAVITINO_JDBC_PASSWORD); + Preconditions.checkArgument( + StringUtils.isNotBlank(jdbcPasswrod), + "Couldn't get " + + GravitinoSparkConfig.GRAVITINO_JDBC_PASSWORD + + " from iceberg catalog properties"); + String jdbcDriver = + gravitinoProperties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER); + Preconditions.checkArgument( + StringUtils.isNotBlank(jdbcDriver), + "Couldn't get " + + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER + + " from iceberg catalog properties"); + icebergProperties.put( + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_TYPE, + catalogBackend.toLowerCase(Locale.ENGLISH)); + icebergProperties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI, jdbcUri); + icebergProperties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE, jdbcWarehouse); + icebergProperties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_USER, jdbcUser); + icebergProperties.put( + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_PASSWORD, jdbcPasswrod); + icebergProperties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER, jdbcDriver); + icebergProperties.put( + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_INITIALIZE, + 
gravitinoProperties.getOrDefault( + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_INITIALIZE, "true")); + } + @Override public PropertiesConverter getPropertiesConverter() { return new IcebergPropertiesConverter(); @@ -51,70 +127,10 @@ public TableCatalog createAndInitSparkCatalog( switch (catalogBackend.toLowerCase(Locale.ENGLISH)) { case GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_BACKEND_HIVE: - String metastoreUri = properties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI); - Preconditions.checkArgument( - StringUtils.isNotBlank(metastoreUri), - "Couldn't get " - + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI - + " from iceberg catalog properties"); - String hiveWarehouse = - properties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE); - Preconditions.checkArgument( - StringUtils.isNotBlank(hiveWarehouse), - "Couldn't get " - + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE - + " from iceberg catalog properties"); - all.put( - GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_TYPE, - catalogBackend.toLowerCase(Locale.ENGLISH)); - all.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI, metastoreUri); - all.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE, hiveWarehouse); + initHiveProperties(catalogBackend, properties, all); break; case GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_BACKEND_JDBC: - String jdbcUri = properties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI); - Preconditions.checkArgument( - StringUtils.isNotBlank(jdbcUri), - "Couldn't get " - + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI - + " from iceberg catalog properties"); - String jdbcWarehouse = - properties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE); - Preconditions.checkArgument( - StringUtils.isNotBlank(jdbcWarehouse), - "Couldn't get " - + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE - + " from iceberg catalog properties"); - String jdbcUser = properties.get(GravitinoSparkConfig.GRAVITINO_JDBC_USER); - Preconditions.checkArgument( - StringUtils.isNotBlank(jdbcUser), - "Couldn't get " - + GravitinoSparkConfig.GRAVITINO_JDBC_USER - + " from iceberg catalog properties"); - String jdbcPasswrod = properties.get(GravitinoSparkConfig.GRAVITINO_JDBC_PASSWORD); - Preconditions.checkArgument( - StringUtils.isNotBlank(jdbcPasswrod), - "Couldn't get " - + GravitinoSparkConfig.GRAVITINO_JDBC_PASSWORD - + " from iceberg catalog properties"); - String jdbcDriver = - properties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER); - Preconditions.checkArgument( - StringUtils.isNotBlank(jdbcDriver), - "Couldn't get " - + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER - + " from iceberg catalog properties"); - all.put( - GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_TYPE, - catalogBackend.toLowerCase(Locale.ENGLISH)); - all.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI, jdbcUri); - all.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE, jdbcWarehouse); - all.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_USER, jdbcUser); - all.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_PASSWORD, jdbcPasswrod); - all.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER, jdbcDriver); - all.put( - GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_INITIALIZE, - properties.getOrDefault( - GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_INITIALIZE, "true")); + initJdbcProperties(catalogBackend, properties, all); break; default: // SparkCatalog does not support Memory 
type catalog From e434133cf1d093a62a4d5d978ce2af755f0ed169 Mon Sep 17 00:00:00 2001 From: caican00 Date: Wed, 27 Mar 2024 10:14:31 +0800 Subject: [PATCH 44/55] fix --- .../gravitino/integration/test/spark/SparkCommonIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 7f9bd4a6293..588ec62d0e8 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -303,7 +303,7 @@ void testAlterTableRenameColumn() { checkTableColumns(tableName, simpleTableColumns, getTableInfo(tableName)); String oldColumnName = "col1"; - String newColumnName = "col1_new"; + String newColumnName = "col2"; sql(String.format("ALTER TABLE %S ADD COLUMNS (col1 int)", tableName)); sql( From df700cc7f14cff234da7217b7c624b1df945371d Mon Sep 17 00:00:00 2001 From: caican00 Date: Wed, 27 Mar 2024 11:12:07 +0800 Subject: [PATCH 45/55] fix --- .../integration/test/spark/SparkCommonIT.java | 29 ++++++++----------- .../test/spark/hive/SparkHiveCatalogIT.java | 29 ++++++++++++------- 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 588ec62d0e8..bd916db67e8 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -411,28 +411,22 @@ void testComplexType() { @Test @EnabledIf("supportPartition") - public void testWriteHiveDynamicPartition() { - String tableName = "hive_dynamic_partition_table"; + void testCreateDatasourceFormatPartitionTable() { + String tableName = "datasource_partition_table"; dropTableIfExists(tableName); String createTableSQL = getCreateSimpleTableString(tableName); - createTableSQL = createTableSQL + "PARTITIONED BY (age_p1 INT, age_p2 STRING)"; + createTableSQL = createTableSQL + " USING PARQUET PARTITIONED BY (name, age)"; sql(createTableSQL); - SparkTableInfo tableInfo = getTableInfo(tableName); - - // write data to dynamic partition - String insertData = - String.format( - "INSERT OVERWRITE %s PARTITION(age_p1=1, age_p2) values(1,'a',3,'b');", tableName); - sql(insertData); - List queryResult = getTableData(tableName); - Assertions.assertTrue(queryResult.size() == 1); - Assertions.assertEquals("1,a,3,1,b", queryResult.get(0)); - String location = tableInfo.getTableLocation(); - String partitionExpression = "age_p1=1/age_p2=b"; - Path partitionPath = new Path(location, partitionExpression); - checkDirExists(partitionPath); + SparkTableInfoChecker checker = + SparkTableInfoChecker.create() + .withName(tableName) + .withColumns(getSimpleTableColumn()) + .withIdentifyPartition(Arrays.asList("name", "age")); + checker.check(tableInfo); + checkTableReadWrite(tableInfo); + checkPartitionDirExists(tableInfo); } @Test @@ -519,6 +513,7 @@ void testInsertTableAsSelect() { } @Test + @EnabledIf("supportPartition") void testInsertDatasourceFormatPartitionTableAsSelect() { String tableName = "insert_select_partition_table"; String newTableName = "new_" + tableName; diff --git 
a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java index 515b7181969..b0c07f4368f 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java @@ -12,6 +12,7 @@ import java.util.Arrays; import java.util.List; import java.util.Map; +import org.apache.hadoop.fs.Path; import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; import org.apache.spark.sql.types.DataTypes; import org.junit.jupiter.api.Assertions; @@ -117,22 +118,28 @@ public void testCreateHiveFormatPartitionTable() { } @Test - void testCreateDatasourceFormatPartitionTable() { - String tableName = "datasource_partition_table"; + public void testWriteHiveDynamicPartition() { + String tableName = "hive_dynamic_partition_table"; dropTableIfExists(tableName); String createTableSQL = getCreateSimpleTableString(tableName); - createTableSQL = createTableSQL + " USING PARQUET PARTITIONED BY (name, age)"; + createTableSQL = createTableSQL + "PARTITIONED BY (age_p1 INT, age_p2 STRING)"; sql(createTableSQL); + SparkTableInfo tableInfo = getTableInfo(tableName); - SparkTableInfoChecker checker = - SparkTableInfoChecker.create() - .withName(tableName) - .withColumns(getSimpleTableColumn()) - .withIdentifyPartition(Arrays.asList("name", "age")); - checker.check(tableInfo); - checkTableReadWrite(tableInfo); - checkPartitionDirExists(tableInfo); + + // write data to dynamic partition + String insertData = + String.format( + "INSERT OVERWRITE %s PARTITION(age_p1=1, age_p2) values(1,'a',3,'b');", tableName); + sql(insertData); + List queryResult = getTableData(tableName); + Assertions.assertTrue(queryResult.size() == 1); + Assertions.assertEquals("1,a,3,1,b", queryResult.get(0)); + String location = tableInfo.getTableLocation(); + String partitionExpression = "age_p1=1/age_p2=b"; + Path partitionPath = new Path(location, partitionExpression); + checkDirExists(partitionPath); } @Test From 74c7d2ee236510d273b67211b035579f4988cf77 Mon Sep 17 00:00:00 2001 From: caican00 Date: Wed, 27 Mar 2024 12:04:08 +0800 Subject: [PATCH 46/55] fix --- .../gravitino/spark/connector/iceberg/IcebergAdaptor.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java index 2f5447bcce4..ed7d89e7134 100644 --- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java +++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java @@ -93,10 +93,6 @@ private void initJdbcProperties( icebergProperties.put( GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_PASSWORD, jdbcPasswrod); icebergProperties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER, jdbcDriver); - icebergProperties.put( - GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_INITIALIZE, - gravitinoProperties.getOrDefault( - GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_INITIALIZE, "true")); } @Override From eeb63188a61140a8a81e77ec8d7586a4fc2043bd Mon Sep 17 00:00:00 2001 From: caican00 Date: Wed, 27 Mar 2024 15:16:48 +0800 Subject: [PATCH 47/55] fix --- 
.../integration/test/spark/SparkCommonIT.java | 56 ++++++++++++++++- .../test/spark/hive/SparkHiveCatalogIT.java | 52 +--------------- .../spark/iceberg/SparkIcebergCatalogIT.java | 60 +------------------ .../connector/iceberg/IcebergAdaptor.java | 2 +- 4 files changed, 56 insertions(+), 114 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index bd916db67e8..14fbdd987ed 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -60,7 +60,7 @@ private static String getInsertWithPartitionSql( // Whether supports [CLUSTERED BY col_name3 SORTED BY col_name INTO num_buckets BUCKETS] protected abstract boolean supportsSparkSQLClusteredBy(); - protected abstract boolean supportPartition(); + protected abstract boolean supportsPartition(); // Use a custom database not the original default database because SparkIT couldn't read&write // data to tables in default database. The main reason is default database location is @@ -93,6 +93,56 @@ void testLoadCatalogs() { Assertions.assertTrue(catalogs.contains(getCatalogName())); } + @Test + void testCreateAndLoadSchema() { + String testDatabaseName = "t_create1"; + dropDatabaseIfExists(testDatabaseName); + sql("CREATE DATABASE " + testDatabaseName + " WITH DBPROPERTIES (ID=001);"); + Map databaseMeta = getDatabaseMetadata(testDatabaseName); + Assertions.assertFalse(databaseMeta.containsKey("Comment")); + Assertions.assertTrue(databaseMeta.containsKey("Location")); + Assertions.assertEquals("datastrato", databaseMeta.get("Owner")); + String properties = databaseMeta.get("Properties"); + Assertions.assertTrue(properties.contains("ID=001")); + + testDatabaseName = "t_create2"; + dropDatabaseIfExists(testDatabaseName); + String testDatabaseLocation = "/tmp/" + testDatabaseName; + sql( + String.format( + "CREATE DATABASE %s COMMENT 'comment' LOCATION '%s'\n" + " WITH DBPROPERTIES (ID=002);", + testDatabaseName, testDatabaseLocation)); + databaseMeta = getDatabaseMetadata(testDatabaseName); + String comment = databaseMeta.get("Comment"); + Assertions.assertEquals("comment", comment); + Assertions.assertEquals("datastrato", databaseMeta.get("Owner")); + // underlying catalog may change /tmp/t_create2 to file:/tmp/t_create2 + Assertions.assertTrue(databaseMeta.get("Location").contains(testDatabaseLocation)); + properties = databaseMeta.get("Properties"); + Assertions.assertTrue(properties.contains("((ID,002))")); + } + + @Test + void testAlterSchema() { + String testDatabaseName = "t_alter"; + sql("CREATE DATABASE " + testDatabaseName + " WITH DBPROPERTIES (ID=001);"); + Assertions.assertTrue( + getDatabaseMetadata(testDatabaseName).get("Properties").contains("ID=002")); + + sql(String.format("ALTER DATABASE %s SET DBPROPERTIES ('ID'='002')", testDatabaseName)); + Assertions.assertFalse( + getDatabaseMetadata(testDatabaseName).get("Properties").contains("((ID,001))")); + Assertions.assertTrue( + getDatabaseMetadata(testDatabaseName).get("Properties").contains("((ID,002))")); + + // Hive metastore doesn't support alter database location, therefore this test method + // doesn't verify ALTER DATABASE database_name SET LOCATION 'new_location'. 
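The schema tests hoisted into SparkCommonIT in this patch all revolve around Spark's DBPROPERTIES round-trip. For reference, the SQL they exercise reduces to the sequence below; the database name and property values are placeholders, and the sketch runs against Spark's built-in catalog.

    import org.apache.spark.sql.SparkSession;

    public class SchemaPropertiesSketch {
      public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().master("local[1]").getOrCreate();
        spark.sql("CREATE DATABASE t_alter WITH DBPROPERTIES (ID=001)");
        // DESCRIBE DATABASE EXTENDED surfaces the Properties row the checker
        // parses, e.g. an entry rendered like (ID,001).
        spark.sql("DESCRIBE DATABASE EXTENDED t_alter").show(false);
        spark.sql("ALTER DATABASE t_alter SET DBPROPERTIES ('ID'='002')");
        spark.stop();
      }
    }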
+ + Assertions.assertThrowsExactly( + NoSuchNamespaceException.class, + () -> sql("ALTER DATABASE notExists SET DBPROPERTIES ('ID'='001')")); + } + @Test void testDropSchema() { String testDatabaseName = "t_drop"; @@ -410,7 +460,7 @@ void testComplexType() { } @Test - @EnabledIf("supportPartition") + @EnabledIf("supportsPartition") void testCreateDatasourceFormatPartitionTable() { String tableName = "datasource_partition_table"; @@ -513,7 +563,7 @@ void testInsertTableAsSelect() { } @Test - @EnabledIf("supportPartition") + @EnabledIf("supportsPartition") void testInsertDatasourceFormatPartitionTableAsSelect() { String tableName = "insert_select_partition_table"; String newTableName = "new_" + tableName; diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java index b0c07f4368f..d35002640c2 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java @@ -11,15 +11,12 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Map; import org.apache.hadoop.fs.Path; -import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; import org.apache.spark.sql.types.DataTypes; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; -import org.junit.platform.commons.util.StringUtils; @Tag("gravitino-docker-it") @TestInstance(TestInstance.Lifecycle.PER_CLASS) @@ -35,60 +32,13 @@ protected String getProvider() { return "hive"; } - @Test - void testCreateAndLoadSchema() { - String testDatabaseName = "t_create1"; - dropDatabaseIfExists(testDatabaseName); - sql("CREATE DATABASE " + testDatabaseName); - Map databaseMeta = getDatabaseMetadata(testDatabaseName); - Assertions.assertFalse(databaseMeta.containsKey("Comment")); - Assertions.assertTrue(databaseMeta.containsKey("Location")); - Assertions.assertEquals("datastrato", databaseMeta.get("Owner")); - String properties = databaseMeta.get("Properties"); - Assertions.assertTrue(StringUtils.isBlank(properties)); - - testDatabaseName = "t_create2"; - dropDatabaseIfExists(testDatabaseName); - String testDatabaseLocation = "/tmp/" + testDatabaseName; - sql( - String.format( - "CREATE DATABASE %s COMMENT 'comment' LOCATION '%s'\n" + " WITH DBPROPERTIES (ID=001);", - testDatabaseName, testDatabaseLocation)); - databaseMeta = getDatabaseMetadata(testDatabaseName); - String comment = databaseMeta.get("Comment"); - Assertions.assertEquals("comment", comment); - Assertions.assertEquals("datastrato", databaseMeta.get("Owner")); - // underlying catalog may change /tmp/t_create2 to file:/tmp/t_create2 - Assertions.assertTrue(databaseMeta.get("Location").contains(testDatabaseLocation)); - properties = databaseMeta.get("Properties"); - Assertions.assertEquals("((ID,001))", properties); - } - - @Test - void testAlterSchema() { - String testDatabaseName = "t_alter"; - sql("CREATE DATABASE " + testDatabaseName); - Assertions.assertTrue( - StringUtils.isBlank(getDatabaseMetadata(testDatabaseName).get("Properties"))); - - sql(String.format("ALTER DATABASE %s SET DBPROPERTIES ('ID'='001')", testDatabaseName)); - Assertions.assertEquals("((ID,001))", 
getDatabaseMetadata(testDatabaseName).get("Properties")); - - // Hive metastore doesn't support alter database location, therefore this test method - // doesn't verify ALTER DATABASE database_name SET LOCATION 'new_location'. - - Assertions.assertThrowsExactly( - NoSuchNamespaceException.class, - () -> sql("ALTER DATABASE notExists SET DBPROPERTIES ('ID'='001')")); - } - @Override protected boolean supportsSparkSQLClusteredBy() { return true; } @Override - protected boolean supportPartition() { + protected boolean supportsPartition() { return true; } diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java index 47fe53f1974..53cd78db2d1 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/iceberg/SparkIcebergCatalogIT.java @@ -5,11 +5,7 @@ package com.datastrato.gravitino.integration.test.spark.iceberg; import com.datastrato.gravitino.integration.test.spark.SparkCommonIT; -import java.util.Map; -import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Tag; -import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; @Tag("gravitino-docker-it") @@ -32,61 +28,7 @@ protected boolean supportsSparkSQLClusteredBy() { } @Override - protected boolean supportPartition() { + protected boolean supportsPartition() { return false; } - - @Test - void testCreateAndLoadSchema() { - String testDatabaseName = "t_create1"; - dropDatabaseIfExists(testDatabaseName); - sql("CREATE DATABASE " + testDatabaseName); - Map databaseMeta = getDatabaseMetadata(testDatabaseName); - Assertions.assertFalse(databaseMeta.containsKey("Comment")); - Assertions.assertTrue(databaseMeta.containsKey("Location")); - Assertions.assertEquals("datastrato", databaseMeta.get("Owner")); - String properties = databaseMeta.get("Properties"); - Assertions.assertEquals( - "((hive.metastore.database.owner,datastrato), (hive.metastore.database.owner-type,USER))", - properties); - - testDatabaseName = "t_create2"; - dropDatabaseIfExists(testDatabaseName); - String testDatabaseLocation = "/tmp/" + testDatabaseName; - sql( - String.format( - "CREATE DATABASE %s COMMENT 'comment' LOCATION '%s'\n" + " WITH DBPROPERTIES (ID=001);", - testDatabaseName, testDatabaseLocation)); - databaseMeta = getDatabaseMetadata(testDatabaseName); - String comment = databaseMeta.get("Comment"); - Assertions.assertEquals("comment", comment); - Assertions.assertEquals("datastrato", databaseMeta.get("Owner")); - // underlying catalog may change /tmp/t_create2 to file:/tmp/t_create2 - Assertions.assertTrue(databaseMeta.get("Location").contains(testDatabaseLocation)); - properties = databaseMeta.get("Properties"); - Assertions.assertEquals( - "((ID,001), (hive.metastore.database.owner,datastrato), (hive.metastore.database.owner-type,USER))", - properties); - } - - @Test - void testAlterSchema() { - String testDatabaseName = "t_alter"; - sql("CREATE DATABASE " + testDatabaseName); - Assertions.assertEquals( - "((hive.metastore.database.owner,datastrato), (hive.metastore.database.owner-type,USER))", - getDatabaseMetadata(testDatabaseName).get("Properties")); - - sql(String.format("ALTER DATABASE %s SET DBPROPERTIES ('ID'='001')", 
testDatabaseName)); - Assertions.assertEquals( - "((ID,001), (hive.metastore.database.owner,datastrato), (hive.metastore.database.owner-type,USER))", - getDatabaseMetadata(testDatabaseName).get("Properties")); - - // Hive metastore doesn't support alter database location, therefore this test method - // doesn't verify ALTER DATABASE database_name SET LOCATION 'new_location'. - - Assertions.assertThrowsExactly( - NoSuchNamespaceException.class, - () -> sql("ALTER DATABASE notExists SET DBPROPERTIES ('ID'='001')")); - } } diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java index ed7d89e7134..0ecf9833dc5 100644 --- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java +++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java @@ -86,7 +86,7 @@ private void initJdbcProperties( + " from iceberg catalog properties"); icebergProperties.put( GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_TYPE, - catalogBackend.toLowerCase(Locale.ENGLISH)); + catalogBackend.toLowerCase(Locale.ROOT)); icebergProperties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI, jdbcUri); icebergProperties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE, jdbcWarehouse); icebergProperties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_USER, jdbcUser); From 2a994c7e48a67634fc58c67dac6e91bd49b30bd0 Mon Sep 17 00:00:00 2001 From: caican00 Date: Wed, 27 Mar 2024 15:58:06 +0800 Subject: [PATCH 48/55] fix --- .../integration/test/spark/SparkCommonIT.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 14fbdd987ed..d9f752f1d08 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -103,7 +103,7 @@ void testCreateAndLoadSchema() { Assertions.assertTrue(databaseMeta.containsKey("Location")); Assertions.assertEquals("datastrato", databaseMeta.get("Owner")); String properties = databaseMeta.get("Properties"); - Assertions.assertTrue(properties.contains("ID=001")); + Assertions.assertTrue(properties.contains("(ID,001)")); testDatabaseName = "t_create2"; dropDatabaseIfExists(testDatabaseName); @@ -119,7 +119,7 @@ void testCreateAndLoadSchema() { // underlying catalog may change /tmp/t_create2 to file:/tmp/t_create2 Assertions.assertTrue(databaseMeta.get("Location").contains(testDatabaseLocation)); properties = databaseMeta.get("Properties"); - Assertions.assertTrue(properties.contains("((ID,002))")); + Assertions.assertTrue(properties.contains("(ID,002)")); } @Test @@ -127,13 +127,13 @@ void testAlterSchema() { String testDatabaseName = "t_alter"; sql("CREATE DATABASE " + testDatabaseName + " WITH DBPROPERTIES (ID=001);"); Assertions.assertTrue( - getDatabaseMetadata(testDatabaseName).get("Properties").contains("ID=002")); + getDatabaseMetadata(testDatabaseName).get("Properties").contains("(ID,001)")); sql(String.format("ALTER DATABASE %s SET DBPROPERTIES ('ID'='002')", testDatabaseName)); Assertions.assertFalse( - 
getDatabaseMetadata(testDatabaseName).get("Properties").contains("((ID,001))")); + getDatabaseMetadata(testDatabaseName).get("Properties").contains("(ID,001)")); Assertions.assertTrue( - getDatabaseMetadata(testDatabaseName).get("Properties").contains("((ID,002))")); + getDatabaseMetadata(testDatabaseName).get("Properties").contains("(ID,002)")); // Hive metastore doesn't support alter database location, therefore this test method // doesn't verify ALTER DATABASE database_name SET LOCATION 'new_location'. From a2e8efd0b77179eb0fd64414217c34433ccdefe7 Mon Sep 17 00:00:00 2001 From: caican00 Date: Mon, 1 Apr 2024 00:18:11 +0800 Subject: [PATCH 49/55] update --- .../integration/test/spark/SparkEnvIT.java | 7 +- .../spark/connector/GravitinoSparkConfig.java | 14 ---- .../connector/iceberg/IcebergAdaptor.java | 75 ++++++++++--------- .../iceberg/IcebergPropertiesConstants.java | 39 ++++++++++ .../connector/iceberg/SparkIcebergTable.java | 4 +- 5 files changed, 85 insertions(+), 54 deletions(-) create mode 100644 spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConstants.java diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java index 10b1baa7c49..fb8aa4900ab 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java @@ -12,6 +12,7 @@ import com.datastrato.gravitino.integration.test.container.HiveContainer; import com.datastrato.gravitino.integration.test.util.spark.SparkUtilIT; import com.datastrato.gravitino.spark.connector.GravitinoSparkConfig; +import com.datastrato.gravitino.spark.connector.iceberg.IcebergPropertiesConstants; import com.datastrato.gravitino.spark.connector.plugin.GravitinoSparkPlugin; import com.google.common.collect.Maps; import java.io.IOException; @@ -85,9 +86,9 @@ private void initMetalakeAndCatalogs() { properties.put(GravitinoSparkConfig.GRAVITINO_HIVE_METASTORE_URI, hiveMetastoreUri); break; case "lakehouse-iceberg": - properties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_BACKEND, "hive"); - properties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE, warehouse); - properties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI, hiveMetastoreUri); + properties.put(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_BACKEND, "hive"); + properties.put(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE, warehouse); + properties.put(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_URI, hiveMetastoreUri); break; default: throw new IllegalArgumentException("Unsupported provider: " + getProvider()); diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/GravitinoSparkConfig.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/GravitinoSparkConfig.java index 32c21bb830e..9c5e8e66b9f 100644 --- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/GravitinoSparkConfig.java +++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/GravitinoSparkConfig.java @@ -13,19 +13,5 @@ public class GravitinoSparkConfig { public static final String GRAVITINO_HIVE_METASTORE_URI = "metastore.uris"; public static final String SPARK_HIVE_METASTORE_URI = "hive.metastore.uris"; - public static final String 
LAKEHOUSE_ICEBERG_CATALOG_BACKEND = "catalog-backend"; - public static final String LAKEHOUSE_ICEBERG_CATALOG_TYPE = "type"; - public static final String LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE = "warehouse"; - public static final String LAKEHOUSE_ICEBERG_CATALOG_URI = "uri"; - public static final String GRAVITINO_JDBC_USER = "jdbc-user"; - public static final String LAKEHOUSE_ICEBERG_CATALOG_JDBC_USER = "jdbc.user"; - public static final String GRAVITINO_JDBC_PASSWORD = "jdbc-password"; - public static final String LAKEHOUSE_ICEBERG_CATALOG_JDBC_PASSWORD = "jdbc.password"; - public static final String LAKEHOUSE_ICEBERG_CATALOG_JDBC_INITIALIZE = "jdbc-initialize"; - public static final String LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER = "jdbc-driver"; - - public static final String LAKEHOUSE_ICEBERG_CATALOG_BACKEND_HIVE = "hive"; - public static final String LAKEHOUSE_ICEBERG_CATALOG_BACKEND_JDBC = "jdbc"; - private GravitinoSparkConfig() {} } diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java index 0ecf9833dc5..d7f4988daeb 100644 --- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java +++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java @@ -7,7 +7,6 @@ import com.datastrato.gravitino.rel.Table; import com.datastrato.gravitino.spark.connector.GravitinoCatalogAdaptor; -import com.datastrato.gravitino.spark.connector.GravitinoSparkConfig; import com.datastrato.gravitino.spark.connector.PropertiesConverter; import com.datastrato.gravitino.spark.connector.table.SparkBaseTable; import com.google.common.base.Preconditions; @@ -28,71 +27,76 @@ private void initHiveProperties( Map gravitinoProperties, HashMap icebergProperties) { String metastoreUri = - gravitinoProperties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI); + gravitinoProperties.get(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_URI); Preconditions.checkArgument( StringUtils.isNotBlank(metastoreUri), "Couldn't get " - + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI - + " from iceberg catalog properties"); + + IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_URI + + " from Iceberg Catalog properties"); String hiveWarehouse = - gravitinoProperties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE); + gravitinoProperties.get(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE); Preconditions.checkArgument( StringUtils.isNotBlank(hiveWarehouse), "Couldn't get " - + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE - + " from iceberg catalog properties"); + + IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE + + " from Iceberg Catalog properties"); icebergProperties.put( - GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_TYPE, + IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_TYPE, catalogBackend.toLowerCase(Locale.ENGLISH)); - icebergProperties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI, metastoreUri); - icebergProperties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE, hiveWarehouse); + icebergProperties.put(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_URI, metastoreUri); + icebergProperties.put( + IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE, hiveWarehouse); } private void initJdbcProperties( String catalogBackend, Map gravitinoProperties, HashMap icebergProperties) { - 
String jdbcUri = gravitinoProperties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI); + String jdbcUri = + gravitinoProperties.get(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_URI); Preconditions.checkArgument( StringUtils.isNotBlank(jdbcUri), "Couldn't get " - + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI - + " from iceberg catalog properties"); + + IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_URI + + " from Iceberg Catalog properties"); String jdbcWarehouse = - gravitinoProperties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE); + gravitinoProperties.get(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE); Preconditions.checkArgument( StringUtils.isNotBlank(jdbcWarehouse), "Couldn't get " - + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE - + " from iceberg catalog properties"); - String jdbcUser = gravitinoProperties.get(GravitinoSparkConfig.GRAVITINO_JDBC_USER); + + IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE + + " from Iceberg Catalog properties"); + String jdbcUser = gravitinoProperties.get(IcebergPropertiesConstants.GRAVITINO_JDBC_USER); Preconditions.checkArgument( StringUtils.isNotBlank(jdbcUser), "Couldn't get " - + GravitinoSparkConfig.GRAVITINO_JDBC_USER - + " from iceberg catalog properties"); - String jdbcPasswrod = gravitinoProperties.get(GravitinoSparkConfig.GRAVITINO_JDBC_PASSWORD); + + IcebergPropertiesConstants.GRAVITINO_JDBC_USER + + " from Iceberg Catalog properties"); + String jdbcPassword = + gravitinoProperties.get(IcebergPropertiesConstants.GRAVITINO_JDBC_PASSWORD); Preconditions.checkArgument( - StringUtils.isNotBlank(jdbcPasswrod), + StringUtils.isNotBlank(jdbcPassword), "Couldn't get " - + GravitinoSparkConfig.GRAVITINO_JDBC_PASSWORD - + " from iceberg catalog properties"); + + IcebergPropertiesConstants.GRAVITINO_JDBC_PASSWORD + + " from Iceberg Catalog properties"); String jdbcDriver = - gravitinoProperties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER); + gravitinoProperties.get(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER); Preconditions.checkArgument( StringUtils.isNotBlank(jdbcDriver), "Couldn't get " - + GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER - + " from iceberg catalog properties"); + + IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER + + " from Iceberg Catalog properties"); icebergProperties.put( - GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_TYPE, + IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_TYPE, catalogBackend.toLowerCase(Locale.ROOT)); - icebergProperties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_URI, jdbcUri); - icebergProperties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE, jdbcWarehouse); - icebergProperties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_USER, jdbcUser); + icebergProperties.put(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_URI, jdbcUri); + icebergProperties.put( + IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE, jdbcWarehouse); + icebergProperties.put(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_JDBC_USER, jdbcUser); + icebergProperties.put( + IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_JDBC_PASSWORD, jdbcPassword); icebergProperties.put( - GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_PASSWORD, jdbcPasswrod); - icebergProperties.put(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER, jdbcDriver); + IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER, jdbcDriver); 
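Two property namespaces meet in this method: the Gravitino catalog stores dash-separated keys (jdbc-user, jdbc-password), while Iceberg's SparkCatalog expects the dot-separated jdbc.user and jdbc.password; jdbc-driver keeps its dash form on both sides. A compact sketch of that translation with hypothetical values:

    import java.util.HashMap;
    import java.util.Map;

    public class JdbcKeyMappingSketch {
      public static void main(String[] args) {
        Map<String, String> gravitinoProps = new HashMap<>();
        gravitinoProps.put("jdbc-user", "iceberg");    // hypothetical credentials
        gravitinoProps.put("jdbc-password", "secret");

        Map<String, String> icebergProps = new HashMap<>();
        // dash-separated Gravitino keys become dot-separated Iceberg keys
        icebergProps.put("jdbc.user", gravitinoProps.get("jdbc-user"));
        icebergProps.put("jdbc.password", gravitinoProps.get("jdbc-password"));
        System.out.println(icebergProps);
      }
    }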
} @Override @@ -115,17 +119,18 @@ public TableCatalog createAndInitSparkCatalog( Preconditions.checkArgument( properties != null, "Iceberg Catalog properties should not be null"); - String catalogBackend = properties.get(GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_BACKEND); + String catalogBackend = + properties.get(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_BACKEND); Preconditions.checkArgument( StringUtils.isNotBlank(catalogBackend), "Iceberg Catalog backend should not be empty."); HashMap all = new HashMap<>(options); switch (catalogBackend.toLowerCase(Locale.ENGLISH)) { - case GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_BACKEND_HIVE: + case IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_BACKEND_HIVE: initHiveProperties(catalogBackend, properties, all); break; - case GravitinoSparkConfig.LAKEHOUSE_ICEBERG_CATALOG_BACKEND_JDBC: + case IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_BACKEND_JDBC: initJdbcProperties(catalogBackend, properties, all); break; default: diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConstants.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConstants.java new file mode 100644 index 00000000000..6470efa8625 --- /dev/null +++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConstants.java @@ -0,0 +1,39 @@ +/* + * Copyright 2024 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ + +package com.datastrato.gravitino.spark.connector.iceberg; + +import com.google.common.annotations.VisibleForTesting; + +public class IcebergPropertiesConstants { + + @VisibleForTesting + public static final String LAKEHOUSE_ICEBERG_CATALOG_BACKEND = + IcebergCatalogPropertiesMetadata.CATALOG_BACKEND; + + @VisibleForTesting + public static final String LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE = + IcebergCatalogPropertiesMetadata.WAREHOUSE; + + @VisibleForTesting + public static final String LAKEHOUSE_ICEBERG_CATALOG_URI = IcebergCatalogPropertiesMetadata.URI; + + public static final String GRAVITINO_JDBC_USER = + IcebergCatalogPropertiesMetadata.GRAVITINO_JDBC_USER; + public static final String LAKEHOUSE_ICEBERG_CATALOG_JDBC_USER = + IcebergCatalogPropertiesMetadata.ICEBERG_JDBC_USER; + public static final String GRAVITINO_JDBC_PASSWORD = + IcebergCatalogPropertiesMetadata.GRAVITINO_JDBC_PASSWORD; + public static final String LAKEHOUSE_ICEBERG_CATALOG_JDBC_PASSWORD = + IcebergCatalogPropertiesMetadata.ICEBERG_JDBC_PASSWORD; + public static final String LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER = + IcebergCatalogPropertiesMetadata.GRAVITINO_JDBC_DRIVER; + + public static final String LAKEHOUSE_ICEBERG_CATALOG_TYPE = "type"; + public static final String LAKEHOUSE_ICEBERG_CATALOG_BACKEND_HIVE = "hive"; + public static final String LAKEHOUSE_ICEBERG_CATALOG_BACKEND_JDBC = "jdbc"; + + private IcebergPropertiesConstants() {} +} diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/SparkIcebergTable.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/SparkIcebergTable.java index fe085826ed0..aabdf149efa 100644 --- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/SparkIcebergTable.java +++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/SparkIcebergTable.java @@ -16,8 +16,8 @@ public class SparkIcebergTable extends SparkBaseTable { public 
SparkIcebergTable( Identifier identifier, Table gravitinoTable, - TableCatalog sparkCatalog, + TableCatalog sparkIcebergCatalog, PropertiesConverter propertiesConverter) { - super(identifier, gravitinoTable, sparkCatalog, propertiesConverter); + super(identifier, gravitinoTable, sparkIcebergCatalog, propertiesConverter); } } From 69a7af65d44333cd79c94a7c074e6bc69f3e6ffc Mon Sep 17 00:00:00 2001 From: caican00 Date: Mon, 1 Apr 2024 00:30:04 +0800 Subject: [PATCH 50/55] update --- .../spark/connector/iceberg/IcebergPropertiesConstants.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConstants.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConstants.java index 6470efa8625..b5075430269 100644 --- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConstants.java +++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConstants.java @@ -5,13 +5,14 @@ package com.datastrato.gravitino.spark.connector.iceberg; +import com.datastrato.gravitino.catalog.lakehouse.iceberg.IcebergCatalogPropertiesMetadata; import com.google.common.annotations.VisibleForTesting; public class IcebergPropertiesConstants { @VisibleForTesting public static final String LAKEHOUSE_ICEBERG_CATALOG_BACKEND = - IcebergCatalogPropertiesMetadata.CATALOG_BACKEND; + IcebergCatalogPropertiesMetadata.CATALOG_BACKEND_NAME; @VisibleForTesting public static final String LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE = From 3d494ac1d8c6aa6ab9fabb31488f9e1bcd89235e Mon Sep 17 00:00:00 2001 From: caican00 Date: Mon, 1 Apr 2024 18:08:43 +0800 Subject: [PATCH 51/55] update --- .../integration/test/spark/SparkCommonIT.java | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index bb4f95f1d72..a224e3c8842 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -4,10 +4,12 @@ */ package com.datastrato.gravitino.integration.test.spark; +import com.datastrato.gravitino.exceptions.NoSuchSchemaException; import com.datastrato.gravitino.integration.test.util.spark.SparkTableInfo; import com.datastrato.gravitino.integration.test.util.spark.SparkTableInfo.SparkColumnInfo; import com.datastrato.gravitino.integration.test.util.spark.SparkTableInfoChecker; import com.google.common.collect.ImmutableMap; +import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -15,6 +17,7 @@ import java.util.Map; import java.util.Set; import java.util.stream.Collectors; +import org.apache.commons.io.FileUtils; import org.apache.hadoop.fs.Path; import org.apache.spark.sql.AnalysisException; import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; @@ -28,8 +31,11 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.condition.EnabledIf; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public abstract class SparkCommonIT extends SparkEnvIT { + private static final Logger LOG = 
LoggerFactory.getLogger(SparkCommonIT.class); // To generate test data for write&read table. protected static final Map typeConstant = @@ -63,14 +69,25 @@ private static String getInsertWithPartitionSql( protected abstract boolean supportsPartition(); - // Use a custom database not the original default database because SparkIT couldn't read&write - // data to tables in default database. The main reason is default database location is + // Use a custom database not the original default database because SparkCommonIT couldn't + // read&write data to tables in default database. The main reason is default database location is // determined by `hive.metastore.warehouse.dir` in hive-site.xml which is local HDFS address // not real HDFS address. The location of tables created under default database is like // hdfs://localhost:9000/xxx which couldn't read write data from SparkCommonIT. Will use default // database after spark connector support Alter database xx set location command. @BeforeAll void initDefaultDatabase() { + // cleanup the metastore_db directory in embedded mode + // to avoid the exception about `ERROR XSDB6: Another instance of Derby may have already booted + // the database /home/runner/work/gravitino/gravitino/integration-test/metastore_db` + File hiveLocalMetaStorePath = new File("metastore_db"); + try { + if (hiveLocalMetaStorePath.exists()) { + FileUtils.deleteDirectory(hiveLocalMetaStorePath); + } + } catch (IOException e) { + LOG.error(e.getMessage(), e); + } sql("USE " + getCatalogName()); createDatabaseIfNotExists(getDefaultDatabase()); } @@ -88,6 +105,18 @@ void cleanUp() { .forEach(database -> sql(String.format("DROP DATABASE IF EXISTS %s CASCADE", database))); } + @Test + void testListTables() { + String tableName = "t_list"; + Set tableNames = listTableNames(); + Assertions.assertFalse(tableNames.contains(tableName)); + createSimpleTable(tableName); + tableNames = listTableNames(); + Assertions.assertTrue(tableNames.contains(tableName)); + Assertions.assertThrowsExactly( + NoSuchSchemaException.class, () -> sql("SHOW TABLES IN nonexistent_schema")); + } + @Test void testLoadCatalogs() { Set catalogs = getCatalogs(); From 2301e3530adb323a363d3ea6290595a26bf9d0ad Mon Sep 17 00:00:00 2001 From: caican00 Date: Mon, 1 Apr 2024 21:31:23 +0800 Subject: [PATCH 52/55] update --- .../gravitino/integration/test/spark/SparkCommonIT.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index a224e3c8842..e72204d33ad 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -4,7 +4,6 @@ */ package com.datastrato.gravitino.integration.test.spark; -import com.datastrato.gravitino.exceptions.NoSuchSchemaException; import com.datastrato.gravitino.integration.test.util.spark.SparkTableInfo; import com.datastrato.gravitino.integration.test.util.spark.SparkTableInfo.SparkColumnInfo; import com.datastrato.gravitino.integration.test.util.spark.SparkTableInfoChecker; @@ -114,7 +113,7 @@ void testListTables() { tableNames = listTableNames(); Assertions.assertTrue(tableNames.contains(tableName)); Assertions.assertThrowsExactly( - NoSuchSchemaException.class, () -> sql("SHOW TABLES IN nonexistent_schema")); + 
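The metastore_db cleanup added above works around embedded Derby allowing only one boot of a given database directory per JVM (the ERROR XSDB6 quoted in the comment). An alternative, sketched below, is to point each run at a fresh Derby directory through the standard JDO connection URL; whether this particular harness would honor the setting is an assumption, not something these hunks confirm.

    import java.nio.file.Files;
    import org.apache.spark.sql.SparkSession;

    public class FreshDerbyMetastoreSketch {
      public static void main(String[] args) throws Exception {
        String dbDir = Files.createTempDirectory("metastore_db").toString();
        SparkSession spark =
            SparkSession.builder()
                .master("local[1]")
                .enableHiveSupport()
                // embedded Derby database unique to this run, so no stale lock
                .config(
                    "spark.hadoop.javax.jdo.option.ConnectionURL",
                    "jdbc:derby:;databaseName=" + dbDir + ";create=true")
                .getOrCreate();
        spark.sql("SHOW DATABASES").show();
        spark.stop();
      }
    }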
+        NoSuchNamespaceException.class, () -> sql("SHOW TABLES IN nonexistent_schema"));
   }
 
   @Test

From 71577ed3e8a36af12d3a09f3d59b15b902bff7e6 Mon Sep 17 00:00:00 2001
From: caican00
Date: Tue, 2 Apr 2024 10:06:42 +0800
Subject: [PATCH 53/55] rename Iceberg property constants to GRAVITINO_ICEBERG

---
 .../integration/test/spark/SparkCommonIT.java |  1 +
 .../integration/test/spark/SparkEnvIT.java    |  6 +--
 .../connector/iceberg/IcebergAdaptor.java     | 53 ++++++++++---------
 .../iceberg/IcebergPropertiesConstants.java   | 22 ++++----
 4 files changed, 42 insertions(+), 40 deletions(-)

diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java
index e72204d33ad..b12094e80a2 100644
--- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java
+++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java
@@ -107,6 +107,7 @@ void cleanUp() {
   @Test
   void testListTables() {
     String tableName = "t_list";
+    dropTableIfExists(tableName);
     Set<String> tableNames = listTableNames();
     Assertions.assertFalse(tableNames.contains(tableName));
     createSimpleTable(tableName);
diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java
index fb8aa4900ab..096402e6121 100644
--- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java
+++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java
@@ -86,9 +86,9 @@ private void initMetalakeAndCatalogs() {
         properties.put(GravitinoSparkConfig.GRAVITINO_HIVE_METASTORE_URI, hiveMetastoreUri);
         break;
       case "lakehouse-iceberg":
-        properties.put(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_BACKEND, "hive");
-        properties.put(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE, warehouse);
-        properties.put(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_URI, hiveMetastoreUri);
+        properties.put(IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_BACKEND, "hive");
+        properties.put(IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_WAREHOUSE, warehouse);
+        properties.put(IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_URI, hiveMetastoreUri);
         break;
       default:
         throw new IllegalArgumentException("Unsupported provider: " + getProvider());
diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java
index d7f4988daeb..79d5a82f81e 100644
--- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java
+++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java
@@ -27,25 +27,25 @@ private void initHiveProperties(
       Map<String, String> gravitinoProperties,
       HashMap<String, String> icebergProperties) {
     String metastoreUri =
-        gravitinoProperties.get(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_URI);
+        gravitinoProperties.get(IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_URI);
     Preconditions.checkArgument(
         StringUtils.isNotBlank(metastoreUri),
         "Couldn't get "
-            + IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_URI
+            + IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_URI
            + " from Iceberg Catalog properties");
     String hiveWarehouse =
-        gravitinoProperties.get(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE);
+        gravitinoProperties.get(IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_WAREHOUSE);
     Preconditions.checkArgument(
         StringUtils.isNotBlank(hiveWarehouse),
         "Couldn't get "
-            + IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE
+            + IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_WAREHOUSE
            + " from Iceberg Catalog properties");
     icebergProperties.put(
-        IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_TYPE,
+        IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_TYPE,
         catalogBackend.toLowerCase(Locale.ENGLISH));
-    icebergProperties.put(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_URI, metastoreUri);
+    icebergProperties.put(IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_URI, metastoreUri);
     icebergProperties.put(
-        IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE, hiveWarehouse);
+        IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_WAREHOUSE, hiveWarehouse);
   }
 
   private void initJdbcProperties(
@@ -53,50 +53,51 @@ private void initJdbcProperties(
       Map<String, String> gravitinoProperties,
       HashMap<String, String> icebergProperties) {
     String jdbcUri =
-        gravitinoProperties.get(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_URI);
+        gravitinoProperties.get(IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_URI);
     Preconditions.checkArgument(
         StringUtils.isNotBlank(jdbcUri),
         "Couldn't get "
-            + IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_URI
+            + IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_URI
            + " from Iceberg Catalog properties");
     String jdbcWarehouse =
-        gravitinoProperties.get(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE);
+        gravitinoProperties.get(IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_WAREHOUSE);
     Preconditions.checkArgument(
         StringUtils.isNotBlank(jdbcWarehouse),
         "Couldn't get "
-            + IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE
+            + IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_WAREHOUSE
            + " from Iceberg Catalog properties");
-    String jdbcUser = gravitinoProperties.get(IcebergPropertiesConstants.GRAVITINO_JDBC_USER);
+    String jdbcUser =
+        gravitinoProperties.get(IcebergPropertiesConstants.GRAVITINO_ICEBERG_JDBC_USER);
     Preconditions.checkArgument(
         StringUtils.isNotBlank(jdbcUser),
         "Couldn't get "
-            + IcebergPropertiesConstants.GRAVITINO_JDBC_USER
+            + IcebergPropertiesConstants.GRAVITINO_ICEBERG_JDBC_USER
            + " from Iceberg Catalog properties");
     String jdbcPassword =
-        gravitinoProperties.get(IcebergPropertiesConstants.GRAVITINO_JDBC_PASSWORD);
+        gravitinoProperties.get(IcebergPropertiesConstants.GRAVITINO_ICEBERG_JDBC_PASSWORD);
     Preconditions.checkArgument(
         StringUtils.isNotBlank(jdbcPassword),
         "Couldn't get "
-            + IcebergPropertiesConstants.GRAVITINO_JDBC_PASSWORD
+            + IcebergPropertiesConstants.GRAVITINO_ICEBERG_JDBC_PASSWORD
            + " from Iceberg Catalog properties");
     String jdbcDriver =
-        gravitinoProperties.get(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER);
+        gravitinoProperties.get(IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_JDBC_DRIVER);
     Preconditions.checkArgument(
         StringUtils.isNotBlank(jdbcDriver),
         "Couldn't get "
-            + IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER
+            + IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_JDBC_DRIVER
            + " from Iceberg Catalog properties");
     icebergProperties.put(
-        IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_TYPE,
+        IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_TYPE,
         catalogBackend.toLowerCase(Locale.ROOT));
-    icebergProperties.put(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_URI, jdbcUri);
+    icebergProperties.put(IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_URI, jdbcUri);
     icebergProperties.put(
-        IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE, jdbcWarehouse);
+        IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_WAREHOUSE, jdbcWarehouse);
-    icebergProperties.put(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_JDBC_USER, jdbcUser);
+    icebergProperties.put(IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_JDBC_USER, jdbcUser);
     icebergProperties.put(
-        IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_JDBC_PASSWORD, jdbcPassword);
+        IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_JDBC_PASSWORD, jdbcPassword);
     icebergProperties.put(
-        IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER, jdbcDriver);
+        IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_JDBC_DRIVER, jdbcDriver);
   }
 
   @Override
@@ -120,17 +121,17 @@ public TableCatalog createAndInitSparkCatalog(
         properties != null, "Iceberg Catalog properties should not be null");
 
     String catalogBackend =
-        properties.get(IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_BACKEND);
+        properties.get(IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_BACKEND);
     Preconditions.checkArgument(
         StringUtils.isNotBlank(catalogBackend), "Iceberg Catalog backend should not be empty.");
 
     HashMap<String, String> all = new HashMap<>(options);
 
     switch (catalogBackend.toLowerCase(Locale.ENGLISH)) {
-      case IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_BACKEND_HIVE:
+      case IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_BACKEND_HIVE:
         initHiveProperties(catalogBackend, properties, all);
         break;
-      case IcebergPropertiesConstants.LAKEHOUSE_ICEBERG_CATALOG_BACKEND_JDBC:
+      case IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_BACKEND_JDBC:
         initJdbcProperties(catalogBackend, properties, all);
         break;
       default:
diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConstants.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConstants.java
index b5075430269..85a84005903 100644
--- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConstants.java
+++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConstants.java
@@ -11,30 +11,30 @@ public class IcebergPropertiesConstants {
 
   @VisibleForTesting
-  public static final String LAKEHOUSE_ICEBERG_CATALOG_BACKEND =
+  public static final String GRAVITINO_ICEBERG_CATALOG_BACKEND =
       IcebergCatalogPropertiesMetadata.CATALOG_BACKEND_NAME;
 
   @VisibleForTesting
-  public static final String LAKEHOUSE_ICEBERG_CATALOG_WAREHOUSE =
+  public static final String GRAVITINO_ICEBERG_CATALOG_WAREHOUSE =
       IcebergCatalogPropertiesMetadata.WAREHOUSE;
 
   @VisibleForTesting
-  public static final String LAKEHOUSE_ICEBERG_CATALOG_URI = IcebergCatalogPropertiesMetadata.URI;
+  public static final String GRAVITINO_ICEBERG_CATALOG_URI = IcebergCatalogPropertiesMetadata.URI;
 
-  public static final String GRAVITINO_JDBC_USER =
+  public static final String GRAVITINO_ICEBERG_JDBC_USER =
       IcebergCatalogPropertiesMetadata.GRAVITINO_JDBC_USER;
-  public static final String LAKEHOUSE_ICEBERG_CATALOG_JDBC_USER =
+  public static final String GRAVITINO_ICEBERG_CATALOG_JDBC_USER =
       IcebergCatalogPropertiesMetadata.ICEBERG_JDBC_USER;
-  public static final String GRAVITINO_JDBC_PASSWORD =
+  public static final String GRAVITINO_ICEBERG_JDBC_PASSWORD =
       IcebergCatalogPropertiesMetadata.GRAVITINO_JDBC_PASSWORD;
-  public static final String LAKEHOUSE_ICEBERG_CATALOG_JDBC_PASSWORD =
+  public static final String GRAVITINO_ICEBERG_CATALOG_JDBC_PASSWORD =
       IcebergCatalogPropertiesMetadata.ICEBERG_JDBC_PASSWORD;
-  public static final String LAKEHOUSE_ICEBERG_CATALOG_JDBC_DRIVER =
+  public static final String GRAVITINO_ICEBERG_CATALOG_JDBC_DRIVER =
       IcebergCatalogPropertiesMetadata.GRAVITINO_JDBC_DRIVER;
 
-  public static final String LAKEHOUSE_ICEBERG_CATALOG_TYPE = "type";
-  public static final String LAKEHOUSE_ICEBERG_CATALOG_BACKEND_HIVE = "hive";
-  public static final String LAKEHOUSE_ICEBERG_CATALOG_BACKEND_JDBC = "jdbc";
+  public static final String GRAVITINO_ICEBERG_CATALOG_TYPE = "type";
+  public static final String GRAVITINO_ICEBERG_CATALOG_BACKEND_HIVE = "hive";
+  public static final String GRAVITINO_ICEBERG_CATALOG_BACKEND_JDBC = "jdbc";
 
   private IcebergPropertiesConstants() {}
 }

From 8f13efcdfc00266fa567f4e72f69ceadd5acf83d Mon Sep 17 00:00:00 2001
From: caican00
Date: Tue, 2 Apr 2024 14:16:30 +0800
Subject: [PATCH 54/55] rethrow metastore_db cleanup failures and clarify comments

---
 .../integration/test/spark/SparkCommonIT.java | 17 +++++++++++----
 .../connector/iceberg/IcebergAdaptor.java     | 21 ++++++++-----------
 .../iceberg/IcebergPropertiesConstants.java   | 10 ++++-----
 3 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java
index b12094e80a2..b313f3a9018 100644
--- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java
+++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java
@@ -75,10 +75,18 @@ private static String getInsertWithPartitionSql(
   // hdfs://localhost:9000/xxx which couldn't read write data from SparkCommonIT. Will use default
   // database after spark connector support Alter database xx set location command.
   @BeforeAll
-  void initDefaultDatabase() {
-    // cleanup the metastore_db directory in embedded mode
-    // to avoid the exception about `ERROR XSDB6: Another instance of Derby may have already booted
-    // the database /home/runner/work/gravitino/gravitino/integration-test/metastore_db`
+  void initDefaultDatabase() throws IOException {
+    // In embedded mode, Derby acts as the backend database for the Hive metastore
+    // and creates a directory named metastore_db to store its metadata,
+    // supporting only one connection at a time.
+    // Previously, only SparkHiveCatalogIT accessed Derby, so no exception occurred.
+    // Now, SparkIcebergCatalogIT runs alongside it.
+    // The exception `ERROR XSDB6: Another instance of Derby may have already
+    // booted the database {GRAVITINO_HOME}/integration-test/metastore_db` occurs when
+    // SparkIcebergCatalogIT is initialized after SparkHiveCatalogIT has executed.
+    // The root cause is that the lock file in the metastore_db directory is not cleaned up,
+    // so a new connection cannot be created.
+    // Clean up the directory here so that a new connection can be created.
     File hiveLocalMetaStorePath = new File("metastore_db");
     try {
       if (hiveLocalMetaStorePath.exists()) {
         FileUtils.deleteDirectory(hiveLocalMetaStorePath);
       }
     } catch (IOException e) {
       LOG.error(e.getMessage(), e);
+      throw e;
     }
     sql("USE " + getCatalogName());
     createDatabaseIfNotExists(getDefaultDatabase());
   }
diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java
index 79d5a82f81e..cf73dfb0427 100644
--- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java
+++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergAdaptor.java
@@ -66,26 +66,25 @@ private void initJdbcProperties(
         "Couldn't get "
             + IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_WAREHOUSE
            + " from Iceberg Catalog properties");
-    String jdbcUser =
-        gravitinoProperties.get(IcebergPropertiesConstants.GRAVITINO_ICEBERG_JDBC_USER);
+    String jdbcUser = gravitinoProperties.get(IcebergPropertiesConstants.GRAVITINO_JDBC_USER);
     Preconditions.checkArgument(
         StringUtils.isNotBlank(jdbcUser),
         "Couldn't get "
-            + IcebergPropertiesConstants.GRAVITINO_ICEBERG_JDBC_USER
+            + IcebergPropertiesConstants.GRAVITINO_JDBC_USER
            + " from Iceberg Catalog properties");
     String jdbcPassword =
-        gravitinoProperties.get(IcebergPropertiesConstants.GRAVITINO_ICEBERG_JDBC_PASSWORD);
+        gravitinoProperties.get(IcebergPropertiesConstants.GRAVITINO_JDBC_PASSWORD);
     Preconditions.checkArgument(
         StringUtils.isNotBlank(jdbcPassword),
         "Couldn't get "
-            + IcebergPropertiesConstants.GRAVITINO_ICEBERG_JDBC_PASSWORD
+            + IcebergPropertiesConstants.GRAVITINO_JDBC_PASSWORD
            + " from Iceberg Catalog properties");
     String jdbcDriver =
-        gravitinoProperties.get(IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_JDBC_DRIVER);
+        gravitinoProperties.get(IcebergPropertiesConstants.GRAVITINO_ICEBERG_JDBC_DRIVER);
     Preconditions.checkArgument(
         StringUtils.isNotBlank(jdbcDriver),
         "Couldn't get "
-            + IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_JDBC_DRIVER
+            + IcebergPropertiesConstants.GRAVITINO_ICEBERG_JDBC_DRIVER
            + " from Iceberg Catalog properties");
     icebergProperties.put(
         IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_TYPE,
@@ -93,11 +92,9 @@ private void initJdbcProperties(
     icebergProperties.put(IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_URI, jdbcUri);
     icebergProperties.put(
         IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_WAREHOUSE, jdbcWarehouse);
-    icebergProperties.put(IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_JDBC_USER, jdbcUser);
-    icebergProperties.put(
-        IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_JDBC_PASSWORD, jdbcPassword);
-    icebergProperties.put(
-        IcebergPropertiesConstants.GRAVITINO_ICEBERG_CATALOG_JDBC_DRIVER, jdbcDriver);
+    icebergProperties.put(IcebergPropertiesConstants.GRAVITINO_ICEBERG_JDBC_USER, jdbcUser);
+    icebergProperties.put(IcebergPropertiesConstants.GRAVITINO_ICEBERG_JDBC_PASSWORD, jdbcPassword);
+    icebergProperties.put(IcebergPropertiesConstants.GRAVITINO_ICEBERG_JDBC_DRIVER, jdbcDriver);
   }
diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConstants.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConstants.java
index 85a84005903..d69964785ab 100644
--- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConstants.java
+++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/iceberg/IcebergPropertiesConstants.java
@@ -21,15 +21,15 @@ public class IcebergPropertiesConstants {
   @VisibleForTesting
   public static final String GRAVITINO_ICEBERG_CATALOG_URI = IcebergCatalogPropertiesMetadata.URI;
 
-  public static final String GRAVITINO_ICEBERG_JDBC_USER =
+  public static final String GRAVITINO_JDBC_USER =
       IcebergCatalogPropertiesMetadata.GRAVITINO_JDBC_USER;
-  public static final String GRAVITINO_ICEBERG_CATALOG_JDBC_USER =
+  public static final String GRAVITINO_ICEBERG_JDBC_USER =
       IcebergCatalogPropertiesMetadata.ICEBERG_JDBC_USER;
-  public static final String GRAVITINO_ICEBERG_JDBC_PASSWORD =
+  public static final String GRAVITINO_JDBC_PASSWORD =
       IcebergCatalogPropertiesMetadata.GRAVITINO_JDBC_PASSWORD;
-  public static final String GRAVITINO_ICEBERG_CATALOG_JDBC_PASSWORD =
+  public static final String GRAVITINO_ICEBERG_JDBC_PASSWORD =
       IcebergCatalogPropertiesMetadata.ICEBERG_JDBC_PASSWORD;
-  public static final String GRAVITINO_ICEBERG_CATALOG_JDBC_DRIVER =
+  public static final String GRAVITINO_ICEBERG_JDBC_DRIVER =
       IcebergCatalogPropertiesMetadata.GRAVITINO_JDBC_DRIVER;
 
   public static final String GRAVITINO_ICEBERG_CATALOG_TYPE = "type";

From 509b0d6f72ed879536e92fb7bfcb3e0f89be45b1 Mon Sep 17 00:00:00 2001
From: caican00
Date: Tue, 2 Apr 2024 14:39:58 +0800
Subject: [PATCH 55/55] improve the metastore_db cleanup error log

---
 .../gravitino/integration/test/spark/SparkCommonIT.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java
index b313f3a9018..04f1c0dbc05 100644
--- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java
+++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java
@@ -93,7 +93,7 @@ void initDefaultDatabase() throws IOException {
         FileUtils.deleteDirectory(hiveLocalMetaStorePath);
       }
     } catch (IOException e) {
-      LOG.error(e.getMessage(), e);
+      LOG.error(String.format("delete directory %s failed.", hiveLocalMetaStorePath), e);
       throw e;
     }
     sql("USE " + getCatalogName());