[apache#2566] Improvement(spark-connector): Refactoring integration tests for spark-connector (apache#2578)

### What changes were proposed in this pull request?

1. Make SparkIT into SparkCommonIT, which contains the common tests shared
by all catalogs.

2. Add a new SparkHiveCatalogIT for the Hive-specific tests; every
SparkXXCatalogIT extends SparkCommonIT (see the sketch below).
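
With this split, supporting another catalog in the integration tests only requires a small subclass. A minimal sketch of a hypothetical follow-up suite (SparkIcebergCatalogIT, its package, and its provider id are illustrative assumptions, not part of this PR):

```java
// Hypothetical future suite, shown only to illustrate the new layout;
// it is not included in this PR.
package com.datastrato.gravitino.integration.test.spark.iceberg;

import com.datastrato.gravitino.integration.test.spark.SparkCommonIT;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.TestInstance;

@Tag("gravitino-docker-it")
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
public class SparkIcebergCatalogIT extends SparkCommonIT {

  @Override
  protected String getCatalogName() {
    return "iceberg"; // assumed catalog name, for illustration
  }

  @Override
  protected String getProvider() {
    return "lakehouse-iceberg"; // assumed provider id, for illustration
  }
}
```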

### Why are the changes needed?
To separate the integration tests for different data sources.

Fix: apache#2566

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Existing integration tests, re-run after the separation.
caican00 authored and xiaojiebao committed Mar 28, 2024
1 parent 807975b commit fb8d3d2
Showing 3 changed files with 39 additions and 17 deletions.
SparkCommonIT.java (formerly SparkIT.java):

@@ -22,15 +22,10 @@
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Tag;
 import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.TestInstance;
-import org.junit.jupiter.api.TestInstance.Lifecycle;
 import org.junit.platform.commons.util.StringUtils;
 
-@Tag("gravitino-docker-it")
-@TestInstance(Lifecycle.PER_CLASS)
-public class SparkIT extends SparkEnvIT {
+public abstract class SparkCommonIT extends SparkEnvIT {
   private static String getSelectAllSql(String tableName) {
     return String.format("SELECT * FROM %s", tableName);
   }
@@ -56,32 +51,32 @@ private static String getInsertWithoutPartitionSql(String tableName, String valu
             DataTypes.createStructField("col2", DataTypes.StringType, true))),
         "struct(1, 'a')");
 
-  // Use a custom database not the original default database because SparkIT couldn't read&write
-  // data to tables in default database. The main reason is default database location is
+  // Use a custom database not the original default database because SparkCommonIT couldn't
+  // read&write data to tables in default database. The main reason is default database location is
   // determined by `hive.metastore.warehouse.dir` in hive-site.xml which is local HDFS address
   // not real HDFS address. The location of tables created under default database is like
-  // hdfs://localhost:9000/xxx which couldn't read write data from SparkIT. Will use default
+  // hdfs://localhost:9000/xxx which couldn't read write data from SparkCommonIT. Will use default
   // database after spark connector support Alter database xx set location command.
   @BeforeAll
   void initDefaultDatabase() {
-    sql("USE " + hiveCatalogName);
+    sql("USE " + getCatalogName());
     createDatabaseIfNotExists(getDefaultDatabase());
   }
 
   @BeforeEach
   void init() {
-    sql("USE " + hiveCatalogName);
+    sql("USE " + getCatalogName());
     sql("USE " + getDefaultDatabase());
   }
 
-  private String getDefaultDatabase() {
+  protected String getDefaultDatabase() {
     return "default_db";
   }
 
   @Test
   void testLoadCatalogs() {
     Set<String> catalogs = getCatalogs();
-    Assertions.assertTrue(catalogs.contains(hiveCatalogName));
+    Assertions.assertTrue(catalogs.contains(getCatalogName()));
   }
 
   @Test
SparkEnvIT.java:

@@ -24,17 +24,20 @@
 import org.slf4j.LoggerFactory;
 
 /** Setup Hive, Gravitino, Spark, Metalake environment to execute SparkSQL. */
-public class SparkEnvIT extends SparkUtilIT {
+public abstract class SparkEnvIT extends SparkUtilIT {
   private static final Logger LOG = LoggerFactory.getLogger(SparkEnvIT.class);
   private static final ContainerSuite containerSuite = ContainerSuite.getInstance();
 
-  protected final String hiveCatalogName = "hive";
   private final String metalakeName = "test";
 
   private SparkSession sparkSession;
   private String hiveMetastoreUri;
   private String gravitinoUri;
 
+  protected abstract String getCatalogName();
+
+  protected abstract String getProvider();
+
   @Override
   protected SparkSession getSparkSession() {
     Assertions.assertNotNull(sparkSession);
@@ -67,9 +70,9 @@ private void initMetalakeAndCatalogs() {
     properties.put(GravitinoSparkConfig.GRAVITINO_HIVE_METASTORE_URI, hiveMetastoreUri);
 
     metalake.createCatalog(
-        NameIdentifier.of(metalakeName, hiveCatalogName),
+        NameIdentifier.of(metalakeName, getCatalogName()),
         Catalog.Type.RELATIONAL,
-        "hive",
+        getProvider(),
         "",
         properties);
   }
SparkHiveCatalogIT.java (new file):

@@ -0,0 +1,24 @@
+/*
+ * Copyright 2024 Datastrato Pvt Ltd.
+ * This software is licensed under the Apache License version 2.
+ */
+package com.datastrato.gravitino.integration.test.spark.hive;
+
+import com.datastrato.gravitino.integration.test.spark.SparkCommonIT;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.TestInstance;
+
+@Tag("gravitino-docker-it")
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+public class SparkHiveCatalogIT extends SparkCommonIT {
+
+  @Override
+  protected String getCatalogName() {
+    return "hive";
+  }
+
+  @Override
+  protected String getProvider() {
+    return "hive";
+  }
+}
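
One detail worth noting: @Tag and @TestInstance moved from the old SparkIT onto the concrete subclass. initDefaultDatabase() in SparkCommonIT is a non-static @BeforeAll method, which JUnit 5 only allows when the test class runs with Lifecycle.PER_CLASS, so every concrete SparkXXCatalogIT must carry that annotation. A minimal self-contained sketch of the pattern (class names here are illustrative, not from this commit):

```java
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInstance;

// Abstract base: JUnit never instantiates it directly; its tests and setup
// are inherited and run once per concrete subclass.
abstract class CommonSuite {
  protected abstract String catalogName();

  @BeforeAll // non-static, hence the PER_CLASS lifecycle on the subclass
  void setUp() {
    System.out.println("setup for " + catalogName());
  }

  @Test
  void sharedTest() {
    System.out.println("shared test against " + catalogName());
  }
}

// Concrete subclass: this is the class JUnit discovers and runs.
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
class HiveSuite extends CommonSuite {
  @Override
  protected String catalogName() {
    return "hive";
  }
}
```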
