[apache#2566] Improvement(spark-connector): Refactoring integration tests for spark-connector (apache#2578)

### What changes were proposed in this pull request?

1. Make SparkIT into SparkCommonIT, which contains the common tests shared
by all catalogs.

2. Add a new SparkHiveCatalogIT for the Hive-specific tests; every
SparkXXCatalogIT extends SparkCommonIT (see the sketch below).
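
With this split, supporting another catalog in the integration tests only requires a small subclass. A minimal sketch of a hypothetical follow-up suite (SparkIcebergCatalogIT, its package, and its provider id are illustrative assumptions, not part of this PR):

```java
// Hypothetical future suite, shown only to illustrate the new layout;
// it is not included in this PR.
package com.datastrato.gravitino.integration.test.spark.iceberg;

import com.datastrato.gravitino.integration.test.spark.SparkCommonIT;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.TestInstance;

@Tag("gravitino-docker-it")
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
public class SparkIcebergCatalogIT extends SparkCommonIT {

  @Override
  protected String getCatalogName() {
    return "iceberg"; // assumed catalog name, for illustration
  }

  @Override
  protected String getProvider() {
    return "lakehouse-iceberg"; // assumed provider id, for illustration
  }
}
```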

### Why are the changes needed?
To separate the integration tests for different data sources.

Fix: apache#2566

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Existing integration tests, re-run after the separation.
caican00 authored and xiaojiebao committed Mar 28, 2024
1 parent 807975b commit fb8d3d2
Showing 3 changed files with 39 additions and 17 deletions.
SparkCommonIT.java (formerly SparkIT.java):

@@ -22,15 +22,10 @@
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Tag;
 import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.TestInstance;
-import org.junit.jupiter.api.TestInstance.Lifecycle;
 import org.junit.platform.commons.util.StringUtils;
 
-@Tag("gravitino-docker-it")
-@TestInstance(Lifecycle.PER_CLASS)
-public class SparkIT extends SparkEnvIT {
+public abstract class SparkCommonIT extends SparkEnvIT {
   private static String getSelectAllSql(String tableName) {
     return String.format("SELECT * FROM %s", tableName);
   }
@@ -56,32 +51,32 @@ private static String getInsertWithoutPartitionSql(String tableName, String valu
             DataTypes.createStructField("col2", DataTypes.StringType, true))),
         "struct(1, 'a')");
 
-  // Use a custom database not the original default database because SparkIT couldn't read&write
-  // data to tables in default database. The main reason is default database location is
+  // Use a custom database not the original default database because SparkCommonIT couldn't
+  // read&write data to tables in default database. The main reason is default database location is
   // determined by `hive.metastore.warehouse.dir` in hive-site.xml which is local HDFS address
   // not real HDFS address. The location of tables created under default database is like
-  // hdfs://localhost:9000/xxx which couldn't read write data from SparkIT. Will use default
+  // hdfs://localhost:9000/xxx which couldn't read write data from SparkCommonIT. Will use default
   // database after spark connector support Alter database xx set location command.
   @BeforeAll
   void initDefaultDatabase() {
-    sql("USE " + hiveCatalogName);
+    sql("USE " + getCatalogName());
     createDatabaseIfNotExists(getDefaultDatabase());
   }
 
   @BeforeEach
   void init() {
-    sql("USE " + hiveCatalogName);
+    sql("USE " + getCatalogName());
     sql("USE " + getDefaultDatabase());
   }
 
-  private String getDefaultDatabase() {
+  protected String getDefaultDatabase() {
     return "default_db";
   }
 
   @Test
   void testLoadCatalogs() {
     Set<String> catalogs = getCatalogs();
-    Assertions.assertTrue(catalogs.contains(hiveCatalogName));
+    Assertions.assertTrue(catalogs.contains(getCatalogName()));
   }
 
   @Test
SparkEnvIT.java:

@@ -24,17 +24,20 @@
 import org.slf4j.LoggerFactory;
 
 /** Setup Hive, Gravitino, Spark, Metalake environment to execute SparkSQL. */
-public class SparkEnvIT extends SparkUtilIT {
+public abstract class SparkEnvIT extends SparkUtilIT {
   private static final Logger LOG = LoggerFactory.getLogger(SparkEnvIT.class);
   private static final ContainerSuite containerSuite = ContainerSuite.getInstance();
 
-  protected final String hiveCatalogName = "hive";
   private final String metalakeName = "test";
 
   private SparkSession sparkSession;
   private String hiveMetastoreUri;
   private String gravitinoUri;
 
+  protected abstract String getCatalogName();
+
+  protected abstract String getProvider();
+
   @Override
   protected SparkSession getSparkSession() {
     Assertions.assertNotNull(sparkSession);
@@ -67,9 +70,9 @@ private void initMetalakeAndCatalogs() {
     properties.put(GravitinoSparkConfig.GRAVITINO_HIVE_METASTORE_URI, hiveMetastoreUri);
 
     metalake.createCatalog(
-        NameIdentifier.of(metalakeName, hiveCatalogName),
+        NameIdentifier.of(metalakeName, getCatalogName()),
         Catalog.Type.RELATIONAL,
-        "hive",
+        getProvider(),
         "",
         properties);
   }
SparkHiveCatalogIT.java (new file):

@@ -0,0 +1,24 @@
+/*
+ * Copyright 2024 Datastrato Pvt Ltd.
+ * This software is licensed under the Apache License version 2.
+ */
+package com.datastrato.gravitino.integration.test.spark.hive;
+
+import com.datastrato.gravitino.integration.test.spark.SparkCommonIT;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.TestInstance;
+
+@Tag("gravitino-docker-it")
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+public class SparkHiveCatalogIT extends SparkCommonIT {
+
+  @Override
+  protected String getCatalogName() {
+    return "hive";
+  }
+
+  @Override
+  protected String getProvider() {
+    return "hive";
+  }
+}
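
One detail worth noting: @Tag and @TestInstance moved from the old SparkIT onto the concrete subclass. initDefaultDatabase() in SparkCommonIT is a non-static @BeforeAll method, which JUnit 5 only allows when the test class runs with Lifecycle.PER_CLASS, so every concrete SparkXXCatalogIT must carry that annotation. A minimal self-contained sketch of the pattern (class names here are illustrative, not from this commit):

```java
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInstance;

// Abstract base: JUnit never instantiates it directly; its tests and setup
// are inherited and run once per concrete subclass.
abstract class CommonSuite {
  protected abstract String catalogName();

  @BeforeAll // non-static, hence the PER_CLASS lifecycle on the subclass
  void setUp() {
    System.out.println("setup for " + catalogName());
  }

  @Test
  void sharedTest() {
    System.out.println("shared test against " + catalogName());
  }
}

// Concrete subclass: this is the class JUnit discovers and runs.
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
class HiveSuite extends CommonSuite {
  @Override
  protected String catalogName() {
    return "hive";
  }
}
```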
