
[Spark]: Adapting 'path' to Spark's 'location' in table props and supporting the customization of the table location when creating a table #3843

Open · wants to merge 8 commits into base: master
@@ -18,6 +18,7 @@

package org.apache.paimon.catalog;

import org.apache.paimon.CoreOptions;
import org.apache.paimon.fs.FileIO;
import org.apache.paimon.fs.Path;
import org.apache.paimon.operation.Lock;
@@ -36,6 +37,7 @@
import java.util.concurrent.Callable;

import static org.apache.paimon.catalog.FileSystemCatalogOptions.CASE_SENSITIVE;
import static org.apache.paimon.utils.Preconditions.checkArgument;

/** A catalog implementation for {@link FileIO}. */
public class FileSystemCatalog extends AbstractCatalog {
@@ -117,6 +119,9 @@ protected void dropTableImpl(Identifier identifier) {

@Override
public void createTableImpl(Identifier identifier, Schema schema) {
checkArgument(
!schema.options().containsKey(CoreOptions.PATH.key()),
Contributor:

Can we just relax this check? For example, if it is the same path, we should allow it.

I am quite concerned that SHOW CREATE TABLE may not run properly.

Contributor (Author):

Thanks for reviewing. I'd like to share my thoughts on why setting the table location is prohibited in FileSystemCatalog.

SHOW CREATE TABLE is typically used to fetch a table's DDL and then create a test table with the same schema under a different name. In that scenario, the location in the DDL will inevitably mismatch the path the FileSystemCatalog assigns to the new table, so users have to edit the DDL before the statement can succeed.

So even if the restriction were relaxed, I believe it would still confuse users: success would only be guaranteed when the specified location matches the one assigned by the catalog, and in that case, why bother passing the location at all?

Therefore, instead of relaxing the check here, I suggest we clearly state this restriction in the documentation: describe the location-management constraints of FileSystemCatalog and the recommended Spark DDL usage. What do you think?

Contributor (Author):

This is also how we currently manage Iceberg tables on our platform. We host the location for all Iceberg tables on behalf of users and strongly advise against specifying a location when creating tables. So far, this has been working well.

Contributor:

I still think a relaxed check would be better. For the "create a test table with a different name for testing purposes" case, we can raise a very clear exception.
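For illustration, a minimal sketch of such a relaxed check, assuming it lives in FileSystemCatalog; the helper name checkLocationIfSpecified and the message wording are made up, and getTableLocation is assumed to be the existing catalog helper:

import org.apache.paimon.CoreOptions;
import org.apache.paimon.catalog.Identifier;
import org.apache.paimon.fs.Path;
import org.apache.paimon.schema.Schema;

import static org.apache.paimon.utils.Preconditions.checkArgument;

// Allow an explicit 'path' option only when it matches the location the
// catalog would assign anyway; otherwise fail with a message naming both.
private void checkLocationIfSpecified(Identifier identifier, Schema schema) {
    String specified = schema.options().get(CoreOptions.PATH.key());
    if (specified == null) {
        return; // no explicit location: the catalog assigns one
    }
    Path assigned = getTableLocation(identifier); // assumed existing helper
    checkArgument(
            new Path(specified).equals(assigned),
            "Specified location '%s' does not match the catalog-assigned location '%s'. "
                    + "Remove the 'path' option, e.g. when reusing DDL from SHOW CREATE TABLE.",
            specified,
            assigned);
}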

"The FileSystemCatalog does not support specifying location when creating a table.");
uncheck(() -> schemaManager(identifier).createTable(schema));
}

@@ -18,6 +18,7 @@

package org.apache.paimon.jdbc;

import org.apache.paimon.CoreOptions;
import org.apache.paimon.annotation.VisibleForTesting;
import org.apache.paimon.catalog.AbstractCatalog;
import org.apache.paimon.catalog.CatalogLockContext;
@@ -57,6 +58,7 @@
import static org.apache.paimon.jdbc.JdbcUtils.execute;
import static org.apache.paimon.jdbc.JdbcUtils.insertProperties;
import static org.apache.paimon.jdbc.JdbcUtils.updateTable;
import static org.apache.paimon.utils.Preconditions.checkArgument;

/* This file is based on source code from the Iceberg Project (http://iceberg.apache.org/), licensed by the Apache
* Software Foundation (ASF) under the Apache License, Version 2.0. See the NOTICE file distributed with this work for
@@ -235,6 +237,9 @@ protected void dropTableImpl(Identifier identifier) {

@Override
protected void createTableImpl(Identifier identifier, Schema schema) {
checkArgument(
!schema.options().containsKey(CoreOptions.PATH.key()),
"The JdbcCatalog does not support specifying location when creating a table.");
try {
// create table file
getSchemaManager(identifier).createTable(schema);
@@ -337,13 +337,4 @@ public Schema build() {
return new Schema(columns, partitionKeys, primaryKeys, options, comment);
}
}

public static Schema fromTableSchema(TableSchema tableSchema) {
return new Schema(
tableSchema.fields(),
tableSchema.partitionKeys(),
tableSchema.primaryKeys(),
tableSchema.options(),
tableSchema.comment());
}
}
@@ -18,15 +18,21 @@

package org.apache.paimon.flink.clone;

import org.apache.paimon.CoreOptions;
import org.apache.paimon.catalog.Catalog;
import org.apache.paimon.catalog.Identifier;
import org.apache.paimon.flink.FlinkCatalogFactory;
import org.apache.paimon.fs.Path;
import org.apache.paimon.options.Options;
import org.apache.paimon.schema.Schema;
import org.apache.paimon.schema.TableSchema;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.utils.Preconditions;

import org.apache.paimon.shade.guava30.com.google.common.collect.ImmutableList;
import org.apache.paimon.shade.guava30.com.google.common.collect.ImmutableMap;
import org.apache.paimon.shade.guava30.com.google.common.collect.Iterables;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.operators.AbstractStreamOperator;
import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
@@ -37,6 +43,7 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;

/**
* Pick the files to be cloned of a table based on the input record. The record type it produces is
@@ -77,7 +84,7 @@ public void processElement(StreamRecord<Tuple2<String, String>> streamRecord) th
FileStoreTable sourceTable = (FileStoreTable) sourceCatalog.getTable(sourceIdentifier);
targetCatalog.createDatabase(targetIdentifier.getDatabaseName(), true);
targetCatalog.createTable(
targetIdentifier, Schema.fromTableSchema(sourceTable.schema()), true);
targetIdentifier, newSchemaFromTableSchema(sourceTable.schema()), true);

List<CloneFileInfo> result =
toCloneFileInfos(
@@ -95,6 +102,18 @@ public void processElement(StreamRecord<Tuple2<String, String>> streamRecord) th
}
}

private static Schema newSchemaFromTableSchema(TableSchema tableSchema) {
return new Schema(
ImmutableList.copyOf(tableSchema.fields()),
ImmutableList.copyOf(tableSchema.partitionKeys()),
ImmutableList.copyOf(tableSchema.primaryKeys()),
ImmutableMap.copyOf(
Iterables.filter(
tableSchema.options().entrySet(),
entry -> !Objects.equals(entry.getKey(), CoreOptions.PATH.key()))),
tableSchema.comment());
Contributor (Author):

Currently, when building the schema for the clone table, all table options from the source table are passed into the new schema unchanged, including the source table's path. That path was then simply ignored during table creation, which seems incorrect. Additionally, after prohibiting custom table paths in the FileSystemCatalog, the clone table's unit tests would fail, so I fixed it here.

Moreover, the original fromTableSchema method reused the partitionKeys and primaryKeys instances of the source schema when constructing the new one, which is not safe, so I make defensive copies here.

I have also moved this method from the Schema class into this operator, since this is the only place that needs it. If you feel it is sufficiently general, I can put it back.
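As a standalone illustration of the option filtering above (hypothetical option values, and plain java.util streams instead of the shaded Guava used in the actual change):

import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;

public class CloneOptionsSketch {
    public static void main(String[] args) {
        // Options as they might appear on a source table, including the
        // catalog-assigned path that must not leak into the clone target.
        Map<String, String> sourceOptions = new HashMap<>();
        sourceOptions.put("bucket", "4");
        sourceOptions.put("path", "hdfs://source/warehouse/db.db/t"); // CoreOptions.PATH.key()

        // Same idea as newSchemaFromTableSchema: copy everything except 'path',
        // so the target catalog assigns the clone its own location.
        Map<String, String> targetOptions =
                sourceOptions.entrySet().stream()
                        .filter(e -> !Objects.equals(e.getKey(), "path"))
                        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));

        System.out.println(targetOptions); // {bucket=4}
    }
}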

Contributor:

Sounds good to me.

}

private List<CloneFileInfo> toCloneFileInfos(
List<Path> files,
Path sourceTableRoot,
@@ -78,6 +78,7 @@
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
@@ -493,7 +494,18 @@ protected void createTableImpl(Identifier identifier, Schema schema) {
// if changes on Hive fails there is no harm to perform the same changes to files again
TableSchema tableSchema;
try {
tableSchema = schemaManager(identifier).createTable(schema, usingExternalTable());
Path tableRoot;
if (schema.options().containsKey(CoreOptions.PATH.key())) {
checkArgument(
Objects.equals(createTableType(), TableType.EXTERNAL),
"The HiveCatalog only supports specifying location when creating an external table");
tableRoot = new Path(schema.options().get(CoreOptions.PATH.key()));
Contributor (Author):

Location specification is only allowed when creating an external table.

} else {
tableRoot = getTableLocation(identifier);
}

tableSchema =
schemaManager(identifier, tableRoot).createTable(schema, usingExternalTable());
} catch (Exception e) {
throw new RuntimeException(
"Failed to commit changes of table "
@@ -707,10 +719,7 @@ public String warehouse() {

private Table newHmsTable(Identifier identifier, Map<String, String> tableParameters) {
long currentTimeMillis = System.currentTimeMillis();
TableType tableType =
OptionsUtils.convertToEnum(
hiveConf.get(TABLE_TYPE.key(), TableType.MANAGED.toString()),
TableType.class);
TableType tableType = createTableType();
Table table =
new Table(
identifier.getTableName(),
@@ -735,6 +744,11 @@ private Table newHmsTable(Identifier identifier, Map<String, String> tableParame
return table;
}

private TableType createTableType() {
return OptionsUtils.convertToEnum(
hiveConf.get(TABLE_TYPE.key(), TableType.MANAGED.toString()), TableType.class);
}

private void updateHmsTable(Table table, Identifier identifier, TableSchema schema) {
StorageDescriptor sd = table.getSd() != null ? table.getSd() : new StorageDescriptor();

@@ -792,7 +806,11 @@ private void updateHmsTable(Table table, Identifier identifier, TableSchema sche
}

// update location
locationHelper.specifyTableLocation(table, getTableLocation(identifier).toString());
String location =
schema.options().containsKey(CoreOptions.PATH.key())
? schema.options().get(CoreOptions.PATH.key())
: getTableLocation(identifier).toString();
locationHelper.specifyTableLocation(table, location);
Contributor (Author):

If the user provides a table path, use it.

}

private void updateHmsTablePars(Table table, TableSchema schema) {
@@ -816,8 +834,11 @@ private FieldSchema convertToFieldSchema(DataField dataField) {
}

private SchemaManager schemaManager(Identifier identifier) {
return new SchemaManager(
fileIO, getTableLocation(identifier), identifier.getBranchNameOrDefault())
return schemaManager(identifier, getTableLocation(identifier));
}

private SchemaManager schemaManager(Identifier identifier, Path path) {
return new SchemaManager(fileIO, path, identifier.getBranchNameOrDefault())
.withLock(lock(identifier));
}

@@ -18,9 +18,11 @@

package org.apache.paimon.hive;

import org.apache.paimon.CoreOptions;
import org.apache.paimon.catalog.CatalogTestBase;
import org.apache.paimon.catalog.Identifier;
import org.apache.paimon.client.ClientPool;
import org.apache.paimon.options.CatalogOptions;
import org.apache.paimon.options.Options;
import org.apache.paimon.schema.Schema;
import org.apache.paimon.schema.SchemaChange;
@@ -29,6 +31,7 @@
import org.apache.paimon.utils.CommonTestUtils;
import org.apache.paimon.utils.HadoopUtils;

import org.apache.paimon.shade.guava30.com.google.common.collect.ImmutableMap;
import org.apache.paimon.shade.guava30.com.google.common.collect.Lists;

import org.apache.hadoop.hive.conf.HiveConf;
@@ -37,6 +40,7 @@
import org.apache.thrift.TException;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;

import java.lang.reflect.Field;
import java.util.Arrays;
@@ -268,4 +272,36 @@ public void testAlterHiveTableParameters() {
fail("Test failed due to exception: " + e.getMessage());
}
}

@Test
public void testCreateExternalTableWithLocation(@TempDir java.nio.file.Path tempDir)
throws Exception {
HiveConf hiveConf = new HiveConf();
String jdoConnectionURL = "jdbc:derby:memory:" + UUID.randomUUID();
hiveConf.setVar(METASTORECONNECTURLKEY, jdoConnectionURL + ";create=true");
hiveConf.set(CatalogOptions.TABLE_TYPE.key(), "external");
String metastoreClientClass = "org.apache.hadoop.hive.metastore.HiveMetaStoreClient";
HiveCatalog externalWarehouseCatalog =
new HiveCatalog(fileIO, hiveConf, metastoreClientClass, warehouse);

String externalTablePath = tempDir.toString();

Schema schema =
new Schema(
Lists.newArrayList(new DataField(0, "foo", DataTypes.INT())),
Collections.emptyList(),
Collections.emptyList(),
ImmutableMap.of("path", externalTablePath),
"");

Identifier identifier = Identifier.create("default", "my_table");
externalWarehouseCatalog.createTable(identifier, schema, true);

org.apache.paimon.table.Table table = externalWarehouseCatalog.getTable(identifier);
assertThat(table.options())
.extracting(CoreOptions.PATH.key())
.isEqualTo("file:" + externalTablePath);

externalWarehouseCatalog.close();
}
}
@@ -392,6 +392,11 @@ private Schema toInitialSchema(
Map<String, String> normalizedProperties = mergeSQLConf(properties);
normalizedProperties.remove(PRIMARY_KEY_IDENTIFIER);
normalizedProperties.remove(TableCatalog.PROP_COMMENT);
if (normalizedProperties.containsKey(TableCatalog.PROP_LOCATION)) {
String path = normalizedProperties.remove(TableCatalog.PROP_LOCATION);
normalizedProperties.put(CoreOptions.PATH.key(), path);
}

String pkAsString = properties.get(PRIMARY_KEY_IDENTIFIER);
List<String> primaryKeys =
pkAsString == null
@@ -60,6 +60,7 @@ case class SparkTable(table: Table)
properties.put(CoreOptions.PRIMARY_KEY.key, String.join(",", table.primaryKeys))
}
properties.put(TableCatalog.PROP_PROVIDER, SparkSource.NAME)
properties.put(TableCatalog.PROP_LOCATION, properties.get(CoreOptions.PATH.key()))
if (table.comment.isPresent) {
properties.put(TableCatalog.PROP_COMMENT, table.comment.get)
}