Skip to content

Commit 6b43938

Browse files
committed
Hive Metadata Scan: Support reading tables with only Hive metadata (#23)
* Support reading tables with Hive metadata if Iceberg metadata is not available
1 parent bf68ea0 commit 6b43938

16 files changed

+1566
-4
lines changed

core/src/main/java/org/apache/iceberg/BaseFileScanTask.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,15 @@
2929
import org.apache.iceberg.relocated.com.google.common.base.MoreObjects;
3030
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
3131

32-
class BaseFileScanTask implements FileScanTask {
32+
public class BaseFileScanTask implements FileScanTask {
3333
private final DataFile file;
3434
private final String schemaString;
3535
private final String specString;
3636
private final ResidualEvaluator residuals;
3737

3838
private transient PartitionSpec spec = null;
3939

40-
BaseFileScanTask(DataFile file, String schemaString, String specString, ResidualEvaluator residuals) {
40+
public BaseFileScanTask(DataFile file, String schemaString, String specString, ResidualEvaluator residuals) {
4141
this.file = file;
4242
this.schemaString = schemaString;
4343
this.specString = specString;

core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ private Table loadMetadataTable(TableIdentifier identifier) {
183183
}
184184
}
185185

186-
private boolean isValidMetadataIdentifier(TableIdentifier identifier) {
186+
protected boolean isValidMetadataIdentifier(TableIdentifier identifier) {
187187
return MetadataTableType.from(identifier.name()) != null &&
188188
isValidIdentifier(TableIdentifier.of(identifier.namespace().levels()));
189189
}
@@ -281,7 +281,7 @@ private static void deleteFiles(FileIO io, Set<ManifestFile> allManifests) {
281281
});
282282
}
283283

284-
private static String fullTableName(String catalogName, TableIdentifier identifier) {
284+
protected static String fullTableName(String catalogName, TableIdentifier identifier) {
285285
StringBuilder sb = new StringBuilder();
286286

287287
if (catalogName.contains("/") || catalogName.contains(":")) {

core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,4 +270,13 @@ private void deleteRemovedMetadataFiles(TableMetadata base, TableMetadata metada
270270
.run(previousMetadataFile -> io().deleteFile(previousMetadataFile.file()));
271271
}
272272
}
273+
274+
protected void setCurrentMetadata(TableMetadata currentMetadata) {
275+
this.currentMetadata = currentMetadata;
276+
}
277+
278+
protected void setShouldRefresh(boolean shouldRefresh) {
279+
this.shouldRefresh = shouldRefresh;
280+
}
281+
273282
}

hive/src/main/java/org/apache/iceberg/hive/HiveCatalogs.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.util.concurrent.TimeUnit;
2626
import org.apache.hadoop.conf.Configuration;
2727
import org.apache.hadoop.hive.conf.HiveConf;
28+
import org.apache.iceberg.hive.legacy.LegacyHiveCatalog;
2829

2930
public final class HiveCatalogs {
3031

@@ -39,6 +40,11 @@ public final class HiveCatalogs {
3940
.removalListener((RemovalListener<String, HiveCatalog>) (uri, catalog, cause) -> catalog.close())
4041
.build();
4142

43+
private static final Cache<String, HiveCatalog> LEGACY_CATALOG_CACHE = Caffeine.newBuilder()
44+
.expireAfterAccess(10, TimeUnit.MINUTES)
45+
.removalListener((RemovalListener<String, HiveCatalog>) (uri, catalog, cause) -> catalog.close())
46+
.build();
47+
4248
private HiveCatalogs() {}
4349

4450
public static HiveCatalog loadCatalog(Configuration conf) {
@@ -47,6 +53,12 @@ public static HiveCatalog loadCatalog(Configuration conf) {
4753
return CATALOG_CACHE.get(metastoreUri, uri -> new HiveCatalog(conf));
4854
}
4955

56+
public static HiveCatalog loadLegacyCatalog(Configuration conf) {
57+
// metastore URI can be null in local mode
58+
String metastoreUri = conf.get(HiveConf.ConfVars.METASTOREURIS.varname, "");
59+
return LEGACY_CATALOG_CACHE.get(metastoreUri, uri -> new LegacyHiveCatalog(conf));
60+
}
61+
5062
/**
5163
* @deprecated Use {@link #loadHiveMetadataPreservingCatalog(Configuration)} instead
5264
*/

hive/src/main/java/org/apache/iceberg/hive/HiveMetadataPreservingTableOperations.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ protected void doRefresh() {
9191
String errMsg = String.format("%s.%s is missing %s property", database, tableName, METADATA_LOCATION_PROP);
9292
throw new IllegalArgumentException(errMsg);
9393
}
94+
9495
if (!io().newInputFile(metadataLocation).exists()) {
9596
String errMsg = String.format("%s property for %s.%s points to a non-existent file %s",
9697
METADATA_LOCATION_PROP, database, tableName, metadataLocation);

hive/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,11 @@ protected void doRefresh() {
121121
throw new IllegalArgumentException(errMsg);
122122
}
123123

124+
if (!io().newInputFile(metadataLocation).exists()) {
125+
String errMsg = String.format("%s property for %s.%s points to a non-existent file %s",
126+
METADATA_LOCATION_PROP, database, tableName, metadataLocation);
127+
throw new IllegalArgumentException(errMsg);
128+
}
124129
} catch (NoSuchObjectException e) {
125130
if (currentMetadataLocation() != null) {
126131
throw new NoSuchTableException(String.format("No such table: %s.%s", database, tableName));
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.iceberg.hive.legacy;
21+
22+
import org.apache.iceberg.FileFormat;
23+
import org.apache.iceberg.StructLike;
24+
25+
26+
/**
27+
* Metadata for a data directory referenced by either a Hive table or a partition
28+
*/
29+
class DirectoryInfo {
30+
private final String location;
31+
private final FileFormat format;
32+
private final StructLike partitionData;
33+
34+
DirectoryInfo(String location, FileFormat format, StructLike partitionData) {
35+
this.location = location;
36+
this.format = format;
37+
this.partitionData = partitionData;
38+
}
39+
40+
public String location() {
41+
return location;
42+
}
43+
44+
public FileFormat format() {
45+
return format;
46+
}
47+
48+
public StructLike partitionData() {
49+
return partitionData;
50+
}
51+
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.iceberg.hive.legacy;
21+
22+
import java.io.IOException;
23+
import java.util.Arrays;
24+
import java.util.List;
25+
import org.apache.hadoop.conf.Configuration;
26+
import org.apache.hadoop.fs.FileStatus;
27+
import org.apache.hadoop.fs.FileSystem;
28+
import org.apache.hadoop.fs.Path;
29+
import org.apache.hadoop.fs.PathFilter;
30+
import org.apache.iceberg.exceptions.RuntimeIOException;
31+
32+
33+
class FileSystemUtils {
34+
35+
private FileSystemUtils() {}
36+
37+
/**
38+
* Lists all non-hidden files for the given directory
39+
*/
40+
static List<FileStatus> listFiles(String directory, Configuration conf) {
41+
42+
final Path directoryPath = new Path(directory);
43+
final FileStatus[] files;
44+
try {
45+
FileSystem fs = directoryPath.getFileSystem(conf);
46+
files = fs.listStatus(directoryPath, HiddenPathFilter.INSTANCE);
47+
} catch (IOException e) {
48+
throw new RuntimeIOException(e, "Error listing files for directory: " + directory);
49+
}
50+
return Arrays.asList(files);
51+
}
52+
53+
private enum HiddenPathFilter implements PathFilter {
54+
INSTANCE;
55+
56+
@Override
57+
public boolean accept(Path path) {
58+
return !path.getName().startsWith("_") && !path.getName().startsWith(".");
59+
}
60+
}
61+
}

0 commit comments

Comments
 (0)