
Commit c734d69

HADOOP-16898. Batch listing of multiple directories via an (unstable) interface
Contributed by Steve Loughran.

This moves the new API of HDFS-13616 into an interface which is implemented by the HDFS RPC filesystem client (not WebHDFS or any other connector). The new interface, BatchListingOperations, is in hadoop-common, so applications do not need to be compiled with HDFS on the classpath. They must cast the FS to the interface; instanceof can probe the client for the new interface, and the patch also adds a new path capability to probe for it. The FileSystem implementation is cut; tests are updated as appropriate. All new interfaces/classes/constants are marked @InterfaceStability.Unstable.

Change-Id: I5623c51f2c75804f58f915dd7e60cb2cffdac681
1 parent: d4d4c37
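
To make the usage pattern in the commit message above concrete, here is a minimal client-side sketch. It is not part of the patch: the filesystem URI, directory names, and the assumption that PartialListing#get() returns the list of statuses (its javadoc below only states that it throws on failure) are illustrative.

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BatchListingOperations;
import org.apache.hadoop.fs.CommonPathCapabilities;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.PartialListing;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class BatchListingClientSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Hypothetical cluster URI and directories, for illustration only.
    Path root = new Path("hdfs://namenode:8020/");
    FileSystem fs = root.getFileSystem(conf);

    // Probe both ways the commit message describes: instanceof on the new
    // interface, and the new path capability.
    if (fs instanceof BatchListingOperations
        && fs.hasPathCapability(root,
            CommonPathCapabilities.FS_EXPERIMENTAL_BATCH_LISTING)) {
      BatchListingOperations batch = (BatchListingOperations) fs;
      List<Path> dirs = Arrays.asList(
          new Path(root, "dir1"), new Path(root, "dir2"));
      RemoteIterator<PartialListing<FileStatus>> it =
          batch.batchedListStatusIterator(dirs);
      while (it.hasNext()) {
        // get() is assumed to return the statuses for one listed path,
        // rethrowing any failure recorded for it.
        for (FileStatus status : it.next().get()) {
          System.out.println(status.getPath());
        }
      }
    } else {
      System.out.println("Batched listing is not available on " + fs.getUri());
    }
  }
}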

File tree: 8 files changed (+94, -39 lines)

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BatchListingOperations.java

Lines changed: 61 additions & 0 deletions
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * Interface filesystems MAY implement to offer a batched list.
+ * If implemented, filesystems SHOULD declare
+ * {@link CommonPathCapabilities#FS_EXPERIMENTAL_BATCH_LISTING} to be a supported
+ * path capability.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Unstable
+public interface BatchListingOperations {
+
+  /**
+   * Batched listing API that returns {@link PartialListing}s for the
+   * passed Paths.
+   *
+   * @param paths List of paths to list.
+   * @return RemoteIterator that returns corresponding PartialListings.
+   * @throws IOException failure
+   */
+  RemoteIterator<PartialListing<FileStatus>> batchedListStatusIterator(
+      List<Path> paths) throws IOException;
+
+  /**
+   * Batched listing API that returns {@link PartialListing}s for the passed
+   * Paths. The PartialListing will contain {@link LocatedFileStatus} entries
+   * with locations.
+   *
+   * @param paths List of paths to list.
+   * @return RemoteIterator that returns corresponding PartialListings.
+   * @throws IOException failure
+   */
+  RemoteIterator<PartialListing<LocatedFileStatus>>
+      batchedListLocatedStatusIterator(
+          List<Path> paths) throws IOException;
+
+}
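
As a hedged illustration of the second method declared above, a short sketch of consuming located statuses. The helper class, the per-path failure handling, and the assumption that PartialListing#get() returns the entries (and rethrows a recorded failure) are not from the patch.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.fs.BatchListingOperations;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.PartialListing;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class LocatedBatchListingSketch {

  static void listWithLocations(BatchListingOperations batch, List<Path> dirs)
      throws IOException {
    RemoteIterator<PartialListing<LocatedFileStatus>> it =
        batch.batchedListLocatedStatusIterator(dirs);
    while (it.hasNext()) {
      PartialListing<LocatedFileStatus> listing = it.next();
      try {
        for (LocatedFileStatus status : listing.get()) {
          // Each entry carries its block locations, so no extra RPC is
          // needed to resolve them.
          System.out.println(status.getPath() + " -> "
              + status.getBlockLocations().length + " block location(s)");
        }
      } catch (IOException e) {
        // Assumption: a failure recorded for one listed path surfaces from
        // get(); the iterator can still be advanced for the other paths.
        System.err.println("Listing failed for one of the paths: " + e);
      }
    }
  }
}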

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java

Lines changed: 8 additions & 0 deletions
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.fs;
 
+import org.apache.hadoop.classification.InterfaceStability;
+
 /**
  * Common path capabilities.
  */
@@ -123,4 +125,10 @@ private CommonPathCapabilities() {
    */
   public static final String FS_XATTRS = "fs.capability.paths.xattrs";
 
+  /**
+   * Probe for support for {@link BatchListingOperations}.
+   */
+  @InterfaceStability.Unstable
+  public static final String FS_EXPERIMENTAL_BATCH_LISTING =
+      "fs.capability.batch.listing";
 }

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java

Lines changed: 0 additions & 27 deletions
@@ -2227,33 +2227,6 @@ public RemoteIterator<FileStatus> listStatusIterator(final Path p)
     return new DirListingIterator<>(p);
   }
 
-  /**
-   * Batched listing API that returns {@link PartialListing}s for the
-   * passed Paths.
-   *
-   * @param paths List of paths to list.
-   * @return RemoteIterator that returns corresponding PartialListings.
-   * @throws IOException
-   */
-  public RemoteIterator<PartialListing<FileStatus>> batchedListStatusIterator(
-      final List<Path> paths) throws IOException {
-    throw new UnsupportedOperationException("Not implemented");
-  }
-
-  /**
-   * Batched listing API that returns {@link PartialListing}s for the passed
-   * Paths. The PartialListing will contain {@link LocatedFileStatus} entries
-   * with locations.
-   *
-   * @param paths List of paths to list.
-   * @return RemoteIterator that returns corresponding PartialListings.
-   * @throws IOException
-   */
-  public RemoteIterator<PartialListing<LocatedFileStatus>> batchedListLocatedStatusIterator(
-      final List<Path> paths) throws IOException {
-    throw new UnsupportedOperationException("Not implemented");
-  }
-
   /**
    * List the statuses and block locations of the files in the given path.
    * Does not guarantee to return the iterator that traverses statuses

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PartialListing.java

Lines changed: 1 addition & 1 deletion
@@ -35,7 +35,7 @@
  * {@link #get()} will throw an Exception if there was a failure.
  */
 @InterfaceAudience.Public
-@InterfaceStability.Stable
+@InterfaceStability.Unstable
 public class PartialListing<T extends FileStatus> {
   private final Path listedPath;
   private final List<T> partialListing;

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFilterFileSystem.java

Lines changed: 0 additions & 5 deletions
@@ -27,7 +27,6 @@
 import java.net.URI;
 import java.util.EnumSet;
 import java.util.Iterator;
-import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.hadoop.conf.Configuration;
@@ -106,10 +105,6 @@ public FSDataOutputStream create(Path f, FsPermission permission,
     public FileStatus[] listStatusBatch(Path f, byte[] token);
     public FileStatus[] listStatus(Path[] files);
     public FileStatus[] listStatus(Path[] files, PathFilter filter);
-    public RemoteIterator<PartialListing<LocatedFileStatus>> batchedListLocatedStatusIterator(
-        final List<Path> paths) throws IOException;
-    public RemoteIterator<PartialListing<FileStatus>> batchedListStatusIterator(
-        final List<Path> paths) throws IOException;
     public FileStatus[] globStatus(Path pathPattern);
     public FileStatus[] globStatus(Path pathPattern, PathFilter filter);
     public Iterator<LocatedFileStatus> listFiles(Path path,

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java

Lines changed: 0 additions & 4 deletions
@@ -125,10 +125,6 @@ public FSDataOutputStream create(Path f, FsPermission permission,
     public FileStatus[] listStatusBatch(Path f, byte[] token);
     public FileStatus[] listStatus(Path[] files);
     public FileStatus[] listStatus(Path[] files, PathFilter filter);
-    public RemoteIterator<PartialListing<LocatedFileStatus>> batchedListLocatedStatusIterator(
-        final List<Path> paths) throws IOException;
-    public RemoteIterator<PartialListing<FileStatus>> batchedListStatusIterator(
-        final List<Path> paths) throws IOException;
     public FileStatus[] globStatus(Path pathPattern);
     public FileStatus[] globStatus(Path pathPattern, PathFilter filter);

hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java

Lines changed: 14 additions & 1 deletion
@@ -29,9 +29,11 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.crypto.key.KeyProvider;
 import org.apache.hadoop.crypto.key.KeyProviderTokenIssuer;
+import org.apache.hadoop.fs.BatchListingOperations;
 import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.fs.BlockStoragePolicySpi;
 import org.apache.hadoop.fs.CacheFlag;
+import org.apache.hadoop.fs.CommonPathCapabilities;
 import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.fs.CreateFlag;
 import org.apache.hadoop.fs.FSDataInputStream;
@@ -129,6 +131,8 @@
 import java.util.NoSuchElementException;
 import java.util.Optional;
 
+import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs;
+
 /****************************************************************
  * Implementation of the abstract FileSystem for the DFS system.
 * This object is the way end-user code interacts with a Hadoop
@@ -138,7 +142,7 @@
 @InterfaceAudience.LimitedPrivate({ "MapReduce", "HBase" })
 @InterfaceStability.Unstable
 public class DistributedFileSystem extends FileSystem
-    implements KeyProviderTokenIssuer {
+    implements KeyProviderTokenIssuer, BatchListingOperations {
   private Path workingDir;
   private URI uri;
 
@@ -3575,6 +3579,15 @@ public boolean hasPathCapability(final Path path, final String capability)
     if (cap.isPresent()) {
       return cap.get();
     }
+    // this switch is for features which are in the DFS client but not
+    // (yet/ever) in the WebHDFS API.
+    switch (validatePathCapabilityArgs(path, capability)) {
+    case CommonPathCapabilities.FS_EXPERIMENTAL_BATCH_LISTING:
+      return true;
+    default:
+      // fall through
+    }
+
     return super.hasPathCapability(p, capability);
   }
 }
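
For context on the pattern in the hunk above, a hypothetical sketch of how another FileSystem implementation could adopt it: implement BatchListingOperations and declare FS_EXPERIMENTAL_BATCH_LISTING from hasPathCapability(). The class name, the RawLocalFileSystem base, and the stubbed method bodies are assumptions for illustration, not anything this patch adds.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.fs.BatchListingOperations;
import org.apache.hadoop.fs.CommonPathCapabilities;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.PartialListing;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;
import org.apache.hadoop.fs.RemoteIterator;

public class BatchListingLocalFileSystemSketch extends RawLocalFileSystem
    implements BatchListingOperations {

  @Override
  public boolean hasPathCapability(Path path, String capability)
      throws IOException {
    // Declare the capability so callers can probe before casting.
    if (CommonPathCapabilities.FS_EXPERIMENTAL_BATCH_LISTING
        .equals(capability)) {
      return true;
    }
    return super.hasPathCapability(path, capability);
  }

  @Override
  public RemoteIterator<PartialListing<FileStatus>> batchedListStatusIterator(
      List<Path> paths) throws IOException {
    // A real connector would batch the listings; the sketch leaves that out.
    throw new UnsupportedOperationException("sketch only");
  }

  @Override
  public RemoteIterator<PartialListing<LocatedFileStatus>>
      batchedListLocatedStatusIterator(List<Path> paths) throws IOException {
    throw new UnsupportedOperationException("sketch only");
  }
}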

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBatchedListDirectories.java

Lines changed: 10 additions & 1 deletion
@@ -20,6 +20,7 @@
 
 import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonPathCapabilities;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -253,6 +254,13 @@ public void listFilesRelative() throws Exception {
     }
   }
 
+  @Test
+  public void testDFSHasCapability() throws Throwable {
+    assertTrue("FS does not declare PathCapability support",
+        dfs.hasPathCapability(new Path("/"),
+            CommonPathCapabilities.FS_EXPERIMENTAL_BATCH_LISTING));
+  }
+
   private void listFilesInternal(int numFiles) throws Exception {
     List<Path> paths = FILE_PATHS.subList(0, numFiles);
     List<FileStatus> statuses = getStatuses(paths);
@@ -384,7 +392,8 @@ private void listAsNormalUser(List<Path> paths)
       @Override
       public Void run() throws Exception {
        // try renew with long name
-        FileSystem fs = FileSystem.get(cluster.getURI(), conf);
+        DistributedFileSystem fs = (DistributedFileSystem)
+            FileSystem.get(cluster.getURI(), conf);
         RemoteIterator<PartialListing<FileStatus>> it =
             fs.batchedListStatusIterator(paths);
         PartialListing<FileStatus> listing = it.next();
