HDDS-10634. Recon - listKeys API for listing of OBS, FSO and Legacy bucket keys with filters. #6503

Closed (12 commits)
@@ -44,6 +44,7 @@ private ReconConstants() {
public static final String DEFAULT_OPEN_KEY_INCLUDE_NON_FSO = "false";
public static final String DEFAULT_OPEN_KEY_INCLUDE_FSO = "false";
public static final String DEFAULT_FETCH_COUNT = "1000";
public static final String DEFAULT_KEY_SIZE = "0";
public static final String DEFAULT_BATCH_NUMBER = "1";
public static final String RECON_QUERY_BATCH_PARAM = "batchNum";
public static final String RECON_QUERY_PREVKEY = "prevKey";
@@ -29,8 +29,13 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.sql.Timestamp;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.TimeZone;
import java.util.concurrent.BlockingQueue;
import java.util.stream.Collectors;

@@ -107,6 +112,30 @@ public static File getReconScmDbDir(ConfigurationSource conf) {
return queues;
}

/**
* Converts a date string in the provided format to epoch milliseconds,
* interpreted in the given timezone.
*
* @param dateString the date string to parse
* @param dateFormat the SimpleDateFormat pattern to parse it with
* @param timeZone the timezone in which the date string is interpreted
* @return the epoch millisecond value of the parsed date, or the current
* time if parsing fails
*/
public static long convertToEpochMillis(String dateString, String dateFormat, TimeZone timeZone) {
try {
SimpleDateFormat sdf = new SimpleDateFormat(dateFormat);
sdf.setTimeZone(timeZone); // Interpret the date string in the provided timezone
Date date = sdf.parse(dateString);
return date.getTime(); // Convert to epoch milliseconds
} catch (ParseException parseException) {
LOG.error("Date parse exception for date: {} in format: {} -> {}", dateString, dateFormat, parseException);
return Instant.now().toEpochMilli();
} catch (Exception exception) {
LOG.error("Unexpected error while parsing date: {} in format: {}", dateString, dateFormat, exception);
return Instant.now().toEpochMilli();
}
}
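For reference, a quick usage sketch of this helper; the call site below is illustrative, not taken from this patch:

```java
import java.util.TimeZone;

import org.apache.hadoop.ozone.recon.ReconUtils;

public class ConvertToEpochMillisExample {
  public static void main(String[] args) {
    // Parse a date string in the server's default timezone; on a parse
    // failure the helper logs an error and falls back to the current time.
    long cutoffMillis = ReconUtils.convertToEpochMillis(
        "04-15-2024 10:30:00", "MM-dd-yyyy HH:mm:ss", TimeZone.getDefault());
    System.out.println("Epoch millis: " + cutoffMillis);
  }
}
```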

/**
* Get configured Recon DB directory value based on config. If not present,
* fallback to ozone.metadata.dirs
@@ -18,15 +18,18 @@

package org.apache.hadoop.ozone.recon.api;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager;
import org.apache.hadoop.ozone.om.helpers.BucketLayout;
import org.apache.hadoop.ozone.recon.ReconUtils;
import org.apache.hadoop.ozone.recon.api.handlers.EntityHandler;
import org.apache.hadoop.ozone.recon.api.types.NamespaceSummaryResponse;
import org.apache.hadoop.ozone.recon.api.types.DUResponse;
import org.apache.hadoop.ozone.recon.api.types.QuotaUsageResponse;
import org.apache.hadoop.ozone.recon.api.types.ResponseStatus;
import org.apache.hadoop.ozone.recon.api.types.FileSizeDistributionResponse;
import org.apache.hadoop.ozone.recon.api.types.EntityType;
import org.apache.hadoop.ozone.recon.api.types.Stats;
import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager;
import org.apache.hadoop.ozone.recon.spi.ReconNamespaceSummaryManager;

@@ -39,6 +42,14 @@
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import java.io.IOException;
import java.util.List;
import java.util.TimeZone;
import java.util.function.Predicate;
import java.util.stream.Collectors;

import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX;
import static org.apache.hadoop.ozone.recon.ReconConstants.DEFAULT_FETCH_COUNT;
import static org.apache.hadoop.ozone.recon.ReconConstants.DEFAULT_KEY_SIZE;

/**
* REST APIs for namespace metadata summary.
@@ -128,8 +139,110 @@ public Response getDiskUsage(@QueryParam("path") String path,
reconNamespaceSummaryManager,
omMetadataManager, reconSCM, path);

duResponse = handler.getDuResponse(listFile, withReplica, sortSubpaths);
duResponse = handler.getDuResponse(listFile, withReplica, sortSubpaths, false, new Stats(-1));

return Response.ok(duResponse).build();
}

/**
* This API lists a limited number ('count') of keys after applying the
* filters below from the API parameters.
* Default values of the API param filters:
* -- replicationType - empty string, so the filter is not applied and keys of
*    every replication type are listed.
* -- creationDate - empty string, so the filter is not applied and keys of
*    any age are listed.
* -- keySize - 0 bytes, so all keys greater than zero bytes are listed,
*    effectively all keys.
* -- startPrefix - /
* -- count - 1000
*
* @param replicationType Filter for RATIS or EC replication keys.
* @param creationDate Filter for keys created after creationDate, in
*                     "MM-dd-yyyy HH:mm:ss" string format.
* @param keySize Filter for keys greater than keySize in bytes.
* @param startPrefix Filter for the startPrefix path.
* @param limit Limit on the number of keys returned.
* @param recursive Whether to list keys recursively for FSO buckets.
* @return the list of keys in the structured format below:
* Response For OBS Bucket keys:
* ********************************************************
* {
* "status": "OK",
* "path": "/volume1/obs-bucket/",
* "size": 73400320,
* "sizeWithReplica": 81788928,
* "subPathCount": 1,
* "totalKeyCount": 7,
* "lastKey": "/volume1/obs-bucket/key7",
* "subPaths": [
* {
* "key": true,
* "path": "key1",
* "size": 10485760,
* "sizeWithReplica": 18874368,
* "isKey": true,
* "replicationType": "RATIS",
* "creationTime": 1712321367060,
* "modificationTime": 1712321368190
* },
* {
* "key": true,
* "path": "key7",
* "size": 10485760,
* "sizeWithReplica": 18874368,
* "isKey": true,
* "replicationType": "EC",
* "creationTime": 1713261005555,
* "modificationTime": 1713261006728
* }
* ],
* "sizeDirectKey": 73400320
* }
* ********************************************************
* @throws IOException
*/
@GET
@Path("/listKeys")
@SuppressWarnings("methodlength")
public Response listKeysWithDu(@QueryParam("replicationType") String replicationType,
@QueryParam("creationDate") String creationDate,
@DefaultValue(DEFAULT_KEY_SIZE) @QueryParam("keySize") long keySize,
@DefaultValue(OM_KEY_PREFIX) @QueryParam("startPrefix") String startPrefix,
@DefaultValue(DEFAULT_FETCH_COUNT) @QueryParam("count") long limit,
@DefaultValue("false") @QueryParam("recursive") boolean recursive)
throws IOException {

if (StringUtils.isEmpty(startPrefix)) {
return Response.status(Response.Status.BAD_REQUEST).build();
}
DUResponse duResponse = new DUResponse();
if (!isInitializationComplete()) {
duResponse.setStatus(ResponseStatus.INITIALIZING);
return Response.ok(duResponse).build();
}
EntityHandler handler = EntityHandler.getEntityHandler(
reconNamespaceSummaryManager,
omMetadataManager, reconSCM, startPrefix);

Stats stats = new Stats(limit);

duResponse = handler.getListKeysResponse(stats, recursive);
Contributor review comment:
Instead of overloading the DUResponse class, we can define a new response class for the listKeys purpose and return only the required information.

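A minimal sketch of the kind of dedicated response class being suggested; this is hypothetical and not part of the patch, with field names inferred from what the endpoint actually returns:

```java
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.ozone.recon.api.types.DUResponse;
import org.apache.hadoop.ozone.recon.api.types.ResponseStatus;

// Hypothetical slimmed-down response for listKeys: keeps the paging fields
// and the matched key entries, drops the DU-only aggregates.
public class ListKeysResponse {
  private ResponseStatus status = ResponseStatus.OK;
  private String path;                             // normalized startPrefix
  private long totalCount;                         // keys matched, including beyond the limit
  private String lastKey;                          // continuation token for the next page
  private List<DUResponse.DiskUsage> keys = new ArrayList<>();

  public ResponseStatus getStatus() { return status; }
  public void setStatus(ResponseStatus status) { this.status = status; }
  public String getPath() { return path; }
  public void setPath(String path) { this.path = path; }
  public long getTotalCount() { return totalCount; }
  public void setTotalCount(long totalCount) { this.totalCount = totalCount; }
  public String getLastKey() { return lastKey; }
  public void setLastKey(String lastKey) { this.lastKey = lastKey; }
  public List<DUResponse.DiskUsage> getKeys() { return keys; }
  public void setKeys(List<DUResponse.DiskUsage> keys) { this.keys = keys; }
}
```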
List<DUResponse.DiskUsage> keyListWithDu = duResponse.getDuData();

long epochMillis = StringUtils.isEmpty(creationDate) ? 0L :
    ReconUtils.convertToEpochMillis(creationDate, "MM-dd-yyyy HH:mm:ss", TimeZone.getDefault());
Predicate<DUResponse.DiskUsage> keyAgeFilter = keyData -> keyData.getCreationTime() >= epochMillis;
Predicate<DUResponse.DiskUsage> keyReplicationFilter =
keyData -> replicationType.equals(keyData.getReplicationType());
Predicate<DUResponse.DiskUsage> keySizeFilter = keyData -> keyData.getSize() > keySize;
Predicate<DUResponse.DiskUsage> keyFilter = keyData -> keyData.isKey();

List<DUResponse.DiskUsage> filteredKeyList = keyListWithDu.stream()
.filter(keyFilter)
.filter(keyData -> StringUtils.isEmpty(creationDate) || keyAgeFilter.test(keyData))
.filter(keyData -> StringUtils.isEmpty(replicationType) || keyReplicationFilter.test(keyData))
.filter(keySizeFilter)
.collect(Collectors.toList());

duResponse.setDuData(filteredKeyList);
duResponse.setCount(filteredKeyList.size());
return Response.ok(duResponse).build();
}
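For illustration, a request to this endpoint could look like the following, assuming the class is served under Recon's /api/v1/namespace path on the default Recon HTTP port 9888 (both depend on your deployment):

```
GET http://localhost:9888/api/v1/namespace/listKeys?replicationType=RATIS&creationDate=04-05-2024%2000:00:00&keySize=1048576&startPrefix=/volume1/obs-bucket&count=100&recursive=true
```

This would return at most 100 RATIS-replicated keys larger than 1 MiB, created on or after 04-05-2024, under /volume1/obs-bucket.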

@@ -28,6 +28,7 @@
import org.apache.hadoop.ozone.recon.api.types.FileSizeDistributionResponse;
import org.apache.hadoop.ozone.recon.api.types.NSSummary;
import org.apache.hadoop.ozone.recon.api.types.ResponseStatus;
import org.apache.hadoop.ozone.recon.api.types.Stats;
import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager;
import org.apache.hadoop.ozone.recon.spi.ReconNamespaceSummaryManager;

@@ -43,6 +44,7 @@
* Class for handling bucket entity type.
*/
public class BucketEntityHandler extends EntityHandler {

public BucketEntityHandler(
ReconNamespaceSummaryManager reconNamespaceSummaryManager,
ReconOMMetadataManager omMetadataManager,
@@ -90,7 +92,7 @@ private BucketObjectDBInfo getBucketObjDbInfo(String[] names)

@Override
public DUResponse getDuResponse(
boolean listFile, boolean withReplica, boolean sortSubpaths)
boolean listFile, boolean withReplica, boolean sortSubpaths, boolean recursive, Stats stats)
throws IOException {
DUResponse duResponse = new DUResponse();
duResponse.setPath(getNormalizedPath());
@@ -112,6 +114,7 @@ public DUResponse getDuResponse(
long bucketDataSize = duResponse.getKeySize();
long bucketDataSizeWithReplica = 0L;
for (long subdirObjectId: bucketSubdirs) {
List<DUResponse.DiskUsage> diskUsageList = new ArrayList<>();
NSSummary subdirNSSummary = getReconNamespaceSummaryManager()
.getNSSummary(subdirObjectId);

@@ -125,20 +128,25 @@
long dataSize = getTotalSize(subdirObjectId);
bucketDataSize += dataSize;

stats.setCurrentCount(stats.getCurrentCount() + 1);

if (withReplica) {
long dirDU = getBucketHandler()
.calculateDUUnderObject(subdirObjectId);
.calculateDUUnderObject(subdirObjectId, recursive, diskUsageList, stats);
diskUsage.setSizeWithReplica(dirDU);
bucketDataSizeWithReplica += dirDU;
}
diskUsage.setSize(dataSize);
dirDUData.add(diskUsage);
if (diskUsageList.size() > 0) {
dirDUData.addAll(diskUsageList);
}
}
// Either listFile or withReplica is enabled, we need the directKeys info
if (listFile || withReplica) {
bucketDataSizeWithReplica += getBucketHandler()
.handleDirectKeys(bucketObjectId, withReplica,
listFile, dirDUData, getNormalizedPath());
listFile, dirDUData, getNormalizedPath(), stats);
}
if (withReplica) {
duResponse.setSizeWithReplica(bucketDataSizeWithReplica);
@@ -153,7 +161,8 @@
}

duResponse.setDuData(dirDUData);

duResponse.setTotalCount(stats.getTotalCount());
duResponse.setLastKey(stats.getLastKey());
return duResponse;
}

@@ -29,6 +29,7 @@
import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs;
import org.apache.hadoop.ozone.recon.api.types.DUResponse;
import org.apache.hadoop.ozone.recon.api.types.EntityType;
import org.apache.hadoop.ozone.recon.api.types.Stats;
import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager;
import org.apache.hadoop.ozone.recon.spi.ReconNamespaceSummaryManager;
import org.slf4j.Logger;
@@ -77,17 +78,17 @@ public ReconNamespaceSummaryManager getReconNamespaceSummaryManager() {
public abstract EntityType determineKeyPath(String keyName)
throws IOException;

public abstract long calculateDUUnderObject(long parentId)
throws IOException;

public abstract long handleDirectKeys(long parentId,
boolean withReplica, boolean listFile,
List<DUResponse.DiskUsage> duData,
String normalizedPath) throws IOException;
public abstract long calculateDUUnderObject(long parentId, boolean recursive,
List<DUResponse.DiskUsage> diskUsageList, Stats stats) throws IOException;

public abstract long getDirObjectId(String[] names)
throws IOException;

public abstract long handleDirectKeys(long parentId,
boolean withReplica, boolean listFile,
List<DUResponse.DiskUsage> duData,
String normalizedPath, Stats stats) throws IOException;

public abstract long getDirObjectId(String[] names, int cutoff)
throws IOException;

@@ -232,4 +233,19 @@ public static BucketHandler getBucketHandler(
return getBucketHandler(reconNamespaceSummaryManager,
omMetadataManager, reconSCM, bucketInfo);
}

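/**
 * Adds the given DU record to duData while honoring the Stats limit: with a
 * limit of -1 every record is added; otherwise records are added (and lastKey
 * advanced) only while currentCount is below the limit. totalCount is
 * incremented for every record seen, so callers can report how many keys
 * matched beyond the returned page.
 */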
protected static void verifyStatsAndAddDURecord(List<DUResponse.DiskUsage> duData, Stats stats,
Table.KeyValue<String, OmKeyInfo> kv,
DUResponse.DiskUsage diskUsage) throws IOException {
if (stats.getLimit() == -1) {
duData.add(diskUsage);
} else {
if (stats.getCurrentCount() < stats.getLimit()) {
duData.add(diskUsage);
stats.setCurrentCount(stats.getCurrentCount() + 1);
stats.setLastKey(kv.getKey());
}
}
stats.setTotalCount(stats.getTotalCount() + 1);
}
}
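The Stats type referenced throughout this diff is not shown here; judging by the getters and setters used above, it presumably carries the paging state through the recursion, roughly like this inferred sketch (not the actual class):

```java
// Inferred sketch of the paging/accounting holder threaded through the
// handlers above; field names are taken from the accessors in this diff.
public class Stats {
  private final long limit;   // max entries to add to the response; -1 means unlimited
  private long currentCount;  // entries added to the response so far
  private long totalCount;    // entries seen, including those beyond the limit
  private String lastKey;     // last key added, usable as a continuation token

  public Stats(long limit) {
    this.limit = limit;
  }

  public long getLimit() { return limit; }
  public long getCurrentCount() { return currentCount; }
  public void setCurrentCount(long currentCount) { this.currentCount = currentCount; }
  public long getTotalCount() { return totalCount; }
  public void setTotalCount(long totalCount) { this.totalCount = totalCount; }
  public String getLastKey() { return lastKey; }
  public void setLastKey(String lastKey) { this.lastKey = lastKey; }
}
```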
@@ -29,6 +29,7 @@
import org.apache.hadoop.ozone.recon.api.types.QuotaUsageResponse;
import org.apache.hadoop.ozone.recon.api.types.FileSizeDistributionResponse;

import org.apache.hadoop.ozone.recon.api.types.Stats;
import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager;
import org.apache.hadoop.ozone.recon.spi.ReconNamespaceSummaryManager;

@@ -83,7 +84,7 @@ private ObjectDBInfo getDirectoryObjDbInfo(String[] names)

@Override
public DUResponse getDuResponse(
boolean listFile, boolean withReplica, boolean sortSubPaths)
boolean listFile, boolean withReplica, boolean sortSubPaths, boolean recursive, Stats stats)
throws IOException {
DUResponse duResponse = new DUResponse();
duResponse.setPath(getNormalizedPath());
@@ -106,6 +107,7 @@
List<DUResponse.DiskUsage> subdirDUData = new ArrayList<>();
// iterate all subdirectories to get disk usage data
for (long subdirObjectId: subdirs) {
List<DUResponse.DiskUsage> diskUsageList = new ArrayList<>();
NSSummary subdirNSSummary =
getReconNamespaceSummaryManager().getNSSummary(subdirObjectId);
// for the subdirName we need the subdir filename, not the key name
@@ -136,20 +138,23 @@

if (withReplica) {
long subdirDU = getBucketHandler()
.calculateDUUnderObject(subdirObjectId);
.calculateDUUnderObject(subdirObjectId, recursive, diskUsageList, stats);
diskUsage.setSizeWithReplica(subdirDU);
dirDataSizeWithReplica += subdirDU;
}

diskUsage.setSize(dataSize);
subdirDUData.add(diskUsage);
if (recursive) {
subdirDUData.addAll(diskUsageList);
}
}

// handle direct keys under directory
if (listFile || withReplica) {
dirDataSizeWithReplica += getBucketHandler()
.handleDirectKeys(dirObjectId, withReplica,
listFile, subdirDUData, getNormalizedPath());
listFile, subdirDUData, getNormalizedPath(), stats);
}

if (withReplica) {
@@ -165,7 +170,8 @@
}

duResponse.setDuData(subdirDUData);

duResponse.setTotalCount(stats.getTotalCount());
duResponse.setLastKey(stats.getLastKey());
return duResponse;
}
