From ed8b49b2bcf78a4c8534815165f521d32c61da28 Mon Sep 17 00:00:00 2001 From: Stephen O'Donnell Date: Wed, 13 Nov 2024 10:45:11 +0000 Subject: [PATCH] HDDS-11668. Recon List Keys API: Reuse key prefix if parentID is the same (#7410) (cherry picked from commit 8e4a50815504b9a82aa9bac9373b1f2ed1f2cd52) --- .../apache/hadoop/ozone/recon/ReconUtils.java | 55 +++++++++++++++++-- .../ozone/recon/api/OMDBInsightEndpoint.java | 32 ++++++++++- 2 files changed, 78 insertions(+), 9 deletions(-) diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java index 88418baffaa8..830cf2e12dd9 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java @@ -317,7 +317,37 @@ public static String constructFullPath(OmKeyInfo omKeyInfo, public static String constructFullPath(String keyName, long initialParentId, String volumeName, String bucketName, ReconNamespaceSummaryManager reconNamespaceSummaryManager, ReconOMMetadataManager omMetadataManager) throws IOException { - StringBuilder fullPath = new StringBuilder(keyName); + StringBuilder fullPath = constructFullPathPrefix(initialParentId, volumeName, bucketName, + reconNamespaceSummaryManager, omMetadataManager); + if (fullPath.length() == 0) { + return ""; + } + fullPath.append(keyName); + return fullPath.toString(); + } + + + /** + * Constructs the prefix path to a key from its key name and parent ID using a bottom-up approach, starting from the + * leaf node. + * + * The method begins with the leaf node (the key itself) and recursively prepends parent directory names, fetched + * via NSSummary objects, until reaching the parent bucket (parentId is -1). It effectively builds the path from + * bottom to top, finally prepending the volume and bucket names to complete the full path. If the directory structure + * is currently being rebuilt (indicated by the rebuildTriggered flag), this method returns an empty string to signify + * that path construction is temporarily unavailable. + * + * @param initialParentId The parent ID of the key + * @param volumeName The name of the volume + * @param bucketName The name of the bucket + * @return A StringBuilder containing the constructed prefix path of the key, or an empty string builder if a rebuild + * is in progress. + * @throws IOException + */ + public static StringBuilder constructFullPathPrefix(long initialParentId, String volumeName, + String bucketName, ReconNamespaceSummaryManager reconNamespaceSummaryManager, + ReconOMMetadataManager omMetadataManager) throws IOException { + StringBuilder fullPath = new StringBuilder(); long parentId = initialParentId; boolean isDirectoryPresent = false; @@ -326,16 +356,21 @@ public static String constructFullPath(String keyName, long initialParentId, Str if (nsSummary == null) { log.warn("NSSummary tree is currently being rebuilt or the directory could be in the progress of " + "deletion, returning empty string for path construction."); - return ""; + fullPath.setLength(0); + return fullPath; } if (nsSummary.getParentId() == -1) { if (rebuildTriggered.compareAndSet(false, true)) { triggerRebuild(reconNamespaceSummaryManager, omMetadataManager); } log.warn("NSSummary tree is currently being rebuilt, returning empty string for path construction."); - return ""; + fullPath.setLength(0); + return fullPath; + } + // On the last pass, dir-name will be empty and parent will be zero, indicating the loop should end. + if (!nsSummary.getDirName().isEmpty()) { + fullPath.insert(0, nsSummary.getDirName() + OM_KEY_PREFIX); } - fullPath.insert(0, nsSummary.getDirName() + OM_KEY_PREFIX); // Move to the parent ID of the current directory parentId = nsSummary.getParentId(); @@ -344,10 +379,18 @@ public static String constructFullPath(String keyName, long initialParentId, Str // Prepend the volume and bucket to the constructed path fullPath.insert(0, volumeName + OM_KEY_PREFIX + bucketName + OM_KEY_PREFIX); + // TODO - why is this needed? It seems lke it should handle double slashes in the path name, + // but its not clear how they get there. This normalize call is quite expensive as it + // creates several objects (URI, PATH, back to string). There was a bug fixed above + // where the last parent dirName was empty, which always caused a double // after the + // bucket name, but with that fixed, it seems like this should not be needed. All tests + // pass without it for key listing. if (isDirectoryPresent) { - return OmUtils.normalizeKey(fullPath.toString(), true); + String path = fullPath.toString(); + fullPath.setLength(0); + fullPath.append(OmUtils.normalizeKey(path, true)); } - return fullPath.toString(); + return fullPath; } private static void triggerRebuild(ReconNamespaceSummaryManager reconNamespaceSummaryManager, diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/OMDBInsightEndpoint.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/OMDBInsightEndpoint.java index 21c9552c035a..abd3fae4fa3c 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/OMDBInsightEndpoint.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/OMDBInsightEndpoint.java @@ -1176,6 +1176,9 @@ private void retrieveKeysFromTable( keyIter.seek(paramInfo.getStartPrefix()); } + long prevParentID = -1; + StringBuilder keyPrefix = null; + int keyPrefixLength = 0; while (keyIter.hasNext()) { Table.KeyValue entry = keyIter.next(); String dbKey = entry.getKey(); @@ -1189,9 +1192,32 @@ private void retrieveKeysFromTable( if (applyFilters(entry, paramInfo)) { KeyEntityInfoProtoWrapper keyEntityInfo = entry.getValue(); keyEntityInfo.setKey(dbKey); - keyEntityInfo.setPath(ReconUtils.constructFullPath(keyEntityInfo.getKeyName(), keyEntityInfo.getParentId(), - keyEntityInfo.getVolumeName(), keyEntityInfo.getBucketName(), reconNamespaceSummaryManager, - omMetadataManager)); + if (keyEntityInfo.getParentId() == 0) { + // Legacy bucket keys have a parentID of zero. OBS bucket keys have a parentID of the bucketID. + // FSO keys have a parent of the immediate parent directory. + // Legacy buckets are obsolete, so this code path is not optimized. We don't expect to see many Legacy + // buckets in practice. + prevParentID = -1; + keyEntityInfo.setPath(ReconUtils.constructFullPath(keyEntityInfo.getKeyName(), keyEntityInfo.getParentId(), + keyEntityInfo.getVolumeName(), keyEntityInfo.getBucketName(), reconNamespaceSummaryManager, + omMetadataManager)); + } else { + // As we iterate keys in sorted order, its highly likely that keys have the same prefix for many keys in a + // row. Especially for FSO buckets, its expensive to construct the path for each key. So, we construct the + // prefix once and reuse it for each identical parent. Only if the parent changes do we need to construct + // a new prefix path. + if (prevParentID != keyEntityInfo.getParentId()) { + prevParentID = keyEntityInfo.getParentId(); + keyPrefix = ReconUtils.constructFullPathPrefix(keyEntityInfo.getParentId(), + keyEntityInfo.getVolumeName(), keyEntityInfo.getBucketName(), reconNamespaceSummaryManager, + omMetadataManager); + keyPrefixLength = keyPrefix.length(); + } + keyPrefix.setLength(keyPrefixLength); + keyPrefix.append(keyEntityInfo.getKeyName()); + keyEntityInfo.setPath(keyPrefix.toString()); + } + results.add(keyEntityInfo); paramInfo.setLastKey(dbKey); if (results.size() >= paramInfo.getLimit()) {