From d0b830b67d307a2b0f5eea6699632a39436244ac Mon Sep 17 00:00:00 2001 From: S O'Donnell Date: Fri, 8 Nov 2024 12:08:03 +0000 Subject: [PATCH 1/3] HDDS-11668. Recon List Keys API: Reuse key prefix if parentID is the same --- .../apache/hadoop/ozone/recon/ReconUtils.java | 50 ++++++++++++++++--- .../ozone/recon/api/OMDBInsightEndpoint.java | 32 ++++++++++-- 2 files changed, 73 insertions(+), 9 deletions(-) diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java index 88418baffaa..1e27aa33b4f 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java @@ -317,7 +317,34 @@ public static String constructFullPath(OmKeyInfo omKeyInfo, public static String constructFullPath(String keyName, long initialParentId, String volumeName, String bucketName, ReconNamespaceSummaryManager reconNamespaceSummaryManager, ReconOMMetadataManager omMetadataManager) throws IOException { - StringBuilder fullPath = new StringBuilder(keyName); + StringBuilder fullPath = constructFullPathPrefix(initialParentId, volumeName, bucketName, + reconNamespaceSummaryManager, omMetadataManager); + fullPath.append(keyName); + return fullPath.toString(); + } + + + /** + * Constructs the prefix path to a key from its key name and parent ID using a bottom-up approach, starting from the + * leaf node. + * + * The method begins with the leaf node (the key itself) and recursively prepends parent directory names, fetched + * via NSSummary objects, until reaching the parent bucket (parentId is -1). It effectively builds the path from + * bottom to top, finally prepending the volume and bucket names to complete the full path. If the directory structure + * is currently being rebuilt (indicated by the rebuildTriggered flag), this method returns an empty string to signify + * that path construction is temporarily unavailable. + * + * @param initialParentId The parent ID of the key + * @param volumeName The name of the volume + * @param bucketName The name of the bucket + * @return A StringBuilder containing the constructed prefix path of the key, or an empty string builder if a rebuild + * is in progress. + * @throws IOException + */ + public static StringBuilder constructFullPathPrefix(long initialParentId, String volumeName, + String bucketName, ReconNamespaceSummaryManager reconNamespaceSummaryManager, + ReconOMMetadataManager omMetadataManager) throws IOException { + StringBuilder fullPath = new StringBuilder(); long parentId = initialParentId; boolean isDirectoryPresent = false; @@ -326,16 +353,19 @@ public static String constructFullPath(String keyName, long initialParentId, Str if (nsSummary == null) { log.warn("NSSummary tree is currently being rebuilt or the directory could be in the progress of " + "deletion, returning empty string for path construction."); - return ""; + return fullPath; } if (nsSummary.getParentId() == -1) { if (rebuildTriggered.compareAndSet(false, true)) { triggerRebuild(reconNamespaceSummaryManager, omMetadataManager); } log.warn("NSSummary tree is currently being rebuilt, returning empty string for path construction."); - return ""; + return fullPath; + } + // On the last pass, dir-name will be empty and parent will be zero, indicating the loop should end. + if (!nsSummary.getDirName().isEmpty()) { + fullPath.insert(0, nsSummary.getDirName() + OM_KEY_PREFIX); } - fullPath.insert(0, nsSummary.getDirName() + OM_KEY_PREFIX); // Move to the parent ID of the current directory parentId = nsSummary.getParentId(); @@ -344,10 +374,18 @@ public static String constructFullPath(String keyName, long initialParentId, Str // Prepend the volume and bucket to the constructed path fullPath.insert(0, volumeName + OM_KEY_PREFIX + bucketName + OM_KEY_PREFIX); + // TODO - why is this needed? It seems lke it should handle double slashes in the path name, + // but its not clear how they get there. This normalize call is quite expensive as it + // creates several objects (URI, PATH, back to string). There was a bug fixed above + // where the last parent dirName was empty, which always caused a double // after the + // bucket name, but with that fixed, it seems like this should not be needed. All tests + // pass without it for key listing. if (isDirectoryPresent) { - return OmUtils.normalizeKey(fullPath.toString(), true); + String path = fullPath.toString(); + fullPath.setLength(0); + fullPath.append(OmUtils.normalizeKey(path, true)); } - return fullPath.toString(); + return fullPath; } private static void triggerRebuild(ReconNamespaceSummaryManager reconNamespaceSummaryManager, diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/OMDBInsightEndpoint.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/OMDBInsightEndpoint.java index 21c9552c035..abd3fae4fa3 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/OMDBInsightEndpoint.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/OMDBInsightEndpoint.java @@ -1176,6 +1176,9 @@ private void retrieveKeysFromTable( keyIter.seek(paramInfo.getStartPrefix()); } + long prevParentID = -1; + StringBuilder keyPrefix = null; + int keyPrefixLength = 0; while (keyIter.hasNext()) { Table.KeyValue entry = keyIter.next(); String dbKey = entry.getKey(); @@ -1189,9 +1192,32 @@ private void retrieveKeysFromTable( if (applyFilters(entry, paramInfo)) { KeyEntityInfoProtoWrapper keyEntityInfo = entry.getValue(); keyEntityInfo.setKey(dbKey); - keyEntityInfo.setPath(ReconUtils.constructFullPath(keyEntityInfo.getKeyName(), keyEntityInfo.getParentId(), - keyEntityInfo.getVolumeName(), keyEntityInfo.getBucketName(), reconNamespaceSummaryManager, - omMetadataManager)); + if (keyEntityInfo.getParentId() == 0) { + // Legacy bucket keys have a parentID of zero. OBS bucket keys have a parentID of the bucketID. + // FSO keys have a parent of the immediate parent directory. + // Legacy buckets are obsolete, so this code path is not optimized. We don't expect to see many Legacy + // buckets in practice. + prevParentID = -1; + keyEntityInfo.setPath(ReconUtils.constructFullPath(keyEntityInfo.getKeyName(), keyEntityInfo.getParentId(), + keyEntityInfo.getVolumeName(), keyEntityInfo.getBucketName(), reconNamespaceSummaryManager, + omMetadataManager)); + } else { + // As we iterate keys in sorted order, its highly likely that keys have the same prefix for many keys in a + // row. Especially for FSO buckets, its expensive to construct the path for each key. So, we construct the + // prefix once and reuse it for each identical parent. Only if the parent changes do we need to construct + // a new prefix path. + if (prevParentID != keyEntityInfo.getParentId()) { + prevParentID = keyEntityInfo.getParentId(); + keyPrefix = ReconUtils.constructFullPathPrefix(keyEntityInfo.getParentId(), + keyEntityInfo.getVolumeName(), keyEntityInfo.getBucketName(), reconNamespaceSummaryManager, + omMetadataManager); + keyPrefixLength = keyPrefix.length(); + } + keyPrefix.setLength(keyPrefixLength); + keyPrefix.append(keyEntityInfo.getKeyName()); + keyEntityInfo.setPath(keyPrefix.toString()); + } + results.add(keyEntityInfo); paramInfo.setLastKey(dbKey); if (results.size() >= paramInfo.getLimit()) { From cc88565a1b079a212e982555b65036c3e2bd5174 Mon Sep 17 00:00:00 2001 From: S O'Donnell Date: Fri, 8 Nov 2024 16:01:41 +0000 Subject: [PATCH 2/3] Fix failing test caused by not returning empty string when rebuilding --- .../main/java/org/apache/hadoop/ozone/recon/ReconUtils.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java index 1e27aa33b4f..c136b72fc4a 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java @@ -319,6 +319,9 @@ public static String constructFullPath(String keyName, long initialParentId, Str ReconOMMetadataManager omMetadataManager) throws IOException { StringBuilder fullPath = constructFullPathPrefix(initialParentId, volumeName, bucketName, reconNamespaceSummaryManager, omMetadataManager); + if (fullPath.length() == 0) { + return ""; + } fullPath.append(keyName); return fullPath.toString(); } From d8a4e1cfed5045dec8d4ccb33bfd72a994525515 Mon Sep 17 00:00:00 2001 From: S O'Donnell Date: Fri, 8 Nov 2024 17:30:31 +0000 Subject: [PATCH 3/3] Return empty prefix even if a partial prefix was created prior to NsSummary Rebuild --- .../src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java index c136b72fc4a..830cf2e12dd 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java @@ -356,6 +356,7 @@ public static StringBuilder constructFullPathPrefix(long initialParentId, String if (nsSummary == null) { log.warn("NSSummary tree is currently being rebuilt or the directory could be in the progress of " + "deletion, returning empty string for path construction."); + fullPath.setLength(0); return fullPath; } if (nsSummary.getParentId() == -1) { @@ -363,6 +364,7 @@ public static StringBuilder constructFullPathPrefix(long initialParentId, String triggerRebuild(reconNamespaceSummaryManager, omMetadataManager); } log.warn("NSSummary tree is currently being rebuilt, returning empty string for path construction."); + fullPath.setLength(0); return fullPath; } // On the last pass, dir-name will be empty and parent will be zero, indicating the loop should end.