From 1ab11f6a4b93005f5c4bde7b97ac1e95ce3a4505 Mon Sep 17 00:00:00 2001
From: Paul Zhang
Date: Mon, 8 Apr 2024 11:56:36 -0700
Subject: [PATCH] Optimize VBE input generation (#1854)

Summary:
While authoring VBE benchmarks, this code block was very inefficient as
determined by the profiler:
https://www.internalfb.com/fburl?nopassthru=1&key=scuba%2Fpyperf_experimental%2Fon_demand%2Fnbkvd0xv.

This diff optimizes the code by vectorizing the addition (adding the
accumulated batch-size offset to each rank's index tensor in a single
operation) and appending the per-rank tensors to a list that is then
concatenated once with torch.cat.

Differential Revision: D55882021
---
 torchrec/distributed/test_utils/test_model.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/torchrec/distributed/test_utils/test_model.py b/torchrec/distributed/test_utils/test_model.py
index 89215cafd..4bc70d180 100644
--- a/torchrec/distributed/test_utils/test_model.py
+++ b/torchrec/distributed/test_utils/test_model.py
@@ -304,6 +304,7 @@ def generate_variable_batch_input(
     strides_per_rank_per_feature = {}
     inverse_indices_per_rank_per_feature = {}
     label_per_rank = []
+
     for rank in range(world_size):
         # keys, values, lengths, strides
         lengths_per_rank_per_feature[rank] = {}
@@ -375,12 +376,11 @@ def generate_variable_batch_input(
         accum_batch_size = 0
         inverse_indices = []
         for rank in range(world_size):
-            inverse_indices += [
-                index + accum_batch_size
-                for index in inverse_indices_per_rank_per_feature[rank][key]
-            ]
+            inverse_indices.append(
+                inverse_indices_per_rank_per_feature[rank][key] + accum_batch_size
+            )
             accum_batch_size += strides_per_rank_per_feature[rank][key]
-        inverse_indices_list.append(torch.IntTensor(inverse_indices))
+        inverse_indices_list.append(torch.cat(inverse_indices))
     global_inverse_indices = (list(keys.keys()), torch.stack(inverse_indices_list))
     if global_constant_batch:
         global_offsets = []
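
Note (editor's illustration, not part of the patch): the sketch below is a
minimal, self-contained rendering of the optimization described in the
summary, using hypothetical toy data in place of the real
inverse_indices_per_rank_per_feature / strides_per_rank_per_feature
structures from test_model.py. The old path adds the offset to each index
inside a Python list comprehension and builds one IntTensor at the end; the
new path adds the scalar offset to the whole per-rank tensor and
concatenates the tensors once with torch.cat.

    import torch

    # Hypothetical toy data: two ranks, one feature key.
    inverse_indices_per_rank = {
        0: torch.tensor([0, 1, 1, 0]),
        1: torch.tensor([0, 0, 1, 1]),
    }
    strides_per_rank = {0: 2, 1: 2}

    # Old approach: per-element Python addition, one IntTensor at the end.
    accum_batch_size = 0
    flat_indices = []
    for rank in range(2):
        flat_indices += [
            int(index) + accum_batch_size
            for index in inverse_indices_per_rank[rank]
        ]
        accum_batch_size += strides_per_rank[rank]
    old_result = torch.IntTensor(flat_indices)

    # New approach: vectorized addition of the scalar offset to the whole
    # tensor, then a single torch.cat over the per-rank tensors.
    accum_batch_size = 0
    per_rank_tensors = []
    for rank in range(2):
        per_rank_tensors.append(inverse_indices_per_rank[rank] + accum_batch_size)
        accum_batch_size += strides_per_rank[rank]
    new_result = torch.cat(per_rank_tensors)

    # Both paths produce the same indices.
    assert old_result.tolist() == new_result.tolist()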