Skip to content

Commit

Permalink
Get rename metrics for non nested scenario for hns bucket (#2339)
Browse files Browse the repository at this point in the history
* added check directory functions and its unit tests

* adding function to parse config file and generate dir in bucket

* exit_code set to 1

* correct format

* refactors

* test formatting

* function to avoid code repetition

* added check directory functions and its unit tests

* moving mount functions to utils

* compute metrics from time of operation

* uploading metrics to gsheet

* nits

* testing non nested scenario for gcs bucket

* unit tests

* using single config file flag along with bucket type

* correcting header checks

* refactor to avoid code repetition
  • Loading branch information
anushka567 authored Aug 23, 2024
1 parent 4638976 commit 8288d45
Show file tree
Hide file tree
Showing 5 changed files with 255 additions and 39 deletions.
1 change: 0 additions & 1 deletion .github/header-checker-lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ allowedLicenses:
sourceFileExtensions:
- 'go'
- 'Makefile'
- 'json'
- 'yml'
- 'txt'
- 'py'
Expand Down
93 changes: 93 additions & 0 deletions perfmetrics/scripts/hns_rename_folders_metrics/config-flat.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
{
"name": "hns-rename-benchmark-flat" ,
"folders" : {
"num_folders": 3,
"folder_structure" : [
{
"name": "1k_files_rename_test_0" ,
"num_files": 1000 ,
"file_name_prefix": "file" ,
"file_size": "1kb"
},
{
"name": "5k_files_rename_test_0" ,
"num_files": 5000 ,
"file_name_prefix": "file" ,
"file_size": "1kb"
},
{
"name": "10k_files_rename_test_0" ,
"num_files": 10000 ,
"file_name_prefix": "file" ,
"file_size": "1kb"
}
]
},
"nested_folders": {
"folder_name": "nested_folder_rename_test",
"num_folders": 10,
"folder_structure" : [
{
"name": "nested_folder_1_0" ,
"num_files": 1000 ,
"file_name_prefix": "file" ,
"file_size": "1kb"
},
{
"name": "nested_folder_2_0" ,
"num_files": 1000 ,
"file_name_prefix": "file" ,
"file_size": "1kb"
},
{
"name": "nested_folder_3_0" ,
"num_files": 1000 ,
"file_name_prefix": "file" ,
"file_size": "1kb"
},
{
"name": "nested_folder_4_0" ,
"num_files": 1000 ,
"file_name_prefix": "file" ,
"file_size": "1kb"
},
{
"name": "nested_folder_5_0" ,
"num_files": 1000 ,
"file_name_prefix": "file" ,
"file_size": "1kb"
},
{
"name": "nested_folder_6_0" ,
"num_files": 1000 ,
"file_name_prefix": "file" ,
"file_size": "1kb"
},
{
"name": "nested_folder_7_0" ,
"num_files": 1000 ,
"file_name_prefix": "file" ,
"file_size": "1kb"
},
{
"name": "nested_folder_8_0" ,
"num_files": 1000 ,
"file_name_prefix": "file" ,
"file_size": "1kb"
},
{
"name": "nested_folder_9_0" ,
"num_files": 1000 ,
"file_name_prefix": "file" ,
"file_size": "1kb"
},
{
"name": "nested_folder_10_0" ,
"num_files": 1000 ,
"file_name_prefix": "file" ,
"file_size": "1kb"
}
]

}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"name": "hns-rename-folders-test" ,
"name": "hns-rename-benchmark-hns" ,
"folders" : {
"num_folders": 3,
"folder_structure" : [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@

# limitations under the License.
# To run the script,run in terminal:
# python3 renaming_benchmark.py <dir-config.json> [--upload_gs] [--num_samples NUM_SAMPLES]
# python3 renaming_benchmark.py config.json bucket_type [--upload_gs] \
# [--num_samples NUM_SAMPLES]
# where config.json contains the directory structure details for the test
# and bucket_type is either hns or flat.

import os
Expand All @@ -33,6 +34,7 @@
from gsheet import gsheet

WORKSHEET_NAME_FLAT = 'rename_metrics_flat'
WORKSHEET_NAME_HNS = 'rename_metrics_hns'
SPREADSHEET_ID = '1UVEvsf49eaDJdTGLQU1rlNTIAxg8PZoNQCy_GX6Nw-A'

logging.basicConfig(
Expand Down Expand Up @@ -239,7 +241,7 @@ def _record_time_of_operation(mount_point, dir, num_samples):
return results


def _perform_testing(dir, test_type, num_samples, results):
def _perform_testing(dir, test_type, num_samples):
"""
This function performs rename operations and records time of operation .
Args:
Expand Down Expand Up @@ -272,31 +274,39 @@ def _perform_testing(dir, test_type, num_samples, results):
}
test_type : flat or hns.
num_samples: Number of samples to collect for each test.
results: Dictionary to store the results corresponding to each test type
"""
if test_type == "hns":
# TODO add mount function for test type hns
return
# Creating config file for mounting with hns enabled.
with open("/tmp/config.yml",'w') as mount_config:
mount_config.write("enable-hns: true")
mount_flags="--config-file=/tmp/config.yml"
else :
mount_flags = "--implicit-dirs --rename-dir-limit=1000000"

# Mounting the gcs bucket.
flat_mount_flags = "--implicit-dirs --rename-dir-limit=1000000"
flat_bucket_name = mount_gcs_bucket(dir["name"], flat_mount_flags, log)

bucket_name = mount_gcs_bucket(dir["name"], mount_flags, log)
# Record time of operation and populate the results dict.
flat_results = _record_time_of_operation(flat_bucket_name, dir, num_samples)
results["flat"] = flat_results

results = _record_time_of_operation(bucket_name, dir, num_samples)
# Unmounting the bucket.
unmount_gcs_bucket(dir["name"], log)

return results


def _parse_arguments(argv):
argv = sys.argv
parser = argparse.ArgumentParser()

parser.add_argument(
'dir_config_file',
help='Provide path of the config file.',
action='store'
'config_file',
help='Provide path of the config file for GCS bucket.',
action='store',
)
parser.add_argument(
'bucket_type',
help='Provide bucket type - hns or flat ',
action='store',
choices=['hns','flat']
)
parser.add_argument(
'--upload_gs',
Expand All @@ -317,17 +327,8 @@ def _parse_arguments(argv):
return parser.parse_args(argv[1:])


if __name__ == '__main__':
argv = sys.argv
if len(argv) < 2:
raise TypeError('Incorrect number of arguments.\n'
'Usage: '
'python3 renaming_benchmark.py [--upload_gs] [--num_samples NUM_SAMPLES] config_file ')

args = _parse_arguments(argv)
check_dependencies(['gcloud', 'gcsfuse'], log)

with open(os.path.abspath(args.dir_config_file)) as file:
def _run_rename_benchmark(test_type,dir_config,num_samples,upload_gs):
with open(os.path.abspath(dir_config)) as file:
dir_str = json.load(file)

exit_code = _check_for_config_file_inconsistency(dir_str)
Expand All @@ -339,20 +340,37 @@ def _parse_arguments(argv):
dir_structure_present = _check_if_dir_structure_exists(dir_str)
if not dir_structure_present:
log.error("Test data does not exist.To create test data, run : \
python3 generate_folders_and_files.py <dir_config.json> ")
python3 generate_folders_and_files.py {} ".format(dir_config))
sys.exit(1)

results = dict() # Dict object to store the results corresonding to the test types.
_perform_testing(dir_str, "flat", args.num_samples, results)
flat_parsed_metrics = _parse_results(dir_str, results['flat'], args.num_samples)
upload_values_flat = _get_values_to_export(dir_str, flat_parsed_metrics,
"flat")
results=_perform_testing(dir_str, test_type, num_samples)
parsed_metrics = _parse_results(dir_str, results, num_samples)
upload_values = _get_values_to_export(dir_str, parsed_metrics,
test_type)

if args.upload_gs:
if upload_gs:
log.info('Uploading files to the Google Sheet\n')
exit_code = _upload_to_gsheet(WORKSHEET_NAME_FLAT, upload_values_flat,
if test_type == "flat":
worksheet= WORKSHEET_NAME_FLAT
else:
worksheet= WORKSHEET_NAME_HNS

exit_code = _upload_to_gsheet(worksheet, upload_values,
SPREADSHEET_ID)
if exit_code != 0:
log.error("Upload to gsheet failed!")
else:
print(upload_values_flat)
print(upload_values)


if __name__ == '__main__':
    # Script entry point: validate the raw argument count, parse the command
    # line, check the required tools and run the benchmark for one bucket type.
    argv = sys.argv
    # config_file and bucket_type are both positional, so a usable invocation
    # has at least three argv entries (the script name included).
    if len(argv) < 3:
        raise TypeError(
            'Incorrect number of arguments.\n'
            'Usage: '
            'python3 renaming_benchmark.py [--upload_gs] [--num_samples NUM_SAMPLES] config_file bucket_type'
        )

    args = _parse_arguments(argv)
    # The dependency check on gcloud and gcsfuse runs before any benchmark work.
    check_dependencies(['gcloud', 'gcsfuse'], log)
    _run_rename_benchmark(
        args.bucket_type,
        args.config_file,
        args.num_samples,
        args.upload_gs,
    )
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.
import unittest
import renaming_benchmark
from mock import patch, call
from mock import patch, call, mock_open

class TestRenamingBenchmark(unittest.TestCase):

Expand Down Expand Up @@ -98,9 +98,44 @@ def test_perform_testing_flat(self, mock_log, mock_record_time_of_operation,
mount_flags = "--implicit-dirs --rename-dir-limit=1000000"
mock_mount_gcs_bucket.return_value="flat_bucket"
mock_record_time_of_operation.return_value = {"test_folder": [0.1, 0.2, 0.3, 0.4]}
expected_results = {"flat": {"test_folder": [0.1, 0.2, 0.3, 0.4]}}
expected_results = {"test_folder": [0.1, 0.2, 0.3, 0.4]}

renaming_benchmark._perform_testing(dir, test_type, num_samples, results)
results= renaming_benchmark._perform_testing(dir, test_type, num_samples)

self.assertEqual(results, expected_results)
# Verify calls to other functions.
mock_mount_gcs_bucket.assert_called_once_with(dir["name"], mount_flags, mock_log)
mock_record_time_of_operation.assert_called_once_with(mock_mount_gcs_bucket.return_value, dir, num_samples)
mock_unmount_gcs_bucket.assert_called_once_with(dir["name"], mock_log)
mock_log.error.assert_not_called() # No errors should be logged

@patch('renaming_benchmark.unmount_gcs_bucket')
@patch('renaming_benchmark.mount_gcs_bucket')
@patch('renaming_benchmark._record_time_of_operation')
@patch('renaming_benchmark.log')
def test_perform_testing_hns(self, mock_log, mock_record_time_of_operation,
mock_mount_gcs_bucket, mock_unmount_gcs_bucket):
dir = {
"name":"hns_bucket",
"folders":{
"num_folders":1,
"folder_structure":{
'name': "test_folder",
"num_files": 1,
"file_name_prefix": "file",
"file_size": "1kb"
}
}
}
test_type = "hns"
num_samples = 4
results = {}
mount_flags = "--config-file=/tmp/config.yml"
mock_mount_gcs_bucket.return_value="hns_bucket"
mock_record_time_of_operation.return_value = {"test_folder": [0.1, 0.2, 0.3, 0.4]}
expected_results = {"test_folder": [0.1, 0.2, 0.3, 0.4]}

results= renaming_benchmark._perform_testing(dir, test_type, num_samples)

self.assertEqual(results, expected_results)
# Verify calls to other functions.
Expand Down Expand Up @@ -177,6 +212,77 @@ def test_upload_to_gsheet_no_spreadsheet_id_passed(self,mock_log,mock_os):
self.assertEqual(exit_code,1)
mock_log.error.assert_called_once_with('Empty spreadsheet id passed!')

@patch('builtins.open', new_callable=mock_open)
@patch('renaming_benchmark.log')
@patch('renaming_benchmark._check_for_config_file_inconsistency')
@patch('renaming_benchmark.json.load')
def test_run_rename_benchmark_error_config_inconsistency(
        self, mock_json, mock_inconsistency, mock_log, mock_file_open):
    """_run_rename_benchmark exits when the config consistency check fails.

    Reading the config is stubbed out (open and json.load are patched) and
    _check_for_config_file_inconsistency is forced to return 1. The run is
    expected to raise SystemExit and to log 'Exited with code 1' exactly once.
    """
    # Patch decorators inject mocks bottom-up, so the last parameter is the
    # patched builtins.open. It is named mock_file_open so that it does not
    # shadow the mock_open helper imported at module level.
    test_type = "flat"
    dir_config = "test-config.json"
    num_samples = 10
    upload_gs = True
    mock_inconsistency.return_value = 1
    mock_json.return_value = {}

    with self.assertRaises(SystemExit):
        renaming_benchmark._run_rename_benchmark(
            test_type, dir_config, num_samples, upload_gs)

    mock_log.error.assert_called_once_with('Exited with code 1')

@patch('builtins.open', new_callable=mock_open)
@patch('renaming_benchmark.log')
@patch('renaming_benchmark._check_for_config_file_inconsistency')
@patch('renaming_benchmark._check_if_dir_structure_exists')
@patch('renaming_benchmark.json.load')
def test_run_rename_benchmark_error_dir_does_not_exist(
        self, mock_json, mock_check_dir_exists, mock_inconsistency, mock_log,
        mock_file_open):
    """_run_rename_benchmark exits when the test data is missing.

    The config check is forced to pass (returns 0) while
    _check_if_dir_structure_exists returns False. The run is expected to
    raise SystemExit and to log, exactly once, the hint naming the config
    file to pass to generate_folders_and_files.py.
    """
    # Patch decorators inject mocks bottom-up, so the last parameter is the
    # patched builtins.open. It is named mock_file_open so that it does not
    # shadow the mock_open helper imported at module level.
    test_type = "flat"
    dir_config = "test-config.json"
    num_samples = 10
    upload_gs = True
    mock_inconsistency.return_value = 0
    mock_check_dir_exists.return_value = False
    mock_json.return_value = {}

    with self.assertRaises(SystemExit):
        renaming_benchmark._run_rename_benchmark(
            test_type, dir_config, num_samples, upload_gs)

    # The expected message is a backslash-continued literal: any leading
    # whitespace on the continuation line becomes part of the string, so that
    # line is deliberately left exactly as written and must stay in step with
    # the literal logged by renaming_benchmark.
    mock_log.error.assert_called_once_with("Test data does not exist.To create test data, run : \
python3 generate_folders_and_files.py {} ".format(dir_config))

@patch('renaming_benchmark.SPREADSHEET_ID', 'temp-gsheet-id')
@patch('renaming_benchmark.WORKSHEET_NAME_FLAT', 'flat-sheet')
@patch('builtins.open', new_callable=mock_open)
@patch('renaming_benchmark.log')
@patch('renaming_benchmark._check_for_config_file_inconsistency')
@patch('renaming_benchmark._check_if_dir_structure_exists')
@patch('renaming_benchmark._perform_testing')
@patch('renaming_benchmark._parse_results')
@patch('renaming_benchmark._get_values_to_export')
@patch('renaming_benchmark._upload_to_gsheet')
@patch('renaming_benchmark.json.load')
def test_run_rename_benchmark_upload_true(
        self, mock_json, mock_upload, mock_get_values, mock_parse_results,
        mock_perform_testing, mock_check_dir_exists, mock_inconsistency,
        mock_log, mock_file_open):
    """A flat run with upload_gs=True uploads to the flat worksheet.

    Every collaborator of _run_rename_benchmark is patched. The test checks
    that the values returned by _get_values_to_export are handed to
    _upload_to_gsheet together with the patched flat worksheet name and
    spreadsheet id, and that the upload is announced through log.info.
    """
    # The two constant patches pass an explicit replacement value and so
    # inject no argument; the remaining mocks arrive bottom-up. The last
    # parameter is the patched builtins.open, named mock_file_open so that it
    # does not shadow the mock_open helper imported at module level.
    test_type = "flat"
    dir_config = "test-config.json"
    num_samples = 10
    upload_gs = True
    worksheet = 'flat-sheet'
    spreadsheet_id = 'temp-gsheet-id'
    mock_inconsistency.return_value = 0
    mock_check_dir_exists.return_value = True
    mock_parse_results.return_value = {'key': 'val'}
    mock_get_values.return_value = [['testdata', 'testdata2']]
    mock_upload.return_value = 0
    mock_json.return_value = {}

    renaming_benchmark._run_rename_benchmark(
        test_type, dir_config, num_samples, upload_gs)

    mock_log.info.assert_called_with('Uploading files to the Google Sheet\n')
    mock_upload.assert_called_with(
        worksheet, [['testdata', 'testdata2']], spreadsheet_id)


# Run the unittest runner when this test module is executed as a script.
if __name__ == '__main__':
    unittest.main()

0 comments on commit 8288d45

Please sign in to comment.