Skip to content

Commit

Permalink
DAOS-6916 test: Fix interception library tests by letting threads …
Browse files Browse the repository at this point in the history
…have independent data (#4828) (#5243)

The IOR thread was using self.ior_cmd. This leads to collisions between threads because many things, such as ior command
arguments, are shared. Updated ior_test_base.py so that each thread has its own IorCommand object, making the threads
independent and removing the need to sleep between thread starts.
Updated ior_intercept_dfuse_mix.py test steps to compare the throughput performance of the 2 IOR threads, and to print
a summary of the improvements.
Removed use_json from pool_query.
Added control_method: dmg to yaml.
Also refactored tags.

Signed-off-by: Makito Kano <makito.kano@intel.com>
  • Loading branch information
shimizukko authored Apr 1, 2021
1 parent 832da87 commit b7ceab3
Show file tree
Hide file tree
Showing 6 changed files with 207 additions and 142 deletions.
168 changes: 104 additions & 64 deletions src/tests/ftest/ior/ior_intercept_dfuse_mix.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
SPDX-License-Identifier: BSD-2-Clause-Patent
"""

import os
from ior_test_base import IorTestBase
from ior_utils import IorCommand, IorMetrics
Expand All @@ -16,6 +15,8 @@ class IorInterceptDfuseMix(IorTestBase):
dfuse and interception library on a single server and multi
client settings with basic parameters.
Verify the throughput improvement with IL.
:avocado: recursive
"""

Expand All @@ -26,7 +27,7 @@ def test_ior_intercept_dfuse_mix(self):
Purpose of this test is to run ior through dfuse on 4 clients
for 5 minutes and capture the metrics and use the
interception library by exporting LD_PRELOAD to the libioil.so
path on 3 clients and leave 1 client to use dfuse and rerun
path on 2 clients and leave 2 clients to use dfuse and rerun
the above ior and capture the metrics and compare the
performance difference and check using interception
library make significant performance improvement. Verify the
Expand All @@ -42,78 +43,117 @@ def test_ior_intercept_dfuse_mix(self):
library provides better performance and not using it
does not change the performance.
:avocado: tags=all,full_regression,hw,large,daosio,iorinterceptmix
:avocado: tags=all,full_regression
:avocado: tags=hw,large
:avocado: tags=daosio,ior_intercept_mix
"""
without_intercept = dict()
self.run_multiple_ior_with_pool(without_intercept)
self.add_pool()
self.add_container(self.pool)

# Run 2 IOR threads; one with IL and the other without.
results = dict()
intercept = os.path.join(self.prefix, 'lib64', 'libioil.so')
with_intercept = dict()
self.run_multiple_ior_with_pool(with_intercept, intercept)
self.log_metrics(without_intercept, with_intercept)
client_count = len(self.hostlist_clients)
w_clients = self.hostlist_clients[0:int(client_count / 2)]
wo_clients = self.hostlist_clients[int(client_count / 2):]
self.run_ior_threads_il(
results=results, intercept=intercept, with_clients=w_clients,
without_clients=wo_clients)

# Print the raw results from the IOR stdout.
IorCommand.log_metrics(
self.log, "{} clients - with interception library".format(
len(w_clients)), results[1])
IorCommand.log_metrics(
self.log, "{} clients - without interception library".format(
len(wo_clients)), results[2])

# Get Max, Min, and Mean throughput values for Write and Read.
w_write_results = results[1][0]
w_read_results = results[1][1]
wo_write_results = results[2][0]
wo_read_results = results[2][1]

max_mib = int(IorMetrics.Max_MiB)
min_mib = int(IorMetrics.Min_MiB)
mean_mib = int(IorMetrics.Mean_MiB)

w_write_max = float(w_write_results[max_mib])
wo_write_max = float(wo_write_results[max_mib])
w_write_min = float(w_write_results[min_mib])
wo_write_min = float(wo_write_results[min_mib])
w_write_mean = float(w_write_results[mean_mib])
wo_write_mean = float(wo_write_results[mean_mib])

w_read_max = float(w_read_results[max_mib])
wo_read_max = float(wo_read_results[max_mib])
w_read_min = float(w_read_results[min_mib])
wo_read_min = float(wo_read_results[min_mib])
w_read_mean = float(w_read_results[mean_mib])
wo_read_mean = float(wo_read_results[mean_mib])

# Calculate the increase for the 6 values.
# [max, min, mean]
write_changes = [-1, -1, -1]
if wo_write_max > 0:
write_changes[0] = round(w_write_max / wo_write_max, 4)
if wo_write_min > 0:
write_changes[1] = round(w_write_min / wo_write_min, 4)
if wo_write_mean > 0:
write_changes[2] = round(w_write_mean / wo_write_mean, 4)

# [max, min, mean]
read_changes = [-1, -1, -1]
if wo_read_max > 0:
read_changes[0] = round(w_read_max / wo_read_max, 4)
if wo_read_min > 0:
read_changes[1] = round(w_read_min / wo_read_min, 4)
if wo_read_mean > 0:
read_changes[2] = round(w_read_mean / wo_read_mean, 4)

# Print the summary of improvements.
self.log.info(
"--- Throughput Improvement with Interception Library ---")
self.log.info("Clients with IL: %s", w_clients)
self.log.info("Clients without IL: %s\n", wo_clients)
self.log.info("Write Max: x%f", write_changes[0])
self.log.info("Write Min: x%f", write_changes[1])
self.log.info("Write Mean: x%f\n", write_changes[2])
self.log.info("Read Max: x%f", read_changes[0])
self.log.info("Read Min: x%f", read_changes[1])
self.log.info("Read Mean: x%f", read_changes[2])

# Do the threshold testing.
write_x = self.params.get("write_x", "/run/ior/iorflags/ssf/*", 1)
read_x = self.params.get("read_x", "/run/ior/iorflags/ssf/*", 1)
#read_x = self.params.get("read_x", "/run/ior/iorflags/ssf/*", 1)

errors = []
# Verify that using interception library gives desired performance
# improvement.
# Verifying write performance
self.assertTrue(float(with_intercept[1][0][max_mib]) >
write_x * float(without_intercept[1][0][max_mib]))
self.assertTrue(float(with_intercept[1][0][min_mib]) >
write_x * float(without_intercept[1][0][min_mib]))
self.assertTrue(float(with_intercept[1][0][mean_mib]) >
write_x * float(without_intercept[1][0][mean_mib]))

# Verifying read performance
self.assertTrue(float(with_intercept[1][1][max_mib]) >
read_x * float(without_intercept[1][1][max_mib]))
self.assertTrue(float(with_intercept[1][1][min_mib]) >
read_x * float(without_intercept[1][1][min_mib]))
self.assertTrue(float(with_intercept[1][1][mean_mib]) >
read_x * float(without_intercept[1][1][mean_mib]))

# Verify that not using interception library on both runs does
# not change the performance.
# Perf. improvement if any is less than the desired.
# Verifying write performance
self.assertTrue(float(with_intercept[2][0][max_mib]) <
write_x * float(without_intercept[2][0][max_mib]))
self.assertTrue(float(with_intercept[2][0][min_mib]) <
write_x * float(without_intercept[2][0][min_mib]))
self.assertTrue(float(with_intercept[2][0][mean_mib]) <
write_x * float(without_intercept[2][0][mean_mib]))

if w_write_max <= write_x * wo_write_max:
errors.append("Write Max with IL is less than x{}!".format(write_x))
if w_write_min <= write_x * wo_write_min:
errors.append("Write Min with IL is less than x{}!".format(write_x))
if w_write_mean <= write_x * wo_write_mean:
errors.append(
"Write Mean with IL is less than x{}!".format(write_x))

# DAOS-5857
# Read performance with IL was lower in CI. The environment had OPA +
# PMEM and NVMe. It was about 2x with IB + RAM.
# Uncomment below (and read_x line) if the lower performance issue is
# fixed.
# Verifying read performance
# Read performance is not significant with interception library
# and most likely the read_x will be 1. To avoid unnecessary
# failure keeping flat 1.5 x just to set the boundary for the client
# without interception library
self.assertTrue(float(with_intercept[2][1][max_mib]) <
1.5 * float(without_intercept[2][1][max_mib]))
self.assertTrue(float(with_intercept[2][1][min_mib]) <
1.5 * float(without_intercept[2][1][min_mib]))
self.assertTrue(float(with_intercept[2][1][mean_mib]) <
1.5 * float(without_intercept[2][1][mean_mib]))

def log_metrics(self, without_intercept, with_intercept):
"""Log the ior metrics because the stdout from ior can be mixed
because of multithreading.
Args:
without_intercept (dict): IOR Metrics without using
interception library.
with_intercept (dict): IOR Metrics using interception
library.
"""
IorCommand.log_metrics(self.log, "3 clients - without " +
"interception library", without_intercept[1])
IorCommand.log_metrics(self.log, "3 clients - with " +
"interception library", with_intercept[1])
IorCommand.log_metrics(self.log, "1 client - without " +
"interception library", without_intercept[2])
IorCommand.log_metrics(self.log, "1 clients - without " +
"interception library", with_intercept[2])
# if w_read_max <= read_x * wo_read_max:
# errors.append("Read Max with IL is less than x{}!".format(read_x))
# if w_read_min <= read_x * wo_read_min:
# errors.append(
# "Read Min with IL is less than x{}!".format(read_x))
# if w_read_mean <= read_x * wo_read_mean:
# errors.append(
# "Read Mean with IL is less than x{}!".format(read_x))

if errors:
self.fail("Poor IL throughput improvement!\n{}".format(
"\n".join(errors)))
5 changes: 2 additions & 3 deletions src/tests/ftest/ior/ior_intercept_dfuse_mix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ container:
ior:
client_processes:
np: 32
test_file: daos:testFile
repetitions: 1
# Remove the below line once DAOS-3143 is resolved
dfs_destroy: False
Expand All @@ -38,8 +37,8 @@ ior:
api: POSIX
transfer_size: '1M'
block_size: '8G'
write_x: 1
read_x: 1
write_x: 4
read_x: 2
dfs_oclass: "SX"
dfuse:
mount_dir: "/tmp/daos_dfuse/"
31 changes: 20 additions & 11 deletions src/tests/ftest/ior/ior_intercept_verify_data_integrity.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
SPDX-License-Identifier: BSD-2-Clause-Patent
"""

import os
from ior_test_base import IorTestBase
from ior_utils import IorCommand
Expand All @@ -24,7 +23,7 @@ def test_ior_intercept_verify_data(self):
Test Description:
Purpose of this test is to run ior through dfuse with
interception library on 5 clients and without interception
interception library on 5 clients and without interception
library on 1 client for at least 30 minutes and verify the
data integrity using ior's Read Verify and Write Verify
options.
Expand All @@ -35,14 +34,24 @@ def test_ior_intercept_verify_data(self):
Run ior with read, write, read verify
write verify for 30 minutes
:avocado: tags=all,full_regression,hw,large
:avocado: tags=daosio,iorinterceptverifydata
:avocado: tags=all,full_regression
:avocado: tags=hw,large
:avocado: tags=daosio,ior_intercept_verify_data
"""
intercept = os.path.join(self.prefix, 'lib64', 'libioil.so')
with_intercept = dict()
self.run_multiple_ior_with_pool(with_intercept, intercept)
self.add_pool()
self.add_container(self.pool)

IorCommand.log_metrics(self.log, "5 clients - with " +
"interception library", with_intercept[1])
IorCommand.log_metrics(self.log, "1 client - without " +
"interception library", with_intercept[2])
intercept = os.path.join(self.prefix, 'lib64', 'libioil.so')
results = dict()
client_count = len(self.hostlist_clients)
w_clients = self.hostlist_clients[0:client_count - 1]
wo_clients = [self.hostlist_clients[-1]]

self.run_ior_threads_il(
results=results, intercept=intercept, with_clients=w_clients,
without_clients=wo_clients)

IorCommand.log_metrics(
self.log, "5 clients - with interception library", results[1])
IorCommand.log_metrics(
self.log, "1 client - without interception library", results[2])
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ pool:
nvme_size: 200000000000
createsvc:
svcn: 1
control_method: dmg
container:
type: POSIX
control_method: daos
Expand Down
8 changes: 2 additions & 6 deletions src/tests/ftest/util/dmg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,12 +424,11 @@ def pool_create(self, scm_size, uid=None, gid=None, nvme_size=None,

return data

def pool_query(self, pool, use_json=True):
def pool_query(self, pool):
"""Query a pool with the dmg command.
Args:
uuid (str): Pool UUID to query.
use_json (bool): Whether to use --json. Defaults to True.
Raises:
CommandFailure: if the dmg pool query command fails.
Expand Down Expand Up @@ -475,10 +474,7 @@ def pool_query(self, pool, use_json=True):
# "error": null,
# "status": 0
# }
if use_json:
return self._get_json_result(("pool", "query"), pool=pool)

return self._get_result(("pool", "query"), pool=pool)
return self._get_json_result(("pool", "query"), pool=pool)

def pool_destroy(self, pool, force=True):
"""Destroy a pool with the dmg command.
Expand Down
Loading

0 comments on commit b7ceab3

Please sign in to comment.