Skip to content

Commit

Permalink
fabtests/efa: Avoid testing duplicate mixed memory type workload
Browse files Browse the repository at this point in the history
Currently, we always test mixed memory type workloads explicitly in both directions,
e.g. cuda-to-host and host-to-cuda.

However, the pingpong and rma read/write tests are all bi-directional, which means there is
no difference between cuda-to-host and host-to-cuda. Running them twice in this situation is
unnecessary and results in a longer test duration.

This patch improves this by having a reduced memory type list for bi-directional tests.

Signed-off-by: Shi Jin <sjina@amazon.com>
  • Loading branch information
shijin-aws committed Sep 11, 2024
1 parent 036e2dd commit 1e1c34e
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 35 deletions.
37 changes: 30 additions & 7 deletions fabtests/pytest/efa/conftest.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,39 @@
import pytest

# The memory types for bi-directional tests.
# Only one direction of each mixed pairing is listed (e.g. "host_to_cuda" but
# not "cuda_to_host"); the reverse directions are added in memory_type_list_all.
# Entries that involve device memory carry the matching pytest mark
# (cuda_memory / neuron_memory).
memory_type_list_bi_dir = [
pytest.param("host_to_host"),
pytest.param("host_to_cuda", marks=pytest.mark.cuda_memory),
pytest.param("cuda_to_cuda", marks=pytest.mark.cuda_memory),
pytest.param("host_to_neuron", marks=pytest.mark.neuron_memory),
pytest.param("neuron_to_neuron", marks=pytest.mark.neuron_memory),
]

@pytest.fixture(scope="module", params=["host_to_host",
pytest.param("host_to_cuda", marks=pytest.mark.cuda_memory),
pytest.param("cuda_to_host", marks=pytest.mark.cuda_memory),
pytest.param("cuda_to_cuda", marks=pytest.mark.cuda_memory),
pytest.param("neuron_to_neuron", marks=pytest.mark.neuron_memory),
pytest.param("neuron_to_host", marks=pytest.mark.neuron_memory),
pytest.param("host_to_neuron", marks=pytest.mark.neuron_memory)])
# Add more memory types that are useful for uni-directional tests.
# The result is every bi-directional entry plus the reverse direction of the
# mixed host/device pairings ("cuda_to_host" and "neuron_to_host").
memory_type_list_all = memory_type_list_bi_dir + [
pytest.param("cuda_to_host", marks=pytest.mark.cuda_memory),
pytest.param("neuron_to_host", marks=pytest.mark.neuron_memory),
]

@pytest.fixture(scope="module", params=memory_type_list_all)
def memory_type(request):
    """Return the memory type selected for the current parametrized run.

    The fixture is parametrized over memory_type_list_all, so a test that
    requests it runs once per entry of that list.
    """
    return request.param

@pytest.fixture(scope="module", params=memory_type_list_bi_dir)
def memory_type_bi_dir(request):
    """Return the memory type selected for the current parametrized run.

    The fixture is parametrized over memory_type_list_bi_dir, the reduced
    list intended for bi-directional tests.
    """
    return request.param

@pytest.fixture(scope="module", params=["read", "writedata", "write"])
def rma_operation_type(request):
    """Return the RMA operation selected for the current parametrized run.

    One of "read", "writedata" or "write".
    """
    return request.param

@pytest.fixture(scope="module")
def check_rma_bw_memory_type(memory_type, rma_operation_type):
    """Skip memory types that are redundant for a bi-directional rma_bw run.

    Every operation other than "writedata" is treated as bi-directional.
    For those operations, a memory type that is not listed in
    memory_type_list_bi_dir is skipped as a duplicate. "writedata" runs
    keep every memory type.
    """
    if rma_operation_type == "writedata":
        # Treated as uni-directional: nothing to filter out.
        return

    # pytest.param(...) keeps its positional arguments in `.values`;
    # the first one is the memory type string.
    bi_dir_memory_types = [param.values[0] for param in memory_type_list_bi_dir]
    if memory_type in bi_dir_memory_types:
        return

    pytest.skip("Duplicated memory type for bi-directional test")


@pytest.fixture(scope="module", params=["r:0,4,64",
"r:4048,4,4148",
"r:8000,4,9000",
Expand Down
20 changes: 10 additions & 10 deletions fabtests/pytest/efa/test_rdm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
@pytest.mark.parametrize("iteration_type",
[pytest.param("short", marks=pytest.mark.short),
pytest.param("standard", marks=pytest.mark.standard)])
def test_rdm_pingpong(cmdline_args, iteration_type, completion_semantic, memory_type, completion_type):
def test_rdm_pingpong(cmdline_args, iteration_type, completion_semantic, memory_type_bi_dir, completion_type):
command = "fi_rdm_pingpong" + " " + perf_progress_model_cli
efa_run_client_server_test(cmdline_args, command, iteration_type,
completion_semantic, memory_type, "all", completion_type=completion_type)
completion_semantic, memory_type_bi_dir, "all", completion_type=completion_type)

@pytest.mark.functional
@pytest.mark.serial
Expand All @@ -21,9 +21,9 @@ def test_mr_exhaustion_rdm_pingpong(cmdline_args):
"transmit_complete", "host_to_host", "all", timeout=1000)

@pytest.mark.functional
def test_rdm_pingpong_range(cmdline_args, completion_semantic, memory_type, message_size):
def test_rdm_pingpong_range(cmdline_args, completion_semantic, memory_type_bi_dir, message_size):
efa_run_client_server_test(cmdline_args, "fi_rdm_pingpong", "short",
completion_semantic, memory_type, message_size)
completion_semantic, memory_type_bi_dir, message_size)

@pytest.mark.functional
def test_rdm_pingpong_no_inject_range(cmdline_args, completion_semantic, inject_message_size):
Expand All @@ -33,15 +33,15 @@ def test_rdm_pingpong_no_inject_range(cmdline_args, completion_semantic, inject_
@pytest.mark.parametrize("iteration_type",
[pytest.param("short", marks=pytest.mark.short),
pytest.param("standard", marks=pytest.mark.standard)])
def test_rdm_tagged_pingpong(cmdline_args, iteration_type, completion_semantic, memory_type, completion_type):
def test_rdm_tagged_pingpong(cmdline_args, iteration_type, completion_semantic, memory_type_bi_dir, completion_type):
command = "fi_rdm_tagged_pingpong" + " " + perf_progress_model_cli
efa_run_client_server_test(cmdline_args, command, iteration_type,
completion_semantic, memory_type, "all", completion_type=completion_type)
completion_semantic, memory_type_bi_dir, "all", completion_type=completion_type)

@pytest.mark.functional
def test_rdm_tagged_pingpong_range(cmdline_args, completion_semantic, memory_type, message_size):
def test_rdm_tagged_pingpong_range(cmdline_args, completion_semantic, memory_type_bi_dir, message_size):
efa_run_client_server_test(cmdline_args, "fi_rdm_tagged_pingpong", "short",
completion_semantic, memory_type, message_size)
completion_semantic, memory_type_bi_dir, message_size)

@pytest.mark.parametrize("iteration_type",
[pytest.param("short", marks=pytest.mark.short),
Expand Down Expand Up @@ -116,13 +116,13 @@ def test_rdm_pingpong_1G(cmdline_args, completion_semantic):
memory_type="host_to_host", warmup_iteration_type=0)

@pytest.mark.functional
def test_rdm_pingpong_zcpy_recv(cmdline_args, memory_type, zcpy_recv_max_msg_size, zcpy_recv_message_size):
def test_rdm_pingpong_zcpy_recv(cmdline_args, memory_type_bi_dir, zcpy_recv_max_msg_size, zcpy_recv_message_size):
if cmdline_args.server_id == cmdline_args.client_id:
pytest.skip("no zero copy recv for intra-node communication")
cmdline_args_copy = copy.copy(cmdline_args)
cmdline_args_copy.append_environ("FI_EFA_ENABLE_SHM_TRANSFER=0")
efa_run_client_server_test(cmdline_args_copy, f"fi_rdm_pingpong --max-msg-size {zcpy_recv_max_msg_size}",
"short", "transmit_complete", memory_type, zcpy_recv_message_size)
"short", "transmit_complete", memory_type_bi_dir, zcpy_recv_message_size)

@pytest.mark.functional
def test_rdm_bw_zcpy_recv(cmdline_args, memory_type, zcpy_recv_max_msg_size, zcpy_recv_message_size):
Expand Down
20 changes: 8 additions & 12 deletions fabtests/pytest/efa/test_rma_bw.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,45 +4,41 @@
import copy


@pytest.mark.parametrize("operation_type", ["read", "writedata", "write"])
@pytest.mark.parametrize("iteration_type",
[pytest.param("short", marks=pytest.mark.short),
pytest.param("standard", marks=pytest.mark.standard)])
def test_rma_bw(cmdline_args, iteration_type, operation_type, completion_semantic, memory_type):
def test_rma_bw(cmdline_args, iteration_type, rma_operation_type, completion_semantic, memory_type, check_rma_bw_memory_type):
command = "fi_rma_bw -e rdm"
command = command + " -o " + operation_type + " " + perf_progress_model_cli
command = command + " -o " + rma_operation_type + " " + perf_progress_model_cli
# rma_bw test with data verification takes longer to finish
timeout = max(540, cmdline_args.timeout)
efa_run_client_server_test(cmdline_args, command, iteration_type, completion_semantic, memory_type, "all", timeout=timeout)

@pytest.mark.parametrize("operation_type", ["read", "writedata", "write"])
@pytest.mark.parametrize("env_vars", [["FI_EFA_TX_SIZE=64"], ["FI_EFA_RX_SIZE=64"], ["FI_EFA_TX_SIZE=64", "FI_EFA_RX_SIZE=64"]])
def test_rma_bw_small_tx_rx(cmdline_args, operation_type, completion_semantic, memory_type, env_vars):
def test_rma_bw_small_tx_rx(cmdline_args, rma_operation_type, completion_semantic, memory_type, env_vars, check_rma_bw_memory_type):
cmdline_args_copy = copy.copy(cmdline_args)
for env_var in env_vars:
cmdline_args_copy.append_environ(env_var)
# Use a window size larger than tx/rx size
command = "fi_rma_bw -e rdm -W 128"
command = command + " -o " + operation_type + " " + perf_progress_model_cli
command = command + " -o " + rma_operation_type + " " + perf_progress_model_cli
# rma_bw test with data verification takes longer to finish
timeout = max(540, cmdline_args_copy.timeout)
efa_run_client_server_test(cmdline_args_copy, command, "short", completion_semantic, memory_type, "all", timeout=timeout)

@pytest.mark.functional
@pytest.mark.parametrize("operation_type", ["read", "writedata", "write"])
def test_rma_bw_range(cmdline_args, operation_type, completion_semantic, message_size, memory_type):
def test_rma_bw_range(cmdline_args, rma_operation_type, completion_semantic, message_size, memory_type, check_rma_bw_memory_type):
command = "fi_rma_bw -e rdm"
command = command + " -o " + operation_type
command = command + " -o " + rma_operation_type
# rma_bw test with data verification takes longer to finish
timeout = max(540, cmdline_args.timeout)
efa_run_client_server_test(cmdline_args, command, "short", completion_semantic, memory_type, message_size, timeout=timeout)


@pytest.mark.functional
@pytest.mark.parametrize("operation_type", ["read", "writedata", "write"])
def test_rma_bw_range_no_inject(cmdline_args, operation_type, completion_semantic, inject_message_size):
def test_rma_bw_range_no_inject(cmdline_args, rma_operation_type, completion_semantic, inject_message_size):
command = "fi_rma_bw -e rdm -j 0"
command = command + " -o " + operation_type
command = command + " -o " + rma_operation_type
# rma_bw test with data verification takes longer to finish
timeout = max(540, cmdline_args.timeout)
efa_run_client_server_test(cmdline_args, command, "short", completion_semantic, "host_to_host", inject_message_size, timeout=timeout)
Expand Down
12 changes: 6 additions & 6 deletions fabtests/pytest/efa/test_rma_pingpong.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,23 @@ def rma_pingpong_message_size(request):
@pytest.mark.parametrize("iteration_type",
[pytest.param("short", marks=pytest.mark.short),
pytest.param("standard", marks=pytest.mark.standard)])
def test_rma_pingpong(cmdline_args, iteration_type, operation_type, completion_semantic, memory_type):
def test_rma_pingpong(cmdline_args, iteration_type, operation_type, completion_semantic, memory_type_bi_dir):
command = "fi_rma_pingpong -e rdm"
command = command + " -o " + operation_type + " " + perf_progress_model_cli
efa_run_client_server_test(cmdline_args, command, iteration_type, completion_semantic, memory_type, "all")
efa_run_client_server_test(cmdline_args, command, iteration_type, completion_semantic, memory_type_bi_dir, "all")


@pytest.mark.functional
@pytest.mark.parametrize("operation_type", ["writedata"])
def test_rma_pingpong_range(cmdline_args, operation_type, completion_semantic, rma_pingpong_message_size, memory_type):
def test_rma_pingpong_range(cmdline_args, operation_type, completion_semantic, rma_pingpong_message_size, memory_type_bi_dir):
command = "fi_rma_pingpong -e rdm"
command = command + " -o " + operation_type
efa_run_client_server_test(cmdline_args, command, "short", completion_semantic, memory_type, rma_pingpong_message_size)
efa_run_client_server_test(cmdline_args, command, "short", completion_semantic, memory_type_bi_dir, rma_pingpong_message_size)


@pytest.mark.functional
@pytest.mark.parametrize("operation_type", ["writedata"])
def test_rma_pingpong_range_no_inject(cmdline_args, operation_type, completion_semantic, rma_pingpong_message_size, memory_type):
def test_rma_pingpong_range_no_inject(cmdline_args, operation_type, completion_semantic, rma_pingpong_message_size, memory_type_bi_dir):
command = "fi_rma_pingpong -e rdm -j 0"
command = command + " -o " + operation_type
efa_run_client_server_test(cmdline_args, command, "short", completion_semantic, memory_type, rma_pingpong_message_size)
efa_run_client_server_test(cmdline_args, command, "short", completion_semantic, memory_type_bi_dir, rma_pingpong_message_size)

0 comments on commit 1e1c34e

Please sign in to comment.