Skip to content

Commit

Permalink
contrib/intel/jenkins: Fix bug where device is slurm partition
Browse files Browse the repository at this point in the history
Slurm partition name is getting passed in as the device when it
shouldn't be.
Add cuda.exclude file to exclude failing tests
Skip xd2d. Since there is only one device, this test is impossible.

Signed-off-by: Zach Dworkin <zachary.dworkin@intel.com>
  • Loading branch information
zachdworkin committed Mar 15, 2024
1 parent ccb0f0a commit 0377d56
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 7 deletions.
8 changes: 4 additions & 4 deletions contrib/intel/jenkins/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -776,8 +776,8 @@ pipeline {
"shm", null, null, "h2d")
run_fabtests("cuda_v1_shm", "cyndaquil", "cyndaquil", "1",
"shm", null, null, "d2d")
run_fabtests("cuda_v1_shm", "cyndaquil", "cyndaquil", "1",
"shm", null, null, "xd2d")
// run_fabtests("cuda_v1_shm", "cyndaquil", "cyndaquil", "1",
// "shm", null, null, "xd2d")
}
}
}
Expand All @@ -790,8 +790,8 @@ pipeline {
null, null, "h2d")
run_fabtests("cuda_v2_shm", "quilava", "quilava", "1", "shm",
null, null, "d2d")
run_fabtests("cuda_v2_shm", "quilava ", "quilava", "1", "shm",
null, null, "xd2d")
// run_fabtests("cuda_v2_shm", "quilava ", "quilava", "1", "shm",
// null, null, "xd2d")
}
}
}
Expand Down
3 changes: 3 additions & 0 deletions contrib/intel/jenkins/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ def build_fabtests(libfab_install_path, mode, cuda=False):
config_cmd = ['./configure', f'--prefix={libfab_install_path}',
f'--with-libfabric={libfab_install_path}']

if cuda:
config_cmd.append(f'--with-cuda={os.environ["CUDA_INSTALL"]}')

common.run_command(['./autogen.sh'])
common.run_command(config_cmd)
common.run_command(['make','clean'])
Expand Down
9 changes: 6 additions & 3 deletions contrib/intel/jenkins/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,16 @@ def __init__(self, jobname, buildno, testname, hw, core_prov, fabric,
util_prov, way)
self.fabtestpath = f'{self.libfab_installpath}/bin'
self.fabtestconfigpath = f'{self.libfab_installpath}/share/fabtests'
self.device = cloudbees_config.fabric_map[self.hw]

def get_exclude_file(self):
path = self.libfab_installpath
efile_path = f'{path}/share/fabtests/test_configs'

if self.hw == 'ivysaur':
efile = f'{efile_path}/{self.core_prov}/io_uring.exclude'
elif self.hw == 'cyndaquil' or self.hw == 'quilava':
efile = f'{efile_path}/{self.core_prov}/cuda.exclude'
else:
prov = self.util_prov if self.util_prov else self.core_prov
efile_old = f'{efile_path}/{prov}/{prov}.exclude'
Expand Down Expand Up @@ -155,11 +158,11 @@ def options(self):
opts += "-t all "

if (self.way == 'h2d'):
opts += f"-C \"-H\" -L \"-D {self.hw}\" "
opts += f"-C \"-H\" -L \"-D {self.device}\" "
elif (self.way == 'd2d'):
opts += f"-C \"-D {self.hw}\" -L \"-D {self.hw}\" "
opts += f"-C \"-D {self.device}\" -L \"-D {self.device}\" "
elif (self.way == 'xd2d'):
opts += f"-C \"-D {self.hw}\" -L \"-D {self.hw} -i 1\" "
opts += f"-C \"-D {self.device}\" -L \"-D {self.device} -i 1\" "

if (self.core_prov == 'sockets' and self.ofi_build_mode == 'reg'):
complex_test_file = f'{self.libfab_installpath}/share/fabtests/'\
Expand Down
1 change: 1 addition & 0 deletions fabtests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ nobase_dist_config_DATA = \
test_configs/ofi_rxd/ofi_rxd.exclude \
test_configs/shm/all.test \
test_configs/shm/shm.exclude \
test_configs/shm/cuda.exclude \
test_configs/shm/quick.test \
test_configs/shm/verify.test \
test_configs/sm2/quick.test \
Expand Down
28 changes: 28 additions & 0 deletions fabtests/test_configs/shm/cuda.exclude
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Regex patterns of tests to exclude in runfabtests.sh

inject_test
^fi_msg
-e msg
^fi_dgram
-e dgram
rdm_tagged_peek
multi_ep
av_xfer
unexpected_msg
multi_recv

# Exclude tests that use sread/polling
rdm_cntr_pingpong
poll

# Exclude tests with unsupported capabilities
-k
cm_data
trigger
shared_ctx
scalable_ep
shared_av
multi_mr
av_test

multinode

0 comments on commit 0377d56

Please sign in to comment.