Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions ansible/adhoc/cudatests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
# Ad-hoc play targeting the `cuda` host group, with privilege escalation and fact gathering enabled.
# NOTE(review): this is a rendered diff hunk (4 additions, 3 deletions) without +/- markers,
# so removed and added lines appear side by side below.
- hosts: cuda
become: true
gather_facts: true
# NOTE(review): `cuda_samples` looks like the removed tag and `cuda_bandwidth` its replacement — confirm.
tags: cuda_samples
tags: cuda_bandwidth
tasks:
# NOTE(review): the unnamed import_role entry looks like the removed side; the named task after it is the addition — confirm.
- ansible.builtin.import_role:
- name: Run CUDA bandwidth tasks
ansible.builtin.import_role:
name: cuda
# NOTE(review): same old/new pairing — samples.yml presumably removed, bandwidth.yml added.
tasks_from: samples.yml
tasks_from: bandwidth.yml
4 changes: 4 additions & 0 deletions ansible/roles/cuda/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,7 @@ cuda_samples_programs:
# cuda_devices: # discovered from deviceQuery run
cuda_persistenced_state: started
cuda_install_nvidiafabricmanger: false
# Variables for nvbandwidth (used by the bandwidth.yml tasks run from cudatests.yml)
# Release version; quoted so it stays a string when interpolated into the tag and directory names.
cuda_bandwidth_version: '0.8'
# Directory on the target host where the release is unpacked and built.
cuda_bandwidth_path: "/var/lib/{{ ansible_user }}/cuda_bandwidth"
# Source tarball of the NVIDIA/nvbandwidth tag v<cuda_bandwidth_version>.
cuda_bandwidth_release_url: "https://github.com/NVIDIA/nvbandwidth/archive/refs/tags/v{{ cuda_bandwidth_version }}.tar.gz"
57 changes: 57 additions & 0 deletions ansible/roles/cuda/tasks/bandwidth.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
---
# Create the working directory for the nvbandwidth sources, owned by
# `ansible_user`.
- name: Ensure CUDA bandwidth path exists
  ansible.builtin.file:
    path: "{{ cuda_bandwidth_path }}"
    state: directory
    mode: "0755"
    owner: "{{ ansible_user }}"
    group: "{{ ansible_user }}"

# Fetch the release tarball from the URL on the target itself (remote_src)
# and unpack it under cuda_bandwidth_path. `creates` makes the task a no-op
# once the versioned source directory already exists.
- name: Download and unpack CUDA bandwidth test release
  ansible.builtin.unarchive:
    src: "{{ cuda_bandwidth_release_url }}"
    dest: "{{ cuda_bandwidth_path }}"
    remote_src: true
    owner: "{{ ansible_user }}"
    group: "{{ ansible_user }}"
    creates: "{{ cuda_bandwidth_path }}/nvbandwidth-{{ cuda_bandwidth_version }}"

# Out-of-tree CMake build directory inside the unpacked sources. Ownership is
# set explicitly so it matches the parent directory and the unpacked tree
# rather than defaulting to the user the task runs as.
- name: Create CUDA bandwidth test build directory
  ansible.builtin.file:
    path: "{{ cuda_bandwidth_path }}/nvbandwidth-{{ cuda_bandwidth_version }}/build"
    state: directory
    owner: "{{ ansible_user }}"
    group: "{{ ansible_user }}"
    mode: "0755"

# Delegated to localhost: make sure the results directory exists under
# appliances_environment_root before any output is written to it.
# NOTE(review): if the calling play sets `become: true`, this delegated task
# inherits it — confirm escalation on the control host is intended.
- name: Ensure cudatests directory exists
  ansible.builtin.file:
    state: directory
    path: "{{ appliances_environment_root }}/cudatests"
    mode: "0755"
  delegate_to: localhost

# Configure and compile nvbandwidth in the build directory.
# The command chain initialises the EESSI environment, loads the build
# toolchain and Boost modules, sources /etc/profile.d/sh.local, then runs
# cmake and a parallel make sized to the host's vCPU count.
# `source` and the EESSI `init/bash` script are bash-specific, while the shell
# module defaults to /bin/sh, so bash is requested explicitly.
# `creates` skips the build once the nvbandwidth binary exists.
- name: Build CUDA bandwidth test
  ansible.builtin.shell:
    cmd: >
      source /cvmfs/software.eessi.io/versions/2023.06/init/bash &&
      module load buildenv/default-foss-2023b &&
      module load Boost/1.82.0-GCC-12.3.0 &&
      . /etc/profile.d/sh.local &&
      cmake .. &&
      make -j {{ ansible_processor_vcpus }}
    executable: /bin/bash
    chdir: "{{ cuda_bandwidth_path }}/nvbandwidth-{{ cuda_bandwidth_version }}/build"
    creates: "{{ cuda_bandwidth_path }}/nvbandwidth-{{ cuda_bandwidth_version }}/build/nvbandwidth"

# Execute the built nvbandwidth binary from its build directory and keep the
# result in `cuda_bandwidth_output` for the task that saves it.
# Always reported as changed: the benchmark is re-run on every invocation.
- name: Run CUDA bandwidth test
  ansible.builtin.shell:
    cmd: |
      ./nvbandwidth
    chdir: "{{ cuda_bandwidth_path }}/nvbandwidth-{{ cuda_bandwidth_version }}/build/"
  changed_when: true
  register: cuda_bandwidth_output

# Delegated to localhost: write the captured stdout of the nvbandwidth run to
# one file per inventory host (nvbandwidth-<inventory_hostname>.txt) in the
# cudatests directory under appliances_environment_root.
- name: Save CUDA bandwidth output to per-host results file
  ansible.builtin.copy:
    content: "{{ cuda_bandwidth_output.stdout }}"
    dest: "{{ appliances_environment_root }}/cudatests/nvbandwidth-{{ inventory_hostname }}.txt"
    mode: "0644"
  delegate_to: localhost
Loading