Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions ansible/adhoc/cudatests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
# NOTE(review): this span is a rendered diff hunk with the +/- markers and the
# indentation stripped, so removed and added lines sit back to back (two
# `tags:` lines, two import_role openings, two `tasks_from:` lines). It is not
# loadable as a playbook exactly as shown.
# Play targeting the `cuda` host group, with privilege escalation and fact
# gathering enabled.
- hosts: cuda
become: true
gather_facts: true
# presumably the removed line (old tag) — confirm against the real file
tags: cuda_samples
# presumably the added line (new tag) — confirm against the real file
tags: cuda_bandwidth
tasks:
# presumably the removed, unnamed form of the role import — confirm
- ansible.builtin.import_role:
# presumably the added, named form of the role import — confirm
- name: Run CUDA bandwidth tasks
ansible.builtin.import_role:
name: cuda
# presumably removed: previous task file imported from the cuda role — confirm
tasks_from: samples.yml
# presumably added: task file now imported from the cuda role — confirm
tasks_from: bandwidth.yml
4 changes: 4 additions & 0 deletions ansible/roles/cuda/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,7 @@ cuda_samples_programs:
- bandwidthTest
# cuda_devices: # discovered from deviceQuery run
cuda_persistenced_state: started
# nvbandwidth settings, used by the bandwidth.yml tasks that cudatests.yml runs
# Release tag of NVIDIA/nvbandwidth to fetch
cuda_bandwidth_version: "v0.8"
# Directory the release tarball is unpacked into
cuda_bandwidth_path: "/var/lib/{{ ansible_user }}/cuda_bandwidth"
# Source tarball URL for the tag selected above
cuda_bandwidth_release_url: >-
  https://github.com/NVIDIA/nvbandwidth/archive/refs/tags/{{ cuda_bandwidth_version }}.tar.gz
57 changes: 57 additions & 0 deletions ansible/roles/cuda/tasks/bandwidth.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
---
# Make sure the directory the nvbandwidth release is unpacked into exists,
# with owner and group set to `ansible_user`.
- name: Ensure CUDA bandwidth path exists
  ansible.builtin.file:
    path: "{{ cuda_bandwidth_path }}"
    state: directory
    mode: "0755"
    owner: "{{ ansible_user }}"
    group: "{{ ansible_user }}"

# Fetch the nvbandwidth source tarball from cuda_bandwidth_release_url directly
# on the target host (remote_src) and unpack it under cuda_bandwidth_path.
# The `creates` guard skips the download once the unpacked directory exists.
# Its name is derived from cuda_bandwidth_version instead of being hard-coded,
# so the guard keeps matching when the version is overridden.
# NOTE(review): assumes the tag archive unpacks to "nvbandwidth-<tag without
# the leading v>", as the original hard-coded "nvbandwidth-0.8" did for tag
# "v0.8" — confirm for other tags.
- name: Download CUDA bandwidth test release
  ansible.builtin.unarchive:
    remote_src: true
    src: "{{ cuda_bandwidth_release_url }}"
    dest: "{{ cuda_bandwidth_path }}"
    owner: "{{ ansible_user }}"
    group: "{{ ansible_user }}"
    creates: "{{ cuda_bandwidth_path }}/nvbandwidth-{{ cuda_bandwidth_version | regex_replace('^v', '') }}"

# Create the out-of-tree build directory inside the unpacked source tree.
# The source directory name follows cuda_bandwidth_version (leading "v"
# stripped) rather than a hard-coded "nvbandwidth-0.8".
# NOTE(review): assumes the archive unpacks to "nvbandwidth-<version without
# v>" — confirm for tags other than v0.8.
- name: Creates CUDA bandwidth test build directory
  ansible.builtin.file:
    state: directory
    path: "{{ cuda_bandwidth_path }}/nvbandwidth-{{ cuda_bandwidth_version | regex_replace('^v', '') }}/build"
    mode: "0755"

# Create the cudatests results directory under appliances_environment_root.
# The task is delegated to localhost, so the directory is made there rather
# than on the inventory host being processed.
- name: Ensure cudatests directory exists
  delegate_to: localhost
  ansible.builtin.file:
    state: directory
    path: "{{ appliances_environment_root }}/cudatests"
    mode: "0755"

# Configure and compile nvbandwidth in its build directory.
# The command chain sources the EESSI bash init script, loads the buildenv and
# Boost modules, sources /etc/profile.d/sh.local, then runs cmake and make
# with one job per vCPU reported in the gathered facts.
# `executable: /bin/bash` is set explicitly because the chain uses the bash
# builtin `source` and a bash-specific init script, while the shell module
# defaults to /bin/sh.
# `creates` skips the build once the nvbandwidth binary exists.
# NOTE(review): the source directory name is derived from
# cuda_bandwidth_version with the leading "v" stripped — confirm this matches
# the unpacked archive for tags other than v0.8.
- name: Build CUDA bandwidth test
  ansible.builtin.shell:
    cmd: >-
      source /cvmfs/software.eessi.io/versions/2023.06/init/bash &&
      module load buildenv/default-foss-2023b &&
      module load Boost/1.82.0-GCC-12.3.0 &&
      . /etc/profile.d/sh.local &&
      cmake .. &&
      make -j {{ ansible_processor_vcpus }}
    executable: /bin/bash
    chdir: "{{ cuda_bandwidth_path }}/nvbandwidth-{{ cuda_bandwidth_version | regex_replace('^v', '') }}/build"
    creates: "{{ cuda_bandwidth_path }}/nvbandwidth-{{ cuda_bandwidth_version | regex_replace('^v', '') }}/build/nvbandwidth"

# Run the built nvbandwidth binary with no arguments from its build directory
# and keep the result in cuda_bandwidth_output for later tasks.
# The command module replaces shell because no shell features (pipes,
# redirects, variable expansion) are used.
# `changed_when: true` makes every run report "changed".
# NOTE(review): the build directory name is derived from
# cuda_bandwidth_version with the leading "v" stripped — confirm this matches
# the unpacked archive for tags other than v0.8.
- name: Run CUDA bandwidth test
  ansible.builtin.command:
    cmd: ./nvbandwidth
    chdir: "{{ cuda_bandwidth_path }}/nvbandwidth-{{ cuda_bandwidth_version | regex_replace('^v', '') }}/build/"
  register: cuda_bandwidth_output
  changed_when: true

- name: Save CUDA bandwidth output to bandwidth_results.txt
Copy link
Collaborator

@sjpb sjpb Oct 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there no useful summary we can do here, so that someone not familiar with the system can get a quick idea of "it works" or "it doesn't"?

Copy link
Contributor Author

@claudia-lola claudia-lola Oct 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So you can run an individual test case by running ./nvbandwidth -t <testcase name>, e.g. ./nvbandwidth -t device_to_device_memcpy_read_ce, whereas just running ./nvbandwidth will run all the test cases. The example I gave here runs a lot quicker and gives a shorter output than running all the test cases. Would it be useful, then, to put a task before name: Run CUDA bandwidth test which just runs the test case device_to_device_memcpy_read_ce and prints its output to the console, to show the user that it works?

# Write the captured stdout of the nvbandwidth run to a per-host results file
# in the cudatests directory; delegated to localhost, so the file is written
# there rather than on the inventory host.
# An underscore now separates the hostname from the fixed suffix; previously
# the two were joined directly (e.g. "node1bandwidth_results.txt").
ansible.builtin.copy:
  content: "{{ cuda_bandwidth_output.stdout }}"
  dest: "{{ appliances_environment_root }}/cudatests/{{ inventory_hostname }}_bandwidth_results.txt"
  mode: "0644"
delegate_to: localhost
Loading