File tree Expand file tree Collapse file tree 3 files changed +18
-12
lines changed
aws-parallelcluster-computefleet
templates/compute_fleet_status
aws-parallelcluster-slurm/libraries Expand file tree Collapse file tree 3 files changed +18
-12
lines changed Original file line number Diff line number Diff line change 3838 mode '0755'
3939 end
4040
# Render the helper script from the compute_fleet_status/is_fleet_ready.erb
# template and install it as a root-owned executable.
template '/usr/local/bin/is_fleet_ready.sh' do
  source 'compute_fleet_status/is_fleet_ready.erb'
  mode '0755'
  owner 'root'
  group 'root'
end
47+
4148 template "#{ node [ 'cluster' ] [ 'etc_dir' ] } /clusterstatusmgtd.conf" do
4249 source 'clusterstatusmgtd/clusterstatusmgtd.conf.erb'
4350 owner 'root'
Original file line number Diff line number Diff line change 1+ #! /bin/bash
2+
# Prints, one hostname per line, every static ("-st-") node whose Slurm state
# is not idle/alloc/mix/maint. Empty output is meant to signal that no static
# node is still pending.
#
# Fail loudly when sinfo or scontrol cannot be run: without errexit/pipefail a
# failed query produces empty output with exit status 0, which is
# indistinguishable from "every static node is ready".
set -eo pipefail

SLURM_BIN="<%= node['cluster']['slurm']['install_dir'] %>/bin"

# grep exits 1 when it selects no line; here that means "nothing pending",
# so it must not trip errexit/pipefail.
sinfo_output=$("${SLURM_BIN}/sinfo" -h -o '%N %t' | { grep -v -E '(idle|alloc|mix|maint)$' || true; })

while IFS= read -r line; do
  # Each line is "<nodelist expression> <state>"; keep only the nodelist.
  nodelist=$(echo "$line" | awk '{print $1}')
  # The here-string always feeds at least one (possibly empty) line; do not
  # call scontrol with an empty nodelist.
  [[ -n "$nodelist" ]] || continue
  # Expand the compressed nodelist (e.g. queue-st-cr-[1-10]) and keep static nodes only.
  "${SLURM_BIN}/scontrol" show hostnames "$nodelist" | { grep -E '^[a-z0-9\-]+\-st\-[a-z0-9\-]+\-[0-9]+.*' || true; }
done <<< "$sinfo_output"
Original file line number Diff line number Diff line change @@ -204,18 +204,10 @@ def check_for_protected_mode(fleet_status_command) # rubocop:disable Lint/Nested
204204 "/usr/local/bin/get-compute-fleet-status.sh"
205205 )
206206 # Example output for sinfo
207- # $ /opt/slurm/bin/sinfo -N -h -o '%N %t'
208- # ondemand-dy-c52xlarge-1 idle~
209- # ondemand-dy-c52xlarge-2 idle~
210- # spot-dy-c5xlarge-1 idle~
211- # spot-st-t2large-1 down
212- # spot-st-t2large-2 idle
213- # capacity-block-st-t2micro-1 maint
214- # capacity-block-dy-t2micro-1 maint
215- is_fleet_ready_command = Shellwords . escape (
216- "set -o pipefail && #{ node [ 'cluster' ] [ 'slurm' ] [ 'install_dir' ] } /bin/sinfo -N -h -o '%N %t' | { grep -E '^[a-z0-9\\ -]+\\ -st\\ -[a-z0-9\\ -]+\\ -[0-9]+ .*' || true; } | { grep -v -E '(idle|alloc|mix|maint)$' || true; }"
217- )
218- until shell_out! ( "/bin/bash -c #{ is_fleet_ready_command } " ) . stdout . strip . empty?
207+ # sinfo -h -o '%N %t'
208+ # queue-0-dy-compute-resource-g4dn-0-[1-10],queue-1-dy-compute-resource-g4dn-1-[1-10] idle~
209+ # queue-2-dy-compute-resource-g4dn-2-[1-10],queue-3-dy-compute-resource-g4dn-3-[1-10] idle
210+ until shell_out! ( "/bin/bash -c /usr/local/bin/is_fleet_ready.sh" ) . stdout . strip . empty?
219211 check_for_protected_mode ( fleet_status_command )
220212
221213 Chef ::Log . info ( "Waiting for static fleet capacity provisioning" )
You can’t perform that action at this time.
0 commit comments