-
Notifications
You must be signed in to change notification settings - Fork 24
Expand file tree
/
Copy pathfunctions.sh
More file actions
108 lines (92 loc) · 2.68 KB
/
functions.sh
File metadata and controls
108 lines (92 loc) · 2.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/bin/bash
#######################################
# Submit a batch script through sbatch and print the resulting job id.
# Arguments:
#   $1 - partition passed as --partition
#   $2 - node list passed as --nodelist
#   $3 - path of the batch script to submit
#   $4 - log file passed as --output
#   $5.. - extra arguments forwarded to the batch script
# Outputs:
#   stdout: the numeric job id parsed from sbatch's reply
#   stderr: a failure report and the attempted command line when the
#           reply does not contain "Submitted batch job <id>"
# Returns:
#   0 on success; calls `exit 1` when the reply cannot be parsed.
#######################################
submit_job() {
  local partition="$1" nodelist="$2" script="$3" log_file="$4"
  shift 4

  # Assemble the full command line once; the remaining positional
  # parameters are handed to the batch script untouched.
  local -a sbatch_args=(
    --partition="$partition"
    --nodelist="$nodelist"
    --output="$log_file"
    "$script"
    "$@"
  )

  local response
  response=$(sbatch "${sbatch_args[@]}")

  # Guard clause: anything that does not look like a successful
  # submission is reported and terminates the current (sub)shell.
  local accepted_re='Submitted batch job ([0-9]+)'
  if [[ ! "$response" =~ $accepted_re ]]; then
    echo "Submission failed: $response" >&2
    echo "Command: sbatch --partition=$partition --nodelist=$nodelist --output=$log_file $script $*" >&2
    exit 1
  fi

  echo "${BASH_REMATCH[1]}"
}
#######################################
# Block until a single job no longer appears in squeue.
# Arguments:
#   $1 - job id to poll
#   $2 - human-readable description used in progress messages
# Outputs:
#   stdout: a "Waiting for ..." line, then "<description> job completed"
# Notes:
#   squeue's stderr is discarded and an empty result is read as
#   "finished", so a failed query is also treated as completion.
#######################################
wait_for_job() {
  local job_id="$1"
  local description="$2"
  # Keep the polled state local so a caller's own `state` variable
  # is not overwritten by this function.
  local state

  echo "Waiting for $description job ($job_id) to complete..."
  while true; do
    state=$(squeue -j "$job_id" -h -o %T 2>/dev/null)
    if [[ -z "$state" ]]; then
      echo "$description job completed"
      break
    fi
    # Job still listed: poll again in a minute.
    sleep 60
  done
}
#######################################
# Block until none of the given job ids appear in the current user's
# squeue listing.
# Globals:
#   USER (read) - passed to `squeue -u`
# Arguments:
#   $1   - human-readable description used in progress messages
#   $2.. - job ids to wait for (may be empty)
# Outputs:
#   stdout: progress messages, a warning whenever squeue fails, and a
#           final "All <description> jobs completed" line
# Returns:
#   0 once every job has left the queue, or immediately when no ids
#   were supplied.
#######################################
wait_for_jobs() {
  local description="$1"
  shift
  local job_ids=("$@")

  if [[ ${#job_ids[@]} -eq 0 ]]; then
    echo "No $description jobs to wait for."
    return 0
  fi

  echo "Waiting for all $description jobs to complete (Total: ${#job_ids[@]})..."

  # All working variables are local, including the loop variables, so
  # that a caller's own `job_id` / `jid` are not overwritten.
  local -A running_map
  local -a running_jobs
  local squeue_output jid job_id unfinished

  while true; do
    unfinished=0
    running_map=()

    # 1. One query for all of the user's jobs. A failed query must not be
    #    mistaken for an empty queue, so retry instead of evaluating it.
    if ! squeue_output=$(squeue -u "$USER" -h -o "%i" 2>/dev/null); then
      echo "Warning: squeue command failed (scheduler might be busy), retrying in 60 seconds..."
      sleep 60
      continue # Skip this loop iteration to avoid false completion detection
    fi

    # 2. Read output into array
    mapfile -t running_jobs <<< "$squeue_output"

    # 3. Build a lookup table of job ids currently in the queue.
    for jid in "${running_jobs[@]}"; do
      # The here-string yields one empty element when squeue printed nothing.
      if [[ -n "$jid" ]]; then
        running_map["$jid"]=1
      fi
    done

    # 4. Count how many of the target jobs are still listed.
    for job_id in "${job_ids[@]}"; do
      # `:-` keeps the lookup safe when the caller runs with `set -u`.
      if [[ -n "${running_map[$job_id]:-}" ]]; then
        unfinished=$((unfinished + 1))
      fi
    done

    # 5. Determine result
    if [[ "$unfinished" -eq 0 ]]; then
      echo "All $description jobs completed"
      break
    fi

    # Timestamp the progress line to help with troubleshooting.
    echo "[$(date '+%H:%M:%S')] $unfinished $description jobs still pending/running..."
    sleep 60
  done
}
#######################################
# Print a three-line banner announcing a pipeline step.
# Arguments:
#   $1 - step number or label
#   $2 - step title
# Outputs:
#   stdout: a rule line, the "[Step N] title" line, and a closing rule
#######################################
log_step() {
  local rule="========================================================================================"

  echo "$rule"
  echo "========== [Step $1] $2"
  echo "$rule"
}
#######################################
# Print a timestamped informational message.
# Arguments:
#   $1 - message text
# Outputs:
#   stdout: "[INFO] <YYYY-MM-DD HH:MM:SS> <message>"
#######################################
log_info() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[INFO] ${stamp} $1"
}
#######################################
# Print a timestamped error message and terminate the current shell.
# Arguments:
#   $1 - message text
# Outputs:
#   stderr: "[ERROR] <YYYY-MM-DD HH:MM:SS> <message>"
# Returns:
#   Does not return; always calls `exit 1`.
#######################################
log_error() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[ERROR] ${stamp} $1" >&2
  exit 1
}