Skip to content

Commit edec400

Browse files
authored
feat(ci): run CI pipelines in multi-threaded mode (#975)
### Briefly, what does this PR introduce? We get 4 cores on (default) GitHub actions nodes. This enables eicrecon multi-threading on all cores. It will hopefully surface any multi-threading issues soon enough (preferably in this PR...). It will also help us assess how much faster multi-threaded running is compared to single-threaded. Needs: - [x] #1763 ### What kind of change does this PR introduce? - [ ] Bug fix (issue #__) - [x] New feature (issue #__) - [ ] Documentation update - [ ] Other: __ ### Please check if this PR fulfills the following: - [x] Tests for the changes have been added - [ ] Documentation has been added / updated - [ ] Changes have been communicated to collaborators ### Does this PR introduce breaking changes? What changes might users need to make to their code? No. ### Does this PR change default behavior? No.
1 parent 8547350 commit edec400

File tree

7 files changed

+87
-64
lines changed

7 files changed

+87
-64
lines changed

.github/workflows/linux-eic-shell.yml

Lines changed: 69 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ env:
4646
ASAN_OPTIONS: suppressions=${{ github.workspace }}/.github/asan.supp:malloc_context_size=20:detect_leaks=1:verify_asan_link_order=0:detect_stack_use_after_return=1:detect_odr_violation=1:new_delete_type_mismatch=0:intercept_tls_get_addr=0
4747
LSAN_OPTIONS: suppressions=${{ github.workspace }}/.github/lsan.supp
4848
UBSAN_OPTIONS: suppressions=${{ github.workspace }}/.github/ubsan.supp:print_stacktrace=1:silence_unsigned_overflow=1:report_error_type=1
49+
TSAN_OPTIONS: suppressions=${{ github.workspace }}/.github/tsan.supp
4950

5051
jobs:
5152
env:
@@ -70,6 +71,9 @@ jobs:
7071
matrix:
7172
CXX: [g++, clang++]
7273
CMAKE_BUILD_TYPE: [Release, Debug]
74+
USE_ASAN: [ON]
75+
USE_TSAN: [OFF]
76+
USE_UBSAN: [ON]
7377
release: ${{ fromJSON(needs.env.outputs.release_json) }}
7478
# note that entries that overwrite matrix values require a full specification
7579
# https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/running-variations-of-jobs-in-a-workflow#expanding-or-adding-matrix-configurations
@@ -97,6 +101,13 @@ jobs:
97101
- CXX: clang++
98102
CMAKE_BUILD_TYPE: Release
99103
release: nightly
104+
- CXX: clang++
105+
# thread sanitizer build
106+
- CXX: clang++
107+
CMAKE_BUILD_TYPE: Release
108+
release: nightly
109+
USE_ASAN: OFF
110+
USE_TSAN: ON
100111
- CXX: clang++
101112
CMAKE_BUILD_TYPE: Release
102113
release: 24.09.0-stable
@@ -115,12 +126,13 @@ jobs:
115126
uses: actions/cache@v4
116127
with:
117128
path: .ccache
118-
key: ccache-${{ matrix.CXX }}-${{ matrix.release }}-${{ matrix.CMAKE_BUILD_TYPE }}-${{ github.head_ref || github.ref_name }}-${{ steps.ccache_cache_timestamp.outputs.timestamp }}
129+
key: ccache-${{ matrix.CXX }}-${{ matrix.release }}-${{ matrix.CMAKE_BUILD_TYPE }}-${{matrix.USE_ASAN}}-${{matrix.USE_TSAN}}-${{ github.head_ref || github.ref_name }}-${{ steps.ccache_cache_timestamp.outputs.timestamp }}
119130
restore-keys: |
120-
ccache-${{ matrix.CXX }}-${{ matrix.release }}-${{ matrix.CMAKE_BUILD_TYPE }}-${{ github.ref_name }}-
121-
ccache-${{ matrix.CXX }}-${{ matrix.release }}-${{ matrix.CMAKE_BUILD_TYPE }}-${{ github.head_ref }}-
122-
ccache-${{ matrix.CXX }}-${{ matrix.release }}-${{ matrix.CMAKE_BUILD_TYPE }}-${{ github.base_ref }}-
123-
ccache-${{ matrix.CXX }}-${{ matrix.release }}-${{ matrix.CMAKE_BUILD_TYPE }}-${{ github.event.merge_group.base_ref }}-
131+
ccache-${{ matrix.CXX }}-${{ matrix.release }}-${{ matrix.CMAKE_BUILD_TYPE }}-${{matrix.USE_ASAN}}-${{matrix.USE_TSAN}}-${{ github.ref_name }}-
132+
ccache-${{ matrix.CXX }}-${{ matrix.release }}-${{ matrix.CMAKE_BUILD_TYPE }}-${{matrix.USE_ASAN}}-${{matrix.USE_TSAN}}-${{ github.head_ref }}-
133+
ccache-${{ matrix.CXX }}-${{ matrix.release }}-${{ matrix.CMAKE_BUILD_TYPE }}-${{matrix.USE_ASAN}}-${{matrix.USE_TSAN}}-${{ github.base_ref }}-
134+
ccache-${{ matrix.CXX }}-${{ matrix.release }}-${{ matrix.CMAKE_BUILD_TYPE }}-${{matrix.USE_ASAN}}-${{matrix.USE_TSAN}}-${{ github.event.merge_group.base_ref }}-
135+
ccache-${{ matrix.CXX }}-${{ matrix.release }}-${{ matrix.CMAKE_BUILD_TYPE }}-${{matrix.USE_ASAN}}-${{matrix.USE_TSAN}}-
124136
ccache-${{ matrix.CXX }}-${{ matrix.release }}-${{ matrix.CMAKE_BUILD_TYPE }}-
125137
ccache-${{ matrix.CXX }}-${{ matrix.release }}-
126138
ccache-${{ matrix.CXX }}-
@@ -138,7 +150,7 @@ jobs:
138150
platform-release: "${{ env.platform }}:${{ matrix.release }}"
139151
run: |
140152
# install this repo
141-
CXX="${{ matrix.CXX }}" CXXFLAGS="${{ matrix.CXXFLAGS }}" cmake --warn-uninitialized -B build -S . -DCMAKE_INSTALL_PREFIX=install -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE=${{ matrix.CMAKE_BUILD_TYPE }} -DUSE_ASAN=ON -DUSE_TSAN=OFF -DUSE_UBSAN=ON
153+
CXX="${{ matrix.CXX }}" CXXFLAGS="${{ matrix.CXXFLAGS }}" cmake --warn-uninitialized -B build -S . -DCMAKE_INSTALL_PREFIX=install -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE=${{ matrix.CMAKE_BUILD_TYPE }} -DUSE_ASAN=${{matrix.USE_ASAN}} -DUSE_TSAN=${{matrix.USE_TSAN}} -DUSE_UBSAN=${{matrix.USE_UBSAN}}
142154
cmake --build build -j $(getconf _NPROCESSORS_ONLN) --target install
143155
ccache --show-stats --verbose
144156
ccache --evict-older-than 7d
@@ -164,10 +176,18 @@ jobs:
164176
ctest --test-dir build -V
165177
- name: Compress install directory
166178
run: tar -caf install.tar.zst install/
167-
- name: Upload install directory
179+
- name: Upload install directory (ASAN)
180+
if: ${{ matrix.USE_ASAN == 'ON' && matrix.USE_TSAN == 'OFF' }}
168181
uses: actions/upload-artifact@v4
169182
with:
170-
name: install-${{ matrix.CXX }}-eic-shell-${{ matrix.CMAKE_BUILD_TYPE }}-${{ env.platform }}-${{ matrix.release }}
183+
name: install-${{ matrix.CXX }}-eic-shell-${{ matrix.CMAKE_BUILD_TYPE }}-${{ env.platform }}-${{ matrix.release }}-ASAN
184+
path: install.tar.zst
185+
if-no-files-found: error
186+
- name: Upload install directory (TSAN)
187+
if: ${{ matrix.USE_ASAN == 'OFF' && matrix.USE_TSAN == 'ON' }}
188+
uses: actions/upload-artifact@v4
189+
with:
190+
name: install-${{ matrix.CXX }}-eic-shell-${{ matrix.CMAKE_BUILD_TYPE }}-${{ env.platform }}-${{ matrix.release }}-TSAN
171191
path: install.tar.zst
172192
if-no-files-found: error
173193
# Only compress and upload build directory if we are going to use it later
@@ -487,7 +507,7 @@ jobs:
487507
- name: Download install directory
488508
uses: actions/download-artifact@v4
489509
with:
490-
name: install-${{ matrix.CXX }}-eic-shell-Release-${{ env.platform }}-${{ env.release }}
510+
name: install-${{ matrix.CXX }}-eic-shell-Release-${{ env.platform }}-${{ env.release }}-ASAN
491511
- name: Unarchive install directory
492512
run: tar -xaf install.tar.zst
493513
- name: Download simulation input
@@ -547,7 +567,7 @@ jobs:
547567
- name: Download install directory
548568
uses: actions/download-artifact@v4
549569
with:
550-
name: install-${{ matrix.CXX }}-eic-shell-Release-${{ env.platform }}-${{ env.release }}
570+
name: install-${{ matrix.CXX }}-eic-shell-Release-${{ env.platform }}-${{ env.release }}-ASAN
551571
- name: Unarchive install directory
552572
run: tar -xaf install.tar.zst
553573
- uses: actions/download-artifact@v4
@@ -590,7 +610,7 @@ jobs:
590610
- name: Download install directory
591611
uses: actions/download-artifact@v4
592612
with:
593-
name: install-${{ matrix.CXX }}-eic-shell-Release-${{ env.platform }}-${{ env.release }}
613+
name: install-${{ matrix.CXX }}-eic-shell-Release-${{ env.platform }}-${{ env.release }}-ASAN
594614
- name: Unarchive install directory
595615
run: tar -xaf install.tar.zst
596616
- uses: actions/download-artifact@v4
@@ -639,7 +659,7 @@ jobs:
639659
- name: Download install directory
640660
uses: actions/download-artifact@v4
641661
with:
642-
name: install-${{ matrix.CXX }}-eic-shell-Release-${{ env.platform }}-${{ env.release }}
662+
name: install-${{ matrix.CXX }}-eic-shell-Release-${{ env.platform }}-${{ env.release }}-ASAN
643663
- name: Unarchive install directory
644664
run: tar -xaf install.tar.zst
645665
- uses: actions/download-artifact@v4
@@ -680,7 +700,7 @@ jobs:
680700
- name: Download install directory
681701
uses: actions/download-artifact@v4
682702
with:
683-
name: install-${{ matrix.CXX }}-eic-shell-Release-${{ env.platform }}-${{ env.release }}
703+
name: install-${{ matrix.CXX }}-eic-shell-Release-${{ env.platform }}-${{ env.release }}-ASAN
684704
- name: Uncompress install directory
685705
run: tar -xaf install.tar.zst
686706
- uses: actions/download-artifact@v4
@@ -790,7 +810,7 @@ jobs:
790810
- name: Download install directory
791811
uses: actions/download-artifact@v4
792812
with:
793-
name: install-${{ matrix.CXX }}-eic-shell-Release-${{ env.platform }}-${{ env.release }}
813+
name: install-${{ matrix.CXX }}-eic-shell-Release-${{ env.platform }}-${{ env.release }}-ASAN
794814
- name: Unarchive install directory
795815
run: tar -xaf install.tar.zst
796816
- uses: actions/download-artifact@v4
@@ -841,29 +861,47 @@ jobs:
841861
- build
842862
- npsim-dis
843863
- npsim-minbias
864+
continue-on-error: ${{ matrix.nthreads > 1 }}
844865
strategy:
845866
matrix:
846867
include:
847868
- CXX: clang++
848869
beam: 5x41
849870
minq2: 0
850871
detector_config: craterlake_5x41
872+
sanitizer: ASAN
851873
- CXX: clang++
852874
beam: 10x100
853875
minq2: 0
854876
detector_config: craterlake_10x100
877+
sanitizer: TSAN
855878
- CXX: clang++
856879
beam: 18x275
857880
minq2: 0
858881
detector_config: craterlake_18x275
882+
sanitizer: ASAN
883+
- CXX: clang++
884+
beam: 18x275
885+
minq2: 0
886+
detector_config: craterlake_18x275
887+
sanitizer: TSAN
888+
nthreads: 4
859889
- CXX: clang++
860890
beam: 10x100
861891
minq2: 1000
862892
detector_config: craterlake_tracking_only
893+
sanitizer: ASAN
894+
- CXX: clang++
895+
beam: 18x275
896+
minq2: 1000
897+
detector_config: craterlake_18x275
898+
sanitizer: ASAN
863899
- CXX: clang++
864900
beam: 18x275
865901
minq2: 1000
866902
detector_config: craterlake_18x275
903+
sanitizer: TSAN
904+
nthreads: 4
867905
steps:
868906
- name: Checkout .github
869907
uses: actions/checkout@v4
@@ -872,7 +910,7 @@ jobs:
872910
- name: Download install directory
873911
uses: actions/download-artifact@v4
874912
with:
875-
name: install-${{ matrix.CXX }}-eic-shell-Release-${{ env.platform }}-${{ env.release }}
913+
name: install-${{ matrix.CXX }}-eic-shell-Release-${{ env.platform }}-${{ env.release }}-${{ matrix.sanitizer }}
876914
- name: Unarchive install directory
877915
run: tar -xaf install.tar.zst
878916
- uses: actions/download-artifact@v4
@@ -891,15 +929,15 @@ jobs:
891929
export LD_LIBRARY_PATH=$PWD/install/lib:$LD_LIBRARY_PATH
892930
export JANA_PLUGIN_PATH=$PWD/install/lib/EICrecon/plugins:/usr/local/plugins
893931
prmon --json-summary rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}.prmon.json -- \
894-
$PWD/install/bin/eicrecon ${{env.JANA_OPTIONS}} -Ppodio:output_file=rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}.edm4eic.root sim_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}.edm4hep.root -Pacts:WriteObj=true -Pacts:WritePly=true -Pplugins=janadot,janatop $(<${{ github.workspace }}/.github/janadot.groups)
932+
$PWD/install/bin/eicrecon ${{env.JANA_OPTIONS}} ${{matrix.nthreads > 1 && format('-Pnthreads={0}', matrix.nthreads) || ''}} -Ppodio:output_file=rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}.edm4eic.root sim_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}.edm4hep.root -Pacts:WriteObj=true -Pacts:WritePly=true -Pplugins=janadot,janatop $(<${{ github.workspace }}/.github/janadot.groups)
895933
- uses: actions/upload-artifact@v4
896934
with:
897-
name: rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}.edm4eic.root
935+
name: rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}${{matrix.nthreads > 0 && '_MT' || ''}}.edm4eic.root
898936
path: rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}.edm4eic.root
899937
if-no-files-found: error
900938
- uses: actions/upload-artifact@v4
901939
with:
902-
name: rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}.prmon.json
940+
name: rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}${{matrix.nthreads > 0 && '_MT' || ''}}.prmon.json
903941
path: rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}.prmon.json
904942
if-no-files-found: error
905943
- name: Convert execution graphs
@@ -914,21 +952,21 @@ jobs:
914952
continue-on-error: true
915953
- uses: actions/upload-artifact@v4
916954
with:
917-
name: rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}.dot
955+
name: rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}${{matrix.nthreads > 0 && '_MT' || ''}}.dot
918956
path: |
919957
*.dot
920958
*.svg
921959
if-no-files-found: error
922960
- uses: actions/upload-artifact@v4
923961
with:
924-
name: obj_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}
962+
name: obj_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}${{matrix.nthreads > 0 && '_MT' || ''}}
925963
path: |
926964
*.mtl
927965
*.obj
928966
if-no-files-found: error
929967
- uses: actions/upload-artifact@v4
930968
with:
931-
name: ply_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}
969+
name: ply_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}${{matrix.nthreads > 0 && '_MT' || ''}}
932970
path: |
933971
*.ply
934972
if-no-files-found: error
@@ -938,6 +976,9 @@ jobs:
938976
with:
939977
branch: ${{ github.base_ref || github.event.merge_group.base_ref || github.ref_name }}
940978
path: ref/
979+
# FIXME: compare multi-threaded here with single-threaded reference
980+
# since multi-threaded has issues with randomness reproducibility
981+
#name: rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}${{matrix.nthreads > 0 && '_MT' || ''}}.edm4eic.root
941982
name: rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}.edm4eic.root
942983
workflow: ".github/workflows/linux-eic-shell.yml"
943984
workflow_conclusion: "completed"
@@ -953,15 +994,17 @@ jobs:
953994
ln -sf ../rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}.edm4eic.root new/
954995
# using nullglob to ensure ref/* does not fail
955996
shopt -s nullglob
997+
# FIXME: compare multi-threaded here with single-threaded reference
998+
#capybara bara ref/*rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}${{matrix.nthreads > 0 && '_MT' || ''}}.edm4eic.root new/rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}.edm4eic.root
956999
capybara bara ref/*rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}.edm4eic.root new/rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}.edm4eic.root
957-
mv capybara-reports rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}
958-
touch .rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}
1000+
mv capybara-reports rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}${{matrix.nthreads > 0 && '_MT' || ''}}
1001+
touch .rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}${{matrix.nthreads > 0 && '_MT' || ''}}
9591002
- uses: actions/upload-artifact@v4
9601003
with:
961-
name: rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}.capy
1004+
name: rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}${{matrix.nthreads > 0 && '_MT' || ''}}.capy
9621005
path: |
963-
.rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}
964-
rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}/
1006+
.rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}${{matrix.nthreads > 0 && '_MT' || ''}}
1007+
rec_dis_${{matrix.beam}}_minQ2=${{matrix.minq2}}_${{ matrix.detector_config }}${{matrix.nthreads > 0 && '_MT' || ''}}/
9651008
if-no-files-found: error
9661009

9671010
trigger-container:

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ set(fmt_VERSION_MIN 9.0.0)
6767
set(IRT_VERSION_MIN 1.0.5)
6868
set(JANA_VERSION_MIN 2.2.0)
6969
set(onnxruntime_MIN_VERSION 1.17)
70-
set(podio_VERSION_MIN 0.16)
70+
set(podio_VERSION_MIN 0.17)
7171
set(ROOT_VERSION_MIN 6.28)
7272
set(spdlog_VERSION_MIN 1.11.0)
7373

src/algorithms/pid/IrtCherenkovParticleID.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,9 @@ void IrtCherenkovParticleID::process(const IrtCherenkovParticleID::Input& input,
178178
auto irt_particle = std::make_unique<ChargedParticle>();
179179

180180
// loop over radiators
181+
// note: this must run exclusively since irt_rad points to shared IRT objects that are
182+
// owned by the RichGeo_service; it holds state (e.g. irt_rad->ResetLocation())
183+
std::lock_guard<std::mutex> lock(m_pid_radiators_mutex);
181184
for (auto [rad_name, irt_rad] : m_pid_radiators) {
182185

183186
// get the `charged_particle` for this radiator

src/algorithms/pid/IrtCherenkovParticleID.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <edm4eic/TrackSegmentCollection.h>
1414
#include <stdint.h>
1515
#include <map>
16+
#include <mutex>
1617
#include <string>
1718
#include <string_view>
1819
#include <unordered_map>
@@ -66,6 +67,8 @@ class IrtCherenkovParticleID : public IrtCherenkovParticleIDAlgorithm,
6667
uint64_t m_cell_mask;
6768
std::string m_det_name;
6869
std::unordered_map<int, double> m_pdg_mass;
70+
71+
inline static std::mutex m_pid_radiators_mutex;
6972
std::map<std::string, CherenkovRadiator*> m_pid_radiators;
7073
};
7174

0 commit comments

Comments
 (0)