From 87997206d6b2e9cdf2ca36345f71e254d85e3fa5 Mon Sep 17 00:00:00 2001
From: Richard Top
Date: Wed, 9 Jul 2025 18:51:50 +0000
Subject: [PATCH 1/3] {2023.06}[foss/2023a] TensorFlow v2.15.1 w/ CUDA 12.1.1 + eb_hooks

---
 .../2023.06/eessi-2023.06-eb-5.1.1-2023a.yml  |  2 +
 eb_hooks.py                                   | 51 +++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-5.1.1-2023a.yml

diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-5.1.1-2023a.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-5.1.1-2023a.yml
new file mode 100644
index 0000000..2459a2e
--- /dev/null
+++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-5.1.1-2023a.yml
@@ -0,0 +1,2 @@
+easyconfigs:
+  - TensorFlow-2.15.1-foss-2023a-CUDA-12.1.1.eb
diff --git a/eb_hooks.py b/eb_hooks.py
index e20ef2e..f4d10f6 100644
--- a/eb_hooks.py
+++ b/eb_hooks.py
@@ -256,6 +256,56 @@ def post_prepare_hook(self, *args, **kwargs):
     post_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs)
 
 
+def parse_hook_tensorflow_CUDA(ec, eprefix):
+    """
+    Fix the Python and environment used while building and running tests for TensorFlow with CUDA
+    """
+    if ec.name == 'TensorFlow' and ec.version == '2.15.1' :
+        # Check if CUDA is in dependencies
+        has_cuda = any(
+            (isinstance(dep, (list, tuple)) and dep[0] == 'CUDA') or
+            (isinstance(dep, dict) and dep.get('name') == 'CUDA')
+            for dep in ec.get('dependencies', [])
+        )
+
+        if has_cuda:
+            ec['preconfigopts'] = (
+                'export TF_NEED_CUDA=1 && '
+                'export CUDA_TOOLKIT_PATH=$EBROOTCUDA && '
+                'export TF_CUDA_INCLUDE_PATH=$EBROOTCUDA/include && '
+                'export CUDNN_INSTALL_PATH=$EBROOTCUDNN && '
+                'export GCC_HOST_COMPILER_PATH=$EBROOTGCC/bin/gcc && '
+                'sed -i \'s|--define=PREFIX=/usr|--define=PREFIX=\\$EESSI_EPREFIX|g\' .bazelrc && '
+            )
+
+            ec['buildopts'] = [
+                '--linkopt=-Wl,--disable-new-dtags --host_linkopt=-Wl,--disable-new-dtags --action_env=GCC_HOST_COMPILER_PATH=$EBROOT
+GCC/bin/gcc --host_action_env=GCC_HOST_COMPILER_PATH=$EBROOTGCC/bin/gcc --linkopt=-Wl,-rpath,$EBROOTCUDA/lib:$EBROOTCUDNN/lib:$EBROOT
+NCCL/lib --host_linkopt=-Wl,-rpath,$EBROOTCUDA/lib:$EBROOTCUDNN/lib:$EBROOTNCCL/lib',
+            ]
+
+            ec['pretestopts'] = (
+                #'patchelf --set-interpreter "$EESSI_EPREFIX/lib64/ld-linux-%(arch)s.so.1" '
+                #'"%(builddir)s/%(name)s/bazel-root/0b9648e0837f9e5bb579e0e2e64adf3f/external/python_aarch64-unknown-linux-gnu/bin/python%(pyshortver)s" && '
+
+                'mv "%(builddir)s/%(name)s/bazel-root/0b9648e0837f9e5bb579e0e2e64adf3f/external/python_aarch64-unknown-linux-gnu/bin/python%(pyshortver)s" "%(builddir)s/%(name)s/bazel-root/0b9648e0837f9e5bb579e0e2e64adf3f/external/python_aarch64-unknown-linux-gnu/bin/python%(pyshortver)s.old" && cp -f $EBROOTPYTHON/bin/python%(pyshortver)s "%(builddir)s/%(name)s/bazel-root/0b9648e0837f9e5bb579e0e2e64adf3f/external/python_aarch64-unknown-linux-gnu/bin/python%(pyshortver)s" && '
+
+
+
+                #'LD_LIBRARY_PATH=$EBROOTCUDA/lib:$EBROOTCUDNN/lib:$EBROOTNCCL/lib:$LD_LIBRARY_PATH && '
+            )
+
+            ec['postinstallcmds'] = [
+                'mkdir -p %(installdir)s/bin',
+                'ln -s $EBROOTCUDA/bin/cuobjdump %(installdir)s/bin/cuobjdump',
+                'chmod 755 -R %(builddir)s',
+            ]
+
+            print_msg("TensorFlow-CUDA required changes are applied!!!"),
+    else:
+        raise EasyBuildError("TensorFlow-CUDA specific hook triggered for non-TensorFlow-CUDA easyconfig?!")
+
+
 def parse_hook_casacore_disable_vectorize(ec, eprefix):
     """
     Disable 'vectorize' toolchain option for casacore 3.5.0 on aarch64/neoverse_v1
@@ -1298,6 +1348,7 @@ def post_module_hook(self, *args, **kwargs):
 
 
 PARSE_HOOKS = {
+    'TensorFlow': parse_hook_tensorflow_CUDA,
     'casacore': parse_hook_casacore_disable_vectorize,
     'CGAL': parse_hook_cgal_toolchainopts_precise,
     'fontconfig': parse_hook_fontconfig_add_fonts,

From 998037d513ff58c54d3125eb3957e53688371293 Mon Sep 17 00:00:00 2001
From: Richard Top
Date: Fri, 11 Jul 2025 19:38:52 +0000
Subject: [PATCH 2/3] fixed typo in eb_hooks.py

---
 .../eessi-2023.06-eb-5.1.1-2023a-CUDA.yml}    |  0
 eb_hooks.py                                   | 25 ++++++++-----------
 2 files changed, 10 insertions(+), 15 deletions(-)
 rename easystacks/software.eessi.io/2023.06/{eessi-2023.06-eb-5.1.1-2023a.yml => accel/nvidia/eessi-2023.06-eb-5.1.1-2023a-CUDA.yml} (100%)

diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-5.1.1-2023a.yml b/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-5.1.1-2023a-CUDA.yml
similarity index 100%
rename from easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-5.1.1-2023a.yml
rename to easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-5.1.1-2023a-CUDA.yml
diff --git a/eb_hooks.py b/eb_hooks.py
index f4d10f6..dbc5b65 100644
--- a/eb_hooks.py
+++ b/eb_hooks.py
@@ -279,29 +279,23 @@ def parse_hook_tensorflow_CUDA(ec, eprefix):
             )
 
             ec['buildopts'] = [
-                '--linkopt=-Wl,--disable-new-dtags --host_linkopt=-Wl,--disable-new-dtags --action_env=GCC_HOST_COMPILER_PATH=$EBROOT
-GCC/bin/gcc --host_action_env=GCC_HOST_COMPILER_PATH=$EBROOTGCC/bin/gcc --linkopt=-Wl,-rpath,$EBROOTCUDA/lib:$EBROOTCUDNN/lib:$EBROOT
-NCCL/lib --host_linkopt=-Wl,-rpath,$EBROOTCUDA/lib:$EBROOTCUDNN/lib:$EBROOTNCCL/lib',
+                '--linkopt=-Wl,--disable-new-dtags --host_linkopt=-Wl,--disable-new-dtags --action_env=GCC_HOST_COMPILER_PATH=$EBROOTGCC/bin/gcc --host_action_env=GCC_HOST_COMPILER_PATH=$EBROOTGCC/bin/gcc --linkopt=-Wl,-rpath,$EBROOTCUDA/lib:$EBROOTCUDNN/lib:$EBROOTNCCL/lib --host_linkopt=-Wl,-rpath,$EBROOTCUDA/lib:$EBROOTCUDNN/lib:$EBROOTNCCL/lib',
             ]
 
             ec['pretestopts'] = (
-                #'patchelf --set-interpreter "$EESSI_EPREFIX/lib64/ld-linux-%(arch)s.so.1" '
-                #'"%(builddir)s/%(name)s/bazel-root/0b9648e0837f9e5bb579e0e2e64adf3f/external/python_aarch64-unknown-linux-gnu/bin/python%(pyshortver)s" && '
-
-                'mv "%(builddir)s/%(name)s/bazel-root/0b9648e0837f9e5bb579e0e2e64adf3f/external/python_aarch64-unknown-linux-gnu/bin/python%(pyshortver)s" "%(builddir)s/%(name)s/bazel-root/0b9648e0837f9e5bb579e0e2e64adf3f/external/python_aarch64-unknown-linux-gnu/bin/python%(pyshortver)s.old" && cp -f $EBROOTPYTHON/bin/python%(pyshortver)s "%(builddir)s/%(name)s/bazel-root/0b9648e0837f9e5bb579e0e2e64adf3f/external/python_aarch64-unknown-linux-gnu/bin/python%(pyshortver)s" && '
-
-
-
-                #'LD_LIBRARY_PATH=$EBROOTCUDA/lib:$EBROOTCUDNN/lib:$EBROOTNCCL/lib:$LD_LIBRARY_PATH && '
-            )
+                """interppath=$(find "$EESSI_EPREFIX/lib64" -name 'ld-*' | grep -E 'so\\.1|so\\.2' | head -n1) && """
+                """patchelf --set-interpreter "$interppath" """
+                """"%(builddir)s/%(name)s/bazel-root/0b9648e0837f9e5bb579e0e2e64adf3f/external/python_%(arch)s-unknown-linux-gnu/bin/python%(pyshortver)s" && """
+                """export LD_LIBRARY_PATH="$EBROOTCUDA/lib:$EBROOTCUDNN/lib:$EBROOTNCCL/lib:$LD_LIBRARY_PATH" && """
+        )
 
             ec['postinstallcmds'] = [
                 'mkdir -p %(installdir)s/bin',
                 'ln -s $EBROOTCUDA/bin/cuobjdump %(installdir)s/bin/cuobjdump',
-                'chmod 755 -R %(builddir)s',
+                #'chmod 755 -R %(builddir)s',
             ]
 
-            print_msg("TensorFlow-CUDA required changes are applied!!!"),
+            print_msg("TensorFlow-CUDA related changes have been applied")
     else:
         raise EasyBuildError("TensorFlow-CUDA specific hook triggered for non-TensorFlow-CUDA easyconfig?!")
 
@@ -325,7 +319,7 @@ def parse_hook_casacore_disable_vectorize(ec, eprefix):
                 if 'toolchainopts' not in ec or ec['toolchainopts'] is None:
                     ec['toolchainopts'] = {}
                 ec['toolchainopts']['vectorize'] = False
-                print_msg("Changed toochainopts for %s: %s", ec.name, ec['toolchainopts'])
+                print_msg("Changed toolchainopts for %s: %s", ec.name, ec['toolchainopts'])
             else:
                 print_msg("Not changing option vectorize for %s on non-neoverse_v1", ec.name)
         else:
@@ -1451,3 +1445,4 @@ def set_maximum(parallel, max_value):
         CPU_TARGET_A64FX: (set_maximum, 8),
     },
 }
+

From 8c0692022650dad701da5bba3c17b7ac5cc7d507 Mon Sep 17 00:00:00 2001
From: Richard Top
Date: Sat, 12 Jul 2025 17:45:27 +0000
Subject: [PATCH 3/3] fixed hard-coded path in eb_hooks.py

---
 eb_hooks.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/eb_hooks.py b/eb_hooks.py
index dbc5b65..a678dd8 100644
--- a/eb_hooks.py
+++ b/eb_hooks.py
@@ -284,15 +284,14 @@ def parse_hook_tensorflow_CUDA(ec, eprefix):
 
             ec['pretestopts'] = (
                 """interppath=$(find "$EESSI_EPREFIX/lib64" -name 'ld-*' | grep -E 'so\\.1|so\\.2' | head -n1) && """
-                """patchelf --set-interpreter "$interppath" """
-                """"%(builddir)s/%(name)s/bazel-root/0b9648e0837f9e5bb579e0e2e64adf3f/external/python_%(arch)s-unknown-linux-gnu/bin/python%(pyshortver)s" && """
+                """pybin=$(find "%(builddir)s/%(name)s/bazel-root/" -type f -path "*/external/python_%(arch)s-unknown-linux-gnu/bin/python%(pyshortver)s" | head -n1) && """
+                """patchelf --set-interpreter "$interppath" "$pybin" && """
                 """export LD_LIBRARY_PATH="$EBROOTCUDA/lib:$EBROOTCUDNN/lib:$EBROOTNCCL/lib:$LD_LIBRARY_PATH" && """
         )
 
             ec['postinstallcmds'] = [
                 'mkdir -p %(installdir)s/bin',
                 'ln -s $EBROOTCUDA/bin/cuobjdump %(installdir)s/bin/cuobjdump',
-                #'chmod 755 -R %(builddir)s',
             ]
 
             print_msg("TensorFlow-CUDA related changes have been applied")