From 55f02368f14627b4680dde82d605f9f0b19c0397 Mon Sep 17 00:00:00 2001 From: JDBetteridge Date: Wed, 5 Nov 2025 22:48:43 +0000 Subject: [PATCH 1/4] compiler: Add shared object versioning to pgcc --- devito/arch/compiler.py | 25 +++++++++++++++++++++++++ devito/passes/iet/engine.py | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/devito/arch/compiler.py b/devito/arch/compiler.py index 36b47eecd2..a676ad3572 100644 --- a/devito/arch/compiler.py +++ b/devito/arch/compiler.py @@ -1,5 +1,6 @@ from functools import partial from hashlib import sha1 +from itertools import repeat, filterfalse from os import environ, path, makedirs from packaging.version import Version from subprocess import (DEVNULL, PIPE, CalledProcessError, check_output, @@ -666,6 +667,30 @@ def __lookup_cmds__(self): self.MPICC = 'mpic++' self.MPICXX = 'mpicxx' + def add_libraries(self, libs): + # Urgh... + # PGIComiler inherits from Compiler inherits from GCCToolchain in codepy + # GCC supports linking versioned shared objects with the syntax + # `gcc -L/path/to/versioned/lib -l:libfoo.so.2.0 ...` + # But this syntax is not supported by the Portland (or Nvidia) compiler. + # Nor does codepy.GCCToolchain understand that linking to versioned objects + # is a thing that someone might want to do + # + # Since this is just linking information, we can just tell the linker + # (which we invoke using the compiler and the `-Wl,-options` syntax) to + # go and look in all of the directories we have provided thus far and + # the linker supports the syntax: + # `ld -L/path/to/versioned/lib -l:libfoo.so.2.0 ...` + # Note: It would be nicer to just look in the one _relevant_ lib dir! + new = as_list(libs) + versioned = filter(lambda s: s.startswith(':'), new) + versioned = map(lambda s: s.removeprefix(':'), versioned) + self.add_ldflags([ + f'-Wl,-L{",-L".join(map(str, self.library_dirs))},-l:{soname}' + for soname in versioned + ]) + super().add_libraries(filterfalse(lambda s: s.startswith(':'), new)) + class NvidiaCompiler(PGICompiler): diff --git a/devito/passes/iet/engine.py b/devito/passes/iet/engine.py index 3f002b2c85..724ccf5c84 100644 --- a/devito/passes/iet/engine.py +++ b/devito/passes/iet/engine.py @@ -147,9 +147,9 @@ def apply(self, func, **kwargs): try: compiler = kwargs['compiler'] compiler.add_include_dirs(as_tuple(metadata.get('include_dirs'))) - compiler.add_libraries(as_tuple(metadata.get('libs'))) compiler.add_library_dirs(as_tuple(metadata.get('lib_dirs')), rpath=metadata.get('rpath', False)) + compiler.add_libraries(as_tuple(metadata.get('libs'))) except KeyError: pass From 824827bde5ce7ff6b6d9841d5d6692bc43b2bce7 Mon Sep 17 00:00:00 2001 From: JDBetteridge Date: Thu, 6 Nov 2025 13:20:09 +0000 Subject: [PATCH 2/4] compiler: Expunge Portland Compiler --- FAQ.md | 44 +++++++++++++++++++-------------------- benchmarks/user/README.md | 2 +- devito/arch/compiler.py | 35 +++++++++++-------------------- 3 files changed, 35 insertions(+), 46 deletions(-) diff --git a/FAQ.md b/FAQ.md index 75eee0319f..1cc79c3932 100644 --- a/FAQ.md +++ b/FAQ.md @@ -46,16 +46,16 @@ Devito is a Python-based domain-specific language (DSL) and code generation fram Here’s what Devito brings to the table: -1. **Symbolic PDE definitions in pure Python** +1. **Symbolic PDE definitions in pure Python** Express your governing equations exactly as you would on paper, using a concise, SymPy-powered syntax — no boilerplate, no glue code. -2. **Automatic generation of fast, low-level code** +2. 
**Automatic generation of fast, low-level code** Devito turns your high-level symbolic equations into highly optimized C/C++ code via a sophisticated compiler stack, automatically applying an array of performance optimizations during the lowering process. -3. **Portable performance across architectures** +3. **Portable performance across architectures** From laptops to supercomputers: Devito targets multi-core CPUs (with OpenMP and MPI) and accelerators (CUDA, HIP, SYCL, OpenACC, and MPI) from the same source. GPU support for CUDA, HIP, and SYCL is included in [DevitoPRO](https://www.devitocodes.com/features/). -4. **Custom finite-difference and stencil kernel design** +4. **Custom finite-difference and stencil kernel design** Design bespoke numerical schemes for acoustics, elasticity, electromagnetics, fluid dynamics, or whatever PDE-driven physics your research demands. By providing high-level abstractions for numerical scheme specification, Devito combines productivity, portability, and performance, avoiding the need to hand-implement and optimize finite-difference solvers, or rely on pre-existing implementations. This enables domain scientists to rapidly specify the solvers they require, whilst remaining focused on their areas of expertise - typically the overarching problem. @@ -79,7 +79,7 @@ Furthermore, the examples provided with Devito are often conflated with the core Whilst components of the provided examples may be well-suited to user applications, it is worth bearing in mind that they represent a higher level of abstraction atop of Devito. We do, however, encourage the extension of example objects, and their use as templates for implementing your own higher-level functionalities. ## How can I see the code generated by Devito? -After you build an ```op=Operator(...)``` implementing one or more equations, you can use ```print(op)``` to see the generated low level code. The example below builds an operator that takes a 1/2 cell forward shifted derivative of the ```Function``` **f** and puts the result in the ```Function``` **g**. +After you build an ```op=Operator(...)``` implementing one or more equations, you can use ```print(op)``` to see the generated low level code. The example below builds an operator that takes a 1/2 cell forward shifted derivative of the ```Function``` **f** and puts the result in the ```Function``` **g**. ```python import numpy as np @@ -145,7 +145,7 @@ int Kernel(struct dataobj *restrict f_vec, struct dataobj *restrict g_vec, const ## How can I see the compilation command with which Devito compiles the generated code? -Set the environment variable `DEVITO_LOGGING=DEBUG`. When an Operator gets compiled, the used compilation command will be emitted to stdout. +Set the environment variable `DEVITO_LOGGING=DEBUG`. When an Operator gets compiled, the used compilation command will be emitted to stdout. If nothing seems to change, it is possible that no compilation is happening under-the-hood as all kernels have already been compiled in a previous run. You will then have to clear up the Devito kernel cache. From the Devito root directory, run: @@ -163,7 +163,7 @@ Devito stores the generated code as well as the jit-compiled libraries in a temp ## Can I change the directory where Devito stashes the generated code? -Yes, just set the environment variable `TMPDIR` to your favorite location. +Yes, just set the environment variable `TMPDIR` to your favorite location. 
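For instance, here is a minimal sketch with a hypothetical scratch path. Exporting the variable in the shell before launching Python is the safest route; setting it programmatically only works if it happens before the temporary directory is first resolved:

```python
# Safest: export the variable before launching Python, e.g.
#     TMPDIR=/path/to/scratch python my_script.py
# Programmatic alternative: set it at the very top of the script, before
# anything resolves the temporary directory (hypothetical path below).
import os
os.environ['TMPDIR'] = '/path/to/scratch'

from devito import Grid, TimeFunction, Eq, Operator

grid = Grid(shape=(10, 10))
u = TimeFunction(name='u', grid=grid)
op = Operator(Eq(u.forward, u + 1))
op.apply(time_M=1)
# The generated C file and the jit-compiled shared object should now be
# stashed under /path/to/scratch rather than the default /tmp location.
```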
[top](#Frequently-Asked-Questions) @@ -195,7 +195,7 @@ ux_update = t.spacing**2 * b * \ (c55 * u_x.dz(x0=z+z.spacing/2)).dz(x0=z-z.spacing/2) + (c13 * u_z.dz(x0=z+z.spacing/2)).dx(x0=x-x.spacing/2) + (c55 * u_z.dx(x0=x+x.spacing/2)).dz(x0=z-z.spacing/2)) + \ - (2 - t.spacing * wOverQ) * u_x + (t.spacing * wOverQ - 1) * u_x.backward + (2 - t.spacing * wOverQ) * u_x + (t.spacing * wOverQ - 1) * u_x.backward stencil_x = Eq(u_x.forward, ux_update) print("\n", stencil_x) op = Operator([stencil_x]) @@ -209,7 +209,7 @@ ux_update = \ t.spacing**2 * b * (c55 * u_x.dz(x0=z+z.spacing/2)).dz(x0=z-z.spacing/2) + \ t.spacing**2 * b * (c13 * u_z.dz(x0=z+z.spacing/2)).dx(x0=x-x.spacing/2) + \ t.spacing**2 * b * (c55 * u_z.dx(x0=x+x.spacing/2)).dz(x0=z-z.spacing/2) + \ - (2 - t.spacing * wOverQ) * u_x + (t.spacing * wOverQ - 1) * u_x.backward + (2 - t.spacing * wOverQ) * u_x + (t.spacing * wOverQ - 1) * u_x.backward stencil_x = Eq(u_x.forward, ux_update) print("\n", stencil_x) op = Operator([stencil_x]) @@ -246,18 +246,18 @@ For more info, take a look [at this notebook](https://github.com/devitocodes/dev [top](#Frequently-Asked-Questions) -## How do I get an estimate of the memory consumption for an Operator? +## How do I get an estimate of the memory consumption for an Operator? The memory consumption of an `Operator` can be estimated using the `Operator.estimate_memory()` utility. This returns an estimate of consumption of both host and device memory for the `Operator`, including that associated with any array temporaries introduced by optimizations applied by the compiler. A deeper overview can be found toward the end of [this notebook](https://github.com/devitocodes/devito/blob/main/examples/userapi/02_apply.ipynb). [top](#Frequently-Asked-Questions) ## How are abstractions used in the seismic examples? -Many Devito examples are provided that demonstrate application for specific problems, including e.g. fluid mechanics and seismic modeling. We focus in this question on seismic modeling examples that provide convenience wrappers to build differential equations and create Devito Operators for various types of modeling physics including isotropic and anisotropic, acoustic and elastic. +Many Devito examples are provided that demonstrate application for specific problems, including e.g. fluid mechanics and seismic modeling. We focus in this question on seismic modeling examples that provide convenience wrappers to build differential equations and create Devito Operators for various types of modeling physics including isotropic and anisotropic, acoustic and elastic. -These examples ([link](https://github.com/devitocodes/devito/tree/main/examples)) use abstractions to remove details from the methods that actually build the operators. The idea is that at the time you build a Devito operator, you don't need specific material parameter arrays (e.g. velocity or density or Thomsen parameter), and you don't need specific locations of sources and receiver instruments. All you need to build the operator is a placeholder that can provide the dimensionality and (for example) the spatial order of finite difference approximation you wish to employ. In this way you can build and return functioning highly optimized operators to which you can provide the specific implementation details at runtime via command line arguments. +These examples ([link](https://github.com/devitocodes/devito/tree/main/examples)) use abstractions to remove details from the methods that actually build the operators. 
The idea is that at the time you build a Devito operator, you don't need specific material parameter arrays (e.g. velocity or density or Thomsen parameter), and you don't need specific locations of sources and receiver instruments. All you need to build the operator is a placeholder that can provide the dimensionality and (for example) the spatial order of finite difference approximation you wish to employ. In this way you can build and return functioning highly optimized operators to which you can provide the specific implementation details at runtime via command line arguments. -An example of this abstraction (or placeholder design pattern) in operation is the call to the isotropic acoustic ```AcousticWaveSolver.forward``` method that returns a Devito operator via the ```ForwardOperator``` method defined in [operators.py](https://github.com/devitocodes/devito/blob/main/examples/seismic/acoustic/operators.py#L65-L105). +An example of this abstraction (or placeholder design pattern) in operation is the call to the isotropic acoustic ```AcousticWaveSolver.forward``` method that returns a Devito operator via the ```ForwardOperator``` method defined in [operators.py](https://github.com/devitocodes/devito/blob/main/examples/seismic/acoustic/operators.py#L65-L105). You will note that this method uses placeholders for the material parameter arrays and the source and receiver locations, and then at runtime uses arguments provided to the returned ```Operator``` to provide state to the placeholders. You can see this happen on lines 112-113 in [wavesolver.py](https://github.com/devitocodes/devito/blob/main/examples/seismic/acoustic/wavesolver.py#L112-L113). @@ -289,7 +289,7 @@ These environment variables can either be set from the shell or programmatically ### Description of Devito environment variables #### DEVITO_ARCH -Used to select a specific "backend compiler". The backend compiler takes as input the code generated by Devito and produces a shared object. Supported backend compilers are `gcc`, `icc`, `pgcc`, `clang`. For each of these compilers, Devito uses some preset compilation flags (e.g., `-O3`, `-march=native`, `-fast-math`). If this environment variable is left unset, Devito will attempt auto-detection of the most suitable backend compiler available on the underlying system. +Used to select a specific "backend compiler". The backend compiler takes as input the code generated by Devito and produces a shared object. Supported backend compilers are `gcc`, `icc`, `clang`. For each of these compilers, Devito uses some preset compilation flags (e.g., `-O3`, `-march=native`, `-fast-math`). If this environment variable is left unset, Devito will attempt auto-detection of the most suitable backend compiler available on the underlying system. #### DEVITO_PLATFORM This environment variable is mostly needed when running on GPUs, to ask Devito to generate code for a particular device (see for example this [tutorial](https://github.com/devitocodes/devito/blob/main/examples/gpu/01_diffusion_with_openmp_offloading.ipynb)). Can be also used to specify CPU architectures such as Intel's -- Haswell, Broadwell, SKL and KNL -- ARM, AMD, and Power. Often one can ignore this variable because Devito typically does a decent job at auto-detecting the underlying platform. @@ -313,7 +313,7 @@ Choose the performance optimization level. By default set to the maximum level, Controls MPI in Devito. Use `1` to enable MPI. The most powerful MPI mode is called "full", and is activated setting `DEVITO_MPI=full`. 
The "full" mode implements a number of optimizations including computation/communication overlap. #### DEVITO_AUTOTUNING -Search across a set of block shapes to maximize the effectiveness of loop tiling (aka cache blocking). You can choose between `off` (default), `basic`, `aggressive`, `max`. A more aggressive autotuning should eventually result in better runtime performance, though the search phase will take longer. +Search across a set of block shapes to maximize the effectiveness of loop tiling (aka cache blocking). You can choose between `off` (default), `basic`, `aggressive`, `max`. A more aggressive autotuning should eventually result in better runtime performance, though the search phase will take longer. #### DEVITO_LOGGING Run with `DEVITO_LOGGING=DEBUG` to find out the specific performance optimizations applied by an Operator, how auto-tuning is getting along, to emit the command used to compile the generated code, to emit more performance metrics, and much more. @@ -345,7 +345,7 @@ When using OpenMP offloading, it is recommended to stick to the corresponding ve Requires: `PLATFORM=nvidiaX` and `ARCH=nvc`. -The legacy PGI compiler is also supported via `ARCH=pgcc`. +The legacy PGI compiler is no longer supported. #### LANGUAGE=cuda @@ -564,7 +564,7 @@ Instead of swapping arrays, devito uses the modulus of a time index to map incre ## Can I subclass basic types such as TimeFunction? Yes, just like we did for our seismic examples, for example, the [PointSource class](https://github.com/devitocodes/devito/blob/main/examples/seismic/source.py). A few caveats are necessary, though. -First, classes such as `Function` or `SparseTimeFunction` are inherently complex. In fact, `SparseTimeFunction` itself is a subclass of `Function`. The whole class hierarchy is modular and well-structured, but at the same time, it's depth and offers several hooks to allow specialization by subclasses -- for example, all methods starting with `__` such as `__init_finalize__` or `__shape_setup__`. It will take some time to digest it. Luckily, you will only need to learn a little of this, at least for simple subclasses. +First, classes such as `Function` or `SparseTimeFunction` are inherently complex. In fact, `SparseTimeFunction` itself is a subclass of `Function`. The whole class hierarchy is modular and well-structured, but at the same time, it's depth and offers several hooks to allow specialization by subclasses -- for example, all methods starting with `__` such as `__init_finalize__` or `__shape_setup__`. It will take some time to digest it. Luckily, you will only need to learn a little of this, at least for simple subclasses. Second, you must know that these objects are subjected to so-called reconstruction during compilation. Objects are immutable inside Devito; therefore, even a straightforward symbolic transformation such as `f[x] -> f[y]` boils down to performing a reconstruction, that is, creating a whole new object. Since `f` carries around several attributes (e.g., shape, grid, dimensions), each time Devito performs a reconstruction, we only want to specify which attributes are changing -- not all of them, as it would make the code ugly and incredibly complicated. The solution to this problem is that all the base symbolic types inherit from a common base class called `Reconstructable`; a `Reconstructable` object has two special class attributes, called `__rargs__` and `__rkwargs__`. 
If a subclass adds a new positional or keyword argument to its `__init_finalize__`, it must also be added to `__rargs__` or `__rkwargs__`, respectively. This will provide Devito with enough information to perform a reconstruction when it's needed during compilation. The following example should clarify: @@ -727,7 +727,7 @@ possible arguments: mpirun, mpiexec, srun, e.g.: `mpirun -np [op perspective, it remains a logically centralized entity. Users can interact with data using familiar indexing schemes (e.g., slicing) without concern about the underlying layout. You can find related tutorials [here:](https://github.com/devitocodes/devito/tree/main/examples/userapi). -For example, instead of +For example, instead of ```python t = grid.stepping_dim x, y = grid.dimensions @@ -792,7 +792,7 @@ Typically such a bug occurs in a moderately big code, so how should we proceed? If you are in the cases 1 or 2 above, the first thing to do, regardless of who fixes it (either you directly if feeling brave, or most likely someone from the Devito team), is to create an MFE -- a Minimal Failing Example. An interesting read about MFEs in general is available [here](http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports). The typical workflow in Devito is as follows: -* If the failure is from a Jupyter Notebook, first of all, convert it into a Python file. It's trivial, you can do it directly from within the Jupyter Notebook itself, under the "File" tab there's an option to convert the notebook into a Python file. +* If the failure is from a Jupyter Notebook, first of all, convert it into a Python file. It's trivial, you can do it directly from within the Jupyter Notebook itself, under the "File" tab there's an option to convert the notebook into a Python file. * Then it's time to trim down the code. The idea is to incrementally remove parts of the code until the bug disappears, i.e., Devito compilation and execution reach completion without failures. In doing so, you don't care about things such as breaking the physics, making the method unstable, and so on. So ... * If your Operator has many Eqs, start with removing some of them. Is the bug still there? Great, keeping on removing the unnecessary. Some Functions may now become unnecessary because they were only appearing in the removed Eqs. Then, drop them too from the Python file. * Here's another trick. You have many SubDomains -- try with removing some of them, both their definition and where they're used. @@ -851,7 +851,7 @@ Notes: [top](#Frequently-Asked-Questions) ## How does Devito compute the performance of an Operator? -The section execution time is computed directly in the generated code through cheap timers. The cumulative Operator execution time is computed through Python-level timers and includes the overhead inherent in the processing of the arguments supplied to `op.apply(...)`. +The section execution time is computed directly in the generated code through cheap timers. The cumulative Operator execution time is computed through Python-level timers and includes the overhead inherent in the processing of the arguments supplied to `op.apply(...)`. The floating-point operations are counted once all of the symbolic flop-reducing transformations have been performed during compilation. Devito uses an in-house estimate of cost, rather than SymPy's estimate, to take care of some low-level intricacies. For example, Devito's estimate ignores the cost of integer arithmetic used for offset indexing into multi-dimensional arrays. 
Examples of how the Devito estimator works are available [here](https://github.com/devitocodes/devito/blob/v4.1/tests/test_dse.py#L265). @@ -891,7 +891,7 @@ It's challenging! Here's a potentially non-exhaustive list of things to check: ## Is there a list of refereed papers related to the Devito project? -Please see https://www.devitoproject.org/publications +Please see https://www.devitoproject.org/publications [top](#Frequently-Asked-Questions) @@ -902,6 +902,6 @@ Please see https://www.devitoproject.org/citing [top](#Frequently-Asked-Questions) ## Where did the name Devito come from? -The precursor project that led to Devito was named by [@ggorman](https://github.com/ggorman) using a backronym generator. He put in some keywords like "open source performant seismic codes", and chose the (weird) name "Opesci". No one knew how to pronounce this, so a common conversation was "How do you pronounce this?" "Opesci, like Joe Pesci". So for the next version we chose to go with a famous Joe Pesci character - Tommy Devito from Goodfellas. The name came up in a discussion between [@navjotk](https://github.com/navjotk) and [@mlange05](https://github.com/mlange05) (mostly the latter) and we stuck with it. +The precursor project that led to Devito was named by [@ggorman](https://github.com/ggorman) using a backronym generator. He put in some keywords like "open source performant seismic codes", and chose the (weird) name "Opesci". No one knew how to pronounce this, so a common conversation was "How do you pronounce this?" "Opesci, like Joe Pesci". So for the next version we chose to go with a famous Joe Pesci character - Tommy Devito from Goodfellas. The name came up in a discussion between [@navjotk](https://github.com/navjotk) and [@mlange05](https://github.com/mlange05) (mostly the latter) and we stuck with it. [top](#Frequently-Asked-Questions) diff --git a/benchmarks/user/README.md b/benchmarks/user/README.md index a683c89d28..4756833cd4 100644 --- a/benchmarks/user/README.md +++ b/benchmarks/user/README.md @@ -147,7 +147,7 @@ selected and used for all remaining timesteps. The "backend compiler" takes as input the code generated by Devito and translates it into a shared object. Supported backend compilers are `gcc`, -`icc`, `pgcc`, `clang`. For each of these compilers, Devito uses some preset compilation +`icc`, `clang`. For each of these compilers, Devito uses some preset compilation flags (e.g., -O3, -march=native, etc). The default backend compiler is `gcc`. 
To change it, one should set the diff --git a/devito/arch/compiler.py b/devito/arch/compiler.py index a676ad3572..937acf2f13 100644 --- a/devito/arch/compiler.py +++ b/devito/arch/compiler.py @@ -1,6 +1,6 @@ from functools import partial from hashlib import sha1 -from itertools import repeat, filterfalse +from itertools import filterfalse from os import environ, path, makedirs from packaging.version import Version from subprocess import (DEVNULL, PIPE, CalledProcessError, check_output, @@ -64,7 +64,7 @@ def sniff_compiler_version(cc, allow_fail=False): elif ver.startswith("icx"): compiler = "icx" elif ver.startswith("pgcc"): - compiler = "pgcc" + raise CompilationError('Portland compiler no longer supported') elif ver.startswith("nvc++"): compiler = "nvc" elif ver.startswith("cray"): @@ -627,7 +627,7 @@ def __lookup_cmds__(self): self.MPICXX = 'mpicxx' -class PGICompiler(Compiler): +class NvidiaCompiler(Compiler): _default_cpp = True @@ -657,30 +657,30 @@ def __init_finalize__(self, **kwargs): if not configuration['safe-math']: self.cflags.append('-fast') - # Default PGI compile for a target is GPU and single threaded host. + # Default compile for a target is GPU and single threaded host. # self.cflags += ['-ta=tesla,host'] def __lookup_cmds__(self): - # NOTE: using `pgc++` instead of `pgcc` because of issue #1219 - self.CC = 'pgc++' - self.CXX = 'pgc++' + self.CC = 'nvc++' + self.CXX = 'nvc++' self.MPICC = 'mpic++' self.MPICXX = 'mpicxx' def add_libraries(self, libs): # Urgh... - # PGIComiler inherits from Compiler inherits from GCCToolchain in codepy - # GCC supports linking versioned shared objects with the syntax + # NvidiaComiler inherits from Compiler inherits from GCCToolchain in codepy + # And _GCC_ supports linking versioned shared objects with the syntax: # `gcc -L/path/to/versioned/lib -l:libfoo.so.2.0 ...` - # But this syntax is not supported by the Portland (or Nvidia) compiler. - # Nor does codepy.GCCToolchain understand that linking to versioned objects - # is a thing that someone might want to do + # But this syntax is not supported by the Nvidia compiler. + # Nor does `codepy.GCCToolchain` understand that linking to versioned objects + # is a thing that someone might want to do. # # Since this is just linking information, we can just tell the linker # (which we invoke using the compiler and the `-Wl,-options` syntax) to # go and look in all of the directories we have provided thus far and # the linker supports the syntax: # `ld -L/path/to/versioned/lib -l:libfoo.so.2.0 ...` + # # Note: It would be nicer to just look in the one _relevant_ lib dir! 
new = as_list(libs) versioned = filter(lambda s: s.startswith(':'), new) @@ -692,15 +692,6 @@ def add_libraries(self, libs): super().add_libraries(filterfalse(lambda s: s.startswith(':'), new)) -class NvidiaCompiler(PGICompiler): - - def __lookup_cmds__(self): - self.CC = 'nvc++' - self.CXX = 'nvc++' - self.MPICC = 'mpic++' - self.MPICXX = 'mpicxx' - - class CudaCompiler(Compiler): _default_cpp = True @@ -1090,8 +1081,6 @@ def __contains__(self, key): 'aomp': AOMPCompiler, 'amdclang': AOMPCompiler, 'hip': HipCompiler, - 'pgcc': PGICompiler, - 'pgi': PGICompiler, 'nvc': NvidiaCompiler, 'nvc++': NvidiaCompiler, 'nvidia': NvidiaCompiler, From e89e262ec0f25acb0956781baeac6a7e294d71a9 Mon Sep 17 00:00:00 2001 From: JDBetteridge Date: Thu, 6 Nov 2025 19:15:05 +0000 Subject: [PATCH 3/4] misc: Typo and additions --- devito/arch/archinfo.py | 28 +++++++++++++++++++++++++++- devito/arch/compiler.py | 3 ++- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/devito/arch/archinfo.py b/devito/arch/archinfo.py index ba711ee855..3104052467 100644 --- a/devito/arch/archinfo.py +++ b/devito/arch/archinfo.py @@ -1,7 +1,8 @@ """Collection of utilities to detect properties of the underlying architecture.""" +from contextlib import suppress from functools import cached_property -from subprocess import PIPE, Popen, DEVNULL, run +from subprocess import PIPE, Popen, DEVNULL, run, CalledProcessError from pathlib import Path import ctypes import re @@ -11,6 +12,7 @@ import cpuinfo import numpy as np +from packaging.version import parse, InvalidVersion import psutil from devito.logger import warning @@ -553,6 +555,30 @@ def get_cuda_path(): return None +@memoized_func +def get_cuda_version(): + cuda_home = get_cuda_path() + if cuda_home is None: + nvc_version_command = ['nvcc', '--version'] + else: + nvc_version_command = [f'{cuda_home}/bin/nvcc', '--version'] + + cuda_version = None + try: + out = run(nvc_version_command, capture_output=True, text=True) + except (FileNotFoundError, CalledProcessError): + pass + finally: + if out.returncode == 0: + start = out.stdout.find('release') + start = out.stdout.find(',', start) + stop = out.stdout.find('\n', start) + with suppress(InvalidVersion): + cuda_version = parse(out.stdout[start:stop]) + + return cuda_version + + @memoized_func def get_advisor_path(): """ diff --git a/devito/arch/compiler.py b/devito/arch/compiler.py index 937acf2f13..82b8411fec 100644 --- a/devito/arch/compiler.py +++ b/devito/arch/compiler.py @@ -661,6 +661,7 @@ def __init_finalize__(self, **kwargs): # self.cflags += ['-ta=tesla,host'] def __lookup_cmds__(self): + # Note: Using `nvc++` instead of `nvcc` because of issue #1219 self.CC = 'nvc++' self.CXX = 'nvc++' self.MPICC = 'mpic++' @@ -668,7 +669,7 @@ def __lookup_cmds__(self): def add_libraries(self, libs): # Urgh... - # NvidiaComiler inherits from Compiler inherits from GCCToolchain in codepy + # NvidiaCompiler inherits from Compiler inherits from GCCToolchain in codepy # And _GCC_ supports linking versioned shared objects with the syntax: # `gcc -L/path/to/versioned/lib -l:libfoo.so.2.0 ...` # But this syntax is not supported by the Nvidia compiler. 
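To make the flag construction in the new `add_libraries` method concrete, here is a self-contained sketch using made-up paths and sonames (the real method takes `library_dirs` from the toolchain state and hands the result to `add_ldflags`):

```python
from itertools import filterfalse

# Hypothetical inputs, mimicking what an IET pass might register as metadata
library_dirs = ['/opt/foo/lib']     # assumed location of the versioned .so
libs = [':libfoo.so.2.0', 'm']      # ':'-prefixed entries denote versioned objects

# Versioned objects become explicit `-Wl` linker options...
versioned = [s.removeprefix(':') for s in libs if s.startswith(':')]
ldflags = [f'-Wl,-L{",-L".join(library_dirs)},-l:{soname}' for soname in versioned]

# ...while ordinary libraries are forwarded to the parent class unchanged
plain = list(filterfalse(lambda s: s.startswith(':'), libs))

print(ldflags)  # ['-Wl,-L/opt/foo/lib,-l:libfoo.so.2.0']
print(plain)    # ['m']
```

This is also why the first patch registers library directories before libraries in `engine.py`: the `-L` components of the `-Wl` option are only known once `add_library_dirs` has been called.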
From 7b2209cc49a1f173ac0ad94cfec0c482ae72b278 Mon Sep 17 00:00:00 2001 From: JDBetteridge Date: Fri, 7 Nov 2025 11:49:45 +0000 Subject: [PATCH 4/4] misc: Fixes --- .github/workflows/pytest-gpu.yml | 2 +- devito/arch/archinfo.py | 50 +++++++++++++++++--------------- 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/.github/workflows/pytest-gpu.yml b/.github/workflows/pytest-gpu.yml index 5fb8468df7..0a9cecbddb 100644 --- a/.github/workflows/pytest-gpu.yml +++ b/.github/workflows/pytest-gpu.yml @@ -47,7 +47,7 @@ jobs: # -------------------- NVIDIA job -------------------- - name: pytest-gpu-acc-nvidia test_files: "tests/test_adjoint.py tests/test_gpu_common.py tests/test_gpu_openacc.py tests/test_operator.py::TestEstimateMemory" - base: "devitocodes/bases:nvidia-nvc" + base: "devitocodes/bases:nvidia-nvc12" runner_label: nvidiagpu test_drive_cmd: "nvidia-smi" # Respect CUDA_VISIBLE_DEVICES and also hard-limit Docker to that device. diff --git a/devito/arch/archinfo.py b/devito/arch/archinfo.py index 3104052467..8aa71a8a16 100644 --- a/devito/arch/archinfo.py +++ b/devito/arch/archinfo.py @@ -18,30 +18,32 @@ from devito.logger import warning from devito.tools import as_tuple, all_equal, memoized_func -__all__ = ['platform_registry', 'get_cpu_info', 'get_gpu_info', 'get_visible_devices', - 'get_nvidia_cc', 'get_cuda_path', 'get_hip_path', 'check_cuda_runtime', - 'get_m1_llvm_path', 'get_advisor_path', 'Platform', 'Cpu64', 'Intel64', - 'IntelSkylake', 'Amd', 'Arm', 'Power', 'Device', 'NvidiaDevice', - 'AmdDevice', 'IntelDevice', - # Brand-agnostic - 'ANYCPU', 'ANYGPU', - # Intel CPUs - 'INTEL64', 'SNB', 'IVB', 'HSW', 'BDW', 'KNL', 'KNL7210', - 'SKX', 'KLX', 'CLX', 'CLK', 'SPR', - # AMD CPUs - 'AMD', - # ARM CPUs - 'ARM', 'AppleArm', 'M1', 'M2', 'M3', - 'Graviton', 'GRAVITON2', 'GRAVITON3', 'GRAVITON4', - 'Cortex', 'NvidiaArm', 'GRACE', - # Other legacy CPUs - 'POWER8', 'POWER9', - # Generic GPUs - 'AMDGPUX', 'NVIDIAX', 'INTELGPUX', - # Nvidia GPUs - 'VOLTA', 'AMPERE', 'HOPPER', 'BLACKWELL', - # Intel GPUs - 'PVC', 'INTELGPUMAX', 'MAX1100', 'MAX1550'] +__all__ = [ + 'platform_registry', 'get_cpu_info', 'get_gpu_info', 'get_visible_devices', + 'get_nvidia_cc', 'get_cuda_path', 'get_cuda_version', 'get_hip_path', + 'check_cuda_runtime', 'get_m1_llvm_path', 'get_advisor_path', 'Platform', + 'Cpu64', 'Intel64', 'IntelSkylake', 'Amd', 'Arm', 'Power', 'Device', + 'NvidiaDevice', 'AmdDevice', 'IntelDevice', + # Brand-agnostic + 'ANYCPU', 'ANYGPU', + # Intel CPUs + 'INTEL64', 'SNB', 'IVB', 'HSW', 'BDW', 'KNL', 'KNL7210', + 'SKX', 'KLX', 'CLX', 'CLK', 'SPR', + # AMD CPUs + 'AMD', + # ARM CPUs + 'ARM', 'AppleArm', 'M1', 'M2', 'M3', + 'Graviton', 'GRAVITON2', 'GRAVITON3', 'GRAVITON4', + 'Cortex', 'NvidiaArm', 'GRACE', + # Other legacy CPUs + 'POWER8', 'POWER9', + # Generic GPUs + 'AMDGPUX', 'NVIDIAX', 'INTELGPUX', + # Nvidia GPUs + 'VOLTA', 'AMPERE', 'HOPPER', 'BLACKWELL', + # Intel GPUs + 'PVC', 'INTELGPUMAX', 'MAX1100', 'MAX1550' +] @memoized_func
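Since `get_cuda_version` is now exported from `devito.arch.archinfo`, a minimal usage sketch might look as follows (the `12.0` threshold and the messages are purely illustrative, not something the patches prescribe):

```python
from packaging.version import Version

from devito.arch.archinfo import get_cuda_version

ver = get_cuda_version()
if ver is None:
    # nvcc was not found, or its banner could not be parsed into a version
    print('CUDA toolkit version unknown')
elif ver >= Version('12.0'):
    print(f'Found CUDA {ver} (12.x or newer)')
else:
    print(f'Found CUDA {ver} (pre-12 toolkit)')
```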