Skip to content

Commit 0678627

Browse files
Merge pull request #2051 from devitocodes/add_icx_support
compiler: Add ICX support
2 parents ba4a49f + e355ed8 commit 0678627

File tree

9 files changed

+131
-51
lines changed

9 files changed

+131
-51
lines changed

.github/workflows/docker-bases.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ jobs:
6666
dockerfile: './docker/Dockerfile.cpu'
6767
runner: ubuntu-latest
6868

69-
- tag: 'devitocodes/bases:cpu-icc'
69+
- tag: 'devitocodes/bases:cpu-icc, devitocodes/bases:cpu-icx'
7070
arch: 'arch=icc'
7171
version: ''
7272
dockerfile: './docker/Dockerfile.cpu'

.github/workflows/pytest-core-nompi.yml

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ jobs:
3838
pytest-ubuntu-py39-gcc9-omp,
3939
pytest-osx-py37-clang-omp,
4040
pytest-docker-py37-gcc-omp,
41-
pytest-docker-py37-icc-omp
41+
pytest-docker-py37-icc-omp,
42+
pytest-docker-py38-icx-omp
4243
]
4344
set: [base, adjoint]
4445
include:
@@ -105,6 +106,13 @@ jobs:
105106
language: "openmp"
106107
sympy: "1.11"
107108

109+
- name: pytest-docker-py38-icx-omp
110+
python-version: '3.8'
111+
os: ubuntu-22.04
112+
arch: "icx"
113+
language: "openmp"
114+
sympy: "1.11"
115+
108116
- set: base
109117
test-set: 'not adjoint'
110118

@@ -133,13 +141,13 @@ jobs:
133141
- name: Set run prefix
134142
run: |
135143
if [[ "${{ matrix.name }}" =~ "docker" ]]; then
136-
echo "RUN_CMD=docker run --rm -e CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }} --name testrun devito_img" >> $GITHUB_ENV
144+
echo "RUN_CMD=docker run --rm -e CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }} -e DEVITO_ARCH=${{ matrix.arch }} --name testrun devito_img" >> $GITHUB_ENV
137145
else
138146
echo "RUN_CMD=" >> $GITHUB_ENV
139147
fi
140148
id: set-run
141149

142-
- name: Install GCC ${{ matrix.arch }}
150+
- name: Install ${{ matrix.arch }} compiler
143151
if: "runner.os == 'linux' && !contains(matrix.name, 'docker')"
144152
run : |
145153
sudo apt-get install -y ${{ matrix.arch }}

devito/arch/archinfo.py

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,17 @@
1616

1717
__all__ = ['platform_registry', 'get_cpu_info', 'get_gpu_info', 'get_nvidia_cc',
1818
'get_cuda_path', 'get_hip_path', 'check_cuda_runtime', 'get_m1_llvm_path',
19-
'Platform', 'Cpu64', 'Intel64', 'Amd', 'Arm', 'Power', 'Device',
20-
'NvidiaDevice', 'AmdDevice', 'IntelDevice',
21-
'INTEL64', 'SNB', 'IVB', 'HSW', 'BDW', 'SKX', 'KNL', 'KNL7210', # Intel
22-
'AMD', 'ARM', 'M1', 'GRAVITON', # ARM
23-
'POWER8', 'POWER9', # Other loosely supported CPU architectures
24-
'AMDGPUX', 'NVIDIAX', 'INTELGPUX'] # GPUs
19+
'Platform', 'Cpu64', 'Intel64', 'IntelSkylake', 'Amd', 'Arm', 'Power',
20+
'Device', 'NvidiaDevice', 'AmdDevice', 'IntelDevice',
21+
# Intel
22+
'INTEL64', 'SNB', 'IVB', 'HSW', 'BDW', 'KNL', 'KNL7210',
23+
'SKX', 'KLX', 'CLX', 'CLK',
24+
# ARM
25+
'AMD', 'ARM', 'M1', 'GRAVITON',
26+
# Other loosely supported CPU architectures
27+
'POWER8', 'POWER9',
28+
# GPUs
29+
'AMDGPUX', 'NVIDIAX', 'INTELGPUX']
2530

2631

2732
@memoized_func
@@ -494,7 +499,7 @@ def get_platform():
494499
if 'phi' in brand:
495500
# Intel Xeon Phi?
496501
return platform_registry['knl']
497-
# Unknown Xeon ? May happen on some virtualizes systems...
502+
# Unknown Xeon ? May happen on some virtualized systems...
498503
return platform_registry['intel64']
499504
elif 'intel' in brand:
500505
# Most likely a desktop i3/i5/i7
@@ -607,6 +612,14 @@ class Intel64(Cpu64):
607612
known_isas = ('cpp', 'sse', 'avx', 'avx2', 'avx512')
608613

609614

615+
class IntelSkylake(Intel64):

    """
    Marker type for Intel64 platforms of the Skylake family.

    The class adds no behavior on top of Intel64; it exists so that other
    code can single out these platforms with an `isinstance` check.
    """
617+
618+
619+
class IntelGoldenCove(Intel64):

    """
    Marker type for Intel64 platforms of the Golden Cove family.

    The class adds no behavior on top of Intel64; it exists so that other
    code can single out these platforms with an `isinstance` check.
    """


# Backward-compatible alias: the class was first introduced under the
# misspelled name `IntelGoldenCode`
IntelGoldenCode = IntelGoldenCove
621+
622+
610623
class Arm(Cpu64):
611624

612625
known_isas = ('fp', 'asimd', 'asimdrdm')
@@ -725,11 +738,12 @@ def march(cls):
725738
IVB = Intel64('ivb')
726739
HSW = Intel64('hsw')
727740
BDW = Intel64('bdw', isa='avx2')
728-
SKX = Intel64('skx')
729-
KLX = Intel64('klx')
730-
CLX = Intel64('clx')
731741
KNL = Intel64('knl')
732742
KNL7210 = Intel64('knl', cores_logical=256, cores_physical=64, isa='avx512')
743+
SKX = IntelSkylake('skx')
744+
KLX = IntelSkylake('klx')
745+
CLX = IntelSkylake('clx')
746+
CLK = IntelSkylake('clk')
733747

734748
ARM = Arm('arm')
735749
GRAVITON = Arm('graviton')
@@ -756,6 +770,7 @@ def march(cls):
756770
'skx': SKX, # Skylake
757771
'klx': KLX, # Kaby Lake
758772
'clx': CLX, # Coffee Lake
773+
'clk': CLK, # Cascade Lake
759774
'knl': KNL,
760775
'knl7210': KNL7210,
761776
'arm': ARM, # Generic ARM CPU

devito/arch/compiler.py

Lines changed: 73 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@
1212
from codepy.jit import compile_from_string
1313
from codepy.toolchain import GCCToolchain
1414

15-
from devito.arch import (AMDGPUX, Cpu64, M1, NVIDIAX, SKX, POWER8, POWER9, GRAVITON,
16-
get_nvidia_cc, check_cuda_runtime, get_m1_llvm_path)
15+
from devito.arch import (AMDGPUX, Cpu64, M1, NVIDIAX, POWER8, POWER9, GRAVITON,
16+
INTELGPUX, IntelSkylake, get_nvidia_cc, check_cuda_runtime,
17+
get_m1_llvm_path)
1718
from devito.exceptions import CompilationError
1819
from devito.logger import debug, warning, error
1920
from devito.parameters import configuration
@@ -375,13 +376,22 @@ class GNUCompiler(Compiler):
375376
def __init__(self, *args, **kwargs):
376377
super().__init__(*args, **kwargs)
377378

378-
self.cflags += ['-march=native', '-Wno-unused-result', '-Wno-unused-variable',
379-
'-Wno-unused-but-set-variable']
379+
platform = kwargs.pop('platform', configuration['platform'])
380+
381+
self.cflags += ['-march=native', '-Wno-unused-result',
382+
'-Wno-unused-variable', '-Wno-unused-but-set-variable']
383+
380384
if configuration['safe-math']:
381385
self.cflags.append('-fno-unsafe-math-optimizations')
382386
else:
383387
self.cflags.append('-ffast-math')
384388

389+
if isinstance(platform, IntelSkylake):
390+
# The default is `=256` because avx512 slows down the CPU frequency;
391+
# however, we empirically found that stencils generally benefit
392+
# from `=512`
393+
self.cflags.append('-mprefer-vector-width=512')
394+
385395
language = kwargs.pop('language', configuration['language'])
386396
try:
387397
if self.version >= Version("4.9.0"):
@@ -414,7 +424,7 @@ def __init__(self, *args, **kwargs):
414424
class ClangCompiler(Compiler):
415425

416426
def __init__(self, *args, **kwargs):
417-
super(ClangCompiler, self).__init__(*args, **kwargs)
427+
super().__init__(*args, **kwargs)
418428

419429
self.cflags += ['-Wno-unused-result', '-Wno-unused-variable']
420430
if not configuration['safe-math']:
@@ -481,7 +491,7 @@ class AOMPCompiler(Compiler):
481491
"""AMD's fork of Clang for OpenMP offloading on both AMD and NVidia cards."""
482492

483493
def __init__(self, *args, **kwargs):
484-
super(AOMPCompiler, self).__init__(*args, **kwargs)
494+
super().__init__(*args, **kwargs)
485495

486496
self.cflags += ['-Wno-unused-result', '-Wno-unused-variable']
487497
if not configuration['safe-math']:
@@ -531,7 +541,7 @@ def __lookup_cmds__(self):
531541
class PGICompiler(Compiler):
532542

533543
def __init__(self, *args, **kwargs):
534-
super(PGICompiler, self).__init__(*args, cpp=True, **kwargs)
544+
super().__init__(*args, cpp=True, **kwargs)
535545

536546
self.cflags.remove('-std=c99')
537547
self.cflags.remove('-O3')
@@ -671,39 +681,30 @@ def __lookup_cmds__(self):
671681
class IntelCompiler(Compiler):
672682

673683
def __init__(self, *args, **kwargs):
674-
super(IntelCompiler, self).__init__(*args, **kwargs)
675-
676-
self.cflags.append("-xhost")
684+
super().__init__(*args, **kwargs)
677685

678-
language = kwargs.pop('language', configuration['language'])
679686
platform = kwargs.pop('platform', configuration['platform'])
687+
language = kwargs.pop('language', configuration['language'])
688+
self.cflags.append("-xHost")
680689

681690
if configuration['safe-math']:
682691
self.cflags.append("-fp-model=strict")
683692
else:
684-
self.cflags.append('-fast')
693+
self.cflags.append('-fp-model=fast')
685694

686-
if platform is SKX:
695+
if isinstance(platform, IntelSkylake):
687696
# Systematically use 512-bit vectors on skylake
688697
self.cflags.append("-qopt-zmm-usage=high")
689698

690-
try:
691-
if self.version >= Version("15.0.0"):
692-
# Append the OpenMP flag regardless of configuration['language'],
693-
# since icc15 and later versions implement OpenMP 4.0, hence
694-
# they support `#pragma omp simd`
695-
self.ldflags.append('-qopenmp')
696-
except (TypeError, ValueError):
697-
if language == 'openmp':
698-
# Note: fopenmp, not qopenmp, is what is needed by icc versions < 15.0
699-
self.ldflags.append('-fopenmp')
699+
if language == 'openmp':
700+
self.ldflags.append('-qopenmp')
700701

701702
# Warn if the MPI distribution is not Intel MPI, since other distributions may not use the Intel compiler underneath
702703
if kwargs.get('mpi'):
703-
ver = check_output([self.MPICC, "--version"]).decode("utf-8")
704-
if not ver.startswith("icc"):
705-
warning("The MPI compiler `%s` doesn't use the Intel "
706-
"C/C++ compiler underneath" % self.MPICC)
704+
mpi_distro = sniff_mpi_distro('mpiexec')
705+
if mpi_distro != 'IntelMPI':
706+
warning("Expected Intel MPI distribution with `%s`, but found `%s`"
707+
% (self.__class__.__name__, mpi_distro))
707708

708709
def __lookup_cmds__(self):
709710
self.CC = 'icc'
@@ -727,16 +728,55 @@ def __lookup_cmds__(self):
727728
class IntelKNLCompiler(IntelCompiler):
728729

729730
def __init__(self, *args, **kwargs):
730-
super(IntelKNLCompiler, self).__init__(*args, **kwargs)
731+
super().__init__(*args, **kwargs)
731732

732-
self.cflags += ["-xMIC-AVX512"]
733+
self.cflags.append('-xMIC-AVX512')
733734

734735
language = kwargs.pop('language', configuration['language'])
735736

736737
if language != 'openmp':
737738
warning("Running on Intel KNL without OpenMP is highly discouraged")
738739

739740

741+
class OneapiCompiler(IntelCompiler):

    """
    Compiler class for the Intel oneAPI `icx` (C) and `icpx` (C++) compilers.

    Starts from the flags set up by IntelCompiler and then adjusts them
    according to the target `platform` and `language`.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        platform = kwargs.pop('platform', configuration['platform'])
        language = kwargs.pop('language', configuration['language'])

        if language == 'openmp':
            # The parent class requests OpenMP through `-qopenmp`; here the
            # `-fopenmp` spelling is used instead
            if '-qopenmp' in self.ldflags:
                self.ldflags.remove('-qopenmp')
            self.ldflags.append('-fopenmp')

            # The OpenMP offloading targets are only meaningful when OpenMP
            # itself is enabled, so they are not emitted for other languages
            if platform is NVIDIAX:
                self.cflags.append('-fopenmp-targets=nvptx64-cuda')
            elif platform is INTELGPUX:
                self.cflags.append('-fopenmp-targets=spir64')
                self.cflags.append('-fopenmp-target-simd')

        elif language == 'sycl':
            self.cflags.append('-fsycl')
            if platform is NVIDIAX:
                self.cflags.append('-fsycl-targets=nvptx64-cuda')
            else:
                self.cflags.append('-fsycl-targets=spir64')

        if platform is INTELGPUX:
            # -g disables some optimizations in IGC, so fall back to the
            # lighter debug information that is enough for profiling
            if '-g' in self.cflags:
                self.cflags.remove('-g')
            self.cflags.append('-gline-tables-only')
            self.cflags.append('-fdebug-info-for-profiling')

    def __lookup_cmds__(self):
        # OneAPI HPC ToolKit comes with icpx, which is clang++,
        # and icx, which is clang
        self.CC = 'icx'
        self.CXX = 'icpx'
        self.MPICC = 'mpicc'
        # NOTE(review): `MPICX`/'mpicx' looks like it may be meant to be
        # `MPICXX`/'mpicxx' -- confirm against the attribute names read by
        # the base Compiler class
        self.MPICX = 'mpicx'
778+
779+
740780
class CustomCompiler(Compiler):
741781

742782
"""
@@ -800,9 +840,11 @@ def __lookup_cmds__(self):
800840
'nvidia': NvidiaCompiler,
801841
'cuda': CudaCompiler,
802842
'osx': ClangCompiler,
803-
'intel': IntelCompiler,
804-
'icpc': IntelCompiler,
843+
'intel': OneapiCompiler,
844+
'icx': OneapiCompiler,
845+
'icpx': OneapiCompiler,
805846
'icc': IntelCompiler,
847+
'icpc': IntelCompiler,
806848
'intel-knl': IntelKNLCompiler,
807849
'knl': IntelKNLCompiler,
808850
'dpcpp': DPCPPCompiler,

devito/parameters.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,8 +235,11 @@ class switchconfig(object):
235235
Decorator to temporarily change `configuration` parameters.
236236
"""
237237

238-
def __init__(self, **params):
239-
self.params = {k.replace('_', '-'): v for k, v in params.items()}
238+
def __init__(self, condition=True, **params):
239+
if condition:
240+
self.params = {k.replace('_', '-'): v for k, v in params.items()}
241+
else:
242+
self.params = {}
240243

241244
def __call__(self, func, *args, **kwargs):
242245
@wraps(func)

tests/conftest.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
from devito.checkpointing import NoopRevolver
1010
from devito.finite_differences.differentiable import EvalDerivative
1111
from devito.arch import Cpu64, Device, sniff_mpi_distro, Arm
12-
from devito.arch.compiler import compiler_registry, IntelCompiler, NvidiaCompiler
12+
from devito.arch.compiler import (compiler_registry, IntelCompiler, OneapiCompiler,
13+
NvidiaCompiler)
1314
from devito.ir.iet import (FindNodes, FindSymbols, Iteration, ParallelBlock,
1415
retrieve_iteration_tree)
1516
from devito.tools import as_tuple
@@ -26,7 +27,8 @@ def skipif(items, whole_module=False):
2627
# Sanity check
2728
accepted = set()
2829
accepted.update({'device', 'device-C', 'device-openmp', 'device-openacc',
29-
'device-aomp', 'cpu64-icc', 'cpu64-nvc', 'cpu64-arm', 'chkpnt'})
30+
'device-aomp', 'cpu64-icc', 'cpu64-icx', 'cpu64-nvc', 'cpu64-arm',
31+
'cpu64-icpx', 'chkpnt'})
3032
accepted.update({'nompi', 'nodevice'})
3133
unknown = sorted(set(items) - accepted)
3234
if unknown:
@@ -70,6 +72,12 @@ def skipif(items, whole_module=False):
7072
isinstance(configuration['platform'], Cpu64):
7173
skipit = "`icc+cpu64` won't work with this test"
7274
break
75+
# Skip if it won't run with OneapiCompiler
76+
if i == 'cpu64-icx' and \
77+
isinstance(configuration['compiler'], OneapiCompiler) and \
78+
isinstance(configuration['platform'], Cpu64):
79+
skipit = "`icx+cpu64` won't work with this test"
80+
break
7381
# Skip if it won't run on Arm
7482
if i == 'cpu64-arm' and isinstance(configuration['platform'], Arm):
7583
skipit = "Arm doesn't support x86-specific instructions"

tests/test_benchmark.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44

55
from benchmarks.user.benchmark import run
66
from devito import configuration, switchconfig
7+
from conftest import skipif
78
from subprocess import check_call
89

910

11+
@skipif('cpu64-icx')
1012
@pytest.mark.parametrize('mode, problem, op', [
1113
('run', 'acoustic', 'forward'), ('run', 'acoustic', 'adjoint'),
1214
('run', 'acoustic', 'jacobian'), ('run', 'acoustic', 'jacobian_adjoint'),

tests/test_buffering.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -701,7 +701,6 @@ def test_everything():
701701
assert np.all(u.data == u1.data)
702702

703703

704-
@skipif('cpu64-icc')
705704
@pytest.mark.parametrize('subdomain', ['domain', 'interior'])
706705
def test_stencil_issue_1915(subdomain):
707706
nt = 5

tests/test_dimension.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
SparseFunction, SparseTimeFunction, Eq, Operator, Constant,
1010
Dimension, DefaultDimension, SubDimension, switchconfig,
1111
SubDomain, Lt, Le, Gt, Ge, Ne, Buffer, sin, SpaceDimension,
12-
CustomDimension, dimensions)
12+
CustomDimension, dimensions, configuration)
13+
from devito.arch.compiler import IntelCompiler, OneapiCompiler
1314
from devito.ir.iet import (Conditional, Expression, Iteration, FindNodes,
1415
FindSymbols, retrieve_iteration_tree)
1516
from devito.symbolics import indexify, retrieve_functions, IntDiv
@@ -1382,6 +1383,8 @@ def test_affiness(self):
13821383
iterations = [i for i in FindNodes(Iteration).visit(op) if i.dim is not time]
13831384
assert all(i.is_Affine for i in iterations)
13841385

1386+
@switchconfig(condition=isinstance(configuration['compiler'],
1387+
(IntelCompiler, OneapiCompiler)), safe_math=True)
13851388
def test_sparse_time_function(self):
13861389
nt = 20
13871390

0 commit comments

Comments
 (0)