1212from codepy .jit import compile_from_string
1313from codepy .toolchain import GCCToolchain
1414
15- from devito .arch import (AMDGPUX , Cpu64 , M1 , NVIDIAX , SKX , POWER8 , POWER9 , GRAVITON ,
16- get_nvidia_cc , check_cuda_runtime , get_m1_llvm_path )
15+ from devito .arch import (AMDGPUX , Cpu64 , M1 , NVIDIAX , POWER8 , POWER9 , GRAVITON ,
16+ INTELGPUX , IntelSkylake , get_nvidia_cc , check_cuda_runtime ,
17+ get_m1_llvm_path )
1718from devito .exceptions import CompilationError
1819from devito .logger import debug , warning , error
1920from devito .parameters import configuration
@@ -375,13 +376,22 @@ class GNUCompiler(Compiler):
375376 def __init__ (self , * args , ** kwargs ):
376377 super ().__init__ (* args , ** kwargs )
377378
378- self .cflags += ['-march=native' , '-Wno-unused-result' , '-Wno-unused-variable' ,
379- '-Wno-unused-but-set-variable' ]
379+ platform = kwargs .pop ('platform' , configuration ['platform' ])
380+
381+ self .cflags += ['-march=native' , '-Wno-unused-result' ,
382+ '-Wno-unused-variable' , '-Wno-unused-but-set-variable' ]
383+
380384 if configuration ['safe-math' ]:
381385 self .cflags .append ('-fno-unsafe-math-optimizations' )
382386 else :
383387 self .cflags .append ('-ffast-math' )
384388
389+ if isinstance (platform , IntelSkylake ):
390+ # The default is `=256` because avx512 slows down the CPU frequency;
391+ # however, we empirically found that stencils generally benefit
392+ # from `=512`
393+ self .cflags .append ('-mprefer-vector-width=512' )
394+
385395 language = kwargs .pop ('language' , configuration ['language' ])
386396 try :
387397 if self .version >= Version ("4.9.0" ):
@@ -414,7 +424,7 @@ def __init__(self, *args, **kwargs):
414424class ClangCompiler (Compiler ):
415425
416426 def __init__ (self , * args , ** kwargs ):
417- super (ClangCompiler , self ).__init__ (* args , ** kwargs )
427+ super ().__init__ (* args , ** kwargs )
418428
419429 self .cflags += ['-Wno-unused-result' , '-Wno-unused-variable' ]
420430 if not configuration ['safe-math' ]:
@@ -481,7 +491,7 @@ class AOMPCompiler(Compiler):
481491 """AMD's fork of Clang for OpenMP offloading on both AMD and NVidia cards."""
482492
483493 def __init__ (self , * args , ** kwargs ):
484- super (AOMPCompiler , self ).__init__ (* args , ** kwargs )
494+ super ().__init__ (* args , ** kwargs )
485495
486496 self .cflags += ['-Wno-unused-result' , '-Wno-unused-variable' ]
487497 if not configuration ['safe-math' ]:
@@ -531,7 +541,7 @@ def __lookup_cmds__(self):
531541class PGICompiler (Compiler ):
532542
533543 def __init__ (self , * args , ** kwargs ):
534- super (PGICompiler , self ).__init__ (* args , cpp = True , ** kwargs )
544+ super ().__init__ (* args , cpp = True , ** kwargs )
535545
536546 self .cflags .remove ('-std=c99' )
537547 self .cflags .remove ('-O3' )
@@ -671,39 +681,30 @@ def __lookup_cmds__(self):
671681class IntelCompiler (Compiler ):
672682
def __init__(self, *args, **kwargs):
    """
    Initialise the base compiler, then add Intel-specific flags.

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to the parent initialiser. The following
        keyword arguments are additionally inspected here:

        platform : optional
            Falls back to ``configuration['platform']``. If it is an
            instance of `IntelSkylake`, ``-qopt-zmm-usage=high`` is added.
        language : optional
            Falls back to ``configuration['language']``. If ``'openmp'``,
            ``-qopenmp`` is added to the linker flags.
        mpi : optional
            If truthy, the MPI distribution is sniffed and a warning is
            emitted when it is not reported as ``'IntelMPI'``.
    """
    super().__init__(*args, **kwargs)

    # `kwargs` is this call's private dict, so popping does not affect
    # what the parent initialiser received above
    platform = kwargs.pop('platform', configuration['platform'])
    language = kwargs.pop('language', configuration['language'])
    self.cflags.append("-xHost")

    if configuration['safe-math']:
        self.cflags.append("-fp-model=strict")
    else:
        # The option must be a single token: no whitespace after `=`
        self.cflags.append('-fp-model=fast')

    if isinstance(platform, IntelSkylake):
        # Systematically use 512-bit vectors on skylake
        self.cflags.append("-qopt-zmm-usage=high")

    if language == 'openmp':
        self.ldflags.append('-qopenmp')

    # Warn if the MPI distribution found via `mpiexec` is not Intel MPI
    if kwargs.get('mpi'):
        mpi_distro = sniff_mpi_distro('mpiexec')
        if mpi_distro != 'IntelMPI':
            warning("Expected Intel MPI distribution with `%s`, but found `%s` "
                    % (self.__class__.__name__, mpi_distro))
707708
708709 def __lookup_cmds__ (self ):
709710 self .CC = 'icc'
@@ -727,16 +728,55 @@ def __lookup_cmds__(self):
class IntelKNLCompiler(IntelCompiler):

    """
    `IntelCompiler` variant that additionally passes ``-xMIC-AVX512`` and
    warns when the selected language is not OpenMP.
    """

    def __init__(self, *args, **kwargs):
        """
        Run the `IntelCompiler` initialisation, then add the KNL flag.

        The ``language`` keyword argument (falling back to
        ``configuration['language']``) is inspected; any value other than
        ``'openmp'`` triggers a warning.
        """
        super().__init__(*args, **kwargs)

        # The flag must not carry leading whitespace, otherwise it is not
        # recognised as an option when passed as a single argument
        self.cflags.append('-xMIC-AVX512')

        language = kwargs.pop('language', configuration['language'])

        if language != 'openmp':
            warning("Running on Intel KNL without OpenMP is highly discouraged")
739740
class OneapiCompiler(IntelCompiler):

    """
    `IntelCompiler` variant driving the oneAPI `icx`/`icpx` executables.

    On top of the flags set by `IntelCompiler`, the OpenMP linker flag is
    swapped to ``-fopenmp`` and SYCL / OpenMP offloading targets are added
    depending on the ``language`` and ``platform`` in use.
    """

    def __init__(self, *args, **kwargs):
        """
        Run the `IntelCompiler` initialisation, then adjust the flags.

        The ``platform`` and ``language`` keyword arguments are inspected,
        falling back to ``configuration['platform']`` and
        ``configuration['language']`` respectively.
        """
        super().__init__(*args, **kwargs)

        platform = kwargs.pop('platform', configuration['platform'])
        language = kwargs.pop('language', configuration['language'])

        if language == 'openmp':
            # The parent appended `-qopenmp` under this same condition;
            # replace it with `-fopenmp`
            self.ldflags.remove('-qopenmp')
            self.ldflags.append('-fopenmp')

        if language == 'sycl':
            self.cflags.append('-fsycl')
            if platform is NVIDIAX:
                self.cflags.append('-fsycl-targets=nvptx64-cuda')
            else:
                self.cflags.append('-fsycl-targets=spir64')

        # OpenMP offloading targets, selected by platform alone
        if platform is NVIDIAX:
            self.cflags.append('-fopenmp-targets=nvptx64-cuda')
        if platform is INTELGPUX:
            self.cflags.append('-fopenmp-targets=spir64')
            self.cflags.append('-fopenmp-target-simd')

            self.cflags.remove('-g')  # -g disables some optimizations in IGC
            self.cflags.append('-gline-tables-only')
            self.cflags.append('-fdebug-info-for-profiling')

    def __lookup_cmds__(self):
        """Set the names of the C/C++ compilers and their MPI wrappers."""
        # OneAPI HPC ToolKit comes with icpx, which is clang++,
        # and icx, which is clang
        self.CC = 'icx'
        self.CXX = 'icpx'
        self.MPICC = 'mpicc'
        # Named after `CXX`; `mpicxx` is the usual MPI C++ wrapper name
        self.MPICXX = 'mpicxx'
779+
740780class CustomCompiler (Compiler ):
741781
742782 """
@@ -800,9 +840,11 @@ def __lookup_cmds__(self):
800840 'nvidia' : NvidiaCompiler ,
801841 'cuda' : CudaCompiler ,
802842 'osx' : ClangCompiler ,
803- 'intel' : IntelCompiler ,
804- 'icpc' : IntelCompiler ,
843+ 'intel' : OneapiCompiler ,
844+ 'icx' : OneapiCompiler ,
845+ 'icpx' : OneapiCompiler ,
805846 'icc' : IntelCompiler ,
847+ 'icpc' : IntelCompiler ,
806848 'intel-knl' : IntelKNLCompiler ,
807849 'knl' : IntelKNLCompiler ,
808850 'dpcpp' : DPCPPCompiler ,
0 commit comments