diff --git a/src/gcn.jl b/src/gcn.jl
index 36165a2c..146d9a33 100644
--- a/src/gcn.jl
+++ b/src/gcn.jl
@@ -124,3 +124,5 @@ function emit_trap!(job::CompilerJob{GCNCompilerTarget}, builder, mod, inst)
     end
     call!(builder, trap_ft, trap)
 end
+
+can_vectorize(job::CompilerJob{GCNCompilerTarget}) = true
diff --git a/src/interface.jl b/src/interface.jl
index 0ad0c9f6..eb55efb8 100644
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -222,6 +222,11 @@ end
 # Has the runtime available and does not require special handling
 uses_julia_runtime(@nospecialize(job::CompilerJob)) = false
 
+# Is it legal to run vectorization passes on this target?
+# Disabled by default: vectorization generally isn't useful for GPU targets and
+# causes issues with some back-end compilers (like Metal). Targets opt in by overloading.
+can_vectorize(@nospecialize(job::CompilerJob)) = false
+
 # Should emit PTLS lookup that can be relocated
 dump_native(@nospecialize(job::CompilerJob)) = false
 
diff --git a/src/native.jl b/src/native.jl
index 5ce73c4a..fdd880ec 100644
--- a/src/native.jl
+++ b/src/native.jl
@@ -35,3 +35,4 @@ end
 
 runtime_slug(job::CompilerJob{NativeCompilerTarget}) = "native_$(job.config.target.cpu)-$(hash(job.config.target.features))$(job.config.target.jlruntime ? "-jlrt" : "")"
 uses_julia_runtime(job::CompilerJob{NativeCompilerTarget}) = job.config.target.jlruntime
+can_vectorize(job::CompilerJob{NativeCompilerTarget}) = true
diff --git a/src/optim.jl b/src/optim.jl
index 841b1a5b..2188061b 100644
--- a/src/optim.jl
+++ b/src/optim.jl
@@ -31,10 +31,7 @@ function buildNewPMPipeline!(mpm, @nospecialize(job::CompilerJob), opt_level)
     add!(mpm, NewPMFunctionPassManager()) do fpm
         buildLoopOptimizerPipeline(fpm, job, opt_level)
         buildScalarOptimizerPipeline(fpm, job, opt_level)
-        if uses_julia_runtime(job) && opt_level >= 2
-            # XXX: we disable vectorization, as this generally isn't useful for GPU targets
-            # and actually causes issues with some back-end compilers (like Metal).
-            # TODO: Make this not dependent on `uses_julia_runtime` (likely CPU), but it's own control
+        if can_vectorize(job) && opt_level >= 2
             buildVectorPipeline(fpm, job, opt_level)
         end
         if isdebug(:optim)