diff --git a/src/context.jl b/src/context.jl index 710bf720..88035e74 100644 --- a/src/context.jl +++ b/src/context.jl @@ -6,7 +6,7 @@ # XXX: rework this -- it doesn't work well when altering the state -export driver, driver!, device, device!, context, context!, global_queue, synchronize +export driver, driver!, device, device!, context, context!, global_queue, synchronize, is_integrated """ driver() -> ZeDriver @@ -111,6 +111,40 @@ function device!(i::Int) return device!(devs[i]) end +""" + is_integrated(dev::ZeDevice=device()) -> Bool + +Check if the given device is an integrated GPU (i.e., integrated with the host processor). + +Integrated GPUs share memory with the CPU and are typically found in laptop and desktop +processors with integrated graphics. + +# Arguments +- `dev::ZeDevice`: The device to check. Defaults to the current device. + +# Returns +- `true` if the device is integrated, `false` otherwise (e.g., discrete GPU). + +# Examples +```julia +if is_integrated() + println("Running on integrated graphics") +else + println("Running on discrete GPU") +end + +# Check a specific device +dev = devices()[1] +is_integrated(dev) +``` + +See also: [`device`](@ref), [`devices`](@ref) +""" +function is_integrated(dev::ZeDevice=device()) + props = oneL0.properties(dev) + return (props.flags & oneL0.ZE_DEVICE_PROPERTY_FLAG_INTEGRATED) != 0 +end + const global_contexts = Dict{ZeDriver,ZeContext}() """ diff --git a/src/device/atomics.jl b/src/device/atomics.jl new file mode 100644 index 00000000..346db57a --- /dev/null +++ b/src/device/atomics.jl @@ -0,0 +1,22 @@ +# Atomic operation device overrides and fallbacks + +# Fallback wrappers for Float32 atomic_inc!/atomic_dec! +# Intel Level Zero doesn't support these directly for floating-point types, +# so we implement them using atomic_add!/atomic_sub! + +@device_override @inline function SPIRVIntrinsics.atomic_inc!(p::LLVMPtr{Float32, AS}) where {AS} + SPIRVIntrinsics.atomic_add!(p, Float32(1)) +end + +@device_override @inline function SPIRVIntrinsics.atomic_dec!(p::LLVMPtr{Float32, AS}) where {AS} + SPIRVIntrinsics.atomic_sub!(p, Float32(1)) +end + +# Float64 fallbacks (if Float64 is supported on device) +@device_override @inline function SPIRVIntrinsics.atomic_inc!(p::LLVMPtr{Float64, AS}) where {AS} + SPIRVIntrinsics.atomic_add!(p, Float64(1)) +end + +@device_override @inline function SPIRVIntrinsics.atomic_dec!(p::LLVMPtr{Float64, AS}) where {AS} + SPIRVIntrinsics.atomic_sub!(p, Float64(1)) +end diff --git a/src/oneAPI.jl b/src/oneAPI.jl index c4db5c76..be0cc727 100644 --- a/src/oneAPI.jl +++ b/src/oneAPI.jl @@ -34,6 +34,7 @@ Base.Experimental.@MethodTable(method_table) include("device/runtime.jl") include("device/array.jl") include("device/quirks.jl") +include("device/atomics.jl") # essential stuff include("context.jl") diff --git a/test/device/intrinsics.jl b/test/device/intrinsics.jl index 5e5605ef..bd2fadd9 100644 --- a/test/device/intrinsics.jl +++ b/test/device/intrinsics.jl @@ -274,149 +274,164 @@ end ############################################################################################ -@testset "atomics (low level)" begin +# @testset "atomics (low level)" begin -@testset "atomic_add($T)" for T in [Int32, UInt32] - a = oneArray([zero(T)]) + @testset "atomic_add($T)" for T in [Int32, UInt32, Float32] + if oneAPI.is_integrated() && T == Float32 + continue + end + a = oneArray([zero(T)]) - function kernel(a, b) - oneAPI.atomic_add!(pointer(a), b) - return + function kernel(a, b) + oneAPI.atomic_add!(pointer(a), b) + return + end + + @oneapi items=256 kernel(a, one(T)) + @test Array(a)[1] == T(256) end - @oneapi items=256 kernel(a, one(T)) - @test Array(a)[1] == T(256) -end + @testset "atomic_sub($T)" for T in [Int32, UInt32, Float32] + if oneAPI.is_integrated() && T == Float32 + continue + end + a = oneArray([T(256)]) -@testset "atomic_sub($T)" for T in [Int32, UInt32] - a = oneArray([T(256)]) + function kernel(a, b) + oneAPI.atomic_sub!(pointer(a), b) + return + end - function kernel(a, b) - oneAPI.atomic_sub!(pointer(a), b) - return + @oneapi items=256 kernel(a, one(T)) + @test Array(a)[1] == T(0) end - @oneapi items=256 kernel(a, one(T)) - @test Array(a)[1] == T(0) -end + @testset "atomic_inc($T)" for T in [Int32, UInt32] + a = oneArray([zero(T)]) -@testset "atomic_inc($T)" for T in [Int32, UInt32] - a = oneArray([zero(T)]) + function kernel(a) + oneAPI.atomic_inc!(pointer(a)) + return + end - function kernel(a) - oneAPI.atomic_inc!(pointer(a)) - return + @oneapi items=256 kernel(a) + @test Array(a)[1] == T(256) end - @oneapi items=256 kernel(a) - @test Array(a)[1] == T(256) -end + @testset "atomic_dec($T)" for T in [Int32, UInt32] + a = oneArray([T(256)]) -@testset "atomic_dec($T)" for T in [Int32, UInt32] - a = oneArray([T(256)]) + function kernel(a) + oneAPI.atomic_dec!(pointer(a)) + return + end - function kernel(a) - oneAPI.atomic_dec!(pointer(a)) - return + @oneapi items=256 kernel(a) + @test Array(a)[1] == T(0) end - @oneapi items=256 kernel(a) - @test Array(a)[1] == T(0) -end + @testset "atomic_min($T)" for T in [Int32, UInt32, Float32] + if oneAPI.is_integrated() && T == Float32 + continue + end + a = oneArray([T(256)]) -@testset "atomic_min($T)" for T in [Int32, UInt32] - a = oneArray([T(256)]) + function kernel(a, T) + i = get_global_id() + oneAPI.atomic_min!(pointer(a), T(i)) + return + end - function kernel(a, T) - i = get_global_id() - oneAPI.atomic_min!(pointer(a), i%T) - return + @oneapi items=256 kernel(a, T) + @test Array(a)[1] == one(T) end - @oneapi items=256 kernel(a, T) - @test Array(a)[1] == one(T) -end + @testset "atomic_max($T)" for T in [Int32, UInt32, Float32] + if oneAPI.is_integrated() && T == Float32 + continue + end + a = oneArray([zero(T)]) -@testset "atomic_max($T)" for T in [Int32, UInt32] - a = oneArray([zero(T)]) + function kernel(a, T) + i = get_global_id() + oneAPI.atomic_max!(pointer(a), T(i)) + return + end - function kernel(a, T) - i = get_global_id() - oneAPI.atomic_max!(pointer(a), i%T) - return + @oneapi items=256 kernel(a, T) + @test Array(a)[1] == T(256) end - @oneapi items=256 kernel(a, T) - @test Array(a)[1] == T(256) -end - -@testset "atomic_and($T)" for T in [Int32, UInt32] - a = oneArray([T(1023)]) + @testset "atomic_and($T)" for T in [Int32, UInt32] + a = oneArray([T(1023)]) - function kernel(a, T) - i = get_global_id() - 1 - k = 1 - for i = 1:i - k *= 2 + function kernel(a, T) + i = get_global_id() - 1 + k = 1 + for i = 1:i + k *= 2 + end + b = 1023 - k # 1023 - 2^i + oneAPI.atomic_and!(pointer(a), T(b)) + return end - b = 1023 - k # 1023 - 2^i - oneAPI.atomic_and!(pointer(a), T(b)) - return - end - @oneapi items=10 kernel(a, T) - @test Array(a)[1] == zero(T) -end + @oneapi items=10 kernel(a, T) + @test Array(a)[1] == zero(T) + end -@testset "atomic_or($T)" for T in [Int32, UInt32] - a = oneArray([zero(T)]) + @testset "atomic_or($T)" for T in [Int32, UInt32] + a = oneArray([zero(T)]) - function kernel(a, T) - i = get_global_id() - b = 1 # 2^(i-1) - for i = 1:i - b *= 2 + function kernel(a, T) + i = get_global_id() + b = 1 # 2^(i-1) + for i = 1:i + b *= 2 + end + b ÷= 2 + oneAPI.atomic_or!(pointer(a), T(b)) + return end - b ÷= 2 - oneAPI.atomic_or!(pointer(a), T(b)) - return - end - @oneapi items=10 kernel(a, T) - @test Array(a)[1] == T(1023) -end + @oneapi items=10 kernel(a, T) + @test Array(a)[1] == T(1023) + end -@testset "atomic_xor($T)" for T in [Int32, UInt32] - a = oneArray([T(1023)]) + @testset "atomic_xor($T)" for T in [Int32, UInt32] + a = oneArray([T(1023)]) - function kernel(a, T) - i = get_global_id() - b = 1 # 2^(i-1) - for i = 1:i - b *= 2 + function kernel(a, T) + i = get_global_id() + b = 1 # 2^(i-1) + for i = 1:i + b *= 2 + end + b ÷= 2 + oneAPI.atomic_xor!(pointer(a), T(b)) + return end - b ÷= 2 - oneAPI.atomic_xor!(pointer(a), T(b)) - return + + @oneapi items=10 kernel(a, T) + @test Array(a)[1] == zero(T) end - @oneapi items=10 kernel(a, T) - @test Array(a)[1] == zero(T) -end + @testset "atomic_xchg($T)" for T in [Int32, UInt32, Float32] + if oneAPI.is_integrated() && T == Float32 + continue + end + a = oneArray([zero(T)]) -@testset "atomic_xchg($T)" for T in [Int32, UInt32, Float32] - a = oneArray([zero(T)]) + function kernel(a, b) + oneAPI.atomic_xchg!(pointer(a), b) + return + end - function kernel(a, b) - oneAPI.atomic_xchg!(pointer(a), b) - return + @oneapi items=256 kernel(a, one(T)) + @test Array(a)[1] == one(T) end - @oneapi items=256 kernel(a, one(T)) - @test Array(a)[1] == one(T) -end - -end +# end