diff --git a/src/threading.c b/src/threading.c
index a3dc220..15907c6 100644
--- a/src/threading.c
+++ b/src/threading.c
@@ -31,6 +31,15 @@
 } MKLVersion;
 
 
+/* Apple Accelerate doesn't allow setting the number of threads directly, it only has an
+ * option to do single-threaded or multi-threaded.  That is controlled via the BLASSetThreading
+ * API introduced in macOS 15.
+ *
+ * These constants are from the vecLib.framework/Headers/thread_api.h file
+ */
+#define ACCELERATE_BLAS_THREADING_MULTI_THREADED 0
+#define ACCELERATE_BLAS_THREADING_SINGLE_THREADED 1
+
 /*
  * We provide a flexible thread getter/setter interface here; by calling `lbt_set_num_threads()`
  * libblastrampoline will propagate the call through to its loaded libraries as long as the
@@ -50,6 +59,7 @@ static char * getter_names[MAX_THREADING_NAMES] = {
     "nvpl_lapack_get_max_threads",
     // We special-case MKL in the lookup loop below
     //"MKL_Domain_Get_Max_Threads",
+    // We special-case Apple Accelerate below
     NULL
 };
 
@@ -60,6 +70,7 @@ static char * setter_names[MAX_THREADING_NAMES] = {
     "nvpl_lapack_set_num_threads",
     // We special-case MKL in the lookup loop below
     //"MKL_Domain_Set_Num_Threads",
+    // We special-case Apple Accelerate below
     NULL
 };
 
@@ -129,6 +140,37 @@ LBT_DLLEXPORT int32_t lbt_get_num_threads() {
                 }
             }
         }
+
+        // Special case Apple Accelerate because we have to determine if we are single-threaded or multi-threaded
+        // This API only exists on macOS 15+, so a failed symbol lookup simply skips this library.
+        int (*fptr_acc)(void) = lookup_symbol(lib->handle, "BLASGetThreading");
+        if (fptr_acc != NULL) {
+            int threading_mode = fptr_acc();
+
+            if (threading_mode == ACCELERATE_BLAS_THREADING_MULTI_THREADED) {
+                int (*fptr_acc_nthreads)(void) = lookup_symbol(lib->handle, "APPLE_NTHREADS");
+                if (fptr_acc_nthreads != NULL) {
+                    // In Accelerate, there is a symbol called APPLE_NTHREADS, which appears to be a function we
+                    // can call to get an integer saying the number of CPU threads. There is no documentation for this
+                    // anywhere accessible online, but testing two different CPUs seems to suggest it is CPU cores.
+                    //
+                    // Doing this:
+                    //   julia> @ccall AppleAccelerate.libacc.APPLE_NTHREADS()::Int
+                    //
+                    // The M2 Max returned 12, M4 Max returned 16, which is the total number of cores (both big and little)
+                    // in each processor.
+                    int acc_nthreads = fptr_acc_nthreads();
+                    max_threads = max(max_threads, acc_nthreads);
+                } else {
+                    // This number is arbitrary because we have no idea how many threads are actually in use,
+                    // but greater than 1 to mean multi-threaded.
+                    max_threads = max(max_threads, 2);
+                }
+            } else {
+                // Single-threaded
+                max_threads = max(max_threads, 1);
+            }
+        }
     }
     return max_threads;
 }
@@ -157,5 +199,16 @@ LBT_DLLEXPORT void lbt_set_num_threads(int32_t nthreads) {
             fptr(nthreads, MKL_DOMAIN_BLAS);
             fptr(nthreads, MKL_DOMAIN_LAPACK);
         }
+
+        // Special case Apple Accelerate because we have to determine if we must set multi-threaded or single-threaded
+        // This API only exists on macOS 15+, so a failed symbol lookup simply skips this library.
+        int (*fptr_acc)(int) = lookup_symbol(lib->handle, "BLASSetThreading");
+        if (fptr_acc != NULL) {
+            if (nthreads > 1) {
+                fptr_acc(ACCELERATE_BLAS_THREADING_MULTI_THREADED);
+            } else {
+                fptr_acc(ACCELERATE_BLAS_THREADING_SINGLE_THREADED);
+            }
+        }
     }
 }
diff --git a/test/accelerate.jl b/test/accelerate.jl
new file mode 100644
index 0000000..177b7d4
--- /dev/null
+++ b/test/accelerate.jl
@@ -0,0 +1,93 @@
+using Libdl, Test
+
+# Taken from AppleAccelerate.jl to avoid a dependency on it
+const libacc = "/System/Library/Frameworks/Accelerate.framework/Accelerate"
+const libacc_info_plist = "/System/Library/Frameworks/Accelerate.framework/Versions/Current/Resources/Info.plist"
+
+function get_macos_version(normalize=true)
+    @static if !Sys.isapple()
+        return nothing
+    end
+
+    plist_lines = split(String(read("/System/Library/CoreServices/SystemVersion.plist")), "\n")
+    vers_idx = findfirst(l -> occursin("ProductVersion", l), plist_lines)
+    if vers_idx === nothing
+        return nothing
+    end
+
+    m = match(r">([\d\.]+)<", plist_lines[vers_idx+1])
+    if m === nothing
+        return nothing
+    end
+
+    ver = VersionNumber(only(m.captures))
+    if normalize && ver.major == 16
+        return VersionNumber(26, ver.minor, ver.patch)
+    end
+    return ver
+end
+
+
+# Load the Accelerate library
+libacc_handle = dlopen(libacc)
+@testset "Accelerate ILP64 loading" begin
+    # ILP64 requires macOS 13.3+
+    if get_macos_version() >= v"13.3"
+        # Load the ILP64 interface
+        lbt_forward(lbt_handle, libacc; clear=true, suffix_hint="\x1a\$NEWLAPACK\$ILP64")
+
+        # Test that we have only one library loaded
+        config = lbt_get_config(lbt_handle)
+        libs = unpack_loaded_libraries(config)
+        @test length(libs) == 1
+
+        # Test that it's Accelerate and it's correctly identified
+        @test libs[1].libname == libacc
+        @test libs[1].interface == LBT_INTERFACE_ILP64
+
+        # Test that `dgemm` forwards to `dgemm_` within the Accelerate library
+        acc_dgemm = dlsym(libacc_handle, "dgemm\$NEWLAPACK\$ILP64")
+        @test lbt_get_forward(lbt_handle, "dgemm_", LBT_INTERFACE_ILP64) == acc_dgemm
+    end
+end
+
+@testset "Accelerate LP64 loading" begin
+    # New LAPACK interface requires macOS 13.3+
+    if get_macos_version() >= v"13.3"
+        # Load the LP64 interface
+        lbt_forward(lbt_handle, libacc; clear=true, suffix_hint="\x1a\$NEWLAPACK")
+
+        # Test that we have only one library loaded
+        config = lbt_get_config(lbt_handle)
+        libs = unpack_loaded_libraries(config)
+        @test length(libs) == 1
+
+        # Test that it's Accelerate and it's correctly identified
+        @test libs[1].libname == libacc
+        @test libs[1].interface == LBT_INTERFACE_LP64
+
+        # Test that `dgemm` forwards to `dgemm_` within the Accelerate library
+        acc_dgemm = dlsym(libacc_handle, "dgemm\$NEWLAPACK")
+        @test lbt_get_forward(lbt_handle, "dgemm_", LBT_INTERFACE_LP64) == acc_dgemm
+    end
+end
+
+@testset "Accelerate threading" begin
+    # This threading API will only work on v15 and above
+    if get_macos_version() >= v"15"
+        lbt_forward(lbt_handle, libacc; clear=true)
+
+        # Set to single-threaded
+        lbt_set_num_threads(lbt_handle, 1)
+        @test lbt_get_num_threads(lbt_handle) == 1
+
+        # Set to multi-threaded
+        # Accelerate doesn't actually let us say how many threads, so we must test for greater than
+        lbt_set_num_threads(lbt_handle, 2)
+        @test lbt_get_num_threads(lbt_handle) > 1
+
+        # Set back to single-threaded
+        lbt_set_num_threads(lbt_handle, 1)
+        @test lbt_get_num_threads(lbt_handle) == 1
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index ec0a442..fa30492 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -255,3 +255,8 @@ end
 
 # Run our "direct" tests within Julia
 include("direct.jl")
+
+# Run some Apple Accelerate tests, but only on Apple
+@static if Sys.isapple()
+    include("accelerate.jl")
+end
\ No newline at end of file