Skip to content

Commit a5d7326

Browse files
authored
[Preview] Integrated GPU / Intel GPU support through SYCL. (#7114)
- SYCL support for Tensor ops. - SYCL support for linear algebra. - Update C++ and Python unit tests. These can be run locally, but do not run on GIthub since no integrated or discrete GPU is available. SYCL CPU device is not as well supported by OneAPI and gives errors in some tests. - Build preview sycl wheel [Python 3.10 only] - Reduce tensor indexer MAX_DIMS from 10 to 5. TODO: - Fix SYCL [BUILD_SHARED_LIBS=OFF] CI out of storage github issue. - Build wheels for other Python versions. - Optimize SYCL kernels (especially reduction). - SYCL support for nearest nbr search. - SYCL support for hash grids. - Custom kernels with ParallelFor for supporting geometry operations. Other fixes: * Only run ProjectImagesToAlbedo on CPU on x86_64 due to IPP dependency.
1 parent 9149c9c commit a5d7326

File tree

113 files changed

+2695
-419
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

113 files changed

+2695
-419
lines changed

.github/workflows/ubuntu-sycl.yml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,26 @@ jobs:
4949
docker/docker_test.sh sycl-static
5050
fi
5151
52+
- name: Upload Python wheel and C++ binary package to GitHub artifacts
53+
if: ${{ matrix.BUILD_SHARED_LIBS == 'ON' }}
54+
uses: actions/upload-artifact@v4
55+
with:
56+
name: open3d-sycl-linux-wheel-and-binary
57+
path: |
58+
open3d-*.whl
59+
open3d-*.tar.xz
60+
if-no-files-found: error
61+
- name: Update devel release
62+
if: ${{ github.ref == 'refs/heads/main' && matrix.BUILD_SHARED_LIBS == 'ON' }}
63+
env:
64+
GH_TOKEN: ${{ github.token }}
65+
run: |
66+
if [ ${{ matrix.BUILD_SHARED_LIBS }} == 'ON' ] ; then
67+
gh release upload main-devel open3d-*.whl --clobber
68+
gh release upload main-devel open3d-*.tar.xz --clobber
69+
fi
70+
gh release view main-devel
71+
5272
- name: GCloud CLI auth
5373
if: ${{ github.ref == 'refs/heads/main' }}
5474
uses: 'google-github-actions/auth@v2'

3rdparty/find_dependencies.cmake

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1534,12 +1534,17 @@ open3d_import_3rdparty_library(3rdparty_uvatlas
15341534
list(APPEND Open3D_3RDPARTY_PRIVATE_TARGETS_FROM_CUSTOM Open3D::3rdparty_uvatlas)
15351535

15361536

1537+
# SYCL link options are specified here. Compile options are only applied to SYCL source files and are specified in cmake/Open3DSYCLTargetSources.cmake
15371538
if(BUILD_SYCL_MODULE)
15381539
add_library(3rdparty_sycl INTERFACE)
15391540
target_link_libraries(3rdparty_sycl INTERFACE
15401541
$<$<AND:$<CXX_COMPILER_ID:IntelLLVM>,$<NOT:$<LINK_LANGUAGE:ISPC>>>:sycl>)
15411542
target_link_options(3rdparty_sycl INTERFACE
1542-
$<$<AND:$<CXX_COMPILER_ID:IntelLLVM>,$<NOT:$<LINK_LANGUAGE:ISPC>>>:-fsycl -fsycl-targets=spir64_x86_64>)
1543+
$<$<AND:$<CXX_COMPILER_ID:IntelLLVM>,$<NOT:$<LINK_LANGUAGE:ISPC>>>:-fsycl -fsycl-targets=${OPEN3D_SYCL_TARGETS}>)
1544+
if (OPEN3D_SYCL_TARGET_BACKEND_OPTIONS)
1545+
target_link_options(3rdparty_sycl INTERFACE
1546+
$<$<AND:$<CXX_COMPILER_ID:IntelLLVM>,$<NOT:$<LINK_LANGUAGE:ISPC>>>:-Xs ${OPEN3D_SYCL_TARGET_BACKEND_OPTIONS}>)
1547+
endif()
15431548
if(NOT BUILD_SHARED_LIBS OR arg_PUBLIC)
15441549
install(TARGETS 3rdparty_sycl EXPORT Open3DTargets)
15451550
endif()
@@ -1565,8 +1570,12 @@ if(OPEN3D_USE_ONEAPI_PACKAGES)
15651570
GROUPED
15661571
INCLUDE_DIRS ${MKL_INCLUDE}/
15671572
LIB_DIR ${MKL_ROOT}/lib/intel64
1568-
LIBRARIES mkl_intel_ilp64 mkl_tbb_thread mkl_core
1573+
LIBRARIES $<$<BOOL:${BUILD_SYCL_MODULE}>:mkl_sycl> mkl_intel_ilp64 mkl_tbb_thread mkl_core
15691574
)
1575+
if (BUILD_SYCL_MODULE)
1576+
# target_link_options(3rdparty_mkl INTERFACE "-Wl,-export-dynamic")
1577+
target_link_libraries(3rdparty_mkl INTERFACE OpenCL)
1578+
endif()
15701579
# MKL definitions
15711580
target_compile_options(3rdparty_mkl INTERFACE "$<$<PLATFORM_ID:Linux,Darwin>:$<$<COMPILE_LANGUAGE:CXX>:-m64>>")
15721581
target_compile_definitions(3rdparty_mkl INTERFACE "$<$<COMPILE_LANGUAGE:CXX>:MKL_ILP64>")

CMakeLists.txt

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,16 @@ else()
6363
option(STATIC_WINDOWS_RUNTIME "Use static (MT/MTd) Windows runtime" ON )
6464
endif()
6565
option(BUILD_SYCL_MODULE "Build SYCL module with Intel oneAPI" OFF)
66+
if(BUILD_SYCL_MODULE)
67+
set(OPEN3D_SYCL_TARGETS "spir64" CACHE STRING
68+
"SYCL targets: spir64 for JIT, or another for AOT compilation. See https://github.com/intel/llvm/blob/sycl/sycl/doc/UsersManual.md."
69+
)
70+
set(OPEN3D_SYCL_TARGET_BACKEND_OPTIONS "" CACHE STRING
71+
"SYCL target backend options, e.g. to compile for a specific device. See https://github.com/intel/llvm/blob/sycl/sycl/doc/UsersManual.md."
72+
)
73+
set(BUILD_ISPC_MODULE OFF CACHE BOOL "Build the ISPC module" FORCE)
74+
set(BUILD_CUDA_MODULE OFF CACHE BOOL "Build the CUDA module" FORCE)
75+
endif()
6676
option(GLIBCXX_USE_CXX11_ABI "Set -D_GLIBCXX_USE_CXX11_ABI=1" ON )
6777
option(ENABLE_SYCL_UNIFIED_SHARED_MEMORY "Enable SYCL unified shared memory" OFF)
6878
if(BUILD_GUI AND (WIN32 OR UNIX AND NOT LINUX_AARCH64 AND NOT APPLE_AARCH64))
@@ -282,12 +292,6 @@ endif()
282292
if(BUILD_SYCL_MODULE AND NOT GLIBCXX_USE_CXX11_ABI)
283293
message(FATAL_ERROR "BUILD_SYCL_MODULE=ON requires GLIBCXX_USE_CXX11_ABI=ON")
284294
endif()
285-
if(BUILD_SYCL_MODULE AND BUILD_TENSORFLOW_OPS)
286-
message(FATAL_ERROR "BUILD_SYCL_MODULE=ON requires BUILD_TENSORFLOW_OPS=OFF")
287-
endif()
288-
if(BUILD_SYCL_MODULE AND BUILD_PYTORCH_OPS)
289-
message(FATAL_ERROR "BUILD_SYCL_MODULE=ON requires BUILD_PYTORCH_OPS=OFF")
290-
endif()
291295
if(BUILD_SYCL_MODULE AND BUILD_CUDA_MODULE)
292296
message(FATAL_ERROR "BUILD_SYCL_MODULE and BUILD_SYCL_MODULE cannot be on at the same time for now.")
293297
endif()

cmake/Open3DSYCLTargetSources.cmake

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
#
33
# When BUILD_SYCL_MODULE=ON, set SYCL-specific compile flags for the listed
44
# source files and call target_sources(). If BUILD_SYCL_MODULE=OFF, this
5-
# function directly calls target_sources().
5+
# function directly calls target_sources(). For SYCL link options, see
6+
# 3rdparty/find_dependencies.cmake
67
#
78
# Note: this is not a perfect forwarding to target_sources(), as it only support
89
# limited set of arguments. See the example usage below.
@@ -31,7 +32,7 @@ function(open3d_sycl_target_sources target)
3132
if(BUILD_SYCL_MODULE)
3233
foreach(sycl_file IN LISTS arg_UNPARSED_ARGUMENTS)
3334
set_source_files_properties(${sycl_file} PROPERTIES
34-
COMPILE_OPTIONS -fsycl -fsycl-unnamed-lambda -fsycl-targets=spir64_x86_64)
35+
COMPILE_OPTIONS "-fsycl;-fsycl-targets=${OPEN3D_SYCL_TARGETS}")
3536
if(arg_VERBOSE)
3637
message(STATUS "open3d_sycl_target_sources(${target}): marked ${sycl_file} as SYCL code")
3738
endif()

cmake/Open3DSetGlobalProperties.cmake

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,16 @@ function(open3d_enable_strip target)
2525
endif()
2626
endfunction()
2727

28-
# RPATH handling (for TBB DSO). Check current folder, one folder above and the lib sibling folder
28+
# RPATH handling (for TBB DSO). Check current folder, one folder above and the lib sibling folder.
29+
# Also check the Python virtual env /lib folder for 3rd party dependency libraries installed with `pip install`
2930
set(CMAKE_BUILD_RPATH_USE_ORIGIN ON)
3031
if (APPLE)
31-
# Add options to cover the various ways in which open3d shaed lib or apps can be installed wrt TBB DSO
32-
set(CMAKE_INSTALL_RPATH "@loader_path;@loader_path/../;@loader_path/../lib/")
32+
# Add options to cover the various ways in which open3d shared lib or apps can be installed wrt TBB DSO
33+
set(CMAKE_INSTALL_RPATH "@loader_path;@loader_path/../;@loader_path/../lib/;@loader_path/../../../../")
3334
# pybind with open3d shared lib is copied, not cmake-installed, so we need to add .. to build rpath
3435
set(CMAKE_BUILD_RPATH "@loader_path/../")
3536
elseif(UNIX)
36-
set(CMAKE_INSTALL_RPATH "$ORIGIN;$ORIGIN/../;$ORIGIN/../lib/")
37+
set(CMAKE_INSTALL_RPATH "$ORIGIN;$ORIGIN/../;$ORIGIN/../lib/;$ORIGIN/../../../../")
3738
set(CMAKE_BUILD_RPATH "$ORIGIN/../")
3839
endif()
3940

cpp/apps/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,11 @@ macro(open3d_add_app_common SRC_DIR APP_NAME TARGET_NAME)
130130
target_link_libraries(${TARGET_NAME} PRIVATE Open3D::Open3D TBB::tbb ${ARGN})
131131
set_target_properties(${TARGET_NAME} PROPERTIES FOLDER "apps")
132132

133+
if (BUILD_SYCL_MODULE)
134+
find_package(IntelSYCL REQUIRED) # requires cmake>=3.25 on Windows
135+
add_sycl_to_target(TARGET ${TARGET_NAME})
136+
endif()
137+
133138
open3d_link_3rdparty_libraries(${TARGET_NAME})
134139
open3d_show_and_abort_on_warning(${TARGET_NAME})
135140
open3d_set_global_properties(${TARGET_NAME})

cpp/benchmarks/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ if (BUILD_CUDA_MODULE)
1414
find_package(CUDAToolkit REQUIRED)
1515
target_link_libraries(benchmarks PRIVATE CUDA::cudart)
1616
endif()
17+
if (BUILD_SYCL_MODULE)
18+
find_package(IntelSYCL REQUIRED) # requires cmake>=3.25 on Windows
19+
add_sycl_to_target(TARGET benchmarks)
20+
endif()
1721

1822
open3d_show_and_abort_on_warning(benchmarks)
1923
open3d_set_global_properties(benchmarks)

cpp/open3d/core/CMakeLists.txt

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ target_sources(core PRIVATE
55
CUDAUtils.cpp
66
Device.cpp
77
Dtype.cpp
8-
EigenConverter.cpp
98
Indexer.cpp
109
MemoryManager.cpp
1110
MemoryManagerCached.cpp
@@ -23,6 +22,7 @@ target_sources(core PRIVATE
2322

2423
# Compile regardless BUILD_SYCL_MODULE == ON or OFF.
2524
open3d_sycl_target_sources(core PRIVATE
25+
EigenConverter.cpp
2626
SYCLUtils.cpp
2727
)
2828

@@ -43,22 +43,14 @@ target_sources(core PRIVATE
4343
hashmap/HashSet.cpp
4444
kernel/Kernel.cpp
4545
linalg/AddMM.cpp
46-
linalg/AddMMCPU.cpp
4746
linalg/Det.cpp
4847
linalg/Inverse.cpp
49-
linalg/InverseCPU.cpp
5048
linalg/LeastSquares.cpp
51-
linalg/LeastSquaresCPU.cpp
5249
linalg/LU.cpp
53-
linalg/LUCPU.cpp
5450
linalg/Matmul.cpp
55-
linalg/MatmulCPU.cpp
5651
linalg/Solve.cpp
57-
linalg/SolveCPU.cpp
5852
linalg/SVD.cpp
59-
linalg/SVDCPU.cpp
6053
linalg/Tri.cpp
61-
linalg/TriCPU.cpp
6254
nns/FixedRadiusIndex.cpp
6355
nns/FixedRadiusSearchOps.cpp
6456
nns/KnnIndex.cpp
@@ -73,21 +65,48 @@ set_target_properties(core_impl PROPERTIES CXX_VISIBILITY_PRESET "hidden")
7365

7466
target_sources(core_impl PRIVATE
7567
kernel/Arange.cpp
76-
kernel/ArangeCPU.cpp
7768
kernel/BinaryEW.cpp
78-
kernel/BinaryEWCPU.cpp
7969
kernel/IndexGetSet.cpp
80-
kernel/IndexGetSetCPU.cpp
8170
kernel/IndexReduction.cpp
82-
kernel/IndexReductionCPU.cpp
8371
kernel/NonZero.cpp
84-
kernel/NonZeroCPU.cpp
8572
kernel/Reduction.cpp
86-
kernel/ReductionCPU.cpp
8773
kernel/UnaryEW.cpp
74+
kernel/ArangeCPU.cpp
75+
kernel/BinaryEWCPU.cpp
76+
kernel/IndexGetSetCPU.cpp
77+
kernel/IndexReductionCPU.cpp
78+
kernel/NonZeroCPU.cpp
79+
kernel/ReductionCPU.cpp
8880
kernel/UnaryEWCPU.cpp
81+
linalg/AddMMCPU.cpp
82+
linalg/InverseCPU.cpp
83+
linalg/LeastSquaresCPU.cpp
84+
linalg/LUCPU.cpp
85+
linalg/MatmulCPU.cpp
86+
linalg/SolveCPU.cpp
87+
linalg/SVDCPU.cpp
88+
linalg/TriCPU.cpp
89+
)
90+
91+
if (BUILD_SYCL_MODULE)
92+
open3d_sycl_target_sources(core_impl PRIVATE
8993
kernel/UnaryEWSYCL.cpp
94+
kernel/BinaryEWSYCL.cpp
95+
kernel/ArangeSYCL.cpp
96+
kernel/IndexGetSetSYCL.cpp
97+
kernel/NonZeroSYCL.cpp
98+
kernel/IndexReductionSYCL.cpp
99+
kernel/ReductionSYCL.cpp
100+
linalg/AddMMSYCL.cpp
101+
linalg/InverseSYCL.cpp
102+
linalg/LeastSquaresSYCL.cpp
103+
linalg/LUSYCL.cpp
104+
linalg/MatmulSYCL.cpp
105+
linalg/SolveSYCL.cpp
106+
linalg/SVDSYCL.cpp
107+
linalg/TriSYCL.cpp
90108
)
109+
endif()
91110

92111
if (BUILD_CUDA_MODULE)
93112
target_sources(core PRIVATE

cpp/open3d/core/Device.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,4 +115,12 @@ struct hash<open3d::core::Device> {
115115
return std::hash<std::string>{}(device.ToString());
116116
}
117117
};
118+
119+
template <>
120+
struct less<open3d::core::Device> {
121+
bool operator()(const open3d::core::Device& lhs,
122+
const open3d::core::Device& rhs) const {
123+
return lhs.ToString() < rhs.ToString();
124+
}
125+
};
118126
} // namespace std

cpp/open3d/core/Indexer.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,11 @@ class Indexer;
3434
class IndexerIterator;
3535

3636
// Maximum number of dimensions of TensorRef.
37-
static constexpr int64_t MAX_DIMS = 10;
37+
static constexpr int64_t MAX_DIMS = 5;
3838

3939
// Maximum number of inputs of an op.
4040
// MAX_INPUTS shall be >= MAX_DIMS to support advanced indexing.
41-
static constexpr int64_t MAX_INPUTS = 10;
41+
static constexpr int64_t MAX_INPUTS = 5;
4242

4343
// Maximum number of outputs of an op. This number can be increased when
4444
// necessary.
@@ -110,7 +110,7 @@ struct TensorRef {
110110

111111
TensorRef(const Tensor& t) {
112112
if (t.NumDims() > MAX_DIMS) {
113-
utility::LogError("Tenor has too many dimensions {} > {}.",
113+
utility::LogError("Tensor has too many dimensions {} > {}.",
114114
t.NumDims(), MAX_DIMS);
115115
}
116116
data_ptr_ = const_cast<void*>(t.GetDataPtr());

cpp/open3d/core/Indexer.isph

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@
1111
#include "open3d/utility/Helper.isph"
1212

1313
// Maximum number of dimensions of TensorRef.
14-
enum { MAX_DIMS = 10 };
14+
enum { MAX_DIMS = 4 };
1515

1616
// Maximum number of inputs of an op.
1717
// MAX_INPUTS shall be >= MAX_DIMS to support advanced indexing.
18-
enum { MAX_INPUTS = 10 };
18+
enum { MAX_INPUTS = 4 };
1919

2020
// Maximum number of outputs of an op. This number can be increased when
2121
// necessary.

cpp/open3d/core/ParallelFor.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,11 @@ void ParallelForCPU_(const Device& device, int64_t n, const func_t& func) {
9999
/// \note If you use a lambda function, capture only the required variables
100100
/// instead of all to prevent accidental race conditions. If you want the
101101
/// kernel to be used on both CPU and CUDA, capture the variables by value.
102+
/// \note This does not dispatch to SYCL, since SYCL has extra constraints:
103+
/// - Lambdas may capture by value only.
104+
/// - No function pointers / virtual functions.
105+
/// Auto dispatch to SYCL will enforce these conditions even on CPU devices. Use
106+
/// ParallelForSYCL instead.
102107
template <typename func_t>
103108
void ParallelFor(const Device& device, int64_t n, const func_t& func) {
104109
#ifdef __CUDACC__

cpp/open3d/core/ParallelForSYCL.h

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// ----------------------------------------------------------------------------
2+
// - Open3D: www.open3d.org -
3+
// ----------------------------------------------------------------------------
4+
// Copyright (c) 2018-2024 www.open3d.org
5+
// SPDX-License-Identifier: MIT
6+
// ----------------------------------------------------------------------------
7+
8+
#pragma once
9+
10+
#include <cstdint>
11+
#include <type_traits>
12+
13+
#include "open3d/core/Device.h"
14+
#include "open3d/core/Indexer.h"
15+
#include "open3d/core/SYCLContext.h"
16+
#include "open3d/utility/Logging.h"
17+
18+
namespace open3d {
19+
namespace core {
20+
21+
/// Run a function in parallel with SYCL.
22+
template <typename Functor, typename... FuncArgs>
23+
void ParallelForSYCL(const Device& device,
24+
Indexer indexer,
25+
FuncArgs... func_args) {
26+
if (!device.IsSYCL()) {
27+
utility::LogError("ParallelFor for SYCL cannot run on device {}.",
28+
device.ToString());
29+
}
30+
int64_t n = indexer.NumWorkloads();
31+
if (n == 0) {
32+
return;
33+
}
34+
auto queue = sy::SYCLContext::GetInstance().GetDefaultQueue(device);
35+
/// TODO: Specify grid size based on device properties
36+
queue.parallel_for<Functor>(n, [indexer, func_args...](int64_t i) {
37+
Functor ef(indexer, func_args...);
38+
ef(i);
39+
}).wait_and_throw();
40+
}
41+
42+
/// Run a function in parallel with SYCL.
43+
template <typename Functor, typename... FuncArgs>
44+
void ParallelForSYCL(const Device& device,
45+
int64_t num_workloads,
46+
FuncArgs... func_args) {
47+
if (!device.IsSYCL()) {
48+
utility::LogError("ParallelFor for SYCL cannot run on device {}.",
49+
device.ToString());
50+
}
51+
if (num_workloads == 0) {
52+
return;
53+
}
54+
auto queue = sy::SYCLContext::GetInstance().GetDefaultQueue(device);
55+
/// TODO: Specify grid size based on device properties
56+
queue.parallel_for<Functor>(num_workloads, [func_args...](int64_t i) {
57+
Functor ef(func_args...);
58+
ef(i);
59+
}).wait_and_throw();
60+
}
61+
62+
} // namespace core
63+
} // namespace open3d

0 commit comments

Comments
 (0)