Skip to content
This repository was archived by the owner on Jan 13, 2025. It is now read-only.

Commit c6d3cad

Browse files
authored
fix for DEFAULT TUNING_TARGET on AMD and NVIDIA GPUs (#517)
* Fix iamax/iamin operators for default configuration on NVIDIA GPUs * Fix trsv/tbsv/tpsv operators using DEFAULT and targetting NVIDIA GPUs * Apply changes also to AMD GPUs
1 parent 3a3113a commit c6d3cad

File tree

4 files changed

+55
-40
lines changed

4 files changed

+55
-40
lines changed

src/interface/blas2/backend/default.hpp

+48-10
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,21 @@ typename sb_handle_t::event_t _trsv(
137137
sb_handle_t& sb_handle, index_t _N, container_t0 _mA, index_t _lda,
138138
container_t1 _vx, increment_t _incx,
139139
typename sb_handle_t::event_t _dependencies) {
140-
return blas::internal::_trsv_impl<4, 2, uplo, trn, diag>(
141-
sb_handle, _N, _mA, _lda, _vx, _incx, _dependencies);
140+
const auto device = sb_handle.get_queue().get_device();
141+
if (device.is_gpu()) {
142+
const std::string vendor =
143+
device.template get_info<cl::sycl::info::device::vendor>();
144+
if (vendor.find("Intel") == vendor.npos) {
145+
return blas::internal::_trsv_impl<32, 4, uplo, trn, diag>(
146+
sb_handle, _N, _mA, _lda, _vx, _incx, _dependencies);
147+
} else {
148+
throw std::runtime_error(
149+
"Trsv operator currently not supported on Intel GPUs");
150+
}
151+
} else {
152+
return blas::internal::_trsv_impl<4, 2, uplo, trn, diag>(
153+
sb_handle, _N, _mA, _lda, _vx, _incx, _dependencies);
154+
}
142155
}
143156
} // namespace backend
144157
} // namespace trsv
@@ -152,8 +165,21 @@ typename sb_handle_t::event_t _tbsv(
152165
sb_handle_t& sb_handle, index_t _N, index_t _K, container_t0 _mA,
153166
index_t _lda, container_t1 _vx, increment_t _incx,
154167
const typename sb_handle_t::event_t& _dependencies) {
155-
return blas::internal::_tbsv_impl<4, 2, uplo, trn, diag>(
156-
sb_handle, _N, _K, _mA, _lda, _vx, _incx, _dependencies);
168+
const auto device = sb_handle.get_queue().get_device();
169+
if (device.is_gpu()) {
170+
const std::string vendor =
171+
device.template get_info<cl::sycl::info::device::vendor>();
172+
if (vendor.find("Intel") == vendor.npos) {
173+
return blas::internal::_tbsv_impl<32, 4, uplo, trn, diag>(
174+
sb_handle, _N, _K, _mA, _lda, _vx, _incx, _dependencies);
175+
} else {
176+
throw std::runtime_error(
177+
"Tbsv operator currently not supported on Intel GPUs");
178+
}
179+
} else {
180+
return blas::internal::_tbsv_impl<4, 2, uplo, trn, diag>(
181+
sb_handle, _N, _K, _mA, _lda, _vx, _incx, _dependencies);
182+
}
157183
}
158184
} // namespace backend
159185
} // namespace tbsv
@@ -163,12 +189,24 @@ namespace backend {
163189
template <uplo_type uplo, transpose_type trn, diag_type diag,
164190
typename sb_handle_t, typename index_t, typename container_t0,
165191
typename container_t1, typename increment_t>
166-
typename sb_handle_t::event_t _tpsv(sb_handle_t& sb_handle, index_t _N,
167-
container_t0 _mA, container_t1 _vx,
168-
increment_t _incx,
169-
const typename sb_handle_t::event_t& _dependencies) {
170-
return blas::internal::_tpsv_impl<4, 2, uplo, trn, diag>(sb_handle, _N, _mA,
171-
_vx, _incx, _dependencies);
192+
typename sb_handle_t::event_t _tpsv(
193+
sb_handle_t& sb_handle, index_t _N, container_t0 _mA, container_t1 _vx,
194+
increment_t _incx, const typename sb_handle_t::event_t& _dependencies) {
195+
const auto device = sb_handle.get_queue().get_device();
196+
if (device.is_gpu()) {
197+
const std::string vendor =
198+
device.template get_info<cl::sycl::info::device::vendor>();
199+
if (vendor.find("Intel") == vendor.npos) {
200+
return blas::internal::_tpsv_impl<32, 4, uplo, trn, diag>(
201+
sb_handle, _N, _mA, _vx, _incx, _dependencies);
202+
} else {
203+
throw std::runtime_error(
204+
"Tpsv operator currently not supported on Intel GPUs");
205+
}
206+
} else {
207+
return blas::internal::_tpsv_impl<4, 2, uplo, trn, diag>(
208+
sb_handle, _N, _mA, _vx, _incx, _dependencies);
209+
}
172210
}
173211
} // namespace backend
174212
} // namespace tpsv

src/operations/blas1/IndexMaxMin.hpp

+5-13
Original file line numberDiff line numberDiff line change
@@ -94,22 +94,14 @@ PORTBLAS_INLINE void IndexMaxMin<is_max, is_step0, lhs_t, rhs_t>::eval(
9494
using element_t =
9595
typename ResolveReturnType<op, rhs_t>::type::value_t::value_t;
9696

97-
#ifndef __ADAPTIVECPP__
9897
// reduction within the sub_group
9998
for (index_t i = sg_local_range >> 1; i > 0; i >>= 1) {
100-
if (sg_local_id < i) {
101-
element_t shfl_val = sycl::shift_group_left(sg, val.get_value(), i);
102-
index_t shfl_idx = sycl::shift_group_left(sg, val.get_index(), i);
103-
value_t shfl{shfl_idx, shfl_val};
104-
val = op::eval(val, shfl);
105-
}
99+
element_t shfl_val = cl::sycl::shift_group_left(sg, val.get_value(), i);
100+
index_t shfl_idx = cl::sycl::shift_group_left(sg, val.get_index(), i);
101+
value_t shfl{shfl_idx, shfl_val};
102+
val = op::eval(val, shfl);
106103
}
107-
#else
108-
// AdaptiveCpp uses a different interface "shift_group_left" which is
109-
// recognized by the compiler but throws JIT errors at runtime. Currently this
110-
// part is skipped as non-local memory kernel is never called with
111-
// AdaptiveCpp.
112-
#endif
104+
113105
const index_t lhs_idx =
114106
ndItem.get_group_linear_id() * (local_range / sg_local_range) +
115107
sg.get_group_linear_id();

test/unittest/CMakeLists.txt

+1-16
Original file line numberDiff line numberDiff line change
@@ -106,25 +106,10 @@ endif()
106106
if(is_dpcpp AND ${TUNING_TARGET} STREQUAL "DEFAULT")
107107
if (${DPCPP_SYCL_TARGET} MATCHES "nvidia")
108108
set(TESTS_TO_SKIP
109-
${PORTBLAS_UNITTEST}/blas1/blas1_iamax_test.cpp
110-
${PORTBLAS_UNITTEST}/blas1/blas1_iamin_test.cpp
111-
${PORTBLAS_UNITTEST}/blas2/blas2_tbsv_test.cpp
112-
${PORTBLAS_UNITTEST}/blas2/blas2_tpsv_test.cpp
113-
${PORTBLAS_UNITTEST}/blas2/blas2_trsv_test.cpp
114109
${PORTBLAS_UNITTEST}/blas3/blas3_trsm_test.cpp
115110
)
116111
message(WARNING "Targetting NVIDIA hardware with DEFAULT TUNING_TARGET.
117-
Disabling tests for following operators: iamax, iamin, trsv, tbsv, tpsv, trsm.")
118-
elseif (${DPCPP_SYCL_TARGET} MATCHES "amd")
119-
set(TESTS_TO_SKIP
120-
${PORTBLAS_UNITTEST}/blas1/blas1_iamax_test.cpp
121-
${PORTBLAS_UNITTEST}/blas1/blas1_iamin_test.cpp
122-
${PORTBLAS_UNITTEST}/blas2/blas2_tbsv_test.cpp
123-
${PORTBLAS_UNITTEST}/blas2/blas2_tpsv_test.cpp
124-
${PORTBLAS_UNITTEST}/blas2/blas2_trsv_test.cpp
125-
)
126-
message(WARNING "Targetting AMD hardware with DEFAULT TUNING_TARGET.
127-
Disabling tests for following operators: iamax, iamin, tbsv, tpsv, trsv.")
112+
Disabling tests for following operator: trsm.")
128113
endif()
129114
endif()
130115

test/unittest/blas1/blas1_iaminmax_common.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ const auto combi = ::testing::Combine(
7979
::testing::Values("usm", "buf"), // allocation type
8080
::testing::Values(api_type::async, api_type::sync), // Api
8181
::testing::Values(11, 65, 1000000), // size
82-
::testing::Values(-1, 5), // incX
82+
::testing::Values(1, -1, 5), // incX
8383
::testing::Values(generation_mode_t::Random, generation_mode_t::Limit,
8484
generation_mode_t::Incrementing,
8585
generation_mode_t::Decrementing),

0 commit comments

Comments
 (0)