Skip to content

Commit 4f81bab

Browse files
committed
batch: relax SIMD loop for certain operations
When building with a newer version of clang that has more OpenMP functionality, I uncovered a few more SIMD pragma loops that fail to vectorize on clang, so they need to use the OSL_OMP_COMPLEX_SIMD_LOOP macro to avoid those errors. Signed-off-by: Larry Gritz <[email protected]>
1 parent ee5ae28 commit 4f81bab

File tree

5 files changed

+10
-9
lines changed

5 files changed

+10
-9
lines changed

src/include/OSL/platform.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@
291291
#define OSL_OMP_SIMD_LOOP(...) OSL_OMP_PRAGMA(omp simd __VA_ARGS__)
292292

293293
#if (OSL_GNUC_VERSION || OSL_INTEL_CLASSIC_COMPILER_VERSION || OSL_INTEL_LLVM_COMPILER_VERSION)
294+
// GCC, icc, icx: Use a simd loop for sure
294295
# define OSL_OMP_COMPLEX_SIMD_LOOP(...) OSL_OMP_SIMD_LOOP(__VA_ARGS__)
295296
#else
296297
// Ignore requests to vectorize complex/nested SIMD loops for certain

src/liboslexec/wide/wide_opalgebraic.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ __OSL_OP2(length, Wf, Wv)(void* r_, void* V_)
136136
Wide<const Vec3> wV(V_);
137137
Wide<float> wr(r_);
138138

139-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
139+
OSL_OMP_COMPLEX_SIMD_LOOP(simdlen(__OSL_WIDTH))
140140
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
141141
Vec3 V = wV[lane];
142142
float r = sfm::length(V);
@@ -155,7 +155,7 @@ __OSL_MASKED_OP2(length, Wf, Wv)(void* r_, void* V_, unsigned int mask_value)
155155
Wide<const Vec3> wV(V_);
156156
Masked<float> wr(r_, Mask(mask_value));
157157

158-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
158+
OSL_OMP_COMPLEX_SIMD_LOOP(simdlen(__OSL_WIDTH))
159159
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
160160
Vec3 V = wV[lane];
161161
if (wr.mask()[lane]) {
@@ -217,7 +217,7 @@ __OSL_OP2(area, Wf, Wdv)(void* r_, void* DP_)
217217

218218
Wide<float> wr(r_);
219219

220-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
220+
OSL_OMP_COMPLEX_SIMD_LOOP(simdlen(__OSL_WIDTH))
221221
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
222222
Dual2<Vec3> DP = wDP[lane];
223223

@@ -240,7 +240,7 @@ __OSL_MASKED_OP2(area, Wf, Wdv)(void* r_, void* DP_, unsigned int mask_value)
240240

241241
Masked<float> wr(r_, Mask(mask_value));
242242

243-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
243+
OSL_OMP_COMPLEX_SIMD_LOOP(simdlen(__OSL_WIDTH))
244244
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
245245
Dual2<Vec3> DP = wDP[lane];
246246
if (wr.mask()[lane]) {
@@ -455,7 +455,7 @@ __OSL_OP2(normalize, Wv, Wv)(void* r_, void* V_)
455455
Wide<const Vec3> wV(V_);
456456
Wide<Vec3> wr(r_);
457457

458-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
458+
OSL_OMP_COMPLEX_SIMD_LOOP(simdlen(__OSL_WIDTH))
459459
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
460460
Vec3 V = wV[lane];
461461
Vec3 N = sfm::normalize(V);
@@ -473,7 +473,7 @@ __OSL_MASKED_OP2(normalize, Wv, Wv)(void* r_, void* V_, unsigned int mask_value)
473473
Wide<const Vec3> wV(V_);
474474
Masked<Vec3> wr(r_, Mask(mask_value));
475475

476-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
476+
OSL_OMP_COMPLEX_SIMD_LOOP(simdlen(__OSL_WIDTH))
477477
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
478478
Vec3 V = wV[lane];
479479
if (wr.mask()[lane]) {

src/liboslexec/wide/wide_opcolor.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ __OSL_MASKED_OP2(blackbody, Wv, Wf)(void* bsg_, void* wout_, void* wtemp_,
6868
Block<int> computeRequiredBlock;
6969
Wide<int> wcomputeRequired(computeRequiredBlock);
7070

71-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
71+
OSL_OMP_COMPLEX_SIMD_LOOP(simdlen(__OSL_WIDTH))
7272
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
7373
float temperature = wL[lane];
7474
bool canNotLookup = !cs.can_lookup_blackbody(temperature);

src/liboslexec/wide/wide_opspline.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ spline_evaluate_loop_over_wide(const MatrixT& M, RAccessorT wR, XAccessorT wX,
376376

377377
OSL_FORCEINLINE_BLOCK
378378
{
379-
OSL_OMP_PRAGMA(omp simd simdlen(vec_width))
379+
OSL_OMP_COMPLEX_SIMD_LOOP(simdlen(vec_width))
380380
for (int lane = 0; lane < vec_width; ++lane) {
381381
X_Type x = wX[lane];
382382
auto knots = wK[lane];

src/liboslexec/wide/wide_opstring.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ __OSL_MASKED_OP2(hash, Wi, Ws)(void* wr_, void* ws_, unsigned int mask_value)
9898

9999
OSL_FORCEINLINE_BLOCK
100100
{
101-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
101+
OSL_OMP_COMPLEX_SIMD_LOOP(simdlen(__OSL_WIDTH))
102102
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
103103
ustring s = wS[lane];
104104
if (wR.mask()[lane]) {

0 commit comments

Comments
 (0)