Skip to content

Commit c7d15b6

Browse files
Merge pull request #2129 from devitocodes/drop-collapse1-nvc
compiler: Avoid generating collapse(1)
2 parents 526dcb8 + 58efbeb commit c7d15b6

File tree

5 files changed: +25 -19 lines changed

devito/passes/iet/languages/openacc.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -26,13 +26,13 @@ def _make_construct(cls, **kwargs):
2626
return 'acc parallel loop'
2727

2828
@classmethod
29-
def _make_clauses(cls, ncollapsed=None, reduction=None, tile=None, **kwargs):
29+
def _make_clauses(cls, ncollapsed=0, reduction=None, tile=None, **kwargs):
3030
clauses = []
3131

3232
if tile:
3333
clauses.append('tile(%s)' % ','.join(str(i) for i in tile))
34-
elif ncollapsed:
35-
clauses.append('collapse(%d)' % (ncollapsed or 1))
34+
elif ncollapsed > 1:
35+
clauses.append('collapse(%d)' % ncollapsed)
3636

3737
if reduction:
3838
clauses.append(cls._make_clause_reduction_from_imask(reduction))

devito/passes/iet/languages/openmp.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,11 +40,12 @@ def _make_construct(cls, parallel=False, **kwargs):
4040
return 'omp for'
4141

4242
@classmethod
43-
def _make_clauses(cls, ncollapsed=None, chunk_size=None, nthreads=None,
43+
def _make_clauses(cls, ncollapsed=0, chunk_size=None, nthreads=None,
4444
reduction=None, schedule=None, **kwargs):
4545
clauses = []
4646

47-
clauses.append('collapse(%d)' % (ncollapsed or 1))
47+
if ncollapsed > 1:
48+
clauses.append('collapse(%d)' % ncollapsed)
4849

4950
if chunk_size is not False:
5051
clauses.append('schedule(%s,%s)' % (schedule or 'dynamic',

examples/performance/00_overview.ipynb

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -716,7 +716,7 @@
716716
" const int tid = omp_get_thread_num();\n",
717717
" float (*restrict r0)[z_size] __attribute__ ((aligned (64))) = (float (*)[z_size]) pr0[tid];\n",
718718
"\n",
719-
" #pragma omp for collapse(1) schedule(dynamic,1)\n",
719+
" #pragma omp for schedule(dynamic,1)\n",
720720
" for (int x = x_m; x <= x_M; x += 1)\n",
721721
" {\n",
722722
" for (int y = y_m - 2; y <= y_M + 2; y += 1)\n",
@@ -855,7 +855,7 @@
855855
" const int tid = omp_get_thread_num();\n",
856856
" float (*restrict r1)[z_size] __attribute__ ((aligned (64))) = (float (*)[z_size]) pr1[tid];\n",
857857
"\n",
858-
" #pragma omp for collapse(1) schedule(dynamic,1)\n",
858+
" #pragma omp for schedule(dynamic,1)\n",
859859
" for (int x = x_m; x <= x_M; x += 1)\n",
860860
" {\n",
861861
" for (int y = y_m - 2; y <= y_M + 2; y += 1)\n",
@@ -991,7 +991,7 @@
991991
" const int tid = omp_get_thread_num();\n",
992992
" float (*restrict r0)[z_size] __attribute__ ((aligned (64))) = (float (*)[z_size]) pr0[tid];\n",
993993
"\n",
994-
" #pragma omp for collapse(1) schedule(dynamic,1)\n",
994+
" #pragma omp for schedule(dynamic,1)\n",
995995
" for (int x = x_m; x <= x_M; x += 1)\n",
996996
" {\n",
997997
" for (int y = y_m - 2; y <= y_M + 2; y += 1)\n",
@@ -1557,7 +1557,7 @@
15571557
" const int tid = omp_get_thread_num();\n",
15581558
" float (*restrict r2)[z_size] __attribute__ ((aligned (64))) = (float (*)[z_size]) pr2[tid];\n",
15591559
"\n",
1560-
" #pragma omp for collapse(1) schedule(dynamic,1)\n",
1560+
" #pragma omp for schedule(dynamic,1)\n",
15611561
" for (int x = x_m; x <= x_M; x += 1)\n",
15621562
" {\n",
15631563
" for (int y = y_m - 2; y <= y_M + 2; y += 1)\n",

tests/test_dle.py

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -816,7 +816,8 @@ def test_incs_no_atomic(self):
816816
# Now only `x` is parallelized
817817
op1 = Operator([Eq(v[t, x, 0, 0], v[t, x, 0, 0] + 1), Inc(uf, 1)],
818818
opt=('advanced', {'openmp': True, 'par-collapse-ncores': 1}))
819-
assert 'collapse(1)' in str(op1)
819+
assert 'omp for' in str(op1)
820+
assert 'collapse' not in str(op1)
820821
assert 'atomic' not in str(op1)
821822

822823
@pytest.mark.parametrize('exprs,simd_level,expected', [
@@ -879,18 +880,22 @@ def test_edge_cases(self, exprs, simd_level, expected):
879880
for i, e in enumerate(list(exprs)):
880881
exprs[i] = eval(e)
881882

882-
op = Operator(exprs, opt=('advanced', {'openmp': True}))
883+
op = Operator(exprs, opt=('advanced', {'openmp': True,
884+
'par-collapse-ncores': 1}))
883885

884886
iterations = FindNodes(Iteration).visit(op)
887+
parallel = [i for i in iterations if i.is_Parallel]
885888
try:
886-
assert 'omp for collapse' in iterations[0].pragmas[0].value
889+
assert 'omp for' in iterations[0].pragmas[0].value
890+
if len(parallel) > 1 and simd_level is not None and simd_level > 1:
891+
assert 'collapse' in iterations[0].pragmas[0].value
887892
if simd_level:
888893
assert 'omp simd' in iterations[simd_level].pragmas[0].value
889894
except:
890895
# E.g. gcc-5 doesn't support array reductions, so the compiler will
891896
# generate different legal code
892897
assert not Ompizer._support_array_reduction(configuration['compiler'])
893-
assert any('omp for collapse' in i.pragmas[0].value
898+
assert any('omp for' in i.pragmas[0].value
894899
for i in iterations if i.pragmas)
895900

896901
op.apply()
@@ -910,7 +915,7 @@ def test_simd_space_invariant(self):
910915
op = Operator(eq, opt=('advanced', {'openmp': True}))
911916
iterations = FindNodes(Iteration).visit(op)
912917

913-
assert 'omp for collapse(1) schedule(static,1)' in iterations[0].pragmas[0].value
918+
assert 'omp for schedule(static,1)' in iterations[0].pragmas[0].value
914919
assert 'omp simd' in iterations[1].pragmas[0].value
915920
assert 'omp simd' in iterations[3].pragmas[0].value
916921

@@ -979,8 +984,8 @@ def test_basic(self):
979984
bns, _ = assert_blocking(op, {'x0_blk0'})
980985

981986
iterations = FindNodes(Iteration).visit(bns['x0_blk0'])
982-
assert iterations[0].pragmas[0].value == 'omp for collapse(1) schedule(dynamic,1)'
983-
assert iterations[2].pragmas[0].value == ('omp parallel for collapse(1) '
987+
assert iterations[0].pragmas[0].value == 'omp for schedule(dynamic,1)'
988+
assert iterations[2].pragmas[0].value == ('omp parallel for '
984989
'schedule(dynamic,1) '
985990
'num_threads(nthreads_nested)')
986991

@@ -1073,11 +1078,11 @@ def test_multiple_subnests_v1(self):
10731078
'omp for collapse(2) schedule(dynamic,1)'
10741079
assert not trees[0][2].pragmas
10751080
assert not trees[0][3].pragmas
1076-
assert trees[0][4].pragmas[0].value == ('omp parallel for collapse(1) '
1081+
assert trees[0][4].pragmas[0].value == ('omp parallel for '
10771082
'schedule(dynamic,1) '
10781083
'num_threads(nthreads_nested)')
10791084
assert not trees[1][2].pragmas
1080-
assert trees[1][3].pragmas[0].value == ('omp parallel for collapse(1) '
1085+
assert trees[1][3].pragmas[0].value == ('omp parallel for '
10811086
'schedule(dynamic,1) '
10821087
'num_threads(nthreads_nested)')
10831088

tests/test_gpu_openacc.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,7 @@ def test_tile_insteadof_collapse(self, par_tile):
110110
'acc parallel loop tile(32,4) present(u)'
111111
# Only the AFFINE Iterations are tiled
112112
assert trees[3][1].pragmas[0].value ==\
113-
'acc parallel loop collapse(1) present(src,src_coords,u)'
113+
'acc parallel loop present(src,src_coords,u)'
114114

115115
@pytest.mark.parametrize('par_tile', [((32, 4, 4), (8, 8)), ((32, 4), (8, 8)),
116116
((32, 4, 4), (8, 8, 8))])

0 commit comments

Comments
 (0)