Skip to content

Commit cddc21c

Browse files
committed
beam3d: try to fix an illegal memory access
1 parent 02a7096 commit cddc21c

File tree

2 files changed

+41
-18
lines changed

2 files changed

+41
-18
lines changed

lcode2dPy/beam3d/beam_calculator.py

+40-17
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,40 @@
99

1010
# Helper function #
1111

12-
def get_beam_substepping_step(xp: np):
13-
def beam_substepping_step(q_m, pz, substepping_energy):
14-
dt = xp.ones_like(q_m, dtype=xp.float64)
15-
max_dt = xp.sqrt(
16-
xp.sqrt(1 / q_m ** 2 + pz ** 2) / substepping_energy)
17-
18-
a = xp.ceil(xp.log2(dt / max_dt))
19-
a[a < 0] = 0
20-
dt /= 2 ** a
12+
# NOTE: These functions are deliberately kept separate rather than merged,
13+
# because other implementations (including those in older commits) caused
14+
# an illegal memory access when computing on a GPU. The problem is
15+
# probably in the internals of the CuPy library. Certain simulation
16+
# settings can still trigger the problem, just in different places.
17+
18+
@nb.njit
19+
def beam_substepping_step_numba(q_m, pz, substepping_energy):
20+
dt = np.ones_like(q_m, dtype=np.float64)
21+
max_dt = np.sqrt(np.sqrt(1 / q_m ** 2 + pz ** 2) / substepping_energy)
22+
for i in range(len(q_m)):
23+
while dt[i] > max_dt[i]:
24+
dt[i] /= 2.0
25+
return dt
26+
27+
28+
def get_beam_substepping_step_cupy():
29+
import cupy as cp
30+
31+
calculate_substepping_step = cp.ElementwiseKernel(
32+
in_params="T q_m, T pz, float64 substepping_energy",
33+
out_params="T dt",
34+
operation="""
35+
T max_dt = sqrt(sqrt(1 / (q_m*q_m) + pz*pz) / substepping_energy);
36+
while (dt > max_dt){
37+
dt /= 2;
38+
}
39+
""")
2140

41+
def beam_substepping_step(q_m, pz, substepping_energy):
42+
dt = cp.ones_like(q_m, dtype=cp.float64)
43+
calculate_substepping_step(q_m, pz, substepping_energy, dt)
2244
return dt
2345

24-
if xp is np:
25-
return nb.njit(beam_substepping_step)
26-
2746
return beam_substepping_step
2847

2948

@@ -39,7 +58,12 @@ def __init__(self, config: Config):
3958

4059
self.deposit = get_deposit_beam(config)
4160
self.move_particles = get_move_beam_particles(config)
42-
self.beam_substepping_step = get_beam_substepping_step(self.xp)
61+
62+
pu_type = config.get('processing-unit-type').lower()
63+
if pu_type == 'cpu':
64+
self.beam_substepping_step = beam_substepping_step_numba
65+
if pu_type == 'gpu':
66+
self.beam_substepping_step = get_beam_substepping_step_cupy()
4367

4468
# Helper functions for one time step cycle:
4569

@@ -101,9 +125,8 @@ def move_beam_layer(self, beam_layer: BeamParticles, fell_size,
101125
beam_layer_idx, beam_layer, fields_after_layer,
102126
fields_before_layer, lost_idxes, moved_idxes, fell_idxes)
103127

104-
indexes = self.xp.arange(beam_layer.id.size)
105-
lost = beam_layer.get_layer(indexes[lost_idxes])
106-
moved = beam_layer.get_layer(indexes[moved_idxes])
107-
fell = beam_layer.get_layer(indexes[fell_idxes])
128+
lost = beam_layer.get_layer(lost_idxes)
129+
moved = beam_layer.get_layer(moved_idxes)
130+
fell = beam_layer.get_layer(fell_idxes)
108131

109132
return lost, moved, fell

lcode2dPy/beam3d/move.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -280,12 +280,12 @@ def get_move_beam_particles(config: Config):
280280
xi_step_size = config.getfloat('xi-step')
281281
grid_step_size = config.getfloat('window-width-step-size')
282282
grid_steps = config.getint('window-width-steps')
283-
pu_type = config.get('processing-unit-type').lower()
284283

285284
# Calculate the radius that marks that a particle is lost.
286285
max_radius = grid_step_size * grid_steps / 2
287286
lost_radius = max(0.9 * max_radius, max_radius - 1) # or just max_radius?
288287

288+
pu_type = config.get('processing-unit-type').lower()
289289
if pu_type == 'cpu':
290290
move_beam_particles_kernel = move_beam_particles_kernel_numba
291291
if pu_type == 'gpu':

0 commit comments

Comments
 (0)