@@ -205,57 +205,52 @@ namespace ecolab
205205
206206 struct CellBase
207207 {
208+ size_t m_idx=0 ; // stash the position within the local node vector here
209+ size_t idx () const {return m_idx;}
208210#ifdef SYCL_LANGUAGE_VERSION
209- Ouro::SyclDesc<1 >* desc=nullptr ;
210211 using MemAllocator=Ouro::MultiOuroPQ;
211212 MemAllocator* memAlloc=nullptr ;
212- const sycl::stream* out=nullptr ;
213- size_t idx () const {
214- if (desc) return desc->item .get_global_linear_id ();
215- else return 0 ;
216- }
217213 template <class T > class CellAllocator
218214 {
219215 public:
220- Ouro::SyclDesc<1 >*const * desc=nullptr ;
221- MemAllocator*const * memAlloc=nullptr ;
216+ MemAllocator* memAlloc=nullptr ;
222217 template <class U > friend class Allocator ;
223218 CellAllocator ()=default ;
224- CellAllocator (Ouro::SyclDesc<1 >* const & desc, MemAllocator*const & memAlloc):
225- desc (&desc), memAlloc(&memAlloc) {
226- }
227- template <class U > CellAllocator (const CellAllocator<U>& x):
228- desc(x.desc), memAlloc(x.memAlloc) {}
219+ CellAllocator (MemAllocator* memAlloc): memAlloc(memAlloc) {}
220+ template <class U > CellAllocator (const CellAllocator<U>& x): memAlloc(x.memAlloc) {}
229221 T* allocate (size_t sz) {
230222#ifdef __SYCL_DEVICE_ONLY__
231- if (memAlloc && *memAlloc && desc && *desc ) {
232- auto r=reinterpret_cast <T*>((* memAlloc) ->malloc (**desc ,sz*sizeof (T)));
223+ if (memAlloc) {
224+ auto r=reinterpret_cast <T*>(memAlloc->malloc (Ouro::SyclDesc<>( syclItem (),{}) ,sz*sizeof (T)));
233225 if (!r) syclPrintf (" Mem allocation failed\n " );
234226 return r;
235227 }
228+ syclPrintf (" Missing allocator memAlloc=%x\n " ,memAlloc);
236229 return nullptr ; // TODO raise an error??
237230#else
238231 return sycl::malloc_shared<T>(sz,syclQ ());
239232#endif
240233 }
241- void deallocate (T* p,size_t ) {
234+ void deallocate (T* p,size_t n) {
235+ if (!p) return ;
242236#ifdef __SYCL_DEVICE_ONLY__
243- if (memAlloc && *memAlloc && desc && *desc)
244- (*memAlloc)->free (**desc,p);
237+ if (memAlloc && reinterpret_cast <Ouro::memory_t *>(p)>=memAlloc->memory .d_data &&
238+ reinterpret_cast <Ouro::memory_t *>(p)<memAlloc->memory .d_data_end )
239+ memAlloc->free (Ouro::SyclDesc<>(syclItem (),{}),p);
240+ else
241+ syclPrintf (" leaked %d bytes\n " ,n*sizeof (T));
245242#else
246243 sycl::free (p,syclQ ());
247244#endif
248245 }
249- bool operator ==(const CellAllocator& x) const {return desc==x. desc && memAlloc==x.memAlloc ;}
246+ bool operator ==(const CellAllocator& x) const {return memAlloc==x.memAlloc ;}
250247 };
251248 template <class T > CellAllocator<T> allocator () const {
252- return CellAllocator<T>(desc, memAlloc);
249+ return CellAllocator<T>(memAlloc);
253250 }
254- #else
251+ #else // !SYCL
255252 template <class T > using CellAllocator=std::allocator<T>;
256253 template <class T > CellAllocator<T> allocator () const {return CellAllocator<T>();}
257- size_t m_idx=0 ; // stash the position within the local node vector here
258- size_t idx () const {return m_idx;}
259254#endif
260255 };
261256
@@ -304,6 +299,21 @@ namespace ecolab
304299 graphcode::Allocator<graphcode::ObjectPtr<Cell>>(syclQ(),sycl::usm::alloc::shared)) {}
305300 ~EcolabGraph () {syncThreads ();}
306301#endif
302+ // / apply a functional to all local cells of this processor using the host processor
303+ // / @param f
304+ template <class F >
305+ void hostForAll (F f) {
306+ auto sz=this ->size ();
307+ #ifdef _OPENMP
308+ #pragma omp parallel for
309+ #endif
310+ for (size_t idx=0 ; idx<sz; ++idx) {
311+ auto & cell=*(*this )[idx]->template as <Cell>();
312+ cell.m_idx =idx;
313+ f (cell);
314+ }
315+ }
316+
307317 // / apply a functional to all local cells of this processor in parallel
308318 // / @param f
309319 template <class F >
@@ -313,34 +323,17 @@ namespace ecolab
313323 size_t range=this ->size ()/workGroupSize;
314324 if (range*workGroupSize < this ->size ()) ++range;
315325 syclQ ().submit ([&](auto & h) {
316- #ifndef NDEBUG
317- sycl::stream out (1000000 ,1000 ,h);
318- #endif
319326 h.parallel_for (sycl::nd_range<1 >(range*workGroupSize, workGroupSize), [=,this ](auto i) {
320327 auto idx=i.get_global_linear_id ();
321328 if (idx<this ->size ()) {
322329 auto & cell=*(*this )[idx]->template as <Cell>();
323- Ouro::SyclDesc<> desc (i,{});
324- cell.desc =&desc;
325- #ifndef NDEBUG
326- cell.out =&out;
327- #endif
330+ cell.m_idx =idx;
328331 f (cell);
329- cell.desc =nullptr ;
330- cell.out =nullptr ;
331332 }
332333 });
333334 });
334335#else
335- auto sz=this ->size ();
336- #ifdef _OPENMP
337- #pragma omp parallel for
338- #endif
339- for (size_t idx=0 ; idx<sz; ++idx) {
340- auto & cell=*(*this )[idx]->template as <Cell>();
341- cell.m_idx =idx;
342- f (cell);
343- }
336+ hostForAll (f);
344337#endif
345338 }
346339
@@ -351,28 +344,20 @@ namespace ecolab
351344 template <class F >
352345 void groupedForAll (F f) {
353346#ifdef SYCL_LANGUAGE_VERSION
347+ // TODO - pass in workGroupSize as an optional parameter??
354348 static size_t workGroupSize=32 ;// syclQ().get_device().get_info<sycl::info::device::max_work_group_size>();
355349 syclQ ().submit ([&](auto & h) {
356- #ifndef NDEBUG
357- sycl::stream out (1000000 ,1000 ,h);
358- #endif
359350 h.parallel_for (sycl::nd_range<1 >(this ->size ()*workGroupSize, workGroupSize), [=,this ](auto i) {
360351 auto idx=i.get_group_linear_id ();
361352 if (idx<this ->size ()) {
362353 auto & cell=*(*this )[idx]->template as <Cell>();
363- Ouro::SyclDesc<> desc (i,{});
364- cell.desc =&desc;
365- #ifndef NDEBUG
366- cell.out =&out;
367- #endif
354+ cell.m_idx =idx;
368355 f (cell);
369- cell.desc =nullptr ;
370- cell.out =nullptr ;
371356 }
372357 });
373358 });
374359#else
375- forAll (f);
360+ hostForAll (f);
376361#endif
377362 }
378363
@@ -423,10 +408,17 @@ namespace ecolab
423408 ~GroupLocal () {
424409 sycl::group_barrier (syclGroup ());
425410 if (syclGroup ().leader ())
426- ref ( ).~T ();
411+ (** this ).~T ();
427412 }
428- T& ref () {return reinterpret_cast <T&>(**buffer);}
413+ T& operator *() {return reinterpret_cast <T&>(**buffer);}
414+ #else
415+ T buffer;
416+ public:
417+ template <class ... Args>
418+ GroupLocal (Args&&... args): buffer(std::forward<Args>(args)...) {}
419+ T& operator *() {return buffer;}
429420#endif
421+ T* operator ->() {return &**this ;}
430422 };
431423
432424#ifdef __SYCL_DEVICE_ONLY__
0 commit comments