Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DPC++ multiGPU stencil and/or transpose #479

Draft
wants to merge 72 commits into
base: main
Choose a base branch
from
Draft
Changes from 1 commit
Commits
Show all changes
72 commits
Select commit Hold shift + click to select a range
a78fdbd
add stencil DPC++ skeletons
Jun 23, 2020
7af1beb
cleanup comments
Jun 23, 2020
684ac41
some fixes - GPU broken
Jun 23, 2020
d3717be
cleanup
Jun 23, 2020
add9e23
workaround Level Zero SPIR detection
Jun 23, 2020
691938c
some fixes - GPU broken
Jun 23, 2020
b31429b
undo minor mistake
Jun 24, 2020
583795d
add transpose multi-GPU DPC++ skeleton
Jun 24, 2020
053cb6d
WIP
Jun 29, 2020
17ff292
add ignores
Jul 20, 2020
9a95639
fix message
Jul 20, 2020
3113c03
add fill, cleanup dead code
Jul 21, 2020
3d33f2a
start decomposition
Jul 21, 2020
74e286b
device queues class and related methods
Jul 21, 2020
da01de4
use device queues stuff - currently broken
Jul 21, 2020
cc31187
trying to fix
Jul 21, 2020
18931c4
trying to fix
Jul 21, 2020
5351212
trying to fix
Jul 21, 2020
945f87f
working with inlined methods
Jul 21, 2020
994f9cc
working with inlined methods
Jul 21, 2020
f2c874e
cleaned up
Jul 21, 2020
7fca06f
cleaned up
Jul 21, 2020
a0ac723
fixed bugs
Jul 21, 2020
fe2d829
working but to be replaced
Jul 21, 2020
6239694
working
Jul 21, 2020
d299f95
fix input helper comment
Jul 22, 2020
e7bb773
add broadcast and reduce (unused and untested)
Jul 22, 2020
bc0ce20
add stencil DPC++ skeletons
Jun 23, 2020
4de1f83
cleanup comments
Jun 23, 2020
af3de74
some fixes - GPU broken
Jun 23, 2020
12abe88
cleanup
Jun 23, 2020
ce6a713
workaround Level Zero SPIR detection
Jun 23, 2020
5466251
some fixes - GPU broken
Jun 23, 2020
02d1f8d
undo minor mistake
Jun 24, 2020
0d358f9
add transpose multi-GPU DPC++ skeleton
Jun 24, 2020
83892be
WIP
Jun 29, 2020
9a8006a
add ignores
Jul 20, 2020
83faa81
fix message
Jul 20, 2020
4e81836
add fill, cleanup dead code
Jul 21, 2020
767db07
start decomposition
Jul 21, 2020
68f1a94
device queues class and related methods
Jul 21, 2020
452c0a3
use device queues stuff - currently broken
Jul 21, 2020
e068ccf
trying to fix
Jul 21, 2020
fc1a39d
trying to fix
Jul 21, 2020
a99fee7
trying to fix
Jul 21, 2020
ee31d76
working with inlined methods
Jul 21, 2020
4fe3b65
working with inlined methods
Jul 21, 2020
63b2d55
cleaned up
Jul 21, 2020
069ceae
cleaned up
Jul 21, 2020
af851be
fixed bugs
Jul 21, 2020
45c3a0b
working but to be replaced
Jul 21, 2020
aac9768
working
Jul 21, 2020
02591d6
fix input helper comment
Jul 22, 2020
f013421
add broadcast and reduce (unused and untested)
Jul 22, 2020
ed9924b
Merge branch 'dpcpp-multi-gpu-transpose' of https://github.com/jeffha…
Jul 24, 2020
d90f799
Merge branch 'default' into dpcpp-multi-gpu-transpose
Jul 24, 2020
e0fe457
remove unnecessary preprocessor
Jul 24, 2020
f5c510d
remove 2D indexing code that won't work with USM
Jul 24, 2020
76a8bd5
fix a bunch of problems
Jul 24, 2020
b5666fd
need a unit test for the collectives...
Jul 26, 2020
ee43a2d
whitespace
Jul 26, 2020
b94896d
hoist invariant; start alltoall
Jul 26, 2020
da594fb
progress on unit test
Jul 26, 2020
0a74fa0
work around issue with CPU-only in "multigpu" tester
Jul 26, 2020
edb8687
broadcast and reduce tested
Jul 26, 2020
dfacd83
there are bugs somewhere
Jul 26, 2020
2bd3063
merge fix
Sep 6, 2020
c222179
Merge branch 'default' into dpcpp-multi-gpu-transpose
Oct 14, 2020
1e855f3
Merge branch 'default' into dpcpp-multi-gpu-transpose
Oct 26, 2020
0193c93
never commit binaries
Oct 28, 2020
69f39c5
never commit binaries
Oct 28, 2020
7a08484
Merge branch 'default' into dpcpp-multi-gpu-transpose
Oct 28, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
device queues class and related methods
Jeff Hammond committed Jul 21, 2020
commit 74e286bae457bd1b7eb7e5ef68e1f0eab1d0e21d
150 changes: 150 additions & 0 deletions Cxx11/prk_sycl.h
Original file line number Diff line number Diff line change
@@ -4,12 +4,17 @@
#include <cstdlib>
#include <iostream>

//#include <iterator> // std::distance
#include <boost/range/adaptor/indexed.hpp>

#include "CL/sycl.hpp"

#ifdef __COMPUTECPP__
#include "SYCL/experimental/usm.h"
#endif

#include "prk_util.h" // prk::vector

namespace sycl = cl::sycl;

#ifdef __COMPUTECPP__
@@ -83,6 +88,151 @@ namespace prk {
#endif
}

class queues {

private:
std::vector<sycl::queue> list;

public:
queues(bool use_cpu = true, bool use_gpu = true)
{
auto platforms = sycl::platform::get_platforms();
for (auto & p : platforms) {
auto pname = p.get_info<sycl::info::platform::name>();
std::cout << "*Platform: " << pname << std::endl;
if ( pname.find("Level-Zero") != std::string::npos) {
std::cout << "*Level Zero GPU skipped" << std::endl;
break;
}
if ( pname.find("Intel") == std::string::npos) {
std::cout << "*non-Intel skipped" << std::endl;
break;
}
auto devices = p.get_devices();
for (auto & d : devices ) {
std::cout << "**Device: " << d.get_info<sycl::info::device::name>() << std::endl;
if ( d.is_cpu() && use_cpu ) {
std::cout << "**Device is CPU - adding to vector of queues" << std::endl;
list.push_back(sycl::queue(d));
}
if ( d.is_gpu() && use_gpu ) {
std::cout << "**Device is GPU - adding to vector of queues" << std::endl;
list.push_back(sycl::queue(d));
}
}
}
}

int size(void)
{
return list.size();
}

void wait(int i)
{
list[i].wait();
}

void waitall(void)
{
for (auto & i : list) {
i.wait();
}
}

template <typename T>
void allocate(std::vector<T*> & device_pointers,
size_t num_elements)
{
std::cout << "allocate" << std::endl;
for (const auto & l : list | boost::adaptors::indexed(0) ) {
auto i = l.index();
auto v = l.value();
device_pointers[i] = syclx::malloc_device<T>(num_elements, v);
std::cout << i << ": " << device_pointers[i] << ", " << num_elements << std::endl;
}
}

template <typename T>
void free(std::vector<T*> & device_pointers)
{
std::cout << "free" << std::endl;
for (const auto & l : list | boost::adaptors::indexed(0) ) {
auto i = l.index();
auto v = l.value();
syclx::free(device_pointers[i], v);
}
}

template <typename T>
void gather(T * host_pointer,
const std::vector<T*> & device_pointers,
size_t num_elements)
{
std::cout << "gather" << std::endl;
for (const auto & l : list | boost::adaptors::indexed(0) ) {
auto i = l.index();
auto v = l.value();
auto bytes = num_elements * sizeof(T);
auto target = &host_pointer[i * bytes];
auto source = device_pointers[i];
v.memcpy(target, source, bytes);
}
}

template <typename T>
void gather(prk::vector<T> & host_pointer,
const std::vector<T*> & device_pointers,
size_t num_elements)
{
std::cout << "gather" << std::endl;
for (const auto & l : list | boost::adaptors::indexed(0) ) {
auto i = l.index();
auto v = l.value();
auto bytes = num_elements * sizeof(T);
auto target = &host_pointer[i * bytes];
auto source = device_pointers[i];
v.memcpy(target, source, bytes);
}
}

template <typename T>
void scatter(std::vector<T*> & device_pointers,
const T * host_pointer,
size_t num_elements)
{
std::cout << "scatter" << std::endl;
for (const auto & l : list | boost::adaptors::indexed(0) ) {
auto i = l.index();
auto v = l.value();
auto bytes = num_elements * sizeof(T);
auto target = device_pointers[i];
auto source = &host_pointer[i * bytes];
v.memcpy(target, source, bytes);
}
}

template <typename T>
void scatter(std::vector<T*> & device_pointers,
prk::vector<T> & host_pointer,
size_t num_elements)
{
std::cout << "scatter" << std::endl;
for (const auto & l : list | boost::adaptors::indexed(0) ) {
auto i = l.index();
auto v = l.value();
auto bytes = num_elements * sizeof(T);
auto target = device_pointers[i];
auto source = &host_pointer[i * bytes];
std::cout << i << ": " << target << ", " << source << std::endl;
v.memcpy(target, source, bytes);
}
}



};

} // namespace SYCL

} // namespace prk