Skip to content

[SYCL] Make host task timestamps share the same base as device tasks #18710

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: sycl
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions sycl/include/sycl/detail/host_profiling_info.hpp
Original file line number Diff line number Diff line change
@@ -16,12 +16,19 @@ namespace sycl {
inline namespace _V1 {
namespace detail {

class device_impl;

/// Profiling info for the host execution.
class __SYCL_EXPORT HostProfilingInfo {
uint64_t StartTime = 0;
uint64_t EndTime = 0;
device_impl *Device = nullptr;

public:
// Sets the device associated with a queue that was used to submit the host
// task.
void setDevice(device_impl *Dev) { Device = Dev; }

/// Returns event's start time.
///
/// \return event's start time in nanoseconds.
57 changes: 33 additions & 24 deletions sycl/source/detail/event_impl.cpp
Original file line number Diff line number Diff line change
@@ -113,10 +113,25 @@ void event_impl::setComplete() {
assert(false && "setComplete is not supported for non-host event");
}

static uint64_t inline getTimestamp() {
auto Timestamp = std::chrono::high_resolution_clock::now().time_since_epoch();
return std::chrono::duration_cast<std::chrono::nanoseconds>(Timestamp)
.count();
static uint64_t inline getTimestamp(device_impl *Device) {
if (Device) {
try {
return Device->getCurrentDeviceTime();
} catch (sycl::exception &e) {
if (e.code() == sycl::errc::feature_not_supported)
throw sycl::exception(
make_error_code(errc::profiling),
std::string("Unable to get command group submission time: ") +
e.what());
std::rethrow_exception(std::current_exception());
}
} else {
// Returning host time
using namespace std::chrono;
return duration_cast<nanoseconds>(
high_resolution_clock::now().time_since_epoch())
.count();
}
}

ur_event_handle_t event_impl::getHandle() const { return MEvent.load(); }
@@ -179,13 +194,20 @@ event_impl::event_impl(const QueueImplPtr &Queue)
void event_impl::setQueue(const QueueImplPtr &Queue) {
MQueue = Queue;
MIsProfilingEnabled = Queue->MIsProfilingEnabled;

// TODO After setting the queue, the event is no longer default
// constructed. Consider a design change which would allow
// for such a change regardless of the construction method.
MIsDefaultConstructed = false;
}

void event_impl::setSubmittedQueue(const QueueImplPtr &SubmittedQueue) {
MSubmittedQueue = SubmittedQueue;
if (MHostProfilingInfo && SubmittedQueue) {
device_impl &Device = SubmittedQueue->getDeviceImpl();
MHostProfilingInfo->setDevice(&Device);
}
}

void *event_impl::instrumentationProlog(std::string &Name, int32_t StreamID,
uint64_t &IId) const {
void *TraceEvent = nullptr;
@@ -476,9 +498,9 @@ event_impl::get_backend_info<info::device::backend_version>() const {
}
#endif

void HostProfilingInfo::start() { StartTime = getTimestamp(); }
void HostProfilingInfo::start() { StartTime = getTimestamp(Device); }

void HostProfilingInfo::end() { EndTime = getTimestamp(); }
void HostProfilingInfo::end() { EndTime = getTimestamp(Device); }

ur_native_handle_t event_impl::getNative() {
if (isHost())
@@ -568,23 +590,10 @@ void event_impl::setSubmissionTime() {
if (!MIsProfilingEnabled && !MProfilingTagEvent)
return;

if (QueueImplPtr Queue = MQueue.lock()) {
try {
MSubmitTime = Queue->getDeviceImpl().getCurrentDeviceTime();
} catch (sycl::exception &e) {
if (e.code() == sycl::errc::feature_not_supported)
throw sycl::exception(
make_error_code(errc::profiling),
std::string("Unable to get command group submission time: ") +
e.what());
std::rethrow_exception(std::current_exception());
}
} else {
// Returning host time
using namespace std::chrono;
MSubmitTime = duration_cast<nanoseconds>(
high_resolution_clock::now().time_since_epoch())
.count();
std::weak_ptr<queue_impl> Queue = isHost() ? MSubmittedQueue : MQueue;
if (QueueImplPtr QueuePtr = Queue.lock()) {
device_impl &Device = QueuePtr->getDeviceImpl();
MSubmitTime = getTimestamp(&Device);
}
}

4 changes: 1 addition & 3 deletions sycl/source/detail/event_impl.hpp
Original file line number Diff line number Diff line change
@@ -241,9 +241,7 @@ class event_impl {
/// Sets original queue used for submission.
///
/// @return
void setSubmittedQueue(const QueueImplPtr &SubmittedQueue) {
MSubmittedQueue = SubmittedQueue;
};
void setSubmittedQueue(const QueueImplPtr &SubmittedQueue);

/// Indicates if this event is not associated with any command and doesn't
/// have native handle.
57 changes: 49 additions & 8 deletions sycl/test-e2e/Basic/submit_time.cpp
Original file line number Diff line number Diff line change
@@ -46,30 +46,71 @@ int main(void) {
// All shortcut memory operations use queue_impl::submitMemOpHelper.
// This test covers memcpy as a representative, extend if other operations
// diverge.
uint64_t memcpy_submit_time = 0;
uint64_t memcpy_start_time = 0;
uint64_t memcpy_end_time = 0;
for (int i = 0; i < 5; i++) {
auto memcpy_event = q.memcpy(dest, data, sizeof(int) * n);
memcpy_event.wait();

auto submit_time =
auto memcpy_submit_time =
memcpy_event
.get_profiling_info<sycl::info::event_profiling::command_submit>();
auto start_time =
auto memcpy_start_time =
memcpy_event
.get_profiling_info<sycl::info::event_profiling::command_start>();
auto end_time =
auto memcpy_end_time =
memcpy_event
.get_profiling_info<sycl::info::event_profiling::command_end>();

// Print for debugging
std::cout << "Memcpy Event - Submit: " << submit_time
<< ", Start: " << start_time << ", End: " << end_time
<< std::endl;
std::cout << "Memcpy Event - Submit: " << memcpy_submit_time
<< ", Start: " << memcpy_start_time
<< ", End: " << memcpy_end_time << std::endl;

assert(submit_time != 0 && "Submit time should not be zero");
assert((submit_time <= start_time) && (start_time <= end_time));
assert(memcpy_submit_time != 0 && "Submit time should not be zero");
assert((memcpy_submit_time <= memcpy_start_time) &&
(memcpy_start_time <= memcpy_end_time));
}

sycl::free(data, q);
sycl::free(dest, q);

// Check that host_task profiling timestamps share the same base as device
// tasks.
auto host_task_event =
q.submit([&](sycl::handler &cgh) { cgh.host_task([=]() {}); });
q.wait();
auto device_task_event = q.submit([&](sycl::handler &cgh) {
cgh.parallel_for<class DeviceTask>(sycl::range<1>(1), [=](sycl::id<1>) {});
});

const uint64_t host_submitted = host_task_event.template get_profiling_info<
sycl::info::event_profiling::command_submit>();
const uint64_t host_start = host_task_event.template get_profiling_info<
sycl::info::event_profiling::command_start>();
const uint64_t host_end = host_task_event.template get_profiling_info<
sycl::info::event_profiling::command_end>();
std::cout << "Submit time: " << host_submitted
<< ", Start time: " << host_start << ", End time: " << host_end
<< std::endl;

const uint64_t device_submitted =
device_task_event.template get_profiling_info<
sycl::info::event_profiling::command_submit>();
const uint64_t device_start = device_task_event.template get_profiling_info<
sycl::info::event_profiling::command_start>();
const uint64_t device_end = device_task_event.template get_profiling_info<
sycl::info::event_profiling::command_end>();
std::cout << "Device Submit time: " << device_submitted
<< ", Device Start time: " << device_start
<< ", Device End time: " << device_end << std::endl;
assert(host_submitted >= memcpy_submit_time &&
host_submitted <= device_submitted &&
"Host and device submit expected to share the same base");
assert(host_start >= memcpy_start_time && host_start <= device_start &&
"Host and device start expected to share the same base");
assert(host_end >= memcpy_end_time && host_end <= device_end &&
"Host and device end expected to share the same base");
return 0;
}
1 change: 1 addition & 0 deletions sycl/test/abi/sycl_symbols_windows.dump
Original file line number Diff line number Diff line change
@@ -4375,6 +4375,7 @@
?setArgHelper@handler@_V1@sycl@@AEAAXHAEAVwork_group_memory_impl@detail@23@@Z
?setArgsHelper@handler@_V1@sycl@@AEAAXH@Z
?setArgsToAssociatedAccessors@handler@_V1@sycl@@AEAAXXZ
?setDevice@HostProfilingInfo@detail@_V1@sycl@@QEAAXPEAVdevice_impl@234@@Z
?setHandlerKernelBundle@handler@_V1@sycl@@AEAAXAEBV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@@Z
?setHandlerKernelBundle@handler@_V1@sycl@@AEAAXVkernel@23@@Z
?setKernelCacheConfig@handler@_V1@sycl@@AEAAXW4StableKernelCacheConfig@123@@Z