Skip to content

Commit c79ef94

Browse files
committed
[SYCL] Make host task timestamps share the same base as device tasks
Currently, we use std::chrono to record `command_submit`, `command_start` and `command_end` timestamps for host tasks. So, if there there is a mix of device tasks and host tasks submitted to the queue, they have a different time base and inconvenient to use/compare. This PR makes host task timestamps to have the same time base as device tasks.
1 parent cc21caf commit c79ef94

File tree

5 files changed

+90
-33
lines changed

5 files changed

+90
-33
lines changed

sycl/include/sycl/detail/host_profiling_info.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,27 @@
1010

1111
#include <sycl/detail/export.hpp> // for __SYCL_EXPORT
1212

13+
#include <memory> // for std::weak_ptr
1314
#include <stdint.h> // for uint64_t
1415

1516
namespace sycl {
1617
inline namespace _V1 {
1718
namespace detail {
1819

20+
class queue_impl;
21+
1922
/// Profiling info for the host execution.
2023
class __SYCL_EXPORT HostProfilingInfo {
2124
uint64_t StartTime = 0;
2225
uint64_t EndTime = 0;
26+
// Queue that was used to submit the host task.
27+
std::weak_ptr<queue_impl> SubmitQueue;
2328

2429
public:
30+
// Sets the queue that was used to submit the host task.
31+
void setSubmitQueue(const std::weak_ptr<queue_impl> &Queue) {
32+
SubmitQueue = Queue;
33+
}
2534
/// Returns event's start time.
2635
///
2736
/// \return event's start time in nanoseconds.

sycl/source/detail/event_impl.cpp

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,26 @@ void event_impl::setComplete() {
113113
assert(false && "setComplete is not supported for non-host event");
114114
}
115115

116-
static uint64_t inline getTimestamp() {
117-
auto Timestamp = std::chrono::high_resolution_clock::now().time_since_epoch();
118-
return std::chrono::duration_cast<std::chrono::nanoseconds>(Timestamp)
119-
.count();
116+
static uint64_t inline getTimestamp(
117+
const std::weak_ptr<queue_impl> &QueueWeakPtr) {
118+
if (QueueImplPtr Queue = QueueWeakPtr.lock()) {
119+
try {
120+
return Queue->getDeviceImpl().getCurrentDeviceTime();
121+
} catch (sycl::exception &e) {
122+
if (e.code() == sycl::errc::feature_not_supported)
123+
throw sycl::exception(
124+
make_error_code(errc::profiling),
125+
std::string("Unable to get command group submission time: ") +
126+
e.what());
127+
std::rethrow_exception(std::current_exception());
128+
}
129+
} else {
130+
// Returning host time
131+
using namespace std::chrono;
132+
return duration_cast<nanoseconds>(
133+
high_resolution_clock::now().time_since_epoch())
134+
.count();
135+
}
120136
}
121137

122138
ur_event_handle_t event_impl::getHandle() const { return MEvent.load(); }
@@ -179,7 +195,6 @@ event_impl::event_impl(const QueueImplPtr &Queue)
179195
void event_impl::setQueue(const QueueImplPtr &Queue) {
180196
MQueue = Queue;
181197
MIsProfilingEnabled = Queue->MIsProfilingEnabled;
182-
183198
// TODO After setting the queue, the event is no longer default
184199
// constructed. Consider a design change which would allow
185200
// for such a change regardless of the construction method.
@@ -476,9 +491,9 @@ event_impl::get_backend_info<info::device::backend_version>() const {
476491
}
477492
#endif
478493

479-
void HostProfilingInfo::start() { StartTime = getTimestamp(); }
494+
void HostProfilingInfo::start() { StartTime = getTimestamp(SubmitQueue); }
480495

481-
void HostProfilingInfo::end() { EndTime = getTimestamp(); }
496+
void HostProfilingInfo::end() { EndTime = getTimestamp(SubmitQueue); }
482497

483498
ur_native_handle_t event_impl::getNative() {
484499
if (isHost())
@@ -568,24 +583,10 @@ void event_impl::setSubmissionTime() {
568583
if (!MIsProfilingEnabled && !MProfilingTagEvent)
569584
return;
570585

571-
if (QueueImplPtr Queue = MQueue.lock()) {
572-
try {
573-
MSubmitTime = Queue->getDeviceImpl().getCurrentDeviceTime();
574-
} catch (sycl::exception &e) {
575-
if (e.code() == sycl::errc::feature_not_supported)
576-
throw sycl::exception(
577-
make_error_code(errc::profiling),
578-
std::string("Unable to get command group submission time: ") +
579-
e.what());
580-
std::rethrow_exception(std::current_exception());
581-
}
582-
} else {
583-
// Returning host time
584-
using namespace std::chrono;
585-
MSubmitTime = duration_cast<nanoseconds>(
586-
high_resolution_clock::now().time_since_epoch())
587-
.count();
588-
}
586+
if (isHost())
587+
MSubmitTime = getTimestamp(MSubmittedQueue);
588+
else
589+
MSubmitTime = getTimestamp(MQueue);
589590
}
590591

591592
uint64_t event_impl::getSubmissionTime() { return MSubmitTime; }

sycl/source/detail/event_impl.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,8 @@ class event_impl {
243243
/// @return
244244
void setSubmittedQueue(const QueueImplPtr &SubmittedQueue) {
245245
MSubmittedQueue = SubmittedQueue;
246+
if (MHostProfilingInfo)
247+
MHostProfilingInfo->setSubmitQueue(SubmittedQueue);
246248
};
247249

248250
/// Indicates if this event is not associated with any command and doesn't

sycl/test-e2e/Basic/submit_time.cpp

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,30 +46,71 @@ int main(void) {
4646
// All shortcut memory operations use queue_impl::submitMemOpHelper.
4747
// This test covers memcpy as a representative, extend if other operations
4848
// diverge.
49+
uint64_t memcpy_submit_time = 0;
50+
uint64_t memcpy_start_time = 0;
51+
uint64_t memcpy_end_time = 0;
4952
for (int i = 0; i < 5; i++) {
5053
auto memcpy_event = q.memcpy(dest, data, sizeof(int) * n);
5154
memcpy_event.wait();
5255

53-
auto submit_time =
56+
auto memcpy_submit_time =
5457
memcpy_event
5558
.get_profiling_info<sycl::info::event_profiling::command_submit>();
56-
auto start_time =
59+
auto memcpy_start_time =
5760
memcpy_event
5861
.get_profiling_info<sycl::info::event_profiling::command_start>();
59-
auto end_time =
62+
auto memcpy_end_time =
6063
memcpy_event
6164
.get_profiling_info<sycl::info::event_profiling::command_end>();
6265

6366
// Print for debugging
64-
std::cout << "Memcpy Event - Submit: " << submit_time
65-
<< ", Start: " << start_time << ", End: " << end_time
66-
<< std::endl;
67+
std::cout << "Memcpy Event - Submit: " << memcpy_submit_time
68+
<< ", Start: " << memcpy_start_time
69+
<< ", End: " << memcpy_end_time << std::endl;
6770

68-
assert(submit_time != 0 && "Submit time should not be zero");
69-
assert((submit_time <= start_time) && (start_time <= end_time));
71+
assert(memcpy_submit_time != 0 && "Submit time should not be zero");
72+
assert((memcpy_submit_time <= memcpy_start_time) &&
73+
(memcpy_start_time <= memcpy_end_time));
7074
}
7175

7276
sycl::free(data, q);
7377
sycl::free(dest, q);
78+
79+
// Check that host_task profiling timestamps share the same base as device
80+
// tasks.
81+
auto host_task_event =
82+
q.submit([&](sycl::handler &cgh) { cgh.host_task([=]() {}); });
83+
q.wait();
84+
auto device_task_event = q.submit([&](sycl::handler &cgh) {
85+
cgh.parallel_for<class DeviceTask>(sycl::range<1>(1), [=](sycl::id<1>) {});
86+
});
87+
88+
const uint64_t host_submitted = host_task_event.template get_profiling_info<
89+
sycl::info::event_profiling::command_submit>();
90+
const uint64_t host_start = host_task_event.template get_profiling_info<
91+
sycl::info::event_profiling::command_start>();
92+
const uint64_t host_end = host_task_event.template get_profiling_info<
93+
sycl::info::event_profiling::command_end>();
94+
std::cout << "Submit time: " << host_submitted
95+
<< ", Start time: " << host_start << ", End time: " << host_end
96+
<< std::endl;
97+
98+
const uint64_t device_submitted =
99+
device_task_event.template get_profiling_info<
100+
sycl::info::event_profiling::command_submit>();
101+
const uint64_t device_start = device_task_event.template get_profiling_info<
102+
sycl::info::event_profiling::command_start>();
103+
const uint64_t device_end = device_task_event.template get_profiling_info<
104+
sycl::info::event_profiling::command_end>();
105+
std::cout << "Device Submit time: " << device_submitted
106+
<< ", Device Start time: " << device_start
107+
<< ", Device End time: " << device_end << std::endl;
108+
assert(host_submitted >= memcpy_submit_time &&
109+
host_submitted <= device_submitted &&
110+
"Host and device submit expected to share the same base");
111+
assert(host_start >= memcpy_start_time && host_start <= device_start &&
112+
"Host and device start expected to share the same base");
113+
assert(host_end >= memcpy_end_time && host_end <= device_end &&
114+
"Host and device end expected to share the same base");
74115
return 0;
75116
}

sycl/test/abi/sycl_symbols_windows.dump

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,8 @@
295295
??0AccessorBaseHost@detail@_V1@sycl@@QEAA@AEBV0123@@Z
296296
??0AccessorBaseHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHH_K_NAEBVproperty_list@23@@Z
297297
??0AccessorBaseHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHH_N_K4AEBVproperty_list@23@@Z
298+
??0HostProfilingInfo@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z
299+
??0HostProfilingInfo@detail@_V1@sycl@@QEAA@AEBV0123@@Z
298300
??0HostProfilingInfo@detail@_V1@sycl@@QEAA@XZ
299301
??0LocalAccessorBaseHost@detail@_V1@sycl@@IEAA@AEBV?$shared_ptr@VLocalAccessorImplHost@detail@_V1@sycl@@@std@@@Z
300302
??0LocalAccessorBaseHost@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z
@@ -499,6 +501,7 @@
499501
??0tls_code_loc_t@detail@_V1@sycl@@QEAA@AEBUcode_location@123@@Z
500502
??0tls_code_loc_t@detail@_V1@sycl@@QEAA@XZ
501503
??1AccessorBaseHost@detail@_V1@sycl@@QEAA@XZ
504+
??1HostProfilingInfo@detail@_V1@sycl@@QEAA@XZ
502505
??1LocalAccessorBaseHost@detail@_V1@sycl@@QEAA@XZ
503506
??1SYCLCategory@detail@_V1@sycl@@UEAA@XZ
504507
??1SampledImageAccessorBaseHost@detail@_V1@sycl@@QEAA@XZ
@@ -4390,6 +4393,7 @@
43904393
?setNDRangeUsed@handler@_V1@sycl@@AEAAX_N@Z
43914394
?setStateExplicitKernelBundle@handler@_V1@sycl@@AEAAXXZ
43924395
?setStateSpecConstSet@handler@_V1@sycl@@AEAAXXZ
4396+
?setSubmitQueue@HostProfilingInfo@detail@_V1@sycl@@QEAAXAEBV?$weak_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@@Z
43934397
?setType@handler@_V1@sycl@@AEAAXW4CGType@detail@23@@Z
43944398
?setUserFacingNodeType@handler@_V1@sycl@@AEAAXW4node_type@experimental@oneapi@ext@23@@Z
43954399
?set_access_mode@experimental@oneapi@ext@_V1@sycl@@YAXPEBX_KW4address_access_mode@12345@AEBVcontext@45@@Z

0 commit comments

Comments
 (0)