Skip to content

[Offload] Add olWaitEvents #150036

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions offload/liboffload/API/Queue.td
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,23 @@ def : Function {
let returns = [];
}

// API record for `olWaitEvents`. It takes a queue handle plus an array of
// `NumEvents` event handles; per the description strings below, work submitted
// to the queue afterwards waits until every listed event has completed.
def : Function {
let name = "olWaitEvents";
let desc = "Make any future work submitted to this queue wait until the provided events are complete.";
let details = [
"All events in `Events` must complete before the queue is unblocked.",
"The input events can be from any queue on any device provided by the same platform as `Queue`.",
];
// `Events` is a pointer to the first of `NumEvents` handles; all three
// parameters are inputs.
let params = [
Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
Param<"ol_event_handle_t *", "Events", "list of `NumEvents` events to wait for", PARAM_IN>,
Param<"size_t", "NumEvents", "size of `Events`", PARAM_IN>,
];
// Only the error for a NULL entry inside `Events` is listed explicitly here.
let returns = [
Return<"OL_ERRC_INVALID_NULL_HANDLE", ["Any event handle in the list is NULL"]>,
];
}

def : Enum {
let name = "ol_queue_info_t";
let desc = "Supported queue info.";
Expand Down
22 changes: 22 additions & 0 deletions offload/liboffload/src/OffloadImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,28 @@ Error olSyncQueue_impl(ol_queue_handle_t Queue) {
return Error::success();
}

/// Implementation of olWaitEvents.
///
/// Walks the `NumEvents` handles in `Events` in order and, for each one that
/// was not produced by `Queue` itself, asks the queue's device to make
/// `Queue` wait on it.
///
/// \returns an INVALID_NULL_HANDLE error as soon as a null entry is found, the
/// first error reported by `waitEvent`, or success otherwise.
Error olWaitEvents_impl(ol_queue_handle_t Queue, ol_event_handle_t *Events,
                        size_t NumEvents) {
  auto *TargetDevice = Queue->Device->Device;

  for (size_t Idx = 0; Idx != NumEvents; ++Idx) {
    ol_event_handle_t Pending = Events[Idx];

    if (Pending == nullptr)
      return Plugin::error(ErrorCode::INVALID_NULL_HANDLE,
                           "olWaitEvents asked to wait on a NULL event");

    // Events that belong to this very queue are skipped; only events from
    // other queues are handed to the device.
    if (Pending->Queue != Queue) {
      if (auto Err =
              TargetDevice->waitEvent(Pending->EventInfo, Queue->AsyncInfo))
        return Err;
    }
  }

  return Error::success();
}

Error olGetQueueInfoImplDetail(ol_queue_handle_t Queue,
ol_queue_info_t PropName, size_t PropSize,
void *PropValue, size_t *PropSizeRet) {
Expand Down
3 changes: 2 additions & 1 deletion offload/unittests/OffloadAPI/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ add_offload_unittest("queue"
queue/olSyncQueue.cpp
queue/olDestroyQueue.cpp
queue/olGetQueueInfo.cpp
queue/olGetQueueInfoSize.cpp)
queue/olGetQueueInfoSize.cpp
queue/olWaitEvents.cpp)

add_offload_unittest("symbol"
symbol/olGetSymbol.cpp
Expand Down
2 changes: 2 additions & 0 deletions offload/unittests/OffloadAPI/device_code/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ add_offload_test_device_code(localmem_static.c localmem_static)
add_offload_test_device_code(global.c global)
add_offload_test_device_code(global_ctor.c global_ctor)
add_offload_test_device_code(global_dtor.c global_dtor)
add_offload_test_device_code(sequence.c sequence)

add_custom_target(offload_device_binaries DEPENDS
foo.bin
Expand All @@ -19,5 +20,6 @@ add_custom_target(offload_device_binaries DEPENDS
global.bin
global_ctor.bin
global_dtor.bin
sequence.bin
)
set(OFFLOAD_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
11 changes: 11 additions & 0 deletions offload/unittests/OffloadAPI/device_code/sequence.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#include <gpuintrin.h>
#include <stdint.h>

// Writes element `idx` of a Fibonacci-style sequence into `inout`: indices 0
// and 1 store 0 and 1 respectively, and every later index stores the sum of
// the two elements preceding it in the buffer.
__gpu_kernel void sequence(uint32_t idx, uint32_t *inout) {
  uint32_t value;

  switch (idx) {
  case 0:
    value = 0;
    break;
  case 1:
    value = 1;
    break;
  default:
    // Reads the two previous slots of the same buffer.
    value = inout[idx - 1] + inout[idx - 2];
    break;
  }

  inout[idx] = value;
}
148 changes: 148 additions & 0 deletions offload/unittests/OffloadAPI/queue/olWaitEvents.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
//===------- Offload API tests - olWaitEvents -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "../common/Fixtures.hpp"
#include <OffloadAPI.h>
#include <gtest/gtest.h>

/// Fixture for the olWaitEvents tests. It sets up the "sequence" program,
/// looks up the kernel symbol of the same name and prepares a one-dimensional
/// launch consisting of a single group of size one.
struct olWaitEventsTest : OffloadProgramTest {
  ol_symbol_handle_t Kernel = nullptr;
  ol_kernel_launch_size_args_t LaunchArgs{};

  void SetUp() override {
    RETURN_ON_FATAL_FAILURE(OffloadProgramTest::SetUpWith("sequence"));
    ASSERT_SUCCESS(
        olGetSymbol(Program, "sequence", OL_SYMBOL_KIND_KERNEL, &Kernel));

    // Launch configuration shared by every test in this fixture.
    LaunchArgs.DynSharedMemory = 0;
    LaunchArgs.NumGroups = {1, 1, 1};
    LaunchArgs.GroupSize = {1, 1, 1};
    LaunchArgs.Dimensions = 1;
  }

  // Simply forwards to the base fixture's teardown.
  void TearDown() override {
    RETURN_ON_FATAL_FAILURE(OffloadProgramTest::TearDown());
  }
};
OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olWaitEventsTest);

// Launches the `sequence` kernel NUM_KERNELS times, each launch on its own
// queue. Before launching, every queue except the first is told to wait on the
// event produced by the previous launch, so element I is only computed after
// element I - 1 is available.
TEST_P(olWaitEventsTest, Success) {
  constexpr size_t NUM_KERNELS = 16;
  ol_queue_handle_t Queues[NUM_KERNELS];
  ol_event_handle_t Events[NUM_KERNELS];

  void *Mem;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            NUM_KERNELS * sizeof(uint32_t), &Mem));
  // Kernel argument block: the index to compute and the shared buffer.
  struct {
    uint32_t Idx;
    void *Mem;
  } Args{0, Mem};

  for (size_t I = 0; I < NUM_KERNELS; I++) {
    Args.Idx = static_cast<uint32_t>(I);

    ASSERT_SUCCESS(olCreateQueue(Device, &Queues[I]));

    if (I > 0)
      ASSERT_SUCCESS(olWaitEvents(Queues[I], &Events[I - 1], 1));

    ASSERT_SUCCESS(olLaunchKernel(Queues[I], Device, Kernel, &Args,
                                  sizeof(Args), &LaunchArgs, &Events[I]));
  }

  // The launches form a dependency chain, so the last event covers them all.
  ASSERT_SUCCESS(olSyncEvent(Events[NUM_KERNELS - 1]));

  auto *Data = static_cast<uint32_t *>(Mem);
  // Pin the two seed values the kernel writes for indices 0 and 1. Without
  // them an untouched all-zero buffer would satisfy the recurrence below and
  // the test would pass even if no kernel ever ran.
  ASSERT_EQ(Data[0], 0u);
  ASSERT_EQ(Data[1], 1u);
  for (uint32_t I = 2; I < NUM_KERNELS; I++) {
    ASSERT_EQ(Data[I], Data[I - 1] + Data[I - 2]);
  }
}

// Same chain of `sequence` launches as the multi-queue test, but every launch
// goes to one queue and olWaitEvents is asked to wait on an event that was
// produced by that same queue.
TEST_P(olWaitEventsTest, SuccessSingleQueue) {
  constexpr size_t NUM_KERNELS = 16;
  ol_queue_handle_t Queue;
  ol_event_handle_t Events[NUM_KERNELS];

  ASSERT_SUCCESS(olCreateQueue(Device, &Queue));

  void *Mem;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            NUM_KERNELS * sizeof(uint32_t), &Mem));
  // Kernel argument block: the index to compute and the shared buffer.
  struct {
    uint32_t Idx;
    void *Mem;
  } Args{0, Mem};

  for (size_t I = 0; I < NUM_KERNELS; I++) {
    Args.Idx = static_cast<uint32_t>(I);

    if (I > 0)
      ASSERT_SUCCESS(olWaitEvents(Queue, &Events[I - 1], 1));

    ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args),
                                  &LaunchArgs, &Events[I]));
  }

  ASSERT_SUCCESS(olSyncEvent(Events[NUM_KERNELS - 1]));

  auto *Data = static_cast<uint32_t *>(Mem);
  // Pin the two seed values the kernel writes for indices 0 and 1. Without
  // them an untouched all-zero buffer would satisfy the recurrence below and
  // the test would pass even if no kernel ever ran.
  ASSERT_EQ(Data[0], 0u);
  ASSERT_EQ(Data[1], 1u);
  for (uint32_t I = 2; I < NUM_KERNELS; I++) {
    ASSERT_EQ(Data[I], Data[I - 1] + Data[I - 2]);
  }
}

// Launches the `sequence` kernel once per queue; before launch I the queue is
// told to wait on all I events produced so far, exercising olWaitEvents with
// more than one event in the list.
TEST_P(olWaitEventsTest, SuccessMultipleEvents) {
  constexpr size_t NUM_KERNELS = 16;
  ol_queue_handle_t Queues[NUM_KERNELS];
  ol_event_handle_t Events[NUM_KERNELS];

  void *Mem;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            NUM_KERNELS * sizeof(uint32_t), &Mem));
  // Kernel argument block: the index to compute and the shared buffer.
  struct {
    uint32_t Idx;
    void *Mem;
  } Args{0, Mem};

  for (size_t I = 0; I < NUM_KERNELS; I++) {
    Args.Idx = static_cast<uint32_t>(I);

    ASSERT_SUCCESS(olCreateQueue(Device, &Queues[I]));

    // Events[0..I) have all been written by earlier iterations.
    if (I > 0)
      ASSERT_SUCCESS(olWaitEvents(Queues[I], Events, I));

    ASSERT_SUCCESS(olLaunchKernel(Queues[I], Device, Kernel, &Args,
                                  sizeof(Args), &LaunchArgs, &Events[I]));
  }

  ASSERT_SUCCESS(olSyncEvent(Events[NUM_KERNELS - 1]));

  auto *Data = static_cast<uint32_t *>(Mem);
  // Pin the two seed values the kernel writes for indices 0 and 1. Without
  // them an untouched all-zero buffer would satisfy the recurrence below and
  // the test would pass even if no kernel ever ran.
  ASSERT_EQ(Data[0], 0u);
  ASSERT_EQ(Data[1], 1u);
  for (uint32_t I = 2; I < NUM_KERNELS; I++) {
    ASSERT_EQ(Data[I], Data[I - 1] + Data[I - 2]);
  }
}

// Passing a null queue handle is expected to yield
// OL_ERRC_INVALID_NULL_HANDLE.
TEST_P(olWaitEventsTest, InvalidNullQueue) {
  // Only the address of this handle is passed; it is never given a value.
  ol_event_handle_t Placeholder;
  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
               olWaitEvents(nullptr, &Placeholder, 1));
}

// Passing a null pointer as the event list is expected to yield
// OL_ERRC_INVALID_NULL_POINTER.
TEST_P(olWaitEventsTest, InvalidNullEvent) {
  ol_queue_handle_t WaitQueue;
  ASSERT_SUCCESS(olCreateQueue(Device, &WaitQueue));
  ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER,
               olWaitEvents(WaitQueue, nullptr, 1));
}

// A valid event list whose single entry is a null handle is expected to yield
// OL_ERRC_INVALID_NULL_HANDLE.
TEST_P(olWaitEventsTest, InvalidNullInnerEvent) {
  ol_event_handle_t NullEvent = nullptr;
  ol_queue_handle_t WaitQueue;
  ASSERT_SUCCESS(olCreateQueue(Device, &WaitQueue));
  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
               olWaitEvents(WaitQueue, &NullEvent, 1));
}
Loading