Skip to content

Commit d829db7

Browse files
authored
[UR][HIP] Select the correct binary in a multi-arch bundle (#19092)
This commit changes the HIP adapter to select the correct binary for the device when a bundle contains binaries built for multiple AMDGPU architectures. Similarly to other adapters, the HIP adapter would previously select the first 'amdgcn' binary it came across. This works fine for the common case where the program was compiled for one architecture but may fail otherwise. To aid in this, the SYCL runtime passes some extra information into urDeviceSelectBinary via the pre-existing 'pNext' field of ur_device_binary_t. It does this only for the HIP backend. The HIP adapter then parses this binary information as a clang offload bundle, which conveniently contains specific triple & architecture information for each binary. For this we re-use the code that the offload adapter was using, making it common and fixing a bug in the version matching logic.
1 parent c28ad7c commit d829db7

File tree

8 files changed

+199
-92
lines changed

8 files changed

+199
-92
lines changed

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1529,11 +1529,24 @@ RTDeviceBinaryImage *getBinImageFromMultiMap(
15291529
if (DeviceFilteredImgs.empty())
15301530
return nullptr;
15311531

1532-
std::vector<ur_device_binary_t> UrBinaries(DeviceFilteredImgs.size());
1533-
for (uint32_t BinaryCount = 0; BinaryCount < DeviceFilteredImgs.size();
1534-
BinaryCount++) {
1535-
UrBinaries[BinaryCount].pDeviceTargetSpec = getUrDeviceTarget(
1536-
getRawImg(DeviceFilteredImgs[BinaryCount])->DeviceTargetSpec);
1532+
const size_t NumImgs = DeviceFilteredImgs.size();
1533+
// Pass extra information to the HIP adapter to aid in binary selection. We
1534+
// pass it the raw binary as a {ptr, length} pair.
1535+
std::vector<std::pair<const unsigned char *, size_t>> UrBinariesStorage;
1536+
if (DeviceImpl.getBackend() == backend::ext_oneapi_hip)
1537+
UrBinariesStorage.reserve(NumImgs);
1538+
1539+
std::vector<ur_device_binary_t> UrBinaries(NumImgs);
1540+
for (uint32_t BinaryCount = 0; BinaryCount < NumImgs; BinaryCount++) {
1541+
sycl_device_binary RawImg = getRawImg(DeviceFilteredImgs[BinaryCount]);
1542+
UrBinaries[BinaryCount].pDeviceTargetSpec =
1543+
getUrDeviceTarget(RawImg->DeviceTargetSpec);
1544+
if (DeviceImpl.getBackend() == backend::ext_oneapi_hip) {
1545+
UrBinariesStorage.emplace_back(
1546+
RawImg->BinaryStart,
1547+
std::distance(RawImg->BinaryStart, RawImg->BinaryEnd));
1548+
UrBinaries[BinaryCount].pNext = &UrBinariesStorage[BinaryCount];
1549+
}
15371550
}
15381551

15391552
uint32_t ImgInd = 0;

unified-runtime/source/adapters/hip/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ if("${UR_HIP_PLATFORM}" STREQUAL "AMD")
157157
${PROJECT_NAME}::headers
158158
${PROJECT_NAME}::common
159159
${PROJECT_NAME}::umf
160+
ur_common
160161
rocmdrv
161162
)
162163

unified-runtime/source/adapters/hip/device.cpp

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "adapter.hpp"
1313
#include "context.hpp"
1414
#include "event.hpp"
15+
#include "offload_bundle_parser.hpp"
1516

1617
#include <array>
1718
#include <hip/hip_runtime.h>
@@ -1158,25 +1159,57 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle(
11581159

11591160
/// \return UR_RESULT_SUCCESS if a binary matching the HIP target is found, preferring one built for the device's architecture
11601161
///
1161-
UR_APIEXPORT ur_result_t UR_APICALL
1162-
urDeviceSelectBinary(ur_device_handle_t, const ur_device_binary_t *pBinaries,
1163-
uint32_t NumBinaries, uint32_t *pSelectedBinary) {
1164-
// Look for an image for the HIP target, and return the first one that is
1165-
// found
1162+
UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary(
1163+
ur_device_handle_t hDevice, const ur_device_binary_t *pBinaries,
1164+
uint32_t NumBinaries, uint32_t *pSelectedBinary) {
1165+
// Look for an image for the HIP target. If we have a clang offload bundle,
1166+
// try to return an exact match on the architecture to ensure we select the
1167+
// correct binary in a multi-architecture bundle. If we can't find an exact
1168+
// match, we return the first one that is found.
11661169
#if defined(__HIP_PLATFORM_AMD__)
11671170
const char *BinaryType = UR_DEVICE_BINARY_TARGET_AMDGCN;
11681171
#elif defined(__HIP_PLATFORM_NVIDIA__)
11691172
const char *BinaryType = UR_DEVICE_BINARY_TARGET_NVPTX64;
11701173
#else
11711174
#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
11721175
#endif
1176+
std::optional<uint32_t> FirstBackupCandidate;
1177+
1178+
hipDeviceProp_t Props;
1179+
UR_CHECK_ERROR(hipGetDeviceProperties(&Props, hDevice->get()));
1180+
1181+
// The arch name is the 'gfxABC' architecture, and occasionally ends with
1182+
// architecture feature strings separated by colons. We're only interested in
1183+
// the first part, so take up until the (optional) first colon.
1184+
std::string_view ArchName = Props.gcnArchName;
1185+
ArchName = ArchName.substr(0, ArchName.find_first_of(":"));
1186+
11731187
for (uint32_t i = 0; i < NumBinaries; i++) {
1174-
if (strcmp(pBinaries[i].pDeviceTargetSpec, BinaryType) == 0) {
1175-
*pSelectedBinary = i;
1176-
return UR_RESULT_SUCCESS;
1188+
if (strcmp(pBinaries[i].pDeviceTargetSpec, BinaryType) != 0) {
1189+
continue;
1190+
}
1191+
// If we've been given the actual binary by the SYCL runtime to inspect,
1192+
// attempt to parse it as a clang offload bundle.
1193+
using BinaryBlobTy = std::pair<const unsigned char *, size_t>;
1194+
if (auto *const BinaryBlob = (const BinaryBlobTy *)pBinaries[i].pNext) {
1195+
if (auto Parser = HipOffloadBundleParser::load(BinaryBlob->first,
1196+
BinaryBlob->second)) {
1197+
if (Parser->containsBundle(ArchName)) {
1198+
*pSelectedBinary = i;
1199+
return UR_RESULT_SUCCESS;
1200+
}
1201+
}
1202+
}
1203+
if (!FirstBackupCandidate) {
1204+
FirstBackupCandidate = i;
11771205
}
11781206
}
11791207

1208+
if (FirstBackupCandidate) {
1209+
*pSelectedBinary = *FirstBackupCandidate;
1210+
return UR_RESULT_SUCCESS;
1211+
}
1212+
11801213
// No image can be loaded for the given device
11811214
return UR_RESULT_ERROR_INVALID_BINARY;
11821215
}

unified-runtime/source/adapters/offload/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ target_link_libraries(${TARGET_NAME} PRIVATE
6262
${PROJECT_NAME}::headers
6363
${PROJECT_NAME}::common
6464
${PROJECT_NAME}::umf
65+
ur_common
6566
${UR_OFFLOAD_INSTALL_DIR}/lib/libLLVMOffload.so
6667
${ADDITIONAL_LINK_LIBS}
6768
)

unified-runtime/source/adapters/offload/program.cpp

Lines changed: 1 addition & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include "context.hpp"
1616
#include "device.hpp"
17+
#include "offload_bundle_parser.hpp"
1718
#include "platform.hpp"
1819
#include "program.hpp"
1920
#include "ur2offload.hpp"
@@ -70,85 +71,6 @@ ur_result_t ProgramCreateCudaWorkaround(ur_context_handle_t, const uint8_t *,
7071
}
7172
#endif
7273

73-
// https://clang.llvm.org/docs/ClangOffloadBundler.html#bundled-binary-file-layout
74-
class HipOffloadBundleParser {
75-
static constexpr std::string_view Magic = "__CLANG_OFFLOAD_BUNDLE__";
76-
const uint8_t *Buff;
77-
size_t Length;
78-
79-
struct __attribute__((packed)) BundleEntry {
80-
uint64_t ObjectOffset;
81-
uint64_t ObjectSize;
82-
uint64_t EntryIdSize;
83-
char EntryIdStart;
84-
};
85-
86-
struct __attribute__((packed)) BundleHeader {
87-
const char HeaderMagic[Magic.size()];
88-
uint64_t EntryCount;
89-
BundleEntry FirstEntry;
90-
};
91-
92-
HipOffloadBundleParser() = delete;
93-
HipOffloadBundleParser(const uint8_t *Buff, size_t Length)
94-
: Buff(Buff), Length(Length) {}
95-
96-
public:
97-
static std::optional<HipOffloadBundleParser> load(const uint8_t *Buff,
98-
size_t Length) {
99-
if (std::string_view{reinterpret_cast<const char *>(Buff), Length}.find(
100-
Magic) != 0) {
101-
return std::nullopt;
102-
}
103-
return HipOffloadBundleParser(Buff, Length);
104-
}
105-
106-
ur_result_t extract(std::string_view SearchTargetId,
107-
const uint8_t *&OutBinary, size_t &OutLength) {
108-
const char *Limit = reinterpret_cast<const char *>(&Buff[Length]);
109-
110-
// The different check here means that a binary consisting of only the magic
111-
// bytes (but nothing else) will result in INVALID_PROGRAM rather than being
112-
// treated as a non-bundle
113-
auto *Header = reinterpret_cast<const BundleHeader *>(Buff);
114-
if (reinterpret_cast<const char *>(&Header->FirstEntry) > Limit) {
115-
return UR_RESULT_ERROR_INVALID_PROGRAM;
116-
}
117-
118-
const auto *CurrentEntry = &Header->FirstEntry;
119-
for (uint64_t I = 0; I < Header->EntryCount; I++) {
120-
if (&CurrentEntry->EntryIdStart > Limit) {
121-
return UR_RESULT_ERROR_INVALID_PROGRAM;
122-
}
123-
auto EntryId = std::string_view(&CurrentEntry->EntryIdStart,
124-
CurrentEntry->EntryIdSize);
125-
if (EntryId.end() > Limit) {
126-
return UR_RESULT_ERROR_INVALID_PROGRAM;
127-
}
128-
129-
// Will match either "hip" or "hipv4"
130-
bool isHip = EntryId.find("hip") == 0;
131-
bool VersionMatches =
132-
EntryId.find_last_of(SearchTargetId) == EntryId.size() - 1;
133-
134-
if (isHip && VersionMatches) {
135-
OutBinary = reinterpret_cast<const uint8_t *>(
136-
&Buff[CurrentEntry->ObjectOffset]);
137-
OutLength = CurrentEntry->ObjectSize;
138-
139-
if (reinterpret_cast<const char *>(&OutBinary[OutLength]) > Limit) {
140-
return UR_RESULT_ERROR_INVALID_PROGRAM;
141-
}
142-
return UR_RESULT_SUCCESS;
143-
}
144-
145-
CurrentEntry = reinterpret_cast<const BundleEntry *>(EntryId.end());
146-
}
147-
148-
return UR_RESULT_ERROR_INVALID_PROGRAM;
149-
}
150-
};
151-
15274
} // namespace
15375

15476
UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(

unified-runtime/source/common/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ add_ur_library(ur_common STATIC
2323
ur_util.cpp
2424
ur_util.hpp
2525
latency_tracker.hpp
26+
offload_bundle_parser.cpp
27+
offload_bundle_parser.hpp
2628
$<$<PLATFORM_ID:Windows>:windows/ur_lib_loader.cpp>
2729
$<$<PLATFORM_ID:Linux,Darwin>:linux/ur_lib_loader.cpp>
2830
)
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/*
2+
*
3+
* Copyright (C) 2025 Intel Corporation
4+
*
5+
* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
6+
* Exceptions. See LICENSE.TXT
7+
*
8+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
9+
*
10+
*/
11+
12+
#include "offload_bundle_parser.hpp"
13+
#include <string>
14+
15+
std::optional<HipOffloadBundleParser>
16+
HipOffloadBundleParser::load(const uint8_t *Buff, size_t Length) {
17+
if (std::string_view{reinterpret_cast<const char *>(Buff), Length}.find(
18+
Magic) != 0) {
19+
return std::nullopt;
20+
}
21+
return HipOffloadBundleParser(Buff, Length);
22+
}
23+
24+
ur_result_t HipOffloadBundleParser::extract(std::string_view SearchTargetId,
25+
const uint8_t *&OutBinary,
26+
size_t &OutLength) {
27+
if (auto Entry = containsBundle(SearchTargetId)) {
28+
OutBinary = &Buff[Entry->ObjectOffset];
29+
OutLength = Entry->ObjectSize;
30+
31+
if (const uint8_t *Limit = &Buff[Length]; &OutBinary[OutLength] <= Limit) {
32+
return UR_RESULT_SUCCESS;
33+
}
34+
}
35+
return UR_RESULT_ERROR_INVALID_PROGRAM;
36+
}
37+
38+
std::optional<HipOffloadBundleParser::BundleEntry>
39+
HipOffloadBundleParser::containsBundle(std::string_view SearchTargetId) {
40+
const uint8_t *Limit = &Buff[Length];
41+
42+
// The different check here means that a binary consisting of only the magic
43+
// bytes (but nothing else) will result in INVALID_PROGRAM rather than being
44+
// treated as a non-bundle
45+
auto *Header = reinterpret_cast<const BundleHeader *>(Buff);
46+
if (reinterpret_cast<const uint8_t *>(&Header->FirstEntry) > Limit) {
47+
return std::nullopt;
48+
}
49+
50+
// std::string_view::ends_with is C++20. Until then, roll our own. Note that
51+
// this is the equivalent form listed on en.cppreference.com.
52+
auto ends_with = [](std::string_view str, std::string_view sv) {
53+
return str.size() >= sv.size() &&
54+
str.compare(str.size() - sv.size(), std::string::npos, sv) == 0;
55+
};
56+
57+
const auto *CurrentEntry = &Header->FirstEntry;
58+
for (uint64_t I = 0; I < Header->EntryCount; I++) {
59+
const uint8_t *EntryBytes = &CurrentEntry->EntryIdStart;
60+
if (EntryBytes > Limit ||
61+
(EntryBytes + CurrentEntry->EntryIdSize) > Limit) {
62+
return std::nullopt;
63+
}
64+
auto EntryId = std::string_view(reinterpret_cast<const char *>(EntryBytes),
65+
CurrentEntry->EntryIdSize);
66+
67+
// Will match either "hip" or "hipv4"
68+
bool isHip = EntryId.find("hip") == 0;
69+
bool VersionMatches = ends_with(EntryId, SearchTargetId);
70+
71+
if (isHip && VersionMatches) {
72+
return *CurrentEntry;
73+
}
74+
75+
CurrentEntry = reinterpret_cast<const BundleEntry *>(
76+
EntryBytes + CurrentEntry->EntryIdSize);
77+
}
78+
79+
return std::nullopt;
80+
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/*
2+
*
3+
* Copyright (C) 2025 Intel Corporation
4+
*
5+
* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
6+
* Exceptions. See LICENSE.TXT
7+
*
8+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
9+
*
10+
*/
11+
12+
#include <ur_api.h>
13+
14+
#include <cstdint>
15+
#include <optional>
16+
#include <string_view>
17+
18+
#ifdef _MSC_VER
19+
#define PACKED(d) __pragma(pack(push, 1)) d __pragma(pack(pop))
20+
#else
21+
#define PACKED(d) d __attribute__((packed))
22+
#endif
23+
24+
// https://clang.llvm.org/docs/ClangOffloadBundler.html#bundled-binary-file-layout
25+
class HipOffloadBundleParser {
26+
static constexpr std::string_view Magic = "__CLANG_OFFLOAD_BUNDLE__";
27+
const uint8_t *Buff;
28+
size_t Length;
29+
30+
PACKED(struct BundleEntry {
31+
uint64_t ObjectOffset;
32+
uint64_t ObjectSize;
33+
uint64_t EntryIdSize;
34+
uint8_t EntryIdStart;
35+
});
36+
37+
PACKED(struct BundleHeader {
38+
const char HeaderMagic[Magic.size()];
39+
uint64_t EntryCount;
40+
BundleEntry FirstEntry;
41+
});
42+
43+
HipOffloadBundleParser() = delete;
44+
HipOffloadBundleParser(const uint8_t *Buff, size_t Length)
45+
: Buff(Buff), Length(Length) {}
46+
47+
public:
48+
static std::optional<HipOffloadBundleParser> load(const uint8_t *Buff,
49+
size_t Length);
50+
51+
ur_result_t extract(std::string_view SearchTargetId,
52+
const uint8_t *&OutBinary, size_t &OutLength);
53+
54+
std::optional<BundleEntry> containsBundle(std::string_view SearchTargetId);
55+
};

0 commit comments

Comments
 (0)