Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 22 additions & 5 deletions projects/hotswap/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ add_library(hsa-hotswap SHARED
hotswap_tool.cpp
hotswap.cpp
hotswap_gfx_query.cpp
hotswap_loader_policy.cpp
${HOTSWAP_PLATFORM_IO_SRC}
)

Expand Down Expand Up @@ -68,21 +69,37 @@ file(WRITE "${_hotswap_co_hdr}"
"static const unsigned char kGfx1250MinCo[] = {${_hotswap_co_arr}};\n")
target_include_directories(hotswap_test PRIVATE ${CMAKE_CURRENT_BINARY_DIR})

# Unit tests for the gfx-target / ASIC-revision query logic. The test compiles
# the portable hotswap_gfx_query.cpp unit alongside the test translation unit
# and supplies its own stubs for the HSA entry points, so this target needs
# the relevant headers but must NOT link the real HSA library (doing so would
# clash with the in-file stubs).
# Unit tests for gfx-target / ASIC-revision query logic and rewrite-policy
# decisions. The test supplies its own HSA stubs, so this target needs the
# relevant headers but must NOT link the real HSA library (doing so would clash
# with the in-file stubs).
# Build the test binary from the test translation unit plus the two portable
# units it exercises (gfx query and loader policy).
add_executable(hotswap_tool_test
tests/hotswap_tool_test.cpp
hotswap_gfx_query.cpp
hotswap_loader_policy.cpp
)
# Header search paths only: this directory for the hotswap headers, then the
# HSA include root and its parent (presumably so both <hsa.h> and <hsa/hsa.h>
# style includes resolve -- confirm against the HSA include layout).
target_include_directories(hotswap_tool_test PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}
${HSA_RUNTIME_INC}
${HSA_RUNTIME_INC}/..
)

# Loader-path tests for libhsa-hotswap.so. The test includes hotswap_tool.cpp
# directly and supplies stub HSA/COMGR entry points, so it does not link the
# real HSA runtime or COMGR.
# Sources: the loader test translation unit, the gfx-query and loader-policy
# units, and the platform I/O source(s) held in HOTSWAP_PLATFORM_IO_SRC.
# hotswap_tool.cpp is intentionally absent from this list because the test
# translation unit includes it directly.
add_executable(hotswap_loader_test
tests/hotswap_loader_test.cpp
hotswap_gfx_query.cpp
hotswap_loader_policy.cpp
${HOTSWAP_PLATFORM_IO_SRC}
)
# Header search paths only: this directory for the hotswap headers, then the
# HSA include root and its parent (presumably so both <hsa.h> and <hsa/hsa.h>
# style includes resolve -- confirm against the HSA include layout).
target_include_directories(hotswap_loader_test PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}
${HSA_RUNTIME_INC}
${HSA_RUNTIME_INC}/..
)

# Turn on CTest support for this directory and register each test executable
# under its own name, so a plain `ctest` run executes all three.
enable_testing()
add_test(NAME hotswap_test COMMAND hotswap_test)
add_test(NAME hotswap_tool_test COMMAND hotswap_tool_test)
add_test(NAME hotswap_loader_test COMMAND hotswap_loader_test)
8 changes: 3 additions & 5 deletions projects/hotswap/hotswap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,9 @@ std::string GetCodeObjectIsaName(const void *elf_data, size_t elf_size);
///
/// Both ISA names are supplied by the caller: source_isa typically comes from
/// the code object (see GetCodeObjectIsaName) and target_isa from the running
/// GPU (e.g. the HSA agent), but either may be overridden. COMGR's
/// amd_comgr_hotswap_rewrite (linked directly) applies whatever transformation
/// the source/target pair calls for -- same-ISA stepping patches (e.g. gfx1250
/// B0 to A0) or cross-family transpilation -- and returns the rewritten code
/// object. If no transformation is needed, the output is a copy of the input.
/// GPU (e.g. the HSA agent), but either may be overridden. This wrapper passes
/// the request to COMGR's amd_comgr_hotswap_rewrite and returns COMGR's output
/// bytes to the caller.
///
/// On success, *out_data and *out_size describe the rewritten code object.
/// If *out_data differs from elf_data, it was allocated by this function
Expand Down
9 changes: 3 additions & 6 deletions projects/hotswap/hotswap_gfx_query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ std::string extract_gfx_target(const std::string &isa_name) {
if (pos == std::string::npos)
return {};
auto end = std::find_if_not(isa_name.begin() + pos, isa_name.end(),
[](unsigned char c) { return std::isalnum(c); });
[](unsigned char c) {
return std::isalnum(c) || c == '-';
});
return isa_name.substr(pos, end - isa_name.begin() - pos);
}

Expand Down Expand Up @@ -94,9 +96,4 @@ void reset_gfx_revision_cache() {
g_cache.clear();
}

bool gate_allows_hotswap(const AgentGfxRevision &gfx) {
return gfx.revision_valid && gfx.gfx_target == "gfx1250" &&
gfx.asic_revision == 0; // A0
}

} // namespace rocr::hotswap
16 changes: 6 additions & 10 deletions projects/hotswap/hotswap_gfx_query.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,25 +33,21 @@ struct AgentGfxRevision {
// "amdgcn-amd-amdhsa--gfx1250:sramecc+:xnack-"), or an empty string on failure.
std::string get_agent_isa_name(hsa_agent_t agent);

// Extracts the gfx target (e.g. "gfx1250") from a full HSA ISA name. Returns an
// empty string when no gfx target is present. The returned token stops at the
// first non-alphanumeric character so feature suffixes (":sramecc+", etc.) are
// dropped.
// Extracts the gfx target (e.g. "gfx1250" or "gfx12-5-generic") from a full
// HSA ISA name. Returns an empty string when no gfx target is present. The
// returned token preserves hyphenated processor names and stops before feature
// suffixes (":sramecc+", etc.).
std::string extract_gfx_target(const std::string &isa_name);

// Queries the agent's gfx target and ASIC revision via the HSA runtime. The
// result is cached per agent handle, since code-object loads can be frequent.
// This function intentionally encodes no gating policy; callers apply
// gate_allows_hotswap() (below) to decide whether to act.
// This function intentionally encodes no rewrite policy; callers apply the
// policy in hotswap_loader_policy.{hpp,cpp}.
AgentGfxRevision query_agent_gfx_revision(hsa_agent_t agent);

// Clears the per-agent-handle cache used by query_agent_gfx_revision().
void reset_gfx_revision_cache();

// HotSwap's activation policy: rewriting is performed only for gfx1250 silicon
// at ASIC revision A0 (and only when the revision was successfully queried).
bool gate_allows_hotswap(const AgentGfxRevision &gfx);

} // namespace rocr::hotswap

#endif // ROCR_HOTSWAP_GFX_QUERY_HPP
106 changes: 106 additions & 0 deletions projects/hotswap/hotswap_loader_policy.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
//===- hotswap_loader_policy.cpp - HotSwap loader decision policy ---------===//
//
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "hotswap_loader_policy.hpp"

#include <algorithm>
#include <cctype>
#include <cstddef>

namespace rocr::hotswap {

namespace {

// Processor names the loader policy matches against.
constexpr char Gfx1250[] = "gfx1250";
constexpr char Gfx12_5Generic[] = "gfx12-5-generic";
constexpr char Gfx125Prefix[] = "gfx125";

// Target-ID feature suffixes that pin a gfx1250 stepping. B0 is spelled as the
// "b0-specific" feature switched on ('+'); A0 is the same feature switched off
// ('-').
constexpr char Gfx1250B0Feature[] = ":gfx1250-b0-specific+";
constexpr char Gfx1250A0Feature[] = ":gfx1250-b0-specific-";

// The gfx1250 steppings that can be encoded as a feature suffix.
enum class Gfx1250Stepping {
  B0,
  A0,
};

// Maps a stepping to the feature suffix that encodes it. The returned pointer
// refers to one of the constants above.
const char *gfx1250_stepping_feature(Gfx1250Stepping stepping) {
  if (stepping == Gfx1250Stepping::B0) {
    return Gfx1250B0Feature;
  }
  return Gfx1250A0Feature;
}

// Returns true when gfx_target names a gfx12.5 processor: either exactly
// "gfx12-5-generic", or "gfx125" followed by at least one character, all of
// which are decimal digits. The bare prefix "gfx125" is not accepted.
bool is_gfx12_5_target(const std::string &gfx_target) {
  if (gfx_target == Gfx12_5Generic) {
    return true;
  }

  // sizeof counts the terminating NUL, hence the -1.
  constexpr std::size_t prefix_len = sizeof(Gfx125Prefix) - 1;
  const bool has_prefix_and_suffix =
      gfx_target.size() > prefix_len &&
      gfx_target.compare(0, prefix_len, Gfx125Prefix) == 0;
  if (!has_prefix_and_suffix) {
    return false;
  }

  for (std::size_t i = prefix_len; i < gfx_target.size(); ++i) {
    // std::isdigit requires a value representable as unsigned char.
    if (!std::isdigit(static_cast<unsigned char>(gfx_target[i]))) {
      return false;
    }
  }
  return true;
}

// Returns isa_name with the feature suffix for `stepping` appended, but only
// when the name's gfx target is exactly gfx1250 and it does not already carry
// either stepping feature. In every other case the name is returned unchanged,
// so an explicit stepping in the input always wins over `stepping`.
std::string with_gfx1250_stepping_feature(const std::string &isa_name,
                                          Gfx1250Stepping stepping) {
  if (extract_gfx_target(isa_name) != Gfx1250) {
    return isa_name;
  }

  const bool already_tagged =
      isa_name.find(Gfx1250B0Feature) != std::string::npos ||
      isa_name.find(Gfx1250A0Feature) != std::string::npos;
  if (already_tagged) {
    return isa_name;
  }

  std::string tagged = isa_name;
  tagged += gfx1250_stepping_feature(stepping);
  return tagged;
}

} // namespace

// Baseline gate: passes only for an agent whose revision query succeeded,
// whose gfx target is exactly gfx1250, and whose ASIC revision is 0 (A0).
bool gate_allows_hotswap(const AgentGfxRevision &gfx) {
  if (!gfx.revision_valid) {
    return false;
  }
  const bool is_a0 = gfx.asic_revision == 0;
  return gfx.gfx_target == Gfx1250 && is_a0;
}

// Agent-only precheck: true when either the baseline gfx1250 A0 gate passes,
// or the gfx12.5 opt-in was requested and the agent is a gfx12.5 target. A
// true result does not guarantee a rewrite; decide_hotswap_rewrite() still
// has to accept the code object's ISA.
bool has_candidate_hotswap_rewrite(const AgentGfxRevision &gfx,
                                   const RewriteOptions &options) {
  if (gate_allows_hotswap(gfx)) {
    return true;
  }
  if (!options.gfx12_5_rewrite_requested) {
    return false;
  }
  return is_gfx12_5_target(gfx.gfx_target);
}

std::optional<RewriteDecision>
decide_hotswap_rewrite(const AgentGfxRevision &gfx,
const std::string &source_isa,
const std::string &target_isa,
const RewriteOptions &options) {
if (source_isa.empty() || target_isa.empty()) {
return std::nullopt;
}

std::string source_gfx = extract_gfx_target(source_isa);

if (gate_allows_hotswap(gfx) && source_gfx == Gfx1250 &&
extract_gfx_target(target_isa) == Gfx1250) {
return RewriteDecision{
with_gfx1250_stepping_feature(source_isa, Gfx1250Stepping::B0),
with_gfx1250_stepping_feature(target_isa, Gfx1250Stepping::A0)};
}

if (!options.gfx12_5_rewrite_requested ||
!is_gfx12_5_target(gfx.gfx_target) || !is_gfx12_5_target(source_gfx)) {
return std::nullopt;
}

// ROCm/rocm-systems#7581 established the loader-side invariant that this
// opt-in path uses the code object's processor, not a source->agent retarget.
RewriteDecision decision{source_isa, source_isa};

if (source_gfx == Gfx1250) {
decision.source_isa =
with_gfx1250_stepping_feature(source_isa, Gfx1250Stepping::B0);
decision.target_isa =
with_gfx1250_stepping_feature(source_isa, Gfx1250Stepping::B0);
}

return decision;
}

} // namespace rocr::hotswap
53 changes: 53 additions & 0 deletions projects/hotswap/hotswap_loader_policy.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
//===- hotswap_loader_policy.hpp - HotSwap loader decision policy ---------===//
//
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Centralized loader policy for selecting whether HotSwap should call COMGR and
// which source/target ISA pair it should pass. COMGR owns validation and all
// code-object transformations after the call crosses this boundary.
//
//===----------------------------------------------------------------------===//

#ifndef ROCR_HOTSWAP_LOADER_POLICY_HPP
#define ROCR_HOTSWAP_LOADER_POLICY_HPP

#include "hotswap_gfx_query.hpp"

#include <optional>
#include <string>

namespace rocr::hotswap {

// Caller-supplied switches that widen the loader policy beyond the baseline
// gfx1250 A0 route.
struct RewriteOptions {
  // When true, loads on gfx12.5 agents may also be routed to COMGR. Off by
  // default.
  bool gfx12_5_rewrite_requested{false};
};

// The ISA pair the loader passes to COMGR for one code-object load, as chosen
// by decide_hotswap_rewrite().
struct RewriteDecision {
// ISA name describing the code object as built.
std::string source_isa;
// ISA name the rewrite should target.
std::string target_isa;
};

// HotSwap's baseline gfx1250 route is active only for gfx1250 silicon at ASIC
// revision A0 (and only when the revision was successfully queried).
//
// Returns true only when gfx.revision_valid is set, gfx.gfx_target is exactly
// "gfx1250", and gfx.asic_revision is 0 (A0); false otherwise.
bool gate_allows_hotswap(const AgentGfxRevision &gfx);

// Agent-level precheck used by the loader to avoid source-ISA parsing when no
// local routing condition can possibly apply.
//
// Returns true when gate_allows_hotswap(gfx) holds, or when
// options.gfx12_5_rewrite_requested is set and gfx.gfx_target is a gfx12.5
// target ("gfx12-5-generic", or "gfx125" followed by one or more digits).
// A true result only means a rewrite is possible; decide_hotswap_rewrite()
// makes the per-code-object decision.
bool has_candidate_hotswap_rewrite(const AgentGfxRevision &gfx,
const RewriteOptions &options);

// Returns the COMGR ISA pair for this load, or std::nullopt when the original
// code object should be loaded unchanged. The decision is limited to loader
// routing and ISA-pair construction; COMGR decides which rewrite work, if any,
// is enabled for the request.
//
// Parameters:
//   gfx        - gfx target and ASIC revision queried from the agent.
//   source_isa - ISA name of the code object being loaded.
//   target_isa - ISA name of the agent.
//   options    - opt-in switches (see RewriteOptions).
//
// Routing, in priority order:
//   * source_isa or target_isa empty -> std::nullopt.
//   * Baseline: gate_allows_hotswap(gfx) holds and both ISA names are gfx1250
//     -> source gets ":gfx1250-b0-specific+" and target gets
//     ":gfx1250-b0-specific-" appended, unless the name already carries one of
//     those features.
//   * Opt-in: options.gfx12_5_rewrite_requested is set and both the agent and
//     the code object are gfx12.5 targets -> source and target are both the
//     code object's ISA name (with the B0 feature appended when it is gfx1250
//     and carries no stepping feature yet).
//   * Otherwise -> std::nullopt.
std::optional<RewriteDecision>
decide_hotswap_rewrite(const AgentGfxRevision &gfx,
const std::string &source_isa,
const std::string &target_isa,
const RewriteOptions &options);

} // namespace rocr::hotswap

#endif // ROCR_HOTSWAP_LOADER_POLICY_HPP
63 changes: 40 additions & 23 deletions projects/hotswap/hotswap_tool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@
#include "hotswap.hpp"
#include "hotswap_gfx_query.hpp"
#include "hotswap_platform_io.hpp"
#include "hotswap_loader_policy.hpp"
#include <algorithm>
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <hsa.h>
#include <hsa_api_trace.h>
#include <hsa_ext_amd.h>
Expand All @@ -41,9 +43,12 @@ namespace {
namespace hotswap_io = rocr::hotswap::platform_io;

using rocr::hotswap::AgentGfxRevision;
using rocr::hotswap::gate_allows_hotswap;
using rocr::hotswap::decide_hotswap_rewrite;
using rocr::hotswap::get_agent_isa_name;
using rocr::hotswap::has_candidate_hotswap_rewrite;
using rocr::hotswap::query_agent_gfx_revision;
using rocr::hotswap::RewriteDecision;
using rocr::hotswap::RewriteOptions;

using ByteVec = std::shared_ptr<std::vector<uint8_t>>;
using OwnedElf = std::unique_ptr<void, decltype(&std::free)>;
Expand Down Expand Up @@ -107,6 +112,11 @@ void stash_bytes(uint64_t handle, const uint8_t *data, size_t size) {
g_reader_map[handle] = ReaderEntry{std::move(vec), false, false};
}

// Reports whether the gfx12.5 opt-in route was requested through the
// AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES environment variable. Unset, empty, and
// exactly "0" all mean "not requested"; any other value enables the route.
// The environment is re-read on every call.
bool gfx12_5_rewrite_requested() {
  const char *const env = std::getenv("AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES");
  if (env == nullptr || *env == '\0') {
    return false;
  }
  return std::strcmp(env, "0") != 0;
}

bool try_get_reader_entry(uint64_t handle, ByteVec *bytes, bool *from_file) {
std::scoped_lock lock(g_reader_map_mutex);
const auto it = g_reader_map.find(handle);
Expand Down Expand Up @@ -254,29 +264,24 @@ hsa_status_t load_rewritten_reader(hsa_executable_t executable, hsa_agent_t agen
}

hsa_status_t try_retarget_and_load(hsa_executable_t executable, hsa_agent_t agent,
hsa_code_object_reader_t code_object_reader,
const char *options,
hsa_loaded_code_object_t *loaded_code_object,
const ByteVec &local_bytes) {
// Source ISA from the code object, target ISA from the running GPU.
const std::string source_isa = rocr::hotswap::GetCodeObjectIsaName(
local_bytes->data(), local_bytes->size());
const std::string target_isa = get_agent_isa_name(agent);
if (source_isa.empty() || target_isa.empty()) {
HOTSWAP_LOG("hotswap: rewrite SKIP empty isa (src='%s' tgt='%s' size=%zu)\n",
source_isa.c_str(), target_isa.c_str(), local_bytes->size());
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
}

const ByteVec &local_bytes,
const RewriteDecision &decision,
const RewriteOptions &rewrite_options) {
// Route through RetargetCodeObject once policy has selected a request. A
// same-processor source/target pair can still be meaningful to COMGR.
void *out_elf = nullptr;
size_t out_elf_size = 0;
const int rc = rocr::hotswap::RetargetCodeObject(
local_bytes->data(), local_bytes->size(), source_isa.c_str(),
target_isa.c_str(), &out_elf, &out_elf_size);
local_bytes->data(), local_bytes->size(), decision.source_isa.c_str(),
decision.target_isa.c_str(), &out_elf, &out_elf_size);

HOTSWAP_LOG("hotswap: rewrite src=%s tgt=%s in=%zu rc=%d out=%zu changed=%d\n",
source_isa.c_str(), target_isa.c_str(), local_bytes->size(), rc,
out_elf_size, out_elf != local_bytes->data());
HOTSWAP_LOG("hotswap: rewrite src=%s tgt=%s gfx12_5_opt_in=%d in=%zu "
"rc=%d out=%zu changed=%d\n",
decision.source_isa.c_str(), decision.target_isa.c_str(),
rewrite_options.gfx12_5_rewrite_requested, local_bytes->size(),
rc, out_elf_size, out_elf != local_bytes->data());

if (rc != 0 || out_elf == local_bytes->data()) {
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
Expand Down Expand Up @@ -311,20 +316,32 @@ hsa_status_t HSA_API hotswap_load_agent_code_object(
reader_from_file);
}

// Gate HotSwap to gfx1250 A0 silicon. On any other GPU or stepping, load
// the original code object unchanged instead of routing through COMGR.
const AgentGfxRevision gfx = query_agent_gfx_revision(agent);
if (!gate_allows_hotswap(gfx)) {
const RewriteOptions rewrite_options{gfx12_5_rewrite_requested()};
if (!has_candidate_hotswap_rewrite(gfx, rewrite_options)) {
HOTSWAP_LOG("hotswap: gate BLOCKED (gfx=%s rev=%u valid=%d)\n",
gfx.gfx_target.c_str(), gfx.asic_revision, gfx.revision_valid);
return load_original_reader(executable, agent, code_object_reader,
options, loaded_code_object,
reader_from_file);
}

const std::string source_isa = rocr::hotswap::GetCodeObjectIsaName(
local_bytes->data(), local_bytes->size());
const std::string target_isa = get_agent_isa_name(agent);
const auto decision =
decide_hotswap_rewrite(gfx, source_isa, target_isa, rewrite_options);
if (!decision) {
HOTSWAP_LOG("hotswap: decision NONE (gfx=%s src='%s' tgt='%s')\n",
gfx.gfx_target.c_str(), source_isa.c_str(), target_isa.c_str());
return load_original_reader(executable, agent, code_object_reader,
options, loaded_code_object,
reader_from_file);
}

const hsa_status_t status = try_retarget_and_load(
executable, agent, code_object_reader, options, loaded_code_object,
local_bytes);
executable, agent, options, loaded_code_object, local_bytes, *decision,
rewrite_options);
if (status == HSA_STATUS_SUCCESS) {
return status;
}
Expand Down
Loading
Loading