Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vulkan: subgroup size test #12087

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 52 additions & 1 deletion ggml/src/ggml-vulkan/ggml-vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1423,6 +1423,49 @@ static bool ggml_vk_matmul_shmem_support(const vk_device& device, const std::vec
return supported;
}

// Per-GPU tuning entry: maps a family of device-name aliases to the
// subgroup sizes that should be forced for specific pipelines.
// Looked up by get_subgroup_size() via substring match on the Vulkan
// device name. Members are listed in aggregate-initialization order
// (see gpu_pipeline_configs) — do not reorder.
struct GpuPipelineConfig {
	// List of all aliases for a given GPU.
	// For example, this can include names like "NAVI10", "RX 5700", etc.
	// An entry applies when any alias is a substring of the reported device name.
	std::vector<std::string> device_names;

	// Mapping of pipeline names to their specific subgroup sizes.
	// Example: {"soft_max_f32", 64}.
	// A value of 0 is treated as "not set" and falls back to the default below.
	std::unordered_map<std::string, uint32_t> pipelines;

	// Default subgroup size for this GPU.
	// Defaults to 0 if not explicitly provided.
	// 0 means "no override": the driver-reported subgroup size is used.
	uint32_t default_subgroup_size = 0;
};

// Define configurations for different GPUs.
// Define configurations for different GPUs.
// Currently covers RDNA1 (Navi 10/14) parts: they report a default subgroup
// (wave) size of 32, but the listed pipelines are forced to wave64.
// Matching is by substring against the Vulkan device name, so both code
// names ("NAVI10") and marketing names ("RX 5700") are listed.
static std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
    {
        {"NAVI10", "NAVI14", "RX 5700", "RX 5600", "RX 5500"},
        {
            {"soft_max_f32", 64}, {"soft_max_f32_wg512", 64},
            {"soft_max_f32_f16", 64}, {"soft_max_f32_f16_wg512", 64},
            {"im2col_f32", 64}, {"im2col_f32_f16", 64},
        },
        32  // default_subgroup_size for all other pipelines on these GPUs
    },
};

// Resolve the subgroup size to force for a pipeline on a given device.
// Scans gpu_pipeline_configs for an entry whose alias list contains a
// substring of the reported device name; within a matching entry, a
// non-zero pipeline-specific size wins over the entry's default.
// Returns 0 when no configuration applies (caller keeps the driver default).
static uint32_t get_subgroup_size(const std::string &pipeline_name, const std::string &device_name) {
    for (const auto &cfg : gpu_pipeline_configs) {
        bool device_matches = false;
        for (const auto &alias : cfg.device_names) {
            if (device_name.find(alias) != std::string::npos) {
                device_matches = true;
                break;
            }
        }
        if (!device_matches) {
            continue;
        }
        // Pipeline-specific override; a stored value of 0 means "unset",
        // so fall back to this GPU's default subgroup size.
        const auto it = cfg.pipelines.find(pipeline_name);
        return (it != cfg.pipelines.end() && it->second != 0) ? it->second : cfg.default_subgroup_size;
    }
    // If no matching configuration is found, return 0.
    return 0;
}

static void ggml_vk_load_shaders(vk_device& device) {
VK_LOG_DEBUG("ggml_vk_load_shaders(" << device->name << ")");

Expand Down Expand Up @@ -1543,11 +1586,17 @@ static void ggml_vk_load_shaders(vk_device& device) {
device->pipeline_matmul_id_f32 = std::make_shared<vk_matmul_pipeline_struct>();
}

vk::PhysicalDeviceProperties2 props2;
device->physical_device.getProperties2(&props2);
std::string device_name = props2.properties.deviceName.data();

std::vector<std::future<void>> compiles;
auto const &ggml_vk_create_pipeline = [&](vk_device& device, vk_pipeline& pipeline, const std::string &name, size_t spv_size, const void* spv_data, const std::string &entrypoint,
uint32_t parameter_count, uint32_t push_constant_size, std::array<uint32_t, 3> wg_denoms, const std::vector<uint32_t>& specialization_constants,
uint32_t align, bool disable_robustness = false, bool require_full_subgroups = false, uint32_t required_subgroup_size = 0) {

required_subgroup_size = get_subgroup_size(name, device_name);

if (!pipeline) {
pipeline = std::make_shared<vk_pipeline_struct>();
pipeline->name = name;
Expand Down Expand Up @@ -2699,7 +2748,9 @@ static void ggml_vk_print_gpu_info(size_t idx) {
subgroup_props.pNext = &driver_props;
physical_device.getProperties2(&props2);

const size_t subgroup_size = subgroup_props.subgroupSize;
uint32_t default_subgroup_size = get_subgroup_size("", props2.properties.deviceName.data());
const size_t subgroup_size = (default_subgroup_size != 0) ? default_subgroup_size : subgroup_props.subgroupSize;

const bool uma = props2.properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu;

bool fp16_storage = false;
Expand Down
Loading