diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index d32ba4efbc98a..5bcd4fbbabca7 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -1423,6 +1423,54 @@ static bool ggml_vk_matmul_shmem_support(const vk_device& device, const std::vec
     return supported;
 }
 
+// Per-GPU subgroup size overrides, keyed by device-name aliases.
+struct GpuPipelineConfig {
+    // List of all aliases for a given GPU.
+    // For example, this can include names like "NAVI10", "RX 5700", etc.
+    std::vector<std::string> device_names;
+
+    // Mapping of pipeline names to their specific subgroup sizes.
+    // Example: {"soft_max_f32", 64}.
+    std::unordered_map<std::string, uint32_t> pipelines;
+
+    // Default subgroup size for this GPU.
+    // Defaults to 0 if not explicitly provided.
+    uint32_t default_subgroup_size = 0;
+};
+
+// Define configurations for different GPUs.
+static const std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
+    {
+        {"NAVI10", "NAVI14", "RX 5700", "RX 5600", "RX 5500"},
+        {
+            {"soft_max_f32", 64}, {"soft_max_f32_wg512", 64},
+            {"soft_max_f32_f16", 64}, {"soft_max_f32_f16_wg512", 64},
+            {"im2col_f32", 64}, {"im2col_f32_f16", 64},
+        },
+        32
+    },
+};
+
+// Look up the subgroup size to use for `pipeline_name` on `device_name`.
+// The first config with an alias that occurs as a substring of `device_name`
+// decides the result: a non-zero per-pipeline entry wins, otherwise that
+// config's default_subgroup_size is returned.
+static uint32_t get_subgroup_size(const std::string &pipeline_name, const std::string &device_name) {
+    for (const auto &config : gpu_pipeline_configs) {
+        for (const auto &alias : config.device_names) {
+            if (device_name.find(alias) != std::string::npos) {
+                auto pipIt = config.pipelines.find(pipeline_name);
+                if (pipIt != config.pipelines.end() && pipIt->second != 0) {
+                    return pipIt->second;
+                }
+                return config.default_subgroup_size;
+            }
+        }
+    }
+    // If no matching configuration is found, return 0.
+    return 0;
+}
+
 static void ggml_vk_load_shaders(vk_device& device) {
     VK_LOG_DEBUG("ggml_vk_load_shaders(" << device->name << ")");
 
@@ -1543,11 +1591,22 @@ static void ggml_vk_load_shaders(vk_device& device) {
         device->pipeline_matmul_id_f32 = std::make_shared<vk_matmul_pipeline_struct>();
     }
 
+    // Physical-device name, used to pick per-GPU subgroup size overrides.
+    vk::PhysicalDeviceProperties2 props2;
+    device->physical_device.getProperties2(&props2);
+    const std::string device_name = props2.properties.deviceName.data();
+
     std::vector<std::future<void>> compiles;
     auto const &ggml_vk_create_pipeline = [&](vk_device& device, vk_pipeline& pipeline, const std::string &name, size_t spv_size, const void* spv_data, const std::string &entrypoint,
                                               uint32_t parameter_count, uint32_t push_constant_size, std::array<uint32_t, 3> wg_denoms, const std::vector<uint32_t>& specialization_constants,
                                               uint32_t align, bool disable_robustness = false, bool require_full_subgroups = false, uint32_t required_subgroup_size = 0) {
 
+        // An explicit size from the caller wins; only consult the per-GPU
+        // table when none was requested (0).
+        if (required_subgroup_size == 0) {
+            required_subgroup_size = get_subgroup_size(name, device_name);
+        }
+
         if (!pipeline) {
             pipeline = std::make_shared<vk_pipeline_struct>();
             pipeline->name = name;
@@ -2699,7 +2758,10 @@ static void ggml_vk_print_gpu_info(size_t idx) {
     subgroup_props.pNext = &driver_props;
     physical_device.getProperties2(&props2);
 
-    const size_t subgroup_size = subgroup_props.subgroupSize;
+    // Prefer the per-GPU default subgroup size override when one is configured.
+    const uint32_t default_subgroup_size = get_subgroup_size("", props2.properties.deviceName.data());
+    const size_t subgroup_size = (default_subgroup_size != 0) ? default_subgroup_size : subgroup_props.subgroupSize;
+
     const bool uma = props2.properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
 
     bool fp16_storage = false;