Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -677,6 +677,7 @@ if(VLLM_GPU_LANG STREQUAL "HIP")
# _rocm_C extension
#
# Source list held in VLLM_ROCM_EXT_SRC: the skinny GEMM kernels, the torch
# bindings, and the attention kernels under csrc/rocm/.
# NOTE(review): presumably consumed by the _rocm_C extension target defined
# further down in this file -- confirm against the full CMakeLists.txt.
set(VLLM_ROCM_EXT_SRC
"csrc/rocm/skinny_gemms.cu"
"csrc/rocm/torch_bindings.cpp"
"csrc/rocm/attention.cu")

Expand Down
6 changes: 6 additions & 0 deletions csrc/rocm/ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,9 @@ void paged_attention(torch::Tensor& out, torch::Tensor& exp_sums,
const std::optional<torch::Tensor>& alibi_slopes,
const std::string& kv_cache_dtype, torch::Tensor& k_scale,
torch::Tensor& v_scale);

// Declaration only; the definition is not part of this header.
// Takes two tensors (in_a, in_b), a third tensor out_c, two scale tensors
// (scale_a, scale_b) and an integer CuCount. Returns nothing.
// NOTE(review): presumably the result is written into out_c and the
// definition lives in csrc/rocm/skinny_gemms.cu -- confirm.
// NOTE(review): CuCount looks like a compute-unit count used to size the
// launch -- confirm against the kernel implementation.
void wvSplitKQ(at::Tensor& in_a, at::Tensor& in_b, at::Tensor& out_c,
at::Tensor& scale_a, at::Tensor& scale_b, const int64_t CuCount);

// Declaration only; the definition is not part of this header.
// Takes two tensors (in_a, in_b) and an integer CuCount, and returns a
// tensor by value (unlike wvSplitKQ, there is no output-tensor parameter).
// NOTE(review): presumably the non-scaled counterpart of wvSplitKQ, defined
// in csrc/rocm/skinny_gemms.cu -- confirm.
torch::Tensor wvSplitK(at::Tensor& in_a, at::Tensor& in_b,
const int64_t CuCount);
Loading