Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 6 additions & 9 deletions .github/workflows/build_kernel_windows.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@ jobs:
strategy:
matrix:
os: [ windows-2022 ]
python: [ '3.12', '3.13' ]
python: [ 3.12 ]
torch: [
{ version: '2.8', cuda: '12.9.1', wheel: '129' }
# { version: '2.9.1', cuda: '12.6.3', wheel: '126' },
{ version: '2.9.1', cuda: '12.8.1', wheel: '128' },
# { version: '2.9.1', cuda: '13.0.1', wheel: '130' }
]

name: Build kernel
Expand All @@ -32,7 +34,7 @@ jobs:
- uses: actions/checkout@v5

# CUDA environment setup
- uses: N-Storm/[email protected].28
- uses: Jimver/[email protected].29
id: setup-cuda-toolkit
with:
cuda: ${{ matrix.torch.cuda }} # TODO(mfuntowicz): How can we test multiple CUDA versions that align with torch?
Expand All @@ -56,12 +58,7 @@ jobs:
cache: 'pip'

- name: Install PyTorch
run: pip install torch --index-url https://download.pytorch.org/whl/cu129

- name: Build activation kernel
run: ( scripts\windows\builder.ps1 -SourceFolder examples/activation -BuildConfig Release -Backend cuda -Build -Force )
# - name: Copy activation kernel
# run: cp -rL examples/activation/build activation-kernel
run: pip install torch --index-url https://download.pytorch.org/whl/cu${{ matrix.torch.wheel }}

- name: Build cutlass GEMM kernel
run: ( scripts\windows\builder.ps1 -SourceFolder examples/cutlass-gemm -BuildConfig Release -Backend cuda -Build -Force )
Expand Down
9 changes: 5 additions & 4 deletions build2cmake/src/templates/cuda/preamble.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,11 @@ message(STATUS "Rendered for platform {{ platform }}")
{% if platform == 'windows' %}
include(${CMAKE_CURRENT_LIST_DIR}/cmake/windows.cmake)

# _WIN32 is predefined when building with MSVC, but not necessarily when
# compiling with CUDA and co., or when a compiler other than MSVC is used.
# Define it explicitly so that preprocessor guards relying on it behave
# consistently across all compilers.
# NOTE(review): the previous spelling was `_WIN32>`; the trailing `>` looks like
# a leftover from a generator expression and produced a malformed definition.
add_compile_definitions(_WIN32)
# Expose the selected GPU backend to the sources as a preprocessor definition:
# USE_CUDA=1 when GPU_LANG is "CUDA", USE_ROCM=1 when it is "HIP".
# Both branches test GPU_LANG; the second branch previously tested `GPU`,
# which is not referenced anywhere else in the visible section.
# NOTE(review): GPU_LANG is presumably set earlier in this template — confirm.
if(GPU_LANG STREQUAL "CUDA")
  add_compile_definitions(USE_CUDA=1)
elseif(GPU_LANG STREQUAL "HIP")
  add_compile_definitions(USE_ROCM=1)
endif()

# Generate standardized build name
run_python(TORCH_VERSION "import torch; print(torch.__version__.split('+')[0])" "Failed to get Torch version")
Expand Down
Loading