diff --git a/.github/workflows/build_kernel_windows.yaml b/.github/workflows/build_kernel_windows.yaml
index 24a36e6..e803f68 100644
--- a/.github/workflows/build_kernel_windows.yaml
+++ b/.github/workflows/build_kernel_windows.yaml
@@ -12,9 +12,11 @@ jobs:
     strategy:
       matrix:
         os: [ windows-2022 ]
-        python: [ '3.12', '3.13' ]
+        python: [ 3.12 ]
         torch: [
-          { version: '2.8', cuda: '12.9.1', wheel: '129' }
+#          { version: '2.9.1', cuda: '12.6.3', wheel: '126' },
+          { version: '2.9.1', cuda: '12.8.1', wheel: '128' },
+#          { version: '2.9.1', cuda: '13.0.1', wheel: '130' }
         ]
 
     name: Build kernel
@@ -32,7 +34,7 @@ jobs:
       - uses: actions/checkout@v5
 
       # CUDA environment setup
-      - uses: N-Storm/cuda-toolkit@v0.2.28
+      - uses: Jimver/cuda-toolkit@v0.2.29
        id: setup-cuda-toolkit
        with:
          cuda: ${{ matrix.torch.cuda }} # TODO(mfuntowicz): How can we test multiple CUDA versions than align with torch?
@@ -56,12 +58,7 @@ jobs:
          cache: 'pip'
 
      - name: Install PyTorch
-        run: pip install torch --index-url https://download.pytorch.org/whl/cu129
-
-      - name: Build activation kernel
-        run: ( scripts\windows\builder.ps1 -SourceFolder examples/activation -BuildConfig Release -Backend cuda -Build -Force )
-#      - name: Copy activation kernel
-#        run: cp -rL examples/activation/build activation-kernel
+        run: pip install torch --index-url https://download.pytorch.org/whl/cu${{ matrix.torch.wheel }}
 
      - name: Build cutlass GEMM kernel
        run: ( scripts\windows\builder.ps1 -SourceFolder examples/cutlass-gemm -BuildConfig Release -Backend cuda -Build -Force )
diff --git a/build2cmake/src/templates/cuda/preamble.cmake b/build2cmake/src/templates/cuda/preamble.cmake
index 78cd834..1f709da 100644
--- a/build2cmake/src/templates/cuda/preamble.cmake
+++ b/build2cmake/src/templates/cuda/preamble.cmake
@@ -104,10 +104,11 @@ message(STATUS "Rendered for platform {{ platform }}")
 {% if platform == 'windows' %}
 include(${CMAKE_CURRENT_LIST_DIR}/cmake/windows.cmake)
 
-# This preprocessor macro should be defined in building with MSVC but not for CUDA and co.
-# Also, if not using MVSC, this may not be set too ...
-# So we explicitly set it to avoid any side effect due to preprocessor-guards not being defined.
-add_compile_definitions(_WIN32>)
+if(GPU_LANG STREQUAL "CUDA")
+  add_compile_definitions(USE_CUDA=1)
+elseif(GPU_LANG STREQUAL "HIP")
+  add_compile_definitions(USE_ROCM=1)
+endif()
 
 # Generate standardized build name
 run_python(TORCH_VERSION "import torch; print(torch.__version__.split('+')[0])" "Failed to get Torch version")