Skip to content

Commit e8ddcf5

Browse files
authored
Fix some compilation errors, warnings, and clippy issues (#156)
1 parent 8fc34d5 commit e8ddcf5

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

88 files changed

+1440
-1515
lines changed

crates/blastoff/src/context.rs

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,13 @@ impl CublasContext {
140140
) -> Result<T> {
141141
unsafe {
142142
// cudaStream_t is the same as CUstream
143-
sys::v2::cublasSetStream_v2(self.raw, mem::transmute(stream.as_inner())).to_result()?;
143+
sys::v2::cublasSetStream_v2(
144+
self.raw,
145+
mem::transmute::<*mut cust::sys::CUstream_st, *mut cublas_sys::v2::CUstream_st>(
146+
stream.as_inner(),
147+
),
148+
)
149+
.to_result()?;
144150
let res = func(self)?;
145151
// reset the stream back to NULL just in case someone calls with_stream, then drops the stream, and tries to
146152
// execute a raw sys function with the context's handle.
@@ -227,10 +233,11 @@ impl CublasContext {
227233
/// ```
228234
pub fn set_math_mode(&self, math_mode: MathMode) -> Result<()> {
229235
unsafe {
230-
Ok(
231-
sys::v2::cublasSetMathMode(self.raw, mem::transmute(math_mode.bits()))
232-
.to_result()?,
236+
Ok(sys::v2::cublasSetMathMode(
237+
self.raw,
238+
mem::transmute::<u32, cublas_sys::v2::cublasMath_t>(math_mode.bits()),
233239
)
240+
.to_result()?)
234241
}
235242
}
236243

crates/blastoff/src/level1.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ fn check_stride<T: BlasDatatype>(x: &impl GpuBuffer<T>, n: usize, stride: Option
2424
);
2525
}
2626

27-
/// Scalar and Vector-based operations such as `min`, `max`, `axpy`, `copy`, `dot`, `nrm2`, `rot`, `rotg`, `rotm`, `rotmg`, `scal`, and `swap`.
28-
27+
/// Scalar and Vector-based operations such as `min`, `max`, `axpy`, `copy`, `dot`,
28+
/// `nrm2`, `rot`, `rotg`, `rotm`, `rotmg`, `scal`, and `swap`.
2929
impl CublasContext {
3030
/// Same as [`CublasContext::amin`] but with an explicit stride.
3131
///

crates/blastoff/src/lib.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -93,22 +93,17 @@ pub(crate) mod private {
9393

9494
/// An optional operation to apply to a matrix before a matrix operation. This includes
9595
/// no operation, transpose, or conjugate transpose.
96-
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
96+
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
9797
pub enum MatrixOp {
9898
/// No operation, leave the matrix as is. This is the default.
99+
#[default]
99100
None,
100101
/// Transpose the matrix in place.
101102
Transpose,
102103
/// Conjugate transpose the matrix in place.
103104
ConjugateTranspose,
104105
}
105106

106-
impl Default for MatrixOp {
107-
fn default() -> Self {
108-
MatrixOp::None
109-
}
110-
}
111-
112107
impl MatrixOp {
113108
/// Returns the corresponding `cublasOperation_t` for this operation.
114109
pub fn to_raw(self) -> sys::v2::cublasOperation_t {

crates/cuda_builder/src/lib.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -246,13 +246,13 @@ impl CudaBuilder {
246246

247247
/// Emit LLVM IR, the exact same as rustc's `--emit=llvm-ir`.
248248
pub fn emit_llvm_ir(mut self, emit_llvm_ir: bool) -> Self {
249-
self.emit = emit_llvm_ir.then(|| EmitOption::LlvmIr);
249+
self.emit = emit_llvm_ir.then_some(EmitOption::LlvmIr);
250250
self
251251
}
252252

253253
/// Emit LLVM Bitcode, the exact same as rustc's `--emit=llvm-bc`.
254254
pub fn emit_llvm_bitcode(mut self, emit_llvm_bitcode: bool) -> Self {
255-
self.emit = emit_llvm_bitcode.then(|| EmitOption::Bitcode);
255+
self.emit = emit_llvm_bitcode.then_some(EmitOption::Bitcode);
256256
self
257257
}
258258

@@ -435,7 +435,7 @@ fn invoke_rustc(builder: &CudaBuilder) -> Result<PathBuf, CudaBuilderError> {
435435
}
436436

437437
let mut cargo = Command::new("cargo");
438-
cargo.args(&[
438+
cargo.args([
439439
"build",
440440
"--lib",
441441
"--message-format=json-render-diagnostics",
@@ -525,7 +525,7 @@ fn get_last_artifact(out: &str) -> Option<PathBuf> {
525525
}
526526
})
527527
.filter(|line| line.reason == "compiler-artifact")
528-
.last()
528+
.next_back()
529529
.expect("Did not find output file in rustc output");
530530

531531
let mut filenames = last

crates/cuda_std/src/float.rs

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -356,12 +356,13 @@ impl GpuFloat for f32 {
356356
f32_intrinsic!(self, atan())
357357
}
358358

359-
/// Computes the four quadrant arctangent of `self` (`y`) and `other` (`x`) in radians.
359+
/// Computes the four quadrant arctangent of `self` (`y`) and `other` (`x`) in
360+
/// radians.
360361
///
361-
/// * `x = 0`, `y = 0`: `0`
362-
/// * `x >= 0`: `arctan(y/x)` -> `[-pi/2, pi/2]`
363-
/// * `y >= 0`: `arctan(y/x) + pi` -> `(pi/2, pi]`
364-
/// * `y < 0`: `arctan(y/x) - pi` -> `(-pi, -pi/2)`intrinsics
362+
/// * `x = 0`, `y = 0`: `0`
363+
/// * `x >= 0`: `arctan(y/x)` -> `[-pi/2, pi/2]`
364+
/// * `y >= 0`: `arctan(y/x) + pi` -> `(pi/2, pi]`
365+
/// * `y < 0`: `arctan(y/x) - pi` -> `(-pi, -pi/2)`
365366
#[inline]
366367
fn atan2(self, other: f32) -> f32 {
367368
f32_intrinsic!(self, atan2(other))
@@ -687,12 +688,13 @@ impl GpuFloat for f64 {
687688
f64_intrinsic!(self, atan())
688689
}
689690

690-
/// Computes the four quadrant arctangent of `self` (`y`) and `other` (`x`) in radians.
691+
/// Computes the four quadrant arctangent of `self` (`y`) and `other` (`x`) in
692+
/// radians.
691693
///
692-
/// * `x = 0`, `y = 0`: `0`
693-
/// * `x >= 0`: `arctan(y/x)` -> `[-pi/2, pi/2]`
694-
/// * `y >= 0`: `arctan(y/x) + pi` -> `(pi/2, pi]`
695-
/// * `y < 0`: `arctan(y/x) - pi` -> `(-pi, -pi/2)`intrinsics
694+
/// * `x = 0`, `y = 0`: `0`
695+
/// * `x >= 0`: `arctan(y/x)` -> `[-pi/2, pi/2]`
696+
/// * `y >= 0`: `arctan(y/x) + pi` -> `(pi/2, pi]`
697+
/// * `y < 0`: `arctan(y/x) - pi` -> `(-pi, -pi/2)`
696698
#[inline]
697699
fn atan2(self, other: f64) -> f64 {
698700
f64_intrinsic!(self, atan2(other))

crates/cuda_std/src/shared.rs

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,41 @@
22
33
use crate::gpu_only;
44

5-
/// Statically allocates a buffer large enough for `len` elements of `array_type`, yielding
6-
/// a `*mut array_type` that points to uninitialized shared memory. `len` must be a constant expression.
5+
/// Statically allocates a buffer large enough for `len` elements of `array_type`,
6+
/// yielding a `*mut array_type` that points to uninitialized shared memory. `len` must
7+
/// be a constant expression.
78
///
8-
/// Note that this allocates the memory __statically__, it expands to a static in the `shared` address space.
9-
/// Therefore, calling this macro multiple times in a loop will always yield the same data. However, separate
10-
/// invocations of the macro will yield different buffers.
9+
/// Note that this allocates the memory __statically__, it expands to a static in the
10+
/// `shared` address space. Therefore, calling this macro multiple times in a loop will
11+
/// always yield the same data. However, separate invocations of the macro will yield
12+
/// different buffers.
1113
///
12-
/// The data is uninitialized by default, therefore, you must be careful to not read the data before it is written to.
13-
/// The semantics of what "uninitialized" actually means on the GPU (i.e. if it yields unknown data or if it is UB to read it whatsoever)
14-
/// are not well known, so even if the type is valid for any backing memory, make sure to not read uninitialized data.
14+
/// The data is uninitialized by default, therefore, you must be careful to not read the
15+
/// data before it is written to. The semantics of what "uninitialized" actually means
16+
/// on the GPU (i.e. if it yields unknown data or if it is UB to read it whatsoever) are
17+
/// not well known, so even if the type is valid for any backing memory, make sure to
18+
/// not read uninitialized data.
1519
///
1620
/// # Safety
1721
///
18-
/// Shared memory usage is fundamentally extremely unsafe and impossible to statically prove, therefore
19-
/// the burden of correctness is on the user. Some of the things you must ensure in your usage of
20-
/// shared memory are:
21-
/// - Shared memory is only shared across __thread blocks__, not the entire device, therefore it is
22-
/// unsound to try and rely on sharing data across more than one block.
23-
/// - You must write to the shared buffer before reading from it as the data is uninitialized by default.
24-
/// - [`thread::sync_threads`](crate::thread::sync_threads) must be called before relying on the results of other
25-
/// threads, this ensures every thread has reached that point before going on. For example, reading another thread's
26-
/// data after writing to the buffer.
27-
/// - No access may be out of bounds, this usually means making sure the amount of threads and their dimensions are correct.
22+
/// Shared memory usage is fundamentally extremely unsafe and impossible to statically
23+
/// prove, therefore the burden of correctness is on the user. Some of the things you
24+
/// must ensure in your usage of shared memory are:
2825
///
29-
/// It is suggested to run your executable in `cuda-memcheck` to make sure usages of shared memory are right.
26+
/// - Shared memory is only shared across __thread blocks__, not the entire device,
27+
/// therefore it is unsound to try and rely on sharing data across more than one
28+
/// block.
29+
/// - You must write to the shared buffer before reading from it as the data is
30+
/// uninitialized by default.
31+
/// - [`thread::sync_threads`](crate::thread::sync_threads) must be called before
32+
/// relying on the results of other threads, this ensures every thread has reached
33+
/// that point before going on. For example, reading another thread's data after
34+
/// writing to the buffer.
35+
/// - No access may be out of bounds, this usually means making sure the amount of
36+
/// threads and their dimensions are correct.
37+
///
38+
/// It is suggested to run your executable in `cuda-memcheck` to make sure usages of
39+
/// shared memory are right.
3040
///
3141
/// # Examples
3242
///

0 commit comments

Comments (0)