-
Notifications
You must be signed in to change notification settings - Fork 254
Remove wrappers for deprecated CUSPARSE functions #2919
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.
diff --git a/lib/cusparse/level2.jl b/lib/cusparse/level2.jl
index f18257e84..f77a5d9f5 100644
--- a/lib/cusparse/level2.jl
+++ b/lib/cusparse/level2.jl
@@ -105,15 +105,18 @@ for (bname,aname,sname,elty) in ((:cusparseSbsrsv2_bufferSize, :cusparseSbsrsv2_
end
function sv2(transa::SparseChar, uplo::SparseChar, diag::SparseChar, alpha::Number,
- A::CuSparseMatrixBSR{T}, X::CuVector{T}, index::SparseChar) where T
+ A::CuSparseMatrixBSR{T}, X::CuVector{T}, index::SparseChar
+ ) where {T}
sv2!(transa,uplo,diag,alpha,A,copy(X),index)
end
function sv2(transa::SparseChar, uplo::SparseChar, diag::SparseChar,
- A::CuSparseMatrixBSR{T}, X::CuVector{T}, index::SparseChar) where T
+ A::CuSparseMatrixBSR{T}, X::CuVector{T}, index::SparseChar
+ ) where {T}
sv2!(transa,uplo,diag,one(T),A,copy(X),index)
end
function sv2(transa::SparseChar, uplo::SparseChar,
- A::CuSparseMatrixBSR{T}, X::CuVector{T}, index::SparseChar) where T
+ A::CuSparseMatrixBSR{T}, X::CuVector{T}, index::SparseChar
+ ) where {T}
sv2!(transa,uplo,'N',one(T),A,copy(X),index)
end
diff --git a/lib/cusparse/level3.jl b/lib/cusparse/level3.jl
index ccc08fdff..b251db32e 100644
--- a/lib/cusparse/level3.jl
+++ b/lib/cusparse/level3.jl
@@ -80,15 +80,17 @@ for (bname,aname,sname,elty) in ((:cusparseSbsrsm2_bufferSize, :cusparseSbsrsm2_
end
function sm2(transa::SparseChar, transxy::SparseChar, uplo::SparseChar, diag::SparseChar,
- alpha::Number, A::CuSparseMatrixBSR{T}, X::StridedCuMatrix{T},
+ alpha::Number, A::CuSparseMatrixBSR{T}, X::StridedCuMatrix{T},
index::SparseChar) where T
sm2!(transa,transxy,uplo,diag,alpha,A,copy(X),index)
end
function sm2(transa::SparseChar, transxy::SparseChar, uplo::SparseChar, diag::SparseChar,
- A::CuSparseMatrixBSR{T}, X::StridedCuMatrix{T}, index::SparseChar) where T
+ A::CuSparseMatrixBSR{T}, X::StridedCuMatrix{T}, index::SparseChar
+ ) where {T}
sm2!(transa,transxy,uplo,diag,one(T),A,copy(X),index)
end
function sm2(transa::SparseChar, transxy::SparseChar, uplo::SparseChar,
- A::CuSparseMatrixBSR{T}, X::StridedCuMatrix{T}, index::SparseChar) where T
+ A::CuSparseMatrixBSR{T}, X::StridedCuMatrix{T}, index::SparseChar
+ ) where {T}
sm2!(transa,transxy,uplo,'N',one(T),A,copy(X),index)
end |
68b1e11 to
8bffff9
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CUDA.jl Benchmarks
| Benchmark suite | Current: 8bffff9 | Previous: f99784f | Ratio |
|---|---|---|---|
latency/precompile |
57202304132 ns |
57505943660.5 ns |
0.99 |
latency/ttfp |
8145313774 ns |
8175596902 ns |
1.00 |
latency/import |
4546125271 ns |
4548330811 ns |
1.00 |
integration/volumerhs |
9619200 ns |
9627247.5 ns |
1.00 |
integration/byval/slices=1 |
147528 ns |
147004 ns |
1.00 |
integration/byval/slices=3 |
426395 ns |
426015 ns |
1.00 |
integration/byval/reference |
145423 ns |
144995 ns |
1.00 |
integration/byval/slices=2 |
286912 ns |
286391 ns |
1.00 |
integration/cudadevrt |
103877 ns |
103484 ns |
1.00 |
kernel/indexing |
14536 ns |
14144.5 ns |
1.03 |
kernel/indexing_checked |
15227 ns |
14979 ns |
1.02 |
kernel/occupancy |
711.2867132867133 ns |
696.1307189542483 ns |
1.02 |
kernel/launch |
2327.1111111111113 ns |
2152.1111111111113 ns |
1.08 |
kernel/rand |
15225 ns |
17139 ns |
0.89 |
array/reverse/1d |
20211 ns |
20127 ns |
1.00 |
array/reverse/2dL_inplace |
67057 ns |
66835 ns |
1.00 |
array/reverse/1dL |
70469 ns |
70275 ns |
1.00 |
array/reverse/2d |
22430.5 ns |
22347 ns |
1.00 |
array/reverse/1d_inplace |
10125 ns |
9616 ns |
1.05 |
array/reverse/2d_inplace |
13650 ns |
13357 ns |
1.02 |
array/reverse/2dL |
74453 ns |
74201 ns |
1.00 |
array/reverse/1dL_inplace |
67274 ns |
66737 ns |
1.01 |
array/copy |
21342 ns |
21034 ns |
1.01 |
array/iteration/findall/int |
159292 ns |
158193.5 ns |
1.01 |
array/iteration/findall/bool |
140544 ns |
140393.5 ns |
1.00 |
array/iteration/findfirst/int |
162754.5 ns |
161096 ns |
1.01 |
array/iteration/findfirst/bool |
163021.5 ns |
161802 ns |
1.01 |
array/iteration/scalar |
75207 ns |
72492 ns |
1.04 |
array/iteration/logical |
218340 ns |
214800 ns |
1.02 |
array/iteration/findmin/1d |
51665 ns |
50555 ns |
1.02 |
array/iteration/findmin/2d |
97251 ns |
96485 ns |
1.01 |
array/reductions/reduce/Int64/1d |
44237 ns |
43782 ns |
1.01 |
array/reductions/reduce/Int64/dims=1 |
49808 ns |
44868.5 ns |
1.11 |
array/reductions/reduce/Int64/dims=2 |
62136 ns |
61893.5 ns |
1.00 |
array/reductions/reduce/Int64/dims=1L |
89604 ns |
89229.5 ns |
1.00 |
array/reductions/reduce/Int64/dims=2L |
88910 ns |
88425 ns |
1.01 |
array/reductions/reduce/Float32/1d |
38769 ns |
37869 ns |
1.02 |
array/reductions/reduce/Float32/dims=1 |
42502.5 ns |
52159 ns |
0.81 |
array/reductions/reduce/Float32/dims=2 |
60655 ns |
60004 ns |
1.01 |
array/reductions/reduce/Float32/dims=1L |
52940 ns |
52689 ns |
1.00 |
array/reductions/reduce/Float32/dims=2L |
73319 ns |
72514 ns |
1.01 |
array/reductions/mapreduce/Int64/1d |
44424 ns |
44158 ns |
1.01 |
array/reductions/mapreduce/Int64/dims=1 |
45158 ns |
44917.5 ns |
1.01 |
array/reductions/mapreduce/Int64/dims=2 |
62001 ns |
61973 ns |
1.00 |
array/reductions/mapreduce/Int64/dims=1L |
89497 ns |
89251 ns |
1.00 |
array/reductions/mapreduce/Int64/dims=2L |
89019.5 ns |
88531 ns |
1.01 |
array/reductions/mapreduce/Float32/1d |
39100 ns |
37472.5 ns |
1.04 |
array/reductions/mapreduce/Float32/dims=1 |
42509 ns |
52480 ns |
0.81 |
array/reductions/mapreduce/Float32/dims=2 |
60397 ns |
60249 ns |
1.00 |
array/reductions/mapreduce/Float32/dims=1L |
53503 ns |
52856 ns |
1.01 |
array/reductions/mapreduce/Float32/dims=2L |
73106 ns |
72524 ns |
1.01 |
array/broadcast |
20595 ns |
20030 ns |
1.03 |
array/copyto!/gpu_to_gpu |
13579 ns |
11386 ns |
1.19 |
array/copyto!/cpu_to_gpu |
216561.5 ns |
216869 ns |
1.00 |
array/copyto!/gpu_to_cpu |
285550 ns |
286424.5 ns |
1.00 |
array/accumulate/Int64/1d |
125492 ns |
124830 ns |
1.01 |
array/accumulate/Int64/dims=1 |
84291 ns |
83529 ns |
1.01 |
array/accumulate/Int64/dims=2 |
158835 ns |
157818 ns |
1.01 |
array/accumulate/Int64/dims=1L |
1710347 ns |
1709864 ns |
1.00 |
array/accumulate/Int64/dims=2L |
967254.5 ns |
966626.5 ns |
1.00 |
array/accumulate/Float32/1d |
110078.5 ns |
109404 ns |
1.01 |
array/accumulate/Float32/dims=1 |
81659 ns |
80482 ns |
1.01 |
array/accumulate/Float32/dims=2 |
148151 ns |
147930.5 ns |
1.00 |
array/accumulate/Float32/dims=1L |
1619775 ns |
1618960.5 ns |
1.00 |
array/accumulate/Float32/dims=2L |
698996 ns |
698784 ns |
1.00 |
array/construct |
1280.45 ns |
1292.1 ns |
0.99 |
array/random/randn/Float32 |
49493 ns |
45521 ns |
1.09 |
array/random/randn!/Float32 |
25381 ns |
24996 ns |
1.02 |
array/random/rand!/Int64 |
27613 ns |
27279 ns |
1.01 |
array/random/rand!/Float32 |
9026 ns |
8755 ns |
1.03 |
array/random/rand/Int64 |
30343 ns |
30194 ns |
1.00 |
array/random/rand/Float32 |
13275 ns |
13350 ns |
0.99 |
array/permutedims/4d |
60768 ns |
59834 ns |
1.02 |
array/permutedims/2d |
55013 ns |
53788.5 ns |
1.02 |
array/permutedims/3d |
55852 ns |
54729 ns |
1.02 |
array/sorting/1d |
2762493.5 ns |
2758146 ns |
1.00 |
array/sorting/by |
3370386 ns |
3344254 ns |
1.01 |
array/sorting/2d |
1090652 ns |
1080860 ns |
1.01 |
cuda/synchronization/stream/auto |
1035.8 ns |
1008.3 ns |
1.03 |
cuda/synchronization/stream/nonblocking |
7773.799999999999 ns |
7614.8 ns |
1.02 |
cuda/synchronization/stream/blocking |
823.6701030927835 ns |
789.4040404040404 ns |
1.04 |
cuda/synchronization/context/auto |
1173.3 ns |
1182.6 ns |
0.99 |
cuda/synchronization/context/nonblocking |
7514.5 ns |
8392.2 ns |
0.90 |
cuda/synchronization/context/blocking |
889.2037037037037 ns |
890.468085106383 ns |
1.00 |
This comment was automatically generated by workflow using github-action-benchmark.
Codecov Report
✅ All modified and coverable lines are covered by tests.
Additional details and impacted files
@@ Coverage Diff @@
## master #2919 +/- ##
==========================================
+ Coverage 88.39% 89.24% +0.84%
==========================================
Files 150 150
Lines 13206 13084 -122
==========================================
+ Hits 11674 11677 +3
+ Misses 1532 1407 -125
☔ View full report in Codecov by Sentry.
🚀 New features to boost your workflow:
|
I think these deprecated CUSPARSE functions were all removed in CUDA 12.0, so we can remove the wrapper code. The BSR methods are still present in 12.x, btw.