Skip to content

Commit b33d168

Browse files
committed
Add file test for CUDA installation
1 parent 329f3c1 commit b33d168

File tree

2 files changed

+42
-0
lines changed

2 files changed

+42
-0
lines changed

hello

643 KB
Binary file not shown.

hello.cu

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
2+
3+
// SAXPY kernel: for each index i below n, overwrites y[i] with a*x[i] + y[i].
//
// Every thread derives a single element index from the x component of its
// block and thread coordinates, so the launch must supply at least n threads
// along x for all n elements to be processed. Threads whose index is n or
// larger leave memory untouched.
//
//   n  number of elements to process
//   a  scalar multiplier applied to x
//   x  input vector, read only by this kernel
//   y  input/output vector, updated in place
__global__
void saxpy(int n, float a, float *x, float *y)
{
    const int idx = blockIdx.x*blockDim.x + threadIdx.x;

    // Surplus threads in the last block have nothing to do.
    if (idx >= n) {
        return;
    }

    y[idx] = a*x[idx] + y[idx];
}
9+
10+
// Evaluates a CUDA runtime call and, if it did not return cudaSuccess, prints
// the source location plus the runtime's error string to stderr and exits
// with a failure status. Without this, a broken installation would only show
// up as a wrong "Max error" value with no hint about the cause.
#define CUDA_CHECK(call)                                                    \
    do {                                                                    \
        cudaError_t status_ = (call);                                       \
        if (status_ != cudaSuccess) {                                       \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,          \
                    __LINE__, cudaGetErrorString(status_));                 \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)

// Smoke test for a CUDA installation: runs SAXPY (y = 2*x + y) over 1M
// elements on the device with x = 1 and y = 2, copies the result back, and
// prints the largest deviation from the expected value 4.
//
// Returns EXIT_SUCCESS when every CUDA call succeeded and the result matches;
// EXIT_FAILURE otherwise.
int main(void)
{
    const int N = 1 << 20;
    const size_t bytes = (size_t)N * sizeof(float);

    // Launch geometry: enough 256-thread blocks to cover all N elements.
    const int threadsPerBlock = 256;
    const int blocks = (N + threadsPerBlock - 1) / threadsPerBlock;

    const float a = 2.0f;
    const float expected = 4.0f;  // a * 1.0f + 2.0f

    float *x = (float*)malloc(bytes);
    float *y = (float*)malloc(bytes);
    if (x == NULL || y == NULL) {
        fprintf(stderr, "Host allocation of %zu bytes failed\n", bytes);
        free(x);
        free(y);
        return EXIT_FAILURE;
    }

    float *d_x = NULL;
    float *d_y = NULL;
    CUDA_CHECK(cudaMalloc(&d_x, bytes));
    CUDA_CHECK(cudaMalloc(&d_y, bytes));

    for (int i = 0; i < N; i++) {
        x[i] = 1.0f;
        y[i] = 2.0f;
    }

    CUDA_CHECK(cudaMemcpy(d_x, x, bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_y, y, bytes, cudaMemcpyHostToDevice));

    // Perform SAXPY on 1M elements
    saxpy<<<blocks, threadsPerBlock>>>(N, a, d_x, d_y);
    // A kernel launch returns nothing, so query the runtime for launch errors.
    CUDA_CHECK(cudaGetLastError());

    // Blocking copy: also reports any error raised while the kernel executed.
    CUDA_CHECK(cudaMemcpy(y, d_y, bytes, cudaMemcpyDeviceToHost));

    float maxError = 0.0f;
    for (int i = 0; i < N; i++) {
        maxError = fmaxf(maxError, fabsf(y[i] - expected));
    }
    printf("Max error: %f\n", maxError);

    CUDA_CHECK(cudaFree(d_x));
    CUDA_CHECK(cudaFree(d_y));
    free(x);
    free(y);

    // Let scripts detect a wrong result through the exit status as well.
    return (maxError <= 1e-5f) ? EXIT_SUCCESS : EXIT_FAILURE;
}

0 commit comments

Comments
 (0)