Skip to content

Commit 3bb9c5f

Browse files
committed
Improve usability by improving direct assignment of AccessProxy
1 parent f80cb05 commit 3bb9c5f

File tree

2 files changed

+27
-32
lines changed

2 files changed

+27
-32
lines changed

examples/basic/normal.cu

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,21 +15,7 @@ inline void checkCudaError(cudaError_t err) {
1515
__global__ void kernel(int prob_size, CudaMav<int> * input, CudaMav<int> * output){
1616
int id = threadIdx.x + blockIdx.x * blockDim.x;
1717
if (id < prob_size) {
18-
// This works
19-
int temp = (int)(*input)[id];
20-
(*output)[id] = temp;
21-
22-
// This also works
23-
//(*output)[id] = (int)(*input)[id];
24-
25-
// Print to console
26-
//printf("input[%d] = %d\n", id, static_cast<int>((*input)[id]));
27-
28-
// This does not work for some reason
29-
//(*output)[id] = (*input)[id];
30-
31-
// This also does not work
32-
//output->operator[](id) = input->operator[](id);
18+
(*output)[id] = (*input)[id];
3319
}
3420

3521
}

src/cuda_mav.cu

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#include <stdexcept>
2-
#include <stdio.h>
2+
#include <cstdio>
33
#include <fstream>
44
#include <functional>
55
#include <sstream>
@@ -16,7 +16,6 @@ class CudaMav
1616
int blockDimY;
1717
int blockDimZ;
1818
int warpSize;
19-
int statusMessage;
2019
unsigned int originalSize_read;
2120
unsigned int currentSize_read;
2221
unsigned int originalSize_write;
@@ -28,16 +27,17 @@ class CudaMav
2827
struct MemoryAccessLog {
2928
// Store the address which was addressed
3029
T* address;
30+
3131
// Store the thread id which accessed the address
32-
int threadId;
32+
unsigned int threadId;
3333
// Store the block id which accessed the address
34-
int blockId;
34+
unsigned int blockId;
3535

3636
// Constructor which stores the accessed address together with the block id and thread id
3737
__host__ __device__ MemoryAccessLog(T* address, int blockId, int threadId) : address(address), threadId(threadId), blockId(blockId) {}
3838

3939
// Empty constructor
40-
__host__ __device__ MemoryAccessLog() : address(nullptr), threadId(-1), blockId(-1) {}
40+
__host__ __device__ MemoryAccessLog() : address(nullptr), threadId(0), blockId(0) {}
4141
};
4242

4343
private:
@@ -80,13 +80,23 @@ private:
8080
AccessProxy() = delete;
8181

8282
// Overload the assignment operator so we can write to the array
83-
__device__ void operator = (T value) {
83+
__device__ AccessProxy &operator = (const T &value) {
8484
cudaMav->set(index, value);
85+
return *this;
86+
}
87+
88+
// When one array element is assigned directly from another (e.g. a[i] = b[j]), an AccessProxy is assigned to an AccessProxy
89+
// For this reason we need to define the assignment operator for AccessProxy, so that the actual values get changed
90+
__device__ AccessProxy &operator = (const AccessProxy &other) {
91+
if (this != &other) {
92+
cudaMav->set(index, other.cudaMav->get(other.index));
93+
}
94+
return *this;
8595
}
8696

87-
// Overload the cast operator so we can read from the array
88-
// Leaving the explicit out, wont throw an error, but might result in unexpected behaviour
89-
__device__ explicit operator T() {
97+
// Overload the cast operator, so we can read from the array
98+
// Omitting `explicit` won't cause an error, but the implicit conversion might result in unexpected behaviour
99+
__device__ /*explicit*/ operator T() const {
90100
return cudaMav->get(index);
91101
}
92102
};
@@ -230,7 +240,7 @@ public:
230240
__host__ CudaMav(T* array_data, unsigned int size = 100000)
231241
{
232242

233-
h_constantData = new GlobalSettings{ -1, -1, -1, -1, -1, -1, -1, 0, size, 0, size, 0};
243+
h_constantData = new GlobalSettings{ -1, -1, -1, -1, -1, -1, -1, size, 0, size, 0};
234244

235245
// Allocate the memory on the device for the d_constantData and check if it was successful
236246
checkCudaError(cudaMalloc(&d_constantData, sizeof(GlobalSettings)), "Could not allocate array to store kernel data on device.");
@@ -277,12 +287,11 @@ public:
277287
d_constantData->blockDimZ = blockDim.z;
278288
// Store the warp size
279289
d_constantData->warpSize = warpSize;
280-
};
281-
290+
}
282291

283292
// Get the block and thread id
284-
int blockId = blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * gridDim.y * blockIdx.z;
285-
int threadId = threadIdx.x + threadIdx.y * blockDim.x + blockDim.x * blockDim.y * threadIdx.z;
293+
unsigned int blockId = blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * gridDim.y * blockIdx.z;
294+
unsigned int threadId = threadIdx.x + threadIdx.y * blockDim.x + blockDim.x * blockDim.y * threadIdx.z;
286295

287296
// Get the address of the data
288297
T* address = &d_data[index];
@@ -323,11 +332,11 @@ public:
323332
d_constantData->blockDimZ = blockDim.z;
324333
// Store the warp size
325334
d_constantData->warpSize = warpSize;
326-
};
335+
}
327336

328337
// Get the block and thread id
329-
int blockId = blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * gridDim.y * blockIdx.z;
330-
int threadId = threadIdx.x + threadIdx.y * blockDim.x + blockDim.x * blockDim.y * threadIdx.z;
338+
unsigned int blockId = blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * gridDim.y * blockIdx.z;
339+
unsigned int threadId = threadIdx.x + threadIdx.y * blockDim.x + blockDim.x * blockDim.y * threadIdx.z;
331340

332341
// Get the address of the data
333342
T* address = &d_data[index];

0 commit comments

Comments
 (0)