1
1
#include < stdexcept>
2
- #include < stdio.h >
2
+ #include < cstdio >
3
3
#include < fstream>
4
4
#include < functional>
5
5
#include < sstream>
@@ -16,7 +16,6 @@ class CudaMav
16
16
int blockDimY;
17
17
int blockDimZ;
18
18
int warpSize ;
19
- int statusMessage;
20
19
unsigned int originalSize_read;
21
20
unsigned int currentSize_read;
22
21
unsigned int originalSize_write;
@@ -28,16 +27,17 @@ class CudaMav
28
27
struct MemoryAccessLog {
29
28
// Store the address which was addressed
30
29
T* address;
30
+
31
31
// Store the thread id which accessed the address
32
- int threadId;
32
+ unsigned int threadId;
33
33
// Store the block id which accessed the address
34
- int blockId;
34
+ unsigned int blockId;
35
35
36
36
// Constructor which stores the accessed address together with the block id and thread id of the accessing thread
37
37
__host__ __device__ MemoryAccessLog (T* address, int blockId, int threadId) : address(address), threadId(threadId), blockId(blockId) {}
38
38
39
39
// Empty constructor
40
- __host__ __device__ MemoryAccessLog () : address(nullptr ), threadId(- 1 ), blockId(- 1 ) {}
40
+ __host__ __device__ MemoryAccessLog () : address(nullptr ), threadId(0 ), blockId(0 ) {}
41
41
};
42
42
43
43
private:
@@ -80,13 +80,23 @@ private:
80
80
AccessProxy () = delete ;
81
81
82
82
// Overload the assignment operator so we can write to the array
83
- __device__ void operator = (T value) {
83
+ __device__ AccessProxy & operator = (const T & value) {
84
84
cudaMav->set (index , value);
85
+ return *this ;
86
+ }
87
+
88
+ // When the result of one array access is assigned to another array access, an AccessProxy is assigned to an AccessProxy
89
+ // For this reason we define the assignment operator taking an AccessProxy, so that the value read from the other proxy is written through this one
90
+ __device__ AccessProxy &operator = (const AccessProxy &other) {
91
+ if (this != &other) {
92
+ cudaMav->set (index , other.cudaMav ->get (other.index ));
93
+ }
94
+ return *this ;
85
95
}
86
96
87
- // Overload the cast operator so we can read from the array
88
- // Leaving the explicit out, wont throw an error, but might result in unexpected behaviour
89
- __device__ explicit operator T () {
97
+ // Overload the cast operator, so we can read from the array
98
+ // Leaving the explicit out, won't throw an error, but might result in unexpected behaviour
99
+ __device__ /* explicit*/ operator T () const {
90
100
return cudaMav->get (index );
91
101
}
92
102
};
@@ -230,7 +240,7 @@ public:
230
240
__host__ CudaMav (T* array_data, unsigned int size = 100000 )
231
241
{
232
242
233
- h_constantData = new GlobalSettings{ -1 , -1 , -1 , -1 , -1 , -1 , -1 , 0 , size, 0 , size, 0 };
243
+ h_constantData = new GlobalSettings{ -1 , -1 , -1 , -1 , -1 , -1 , -1 , size, 0 , size, 0 };
234
244
235
245
// Allocate the memory on the device for the d_constantData and check if it was successful
236
246
checkCudaError (cudaMalloc (&d_constantData, sizeof (GlobalSettings)), " Could not allocate array to store kernel data on device." );
@@ -277,12 +287,11 @@ public:
277
287
d_constantData->blockDimZ = blockDim .z ;
278
288
// Store the warp size
279
289
d_constantData->warpSize = warpSize ;
280
- };
281
-
290
+ }
282
291
283
292
// Get the block and thread id
284
- int blockId = blockIdx .x + blockIdx .y * gridDim .x + gridDim .x * gridDim .y * blockIdx .z ;
285
- int threadId = threadIdx .x + threadIdx .y * blockDim .x + blockDim .x * blockDim .y * threadIdx .z ;
293
+ unsigned int blockId = blockIdx .x + blockIdx .y * gridDim .x + gridDim .x * gridDim .y * blockIdx .z ;
294
+ unsigned int threadId = threadIdx .x + threadIdx .y * blockDim .x + blockDim .x * blockDim .y * threadIdx .z ;
286
295
287
296
// Get the address of the data
288
297
T* address = &d_data[index ];
@@ -323,11 +332,11 @@ public:
323
332
d_constantData->blockDimZ = blockDim .z ;
324
333
// Store the warp size
325
334
d_constantData->warpSize = warpSize ;
326
- };
335
+ }
327
336
328
337
// Get the block and thread id
329
- int blockId = blockIdx .x + blockIdx .y * gridDim .x + gridDim .x * gridDim .y * blockIdx .z ;
330
- int threadId = threadIdx .x + threadIdx .y * blockDim .x + blockDim .x * blockDim .y * threadIdx .z ;
338
+ unsigned int blockId = blockIdx .x + blockIdx .y * gridDim .x + gridDim .x * gridDim .y * blockIdx .z ;
339
+ unsigned int threadId = threadIdx .x + threadIdx .y * blockDim .x + blockDim .x * blockDim .y * threadIdx .z ;
331
340
332
341
// Get the address of the data
333
342
T* address = &d_data[index ];
0 commit comments