commit 4bd9658bceab5d4dc01e41b8410802fd3fef3859
parent 1cf91a1710beee16200b48487cacf9bc45bb4938
Author: Anders Damsgaard <anders.damsgaard@geo.au.dk>
Date: Mon, 13 Jan 2014 14:21:07 +0100
Increase the number of memory chunks to 19, so that 95% of the total device memory is overwritten
Diffstat:
3 files changed, 38 insertions(+), 17 deletions(-)
diff --git a/scrub.cu b/scrub.cu
@@ -6,6 +6,7 @@
#define VERSION "0.1"
#define VALUETOWRITE 1234
+#define MEMCHUNKS 19
__global__ void write_value(int* d_mem, long unsigned int n_ints,
unsigned int nx, unsigned int ny)
@@ -78,19 +79,24 @@ int main(int argc, char** argv)
size_t mem_size = prop.totalGlobalMem;
printf("global memory size: %lu bytes\n", mem_size);
- long unsigned int n_ints = mem_size/sizeof(int)/20;
+ long unsigned int n_ints = mem_size/sizeof(int)/(MEMCHUNKS+1);
printf("overwriting the first %ld bytes, corresponding to %ld int values "
"or the first %.1f%% of the global device memory.\n",
- n_ints*sizeof(int), n_ints,
- (float)100*n_ints*sizeof(int)/prop.totalGlobalMem);
- int* d_mem;
- if (cudaMalloc((void**)&d_mem, n_ints*sizeof(int))
- == cudaErrorMemoryAllocation) {
- fprintf(stderr, "Error: Could not allocate the requested amount of "
- "global memory on the device.\n");
- cudaDeviceReset();
- exit(EXIT_FAILURE);
+ n_ints*sizeof(int)*MEMCHUNKS, n_ints*MEMCHUNKS,
+ (float)100*n_ints*sizeof(int)*MEMCHUNKS/prop.totalGlobalMem);
+
+ int* d[MEMCHUNKS]; // array of device pointers
+ int i;
+ for (i=0; i<MEMCHUNKS; i++) {
+ if (cudaMalloc((void**)&d[i], n_ints*sizeof(int))
+ == cudaErrorMemoryAllocation) {
+ fprintf(stderr, "Error: Could not allocate the requested amount of "
+ "global memory on the device.\n");
+ cudaDeviceReset();
+ exit(EXIT_FAILURE);
+ }
}
+ checkForCudaErrors("After memory allocation");
dim3 dimBlock(prop.maxThreadsPerBlock, 1, 1);
unsigned int grid_size = iDivUp(n_ints, prop.maxThreadsPerBlock);
@@ -101,15 +107,18 @@ int main(int argc, char** argv)
exit(EXIT_FAILURE);
}
dim3 dimGrid(grid_size, 1, 1);
- //printf("dimBlock = %d,%d,%d\n", dimBlock.x, dimBlock.y, dimBlock.z);
- //printf("dimGrid = %d,%d,%d\n", dimGrid.x, dimGrid.y, dimGrid.z);
- write_value<<<dimGrid, dimBlock>>>(d_mem, n_ints, 1, 1);
- cudaThreadSynchronize();
- checkForCudaErrors("After write_value");
+ for (i=0; i<MEMCHUNKS; i++) {
+ write_value<<<dimGrid, dimBlock>>>(d[i], n_ints, 1, 1);
+ cudaThreadSynchronize();
+ checkForCudaErrors("After write_value", i);
+ }
- cudaFree(d_mem);
- checkForCudaErrors("After cudaFree(d_mem)");
+
+ for (i=0; i<MEMCHUNKS; i++) {
+ cudaFree(d[i]);
+ checkForCudaErrors("After cudaFree(d[i])", i);
+ }
cudaDeviceReset();
diff --git a/utility.cu b/utility.cu
@@ -17,6 +17,17 @@ void checkForCudaErrors(const char* checkpoint_description)
}
}
+void checkForCudaErrors(const char* checkpoint_description, int iteration) // overload that also reports the caller-supplied iteration number
+{ // checkpoint_description: label printed to identify where the check was made
+ cudaError_t err = cudaGetLastError(); // fetch the most recent CUDA runtime error (this call also resets it)
+ if (err != cudaSuccess) { // nothing is printed when no error is pending
+ fprintf(stderr, "CUDA error detected at: %s at iteration %d.\n"
+ "System error string: %s\n", checkpoint_description, iteration,
+ cudaGetErrorString(err)); // human-readable description of the error code
+ exit(EXIT_FAILURE); // terminate the process with a failure status
+ }
+}
+
//Round a / b to nearest higher integer value
unsigned int iDivUp(unsigned int a, unsigned int b)
{
diff --git a/utility.cuh b/utility.cuh
@@ -3,6 +3,7 @@
#define UTILITY_CUH_
void checkForCudaErrors(const char* checkpoint_description);
+void checkForCudaErrors(const char* checkpoint_description, int iteration);
unsigned int iDivUp(unsigned int a, unsigned int b);
#endif