cuda-memscrub

Scrubs the global device memory of CUDA GPUs
git clone git://src.adamsgaard.dk/cuda-memscrub
Log | Files | Refs | README

commit 4bd9658bceab5d4dc01e41b8410802fd3fef3859
parent 1cf91a1710beee16200b48487cacf9bc45bb4938
Author: Anders Damsgaard <anders.damsgaard@geo.au.dk>
Date:   Mon, 13 Jan 2014 14:21:07 +0100

The number of memory chunks increased to 95% of total memory

Diffstat:
M scrub.cu    | 43 ++++++++++++++++++++++++++-----------------
M utility.cu  | 11 +++++++++++
M utility.cuh |  1 +
3 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/scrub.cu b/scrub.cu
@@ -6,6 +6,7 @@

 #define VERSION "0.1"
 #define VALUETOWRITE 1234
+#define MEMCHUNKS 19

 __global__ void write_value(int* d_mem, long unsigned int n_ints,
         unsigned int nx, unsigned int ny)
@@ -78,19 +79,24 @@ int main(int argc, char** argv)
     size_t mem_size = prop.totalGlobalMem;
     printf("global memory size: %lu bytes\n", mem_size);

-    long unsigned int n_ints = mem_size/sizeof(int)/20;
+    long unsigned int n_ints = mem_size/sizeof(int)/(MEMCHUNKS+1);
     printf("overwriting the first %ld bytes, corresponding to %ld int values "
             "or the first %.1f%% of the global device memory.\n",
-            n_ints*sizeof(int), n_ints,
-            (float)100*n_ints*sizeof(int)/prop.totalGlobalMem);
-    int* d_mem;
-    if (cudaMalloc((void**)&d_mem, n_ints*sizeof(int))
-            == cudaErrorMemoryAllocation) {
-        fprintf(stderr, "Error: Could not allocate the requested amount of "
-                "global memory on the device.\n");
-        cudaDeviceReset();
-        exit(EXIT_FAILURE);
+            n_ints*sizeof(int)*MEMCHUNKS, n_ints*MEMCHUNKS,
+            (float)100*n_ints*sizeof(int)*MEMCHUNKS/prop.totalGlobalMem);
+
+    int* d[MEMCHUNKS]; // array of device pointers
+    int i;
+    for (i=0; i<MEMCHUNKS; i++) {
+        if (cudaMalloc((void**)&d[i], n_ints*sizeof(int))
+                == cudaErrorMemoryAllocation) {
+            fprintf(stderr, "Error: Could not allocate the requested amount of "
+                    "global memory on the device.\n");
+            cudaDeviceReset();
+            exit(EXIT_FAILURE);
+        }
     }
+
     checkForCudaErrors("After memory allocation");
     dim3 dimBlock(prop.maxThreadsPerBlock, 1, 1);
     unsigned int grid_size = iDivUp(n_ints, prop.maxThreadsPerBlock);
@@ -101,15 +107,18 @@ int main(int argc, char** argv)
         exit(EXIT_FAILURE);
     }
     dim3 dimGrid(grid_size, 1, 1);
-    //printf("dimBlock = %d,%d,%d\n", dimBlock.x, dimBlock.y, dimBlock.z);
-    //printf("dimGrid = %d,%d,%d\n", dimGrid.x, dimGrid.y, dimGrid.z);
-    write_value<<<dimGrid, dimBlock>>>(d_mem, n_ints, 1, 1);
-    cudaThreadSynchronize();
-    checkForCudaErrors("After write_value");
+    for (i=0; i<MEMCHUNKS; i++) {
+        write_value<<<dimGrid, dimBlock>>>(d[i], n_ints, 1, 1);
+        cudaThreadSynchronize();
+        checkForCudaErrors("After write_value", i);
+    }


-    cudaFree(d_mem);
-    checkForCudaErrors("After cudaFree(d_mem)");
+
+    for (i=0; i<MEMCHUNKS; i++) {
+        cudaFree(d[i]);
+        checkForCudaErrors("After cudaFree(d[i])", i);
+    }

     cudaDeviceReset();

diff --git a/utility.cu b/utility.cu
@@ -17,6 +17,17 @@ void checkForCudaErrors(const char* checkpoint_description)
     }
 }

+void checkForCudaErrors(const char* checkpoint_description, int iteration)
+{
+    cudaError_t err = cudaGetLastError();
+    if (err != cudaSuccess) {
+        fprintf(stderr, "CUDA error detected at: %s at iteration %d.\n"
+                "System error string: %s\n", checkpoint_description, iteration,
+                cudaGetErrorString(err));
+        exit(EXIT_FAILURE);
+    }
+}
+
 //Round a / b to nearest higher integer value
 unsigned int iDivUp(unsigned int a, unsigned int b)
 {
diff --git a/utility.cuh b/utility.cuh
@@ -3,6 +3,7 @@
 #define UTILITY_CUH_

 void checkForCudaErrors(const char* checkpoint_description);
+void checkForCudaErrors(const char* checkpoint_description, int iteration);
 unsigned int iDivUp(unsigned int a, unsigned int b);

 #endif