The number of memory chunks increased to 95% of total memory - cuda-memscrub - Scrubs the global device memory of CUDA GPUs

commit 4bd9658bceab5d4dc01e41b8410802fd3fef3859
parent 1cf91a1710beee16200b48487cacf9bc45bb4938
Author: Anders Damsgaard <anders.damsgaard@geo.au.dk>
Date:   Mon, 13 Jan 2014 14:21:07 +0100

The number of memory chunks increased to 95% of total memory

Diffstat:
M scrub.cu  | 43 ++++++++++++++++++++++++++-----------------
M utility.cu  | 11 +++++++++++
M utility.cuh  | 1 +

3 files changed, 38 insertions(+), 17 deletions(-)
diff --git a/scrub.cu b/scrub.cu
@@ -6,6 +6,7 @@
 
 #define VERSION "0.1"
 #define VALUETOWRITE 1234
+#define MEMCHUNKS 19
 
 __global__ void write_value(int* d_mem, long unsigned int n_ints,
         unsigned int nx, unsigned int ny)
@@ -78,19 +79,24 @@ int main(int argc, char** argv)
     size_t mem_size = prop.totalGlobalMem;
     printf("global memory size: %lu bytes\n", mem_size);
 
-    long unsigned int n_ints = mem_size/sizeof(int)/20;
+    long unsigned int n_ints = mem_size/sizeof(int)/(MEMCHUNKS+1);
     printf("overwriting the first %ld bytes, corresponding to %ld int values "
             "or the first %.1f%% of the global device memory.\n",
-            n_ints*sizeof(int), n_ints,
-            (float)100*n_ints*sizeof(int)/prop.totalGlobalMem);
-    int* d_mem;
-    if (cudaMalloc((void**)&d_mem, n_ints*sizeof(int))
-            == cudaErrorMemoryAllocation) {
-        fprintf(stderr, "Error: Could not allocate the requested amount of "
-                "global memory on the device.\n");
-        cudaDeviceReset();
-        exit(EXIT_FAILURE);
+            n_ints*sizeof(int)*MEMCHUNKS, n_ints*MEMCHUNKS,
+            (float)100*n_ints*sizeof(int)*MEMCHUNKS/prop.totalGlobalMem);
+
+    int* d[MEMCHUNKS];  // array of device pointers
+    int i;
+    for (i=0; i<MEMCHUNKS; i++) {
+        if (cudaMalloc((void**)&d[i], n_ints*sizeof(int))
+                    == cudaErrorMemoryAllocation) {
+            fprintf(stderr, "Error: Could not allocate the requested amount of "
+                    "global memory on the device.\n");
+            cudaDeviceReset();
+            exit(EXIT_FAILURE);
+        }
     }
+    checkForCudaErrors("After memory allocation");
 
     dim3 dimBlock(prop.maxThreadsPerBlock, 1, 1);
     unsigned int grid_size = iDivUp(n_ints, prop.maxThreadsPerBlock);
@@ -101,15 +107,18 @@ int main(int argc, char** argv)
         exit(EXIT_FAILURE);
     }
     dim3 dimGrid(grid_size, 1, 1);
-    //printf("dimBlock = %d,%d,%d\n", dimBlock.x, dimBlock.y, dimBlock.z);
-    //printf("dimGrid = %d,%d,%d\n", dimGrid.x, dimGrid.y, dimGrid.z);
 
-    write_value<<<dimGrid, dimBlock>>>(d_mem, n_ints, 1, 1);
-    cudaThreadSynchronize();
-    checkForCudaErrors("After write_value");
+    for (i=0; i<MEMCHUNKS; i++) {
+        write_value<<<dimGrid, dimBlock>>>(d[i], n_ints, 1, 1);
+        cudaThreadSynchronize();
+        checkForCudaErrors("After write_value", i);
+    }
 
-    cudaFree(d_mem);
-    checkForCudaErrors("After cudaFree(d_mem)");
+
+    for (i=0; i<MEMCHUNKS; i++) {
+        cudaFree(d[i]);
+        checkForCudaErrors("After cudaFree(d[i])", i);
+    }
 
     cudaDeviceReset();
 
diff --git a/utility.cu b/utility.cu
@@ -17,6 +17,17 @@ void checkForCudaErrors(const char* checkpoint_description)
     }
 }
 
+void checkForCudaErrors(const char* checkpoint_description, int iteration)
+{   // Overload that also reports the caller-supplied iteration number.
+    cudaError_t err = cudaGetLastError();   // read (and clear) the last CUDA runtime error
+    if (err != cudaSuccess) {
+        fprintf(stderr, "CUDA error detected at: %s at iteration %d.\n"
+                "System error string: %s\n", checkpoint_description, iteration,
+                cudaGetErrorString(err));
+        exit(EXIT_FAILURE);   // terminates the process; no cudaDeviceReset() is issued here
+    }
+}
+
 //Round a / b to nearest higher integer value
 unsigned int iDivUp(unsigned int a, unsigned int b)
 {
diff --git a/utility.cuh b/utility.cuh
@@ -3,6 +3,7 @@
 #define UTILITY_CUH_
 
 void checkForCudaErrors(const char* checkpoint_description);
+void checkForCudaErrors(const char* checkpoint_description, int iteration);
 unsigned int iDivUp(unsigned int a, unsigned int b);
 
 #endif

	cuda-memscrub Scrubs the global device memory of CUDA GPUs
	git clone git://src.adamsgaard.dk/cuda-memscrub
	Log | Files | Refs | README

M	scrub.cu	|	43	++++++++++++++++++++++++++-----------------
M	utility.cu	|	11	+++++++++++
M	utility.cuh	|	1	+