cuda-memscrub

scrubs the global device memory of CUDA GPUs
git clone git://src.adamsgaard.dk/cuda-memscrub # fast
git clone https://src.adamsgaard.dk/cuda-memscrub.git # slow
Log | Files | Refs | README | LICENSE Back to index

scrub.cu (4333B)


      1 #include <stdio.h>
      2 #include <string.h>
      3 //#include <cuda.h>
      4 //#include <cutil.h>
      5 #include "utility.cuh"
      6 
      7 #define VERSION "0.1"  // version string printed by -v / --version
      8 #define VALUETOWRITE 1234  // int value the write_value kernel stores into every scrubbed element
      9 #define MEMCHUNKS 19  // number of separate device allocations; each holds 1/(MEMCHUNKS+1) of total global memory
     10 
     11 __global__ void write_value(int* d_mem, long unsigned int n_ints,
     12         unsigned int nx, unsigned int ny)
     13 {
     14     // 3d thread index
     15     unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
     16     unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
     17     unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
     18 
     19     // 1d thread index
     20     long unsigned int idx = x + nx*y + nx*ny*z;
     21 
     22     if (idx < n_ints) {
     23         __syncthreads();
     24         d_mem[idx] = VALUETOWRITE;
     25     }
     26 }
     27 
     28 int main(int argc, char** argv)
     29 {
     30     int target_device = 0;
     31 
     32     if (argc == 2 &&
     33             (strcmp(argv[1], "-v") == 0 || strcmp(argv[1], "--version") == 0)) {
     34         printf("CUDA memory scrubber, version %s\n", VERSION);
     35         printf("License GPLv3+: GNU GPL version 3 or later "
     36                 "http://gnu.org/licenses/gpl.html\n"
     37                 "There is NO WARRANTY, to the extent permitted by law.\n"
     38                 "Written by Anders Damsgaard <andersd@riseup.net>\n"
     39                 "Maintained at "
     40                 "https://github.com/anders-dc/cuda-memscrub\n");
     41         exit(0);
     42     } else if (argc == 3 &&
     43             (strcmp(argv[1], "-d") == 0 || strcmp(argv[1], "--device") == 0)) {
     44         target_device = atoi(argv[2]);
     45     } else if (argc == 2 &&
     46             (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0)) {
     47         printf("CUDA memory scrubber. Usage:\n"
     48                 " %s [OPTIONS]\n", argv[0]);
     49         printf("Options:\n"
     50                 "\t-h, --help\t\tshow this information\n"
     51                 "\t-v, --version\t\tshow version information\n"
     52                 "\t-d <n>, --device <n>\tscrub device with index n\n");
     53         exit(0);
     54     } else if (argc > 1) {
     55         fprintf(stderr, "argument not understood. See %s for usage "
     56                 "information\n", argv[0]);
     57         exit(EXIT_FAILURE);
     58     }
     59 
     60     checkForCudaErrors("Before initializing CUDA device");
     61 
     62     int device_count;
     63     cudaGetDeviceCount(&device_count);
     64     cudaDeviceProp prop;
     65 
     66     if (device_count == 0) {
     67         fprintf(stderr, "Error: No CUDA-enabled devices available. Bye.\n");
     68         exit(EXIT_FAILURE);
     69     } else if (target_device >= device_count) {
     70         fprintf(stderr, "Error: No CUDA-enabled device by id %d is detected.\n",
     71                 target_device);
     72         exit(EXIT_FAILURE);
     73     } else {
     74         cudaGetDeviceProperties(&prop, target_device);
     75         printf("target: device %d, %s\n", target_device, prop.name);
     76         cudaChooseDevice(&target_device, &prop);
     77     }
     78 
     79     size_t mem_size = prop.totalGlobalMem;
     80     printf("global memory size: %lu bytes\n", mem_size);
     81 
     82     long unsigned int n_ints = mem_size/sizeof(int)/(MEMCHUNKS+1);
     83     printf("overwriting the first %ld bytes, corresponding to %ld int values "
     84             "or the first %.1f%% of the global device memory.\n",
     85             n_ints*sizeof(int)*MEMCHUNKS, n_ints*MEMCHUNKS,
     86             (float)100*n_ints*sizeof(int)*MEMCHUNKS/prop.totalGlobalMem);
     87 
     88     int* d[MEMCHUNKS];  // array of device pointers
     89     int i;
     90     for (i=0; i<MEMCHUNKS; i++) {
     91         if (cudaMalloc((void**)&d[i], n_ints*sizeof(int))
     92                     == cudaErrorMemoryAllocation) {
     93             fprintf(stderr, "Error: Could not allocate the requested amount of "
     94                     "global memory on the device.\n");
     95             cudaDeviceReset();
     96             exit(EXIT_FAILURE);
     97         }
     98     }
     99     checkForCudaErrors("After memory allocation");
    100 
    101     dim3 dimBlock(prop.maxThreadsPerBlock, 1, 1);
    102     unsigned int grid_size = iDivUp(n_ints, prop.maxThreadsPerBlock);
    103     if (grid_size > prop.maxGridSize[0]) {
    104         fprintf(stderr, "Error: The device cannot handle a grid large enough to"
    105                 " handle the array size\n");
    106         cudaDeviceReset();
    107         exit(EXIT_FAILURE);
    108     }
    109     dim3 dimGrid(grid_size, 1, 1);
    110 
    111     for (i=0; i<MEMCHUNKS; i++) {
    112         write_value<<<dimGrid, dimBlock>>>(d[i], n_ints, 1, 1);
    113         cudaThreadSynchronize();
    114         checkForCudaErrors("After write_value", i);
    115     }
    116 
    117 
    118     for (i=0; i<MEMCHUNKS; i++) {
    119         cudaFree(d[i]);
    120         checkForCudaErrors("After cudaFree(d[i])", i);
    121     }
    122 
    123     cudaDeviceReset();
    124 
    125     return 0;
    126 }