scrub.cu (4333B)
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//#include <cuda.h>
//#include <cutil.h>
#include "utility.cuh"

#define VERSION "0.1"
// Integer value stored in every overwritten element of device memory.
#define VALUETOWRITE 1234
// Global memory is divided into MEMCHUNKS+1 equally sized parts, of which
// MEMCHUNKS are allocated and overwritten.
#define MEMCHUNKS 19

/*
 * Write VALUETOWRITE to the first n_ints elements of d_mem, one element per
 * thread.
 *
 * d_mem   device pointer to at least n_ints ints
 * n_ints  number of elements to overwrite
 * nx, ny  x- and y-extents used to flatten the 3D thread index
 *         (idx = x + nx*y + nx*ny*z)
 *
 * Threads whose flattened index is >= n_ints do nothing, so the launch may
 * contain more threads than elements. No shared memory is used and no
 * block-level synchronization is needed.
 */
__global__ void write_value(int* d_mem, long unsigned int n_ints,
        unsigned int nx, unsigned int ny)
{
    // 3d thread index; widened before multiplying so that large grids do not
    // wrap around in 32-bit arithmetic
    const long unsigned int x =
        (long unsigned int)blockDim.x * blockIdx.x + threadIdx.x;
    const long unsigned int y =
        (long unsigned int)blockDim.y * blockIdx.y + threadIdx.y;
    const long unsigned int z =
        (long unsigned int)blockDim.z * blockIdx.z + threadIdx.z;

    // 1d thread index
    const long unsigned int idx = x + nx*y + (long unsigned int)nx*ny*z;

    // Bounds guard for the tail of the grid. There is deliberately no
    // __syncthreads() here: a barrier inside this divergent branch is
    // undefined behavior, and the threads do not share any data.
    if (idx < n_ints) {
        d_mem[idx] = VALUETOWRITE;
    }
}

/*
 * CUDA memory scrubber entry point.
 *
 * Command line:
 *   -h, --help            show usage and exit
 *   -v, --version         show version information and exit
 *   -d <n>, --device <n>  scrub device with index n (default 0)
 *
 * Allocates MEMCHUNKS buffers on the selected device, each
 * totalGlobalMem/sizeof(int)/(MEMCHUNKS+1) ints long, fills each buffer with
 * VALUETOWRITE, frees the buffers and resets the device.
 *
 * Returns 0 on success; exits with EXIT_FAILURE on bad arguments or when a
 * CUDA call handled here fails. checkForCudaErrors() and iDivUp() are declared
 * in utility.cuh, which is not visible from this file.
 */
int main(int argc, char** argv)
{
    int target_device = 0;

    if (argc == 2 &&
            (strcmp(argv[1], "-v") == 0 || strcmp(argv[1], "--version") == 0)) {
        printf("CUDA memory scrubber, version %s\n", VERSION);
        printf("License GPLv3+: GNU GPL version 3 or later "
                "http://gnu.org/licenses/gpl.html\n"
                "There is NO WARRANTY, to the extent permitted by law.\n"
                "Written by Anders Damsgaard <andersd@riseup.net>\n"
                "Maintained at "
                "https://github.com/anders-dc/cuda-memscrub\n");
        exit(0);
    } else if (argc == 3 &&
            (strcmp(argv[1], "-d") == 0 || strcmp(argv[1], "--device") == 0)) {
        // Parse strictly: reject empty, non-numeric, negative and
        // out-of-range values instead of silently falling back to device 0.
        char* end = NULL;
        const long parsed = strtol(argv[2], &end, 10);
        if (end == argv[2] || *end != '\0' || parsed < 0 || parsed > INT_MAX) {
            fprintf(stderr, "Error: invalid device index '%s'\n", argv[2]);
            exit(EXIT_FAILURE);
        }
        target_device = (int)parsed;
    } else if (argc == 2 &&
            (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0)) {
        printf("CUDA memory scrubber. Usage:\n"
                " %s [OPTIONS]\n", argv[0]);
        printf("Options:\n"
                "\t-h, --help\t\tshow this information\n"
                "\t-v, --version\t\tshow version information\n"
                "\t-d <n>, --device <n>\tscrub device with index n\n");
        exit(0);
    } else if (argc > 1) {
        fprintf(stderr, "argument not understood. See %s for usage "
                "information\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    checkForCudaErrors("Before initializing CUDA device");

    cudaError_t err;
    int device_count = 0;
    err = cudaGetDeviceCount(&device_count);
    cudaDeviceProp prop;

    if (err != cudaSuccess || device_count == 0) {
        fprintf(stderr, "Error: No CUDA-enabled devices available. Bye.\n");
        if (err != cudaSuccess) {
            fprintf(stderr, "(%s)\n", cudaGetErrorString(err));
        }
        exit(EXIT_FAILURE);
    } else if (target_device >= device_count) {
        fprintf(stderr, "Error: No CUDA-enabled device by id %d is detected.\n",
                target_device);
        exit(EXIT_FAILURE);
    }

    err = cudaGetDeviceProperties(&prop, target_device);
    if (err != cudaSuccess) {
        fprintf(stderr, "Error: Could not query properties of device %d: %s\n",
                target_device, cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
    printf("target: device %d, %s\n", target_device, prop.name);

    // Make the requested device current. cudaChooseDevice() only returns the
    // device that best matches a property set; it does not select anything.
    err = cudaSetDevice(target_device);
    if (err != cudaSuccess) {
        fprintf(stderr, "Error: Could not select device %d: %s\n",
                target_device, cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }

    const size_t mem_size = prop.totalGlobalMem;
    printf("global memory size: %zu bytes\n", mem_size);

    // Number of ints per chunk; MEMCHUNKS of the MEMCHUNKS+1 parts are used.
    const long unsigned int n_ints = mem_size/sizeof(int)/(MEMCHUNKS+1);
    const size_t chunk_bytes = (size_t)n_ints*sizeof(int);
    const size_t total_bytes = chunk_bytes*MEMCHUNKS;
    printf("overwriting the first %zu bytes, corresponding to %lu int values "
            "or the first %.1f%% of the global device memory.\n",
            total_bytes, n_ints*MEMCHUNKS,
            100.0*(double)total_bytes/(double)mem_size);

    int* d[MEMCHUNKS];  // array of device pointers
    int i;
    for (i=0; i<MEMCHUNKS; i++) {
        // Treat every failure as fatal, not only out-of-memory, so that no
        // uninitialized pointer is ever passed to the kernel.
        err = cudaMalloc((void**)&d[i], chunk_bytes);
        if (err != cudaSuccess) {
            fprintf(stderr, "Error: Could not allocate the requested amount of "
                    "global memory on the device.\n");
            fprintf(stderr, "(chunk %d: %s)\n", i, cudaGetErrorString(err));
            cudaDeviceReset();  // also releases the chunks allocated so far
            exit(EXIT_FAILURE);
        }
    }
    checkForCudaErrors("After memory allocation");

    // 1D launch: one thread per int, blocks of the maximum supported size
    dim3 dimBlock(prop.maxThreadsPerBlock, 1, 1);
    unsigned int grid_size = iDivUp(n_ints, prop.maxThreadsPerBlock);
    if (grid_size > (unsigned int)prop.maxGridSize[0]) {
        fprintf(stderr, "Error: The device cannot handle a grid large enough to"
                " handle the array size\n");
        cudaDeviceReset();
        exit(EXIT_FAILURE);
    }
    dim3 dimGrid(grid_size, 1, 1);

    for (i=0; i<MEMCHUNKS; i++) {
        // nx = ny = 1: with a 1D launch the flattened index equals x
        write_value<<<dimGrid, dimBlock>>>(d[i], n_ints, 1, 1);
        // Wait for the kernel so execution errors are attributed to this chunk
        err = cudaDeviceSynchronize();
        if (err != cudaSuccess) {
            fprintf(stderr, "Error: write_value failed on chunk %d: %s\n",
                    i, cudaGetErrorString(err));
        }
        checkForCudaErrors("After write_value", i);
    }

    for (i=0; i<MEMCHUNKS; i++) {
        cudaFree(d[i]);
        checkForCudaErrors("After cudaFree(d[i])", i);
    }

    cudaDeviceReset();

    return 0;
}