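// cs193g course mirror
// (Repository web-view residue: "You can not select more than 25 topics.
// Topics must start with a letter or number, can include dashes ('-') and
// can be up to 35 characters long.")
// Original listing size as reported by the web view: 89 lines, 2.5 KiB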

// This example demonstrates how to check for CUDA errors,
// which indicate programming bugs, or otherwise broken
// program behavior.
#include <stdlib.h>
#include <stdio.h>
// The familiar "fill-with-7" kernel: each launched thread writes the value 7
// into exactly one element of `array`, selected by its flat 1D global index.
//
// Launch layout: 1D grid of 1D blocks. There is no bounds check, so the total
// number of launched threads (gridDim.x * blockDim.x) must not exceed the
// number of elements `array` points to. No shared memory is used.
__global__ void kernel(int *array)
{
    // flat global position of this thread across the whole grid
    const int slot = threadIdx.x + blockDim.x * blockIdx.x;

    array[slot] = 7;
}
// Demonstrates how to detect a CUDA error after a kernel launch.
//
// Allocates a 256-element int array on the host and on the device, then
// DELIBERATELY launches `kernel` with the host pointer so that the launch
// fails on the device. The failure is picked up by checking the launch
// status and the result of a blocking synchronize.
//
// Returns 0 on success and 1 if allocation, the kernel, or the copy back to
// the host failed. All allocations are released on every path.
int main(void)
{
    // create arrays of 256 elements
    const int num_elements = 256;

    // compute the size of the arrays in bytes (size_t: the type the
    // allocation and copy APIs actually take)
    const size_t num_bytes = num_elements * sizeof(int);

    // pointers to host & device arrays
    int *device_array = 0;
    int *host_array = 0;

    // malloc a host array
    host_array = (int*)malloc(num_bytes);

    // cudaMalloc a device array; keep the status instead of relying only on
    // the output pointer
    cudaError_t error = cudaMalloc((void**)&device_array, num_bytes);
    if(error != cudaSuccess)
    {
        // make sure the cleanup below never frees a garbage pointer
        device_array = 0;
    }

    // if either memory allocation failed, report an error message
    if(host_array == 0 || device_array == 0)
    {
        printf("couldn't allocate memory\n");

        // release whichever allocation did succeed (both calls accept null)
        free(host_array);
        cudaFree(device_array);
        return 1;
    }

    // choose a grid configuration
    // (num_elements is an exact multiple of block_size, which the kernel
    // relies on because it performs no bounds check)
    const int block_size = 128;
    const int grid_size = num_elements / block_size;

    // INTENTIONAL BUG for demonstration purposes:
    // pass the kernel a pointer to the host array. this will cause the
    // kernel to crash when it attempts to dereference a host pointer
    kernel<<<grid_size,block_size>>>(host_array);

    // first pick up errors from the launch itself (e.g. a bad configuration)
    error = cudaGetLastError();

    // then make the host wait until the kernel is finished executing;
    // errors that occur while the kernel runs are only reported by a
    // synchronizing call. cudaDeviceSynchronize replaces the deprecated
    // cudaThreadSynchronize.
    //
    // this kind of "blocking" operation is usually only appropriate during
    // the debugging process because it forces the host to wait on the device
    // while it could be busy doing other things. once the code has been
    // debugged, frequent error checking code should be eliminated or disabled
    if(error == cudaSuccess)
    {
        error = cudaDeviceSynchronize();
    }

    int exit_code = 0;

    if(error != cudaSuccess)
    {
        // something's gone wrong
        // print out the CUDA error as a string
        printf("CUDA Error: %s\n", cudaGetErrorString(error));

        // we can't recover from the error -- exit after cleaning up
        exit_code = 1;
    }
    else
    {
        // no error occurred, proceed as usual
        // download and inspect the result on the host:
        error = cudaMemcpy(host_array, device_array, num_bytes, cudaMemcpyDeviceToHost);

        if(error != cudaSuccess)
        {
            // don't print host memory that was never filled in
            printf("CUDA Error: %s\n", cudaGetErrorString(error));
            exit_code = 1;
        }
        else
        {
            // print out the first 10 elements
            for(int i = 0; i < 10; ++i)
            {
                printf("%d ", host_array[i]);
            }
            printf("\n");
        }
    }

    // deallocate memory on every path. the result of cudaFree is ignored on
    // purpose: after a failed kernel it may itself report the earlier error
    free(host_array);
    cudaFree(device_array);

    return exit_code;
}