// cudaAvgFilter/main.cu

#include <opencv2/opencv.hpp>
#include <iostream>
#include <cuda.h>

// Returns the calling thread's flat global index for any 1D/2D/3D launch:
// threads are numbered x-fastest inside a block, blocks are numbered
// x-fastest inside the grid, and all threads of block b precede those of b+1.
__device__ int index()
{
    // Position of this block within the grid.
    unsigned int blockLinear = blockIdx.x + gridDim.x * (blockIdx.y + gridDim.y * blockIdx.z);
    // Position of this thread within its block.
    unsigned int threadLinear = threadIdx.x + blockDim.x * (threadIdx.y + blockDim.y * threadIdx.z);
    unsigned int threadsPerBlock = blockDim.x * blockDim.y * blockDim.z;

    return threadLinear + threadsPerBlock * blockLinear;
}

/**
 * Box (mean) filter for a single-channel 8-bit image stored row-major with
 * no row padding (pixel (row, col) lives at offset row * cols + col).
 *
 * One thread produces one output pixel: the thread whose index() equals the
 * pixel's flat offset. The launch therefore has to supply at least
 * rows * cols threads; surplus threads exit without touching memory.
 *
 * The window is the (2r+1) x (2r+1) square centred on the pixel, clipped to
 * the image, and the sum is divided by the number of pixels actually inside
 * the clipped window (truncating integer division).
 *
 * @param src   device pointer to the rows * cols input bytes
 * @param dst   device pointer to the rows * cols output bytes
 * @param rows  image height in pixels
 * @param cols  image width in pixels
 * @param r     window radius in pixels; expected to be >= 0
 */
__global__ void cudaAvgKernel(unsigned char *src, unsigned char *dst, int rows, int cols, int r)
{
    int i = index();

    // Bail out before any arithmetic: this avoids dividing by cols == 0 and
    // keeps surplus threads from walking a window they will never store.
    if (rows <= 0 || cols <= 0 || i < 0 || i >= rows * cols)
    {
        return;
    }

    int row = i / cols;
    int col = i % cols;

    // Half-open window [rowBegin, rowEnd) x [colBegin, colEnd), clipped to the image.
    int rowBegin = (row - r < 0) ? 0 : row - r;
    int colBegin = (col - r < 0) ? 0 : col - r;
    int rowEnd = (row + r + 1 > rows) ? rows : row + r + 1;
    int colEnd = (col + r + 1 > cols) ? cols : col + r + 1;

    int sum = 0;
    for (int y = rowBegin; y < rowEnd; y++)
    {
        // Index with the row being visited (y), not the window's first row.
        const unsigned char *line = src + y * cols;
        for (int x = colBegin; x < colEnd; x++)
        {
            sum += line[x];
        }
    }

    int count = (rowEnd - rowBegin) * (colEnd - colBegin);
    dst[i] = sum / count;
}

// Reports a failed CUDA runtime call on stderr; returns true when err is cudaSuccess.
static bool cudaOk(cudaError_t err, const char *what)
{
    if (err == cudaSuccess)
    {
        return true;
    }
    std::cerr << what << " failed: err code = " << err << ", msg = " << cudaGetErrorString(err) << std::endl;
    return false;
}

/**
 * Grabs frames from capture device 0, converts each to grayscale, runs
 * cudaAvgKernel on it with a radius of 5 and shows the input ("default")
 * and the filtered result ("mirror") until 'q' is pressed.
 *
 * Returns 0 on a normal quit and 1 when the camera cannot be opened, a frame
 * cannot be read, or any CUDA call fails.
 */
int main()
{
    cv::VideoCapture cap(0);
    if (!cap.isOpened())
    {
        std::cerr << "could not open capture device 0" << std::endl;
        return 1;
    }

    const int radius = 5;
    // Fixed block size so the launch stays valid for any frame width; the
    // kernel only relies on the flat thread index, not on the grid shape.
    const int threadsPerBlock = 256;

    cv::Mat image;
    cv::Mat gray;
    cv::Mat dst;

    // Device buffers are allocated once and reused; they are only
    // reallocated when a frame needs more bytes than currently reserved.
    unsigned char *srcGpu = nullptr;
    unsigned char *dstGpu = nullptr;
    size_t capacity = 0;
    int status = 0;

    while (true)
    {
        if (!cap.read(image) || image.empty())
        {
            std::cerr << "failed to read a frame from the capture device" << std::endl;
            status = 1;
            break;
        }

        // OpenCV capture frames are in BGR channel order.
        cv::cvtColor(image, gray, cv::COLOR_BGR2GRAY);
        // Every byte of dst is overwritten by the device-to-host copy below.
        dst.create(gray.size(), gray.type());

        const size_t bytes = gray.total() * gray.elemSize();
        if (bytes > capacity)
        {
            cudaFree(srcGpu);
            cudaFree(dstGpu);
            srcGpu = nullptr;
            dstGpu = nullptr;
            capacity = 0;
            if (!cudaOk(cudaMalloc(&srcGpu, bytes), "cudaMalloc(src)") ||
                !cudaOk(cudaMalloc(&dstGpu, bytes), "cudaMalloc(dst)"))
            {
                status = 1;
                break;
            }
            capacity = bytes;
        }

        if (!cudaOk(cudaMemcpy(srcGpu, gray.data, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(host to device)"))
        {
            status = 1;
            break;
        }

        const int total = gray.rows * gray.cols;
        const int blocks = (total + threadsPerBlock - 1) / threadsPerBlock; // ceil-div
        cudaAvgKernel<<<blocks, threadsPerBlock>>>(srcGpu, dstGpu, gray.rows, gray.cols, radius);
        if (!cudaOk(cudaGetLastError(), "cudaAvgKernel launch"))
        {
            status = 1;
            break;
        }

        // The blocking copy waits for the kernel, so it also surfaces any
        // error raised while the kernel was executing.
        if (!cudaOk(cudaMemcpy(dst.data, dstGpu, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(device to host)"))
        {
            status = 1;
            break;
        }

        cv::imshow("default", gray);
        cv::imshow("mirror", dst);

        if (cv::waitKey(20) == 'q')
        {
            break;
        }
    }

    cudaFree(srcGpu);
    cudaFree(dstGpu);
    image.release();
    cap.release();
    return status;
}