diff --git a/cuRFilter.sln b/cuRFilter.sln new file mode 100644 index 0000000..880a381 --- /dev/null +++ b/cuRFilter.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.5.33627.172 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cuRFilter", "cuRFilter\cuRFilter.vcxproj", "{F8C4AC1F-11D9-48AE-B3A7-D4AFAA6DEEA4}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {F8C4AC1F-11D9-48AE-B3A7-D4AFAA6DEEA4}.Debug|x64.ActiveCfg = Debug|x64 + {F8C4AC1F-11D9-48AE-B3A7-D4AFAA6DEEA4}.Debug|x64.Build.0 = Debug|x64 + {F8C4AC1F-11D9-48AE-B3A7-D4AFAA6DEEA4}.Release|x64.ActiveCfg = Release|x64 + {F8C4AC1F-11D9-48AE-B3A7-D4AFAA6DEEA4}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {9176E986-5B87-4096-B603-4E14C8A0CB65} + EndGlobalSection +EndGlobal diff --git a/cuRFilter/kernel.cu b/cuRFilter/kernel.cu new file mode 100644 index 0000000..376e3ee --- /dev/null +++ b/cuRFilter/kernel.cu @@ -0,0 +1,134 @@ + +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + +#include +#include +#include + + +__global__ void kernelR(double3* src, uchar3* dst, double* maskhALL, double* maskvALL, int height, int width, float sigma_s, float sigma_r, int no_of_iter) +{ + int id = threadIdx.y * blockDim.x + threadIdx.x; + double* maskh = maskhALL + height * id; + double* maskv = maskvALL + width * id; + + if (id < height) + { + for (int i = 0; i < width - 1; i++) + maskv[i] = 1 + sigma_s / sigma_r * (abs(src[id * width + i + 1].x - src[id * width + i].x) + + abs(src[id * width + i + 1].y - src[id * width + i].y) + + abs(src[id * width + i + 1].z - src[id * width + i].z)); + maskv[width - 1] = 1; + } + if (id < width) + { + for (int j = 0; j < height - 1; j++) + maskh[j] = 1 + sigma_s / sigma_r * (abs(src[(j + 1) * width + id].x - src[j * width + id].x) + + abs(src[(j + 1) * width + id].y - src[j * width + id].y) + + abs(src[(j + 1) * width + id].z - src[j * width + id].z)); + maskh[height - 1] = 1; + } + // __syncthreads(); + // printf("id = %d\n", id); + + + for (int i = 0; i < no_of_iter; i++) + { + float sigma_h = sigma_s * sqrt(3.0) * pow(2.0, (no_of_iter - (i + 1))) / sqrt(pow(4.0, no_of_iter) - 1); + float a = (float)exp((-1.0 * sqrt(2.0)) / sigma_h); + + + if (id < height) + for (int j = 1; j < width; j++) + { + src[id * width + j].x += (src[id * width + j - 1].x - src[id * width + j].x) * pow(a, maskv[j]); + src[id * width + j].y += (src[id * width + j - 1].y - src[id * width + j].y) * pow(a, maskv[j]); + src[id * width + j].z += (src[id * width + j - 1].z - src[id * width + j].z) * pow(a, maskv[j]); + } + + ///test + + __syncthreads(); + if (id < height) + for (int j = width - 2; j >= 0; j--) + { + src[id * width + j].x += (src[id * width + j + 1].x - src[id * width + j].x) * pow(a, maskv[j]); + src[id * width + j].y += (src[id * width + j + 1].y - src[id * width + j].y) * pow(a, maskv[j]); + src[id * width + j].z += (src[id * width + j + 1].z - src[id * width + j].z) * pow(a, maskv[j]); + } + + __syncthreads(); + if (id < width) + for (int j = 1; j < height; j++) + { + src[j * width + id].x += (src[(j - 1) * width + id].x - src[j * width + id].x) * pow(a, maskh[j]); + src[j * width + id].y += (src[(j - 1) * width + id].y - src[j * width + id].y) * pow(a, maskh[j]); + src[j * width + id].z += (src[(j - 1) * width + id].z - src[j * width + id].z) * pow(a, maskh[j]); + } + __syncthreads(); + if (id < width) + for (int j = height - 2; j >= 0; j--) + { + src[j * width + id].x += (src[(j + 1) * width + id].x - src[j * width + id].x) * pow(a, maskh[j]); + src[j * width + id].y += (src[(j + 1) * width + id].y - src[j * width + id].y) * pow(a, maskh[j]); + src[j * width + id].z += (src[(j + 1) * width + id].z - src[j * width + id].z) * pow(a, maskh[j]); + } + __syncthreads(); + } + if (id < height) + for (int j = 0; j < width; j++) + { + dst[id * width + j].x = src[id * width + j].x > 1 ? 255 : (uchar)(src[id * width + j].x * 255.0); + dst[id * width + j].y = src[id * width + j].y > 1 ? 255 : (uchar)(src[id * width + j].y * 255.0); + dst[id * width + j].z = src[id * width + j].z > 1 ? 255 : (uchar)(src[id * width + j].z * 255.0); + + } +} + + + +int main() +{ + cudaError_t err; + + cv::Mat image = cv::imread("D:/opencv/modules/core/misc/objc/test/resources/lena.png"); + cv::Mat dst = cv::Mat(image.rows, image.cols, CV_8UC3); + double3* src_GPU; + uchar3* dst_GPU; + cudaMalloc(&src_GPU, image.rows * image.cols * 3 * 8); + cudaMalloc(&dst_GPU, image.rows * image.cols * 3); + + double* maskv; + double* maskh; + + cudaMalloc(&maskv, sizeof(double) * image.cols * image.rows); + cudaMalloc(&maskh, sizeof(double) * image.cols * image.rows); + + + std::chrono::time_point start, end; + start = std::chrono::high_resolution_clock::now(); + cv::Mat temp; + image.convertTo(temp, CV_64FC3, 1.0 / 255.0); + cudaMemcpy(src_GPU, temp.data, image.rows * image.cols * 3 * 8, cudaMemcpyHostToDevice); + kernelR << <1, dim3(32, 32) >> > (src_GPU, dst_GPU, maskv, maskh, image.rows, image.cols, 43, 0.7, 3); + cudaDeviceSynchronize(); + cudaMemcpy(dst.data, dst_GPU, dst.rows * dst.cols * 3, cudaMemcpyDeviceToHost); + end = std::chrono::high_resolution_clock::now(); + std::cout << "gpu cost " << std::chrono::duration_cast(end - start).count() << " ns " << std::endl; + + + start = std::chrono::high_resolution_clock::now(); + cv::Mat edge; + cv::edgePreservingFilter(image, edge, 1, 43, 0.7); + end = std::chrono::high_resolution_clock::now(); + std::cout << "cpu cost " << std::chrono::duration_cast(end - start).count() << " ns " << std::endl; + + cv::imshow("cpu", edge); + cv::imshow("gpu ", dst); + cv::imshow("sub", dst - edge); + cv::imshow("sub2", edge - dst); + + cv::waitKey(0); + return 0; +} \ No newline at end of file