Browse Source

init

master
zara 2 years ago
parent
commit
b92d10bc09
  1. 25
      cuRFilter.sln
  2. 134
      cuRFilter/kernel.cu

25
cuRFilter.sln

@ -0,0 +1,25 @@ @@ -0,0 +1,25 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.5.33627.172
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cuRFilter", "cuRFilter\cuRFilter.vcxproj", "{F8C4AC1F-11D9-48AE-B3A7-D4AFAA6DEEA4}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{F8C4AC1F-11D9-48AE-B3A7-D4AFAA6DEEA4}.Debug|x64.ActiveCfg = Debug|x64
{F8C4AC1F-11D9-48AE-B3A7-D4AFAA6DEEA4}.Debug|x64.Build.0 = Debug|x64
{F8C4AC1F-11D9-48AE-B3A7-D4AFAA6DEEA4}.Release|x64.ActiveCfg = Release|x64
{F8C4AC1F-11D9-48AE-B3A7-D4AFAA6DEEA4}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {9176E986-5B87-4096-B603-4E14C8A0CB65}
EndGlobalSection
EndGlobal

134
cuRFilter/kernel.cu

@ -0,0 +1,134 @@ @@ -0,0 +1,134 @@

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <math.h>
#include <stdio.h>
#include <opencv2/opencv.hpp>
__global__ void kernelR(double3* src, uchar3* dst, double* maskhALL, double* maskvALL, int height, int width, float sigma_s, float sigma_r, int no_of_iter)
{
int id = threadIdx.y * blockDim.x + threadIdx.x;
double* maskh = maskhALL + height * id;
double* maskv = maskvALL + width * id;
if (id < height)
{
for (int i = 0; i < width - 1; i++)
maskv[i] = 1 + sigma_s / sigma_r * (abs(src[id * width + i + 1].x - src[id * width + i].x) +
abs(src[id * width + i + 1].y - src[id * width + i].y) +
abs(src[id * width + i + 1].z - src[id * width + i].z));
maskv[width - 1] = 1;
}
if (id < width)
{
for (int j = 0; j < height - 1; j++)
maskh[j] = 1 + sigma_s / sigma_r * (abs(src[(j + 1) * width + id].x - src[j * width + id].x) +
abs(src[(j + 1) * width + id].y - src[j * width + id].y) +
abs(src[(j + 1) * width + id].z - src[j * width + id].z));
maskh[height - 1] = 1;
}
// __syncthreads();
// printf("id = %d\n", id);
for (int i = 0; i < no_of_iter; i++)
{
float sigma_h = sigma_s * sqrt(3.0) * pow(2.0, (no_of_iter - (i + 1))) / sqrt(pow(4.0, no_of_iter) - 1);
float a = (float)exp((-1.0 * sqrt(2.0)) / sigma_h);
if (id < height)
for (int j = 1; j < width; j++)
{
src[id * width + j].x += (src[id * width + j - 1].x - src[id * width + j].x) * pow(a, maskv[j]);
src[id * width + j].y += (src[id * width + j - 1].y - src[id * width + j].y) * pow(a, maskv[j]);
src[id * width + j].z += (src[id * width + j - 1].z - src[id * width + j].z) * pow(a, maskv[j]);
}
///test
__syncthreads();
if (id < height)
for (int j = width - 2; j >= 0; j--)
{
src[id * width + j].x += (src[id * width + j + 1].x - src[id * width + j].x) * pow(a, maskv[j]);
src[id * width + j].y += (src[id * width + j + 1].y - src[id * width + j].y) * pow(a, maskv[j]);
src[id * width + j].z += (src[id * width + j + 1].z - src[id * width + j].z) * pow(a, maskv[j]);
}
__syncthreads();
if (id < width)
for (int j = 1; j < height; j++)
{
src[j * width + id].x += (src[(j - 1) * width + id].x - src[j * width + id].x) * pow(a, maskh[j]);
src[j * width + id].y += (src[(j - 1) * width + id].y - src[j * width + id].y) * pow(a, maskh[j]);
src[j * width + id].z += (src[(j - 1) * width + id].z - src[j * width + id].z) * pow(a, maskh[j]);
}
__syncthreads();
if (id < width)
for (int j = height - 2; j >= 0; j--)
{
src[j * width + id].x += (src[(j + 1) * width + id].x - src[j * width + id].x) * pow(a, maskh[j]);
src[j * width + id].y += (src[(j + 1) * width + id].y - src[j * width + id].y) * pow(a, maskh[j]);
src[j * width + id].z += (src[(j + 1) * width + id].z - src[j * width + id].z) * pow(a, maskh[j]);
}
__syncthreads();
}
if (id < height)
for (int j = 0; j < width; j++)
{
dst[id * width + j].x = src[id * width + j].x > 1 ? 255 : (uchar)(src[id * width + j].x * 255.0);
dst[id * width + j].y = src[id * width + j].y > 1 ? 255 : (uchar)(src[id * width + j].y * 255.0);
dst[id * width + j].z = src[id * width + j].z > 1 ? 255 : (uchar)(src[id * width + j].z * 255.0);
}
}
int main()
{
cudaError_t err;
cv::Mat image = cv::imread("D:/opencv/modules/core/misc/objc/test/resources/lena.png");
cv::Mat dst = cv::Mat(image.rows, image.cols, CV_8UC3);
double3* src_GPU;
uchar3* dst_GPU;
cudaMalloc(&src_GPU, image.rows * image.cols * 3 * 8);
cudaMalloc(&dst_GPU, image.rows * image.cols * 3);
double* maskv;
double* maskh;
cudaMalloc(&maskv, sizeof(double) * image.cols * image.rows);
cudaMalloc(&maskh, sizeof(double) * image.cols * image.rows);
std::chrono::time_point<std::chrono::steady_clock> start, end;
start = std::chrono::high_resolution_clock::now();
cv::Mat temp;
image.convertTo(temp, CV_64FC3, 1.0 / 255.0);
cudaMemcpy(src_GPU, temp.data, image.rows * image.cols * 3 * 8, cudaMemcpyHostToDevice);
kernelR << <1, dim3(32, 32) >> > (src_GPU, dst_GPU, maskv, maskh, image.rows, image.cols, 43, 0.7, 3);
cudaDeviceSynchronize();
cudaMemcpy(dst.data, dst_GPU, dst.rows * dst.cols * 3, cudaMemcpyDeviceToHost);
end = std::chrono::high_resolution_clock::now();
std::cout << "gpu cost " << std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count() << " ns " << std::endl;
start = std::chrono::high_resolution_clock::now();
cv::Mat edge;
cv::edgePreservingFilter(image, edge, 1, 43, 0.7);
end = std::chrono::high_resolution_clock::now();
std::cout << "cpu cost " << std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count() << " ns " << std::endl;
cv::imshow("cpu", edge);
cv::imshow("gpu ", dst);
cv::imshow("sub", dst - edge);
cv::imshow("sub2", edge - dst);
cv::waitKey(0);
return 0;
}
Loading…
Cancel
Save