今天心血来潮,想把传统的卷积算法实现一份不采用各种加速方式,仅优化算法逻辑的纯净版本。
写完发现性能还可以,特发出来分享之,若有博友在此基础上,进行了再次优化,那就更赞了。
算法很简单:
代码语言:javascript复制inline unsigned char Clamp2Byte(int n) {
return (((255 - n) >> 31) | (n & ~(n >> 31)));
}
void Convolution2D(unsigned char * data, unsigned int width, unsigned int height, unsigned int channels, int * filter, unsigned char filterW, unsigned char cfactor, unsigned char bias) {
unsigned char * tmpData = (unsigned char * ) malloc(width * height * channels);
int factor = 256 / cfactor;
int halfW = filterW / 2;
if (channels == 3 || channels == 4) {
for (int y = 0; y < height; y ) {
int y1 = y - halfW height;
for (int x = 0; x < width; x ) {
int x1 = x - halfW width;
int r = 0;
int g = 0;
int b = 0;
unsigned int p = (y * width x) * channels;
for (unsigned int fx = 0; fx < filterW; fx ) {
int dx = (x1 fx) % width;
int fidx = fx * (filterW);
for (unsigned int fy = 0; fy < filterW; fy ) {
int pos = (((y1 fy) % height) * width dx) * channels;
int * pfilter = & filter[fidx (fy)];
r = data[pos] * ( * pfilter);
g = data[pos 1] * ( * pfilter);
b = data[pos 2] * ( * pfilter);
}
}
tmpData[p] = Clamp2Byte(((factor * r) >> 8) bias);
tmpData[p 1] = Clamp2Byte(((factor * g) >> 8) bias);
tmpData[p 2] = Clamp2Byte(((factor * b) >> 8) bias);
}
}
} else
if (channels == 1) {
for (int y = 0; y < height; y ) {
int y1 = y - halfW height;
for (int x = 0; x < width; x ) {
int r = 0;
unsigned int p = (y * width x);
int x1 = x - halfW width;
for (unsigned int fx = 0; fx < filterW; fx ) {
int dx = (x1 fx) % width;
int fidx = fx * (filterW);
for (unsigned int fy = 0; fy < filterW; fy ) {
int pos = (((y1 fy) % height) * width dx);
int szfilter = filter[fidx (fy)];
r = data[pos] * szfilter;
}
}
tmpData[p] = Clamp2Byte(((factor * r) >> 8) bias);
}
}
}
memcpy(data, tmpData, width * height * channels);
free(tmpData);
}
调用例子:
代码语言:javascript复制例子
//模糊
int Blurfilter[25] = {
0, 0, 1, 0, 0,
0, 1, 1, 1, 0,
1, 1, 1, 1, 1,
0, 1, 1, 1, 0,
0, 0, 1, 0, 0,
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, Blurfilter, 5, 13, 0);
// 运动模糊
int MotionBlurfilter[81] = {
1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, MotionBlurfilter, 9, 9, 0);
//边缘探测1
int edges1filter[25] = {
-1, 0, 0, 0, 0,
0, -2, 0, 0, 0,
0, 0, 6, 0, 0,
0, 0, 0, -2, 0,
0, 0, 0, 0, -1,
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, edges1filter, 5, 1, 0);
//边缘探测2
int edges2filter[9] = {
-1, -1, -1, -1, 8, -1, -1, -1, -1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, edges2filter, 3, 1, 0);
//锐化1
int sharpen1filter[9] = {
-1, -1, -1, -1, 9, -1, -1, -1, -1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, sharpen1filter, 3, 1, 0);
//锐化2
int sharpen2filter[25] = {
-1, -1, -1, -1, -1, -1, 2, 2, 2, -1, -1, 2, 8, 2, -1, -1, 2, 2, 2, -1, -1, -1, -1, -1, -1,
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, sharpen2filter, 5, 8, 0);
//锐化3
int sharpen3filter[9] = {
1, 1, 1,
1, -7, 1,
1, 1, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, sharpen3filter, 3, 1, 0);
// 浮雕1
int Embossfilter[9] = {
-1, -1, 0, -1, 0, 1,
0, 1, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, Embossfilter, 3, 1, 128);
// 浮雕2
int emboss2filter[25] = {
-1, -1, -1, -1, 0, -1, -1, -1, 0, 1, -1, -1, 0, 1, 1, -1, 0, 1, 1, 1,
0, 1, 1, 1, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, emboss2filter, 5, 1, 128);
// 均值模糊1
int meanfilter[9] = {
1, 1, 1,
1, 1, 1,
1, 1, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, meanfilter, 3, 9, 0);
// 均值模糊2
int mean2filter[81] = {
1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, mean2filter, 9, 81, 0);
博主在一张大小为960x1280的图片,进行了边缘探测卷积核的处理,在博主机子上耗时是100毫秒。
代码语言:javascript复制//边缘探测1
int edges1filter[25] = {
-1, 0, 0, 0, 0,
0, -2, 0, 0, 0,
0, 0, 6, 0, 0,
0, 0, 0, -2, 0,
0, 0, 0, 0, -1,
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, edges1filter, 5, 1, 0);
效果图:
其他相关资料,见各种百科网站。
关键词:卷积(英语:Convolution)