海面漂浮物垃圾识别检测算法基于YOLO系列网络模型,一旦识别到海面的漂浮物垃圾,便立即发出预警信号。目标检测架构分为两种,一种是two-stage,一种是one-stage,区别就在于two-stage有region proposal过程,类似于一种海选过程,网络会根据候选区域生成位置和类别,而one-stage直接从图片生成位置和类别。
在介绍海面漂浮物垃圾识别检测算法所用的Yolo算法之前,首先介绍一下滑动窗口技术,这对我们理解Yolo算法是有帮助的。采用滑动窗口的目标检测算法思路非常简单,它将检测问题转化为了图像分类问题。其基本原理就是采用不同大小和比例(宽高比)的窗口在整张图片上以一定的步长进行滑动,然后对这些窗口对应的区域做图像分类,这样就可以实现对整张图片的检测了,如DPM就是采用这种思路。
但是这个方法有致命的缺点,就是你并不知道要检测的目标大小是什么规模,所以你要设置不同大小和比例的窗口去滑动,而且还要选取合适的步长。但是这样会产生很多的子区域,并且都要经过分类器去做预测,这需要很大的计算量,所以你的分类器不能太复杂,因为要保证速度。解决思路之一就是减少要分类的子区域,这就是R-CNN的一个改进策略,其采用了selective search方法来找到最有可能包含目标的子区域(Region Proposal),其实可以看成采用启发式方法过滤掉很多子区域,这会提升效率。
海面漂浮物垃圾识别检测算法检测速度非常快。标准版本的YOLO每秒可以处理45张图像;极速版本每秒可以处理150帧图像。这就意味着该算法可以以小于25毫秒的延迟实时地处理视频。对于欠实时系统,在准确率保证的情况下,该算法速度快于其他方法。
#include "DenseTrackStab.h"
#include "Initialize.h"
#include "Descriptors.h"
#include "OpticalFlow.h"
#include <time.h>
using namespace cv;
// Set show_track to 1 to visualize the trajectories in a window.
int show_track = 0;
/**
 * Dense-trajectory feature extraction with camera-motion compensation.
 *
 * Reads the video named by argv[1] frame by frame. For every frame after the
 * first processed one it:
 *   1. matches SURF keypoints and optical-flow points against the previous
 *      frame and estimates a homography H from the merged matches,
 *   2. warps the current frame with the inverse of H and recomputes the
 *      optical flow on the warped image,
 *   3. advances every tracked point at every pyramid scale and accumulates
 *      its HOG / HOF / MBH descriptors,
 *   4. prints one tab-separated line to stdout for each track that reached
 *      trackInfo.length and passed IsValid() and IsCameraMotion().
 *
 * argv[2], when present, is used as the path of a bounding-box file; its
 * boxes are passed to InitMaskWithBox() to build the mask used for SURF
 * detection and flow matching.
 * NOTE(review): argv is also handed to arg_parse(); confirm that a
 * positional argv[2] does not clash with the option syntax it expects.
 *
 * @param argc number of command-line arguments
 * @param argv argv[1] = video file, argv[2] = optional bounding-box file
 * @return 0 on success, -1 if no video was given or it could not be opened
 */
int main(int argc, char** argv)
{
    // argv[1] is used as the video path below, so it must exist.
    if(argc < 2) {
        fprintf(stderr, "Missing video file argument.\n");
        return -1;
    }

    // Open the input video.
    VideoCapture capture;
    char* video = argv[1];
    int flag = arg_parse(argc, argv);
    capture.open(video);
    if(!capture.isOpened()) {
        fprintf(stderr, "Could not initialize capturing..\n");
        return -1;
    }

    // Optional bounding-box file. This input was added on top of the original
    // program, which offered no way to pass a bb_file.
    char* bb_file = (argc > 2) ? argv[2] : NULL;

    int frame_num = 0;
    TrackInfo trackInfo;
    DescInfo hogInfo, hofInfo, mbhInfo;

    // Initialise the trajectory and descriptor settings.
    InitTrackInfo(&trackInfo, track_length, init_gap);
    InitDescInfo(&hogInfo, 8, false, patch_size, nxy_cell, nt_cell);
    InitDescInfo(&hofInfo, 9, true, patch_size, nxy_cell, nt_cell);
    InitDescInfo(&mbhInfo, 8, false, patch_size, nxy_cell, nt_cell);

    SeqInfo seqInfo;
    InitSeqInfo(&seqInfo, video);

    // Load the per-frame bounding boxes from bb_file into bb_list.
    std::vector<Frame> bb_list;
    if(bb_file) {
        LoadBoundBox(bb_file, bb_list);
        // bb_list is indexed by frame_num below, so it needs one entry per frame.
        assert(bb_list.size() == seqInfo.length);
    }

    if(flag)
        seqInfo.length = end_frame - start_frame + 1;

    if(show_track == 1)
        namedWindow("DenseTrackStab", 0);

    // SURF detector. 200 is the detection threshold; per the original author's
    // note a smaller value yields more keypoints for matching (possibly better
    // results) at the cost of speed.
    SurfFeatureDetector detector_surf(200);
    SurfDescriptorExtractor extractor_surf(true, true);

    std::vector<Point2f> prev_pts_flow, pts_flow;
    std::vector<Point2f> prev_pts_surf, pts_surf;
    std::vector<Point2f> prev_pts_all, pts_all;

    std::vector<KeyPoint> prev_kpts_surf, kpts_surf;
    Mat prev_desc_surf, desc_surf;
    Mat flow, human_mask;

    Mat image, prev_grey, grey;

    std::vector<float> fscales(0);
    std::vector<Size> sizes(0);

    std::vector<Mat> prev_grey_pyr(0), grey_pyr(0), flow_pyr(0), flow_warp_pyr(0);
    std::vector<Mat> prev_poly_pyr(0), poly_pyr(0), poly_warp_pyr(0);

    // One list of live tracks per pyramid scale.
    std::vector<std::list<Track> > xyScaleTracks;
    int init_counter = 0; // decides when new feature points are sampled

    while(true) {
        Mat frame;

        // Grab the next frame.
        capture >> frame;
        if(frame.empty())
            break;

        // Skip frames outside [start_frame, end_frame].
        if(frame_num < start_frame || frame_num > end_frame) {
            frame_num++;
            continue;
        }

        /*----------------------- first processed frame -----------------------*/
        // Optical flow needs two frames, so the first one only sets up the
        // buffers, the initial feature points and the SURF keypoints.
        if(frame_num == start_frame) {
            image.create(frame.size(), CV_8UC3);
            grey.create(frame.size(), CV_8UC1);
            prev_grey.create(frame.size(), CV_8UC1);

            InitPry(frame, fscales, sizes);

            BuildPry(sizes, CV_8UC1, prev_grey_pyr);
            BuildPry(sizes, CV_8UC1, grey_pyr);
            BuildPry(sizes, CV_32FC2, flow_pyr);
            BuildPry(sizes, CV_32FC2, flow_warp_pyr);

            BuildPry(sizes, CV_32FC(5), prev_poly_pyr);
            BuildPry(sizes, CV_32FC(5), poly_pyr);
            BuildPry(sizes, CV_32FC(5), poly_warp_pyr);

            xyScaleTracks.resize(scale_num);

            frame.copyTo(image);
            cvtColor(image, prev_grey, CV_BGR2GRAY);

            // Densely sample feature points at every scale.
            for(int iScale = 0; iScale < scale_num; iScale++) {
                if(iScale == 0)
                    prev_grey.copyTo(prev_grey_pyr[0]);
                else
                    resize(prev_grey_pyr[iScale-1], prev_grey_pyr[iScale], prev_grey_pyr[iScale].size(), 0, 0, INTER_LINEAR);

                // dense sampling of feature points
                std::vector<Point2f> points(0);
                DenseSample(prev_grey_pyr[iScale], points, quality, min_distance);

                // start one track per sampled point
                std::list<Track>& tracks = xyScaleTracks[iScale];
                for(size_t k = 0; k < points.size(); k++)
                    tracks.push_back(Track(points[k], trackInfo, hogInfo, hofInfo, mbhInfo));
            }

            // compute polynomial expansion
            my::FarnebackPolyExpPyr(prev_grey, prev_poly_pyr, fscales, 7, 1.5);

            // human_mask starts as all ones. Per the original author's note,
            // InitMaskWithBox marks the inside of the boxes as 0 so that SURF
            // features on the person are not used for matching.
            human_mask = Mat::ones(frame.size(), CV_8UC1);
            if(bb_file)
                InitMaskWithBox(human_mask, bb_list[frame_num].BBs);

            detector_surf.detect(prev_grey, prev_kpts_surf, human_mask);
            extractor_surf.compute(prev_grey, prev_kpts_surf, prev_desc_surf);

            frame_num++;
            continue;
        }

        /*----------------------- subsequent frames -----------------------*/
        init_counter++;
        frame.copyTo(image);
        cvtColor(image, grey, CV_BGR2GRAY);

        // Compute SURF features on the new frame and match them against the
        // previous frame. SURF is computed on the original scale only.
        if(bb_file)
            InitMaskWithBox(human_mask, bb_list[frame_num].BBs);
        detector_surf.detect(grey, kpts_surf, human_mask);
        extractor_surf.compute(grey, kpts_surf, desc_surf);
        ComputeMatch(prev_kpts_surf, kpts_surf, prev_desc_surf, desc_surf, prev_pts_surf, pts_surf);

        // Compute optical flow on all scales and derive matches from the flow.
        my::FarnebackPolyExpPyr(grey, poly_pyr, fscales, 7, 1.5);
        my::calcOpticalFlowFarneback(prev_poly_pyr, poly_pyr, flow_pyr, 10, 2);

        MatchFromFlow(prev_grey, flow_pyr[0], prev_pts_flow, pts_flow, human_mask);
        // Combine the SURF matches with the flow matches.
        MergeMatch(prev_pts_flow, pts_flow, prev_pts_surf, pts_surf, prev_pts_all, pts_all);

        // Estimate the homography H between the two frames from the matches.
        // With too few matches (or too few RANSAC inliers) the estimate is
        // unreliable, so H stays the identity in that case.
        Mat H = Mat::eye(3, 3, CV_64FC1);
        if(pts_all.size() > 50) {
            std::vector<unsigned char> match_mask;
            Mat temp = findHomography(prev_pts_all, pts_all, RANSAC, 1, match_mask);
            if(countNonZero(Mat(match_mask)) > 25)
                H = temp;
        }

        // Warp the current frame with the inverse of H to remove camera motion.
        Mat H_inv = H.inv();
        Mat grey_warp = Mat::zeros(grey.size(), CV_8UC1);
        MyWarpPerspective(prev_grey, grey, grey_warp, H_inv); // warp the second frame

        // Recompute the optical flow at every scale using the warped frame.
        my::FarnebackPolyExpPyr(grey_warp, poly_warp_pyr, fscales, 7, 1.5);
        my::calcOpticalFlowFarneback(prev_poly_pyr, poly_warp_pyr, flow_warp_pyr, 10, 2);

        // Compute the features at every scale.
        for(int iScale = 0; iScale < scale_num; iScale++) {
            // Scale 0 is the frame itself; the others are resized from the
            // previous scale with linear interpolation.
            if(iScale == 0)
                grey.copyTo(grey_pyr[0]);
            else
                resize(grey_pyr[iScale-1], grey_pyr[iScale], grey_pyr[iScale].size(), 0, 0, INTER_LINEAR);

            int width = grey_pyr[iScale].cols;
            int height = grey_pyr[iScale].rows;

            // compute the integral histograms
            DescMat* hogMat = InitDescMat(height+1, width+1, hogInfo.nBins);
            HogComp(prev_grey_pyr[iScale], hogMat->desc, hogInfo);

            DescMat* hofMat = InitDescMat(height+1, width+1, hofInfo.nBins);
            HofComp(flow_warp_pyr[iScale], hofMat->desc, hofInfo);

            DescMat* mbhMatX = InitDescMat(height+1, width+1, mbhInfo.nBins);
            DescMat* mbhMatY = InitDescMat(height+1, width+1, mbhInfo.nBins);
            MbhComp(flow_warp_pyr[iScale], mbhMatX->desc, mbhMatY->desc, mbhInfo);

            // Advance the tracks of this scale and collect their descriptors.
            std::list<Track>& tracks = xyScaleTracks[iScale];
            for(std::list<Track>::iterator iTrack = tracks.begin(); iTrack != tracks.end();) {
                int index = iTrack->index;
                Point2f prev_point = iTrack->point[index];
                // Clamp the rounded position so the flow lookup stays in bounds.
                int x = std::min<int>(std::max<int>(cvRound(prev_point.x), 0), width-1);
                int y = std::min<int>(std::max<int>(cvRound(prev_point.y), 0), height-1);

                // The flow image has two interleaved float channels (CV_32FC2):
                // element 2*x is the x component, 2*x+1 the y component.
                Point2f point;
                point.x = prev_point.x + flow_pyr[iScale].ptr<float>(y)[2*x];
                point.y = prev_point.y + flow_pyr[iScale].ptr<float>(y)[2*x+1];

                // Drop tracks that leave the image.
                if(point.x <= 0 || point.x >= width || point.y <= 0 || point.y >= height) {
                    iTrack = tracks.erase(iTrack);
                    continue;
                }

                // The stored displacement comes from the motion-compensated flow.
                iTrack->disp[index].x = flow_warp_pyr[iScale].ptr<float>(y)[2*x];
                iTrack->disp[index].y = flow_warp_pyr[iScale].ptr<float>(y)[2*x+1];

                // get the descriptors for the feature point
                RectInfo rect;
                GetRect(prev_point, rect, width, height, hogInfo);
                GetDesc(hogMat, rect, hogInfo, iTrack->hog, index);
                GetDesc(hofMat, rect, hofInfo, iTrack->hof, index);
                GetDesc(mbhMatX, rect, mbhInfo, iTrack->mbhX, index);
                GetDesc(mbhMatY, rect, mbhInfo, iTrack->mbhY, index);
                iTrack->addPoint(point);

                // Draw the trajectory on the original scale only.
                if(show_track == 1 && iScale == 0)
                    DrawTrack(iTrack->point, iTrack->index, fscales[iScale], image);

                // Once a track reaches the configured length (the original
                // author notes this should be 15 in iDT) its features are
                // written out and the track is removed.
                if(iTrack->index >= trackInfo.length) {
                    std::vector<Point2f> trajectory(trackInfo.length+1);
                    for(int k = 0; k <= trackInfo.length; ++k)
                        trajectory[k] = iTrack->point[k]*fscales[iScale];

                    std::vector<Point2f> displacement(trackInfo.length);
                    for(int k = 0; k < trackInfo.length; ++k)
                        displacement[k] = iTrack->disp[k]*fscales[iScale];

                    float mean_x(0), mean_y(0), var_x(0), var_y(0), length(0);
                    if(IsValid(trajectory, mean_x, mean_y, var_x, var_y, length) && IsCameraMotion(displacement)) {
                        // output the trajectory
                        printf("%d\t%f\t%f\t%f\t%f\t%f\t%f\t", frame_num, mean_x, mean_y, var_x, var_y, length, fscales[iScale]);

                        // for spatio-temporal pyramid
                        printf("%f\t", std::min<float>(std::max<float>(mean_x/float(seqInfo.width), 0), 0.999));
                        printf("%f\t", std::min<float>(std::max<float>(mean_y/float(seqInfo.height), 0), 0.999));
                        printf("%f\t", std::min<float>(std::max<float>((frame_num - trackInfo.length/2.0 - start_frame)/float(seqInfo.length), 0), 0.999));

                        // output the trajectory
                        for(int k = 0; k < trackInfo.length; ++k)
                            printf("%f\t%f\t", displacement[k].x, displacement[k].y);

                        // Per the original author's note, the trajectory feature
                        // above is of limited use and may be dropped, keeping
                        // only the descriptors below. To store the features
                        // elsewhere, modify PrintDesc.
                        PrintDesc(iTrack->hog, hogInfo, trackInfo);
                        PrintDesc(iTrack->hof, hofInfo, trackInfo);
                        PrintDesc(iTrack->mbhX, mbhInfo, trackInfo);
                        PrintDesc(iTrack->mbhY, mbhInfo, trackInfo);
                        printf("\n");
                    }

                    iTrack = tracks.erase(iTrack);
                    continue;
                }
                ++iTrack;
            }
            ReleDescMat(hogMat);
            ReleDescMat(hofMat);
            ReleDescMat(mbhMatX);
            ReleDescMat(mbhMatY);

            // NOTE(review): init_counter is incremented once per frame and reset
            // to 0 at the end of every frame, so it is always 1 here; new points
            // are therefore only sampled when trackInfo.gap == 1.
            if(init_counter != trackInfo.gap)
                continue;

            // detect new feature points every gap frames
            std::vector<Point2f> points(0);
            for(std::list<Track>::iterator iTrack = tracks.begin(); iTrack != tracks.end(); ++iTrack)
                points.push_back(iTrack->point[iTrack->index]);

            DenseSample(grey_pyr[iScale], points, quality, min_distance);
            // save the new feature points
            for(size_t k = 0; k < points.size(); k++)
                tracks.push_back(Track(points[k], trackInfo, hogInfo, hofInfo, mbhInfo));
        }

        // Optical flow and SURF matching both need the previous frame, so the
        // current frame's data is saved into the prev_* buffers here.
        init_counter = 0;
        grey.copyTo(prev_grey);
        for(int k = 0; k < scale_num; k++) {
            grey_pyr[k].copyTo(prev_grey_pyr[k]);
            poly_pyr[k].copyTo(prev_poly_pyr[k]);
        }

        prev_kpts_surf = kpts_surf;
        desc_surf.copyTo(prev_desc_surf);

        frame_num++;

        if(show_track == 1) {
            imshow("DenseTrackStab", image);
            const int key = cvWaitKey(3);
            if(static_cast<char>(key) == 27) break; // 27 == ESC
        }
    }

    if(show_track == 1)
        destroyWindow("DenseTrackStab");

    return 0;
}
// NOTE(review): a stray "<std::list" fragment (web-extraction residue, not valid C++) followed main() here.