MatConvNet中mnist源码解析

2018-01-03 11:05:07 浏览数 (2)

  本文的代码来自MatConvNet   下面是自己对代码的注释:   cnn_mnist_init.m

代码语言:javascript复制
function net = cnn_mnist_init(varargin)
% CNN_MNIST_LENET Initialize a CNN similar for MNIST
opts.useBatchNorm = true ;   #batchNorm是否使用
opts.networkType = 'simplenn' ;  #网络结构使用lenet结构
opts = vl_argparse(opts, varargin) ;
rng('default');
rng(0) ;
f=1/100 ;
net.layers = {} ;
# 定义各层参数,type是网络的层属性,stride为步长,pad为填充
# method中max为最大池化
net.layers{end 1} = struct('type', 'conv', ...
                           'weights', {{f*randn(5,5,1,20, 'single'), zeros(1, 20, 'single')}}, ...
                           'stride', 1, ...
                           'pad', 0) ;
net.layers{end 1} = struct('type', 'pool', ...
                           'method', 'max', ...
                           'pool', [2 2], ...
                           'stride', 2, ...
                           'pad', 0) ;
net.layers{end 1} = struct('type', 'conv', ...
                           'weights', {{f*randn(5,5,20,50, 'single'),zeros(1,50,'single')}}, ...
                           'stride', 1, ...
                           'pad', 0) ;
net.layers{end 1} = struct('type', 'pool', ...
                           'method', 'max', ...
                           'pool', [2 2], ...
                           'stride', 2, ...
                           'pad', 0) ;
net.layers{end 1} = struct('type', 'conv', ...
                           'weights', {{f*randn(4,4,50,500, 'single'),  zeros(1,500,'single')}}, ...
                           'stride', 1, ...
                           'pad', 0) ;
net.layers{end 1} = struct('type', 'relu') ;
net.layers{end 1} = struct('type', 'conv', ...
                           'weights', {{f*randn(1,1,500,10, 'single'), zeros(1,10,'single')}}, ...
                           'stride', 1, ...
                           'pad', 0) ;
net.layers{end 1} = struct('type', 'softmaxloss') ;

# optionally switch to batch normalization
# BN层一般用在卷积到池化过程中,激活函数前面,这里是在第1,4,7层插入BN
if opts.useBatchNorm
  net = insertBnorm(net, 1) ;
  net = insertBnorm(net, 4) ;
  net = insertBnorm(net, 7) ;
end

# Meta parameters
net.meta.inputSize = [27 27 1] ;  #输入大小[w,h,channel],这里是灰度图片,单通道为1
net.meta.trainOpts.learningRate = 0.001 ; #学习率
net.meta.trainOpts.numEpochs = 20 ; #epoch次数,注意这里不是所谓的迭代次数
net.meta.trainOpts.batchSize = 100 ; #批处理,这里就是mini-batchsize,batchSize大小对训练过程的影响见我另外一篇博客:卷积神经网络四大问题之一

# Fill in defaul values
net = vl_simplenn_tidy(net) ;

# Switch to DagNN if requested
# 选择不同的网络结构,这里就使用的simplenn结构
switch lower(opts.networkType)
  case 'simplenn'
    % done
  case 'dagnn'
    net = dagnn.DagNN.fromSimpleNN(net, 'canonicalNames', true) ;
    net.addLayer('error', dagnn.Loss('loss', 'classerror'), ...
             {'prediction','label'}, 'error') ;
  otherwise
    assert(false) ;
end

% --------------------------------------------------------------------
function net = insertBnorm(net, l)   #具体的BN函数
% --------------------------------------------------------------------
assert(isfield(net.layers{l}, 'weights'));
ndim = size(net.layers{l}.weights{1}, 4);
layer = struct('type', 'bnorm', ...
               'weights', {{ones(ndim, 1, 'single'), zeros(ndim, 1, 'single')}}, ...
               'learningRate', [1 1 0.05], ...
               'weightDecay', [0 0]) ;
net.layers{l}.biases = [] ;
net.layers = horzcat(net.layers(1:l), layer, net.layers(l 1:end)) ; #horzcat水平方向矩阵连接,这里就是重新构建网络结构,将BN层插入到lennt中

cnn_mnist_experiments.m

代码语言:javascript复制
%% Experiment with the cnn_mnist_fc_bnorm
[net_bn, info_bn] = cnn_mnist(...
  'expDir', 'data/mnist-bnorm', 'useBnorm', true);

[net_fc, info_fc] = cnn_mnist(...
  'expDir', 'data/mnist-baseline', 'useBnorm', false);
# 以下就是画图的代码
figure(1) ; clf ;
subplot(1,2,1) ;  # 第一张图
semilogy(info_fc.val.objective', 'o-') ; hold all ;
semilogy(info_bn.val.objective', ' --') ;  #表示y坐标轴是对数坐标系
xlabel('Training samples [x 10^3]'); ylabel('energy') ;
grid on ; #加入网格
h=legend('BSLN', 'BNORM') ;  #加入标注
set(h,'color','none');
title('objective') ;
subplot(1,2,2) ;
plot(info_fc.val.error', 'o-') ; hold all ;
plot(info_bn.val.error', ' --') ;
h=legend('BSLN-val','BSLN-val-5','BNORM-val','BNORM-val-5') ;
grid on ;
xlabel('Training samples [x 10^3]'); ylabel('error') ;
set(h,'color','none') ;
title('error') ;
drawnow ;

  运行结果得到的图:

0 人点赞