A convolutional neural network (Convolutional Neural Network, CNN) is a feed-forward neural network whose artificial neurons respond to surrounding units within a limited receptive field, which makes it perform very well on large-scale image processing. By combining convolution, pooling, and activation operations, a CNN can learn spatially correlated features effectively.
Given an input image, take the output feature map of a convolutional layer and weight each channel of that feature map by the gradient of the class score with respect to the channel. Intuitively, one way to understand this trick is that you weight the spatial map of "how strongly the input image activates each channel" by "how important each channel is for the class", which yields a spatial map of "how strongly the input image activates the class".
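A minimal NumPy sketch of this channel-weighting step (the feature_map and channel_weights arrays here are hypothetical placeholders; the full implementation against a real model appears later in the post):

import numpy as np

# hypothetical example: a 14x14 feature map with 512 channels, and per-channel
# weights (e.g. the mean gradient of the class score w.r.t. each channel)
feature_map = np.random.rand(14, 14, 512)
channel_weights = np.random.rand(512)

# weight every channel by its importance for the class, then average over channels
cam = np.mean(feature_map * channel_weights, axis=-1)  # shape (14, 14)

# keep only the positive contributions and normalize to [0, 1]
cam = np.maximum(cam, 0)
cam /= cam.max()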
Import modules
from keras.models import Model
from keras.layers import Conv2D, Input
from keras.initializers import Constant
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.applications.vgg19 import VGG19, decode_predictions
from keras.preprocessing import image
import keras.backend as K
from glob import glob
import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image
from IPython.display import display  # display() is a notebook builtin; imported here so the code also runs as a plain script
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore')
tf.compat.v1.disable_eager_execution()
config = tf.compat.v1.ConfigProto(allow_soft_placement=True)
config.gpu_options.allow_growth = True
sess = tf.compat.v1.Session(config=config)
tf.compat.v1.keras.backend.set_session(sess)
Custom functions
# returns a backend function that computes the i-th layer's activations of model
def get_activations(i, model):
    return K.function([model.layers[0].input], [model.layers[i].output])
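The show_heatmap function below logs intermediate shapes through a dprint helper that is not defined in the original post; a minimal sketch of such a helper, which simply prints its arguments when debug=True:

# debug-print helper assumed by show_heatmap: print only when debug=True
def dprint(*args, debug=False):
    if debug:
        print(*args)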
# shows the activation heatmap of the input image
def show_heatmap(inp_img, model, index, alpha=0.7, debug=False):
    # convert the image into a numpy array (PIL arrays are laid out as (height, width, channels))
    inp_arr = np.array(inp_img).reshape(1, inp_img.height, inp_img.width, 3)
    # predict the class of the image and print the top 3 predictions
    pred = model.predict([inp_arr])
    print([(label, conf) for _, label, conf in decode_predictions(pred)[0][:3]])
    # fetch the activations of layer index
    out = get_activations(index, model)([inp_arr])[0][0]
    dprint("activations", out.shape, debug=debug)
    # for each region of the activation map, calculate the average filter activation
    out_avg = np.mean(out, -1)
    dprint("post avg", out_avg.shape, debug=debug)
    # repeat the array into 3 channels
    out_avg = np.repeat(out_avg[:, :, np.newaxis], 3, axis=2)
    dprint("post repeat", out_avg.shape, debug=debug)
    # normalize the values into the range [0, 1]
    dprint("pre normalize", np.amin(out_avg), np.amax(out_avg), debug=debug)
    out_avg /= np.amax(out_avg)
    dprint("post normalize", np.amin(out_avg), np.amax(out_avg), debug=debug)
    # transform the values into RGB range with a pink tint
    out_avg *= (255, 0, 128)
    dprint("post denormalize", np.amin(out_avg), np.amax(out_avg), debug=debug)
    # convert the average activations into an image and resize it to the input shape
    heatmap = Image.fromarray(np.uint8(out_avg))
    heatmap = heatmap.resize((inp_img.width, inp_img.height), Image.BICUBIC)
    # superimpose the heatmap on top of the input image
    input_heatmap = Image.blend(inp_img, heatmap, alpha)
    # show the result
    display(input_heatmap)
Inspect the model structure
vgg = VGG19()
activations_index = -5
assert "block5_pool" == vgg.layers[activations_index].name
vgg.summary()
Model: "vgg19"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 224, 224, 3)] 0
_________________________________________________________________
block1_conv1 (Conv2D) (None, 224, 224, 64) 1792
_________________________________________________________________
block1_conv2 (Conv2D) (None, 224, 224, 64) 36928
_________________________________________________________________
block1_pool (MaxPooling2D) (None, 112, 112, 64) 0
_________________________________________________________________
block2_conv1 (Conv2D) (None, 112, 112, 128) 73856
_________________________________________________________________
block2_conv2 (Conv2D) (None, 112, 112, 128) 147584
_________________________________________________________________
block2_pool (MaxPooling2D) (None, 56, 56, 128) 0
_________________________________________________________________
block3_conv1 (Conv2D) (None, 56, 56, 256) 295168
_________________________________________________________________
block3_conv2 (Conv2D) (None, 56, 56, 256) 590080
_________________________________________________________________
block3_conv3 (Conv2D) (None, 56, 56, 256) 590080
_________________________________________________________________
block3_conv4 (Conv2D) (None, 56, 56, 256) 590080
_________________________________________________________________
block3_pool (MaxPooling2D) (None, 28, 28, 256) 0
_________________________________________________________________
block4_conv1 (Conv2D) (None, 28, 28, 512) 1180160
_________________________________________________________________
block4_conv2 (Conv2D) (None, 28, 28, 512) 2359808
_________________________________________________________________
block4_conv3 (Conv2D) (None, 28, 28, 512) 2359808
_________________________________________________________________
block4_conv4 (Conv2D) (None, 28, 28, 512) 2359808
_________________________________________________________________
block4_pool (MaxPooling2D) (None, 14, 14, 512) 0
_________________________________________________________________
block5_conv1 (Conv2D) (None, 14, 14, 512) 2359808
_________________________________________________________________
block5_conv2 (Conv2D) (None, 14, 14, 512) 2359808
_________________________________________________________________
block5_conv3 (Conv2D) (None, 14, 14, 512) 2359808
_________________________________________________________________
block5_conv4 (Conv2D) (None, 14, 14, 512) 2359808
_________________________________________________________________
block5_pool (MaxPooling2D) (None, 7, 7, 512) 0
_________________________________________________________________
flatten (Flatten) (None, 25088) 0
_________________________________________________________________
fc1 (Dense) (None, 4096) 102764544
_________________________________________________________________
fc2 (Dense) (None, 4096) 16781312
_________________________________________________________________
predictions (Dense) (None, 1000) 4097000
=================================================================
Total params: 143,667,240
Trainable params: 143,667,240
Non-trainable params: 0
_________________________________________________________________
Heatmap visualization
def load_images(path, img_w=224, img_h=224):
    paths = glob(path)
    for p in paths:
        inp_img = Image.open(p)
        yield inp_img.resize((img_w, img_h))
for img in load_images('cnn_heatmap-master/img/*jpg'):
    show_heatmap(img, vgg, index=activations_index, alpha=0.8, debug=False)
[('balloon', 0.9993456), ('maraca', 0.0004651971), ('parachute', 0.00011761015)]
[('yawl', 0.5431526), ('schooner', 0.14926615), ('missile', 0.11245762)]
[('dalmatian', 0.9418971), ('whippet', 0.034014802), ('Saluki', 0.0052429065)]
[('African_elephant', 0.88891256), ('tusker', 0.060588483), ('Indian_elephant', 0.048468523)]
[('library', 0.99159205), ('bookcase', 0.0043088547), ('bookshop', 0.0039026877)]
[('military_uniform', 0.31986043), ('rifle', 0.09741558), ('projectile', 0.08197986)]
[('necklace', 0.48364076), ('thimble', 0.09268853), ('chain', 0.07222184)]
[('wood_rabbit', 0.64666176), ('hare', 0.3432548), ('quail', 0.005277607)]
[('racer', 0.763868), ('golfcart', 0.05818335), ('cab', 0.027369801)]
[('space_shuttle', 0.45176795), ('missile', 0.33484006), ('projectile', 0.11019625)]
[('volcano', 0.8036871), ('conch', 0.0742617), ('gown', 0.03851746)]
Example: a typhoon cloud image
img = Image.open('typhoon.png')
# Create a convolutional neural network with 5 convolutional layers
inputs = Input((img.height, img.width, 3))
conv1 = Conv2D(filters=3, kernel_size=3, strides=2, kernel_initializer=Constant(0.03))(inputs)
conv2 = Conv2D(filters=3, kernel_size=3, strides=2, kernel_initializer=Constant(0.03))(conv1)
conv3 = Conv2D(filters=3, kernel_size=3, strides=2, kernel_initializer=Constant(0.03))(conv2)
conv4 = Conv2D(filters=3, kernel_size=3, strides=2, kernel_initializer=Constant(0.03))(conv3)
outputs = Conv2D(filters=3, kernel_size=3, strides=2, kernel_initializer=Constant(0.07))(conv4)
model = Model(inputs, outputs)
model.compile(loss='mse', optimizer='adam')  # dummy parameters
model.summary()
inp_arr = np.array(img).reshape(1, img.height, img.width, 3)
print("#"*5, "Input", "#"*5)
display(img)
# Show the output at each convolutional layer
for i, _ in enumerate(model.layers[:-1], start=1):
    print("#"*5, model.layers[i].name, "#"*5)
    activations = get_activations(i, model)([inp_arr])[0][0]
    # clip to the displayable range before converting to uint8
    activations = np.uint8(np.clip(activations, 0, 255))
    activations = Image.fromarray(activations)
    resized_activations = activations.resize((img.width, img.height), Image.BICUBIC)
    print("Convolution output:")
    display(activations)
    print("Convolution output resized to input image dimensions:")
    display(resized_activations)
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) [(None, 600, 600, 3)] 0
_________________________________________________________________
conv2d (Conv2D) (None, 299, 299, 3) 84
_________________________________________________________________
conv2d_1 (Conv2D) (None, 149, 149, 3) 84
_________________________________________________________________
conv2d_2 (Conv2D) (None, 74, 74, 3) 84
_________________________________________________________________
conv2d_3 (Conv2D) (None, 36, 36, 3) 84
_________________________________________________________________
conv2d_4 (Conv2D) (None, 17, 17, 3) 84
=================================================================
Total params: 420
Trainable params: 420
Non-trainable params: 0
_________________________________________________________________
##### Input #####
Heatmaps of different convolutional layers
def heatmap(model, data_img, layer_idx, img_show=None, pred_idx=None):
    # image preprocessing
    if len(data_img.shape) != 4:
        # the model input needs preprocessing while the displayed image should stay untouched,
        # so the two are passed in separately
        if img_show is None:
            img_show = data_img
        # resize to the model's input resolution, e.g. (224, 224) for VGG16
        input_shape = K.int_shape(model.input)[1:3]
        data_img = image.img_to_array(image.array_to_img(data_img).resize(input_shape))
        # add a batch dimension -> (1, 224, 224, 3)
        data_img = np.expand_dims(data_img, axis=0)
    if pred_idx is None:
        # predict
        preds = model.predict(data_img)
        # take the index of the top prediction
        pred_idx = np.argmax(preds[0])
    # score of the target class
    target_output = model.output[:, pred_idx]
    # the output of the target layer encodes where each channel is looking
    last_conv_layer_output = model.layers[layer_idx].output
    # gradient of the class score w.r.t. the target layer's output,
    # i.e. how much that layer's output influences the result
    grads = K.gradients(target_output, last_conv_layer_output)[0]
    # average the gradients per channel; the larger the value, the more that channel matters
    pooled_grads = K.mean(grads, axis=(0, 1, 2))
    iterate = K.function([model.input], [pooled_grads, last_conv_layer_output[0]])
    pooled_grads_value, conv_layer_output_value = iterate([data_img])
    # multiply where each channel looks by how much that channel matters
    for i in range(conv_layer_output_value.shape[-1]):
        conv_layer_output_value[:, :, i] *= pooled_grads_value[i]
    # average over channels to get the influence of each spatial location on the result
    heatmap = np.mean(conv_layer_output_value, axis=-1)
    # normalize
    heatmap = np.maximum(heatmap, 0)
    heatmap /= np.max(heatmap)
    # plt.matshow(heatmap)
    # plt.show()
    # superimpose on the original image
    # resize to the same size
    heatmap = cv2.resize(heatmap, (img_show.shape[1], img_show.shape[0]))
    heatmap = np.uint8(255 * heatmap)
    # apply the heatmap to the original image; OpenCV colormaps are BGR, so convert to RGB
    superimposed_img = img_show + cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)[:, :, ::-1] * 0.4
    # clip and convert to uint8
    superimposed_img = np.minimum(superimposed_img, 255).astype('uint8')
    return superimposed_img, heatmap

# generate heatmaps for all convolutional layers
def heatmaps(model, data_img, img_show=None):
    if img_show is None:
        img_show = np.array(data_img)
    # resize to the model's input resolution, e.g. (224, 224) for VGG16
    input_shape = K.int_shape(model.input)[1:3]
    data_img = image.img_to_array(image.array_to_img(data_img).resize(input_shape))
    # add a batch dimension -> (1, 224, 224, 3)
    data_img = np.expand_dims(data_img, axis=0)
    # predict
    preds = model.predict(data_img)
    # take the index of the top prediction
    pred_idx = np.argmax(preds[0])
    print("Predicted class: %d (%f)" % (pred_idx, preds[0][pred_idx]))
    indexs = []
    for i in range(len(model.layers)):
        if 'conv' in model.layers[i].name:
            indexs.append(i)
    print('The model has %d convolutional layers' % len(indexs))
    fig = plt.figure(figsize=(18, 12))
    plt.suptitle('heatmaps for each conv')
    n = int(np.ceil(np.sqrt(len(indexs) * 2)))  # grid size for the subplot layout
    for i in range(len(indexs)):
        ret = heatmap(model, data_img, indexs[i], img_show=img_show, pred_idx=pred_idx)
        plt.subplot(n, n, i * 2 + 1).set_title(model.layers[indexs[i]].name)
        plt.imshow(ret[0])
        plt.axis('off')
        plt.subplot(n, n, i * 2 + 2).set_title(model.layers[indexs[i]].name)
        plt.imshow(ret[1])
        plt.axis('off')
    plt.show()
model = VGG16(weights='imagenet')
data_img = image.img_to_array(image.load_img('typhoon.png'))
# VGG16 preprocessing: convert RGB to BGR and subtract the per-channel mean
data_img = preprocess_input(data_img)
img_show = image.img_to_array(image.load_img('typhoon.png'))
heatmaps(model, data_img, img_show)
Predicted class: 147 (0.511088)
The model has 13 convolutional layers