1 搭建卷积神经网络
1.0 网络结构
1.2 网络分析
序号 | 网络层 | 描述 |
---|---|---|
1 | 卷积层 | 一张原始图像(28, 28, 1),batch=1,经过卷积处理,得到图像特征(28, 28, 32) |
2 | 下采样 | 即池化层,最大池化后图像特征(14, 14, 32) |
3 | 卷积层 | 将池化特征(14, 14, 32)卷积处理后,得到图像特征(14, 14, 64) |
4 | 下采样 | 最大池化,得到图像特征(7, 7, 64) |
5 | 全连接层 | 将上一层即池化层的图像特征经过矩阵内积计算,拉成一个向量(7×7×64=3136),特征为(1, 3136) |
6 | 全连接层 | 继续矩阵计算,得到特征为(1, 512) |
7 | 全连接 | 高斯矩阵计算,得到特征(1, 10) |
2 网络结构
2.1 网络结构可视化
2.2 网络结构-源
def conv2d(input_tensor, ksize, strides, pad, name_w, name_b):
    """Apply a 2-D convolution, add a bias and pass the result through ReLU.

    Args:
        input_tensor: Input feature map handed to ``tf.nn.conv2d``.
        ksize: Kernel shape; its last entry is also used as the bias length.
        strides: Strides handed to ``tf.nn.conv2d``.
        pad: Padding mode string handed to ``tf.nn.conv2d`` (e.g. "SAME").
        name_w: Variable name for the kernel weights.
        name_b: Variable name for the biases.

    Returns:
        The ReLU-activated convolution output tensor.
    """
    weights = tf.get_variable(
        name=name_w,
        shape=ksize,
        dtype=tf.float32,
        initializer=tf.truncated_normal_initializer(stddev=0.1),
    )
    biases = tf.get_variable(
        name=name_b,
        shape=[ksize[-1]],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.1),
    )
    conv = tf.nn.conv2d(input_tensor, weights, strides=strides, padding=pad)
    # The bias is added before the activation. The published listing had lost
    # the "+" operator here, which made the line a syntax error.
    return tf.nn.relu(conv + biases)
def max_pooling(input_tensor, ksize, strides, pad):
    """Down-sample a feature map with ``tf.nn.max_pool``.

    Args:
        input_tensor: Feature map to pool.
        ksize: Pooling window size.
        strides: Pooling strides.
        pad: Padding mode string (e.g. "SAME").

    Returns:
        The pooled tensor.
    """
    return tf.nn.max_pool(
        input_tensor, ksize=ksize, strides=strides, padding=pad
    )
def fullc(input_tensor, wsize, name_w, name_b):
    """Apply a fully connected (affine) layer without an activation.

    Args:
        input_tensor: 2-D input multiplied by the weight matrix.
        wsize: Weight matrix shape; its last entry is also the bias length.
        name_w: Variable name for the weights.
        name_b: Variable name for the biases.

    Returns:
        ``input_tensor @ weights + biases``.
    """
    weights = tf.get_variable(
        name=name_w,
        shape=wsize,
        dtype=tf.float32,
        initializer=tf.truncated_normal_initializer(stddev=0.1),
    )
    biases = tf.get_variable(
        name=name_b,
        shape=[wsize[-1]],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.1),
    )
    # The published listing had lost the "+" operator before ``biases``
    # (a syntax error). The result is also kept in a local that does not
    # shadow the function's own name.
    output = tf.matmul(input_tensor, weights) + biases
    return output
def inference(inputs, keep_prob):
    """Build the forward pass: conv -> pool -> conv -> pool -> fc -> dropout -> fc.

    Args:
        inputs: Image batch tensor; the first kernel is [5, 5, 1, 32], so a
            single input channel is expected.
        keep_prob: Keep probability handed to ``tf.nn.dropout`` after the
            first fully connected layer.

    Returns:
        Output of the last fully connected layer (10 units); no softmax is
        applied here.
    """
    unit_stride = [1, 1, 1, 1]
    pool_window = [1, 2, 2, 1]

    with tf.name_scope("conv_1"):
        conv_1 = conv2d(inputs, [5, 5, 1, 32], unit_stride, "SAME", "cw_1", "cb_1")
    with tf.name_scope("max_pool_1"):
        pooling_1 = max_pooling(conv_1, pool_window, pool_window, "SAME")
    with tf.name_scope("conv_2"):
        conv_2 = conv2d(pooling_1, [5, 5, 32, 64], unit_stride, "SAME", "cw_2", "cb_2")
    with tf.name_scope("max_pool_2"):
        pooling_2 = max_pooling(conv_2, pool_window, pool_window, "SAME")

    # Flatten every non-batch dimension into one vector per example.
    _, height, width, channels = pooling_2.get_shape().as_list()
    flatten_1 = height * width * channels
    feature_reshape = tf.reshape(pooling_2, [-1, flatten_1])

    with tf.name_scope("fc_1"):
        hidden = fullc(feature_reshape, [flatten_1, 512], "fw_1", "fb_1")
        hidden = tf.nn.dropout(hidden, keep_prob)
    with tf.name_scope("fc_2"):
        logits = fullc(hidden, [512, 10], "fw_2", "fb_2")
    return logits
3 训练及测试
3.1 载入数据
# Load MNIST with one-hot labels (the listing label that was fused onto this
# statement has been removed so the snippet is valid Python).
mnist = input_data.read_data_sets("./mnist_data", one_hot=True)
# Fetch one training batch of BATCH_SIZE examples.
img_inputs, img_labels = mnist.train.next_batch(BATCH_SIZE)
# Reshape the batch to (BATCH_SIZE, 28, 28, 1), matching the network input.
img_inputs = np.reshape(img_inputs, (BATCH_SIZE, 28, 28, 1))
3.2 训练及保存模型
def train_new(mnist):
    """Train the network with plain gradient descent and log summaries.

    Every 10 iterations the loss and accuracy are printed, a checkpoint is
    written to ``MODEL_PATH/MODEL_NAME`` and the merged summary is added to
    the event file under ``LOG_DIR``.

    Args:
        mnist: Dataset object; ``mnist.train.next_batch`` is used to draw
            batches of ``BATCH_SIZE`` examples.
    """
    inputs = tf.placeholder(tf.float32, [None, 28, 28, 1], name="img-inputs")
    labels = tf.placeholder(tf.float32, [None, 10], name="label-outputs")
    prediction = inference(inputs, 0.5)
    loss = loss_cal(prediction, labels)
    accuracy = evaluation(prediction, labels)
    summary_op = tf.summary.merge(tf.get_collection(tf.GraphKeys.SUMMARIES))
    train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss)
    saver = tf.train.Saver()
    checkpoint_path = os.path.join(MODEL_PATH, MODEL_NAME)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        summary_writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
        try:
            for i in range(TRAINING_STEPS):
                img_inputs, img_labels = mnist.train.next_batch(BATCH_SIZE)
                img_inputs = np.reshape(img_inputs, (BATCH_SIZE, 28, 28, 1))
                _, loss_value, acc, summary = sess.run(
                    [train_step, loss, accuracy, summary_op],
                    feed_dict={inputs: img_inputs, labels: img_labels},
                )
                if i % 10 == 0:
                    print("After {} training steps, loss is {}, accuracy: {}".format(i, loss_value, acc))
                    saver.save(sess, checkpoint_path)
                    summary_writer.add_summary(summary, i)
        finally:
            # Flush and release the event file even if training is interrupted.
            summary_writer.close()
4 载入模型及预测
def load_model_only_with_params():
    """Rebuild the network, restore the latest checkpoint and classify one image.

    The first MNIST test image is fed through the restored network and the
    predicted digit is printed next to the true label.

    Raises:
        IOError: If no checkpoint is found under "./conv_models".
    """
    g_params = tf.Graph()
    with g_params.as_default():
        inputs = tf.placeholder(tf.float32, [None, 28, 28, 1], name="img-inputs")
        # keep_prob=1.0 disables dropout; with 0.5 the prediction would be
        # computed on randomly dropped activations.
        prediction = inference(inputs, 1.0)
        saver = tf.train.Saver()

    mnist = input_data.read_data_sets("./mnist_data", one_hot=True)
    img = np.reshape(mnist.test.images[0], (1, 28, 28, 1))
    img_label = np.argmax(mnist.test.labels[0])

    ckpt = tf.train.get_checkpoint_state("./conv_models")
    if ckpt is None or not ckpt.model_checkpoint_path:
        raise IOError("no checkpoint found in ./conv_models")

    with tf.Session(graph=g_params) as sess:
        saver.restore(sess, ckpt.model_checkpoint_path)
        pre = sess.run(prediction, feed_dict={inputs: img})

    # ``pre`` is already a NumPy array, so no extra graph op is needed.
    pre_num = np.argmax(pre, 1)
    print("prediction: {}, real: {}".format(pre_num[0], img_label))
运行结果:prediction: 7, real: 7
5 完整程序
import os

import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Hyper-parameters used by train() (decaying learning rate + moving average).
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
REGULARAZTION_RATE = 0.0001
MOVING_AVERAGE_DECAY = 0.99
# Fixed learning rate used by train_new().
LEARNING_RATE = 0.0001
TRAINING_STEPS = 30000
BATCH_SIZE = 100
# Checkpoint and TensorBoard output locations.
MODEL_PATH = "./conv_models"
MODEL_NAME = "conv_model.ckpt"
LOG_DIR = "./logs"

# Create the checkpoint directory up front; exist_ok avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(MODEL_PATH, exist_ok=True)
def conv2d(input_tensor, ksize, strides, pad, name_w, name_b):
    """Apply a 2-D convolution, add a bias and pass the result through ReLU.

    Args:
        input_tensor: Input feature map handed to ``tf.nn.conv2d``.
        ksize: Kernel shape; its last entry is also used as the bias length.
        strides: Strides handed to ``tf.nn.conv2d``.
        pad: Padding mode string handed to ``tf.nn.conv2d`` (e.g. "SAME").
        name_w: Variable name for the kernel weights.
        name_b: Variable name for the biases.

    Returns:
        The ReLU-activated convolution output tensor.
    """
    weights = tf.get_variable(
        name=name_w,
        shape=ksize,
        dtype=tf.float32,
        initializer=tf.truncated_normal_initializer(stddev=0.1),
    )
    biases = tf.get_variable(
        name=name_b,
        shape=[ksize[-1]],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.1),
    )
    conv = tf.nn.conv2d(input_tensor, weights, strides=strides, padding=pad)
    # The bias is added before the activation. The published listing had lost
    # the "+" operator here, which made the line a syntax error.
    return tf.nn.relu(conv + biases)
def max_pooling(input_tensor, ksize, strides, pad):
    """Down-sample a feature map with ``tf.nn.max_pool``.

    Args:
        input_tensor: Feature map to pool.
        ksize: Pooling window size.
        strides: Pooling strides.
        pad: Padding mode string (e.g. "SAME").

    Returns:
        The pooled tensor.
    """
    return tf.nn.max_pool(
        input_tensor, ksize=ksize, strides=strides, padding=pad
    )
def fullc(input_tensor, wsize, name_w, name_b):
    """Apply a fully connected (affine) layer without an activation.

    Args:
        input_tensor: 2-D input multiplied by the weight matrix.
        wsize: Weight matrix shape; its last entry is also the bias length.
        name_w: Variable name for the weights.
        name_b: Variable name for the biases.

    Returns:
        ``input_tensor @ weights + biases``.
    """
    weights = tf.get_variable(
        name=name_w,
        shape=wsize,
        dtype=tf.float32,
        initializer=tf.truncated_normal_initializer(stddev=0.1),
    )
    biases = tf.get_variable(
        name=name_b,
        shape=[wsize[-1]],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.1),
    )
    # The published listing had lost the "+" operator before ``biases``
    # (a syntax error). The result is also kept in a local that does not
    # shadow the function's own name.
    output = tf.matmul(input_tensor, weights) + biases
    return output
def inference(inputs, keep_prob):
    """Build the forward pass: conv -> pool -> conv -> pool -> fc -> dropout -> fc.

    Args:
        inputs: Image batch tensor; the first kernel is [5, 5, 1, 32], so a
            single input channel is expected.
        keep_prob: Keep probability handed to ``tf.nn.dropout`` after the
            first fully connected layer.

    Returns:
        Output of the last fully connected layer (10 units); no softmax is
        applied here.
    """
    unit_stride = [1, 1, 1, 1]
    pool_window = [1, 2, 2, 1]

    with tf.name_scope("conv_1"):
        conv_1 = conv2d(inputs, [5, 5, 1, 32], unit_stride, "SAME", "cw_1", "cb_1")
    with tf.name_scope("max_pool_1"):
        pooling_1 = max_pooling(conv_1, pool_window, pool_window, "SAME")
    with tf.name_scope("conv_2"):
        conv_2 = conv2d(pooling_1, [5, 5, 32, 64], unit_stride, "SAME", "cw_2", "cb_2")
    with tf.name_scope("max_pool_2"):
        pooling_2 = max_pooling(conv_2, pool_window, pool_window, "SAME")

    # Flatten every non-batch dimension into one vector per example.
    _, height, width, channels = pooling_2.get_shape().as_list()
    flatten_1 = height * width * channels
    feature_reshape = tf.reshape(pooling_2, [-1, flatten_1])

    with tf.name_scope("fc_1"):
        hidden = fullc(feature_reshape, [flatten_1, 512], "fw_1", "fb_1")
        hidden = tf.nn.dropout(hidden, keep_prob)
    with tf.name_scope("fc_2"):
        logits = fullc(hidden, [512, 10], "fw_2", "fb_2")
    return logits
def loss_cal(prediction, labels):
    """Compute the mean softmax cross-entropy loss and register a summary.

    Args:
        prediction: Unnormalised class scores (logits).
        labels: Label tensor; ``tf.argmax(labels, 1)`` is taken to obtain the
            class indices, so one row per example is expected.

    Returns:
        Scalar tensor holding the mean cross-entropy over the batch.
    """
    with tf.name_scope("loss"):
        class_ids = tf.argmax(labels, 1)
        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=class_ids, logits=prediction
        )
        mean_loss = tf.reduce_mean(per_example)
        tf.summary.scalar("loss", mean_loss)
    return mean_loss
def evaluation(logits, labels):
    """Compute batch accuracy and register a scalar summary for it.

    Args:
        logits: Class scores; the arg-max along axis 1 is the predicted class.
        labels: Label tensor; the arg-max along axis 1 is the true class.

    Returns:
        Scalar float32 tensor: the fraction of examples predicted correctly.
    """
    with tf.name_scope("accuracy"):
        predicted = tf.argmax(logits, 1)
        expected = tf.argmax(labels, 1)
        hits = tf.cast(tf.equal(predicted, expected), tf.float32)
        batch_accuracy = tf.reduce_mean(hits)
        tf.summary.scalar("accuracy", batch_accuracy)
    return batch_accuracy
def train(mnist):
    """Train with an exponentially decaying learning rate and weight averaging.

    The learning rate starts at ``LEARNING_RATE_BASE`` and is decayed by
    ``LEARNING_RATE_DECAY`` with a decay period of
    ``mnist.train.num_examples / BATCH_SIZE`` steps. Every 10 iterations the
    loss is printed and a checkpoint suffixed with the global step is saved.

    Args:
        mnist: Dataset object; ``mnist.train.next_batch`` and
            ``mnist.train.num_examples`` are used.
    """
    inputs = tf.placeholder(tf.float32, [None, 28, 28, 1], name="img-inputs")
    labels = tf.placeholder(tf.float32, [None, 10], name="label-outputs")
    global_step = tf.Variable(0, trainable=False)

    # Build the network BEFORE creating the moving-average op. Previously
    # tf.trainable_variables() was queried while the graph held no trainable
    # variables yet, so the averaging op tracked nothing.
    prediction = inference(inputs, 0.5)
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=prediction, labels=tf.argmax(labels, 1)
    )
    loss = tf.reduce_mean(cross_entropy)
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
    )
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step
    )
    # Group the optimiser step and the moving-average update into one op.
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name="train")

    saver = tf.train.Saver()
    checkpoint_path = os.path.join(MODEL_PATH, MODEL_NAME)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(TRAINING_STEPS):
            img_inputs, img_labels = mnist.train.next_batch(BATCH_SIZE)
            img_inputs = np.reshape(img_inputs, (BATCH_SIZE, 28, 28, 1))
            _, loss_value, step = sess.run(
                [train_op, loss, global_step],
                feed_dict={inputs: img_inputs, labels: img_labels},
            )
            if i % 10 == 0:
                print("After {} training steps, loss is {}".format(step, loss_value))
                saver.save(sess, checkpoint_path, global_step=global_step)
def train_new(mnist):
    """Train the network with plain gradient descent and log summaries.

    Every 10 iterations the loss and accuracy are printed, a checkpoint is
    written to ``MODEL_PATH/MODEL_NAME`` and the merged summary is added to
    the event file under ``LOG_DIR``.

    Args:
        mnist: Dataset object; ``mnist.train.next_batch`` is used to draw
            batches of ``BATCH_SIZE`` examples.
    """
    inputs = tf.placeholder(tf.float32, [None, 28, 28, 1], name="img-inputs")
    labels = tf.placeholder(tf.float32, [None, 10], name="label-outputs")
    prediction = inference(inputs, 0.5)
    loss = loss_cal(prediction, labels)
    accuracy = evaluation(prediction, labels)
    summary_op = tf.summary.merge(tf.get_collection(tf.GraphKeys.SUMMARIES))
    train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss)
    saver = tf.train.Saver()
    checkpoint_path = os.path.join(MODEL_PATH, MODEL_NAME)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        summary_writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
        try:
            for i in range(TRAINING_STEPS):
                img_inputs, img_labels = mnist.train.next_batch(BATCH_SIZE)
                img_inputs = np.reshape(img_inputs, (BATCH_SIZE, 28, 28, 1))
                _, loss_value, acc, summary = sess.run(
                    [train_step, loss, accuracy, summary_op],
                    feed_dict={inputs: img_inputs, labels: img_labels},
                )
                if i % 10 == 0:
                    print("After {} training steps, loss is {}, accuracy: {}".format(i, loss_value, acc))
                    saver.save(sess, checkpoint_path)
                    summary_writer.add_summary(summary, i)
        finally:
            # Flush and release the event file even if training is interrupted.
            summary_writer.close()
def load_model_only_with_params():
    """Rebuild the network, restore the latest checkpoint and classify one image.

    The first MNIST test image is fed through the restored network and the
    predicted digit is printed next to the true label.

    Raises:
        IOError: If no checkpoint is found under "./conv_models".
    """
    g_params = tf.Graph()
    with g_params.as_default():
        inputs = tf.placeholder(tf.float32, [None, 28, 28, 1], name="img-inputs")
        # keep_prob=1.0 disables dropout; with 0.5 the prediction would be
        # computed on randomly dropped activations.
        prediction = inference(inputs, 1.0)
        saver = tf.train.Saver()

    mnist = input_data.read_data_sets("./mnist_data", one_hot=True)
    img = np.reshape(mnist.test.images[0], (1, 28, 28, 1))
    img_label = np.argmax(mnist.test.labels[0])

    ckpt = tf.train.get_checkpoint_state("./conv_models")
    if ckpt is None or not ckpt.model_checkpoint_path:
        raise IOError("no checkpoint found in ./conv_models")

    with tf.Session(graph=g_params) as sess:
        saver.restore(sess, ckpt.model_checkpoint_path)
        pre = sess.run(prediction, feed_dict={inputs: img})

    # ``pre`` is already a NumPy array, so no extra graph op is needed.
    pre_num = np.argmax(pre, 1)
    print("prediction: {}, real: {}".format(pre_num[0], img_label))
def main(argv=None):
    """Script entry point: load MNIST and train the network.

    Args:
        argv: Unused; accepted so the function can be invoked by
            ``tf.app.run()``.
    """
    # The published listing had a stray trailing "." on this statement,
    # which was a syntax error.
    mnist = input_data.read_data_sets("./mnist_data", one_hot=True)
    # Training
    train_new(mnist)
    # To load the trained model and run a prediction instead, call:
    # load_model_only_with_params()


if __name__ == "__main__":
    # tf.app.run() is expected to parse command-line flags and call main().
    tf.app.run()