Deep Learning Series Notes, Part 7 (Parameter Tuning)


The goal of this post is to try different numbers of hidden layers, numbers of hidden units, activation functions, and loss functions on the IMDB sentiment-classification task, and to compare the resulting training and validation accuracy.

Varying the number of hidden layers: with 1 hidden layer the model performs best on the validation set; with 4 hidden layers it performs best on the training set but worst on the validation set, a clear case of overfitting.

Varying the number of hidden units: 16 units perform best on the validation set; 128 units perform best on the training set but poorly on the validation set.

Varying the activation function: softplus performs best on the validation set, though softmax keeps improving steadily; tanh performs best on the training set.

Varying the loss function: mean_squared_logarithmic_error performs best on the training set, while binary_crossentropy performs relatively best on the validation set.

Code example

from keras.datasets import imdb

def printshape(x):
    #print('value =', x)
    print('#----------------')
    print('#shape =', x.shape)
    print('#ndim =', x.ndim)
    print('#dtype =', x.dtype)

#Load the IMDB dataset
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(path="D:/Python36/Coding/PycharmProjects/ttt/imdb.npz", num_words=10000)

max([max(sequence) for sequence in train_data]) #9999, the largest word index, since num_words=10000 caps the vocabulary

word_index = imdb.get_word_index(path="D:/Python36/Coding/PycharmProjects/ttt/imdb_word_index.json")
#44864, 'gussied': 65111, "bullock's": 32066, "'delivery'": 65112
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
#65111: 'gussied', 32066: "bullock's", 65112: "'delivery'"
decoded_review = ' '.join([reverse_word_index.get(i - 3, '?') for i in train_data[0]])
#train_data[0] = [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]
#decoded_review= ? this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for ? and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also ? to the two little boy's that played the ? of norman and paul they were just brilliant children are often left out of the ? list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you think the whole story was so lovely because it was true and was someone's life after all that was shared with us all

test_sentence = 'this film was just brilliant casting'
test_index = ' '.join([str(word_index.get(i) + 3) for i in test_sentence.split(' ')])
#test_index = 14 22 16 43 530 973
#Prepare the data. There are two options:
#Option 1: pad the lists so they all have the same length, convert them into an integer tensor of shape (samples, word_indices), and start the network with a layer that can handle such integer tensors (an Embedding layer); see the sketch below.
#Option 2: one-hot encode the lists into vectors of 0s and 1s. For example, the sequence [3, 5] becomes a 10,000-dimensional vector with 1s at indices 3 and 5 and 0s everywhere else. The first layer of the network can then be a Dense layer, which handles floating-point vector data. This is the approach used in the rest of this post.
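#A minimal sketch of Option 1, for comparison only; the maxlen=256 cutoff and the
#8-dimensional embedding are illustrative choices, not values from the experiments below.
from keras import models, layers
from keras.preprocessing.sequence import pad_sequences

padded_train = pad_sequences(train_data, maxlen=256)  #integer tensor of shape (25000, 256)
embedding_model = models.Sequential()
embedding_model.add(layers.Embedding(10000, 8, input_length=256))  #maps each word index to an 8-dim vector
embedding_model.add(layers.Flatten())
embedding_model.add(layers.Dense(1, activation='sigmoid'))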
import numpy as np
#Create an all-zero matrix of shape (len(sequences), dimension)
def vectorize_sequences(sequences, dimension=10000):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.
    return results
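#Quick check of vectorize_sequences on the toy example above: [3, 5] becomes a
#10,000-dimensional vector that is 1 only at indices 3 and 5.
demo = vectorize_sequences([[3, 5]])
print(demo.shape)                          #(1, 10000)
print(demo[0, 3], demo[0, 5], demo[0, 0])  #1.0 1.0 0.0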

#Vectorize the training and test data
x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)
#x_train
#[[0. 1. 1. ... 0. 0. 0.]
# ...
# [0. 1. 1. ... 0. 0. 0.]]

#Vectorize the labels
y_train = np.asarray(train_labels).astype('float32')
y_test = np.asarray(test_labels).astype('float32')
#Set aside a validation set
x_val = x_train[:10000]
partial_x_train = x_train[10000:]
y_val = y_train[:10000]
partial_y_train = y_train[10000:]

from keras import models
from keras import layers
from keras import optimizers
import matplotlib.pyplot as plt

colors = ['red','blue','green','black']
units = [1,2,3,4]

fig=plt.figure(figsize=(8,6))
ax=fig.add_subplot(1,1,1)
for i, unit in enumerate(units):
    model = models.Sequential()
    model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
    if unit == 1:
        model.add(layers.Dense(1, activation='sigmoid'))
    elif unit == 2:
        model.add(layers.Dense(16, activation='relu'))
        model.add(layers.Dense(1, activation='sigmoid'))
    elif unit == 3:
        model.add(layers.Dense(16, activation='relu'))
        model.add(layers.Dense(16, activation='relu'))
        model.add(layers.Dense(1, activation='sigmoid'))
    else:
        model.add(layers.Dense(16, activation='relu'))
        model.add(layers.Dense(16, activation='relu'))
        model.add(layers.Dense(16, activation='relu'))
        model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
    history = model.fit(partial_x_train,partial_y_train,epochs=10,batch_size=256,validation_data=(x_val, y_val))
    history_dict = history.history
    print("history_dict%s =" %history_dict)
    acc = history_dict['acc']
    val_acc = history_dict['val_acc']
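    #note: older Keras versions store accuracy under 'acc'/'val_acc'; newer ones use 'accuracy'/'val_accuracy'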
    epochs = range(1, len(acc) + 1)
    ax.plot(epochs, acc, 'bo', label='Training acc,layer=%s' %unit,color=colors[i])
    ax.plot(epochs, val_acc, 'b', label='Validation acc,layer=%s' %unit,color=colors[i])
ax.legend(loc='best')
ax.set_title('Training and validation accuracy by number of hidden layers')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
plt.show()

units = [16,32,64,128]
fig=plt.figure(figsize=(8,6))
ax=fig.add_subplot(1,1,1)
for i, unit in enumerate(units):
    model = models.Sequential()
    model.add(layers.Dense(unit, activation='relu', input_shape=(10000,)))
    model.add(layers.Dense(unit, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
    history = model.fit(partial_x_train,partial_y_train,epochs=10,batch_size=256,validation_data=(x_val, y_val))
    history_dict = history.history
    print("history_dict%s =" %history_dict)
    acc = history_dict['acc']
    val_acc = history_dict['val_acc']
    epochs = range(1, len(acc) + 1)
    ax.plot(epochs, acc, 'bo', label='Training acc,hidden unit=%s' %unit,color=colors[i])
    ax.plot(epochs, val_acc, 'b', label='Validation acc,hidden unit=%s' %unit,color=colors[i])
ax.legend(loc='best')
ax.set_title('Training and validation accuracy by number of hidden units')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
plt.show()

units = ['relu','tanh','softmax','softplus']
fig=plt.figure(figsize=(8,6))
ax=fig.add_subplot(1,1,1)
for i, unit in enumerate(units):
    model = models.Sequential()
    model.add(layers.Dense(16, activation=unit, input_shape=(10000,)))
    model.add(layers.Dense(16, activation=unit))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
    history = model.fit(partial_x_train,partial_y_train,epochs=10,batch_size=256,validation_data=(x_val, y_val))
    history_dict = history.history
    print("history_dict%s =" %history_dict)
    acc = history_dict['acc']
    val_acc = history_dict['val_acc']
    epochs = range(1, len(acc) + 1)
    ax.plot(epochs, acc, 'bo', label='Training acc,activation=%s' %unit,color=colors[i])
    ax.plot(epochs, val_acc, 'b', label='Validation acc,activation=%s' %unit,color=colors[i])
ax.legend(loc='best')
ax.set_title('Training and validation accuracy by activation function')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
plt.show()

units = ['binary_crossentropy','mean_squared_error','mean_squared_logarithmic_error','hinge']
fig=plt.figure(figsize=(8,6))
ax=fig.add_subplot(1,1,1)
for i, unit in enumerate(units):
    model = models.Sequential()
    model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
    model.add(layers.Dense(16, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(optimizer='rmsprop', loss=unit, metrics=['accuracy'])
    history = model.fit(partial_x_train,partial_y_train,epochs=10,batch_size=256,validation_data=(x_val, y_val))
    history_dict = history.history
    print("history_dict%s =" %history_dict)
    acc = history_dict['acc']
    val_acc = history_dict['val_acc']
    epochs = range(1, len(acc) + 1)
    ax.plot(epochs, acc, 'bo', label='Training acc,loss function=%s' %unit,color=colors[i])
    ax.plot(epochs, val_acc, 'b', label='Validation acc,loss function=%s' %unit,color=colors[i])
ax.legend(loc='best')
ax.set_title('Training and validation accuracy by loss function')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
plt.show()

#Raw data usually needs a fair amount of preprocessing to turn it into tensors that can be fed into a neural network. Sequences of words can be encoded as binary vectors, but there are other encoding options too.
#Stacks of Dense layers with relu activations can solve a wide range of problems (including sentiment classification), and you will likely use them frequently.
#In a binary classification problem (two output classes), the network should end with a Dense layer with one unit and a sigmoid activation: the output is a scalar between 0 and 1, representing a probability.
#With such a scalar sigmoid output on a binary classification problem, the loss function to use is binary_crossentropy.
#Whatever your problem, the rmsprop optimizer is generally a good enough choice; that's one less thing to worry about.
#As a neural network gets better on its training data, it eventually starts overfitting and ends up with increasingly worse results on data it has never seen before. Always monitor performance on data outside the training set.
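#A minimal sketch pulling the observations above together: a small model (one 16-unit
#relu hidden layer, sigmoid output, binary_crossentropy, rmsprop) retrained on the full
#training set. The 4-epoch cutoff is an assumption based on where validation accuracy
#peaks in runs like those above, not a number reported in this post.
final_model = models.Sequential()
final_model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
final_model.add(layers.Dense(1, activation='sigmoid'))
final_model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
final_model.fit(x_train, y_train, epochs=4, batch_size=256)
test_loss, test_acc = final_model.evaluate(x_test, y_test)
print('test accuracy =', test_acc)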
