Table of Contents

- 1. CNN (Convolutional Neural Network)
- 2. Pretrained Models
- 3. RNN (Recurrent Neural Network)

Based on: 简单粗暴 TensorFlow 2 (A Concise Handbook of TensorFlow 2)
1. CNN (Convolutional Neural Network)

Convolutional neural networks, and computing the output size after a convolution: size = (n + 2p - f)/s + 1, where n is the input size, f the kernel size, p the padding and s the stride. For example, with n = 28, f = 5, s = 1 and 'same' padding (p = 2): (28 + 2*2 - 5)/1 + 1 = 28, so the size is unchanged. Key layers: tf.keras.layers.Conv2D, tf.keras.layers.MaxPool2D.
# CNN model
class myCNN(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(
            filters=32,
            kernel_size=[5, 5],
            padding='same',
            activation='relu'
        )
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2)
        self.conv2 = tf.keras.layers.Conv2D(
            filters=64,
            kernel_size=[5, 5],
            padding='same',
            activation='relu'
        )
        self.pool2 = tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2)
        self.flatten = tf.keras.layers.Reshape(target_shape=(7 * 7 * 64,))
        self.dense1 = tf.keras.layers.Dense(units=1024, activation='relu')
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):        # [m, 28, 28, 1]
        # size = (n + 2p - f)/s + 1; 'same' padding keeps the size unchanged
        x = self.conv1(inputs)     # [m, 28, 28, 32]
        # for pooling, p is usually 0
        x = self.pool1(x)          # [m, 14, 14, 32]
        x = self.conv2(x)          # [m, 14, 14, 64]
        x = self.pool2(x)          # [m, 7, 7, 64]
        x = self.flatten(x)        # [m, 7*7*64]
        x = self.dense1(x)         # [m, 1024]
        x = self.dense2(x)         # [m, 10]
        outputs = tf.nn.softmax(x) # [m, 10]
        return outputs
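A quick sanity check (my addition, not from the original notes): pass a dummy batch through the model to confirm the shapes annotated in the comments above.

import tensorflow as tf
import numpy as np

model = myCNN()
dummy = np.zeros((8, 28, 28, 1), dtype=np.float32)  # a fake batch of 8 MNIST-sized images
probs = model(dummy)
print(probs.shape)  # expected: (8, 10), one probability distribution per image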
2. Pretrained Models

mymodel = tf.keras.applications.MobileNetV2() — built-in models such as VGG16, VGG19, ResNet and MobileNet can be loaded this way and initialized with pretrained weights.
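For instance, the other built-in models listed above are loaded the same way (a minimal sketch; the weights argument controls whether the pretrained ImageNet weights are downloaded):

import tensorflow as tf

vgg16 = tf.keras.applications.VGG16(weights='imagenet')  # initialized with pretrained ImageNet weights
resnet = tf.keras.applications.ResNet50(weights=None)    # randomly initialized, no download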
import tensorflow as tf
import tensorflow_datasets as tfds

num_epoch = 2
batch_size = 16
learning_rate = 1e-3

version = tf.__version__
gpu_ok = tf.config.list_physical_devices('GPU')
print("tf version:", version, "\nuse GPU:", gpu_ok)

# automatically downloads and loads the dataset
dataset = tfds.load("tf_flowers", split=tfds.Split.TRAIN, as_supervised=True)
# normalize, shuffle and batch the data
dataset = dataset.map(lambda img, label: (tf.image.resize(img, (224, 224)) / 255.0, label)).shuffle(1024).batch(
    batch_size)

# load the built-in model
model = tf.keras.applications.MobileNetV2(include_top=True, weights=None, classes=5)
# weights defaults to "imagenet"; pass None to skip the pretrained weights. include_top keeps the final FC layer
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
for e in range(num_epoch):
    for images, labels in dataset:
        with tf.GradientTape() as tape:
            pred = model(images, training=True)  # ***** set training mode *****
            loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=labels, y_pred=pred)
            loss = tf.reduce_mean(loss)
            print("loss: {}".format(loss.numpy()))
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(grads_and_vars=zip(grads, model.trainable_variables))
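To actually benefit from the pretrained weights instead of training from scratch as above, a common transfer-learning pattern (a sketch of my own, not from the original notes) is to load the ImageNet weights without the top FC layer, freeze the convolutional base, and train only a new classification head:

base = tf.keras.applications.MobileNetV2(include_top=False, weights='imagenet',
                                         input_shape=(224, 224, 3), pooling='avg')
base.trainable = False  # freeze the pretrained convolutional base
model = tf.keras.Sequential([
    base,
    tf.keras.layers.Dense(5, activation='softmax'),  # new head for the 5 flower classes
])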
Note: if the dataset downloads slowly, I have uploaded it to CSDN for free download.
3. RNN (Recurrent Neural Network)

- Data preprocessing: build the character set and the char ↔ idx mappings in both directions
- get_batch: draw batch_size samples and, for each, the next character as the label
import tensorflow as tf
import numpy as np

class Dataloader():
    def __init__(self):
        path = tf.keras.utils.get_file('nietzsche.txt',
                                       origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
        with open(path, encoding='utf-8') as f:
            self.raw_text = f.read().lower()
        self.chars = sorted(list(set(self.raw_text)))
        self.char_idx = dict((c, i) for i, c in enumerate(self.chars))
        self.idx_char = dict((i, c) for i, c in enumerate(self.chars))
        self.text = [self.char_idx[c] for c in self.raw_text]  # ids of the raw text

    def get_batch(self, seq_len, batch_size):
        seq = []
        next_char = []
        for i in range(batch_size):
            idx = np.random.randint(0, len(self.text) - seq_len)
            seq.append(self.text[idx: idx + seq_len])
            next_char.append(self.text[idx + seq_len])
        return np.array(seq), np.array(next_char)  # [batch_size, seq_len], [batch_size]
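A quick check of the shapes returned by get_batch (my addition):

data_loader = Dataloader()
seq, next_char = data_loader.get_batch(seq_len=40, batch_size=64)
print(seq.shape, next_char.shape)  # (64, 40) (64,)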
- Modeling with tf.keras.layers.LSTMCell
class myRNN(tf.keras.Model):
    def __init__(self, num_chars, batch_size, seq_len):
        super().__init__()
        self.num_chars = num_chars
        self.seq_len = seq_len
        self.batch_size = batch_size
        self.cell = tf.keras.layers.LSTMCell(units=256)
        self.dense = tf.keras.layers.Dense(units=self.num_chars)

    def call(self, inputs, from_logits=False):
        inputs = tf.one_hot(inputs, depth=self.num_chars)  # [batch_size, seq_len, num_chars]
        # get the initial state
        state = self.cell.get_initial_state(batch_size=self.batch_size, dtype=tf.float32)  # [batch_size, 256]
        for t in range(self.seq_len):
            output, state = self.cell(inputs[:, t, :], state)
        logits = self.dense(output)  # [batch_size, num_chars]
        if from_logits:  # controls whether to apply softmax normalization
            return logits
        else:
            return tf.nn.softmax(logits)

    def predict(self, inputs, temperature=1.0):
        # temperature adjusts the probabilities, controlling the diversity of the sampled characters
        batch_size, _ = tf.shape(inputs)
        logits = self(inputs, from_logits=True)
        # calling self invokes __call__(), which in turn calls call()
        prob = tf.nn.softmax(logits / temperature).numpy()  # [batch_size, num_chars]
        return np.array([np.random.choice(self.num_chars, p=prob[i, :])
                         for i in range(batch_size.numpy())])
        # for each sample, draw the next character according to the predicted probabilities
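To see concretely how temperature reshapes the distribution, here is a minimal sketch with made-up logits: dividing the logits by a temperature below 1 sharpens the softmax toward the most likely character, while a temperature above 1 flattens it, so rarer characters get sampled more often.

import numpy as np

def softmax(x):
    e = np.exp(x - x.max())
    return e / e.sum()

logits = np.array([2.0, 1.0, 0.1])  # made-up logits over 3 characters
for T in [0.2, 1.0, 1.2]:
    print(T, softmax(logits / T).round(3))
# T=0.2 -> [0.993 0.007 0.   ]  almost deterministic
# T=1.0 -> [0.659 0.242 0.099]  the raw distribution
# T=1.2 -> [0.61  0.265 0.125]  slightly flatter, more diverse sampling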
- Training

num_batches = 1000
seq_len = 40  # input sequence length
batch_size = 64
learning_rate = 1e-3

data_loader = Dataloader()
model = myRNN(num_chars=len(data_loader.chars),
              batch_size=batch_size,
              seq_len=seq_len)
# optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
# training loop
for i in range(num_batches):
    # get a batch of data
    X, y = data_loader.get_batch(seq_len, batch_size)
    # gradient tape
    with tf.GradientTape() as tape:
        # forward pass
        y_pred = model(X)
        # compute the loss
        loss = tf.keras.losses.sparse_categorical_crossentropy(y, y_pred)
        loss = tf.reduce_mean(loss)
        print("batch: {}, loss: {}".format(i, loss.numpy()))
    # compute gradients
    grads = tape.gradient(loss, model.variables)
    # update parameters
    optimizer.apply_gradients(zip(grads, model.variables))
- Prediction

# take one sample as the seed
X_, _ = data_loader.get_batch(seq_len, 1)
for diversity in [0.2, 0.5, 1.0, 1.2]:  # sampling diversity (temperature)
    X = X_
    print('diversity {}'.format(diversity))
    for t in range(400):  # output length: 400 characters
        y_pred = model.predict(X, diversity)  # id of the predicted character
        # print the predicted character
        print(data_loader.idx_char[y_pred[0]], end='', flush=True)
        # slide the window by 1 so the prediction becomes part of the next input
        X = np.concatenate([X[:, 1:], np.expand_dims(y_pred, axis=1)], axis=-1)
    print("\n")
Output:
diversity 0.2
the the sere the s and the s and the the sere the the s and in the sere the ches the the sere the the sore the the s and the the s and the serend the seres the the the serely the the the s all the the the s and the the sere the the the sere the ther the the sorece the ninge sore the the the s of sell the pint the s the the the the the the the s of the serere the the s and the sere the s the the t
diversity 0.5
ere---and the ne ous bored bo s the the ande sereng to then hithe the the
he sesthard on the non there the mores sor the the thit fus the ches sored the seresit and the the ntithe s at all sent for the fas theng the d end the ind che the the serangen
the for ole the soll dund, and chered and the
pereropher of the resiged the the s lore not the the s as the s the dethere hor the s mone se soull
diversity 1.0
tyrive oop art rrame nd michicosentiun, luind the trourd tho t ts.cseseyreve oud s mhendgcomrools bored ere s oll ow ons, here blprlen, pforzede ntor, in this mis je,iof tore. hon bf cerign then thect nene hfurlilin of fallll devety irtes" the whiy ins puncaliridut drerales alder, as inen waveructache semaltou no aven it yuranty ahd oar in -whe s urofeg to the
serecying
sicoradt-i0nior anetheragl
diversity 1.2
y, dpr thoucg,", ind soncrea5sfporcul os_; fac alin th thel. (owel, the
nniv poteer" hithichp-hispin2, ho thas d-lher wrekek---fe l seh rabf ssit afolyicud i iedy, d
chendle-hand-a ne
lef urovut, phetif po'n. wskin ef; phtors eve mdd ali all
an icig tedt g main aisec cowstixgeof adt vinnd thas phinte
lllivita ou
is
toup tualy as isscppomeofea2y
ieëy ounscded!wheor ome sllat , hhe"se, ouondibis
With a larger diversity (temperature), the predicted characters are more varied.