# Suppress warnings raised while running Python
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import os
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)
# with tf.Session( config=tf.ConfigProto(gpu_options=gpu_options) ) as sess:
with tf.Session() as sess:
    print( sess.run( tf.constant(1) ) )
1
# Build an RNN unrolled over 2 time steps
n_inputs = 3
n_neurons = 5
X0 = tf.placeholder( tf.float32, [None, n_inputs] )
X1 = tf.placeholder( tf.float32, [None, n_inputs] )
Wx = tf.Variable( tf.random_normal(shape=[n_inputs, n_neurons]), dtype=tf.float32 )
Wy = tf.Variable( tf.random_normal(shape=[n_neurons, n_neurons], dtype=tf.float32 ))
b = tf.Variable( tf.zeros([1, n_neurons], dtype=tf.float32) )
Y0 = tf.tanh( tf.matmul( X0, Wx ) + b )
Y1 = tf.tanh( tf.matmul( Y0, Wy ) + tf.matmul( X1, Wx ) + b )
init = tf.global_variables_initializer()
X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) # t = 0
X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]) # t = 1
with tf.Session() as sess:
    init.run()
    Y0_val, Y1_val = sess.run( [Y0, Y1], feed_dict={ X0:X0_batch, X1:X1_batch } )
    print( Y0_val )
    print( Y1_val )
[[ 0.43158025 -0.9130973 0.2609397 -0.9974923 -0.9943459 ]
[ 0.9968195 -0.99999946 0.99997216 -0.99999976 -1. ]
[ 0.99998724 -1. 1. -1. -1. ]
[ 0.999971 -1. 1. 0.64810324 -0.99999994]]
[[ 0.9999944 -1. 1. -1. -1. ]
[-0.85875237 0.29986963 -0.99652356 0.96680504 -0.3667912 ]
[ 0.9989384 -1. 0.99999887 -0.9993215 -1. ]
[ 0.94780797 -0.9997977 0.99790573 0.8633509 -0.99999946]]
reset_graph()
X0 = tf.placeholder( tf.float32, [None, n_inputs] )
X1 = tf.placeholder( tf.float32, [None, n_inputs] )
basic_cell = tf.contrib.rnn.BasicRNNCell( num_units=n_neurons )
output_seqs, states = tf.contrib.rnn.static_rnn( basic_cell, [X0, X1], dtype=tf.float32 )
Y0, Y1 = output_seqs
init = tf.global_variables_initializer()
X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])
X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]])
with tf.Session() as sess:
    init.run()
    Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})
    print( Y0_val )
    print( Y1_val )
    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter( "./tf_logs/basic_rnn/", sess.graph )
    writer.close()
[[ 0.30741337 -0.32884312 -0.6542847 -0.9385059 0.52089024]
[ 0.9912275 -0.95425415 -0.7518078 -0.9995208 0.98202336]
[ 0.99992675 -0.99783254 -0.82473516 -0.99999636 0.99947786]
[ 0.9967709 -0.6875061 0.8419969 0.93039113 0.81206834]]
[[ 0.99998885 -0.9997606 -0.06679297 -0.9999803 0.99982214]
[-0.65249425 -0.5152086 -0.37968948 -0.5922594 -0.08968376]
[ 0.998624 -0.99715203 -0.03308632 -0.9991566 0.9932902 ]
[ 0.99681675 -0.9598194 0.39660627 -0.8307605 0.7967197 ]]
n_steps = 2
n_inputs = 3
n_neurons = 5
reset_graph()
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
X_seqs = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, X_seqs,
dtype=tf.float32)
outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2])
init = tf.global_variables_initializer()
X_batch = np.array([
# t = 0 t = 1
[[0, 1, 2], [9, 8, 7]], # instance 1
[[3, 4, 5], [0, 0, 0]], # instance 2
[[6, 7, 8], [6, 5, 4]], # instance 3
[[9, 0, 1], [3, 2, 1]], # instance 4
])
with tf.Session() as sess:
    init.run()
    outputs_val = outputs.eval(feed_dict={X: X_batch})
    print( outputs_val )
[[[-0.4565232 -0.6806412 0.40938237 0.631045 -0.45732823]
[-0.94288003 -0.9998869 0.9405581 0.99999845 -0.99999976]]
[[-0.80015343 -0.99218273 0.78177965 0.9971032 -0.9964609 ]
[-0.637116 0.11300934 0.5798437 0.43105593 -0.6371699 ]]
[[-0.93605185 -0.99983793 0.9308867 0.9999814 -0.99998313]
[-0.9165386 -0.99456036 0.89605415 0.9998719 -0.9999751 ]]
[[ 0.99273676 -0.9981933 -0.5554365 0.99890316 -0.9953323 ]
[-0.02746333 -0.7319198 0.7827872 0.9525682 -0.9781772 ]]]
n_steps = 2
n_inputs = 3
n_neurons = 5
reset_graph()
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
outputs, states = tf.nn.dynamic_rnn( basic_cell, X, dtype=tf.float32 )
init = tf.global_variables_initializer()
X_batch = np.array([
# t = 0 t = 1
[[0, 1, 2], [9, 8, 7]], # instance 1
[[3, 4, 5], [0, 0, 0]], # instance 2
[[6, 7, 8], [6, 5, 4]], # instance 3
[[9, 0, 1], [3, 2, 1]], # instance 4
])
with tf.Session() as sess:
    init.run()
    output_val = outputs.eval( feed_dict={ X:X_batch } )
    print( output_val )
[[[ 0.8087223 -0.5231244 -0.6716494 -0.6976225 -0.54384494]
[ 0.9995454 0.9933981 -0.9999836 0.99919224 -0.9837949 ]]
[[ 0.9954711 -0.02155101 -0.9948289 0.17964771 -0.8317369 ]
[-0.06013342 0.4030144 0.02884478 -0.2943758 -0.8568158 ]]
[[ 0.9999026 0.4911105 -0.9999316 0.84138334 -0.944468 ]
[ 0.9940618 0.9581599 -0.99768937 0.98646176 -0.91752493]]
[[-0.8063292 0.93928134 -0.9730989 0.99996096 0.9743306 ]
[ 0.95047355 -0.51205146 -0.27763975 0.83108056 0.81631833]]]
n_steps = 2
n_inputs = 3
n_neurons = 5
reset_graph()
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
seq_length = tf.placeholder(tf.int32, [None])
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32,
sequence_length=seq_length)
init = tf.global_variables_initializer()
X_batch = np.array([
# step 0 step 1
[[0, 1, 2], [9, 8, 7]], # instance 1
[[3, 4, 5], [0, 0, 0]], # instance 2 (padded with zero vectors)
[[6, 7, 8], [6, 5, 4]], # instance 3
[[9, 0, 1], [3, 2, 1]], # instance 4
])
# The second value is 1: the second instance only contains time step t=0 (its sequence length is 1), while the other instances contain both t=0 and t=1
seq_length_batch = np.array([2, 1, 2, 2])
with tf.Session() as sess:
    init.run()
    outputs_val, states_val = sess.run( [outputs, states], feed_dict={X: X_batch, seq_length: seq_length_batch} )
    print( outputs_val ) # outputs for every instance at every time step; instances shorter than the longest sequence are zero-padded
    print( states_val )  # final state of every instance; only the second instance returns its t=0 output, all others return their t=1 output
[[[ 0.731557 0.3483572 0.50582004 -0.22882834 -0.4402272 ]
[-0.61832327 0.99999994 0.865848 0.97933763 0.99022454]]
[[ 0.84387034 0.9969754 0.84789455 0.43032196 0.09275493]
[ 0. 0. 0. 0. 0. ]]
[[ 0.9115923 0.9999905 0.9595445 0.8189222 0.5774024 ]
[-0.7754546 0.99987745 0.9784728 0.7317201 0.8887761 ]]
[[-0.99952275 0.9997785 0.5974465 0.99309695 0.9984741 ]
[-0.62119997 0.5400083 0.9693844 0.19893228 0.19448037]]]
[[-0.61832327 0.99999994 0.865848 0.97933763 0.99022454]
[ 0.84387034 0.9969754 0.84789455 0.43032196 0.09275493]
[-0.7754546 0.99987745 0.9784728 0.7317201 0.8887761 ]
[-0.62119997 0.5400083 0.9693844 0.19893228 0.19448037]]
reset_graph()
n_steps = 28
n_inputs = 28
n_neurons = 150
n_outputs = 10
learning_rate = 0.001
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, [None])
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)
logits = tf.layers.dense(states, n_outputs) # use the output of the final state, i.e. y(28)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
logits=logits)
loss = tf.reduce_mean(xentropy)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("./dataset/mnist")
X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))
y_test = mnist.test.labels
n_epochs = 20
batch_size = 500
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            X_batch = X_batch.reshape((-1, n_steps, n_inputs))
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
Extracting ./dataset/mnist/train-images-idx3-ubyte.gz
Extracting ./dataset/mnist/train-labels-idx1-ubyte.gz
Extracting ./dataset/mnist/t10k-images-idx3-ubyte.gz
Extracting ./dataset/mnist/t10k-labels-idx1-ubyte.gz
0 Train accuracy: 0.902 Test accuracy: 0.8982
1 Train accuracy: 0.95 Test accuracy: 0.9365
2 Train accuracy: 0.946 Test accuracy: 0.9445
3 Train accuracy: 0.976 Test accuracy: 0.9573
4 Train accuracy: 0.972 Test accuracy: 0.9611
5 Train accuracy: 0.958 Test accuracy: 0.9547
6 Train accuracy: 0.97 Test accuracy: 0.9656
7 Train accuracy: 0.966 Test accuracy: 0.9657
8 Train accuracy: 0.976 Test accuracy: 0.9712
9 Train accuracy: 0.976 Test accuracy: 0.9721
10 Train accuracy: 0.972 Test accuracy: 0.9733
11 Train accuracy: 0.984 Test accuracy: 0.9695
12 Train accuracy: 0.98 Test accuracy: 0.9739
13 Train accuracy: 0.978 Test accuracy: 0.9724
14 Train accuracy: 0.982 Test accuracy: 0.9725
15 Train accuracy: 0.988 Test accuracy: 0.9735
16 Train accuracy: 0.982 Test accuracy: 0.9733
17 Train accuracy: 0.986 Test accuracy: 0.9755
18 Train accuracy: 0.99 Test accuracy: 0.9754
19 Train accuracy: 0.986 Test accuracy: 0.9759
reset_graph()
n_steps = 28
n_inputs = 28
n_neurons = 150
n_outputs = 10
learning_rate = 0.001
# Build a 3-layer RNN
n_layers = 3
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, [None])
layers = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons,
activation=tf.nn.relu)
for layer in range(n_layers)]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)
outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)
states_concat = tf.concat(axis=1, values=states) # concat joins all tensors in the states tuple along the given axis
logits = tf.layers.dense(states_concat, n_outputs) # use the outputs of the final states, i.e. y(28)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
logits=logits)
loss = tf.reduce_mean(xentropy)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("./dataset/mnist")
X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))
y_test = mnist.test.labels
n_epochs = 20
batch_size = 500
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            X_batch = X_batch.reshape((-1, n_steps, n_inputs))
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
Extracting ./dataset/mnist/train-images-idx3-ubyte.gz
Extracting ./dataset/mnist/train-labels-idx1-ubyte.gz
Extracting ./dataset/mnist/t10k-images-idx3-ubyte.gz
Extracting ./dataset/mnist/t10k-labels-idx1-ubyte.gz
0 Train accuracy: 0.91 Test accuracy: 0.9134
1 Train accuracy: 0.964 Test accuracy: 0.9615
2 Train accuracy: 0.972 Test accuracy: 0.9619
3 Train accuracy: 0.99 Test accuracy: 0.9689
4 Train accuracy: 0.98 Test accuracy: 0.9744
5 Train accuracy: 0.978 Test accuracy: 0.9726
6 Train accuracy: 0.99 Test accuracy: 0.9765
7 Train accuracy: 0.982 Test accuracy: 0.9834
8 Train accuracy: 0.99 Test accuracy: 0.9812
9 Train accuracy: 0.988 Test accuracy: 0.9783
10 Train accuracy: 0.984 Test accuracy: 0.9827
11 Train accuracy: 0.988 Test accuracy: 0.9822
12 Train accuracy: 0.994 Test accuracy: 0.9828
13 Train accuracy: 0.992 Test accuracy: 0.9856
14 Train accuracy: 0.994 Test accuracy: 0.9786
15 Train accuracy: 0.998 Test accuracy: 0.9866
16 Train accuracy: 0.998 Test accuracy: 0.9825
17 Train accuracy: 0.992 Test accuracy: 0.9871
18 Train accuracy: 0.994 Test accuracy: 0.9857
19 Train accuracy: 0.996 Test accuracy: 0.9788
states_concat = tf.concat(axis=0, values=states)
states_concat
OutputProjectionWrapper
wraps each cell: it does not affect the RNN's state, it only adds a linear fully connected layer on top of the outputs, and all of these fully connected layers share their weights.
# data preparation
t_min, t_max = 0, 30
resolution = 0.1
def time_series(t):
    return t * np.sin(t) / 3 + 2 * np.sin(t*5)
def next_batch(batch_size, n_steps):
    t0 = np.random.rand(batch_size, 1) * (t_max - t_min - n_steps * resolution)
    Ts = t0 + np.arange(0., n_steps + 1) * resolution
    ys = time_series(Ts)
    return ys[:, :-1].reshape(-1, n_steps, 1), ys[:, 1:].reshape(-1, n_steps, 1)
reset_graph()
n_steps = 20
n_inputs = 1
n_neurons = 100
n_outputs = 1
# The input is a sequence with one value per time step; the output is also a sequence with one value per time step, i.e. the input sequence shifted one step ahead
X = tf.placeholder( tf.float32, [None, n_steps, n_inputs] )
y = tf.placeholder( tf.float32, [None, n_steps, n_outputs] )
cell = tf.contrib.rnn.OutputProjectionWrapper( tf.contrib.rnn.BasicRNNCell( num_units=n_neurons, activation=tf.nn.relu ), output_size=n_outputs )
outputs, states = tf.nn.dynamic_rnn( cell, X, dtype=tf.float32 )
learning_rate = 0.001
loss = tf.reduce_mean( tf.square( outputs - y ) )
optimizer = tf.train.AdamOptimizer( learning_rate=learning_rate )
training_op = optimizer.minimize( loss )
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_iter = 1500
batch_size = 500
with tf.Session() as sess:
    init.run()
    for iteration in range(n_iter):
        X_batch, y_batch = next_batch( batch_size, n_steps )
        sess.run( training_op, feed_dict={ X:X_batch, y:y_batch } )
        if iteration % 100 == 0:
            mse = loss.eval( feed_dict={ X:X_batch, y:y_batch } )
            print( iteration, ", MSE: ", mse )
    saver.save( sess, "./models/rnn/my_time_series_model" )
0 , MSE: 22.603624
100 , MSE: 0.7702466
200 , MSE: 0.28123116
300 , MSE: 0.113177
400 , MSE: 0.0779924
500 , MSE: 0.056141626
600 , MSE: 0.053441137
700 , MSE: 0.04956764
800 , MSE: 0.05015872
900 , MSE: 0.053384278
1000 , MSE: 0.050169982
1100 , MSE: 0.047106195
1200 , MSE: 0.046533123
1300 , MSE: 0.04629165
1400 , MSE: 0.04463054
# load model and predict
t = np.linspace(t_min, t_max, int((t_max - t_min) / resolution))
n_steps = 20
t_instance = np.linspace(12.2, 12.2 + resolution * (n_steps + 1), n_steps + 1)
with tf.Session() as sess:
    saver.restore( sess, "./models/rnn/my_time_series_model" )
    # drop the last value so it can be used for comparison with the prediction
    X_new = time_series( np.array( t_instance[:-1].reshape( -1, n_steps, n_inputs ) ) )
    y_pred = sess.run( outputs, feed_dict={ X:X_new } )
plt.plot( t_instance[:-1], time_series( t_instance[:-1] ), 'ro-', label="real lines" )
plt.plot( t_instance[1:], y_pred.flatten(), 'b*--', label="predicted lines" )
plt.legend()
plt.show()
INFO:tensorflow:Restoring parameters from ./models/rnn/my_time_series_model
OutputProjectionWrapper
reduces the dimensionality of the RNN's output sequence to 1 (at every time step), but there is a more efficient approach: the RNN outputs have shape [batch_size, n_steps, n_neurons]; reshape them to [batch_size * n_steps, n_neurons], feed them through a fully connected layer whose output has shape [batch_size * n_steps, n_outputs], and finally reshape the result back to [batch_size, n_steps, n_outputs].
reset_graph()
n_steps = 20
n_inputs = 1
n_neurons = 100
n_outputs = 1
learning_rate = 0.001
# The input is a sequence with one value per time step; the output is also a sequence with one value per time step, i.e. the input sequence shifted one step ahead
X = tf.placeholder( tf.float32, [None, n_steps, n_inputs] )
y = tf.placeholder( tf.float32, [None, n_steps, n_outputs] )
cell = tf.contrib.rnn.BasicRNNCell( num_units=n_neurons, activation=tf.nn.relu )
rnn_outputs, states = tf.nn.dynamic_rnn( cell, X, dtype=tf.float32 )
# reshape first, process with a fully connected layer, then reshape back to get the outputs
stacked_rnn_outputs = tf.reshape( rnn_outputs, [-1, n_neurons] )
stacked_outputs = tf.layers.dense( stacked_rnn_outputs, n_outputs )
outputs = tf.reshape( stacked_outputs, [-1, n_steps, n_outputs] )
loss = tf.reduce_mean( tf.square( outputs - y ) )
optimizer = tf.train.AdamOptimizer( learning_rate=learning_rate )
training_op = optimizer.minimize( loss )
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_iter = 1500
batch_size = 500
with tf.Session() as sess:
    init.run()
    for iteration in range(n_iter):
        X_batch, y_batch = next_batch( batch_size, n_steps )
        sess.run( training_op, feed_dict={ X:X_batch, y:y_batch } )
        if iteration % 100 == 0:
            mse = loss.eval( feed_dict={ X:X_batch, y:y_batch } )
            print( iteration, ", MSE: ", mse )
    saver.save( sess, "./models/rnn/my_time_series_model" )
0 , MSE: 11.881364
100 , MSE: 0.36172634
200 , MSE: 0.107290134
300 , MSE: 0.06558608
400 , MSE: 0.05752879
500 , MSE: 0.054240808
600 , MSE: 0.05000111
700 , MSE: 0.04845157
800 , MSE: 0.0483876
900 , MSE: 0.049780883
1000 , MSE: 0.0478195
1100 , MSE: 0.04629565
1200 , MSE: 0.04530079
1300 , MSE: 0.045294493
1400 , MSE: 0.044130925
with tf.Session() as sess:                                        # not shown in the book
    saver.restore(sess, "./models/rnn/my_time_series_model")      # not shown
    # seed sequence of all zeros
    sequence1 = [0.] * n_steps
    for iteration in range(len(t) - n_steps):
        X_batch = np.array(sequence1[-n_steps:]).reshape(1, n_steps, 1)
        y_pred = sess.run(outputs, feed_dict={X: X_batch})
        sequence1.append(y_pred[0, -1, 0])
    # seed sequence taken from time_series
    sequence2 = [time_series(i * resolution + t_min + (t_max-t_min/3)) for i in range(n_steps)]
    for iteration in range(len(t) - n_steps):
        X_batch = np.array(sequence2[-n_steps:]).reshape(1, n_steps, 1)
        y_pred = sess.run(outputs, feed_dict={X: X_batch})
        sequence2.append(y_pred[0, -1, 0])
plt.figure(figsize=(11,4))
plt.subplot(121)
plt.plot(t, sequence1, "b-")
plt.plot(t[:n_steps], sequence1[:n_steps], "b-", linewidth=3)
plt.xlabel("Time")
plt.ylabel("Value")
plt.subplot(122)
plt.plot(t, sequence2, "b-")
plt.plot(t[:n_steps], sequence2[:n_steps], "b-", linewidth=3)
plt.xlabel("Time")
plt.show()
INFO:tensorflow:Restoring parameters from ./models/rnn/my_time_series_model
reset_graph()
n_inputs = 2
n_steps = 5
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
n_neurons = 100
n_layers = 3
layers = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
for layer in range(n_layers)]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)
outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)
init = tf.global_variables_initializer()
X_batch = np.random.rand(2, n_steps, n_inputs)
with tf.Session() as sess:
    init.run()
    outputs_val, states_val = sess.run([outputs, states], feed_dict={X: X_batch})
    print( outputs_val.shape ) # the outputs have shape [batch_size, n_steps, n_neurons]
    print( len(states_val) )   # states is a tuple whose length equals the number of layers
(2, 5, 100)
3
# the device specified here has no effect (the cell's ops are only created later, e.g. by dynamic_rnn)
with tf.device( "/gpu:0" ):
    layer1 = tf.contrib.rnn.BasicRNNCell( num_units=n_neurons )
DeviceCellWrapper
Wrapping each cell in a class such as the DeviceCellWrapper below makes it possible to place the different layers of a multilayer RNN on different devices (TensorFlow also ships an equivalent tf.nn.rnn_cell.DeviceWrapper class; a sketch using it appears after the example output below).
class DeviceCellWrapper(tf.contrib.rnn.RNNCell):
    def __init__(self, device, cell):
        self._cell = cell
        self._device = device
    @property
    def state_size(self):
        return self._cell.state_size
    @property
    def output_size(self):
        return self._cell.output_size
    def __call__(self, inputs, state, scope=None):
        with tf.device(self._device):
            return self._cell(inputs, state, scope)
reset_graph()
n_inputs = 5
n_steps = 20
n_neurons = 100
X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])
devices = ["/cpu:0", "/gpu:0", "/gpu:1"] # 在不同的device中创建不同的层
cells = [DeviceCellWrapper(dev,tf.contrib.rnn.BasicRNNCell(num_units=n_neurons))
for dev in devices]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)
outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    init.run()
    print(sess.run(outputs, feed_dict={X: np.random.rand(2, n_steps, n_inputs)}))
[[[ 0.06828328 -0.11375453 0.06424566 ... -0.24244206 -0.04821675
-0.12077259]
[ 0.07453808 -0.22510499 0.20471567 ... -0.14811225 -0.09225387
-0.04429062]
[ 0.13780874 -0.14680627 -0.00956541 ... -0.08136036 0.07381526
-0.03125764]
...
[-0.2540025 -0.3207857 0.3992359 ... -0.26669195 0.33505762
-0.03757678]
[ 0.22596699 -0.09880796 -0.274223 ... -0.13386028 -0.25443038
-0.36498213]
[ 0.16559371 -0.3343584 0.34313312 ... -0.36904442 0.06908777
0.4657412 ]]
[[ 0.00489879 -0.03151968 0.02628037 ... -0.19341365 -0.07303753
0.00451886]
[ 0.03073939 -0.05795513 0.1778592 ... -0.20945792 0.0520001
-0.07436947]
[ 0.00192375 -0.25690767 0.12488239 ... 0.02644877 -0.2504646
-0.12239385]
...
[-0.13501374 -0.06209685 0.15950367 ... -0.20012137 -0.3338359
-0.09281676]
[-0.44347283 0.2032329 0.12526968 ... -0.0796242 0.27046493
0.31883997]
[ 0.23965771 -0.22903351 0.07749572 ... -0.02653921 0.08402385
0.02313656]]]
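As mentioned above, recent TF 1.x versions ship a built-in wrapper that does the same job as the hand-written DeviceCellWrapper. This is a minimal sketch, assuming tf.nn.rnn_cell.DeviceWrapper is available in your TensorFlow version; it is not from the original notebook.
reset_graph()
n_inputs, n_steps, n_neurons = 5, 20, 100
X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])
devices = ["/cpu:0", "/gpu:0", "/gpu:1"]
# the built-in wrapper takes (cell, device) and pins the cell's ops to that device
cells = [tf.nn.rnn_cell.DeviceWrapper(tf.contrib.rnn.BasicRNNCell(num_units=n_neurons), dev)
         for dev in devices]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)
outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)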
DropoutWrapper
applies dropout to the wrapped cells to prevent overfitting.
reset_graph()
n_inputs = 1
n_neurons = 100
n_layers = 3
n_steps = 20
n_outputs = 1
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])
# use dropout during training; at test time use all the inputs (no dropout)
keep_prob = tf.placeholder_with_default(1.0, shape=())
cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
for layer in range(n_layers)]
cells_drop = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)
for cell in cells]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells_drop)
rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)
learning_rate = 0.01
stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])
stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])
loss = tf.reduce_mean(tf.square(outputs - y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_iterations = 1500
batch_size = 500
train_keep_prob = 0.5
with tf.Session() as sess:
    init.run()
    for iteration in range(n_iterations):
        X_batch, y_batch = next_batch(batch_size, n_steps)
        _, mse = sess.run([training_op, loss],
                          feed_dict={X: X_batch, y: y_batch,
                                     keep_prob: train_keep_prob})
        if iteration % 100 == 0:                   # not shown in the book
            print(iteration, "Training MSE:", mse) # not shown
    saver.save(sess, "./models/rnn/my_ts_dropout_model")
0 Training MSE: 17.523228
100 Training MSE: 3.6864297
200 Training MSE: 2.5945766
300 Training MSE: 2.608631
400 Training MSE: 2.354856
500 Training MSE: 2.166124
600 Training MSE: 2.0215302
700 Training MSE: 1.842349
800 Training MSE: 2.4512978
900 Training MSE: 1.9394426
1000 Training MSE: 2.3554778
1100 Training MSE: 2.031637
1200 Training MSE: 1.9235188
1300 Training MSE: 1.8453777
1400 Training MSE: 1.9560652
# load from disk and predict using rnn
with tf.Session() as sess:
    saver.restore(sess, "./models/rnn/my_ts_dropout_model")
    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))
    y_pred = sess.run(outputs, feed_dict={X: X_new})
plt.title("Testing the model", fontsize=14)
plt.plot(t_instance[:-1], time_series(t_instance[:-1]), "bo", markersize=10, label="instance")
plt.plot(t_instance[1:], time_series(t_instance[1:]), "y*", markersize=10, label="target")
plt.plot(t_instance[1:], y_pred[0,:,0], "r.", markersize=10, label="prediction")
plt.legend(loc="upper left")
plt.xlabel("Time")
plt.show()
INFO:tensorflow:Restoring parameters from ./models/rnn/my_ts_dropout_model
For long sequences, training can use truncated backpropagation through time, but this keeps the RNN from learning long-term patterns. A more detailed explanation: https://yunaitong.cn/understanding-lstm-networks.html
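A minimal sketch of truncated BPTT on the same toy signal (not from the original notebook; the window construction here is an assumption): the graph is unrolled over only n_steps, and the final state of one window is fed back as the initial state of the next, so gradients never flow back further than n_steps.
import numpy as np
import tensorflow as tf
tf.reset_default_graph()
n_steps, n_inputs, n_neurons, n_outputs = 20, 1, 100, 1
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])
init_state = tf.placeholder(tf.float32, [None, n_neurons])
cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, X, initial_state=init_state)
stacked_outputs = tf.layers.dense(tf.reshape(rnn_outputs, [-1, n_neurons]), n_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])
loss = tf.reduce_mean(tf.square(outputs - y))
training_op = tf.train.AdamOptimizer(0.001).minimize(loss)
# split one long series into consecutive windows of n_steps (toy data)
t_long = np.arange(0., 100., 0.1)
series = t_long * np.sin(t_long) / 3 + 2 * np.sin(t_long * 5)
n_windows = (len(series) - 1) // n_steps
X_wins = series[:n_windows * n_steps].reshape(n_windows, 1, n_steps, 1)
y_wins = series[1:n_windows * n_steps + 1].reshape(n_windows, 1, n_steps, 1)
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    state_val = np.zeros([1, n_neurons], dtype=np.float32)  # carried across windows
    for X_win, y_win in zip(X_wins, y_wins):
        _, state_val = sess.run([training_op, final_state],
                                feed_dict={X: X_win, y: y_win, init_state: state_val})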
The long-term state c(t-1) first passes through the forget gate, which drops part of the information; an addition operation then adds new information selected by the input gate. After that, c(t) is sent on to the next step without any further processing; at the same time, c(t) is passed through a tanh and multiplied elementwise by the output gate, producing h(t), which is also the output y(t) of this LSTM cell at this time step. The gate controllers decide whether information may pass, so their activation function is the sigmoid: an output of 0 closes the gate, an output of 1 opens it. The forget gate controls which parts of the long-term state are erased (controlled by f(t)); the input gate controls which parts of g(t) are added to the long-term state (controlled by i(t)); the output gate controls which parts of the long-term state are read and output at this step to h(t) and y(t) (controlled by o(t)). To add peephole connections, it is enough to set use_peepholes=True when creating the cell.
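A minimal sketch (assumed shapes, not from the original text): the LSTM cell described above is available as BasicLSTMCell, and the peephole variant is obtained by setting use_peepholes=True on LSTMCell.
import tensorflow as tf
tf.reset_default_graph()
n_steps, n_inputs, n_neurons = 20, 1, 100
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)                 # plain LSTM
peephole_cell = tf.contrib.rnn.LSTMCell(num_units=n_neurons, use_peepholes=True)  # peephole LSTM
outputs, states = tf.nn.dynamic_rnn(lstm_cell, X, dtype=tf.float32)
# states is an LSTMStateTuple: states.c is the long-term state c(t),
# states.h is the short-term state h(t) (identical to the last output)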
The GRU cell is a simplified version of the LSTM cell that performs just as well. Its main simplifications: the two state vectors are merged into a single vector h(t); one gate controller drives both the forget gate and the input gate; and there is no output gate, the full state vector is output at every time step. Creating a GRU cell only requires tf.contrib.rnn.GRUCell, as in the sketch below.
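A minimal sketch (assumed shapes): a GRU cell is created with GRUCell; its state is a single vector, so states has shape [batch_size, n_neurons].
import tensorflow as tf
tf.reset_default_graph()
n_steps, n_inputs, n_neurons = 20, 1, 100
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
gru_cell = tf.contrib.rnn.GRUCell(num_units=n_neurons)
outputs, states = tf.nn.dynamic_rnn(gru_cell, X, dtype=tf.float32)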
Each word is represented by a small dense vector called its embedding; the entries of the embedding matrix are real numbers. As backpropagation training proceeds, the embeddings of two words with similar meanings move closer and closer together (the norm of the difference of their embeddings becomes small). For sentences of varying length, either pass sequence_length as a variable, or pad every sentence to a fixed length before feeding it to the RNN for training. Since a full softmax over a large vocabulary is expensive, a sampled softmax can be used instead, implemented with the sampled_softmax_loss() function (see the sketch below). The tf.nn.legacy_seq2seq module makes it very convenient to build Encoder-Decoder models.
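A hedged sketch of sampled softmax (the placeholder names here are assumptions, not from the original text): instead of a full softmax over the whole vocabulary, the loss is estimated from num_sampled randomly drawn negative classes.
import numpy as np
import tensorflow as tf
tf.reset_default_graph()
vocabulary_size, embedding_size, num_sampled = 50000, 150, 64
inputs = tf.placeholder(tf.float32, [None, embedding_size])  # e.g. RNN or embedding outputs
labels = tf.placeholder(tf.int64, [None, 1])                 # ids of the true words
softmax_weights = tf.Variable(
    tf.truncated_normal([vocabulary_size, embedding_size],
                        stddev=1.0 / np.sqrt(embedding_size)))
softmax_biases = tf.Variable(tf.zeros([vocabulary_size]))
loss = tf.reduce_mean(
    tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases,
                               labels=labels, inputs=inputs,
                               num_sampled=num_sampled, num_classes=vocabulary_size))
training_op = tf.train.AdamOptimizer(0.01).minimize(loss)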
from six.moves import urllib
import errno
import os
import zipfile
WORDS_PATH = "./dataset/words"
WORDS_URL = 'http://mattmahoney.net/dc/text8.zip'
def mkdir_p(path):
    """Create directories, ok if they already exist.

    This is for python 2 support. In python >=3.2, simply use:
    >>> os.makedirs(path, exist_ok=True)
    """
    try:
        os.makedirs(path)
    except OSError as exc:
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise
def fetch_words_data(words_url=WORDS_URL, words_path=WORDS_PATH):
    os.makedirs(words_path, exist_ok=True)
    zip_path = os.path.join(words_path, "words.zip")
    if not os.path.exists(zip_path):
        urllib.request.urlretrieve(words_url, zip_path)
    with zipfile.ZipFile(zip_path) as f: # if this raises an error, the download probably failed; re-download or place the file manually in this directory
        data = f.read(f.namelist()[0])
    return data.decode("ascii").split()
# load the downloaded words
words = fetch_words_data()
print( len(words ) )
17005207
# map each word to an integer
from collections import Counter
vocabulary_size = 50000
vocabulary = [("UNK", None)] + Counter(words).most_common(vocabulary_size - 1)
vocabulary = np.array([word for word, _ in vocabulary])
dictionary = {word: code for code, word in enumerate(vocabulary)}
data = np.array([dictionary.get(word, 0) for word in words])
print( words[:5] )
print( data[:5] )
['anarchism', 'originated', 'as', 'a', 'term']
[5234 3081 12 6 195]
import random
from collections import deque
def generate_batch(batch_size, num_skips, skip_window):
    global data_index
    assert batch_size % num_skips == 0
    assert num_skips <= 2 * skip_window
    batch = np.ndarray(shape=(batch_size), dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
    span = 2 * skip_window + 1  # [ skip_window target skip_window ]
    buffer = deque(maxlen=span)
    for _ in range(span):
        buffer.append(data[data_index])
        data_index = (data_index + 1) % len(data)
    for i in range(batch_size // num_skips):
        target = skip_window  # target label at the center of the buffer
        targets_to_avoid = [ skip_window ]
        for j in range(num_skips):
            while target in targets_to_avoid:
                target = random.randint(0, span - 1)
            targets_to_avoid.append(target)
            batch[i * num_skips + j] = buffer[skip_window]
            labels[i * num_skips + j, 0] = buffer[target]
        buffer.append(data[data_index])
        data_index = (data_index + 1) % len(data)
    return batch, labels
data_index=0
batch, labels = generate_batch(8, 2, 1)
print( batch )
print( labels.flatten() )
[3081 3081 12 12 6 6 195 195]
[ 12 5234 6 3081 12 195 6 2]
batch, [vocabulary[word] for word in batch]
(array([3081, 3081, 12, 12, 6, 6, 195, 195], dtype=int32),
['originated', 'originated', 'as', 'as', 'a', 'a', 'term', 'term'])
batch_size = 128
embedding_size = 128 # Dimension of the embedding vector.
skip_window = 1 # How many words to consider left and right.
num_skips = 2 # How many times to reuse an input to generate a label.
# We pick a random validation set to sample nearest neighbors. Here we limit the
# validation samples to the words that have a low numeric ID, which by
# construction are also the most frequent.
valid_size = 16 # Random set of words to evaluate similarity on.
valid_window = 100 # Only pick dev samples in the head of the distribution.
valid_examples = np.random.choice(valid_window, valid_size, replace=False)
num_sampled = 64 # Number of negative examples to sample.
learning_rate = 0.01
reset_graph()
# Input data.
train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
valid_dataset = tf.constant(valid_examples, dtype=tf.int32)
vocabulary_size = 50000
embedding_size = 150
# Look up embeddings for inputs.
init_embeds = tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)
embeddings = tf.Variable(init_embeds)
train_inputs = tf.placeholder(tf.int32, shape=[None])
embed = tf.nn.embedding_lookup(embeddings, train_inputs)
# Construct the variables for the NCE loss
nce_weights = tf.Variable(
tf.truncated_normal([vocabulary_size, embedding_size],
stddev=1.0 / np.sqrt(embedding_size)))
nce_biases = tf.Variable(tf.zeros([vocabulary_size]))
# Compute the average NCE loss for the batch.
# tf.nce_loss automatically draws a new sample of the negative labels each
# time we evaluate the loss.
loss = tf.reduce_mean(
tf.nn.nce_loss(nce_weights, nce_biases, train_labels, embed,
num_sampled, vocabulary_size))
# Construct the Adam optimizer
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
# Compute the cosine similarity between minibatch examples and all embeddings.
norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), axis=1, keep_dims=True))
normalized_embeddings = embeddings / norm
valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset)
similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b=True)
# Add variable initializer.
init = tf.global_variables_initializer()
# train the model
num_steps = 10001
with tf.Session() as session:
    init.run()
    average_loss = 0
    for step in range(num_steps):
        print("\rIteration: {}".format(step), end="\t")
        batch_inputs, batch_labels = generate_batch(batch_size, num_skips, skip_window)
        feed_dict = {train_inputs : batch_inputs, train_labels : batch_labels}

        # We perform one update step by evaluating the training op (including it
        # in the list of returned values for session.run()
        _, loss_val = session.run([training_op, loss], feed_dict=feed_dict)
        average_loss += loss_val

        if step % 2000 == 0:
            if step > 0:
                average_loss /= 2000
            # The average loss is an estimate of the loss over the last 2000 batches.
            print("Average loss at step ", step, ": ", average_loss)
            average_loss = 0

        # Note that this is expensive (~20% slowdown if computed every 500 steps)
        if step % 10000 == 0:
            sim = similarity.eval()
            for i in range(valid_size):
                valid_word = vocabulary[valid_examples[i]]
                top_k = 8  # number of nearest neighbors
                nearest = (-sim[i, :]).argsort()[1:top_k+1]
                log_str = "Nearest to %s:" % valid_word
                for k in range(top_k):
                    close_word = vocabulary[nearest[k]]
                    log_str = "%s %s," % (log_str, close_word)
                print(log_str)

    final_embeddings = normalized_embeddings.eval()
Iteration: 0 Average loss at step 0 : 285.43389892578125
Nearest to only: desperation, harmed, tanaka, drown, alkenes, candu, laughter, illustrating,
Nearest to or: copyleft, haag, weaned, skilled, gv, gdansk, carcassonne, stokes,
Nearest to united: iii, citizen, crows, decimals, eutelsat, dcc, auckland, ennis,
Nearest to first: liquidity, steinbeck, profoundly, integration, inhabiting, ticino, incrimination, acclaimed,
Nearest to he: transitioned, winchell, resh, goldsmiths, standardised, markings, pursued, satirized,
Nearest to not: censor, fucking, venetian, lu, quarto, contractor, headway, stylus,
Nearest to many: spreadsheets, redeemer, nominees, absurd, alerts, xxvi, transylvanian, autonegotiation,
Nearest to been: powerpc, maccabean, precarious, hounds, hazael, gol, linear, schuster,
Nearest to an: gutierrez, dyess, privations, archaeological, bijection, kon, joh, insemination,
Nearest to six: melds, signer, hurtful, paws, ev, melodies, perennially, adf,
Nearest to however: frank, referring, maximization, beltway, liechtenstein, oxus, erik, vicki,
Nearest to has: camelopardalis, lessen, learning, ji, duddy, brontosaurus, unabomber, semipalatinsk,
Nearest to see: elsinore, gaines, esque, battleship, whip, hl, postings, tint,
Nearest to had: catalytic, frankenstein, tam, lefty, grenville, lineker, shtml, sartre,
Nearest to one: imagines, tijuana, hindrance, motorcyclist, steadfastly, lords, letting, hutchinson,
Nearest to d: schuster, asgard, intriguing, catus, jewellery, leptons, goodwill, prosthetic,
Iteration: 2000 Average loss at step 2000 : 130.98741731071473
Iteration: 4000 Average loss at step 4000 : 62.76376576328278
Iteration: 6000 Average loss at step 6000 : 42.172603695631025
Iteration: 8000 Average loss at step 8000 : 31.732391747474672
Iteration: 10000 Average loss at step 10000 : 25.78748117876053
Nearest to only: one, slovenians, delay, alerts, not, citrate, essendon, wolsey,
Nearest to or: of, uppercase, aruba, arendt, and, spassky, judo, ataxia,
Nearest to united: states, taxi, pus, inconsistencies, worldwide, statistic, phoenician, transmit,
Nearest to first: in, intercession, by, hep, fourier, phalanx, of, sly,
Nearest to he: it, observations, holmes, asparagales, had, that, illyrians, nurse,
Nearest to not: to, they, always, bogus, natchez, otherwise, assist, attend,
Nearest to many: the, astrological, chomsky, accredited, people, transporting, uppercase, boosting,
Nearest to been: has, parte, by, donated, cameron, vajrayana, it, illyrians,
Nearest to an: and, the, microtubules, ustinov, tetrapods, parsley, complement, crouching,
Nearest to six: five, nine, one, eight, seven, three, zero, two,
Nearest to however: ampere, eclipsed, scrimmage, coulomb, mj, precipitation, mathfrak, sidebands,
Nearest to has: is, been, willem, chadic, ataxia, columbus, contended, automorphism,
Nearest to see: heretics, bartholomew, pavements, destructive, topalov, doctorate, groundwater, helm,
Nearest to had: he, but, departures, assassinate, legend, surprisingly, columbus, deuterostomes,
Nearest to one: nine, two, six, seven, three, five, eight, four,
Nearest to d: nine, interlocking, ads, khorasan, perspectives, one, american, circus,
# save the final embeddings
np.save("./models/words/my_final_embeddings.npy", final_embeddings)
def plot_with_labels(low_dim_embs, labels):
    assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
    plt.figure(figsize=(12, 12))  # in inches
    for i, label in enumerate(labels):
        x, y = low_dim_embs[i,:]
        plt.scatter(x, y)
        plt.annotate(label,
                     xy=(x, y),
                     xytext=(5, 2),
                     textcoords='offset points',
                     ha='right',
                     va='bottom')
from sklearn.manifold import TSNE
tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
plot_only = 100
low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only,:])
labels = [vocabulary[i] for i in range(plot_only)]
plot_with_labels(low_dim_embs, labels)
plt.show()