# Suppress warnings raised while running Python
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import os
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)
# with tf.Session( config=tf.ConfigProto(gpu_options=gpu_options) ) as sess:
with tf.Session() as sess:
    print( sess.run( tf.constant(1) ) )
1
# Build an RNN unrolled over 2 time steps
n_inputs = 3
n_neurons = 5
X0 = tf.placeholder( tf.float32, [None, n_inputs] )
X1 = tf.placeholder( tf.float32, [None, n_inputs] )
Wx = tf.Variable( tf.random_normal(shape=[n_inputs, n_neurons]), dtype=tf.float32 )
Wy = tf.Variable( tf.random_normal(shape=[n_neurons, n_neurons], dtype=tf.float32 ))
b = tf.Variable( tf.zeros([1, n_neurons], dtype=tf.float32) )
Y0 = tf.tanh( tf.matmul( X0, Wx ) + b )
Y1 = tf.tanh( tf.matmul( Y0, Wy ) + tf.matmul( X1, Wx ) + b )
init = tf.global_variables_initializer()
X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) # t = 0
X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]) # t = 1
with tf.Session() as sess:
    init.run()
    Y0_val, Y1_val = sess.run( [Y0, Y1], feed_dict={ X0:X0_batch, X1:X1_batch } )
    print( Y0_val )
    print( Y1_val )
[[ 0.43158025 -0.9130973 0.2609397 -0.9974923 -0.9943459 ]
[ 0.9968195 -0.99999946 0.99997216 -0.99999976 -1. ]
[ 0.99998724 -1. 1. -1. -1. ]
[ 0.999971 -1. 1. 0.64810324 -0.99999994]]
[[ 0.9999944 -1. 1. -1. -1. ]
[-0.85875237 0.29986963 -0.99652356 0.96680504 -0.3667912 ]
[ 0.9989384 -1. 0.99999887 -0.9993215 -1. ]
[ 0.94780797 -0.9997977 0.99790573 0.8633509 -0.99999946]]
reset_graph()
X0 = tf.placeholder( tf.float32, [None, n_inputs] )
X1 = tf.placeholder( tf.float32, [None, n_inputs] )
basic_cell = tf.contrib.rnn.BasicRNNCell( num_units=n_neurons )
output_seqs, states = tf.contrib.rnn.static_rnn( basic_cell, [X0, X1], dtype=tf.float32 )
Y0, Y1 = output_seqs
init = tf.global_variables_initializer()
X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])
X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]])
with tf.Session() as sess:
    init.run()
    Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})
    print( Y0_val )
    print( Y1_val )
    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter( "./tf_logs/basic_rnn/", sess.graph )
    writer.close()
[[ 0.30741337 -0.32884312 -0.6542847 -0.9385059 0.52089024]
[ 0.9912275 -0.95425415 -0.7518078 -0.9995208 0.98202336]
[ 0.99992675 -0.99783254 -0.82473516 -0.99999636 0.99947786]
[ 0.9967709 -0.6875061 0.8419969 0.93039113 0.81206834]]
[[ 0.99998885 -0.9997606 -0.06679297 -0.9999803 0.99982214]
[-0.65249425 -0.5152086 -0.37968948 -0.5922594 -0.08968376]
[ 0.998624 -0.99715203 -0.03308632 -0.9991566 0.9932902 ]
[ 0.99681675 -0.9598194 0.39660627 -0.8307605 0.7967197 ]]
n_steps = 2
n_inputs = 3
n_neurons = 5
reset_graph()
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
X_seqs = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, X_seqs,
dtype=tf.float32)
outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2])
init = tf.global_variables_initializer()
X_batch = np.array([
# t = 0 t = 1
[[0, 1, 2], [9, 8, 7]], # instance 1
[[3, 4, 5], [0, 0, 0]], # instance 2
[[6, 7, 8], [6, 5, 4]], # instance 3
[[9, 0, 1], [3, 2, 1]], # instance 4
])
with tf.Session() as sess:
    init.run()
    outputs_val = outputs.eval(feed_dict={X: X_batch})
    print( outputs_val )
[[[-0.4565232 -0.6806412 0.40938237 0.631045 -0.45732823]
[-0.94288003 -0.9998869 0.9405581 0.99999845 -0.99999976]]
[[-0.80015343 -0.99218273 0.78177965 0.9971032 -0.9964609 ]
[-0.637116 0.11300934 0.5798437 0.43105593 -0.6371699 ]]
[[-0.93605185 -0.99983793 0.9308867 0.9999814 -0.99998313]
[-0.9165386 -0.99456036 0.89605415 0.9998719 -0.9999751 ]]
[[ 0.99273676 -0.9981933 -0.5554365 0.99890316 -0.9953323 ]
[-0.02746333 -0.7319198 0.7827872 0.9525682 -0.9781772 ]]]
n_steps = 2
n_inputs = 3
n_neurons = 5
reset_graph()
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
outputs, states = tf.nn.dynamic_rnn( basic_cell, X, dtype=tf.float32 )
init = tf.global_variables_initializer()
X_batch = np.array([
# t = 0 t = 1
[[0, 1, 2], [9, 8, 7]], # instance 1
[[3, 4, 5], [0, 0, 0]], # instance 2
[[6, 7, 8], [6, 5, 4]], # instance 3
[[9, 0, 1], [3, 2, 1]], # instance 4
])
with tf.Session() as sess:
    init.run()
    output_val = outputs.eval( feed_dict={ X:X_batch } )
    print( output_val )
[[[ 0.8087223 -0.5231244 -0.6716494 -0.6976225 -0.54384494]
[ 0.9995454 0.9933981 -0.9999836 0.99919224 -0.9837949 ]]
[[ 0.9954711 -0.02155101 -0.9948289 0.17964771 -0.8317369 ]
[-0.06013342 0.4030144 0.02884478 -0.2943758 -0.8568158 ]]
[[ 0.9999026 0.4911105 -0.9999316 0.84138334 -0.944468 ]
[ 0.9940618 0.9581599 -0.99768937 0.98646176 -0.91752493]]
[[-0.8063292 0.93928134 -0.9730989 0.99996096 0.9743306 ]
[ 0.95047355 -0.51205146 -0.27763975 0.83108056 0.81631833]]]
n_steps = 2
n_inputs = 3
n_neurons = 5
reset_graph()
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
seq_length = tf.placeholder(tf.int32, [None])
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32,
sequence_length=seq_length)
init = tf.global_variables_initializer()
X_batch = np.array([
# step 0 step 1
[[0, 1, 2], [9, 8, 7]], # instance 1
[[3, 4, 5], [0, 0, 0]], # instance 2 (padded with zero vectors)
[[6, 7, 8], [6, 5, 4]], # instance 3
[[9, 0, 1], [3, 2, 1]], # instance 4
])
# The second value is 1: the second instance only contains time step t=0 (its sequence length is 1), while the other instances contain both t=0 and t=1
seq_length_batch = np.array([2, 1, 2, 2])
with tf.Session() as sess:
    init.run()
    outputs_val, states_val = sess.run( [outputs, states], feed_dict={X: X_batch, seq_length: seq_length_batch} )
    print( outputs_val ) # outputs for every instance at every time step; instances shorter than the longest sequence are zero-padded
    print( states_val )  # final state of every instance; only the second instance returns its t=0 output, all others return their t=1 output
[[[ 0.731557 0.3483572 0.50582004 -0.22882834 -0.4402272 ]
[-0.61832327 0.99999994 0.865848 0.97933763 0.99022454]]
[[ 0.84387034 0.9969754 0.84789455 0.43032196 0.09275493]
[ 0. 0. 0. 0. 0. ]]
[[ 0.9115923 0.9999905 0.9595445 0.8189222 0.5774024 ]
[-0.7754546 0.99987745 0.9784728 0.7317201 0.8887761 ]]
[[-0.99952275 0.9997785 0.5974465 0.99309695 0.9984741 ]
[-0.62119997 0.5400083 0.9693844 0.19893228 0.19448037]]]
[[-0.61832327 0.99999994 0.865848 0.97933763 0.99022454]
[ 0.84387034 0.9969754 0.84789455 0.43032196 0.09275493]
[-0.7754546 0.99987745 0.9784728 0.7317201 0.8887761 ]
[-0.62119997 0.5400083 0.9693844 0.19893228 0.19448037]]
reset_graph()
n_steps = 28
n_inputs = 28
n_neurons = 150
n_outputs = 10
learning_rate = 0.001
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, [None])
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)
logits = tf.layers.dense(states, n_outputs) # use the output of the final state, i.e. y(28)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
logits=logits)
loss = tf.reduce_mean(xentropy)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("./dataset/mnist")
X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))
y_test = mnist.test.labels
n_epochs = 20
batch_size = 500
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            X_batch = X_batch.reshape((-1, n_steps, n_inputs))
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
Extracting ./dataset/mnist/train-images-idx3-ubyte.gz
Extracting ./dataset/mnist/train-labels-idx1-ubyte.gz
Extracting ./dataset/mnist/t10k-images-idx3-ubyte.gz
Extracting ./dataset/mnist/t10k-labels-idx1-ubyte.gz
0 Train accuracy: 0.902 Test accuracy: 0.8982
1 Train accuracy: 0.95 Test accuracy: 0.9365
2 Train accuracy: 0.946 Test accuracy: 0.9445
3 Train accuracy: 0.976 Test accuracy: 0.9573
4 Train accuracy: 0.972 Test accuracy: 0.9611
5 Train accuracy: 0.958 Test accuracy: 0.9547
6 Train accuracy: 0.97 Test accuracy: 0.9656
7 Train accuracy: 0.966 Test accuracy: 0.9657
8 Train accuracy: 0.976 Test accuracy: 0.9712
9 Train accuracy: 0.976 Test accuracy: 0.9721
10 Train accuracy: 0.972 Test accuracy: 0.9733
11 Train accuracy: 0.984 Test accuracy: 0.9695
12 Train accuracy: 0.98 Test accuracy: 0.9739
13 Train accuracy: 0.978 Test accuracy: 0.9724
14 Train accuracy: 0.982 Test accuracy: 0.9725
15 Train accuracy: 0.988 Test accuracy: 0.9735
16 Train accuracy: 0.982 Test accuracy: 0.9733
17 Train accuracy: 0.986 Test accuracy: 0.9755
18 Train accuracy: 0.99 Test accuracy: 0.9754
19 Train accuracy: 0.986 Test accuracy: 0.9759
reset_graph()
n_steps = 28
n_inputs = 28
n_neurons = 150
n_outputs = 10
learning_rate = 0.001
# Build a 3-layer RNN
n_layers = 3
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, [None])
layers = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons,
activation=tf.nn.relu)
for layer in range(n_layers)]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)
outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)
states_concat = tf.concat(axis=1, values=states) # concat joins all tensors in the states tuple along the given axis
logits = tf.layers.dense(states_concat, n_outputs) # use the outputs of the final states, i.e. y(28)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
logits=logits)
loss = tf.reduce_mean(xentropy)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("./dataset/mnist")
X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))
y_test = mnist.test.labels
n_epochs = 20
batch_size = 500
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            X_batch = X_batch.reshape((-1, n_steps, n_inputs))
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
Extracting ./dataset/mnist/train-images-idx3-ubyte.gz
Extracting ./dataset/mnist/train-labels-idx1-ubyte.gz
Extracting ./dataset/mnist/t10k-images-idx3-ubyte.gz
Extracting ./dataset/mnist/t10k-labels-idx1-ubyte.gz
0 Train accuracy: 0.91 Test accuracy: 0.9134
1 Train accuracy: 0.964 Test accuracy: 0.9615
2 Train accuracy: 0.972 Test accuracy: 0.9619
3 Train accuracy: 0.99 Test accuracy: 0.9689
4 Train accuracy: 0.98 Test accuracy: 0.9744
5 Train accuracy: 0.978 Test accuracy: 0.9726
6 Train accuracy: 0.99 Test accuracy: 0.9765
7 Train accuracy: 0.982 Test accuracy: 0.9834
8 Train accuracy: 0.99 Test accuracy: 0.9812
9 Train accuracy: 0.988 Test accuracy: 0.9783
10 Train accuracy: 0.984 Test accuracy: 0.9827
11 Train accuracy: 0.988 Test accuracy: 0.9822
12 Train accuracy: 0.994 Test accuracy: 0.9828
13 Train accuracy: 0.992 Test accuracy: 0.9856
14 Train accuracy: 0.994 Test accuracy: 0.9786
15 Train accuracy: 0.998 Test accuracy: 0.9866
16 Train accuracy: 0.998 Test accuracy: 0.9825
17 Train accuracy: 0.992 Test accuracy: 0.9871
18 Train accuracy: 0.994 Test accuracy: 0.9857
19 Train accuracy: 0.996 Test accuracy: 0.9788
states_concat = tf.concat(axis=0, values=states)
states_concat
OutputProjectionWrapper
wraps each cell: it does not affect the RNN's state, it only adds a linear fully connected layer on top of the outputs, and all of these fully connected layers share their weights.
# data preparation
t_min, t_max = 0, 30
resolution = 0.1
def time_series(t):
    return t * np.sin(t) / 3 + 2 * np.sin(t*5)
def next_batch(batch_size, n_steps):
    t0 = np.random.rand(batch_size, 1) * (t_max - t_min - n_steps * resolution)
    Ts = t0 + np.arange(0., n_steps + 1) * resolution
    ys = time_series(Ts)
    return ys[:, :-1].reshape(-1, n_steps, 1), ys[:, 1:].reshape(-1, n_steps, 1)
reset_graph()
n_steps = 20
n_inputs = 1
n_neurons = 100
n_outputs = 1
# The input is a sequence with one value per time step; the output is also a sequence with one value per time step, i.e. the input sequence shifted one step ahead
X = tf.placeholder( tf.float32, [None, n_steps, n_inputs] )
y = tf.placeholder( tf.float32, [None, n_steps, n_outputs] )
cell = tf.contrib.rnn.OutputProjectionWrapper( tf.contrib.rnn.BasicRNNCell( num_units=n_neurons, activation=tf.nn.relu ), output_size=n_outputs )
outputs, states = tf.nn.dynamic_rnn( cell, X, dtype=tf.float32 )
learning_rate = 0.001
loss = tf.reduce_mean( tf.square( outputs - y ) )
optimizer = tf.train.AdamOptimizer( learning_rate=learning_rate )
training_op = optimizer.minimize( loss )
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_iter = 1500
batch_size = 500
with tf.Session() as sess:
    init.run()
    for iteration in range(n_iter):
        X_batch, y_batch = next_batch( batch_size, n_steps )
        sess.run( training_op, feed_dict={ X:X_batch, y:y_batch } )
        if iteration % 100 == 0:
            mse = loss.eval( feed_dict={ X:X_batch, y:y_batch } )
            print( iteration, ", MSE: ", mse )
    saver.save( sess, "./models/rnn/my_time_series_model" )
0 , MSE: 22.603624
100 , MSE: 0.7702466
200 , MSE: 0.28123116
300 , MSE: 0.113177
400 , MSE: 0.0779924
500 , MSE: 0.056141626
600 , MSE: 0.053441137
700 , MSE: 0.04956764
800 , MSE: 0.05015872
900 , MSE: 0.053384278
1000 , MSE: 0.050169982
1100 , MSE: 0.047106195
1200 , MSE: 0.046533123
1300 , MSE: 0.04629165
1400 , MSE: 0.04463054
# load model and predict
t = np.linspace(t_min, t_max, int((t_max - t_min) / resolution))
n_steps = 20
t_instance = np.linspace(12.2, 12.2 + resolution * (n_steps + 1), n_steps + 1)
with tf.Session() as sess:
    saver.restore( sess, "./models/rnn/my_time_series_model" )
    # drop the last value so it can be used for comparison with the prediction
    X_new = time_series( np.array( t_instance[:-1].reshape( -1, n_steps, n_inputs ) ) )
    y_pred = sess.run( outputs, feed_dict={ X:X_new } )
plt.plot( t_instance[:-1], time_series( t_instance[:-1] ), 'ro-', label="real lines" )
plt.plot( t_instance[1:], y_pred.flatten(), 'b*--', label="predicted lines" )
plt.legend()
plt.show()
INFO:tensorflow:Restoring parameters from ./models/rnn/my_time_series_model
OutputProjectionWrapper
reduces the dimensionality of the RNN's output sequence to 1 (at every time step), but there is a more efficient approach: the RNN outputs have shape [batch_size, n_steps, n_neurons]; reshape them to [batch_size * n_steps, n_neurons], feed them through a fully connected layer whose output has shape [batch_size * n_steps, n_outputs], and finally reshape the result back to [batch_size, n_steps, n_outputs].
reset_graph()
n_steps = 20
n_inputs = 1
n_neurons = 100
n_outputs = 1
learning_rate = 0.001
# The input is a sequence with one value per time step; the output is also a sequence with one value per time step, i.e. the input sequence shifted one step ahead
X = tf.placeholder( tf.float32, [None, n_steps, n_inputs] )
y = tf.placeholder( tf.float32, [None, n_steps, n_outputs] )
cell = tf.contrib.rnn.BasicRNNCell( num_units=n_neurons, activation=tf.nn.relu )
rnn_outputs, states = tf.nn.dynamic_rnn( cell, X, dtype=tf.float32 )
# reshape first, process with a fully connected layer, then reshape back to get the outputs
stacked_rnn_outputs = tf.reshape( rnn_outputs, [-1, n_neurons] )
stacked_outputs = tf.layers.dense( stacked_rnn_outputs, n_outputs )
outputs = tf.reshape( stacked_outputs, [-1, n_steps, n_outputs] )
loss = tf.reduce_mean( tf.square( outputs - y ) )
optimizer = tf.train.AdamOptimizer( learning_rate=learning_rate )
training_op = optimizer.minimize( loss )
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_iter = 1500
batch_size = 500
with tf.Session() as sess:
    init.run()
    for iteration in range(n_iter):
        X_batch, y_batch = next_batch( batch_size, n_steps )
        sess.run( training_op, feed_dict={ X:X_batch, y:y_batch } )
        if iteration % 100 == 0:
            mse = loss.eval( feed_dict={ X:X_batch, y:y_batch } )
            print( iteration, ", MSE: ", mse )
    saver.save( sess, "./models/rnn/my_time_series_model" )
0 , MSE: 11.881364
100 , MSE: 0.36172634
200 , MSE: 0.107290134
300 , MSE: 0.06558608
400 , MSE: 0.05752879
500 , MSE: 0.054240808
600 , MSE: 0.05000111
700 , MSE: 0.04845157
800 , MSE: 0.0483876
900 , MSE: 0.049780883
1000 , MSE: 0.0478195
1100 , MSE: 0.04629565
1200 , MSE: 0.04530079
1300 , MSE: 0.045294493
1400 , MSE: 0.044130925
with tf.Session() as sess:                                        # not shown in the book
    saver.restore(sess, "./models/rnn/my_time_series_model")      # not shown
    # seed sequence of all zeros
    sequence1 = [0.] * n_steps
    for iteration in range(len(t) - n_steps):
        X_batch = np.array(sequence1[-n_steps:]).reshape(1, n_steps, 1)
        y_pred = sess.run(outputs, feed_dict={X: X_batch})
        sequence1.append(y_pred[0, -1, 0])
    # seed sequence taken from time_series
    sequence2 = [time_series(i * resolution + t_min + (t_max-t_min/3)) for i in range(n_steps)]
    for iteration in range(len(t) - n_steps):
        X_batch = np.array(sequence2[-n_steps:]).reshape(1, n_steps, 1)
        y_pred = sess.run(outputs, feed_dict={X: X_batch})
        sequence2.append(y_pred[0, -1, 0])
plt.figure(figsize=(11,4))
plt.subplot(121)
plt.plot(t, sequence1, "b-")
plt.plot(t[:n_steps], sequence1[:n_steps], "b-", linewidth=3)
plt.xlabel("Time")
plt.ylabel("Value")
plt.subplot(122)
plt.plot(t, sequence2, "b-")
plt.plot(t[:n_steps], sequence2[:n_steps], "b-", linewidth=3)
plt.xlabel("Time")
plt.show()
INFO:tensorflow:Restoring parameters from ./models/rnn/my_time_series_model
reset_graph()
n_inputs = 2
n_steps = 5
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
n_neurons = 100
n_layers = 3
layers = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
for layer in range(n_layers)]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)
outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)
init = tf.global_variables_initializer()
X_batch = np.random.rand(2, n_steps, n_inputs)
with tf.Session() as sess:
    init.run()
    outputs_val, states_val = sess.run([outputs, states], feed_dict={X: X_batch})
    print( outputs_val.shape ) # the outputs have shape [batch_size, n_steps, n_neurons]
    print( len(states_val) )   # states is a tuple whose length equals the number of layers
(2, 5, 100)
3
# the device specified here has no effect (the cell's ops are only created later, e.g. by dynamic_rnn)
with tf.device( "/gpu:0" ):
    layer1 = tf.contrib.rnn.BasicRNNCell( num_units=n_neurons )
DeviceCellWrapper
Wrapping each cell in a class such as the DeviceCellWrapper below makes it possible to place the different layers of a multilayer RNN on different devices (TensorFlow also ships an equivalent tf.nn.rnn_cell.DeviceWrapper class; a sketch using it appears after the example output below).
class DeviceCellWrapper(tf.contrib.rnn.RNNCell):
    def __init__(self, device, cell):
        self._cell = cell
        self._device = device
    @property
    def state_size(self):
        return self._cell.state_size
    @property
    def output_size(self):
        return self._cell.output_size
    def __call__(self, inputs, state, scope=None):
        with tf.device(self._device):
            return self._cell(inputs, state, scope)
reset_graph()
n_inputs = 5
n_steps = 20
n_neurons = 100
X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])
devices = ["/cpu:0", "/gpu:0", "/gpu:1"] # 在不同的device中创建不同的层
cells = [DeviceCellWrapper(dev,tf.contrib.rnn.BasicRNNCell(num_units=n_neurons))
for dev in devices]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)
outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    init.run()
    print(sess.run(outputs, feed_dict={X: np.random.rand(2, n_steps, n_inputs)}))
[[[ 0.06828328 -0.11375453 0.06424566 ... -0.24244206 -0.04821675
-0.12077259]
[ 0.07453808 -0.22510499 0.20471567 ... -0.14811225 -0.09225387
-0.04429062]
[ 0.13780874 -0.14680627 -0.00956541 ... -0.08136036 0.07381526
-0.03125764]
...
[-0.2540025 -0.3207857 0.3992359 ... -0.26669195 0.33505762
-0.03757678]
[ 0.22596699 -0.09880796 -0.274223 ... -0.13386028 -0.25443038
-0.36498213]
[ 0.16559371 -0.3343584 0.34313312 ... -0.36904442 0.06908777
0.4657412 ]]
[[ 0.00489879 -0.03151968 0.02628037 ... -0.19341365 -0.07303753
0.00451886]
[ 0.03073939 -0.05795513 0.1778592 ... -0.20945792 0.0520001
-0.07436947]
[ 0.00192375 -0.25690767 0.12488239 ... 0.02644877 -0.2504646
-0.12239385]
...
[-0.13501374 -0.06209685 0.15950367 ... -0.20012137 -0.3338359
-0.09281676]
[-0.44347283 0.2032329 0.12526968 ... -0.0796242 0.27046493
0.31883997]
[ 0.23965771 -0.22903351 0.07749572 ... -0.02653921 0.08402385
0.02313656]]]
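As mentioned above, recent TF 1.x versions ship a built-in wrapper that does the same job as the hand-written DeviceCellWrapper. This is a minimal sketch, assuming tf.nn.rnn_cell.DeviceWrapper is available in your TensorFlow version; it is not from the original notebook.
reset_graph()
n_inputs, n_steps, n_neurons = 5, 20, 100
X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])
devices = ["/cpu:0", "/gpu:0", "/gpu:1"]
# the built-in wrapper takes (cell, device) and pins the cell's ops to that device
cells = [tf.nn.rnn_cell.DeviceWrapper(tf.contrib.rnn.BasicRNNCell(num_units=n_neurons), dev)
         for dev in devices]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)
outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)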
DropoutWrapper
applies dropout to the wrapped cells to prevent overfitting.
reset_graph()
n_inputs = 1
n_neurons = 100
n_layers = 3
n_steps = 20
n_outputs = 1
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])
# use dropout during training; at test time use all the inputs (no dropout)
keep_prob = tf.placeholder_with_default(1.0, shape=())
cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
for layer in range(n_layers)]
cells_drop = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)
for cell in cells]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells_drop)
rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)
learning_rate = 0.01
stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])
stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])
loss = tf.reduce_mean(tf.square(outputs - y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_iterations = 1500
batch_size = 500
train_keep_prob = 0.5
with tf.Session() as sess:
    init.run()
    for iteration in range(n_iterations):
        X_batch, y_batch = next_batch(batch_size, n_steps)
        _, mse = sess.run([training_op, loss],
                          feed_dict={X: X_batch, y: y_batch,
                                     keep_prob: train_keep_prob})
        if iteration % 100 == 0:                   # not shown in the book
            print(iteration, "Training MSE:", mse) # not shown
    saver.save(sess, "./models/rnn/my_ts_dropout_model")
0 Training MSE: 17.523228
100 Training MSE: 3.6864297
200 Training MSE: 2.5945766
300 Training MSE: 2.608631
400 Training MSE: 2.354856
500 Training MSE: 2.166124
600 Training MSE: 2.0215302
700 Training MSE: 1.842349
800 Training MSE: 2.4512978
900 Training MSE: 1.9394426
1000 Training MSE: 2.3554778
1100 Training MSE: 2.031637
1200 Training MSE: 1.9235188
1300 Training MSE: 1.8453777
1400 Training MSE: 1.9560652
# load from disk and predict using rnn
with tf.Session() as sess:
    saver.restore(sess, "./models/rnn/my_ts_dropout_model")
    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))
    y_pred = sess.run(outputs, feed_dict={X: X_new})
plt.title("Testing the model", fontsize=14)
plt.plot(t_instance[:-1], time_series(t_instance[:-1]), "bo", markersize=10, label="instance")
plt.plot(t_instance[1:], time_series(t_instance[1:]), "y*", markersize=10, label="target")
plt.plot(t_instance[1:], y_pred[0,:,0], "r.", markersize=10, label="prediction")
plt.legend(loc="upper left")
plt.xlabel("Time")
plt.show()
INFO:tensorflow:Restoring parameters from ./models/rnn/my_ts_dropout_model
For long sequences, training can use truncated backpropagation through time, but this keeps the RNN from learning long-term patterns. A more detailed explanation: https://yunaitong.cn/understanding-lstm-networks.html
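A minimal sketch of truncated BPTT on the same toy signal (not from the original notebook; the window construction here is an assumption): the graph is unrolled over only n_steps, and the final state of one window is fed back as the initial state of the next, so gradients never flow back further than n_steps.
import numpy as np
import tensorflow as tf
tf.reset_default_graph()
n_steps, n_inputs, n_neurons, n_outputs = 20, 1, 100, 1
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])
init_state = tf.placeholder(tf.float32, [None, n_neurons])
cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, X, initial_state=init_state)
stacked_outputs = tf.layers.dense(tf.reshape(rnn_outputs, [-1, n_neurons]), n_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])
loss = tf.reduce_mean(tf.square(outputs - y))
training_op = tf.train.AdamOptimizer(0.001).minimize(loss)
# split one long series into consecutive windows of n_steps (toy data)
t_long = np.arange(0., 100., 0.1)
series = t_long * np.sin(t_long) / 3 + 2 * np.sin(t_long * 5)
n_windows = (len(series) - 1) // n_steps
X_wins = series[:n_windows * n_steps].reshape(n_windows, 1, n_steps, 1)
y_wins = series[1:n_windows * n_steps + 1].reshape(n_windows, 1, n_steps, 1)
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    state_val = np.zeros([1, n_neurons], dtype=np.float32)  # carried across windows
    for X_win, y_win in zip(X_wins, y_wins):
        _, state_val = sess.run([training_op, final_state],
                                feed_dict={X: X_win, y: y_win, init_state: state_val})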
The long-term state c(t-1) first passes through the forget gate, which drops part of the information; an addition operation then adds new information selected by the input gate. After that, c(t) is sent on to the next step without any further processing; at the same time, c(t) is passed through a tanh and multiplied elementwise by the output gate, producing h(t), which is also the output y(t) of this LSTM cell at this time step. The gate controllers decide whether information may pass, so their activation function is the sigmoid: an output of 0 closes the gate, an output of 1 opens it. The forget gate controls which parts of the long-term state are erased (controlled by f(t)); the input gate controls which parts of g(t) are added to the long-term state (controlled by i(t)); the output gate controls which parts of the long-term state are read and output at this step to h(t) and y(t) (controlled by o(t)). To add peephole connections, it is enough to set use_peepholes=True when creating the cell.
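A minimal sketch (assumed shapes, not from the original text): the LSTM cell described above is available as BasicLSTMCell, and the peephole variant is obtained by setting use_peepholes=True on LSTMCell.
import tensorflow as tf
tf.reset_default_graph()
n_steps, n_inputs, n_neurons = 20, 1, 100
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)                 # plain LSTM
peephole_cell = tf.contrib.rnn.LSTMCell(num_units=n_neurons, use_peepholes=True)  # peephole LSTM
outputs, states = tf.nn.dynamic_rnn(lstm_cell, X, dtype=tf.float32)
# states is an LSTMStateTuple: states.c is the long-term state c(t),
# states.h is the short-term state h(t) (identical to the last output)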
The GRU cell is a simplified version of the LSTM cell that performs just as well. Its main simplifications: the two state vectors are merged into a single vector h(t); one gate controller drives both the forget gate and the input gate; and there is no output gate, the full state vector is output at every time step. Creating a GRU cell only requires tf.contrib.rnn.GRUCell, as in the sketch below.
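A minimal sketch (assumed shapes): a GRU cell is created with GRUCell; its state is a single vector, so states has shape [batch_size, n_neurons].
import tensorflow as tf
tf.reset_default_graph()
n_steps, n_inputs, n_neurons = 20, 1, 100
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
gru_cell = tf.contrib.rnn.GRUCell(num_units=n_neurons)
outputs, states = tf.nn.dynamic_rnn(gru_cell, X, dtype=tf.float32)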
Each word is represented by a small dense vector called its embedding; the entries of the embedding matrix are real numbers. As backpropagation training proceeds, the embeddings of two words with similar meanings move closer and closer together (the norm of the difference of their embeddings becomes small). For sentences of varying length, either pass sequence_length as a variable, or pad every sentence to a fixed length before feeding it to the RNN for training. Since a full softmax over a large vocabulary is expensive, a sampled softmax can be used instead, implemented with the sampled_softmax_loss() function (see the sketch below). The tf.nn.legacy_seq2seq module makes it very convenient to build Encoder-Decoder models.
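A hedged sketch of sampled softmax (the placeholder names here are assumptions, not from the original text): instead of a full softmax over the whole vocabulary, the loss is estimated from num_sampled randomly drawn negative classes.
import numpy as np
import tensorflow as tf
tf.reset_default_graph()
vocabulary_size, embedding_size, num_sampled = 50000, 150, 64
inputs = tf.placeholder(tf.float32, [None, embedding_size])  # e.g. RNN or embedding outputs
labels = tf.placeholder(tf.int64, [None, 1])                 # ids of the true words
softmax_weights = tf.Variable(
    tf.truncated_normal([vocabulary_size, embedding_size],
                        stddev=1.0 / np.sqrt(embedding_size)))
softmax_biases = tf.Variable(tf.zeros([vocabulary_size]))
loss = tf.reduce_mean(
    tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases,
                               labels=labels, inputs=inputs,
                               num_sampled=num_sampled, num_classes=vocabulary_size))
training_op = tf.train.AdamOptimizer(0.01).minimize(loss)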
from six.moves import urllib
import errno
import os
import zipfile
WORDS_PATH = "./dataset/words"
WORDS_URL = 'http://mattmahoney.net/dc/text8.zip'
def mkdir_p(path):
    """Create directories, ok if they already exist.

    This is for python 2 support. In python >=3.2, simply use:
    >>> os.makedirs(path, exist_ok=True)
    """
    try:
        os.makedirs(path)
    except OSError as exc:
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise
def fetch_words_data(words_url=WORDS_URL, words_path=WORDS_PATH):
    os.makedirs(words_path, exist_ok=True)
    zip_path = os.path.join(words_path, "words.zip")
    if not os.path.exists(zip_path):
        urllib.request.urlretrieve(words_url, zip_path)
    with zipfile.ZipFile(zip_path) as f: # if this raises an error, the download probably failed; re-download or place the file manually in this directory
        data = f.read(f.namelist()[0])
    return data.decode("ascii").split()
# load the downloaded words
words = fetch_words_data()
print( len(words ) )
17005207
# map each word to an integer
from collections import Counter
vocabulary_size = 50000
vocabulary = [("UNK", None)] + Counter(words).most_common(vocabulary_size - 1)
vocabulary = np.array([word for word, _ in vocabulary])
dictionary = {word: code for code, word in enumerate(vocabulary)}
data = np.array([dictionary.get(word, 0) for word in words])
print( words[:5] )
print( data[:5] )
['anarchism', 'originated', 'as', 'a', 'term']
[5234 3081 12 6 195]
import random
from collections import deque
def generate_batch(batch_size, num_skips, skip_window):
    global data_index
    assert batch_size % num_skips == 0
    assert num_skips <= 2 * skip_window
    batch = np.ndarray(shape=(batch_size), dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
    span = 2 * skip_window + 1  # [ skip_window target skip_window ]
    buffer = deque(maxlen=span)
    for _ in range(span):
        buffer.append(data[data_index])
        data_index = (data_index + 1) % len(data)
    for i in range(batch_size // num_skips):
        target = skip_window  # target label at the center of the buffer
        targets_to_avoid = [ skip_window ]
        for j in range(num_skips):
            while target in targets_to_avoid:
                target = random.randint(0, span - 1)
            targets_to_avoid.append(target)
            batch[i * num_skips + j] = buffer[skip_window]
            labels[i * num_skips + j, 0] = buffer[target]
        buffer.append(data[data_index])
        data_index = (data_index + 1) % len(data)
    return batch, labels
data_index=0
batch, labels = generate_batch(8, 2, 1)
print( batch )
print( labels.flatten() )
[3081 3081 12 12 6 6 195 195]
[ 12 5234 6 3081 12 195 6 2]
batch, [vocabulary[word] for word in batch]
(array([3081, 3081, 12, 12, 6, 6, 195, 195], dtype=int32),
['originated', 'originated', 'as', 'as', 'a', 'a', 'term', 'term'])
batch_size = 128
embedding_size = 128 # Dimension of the embedding vector.
skip_window = 1 # How many words to consider left and right.
num_skips = 2 # How many times to reuse an input to generate a label.
# We pick a random validation set to sample nearest neighbors. Here we limit the
# validation samples to the words that have a low numeric ID, which by
# construction are also the most frequent.
valid_size = 16 # Random set of words to evaluate similarity on.
valid_window = 100 # Only pick dev samples in the head of the distribution.
valid_examples = np.random.choice(valid_window, valid_size, replace=False)
num_sampled = 64 # Number of negative examples to sample.
learning_rate = 0.01
reset_graph()
# Input data.
train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
valid_dataset = tf.constant(valid_examples, dtype=tf.int32)
vocabulary_size = 50000
embedding_size = 150
# Look up embeddings for inputs.
init_embeds = tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)
embeddings = tf.Variable(init_embeds)
train_inputs = tf.placeholder(tf.int32, shape=[None])
embed = tf.nn.embedding_lookup(embeddings, train_inputs)
# Construct the variables for the NCE loss
nce_weights = tf.Variable(
tf.truncated_normal([vocabulary_size, embedding_size],
stddev=1.0 / np.sqrt(embedding_size)))
nce_biases = tf.Variable(tf.zeros([vocabulary_size]))
# Compute the average NCE loss for the batch.
# tf.nce_loss automatically draws a new sample of the negative labels each
# time we evaluate the loss.
loss = tf.reduce_mean(
tf.nn.nce_loss(nce_weights, nce_biases, train_labels, embed,
num_sampled, vocabulary_size))
# Construct the Adam optimizer
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
# Compute the cosine similarity between minibatch examples and all embeddings.
norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), axis=1, keep_dims=True))
normalized_embeddings = embeddings / norm
valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset)
similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b=True)
# Add variable initializer.
init = tf.global_variables_initializer()
# train the model
num_steps = 10001
with tf.Session() as session:
    init.run()
    average_loss = 0
    for step in range(num_steps):
        print("\rIteration: {}".format(step), end="\t")
        batch_inputs, batch_labels = generate_batch(batch_size, num_skips, skip_window)
        feed_dict = {train_inputs : batch_inputs, train_labels : batch_labels}

        # We perform one update step by evaluating the training op (including it
        # in the list of returned values for session.run()
        _, loss_val = session.run([training_op, loss], feed_dict=feed_dict)
        average_loss += loss_val

        if step % 2000 == 0:
            if step > 0:
                average_loss /= 2000
            # The average loss is an estimate of the loss over the last 2000 batches.
            print("Average loss at step ", step, ": ", average_loss)
            average_loss = 0

        # Note that this is expensive (~20% slowdown if computed every 500 steps)
        if step % 10000 == 0:
            sim = similarity.eval()
            for i in range(valid_size):
                valid_word = vocabulary[valid_examples[i]]
                top_k = 8  # number of nearest neighbors
                nearest = (-sim[i, :]).argsort()[1:top_k+1]
                log_str = "Nearest to %s:" % valid_word
                for k in range(top_k):
                    close_word = vocabulary[nearest[k]]
                    log_str = "%s %s," % (log_str, close_word)
                print(log_str)

    final_embeddings = normalized_embeddings.eval()
Iteration: 0 Average loss at step 0 : 285.43389892578125
Nearest to only: desperation, harmed, tanaka, drown, alkenes, candu, laughter, illustrating,
Nearest to or: copyleft, haag, weaned, skilled, gv, gdansk, carcassonne, stokes,
Nearest to united: iii, citizen, crows, decimals, eutelsat, dcc, auckland, ennis,
Nearest to first: liquidity, steinbeck, profoundly, integration, inhabiting, ticino, incrimination, acclaimed,
Nearest to he: transitioned, winchell, resh, goldsmiths, standardised, markings, pursued, satirized,
Nearest to not: censor, fucking, venetian, lu, quarto, contractor, headway, stylus,
Nearest to many: spreadsheets, redeemer, nominees, absurd, alerts, xxvi, transylvanian, autonegotiation,
Nearest to been: powerpc, maccabean, precarious, hounds, hazael, gol, linear, schuster,
Nearest to an: gutierrez, dyess, privations, archaeological, bijection, kon, joh, insemination,
Nearest to six: melds, signer, hurtful, paws, ev, melodies, perennially, adf,
Nearest to however: frank, referring, maximization, beltway, liechtenstein, oxus, erik, vicki,
Nearest to has: camelopardalis, lessen, learning, ji, duddy, brontosaurus, unabomber, semipalatinsk,
Nearest to see: elsinore, gaines, esque, battleship, whip, hl, postings, tint,
Nearest to had: catalytic, frankenstein, tam, lefty, grenville, lineker, shtml, sartre,
Nearest to one: imagines, tijuana, hindrance, motorcyclist, steadfastly, lords, letting, hutchinson,
Nearest to d: schuster, asgard, intriguing, catus, jewellery, leptons, goodwill, prosthetic,
Iteration: 2000 Average loss at step 2000 : 130.98741731071473
Iteration: 4000 Average loss at step 4000 : 62.76376576328278
Iteration: 6000 Average loss at step 6000 : 42.172603695631025
Iteration: 8000 Average loss at step 8000 : 31.732391747474672
Iteration: 10000 Average loss at step 10000 : 25.78748117876053
Nearest to only: one, slovenians, delay, alerts, not, citrate, essendon, wolsey,
Nearest to or: of, uppercase, aruba, arendt, and, spassky, judo, ataxia,
Nearest to united: states, taxi, pus, inconsistencies, worldwide, statistic, phoenician, transmit,
Nearest to first: in, intercession, by, hep, fourier, phalanx, of, sly,
Nearest to he: it, observations, holmes, asparagales, had, that, illyrians, nurse,
Nearest to not: to, they, always, bogus, natchez, otherwise, assist, attend,
Nearest to many: the, astrological, chomsky, accredited, people, transporting, uppercase, boosting,
Nearest to been: has, parte, by, donated, cameron, vajrayana, it, illyrians,
Nearest to an: and, the, microtubules, ustinov, tetrapods, parsley, complement, crouching,
Nearest to six: five, nine, one, eight, seven, three, zero, two,
Nearest to however: ampere, eclipsed, scrimmage, coulomb, mj, precipitation, mathfrak, sidebands,
Nearest to has: is, been, willem, chadic, ataxia, columbus, contended, automorphism,
Nearest to see: heretics, bartholomew, pavements, destructive, topalov, doctorate, groundwater, helm,
Nearest to had: he, but, departures, assassinate, legend, surprisingly, columbus, deuterostomes,
Nearest to one: nine, two, six, seven, three, five, eight, four,
Nearest to d: nine, interlocking, ads, khorasan, perspectives, one, american, circus,
# save the final embeddings
np.save("./models/words/my_final_embeddings.npy", final_embeddings)
def plot_with_labels(low_dim_embs, labels):
    assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
    plt.figure(figsize=(12, 12))  # in inches
    for i, label in enumerate(labels):
        x, y = low_dim_embs[i,:]
        plt.scatter(x, y)
        plt.annotate(label,
                     xy=(x, y),
                     xytext=(5, 2),
                     textcoords='offset points',
                     ha='right',
                     va='bottom')
from sklearn.manifold import TSNE
tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
plot_only = 100
low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only,:])
labels = [vocabulary[i] for i in range(plot_only)]
plot_with_labels(low_dim_embs, labels)
plt.show()