推荐阅读:
文章中参考的内容都用超链接标注出。
自编码器。AutoEncoder 是多层神经网络,其中输入层和输出层表示相同的含义,具有相同的节点数。AutoEncoder 学习的是一个输入输出相同的“恒等函数”。由于输入和输出相同,网络的输出本身并没有实际意义;AutoEncoder 的意义在于学习到的(通常是节点数更少的)中间 coder 层(最中间的那一层),这一层是输入向量的良好表示,这个过程起到了“降维”的作用。当 AutoEncoder 只有一个隐含层(且使用线性激活函数)的时候,其原理相当于主成分分析(PCA);当 AutoEncoder 有多个隐含层的时候,每两层之间可以用 RBM 来 pre-training,最后由 BP 来调整最终权值。网络权重更新公式很容易用求偏导数的方法推导出来,算法是梯度下降法。(RBM:层内无连接,层间全连接,二分图)。
–参考&建议阅读:Autoencoder 详解
–参考的代码:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST dataset from the given directory with one-hot encoded labels.
mnist = input_data.read_data_sets("E:/workspace_py/mnist/", one_hot=True) # use your path.
# Number of passes over the training set and the mini-batch size used by the training loop.
n_epochs = 30
batch_size = 128
# Network input: a batch of flattened 28*28 images.
data_ph = tf.placeholder('float', [None, 28*28], name = 'data_ph')
# Reconstruction target with the same shape as the input.
output_ph = tf.placeholder('float', [None, 28*28], name = 'output_ph')
learning_rate = tf.placeholder('float', [], name = 'learning_rate_ph') # keeping lr as a placeholder allows variable lr.
# Weight matrices keyed by layer, initialised from tf.random_normal.
# The shapes below chain the layers as 784 -> 250 -> 50 -> 250 -> 784.
weights = { # set of weights.
'hl1': tf.Variable(tf.random_normal([28*28, 250])),
# 'hl2': tf.Variable(tf.random_normal([500, 200])),
'hl2': tf.Variable(tf.random_normal([250, 50])), # middle layer
'hl3': tf.Variable(tf.random_normal([50, 250])),
# 'hl5': tf.Variable(tf.random_normal([200, 500])),
'ol': tf.Variable(tf.random_normal([250, 28*28]))
}
# Bias vectors keyed by layer; each length equals the output width of the
# weight matrix stored under the same key.
biases = { # set of biases.
'hl1': tf.Variable(tf.random_normal([250])),
# 'hl2': tf.Variable(tf.random_normal([200])),
'hl2': tf.Variable(tf.random_normal([50])),
# 'hl4': tf.Variable(tf.random_normal([200])),
'hl3': tf.Variable(tf.random_normal([250])),
'ol': tf.Variable(tf.random_normal([28*28]))
}
# Encoder: 784 -> 250 -> 50. hl2 is the bottleneck ("code") layer.
hl1 = tf.nn.sigmoid(tf.add(tf.matmul(data_ph, weights['hl1']), biases['hl1']), name='hl1')
hl2 = tf.nn.sigmoid(tf.add(tf.matmul(hl1, weights['hl2']), biases['hl2']), name='hl2')
# Decoder: 50 -> 250 -> 784.
hl3 = tf.nn.sigmoid(tf.add(tf.matmul(hl2, weights['hl3']), biases['hl3']), name='hl3')
# The output layer must be fed from hl3. Feeding it from hl1 (same width, so
# no shape error is raised) would bypass the bottleneck entirely and leave
# hl2/hl3 untrained.
ol = tf.nn.sigmoid(tf.add(tf.matmul(hl3, weights['ol']), biases['ol']), name='ol')
# Mean squared reconstruction error between the output and the target image.
loss = tf.reduce_mean((ol - output_ph)**2, name='loss')
train = tf.train.AdamOptimizer(learning_rate).minimize(loss)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()

# To continue from the saved model (which has already done 30 epochs),
# uncomment the two lines below and adjust the paths.
# saver = tf.train.import_meta_graph("E:/workspace_py/saved_models/autoencoder/autoencoder-1.ckpt.meta") # use your path.
# saver.restore(sess, tf.train.latest_checkpoint('E:/workspace_py/saved_models/autoencoder/autoencoder-mnist')) # use your path.

batches_per_epoch = mnist.train.num_examples // batch_size
for epoch in range(n_epochs):
    for iteration in range(batches_per_epoch):
        # Labels are discarded: the autoencoder is trained to reproduce its input.
        epoch_x, _ = mnist.train.next_batch(batch_size)
        _, err = sess.run(
            [train, loss],
            feed_dict={data_ph: epoch_x, output_ph: epoch_x, learning_rate: 0.01},
        )
    # err is the loss of the last mini-batch of this epoch.
    print("Loss @ epoch ", str(epoch), " = ", err)

save_path = saver.save(sess, "E:/workspace_py/saved_models/autoencoder/autoencoder-mnist/autoencoder-1.ckpt") # use your path.

# Reconstruct the first training image and show it next to the original.
prediction = sess.run(ol, feed_dict={data_ph: [mnist.train.images[0]]})
print("prediction: ", prediction)

import matplotlib.pyplot as plt
plt.subplot(1, 2, 1)
plt.imshow(np.reshape(mnist.train.images[0], [28, 28]))
plt.subplot(1, 2, 2)
plt.imshow(np.reshape(prediction, [28, 28]))
plt.show()
sess.close()
其他相关:
深度信念网络(DBN),由多个受限玻尔兹曼机或变分自编码器堆叠而成。
DBN 由多层神经元构成,这些神经元又分为显性神经元和隐性神经元(显元和隐元)。显元用于接受输入,隐元用于提取特征,因此隐元也有个别名,叫特征检测器 (feature detectors)。最顶上的两层间的连接是无向的,组成联想记忆 (associative memory)。较低的其他层之间是自上而下的有向连接。最底层代表了数据向量 (data vectors),每一个神经元代表数据向量的一维。
–参考&建议阅读:深度信念网络(DBN)
可以自行寻找代码
卷积神经网络。原理:本质就是临近区域(像素)对于判断、决策时意义要远大于较远的区域(像素),所以在网络计算中对于特定节点的更新也就只需要根据附近的像素点数据进行决断,减少噪声的干扰,用卷积这个工具可以进行快速的处理。在普通的神经网络层上会添加对应的卷积运算,就可以实现根据邻域数据来更新网络权值,这样也就将普通的网络层改造成为卷积层,对应的也就得到了卷积神经网络。
参考阅读:
卷积神经网络/CNNs基础知识及使用tensorflow搭建
Tensorflow基于MNIST数据集识别自己的手写数字
参考代码↓
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST dataset from the local 'MNIST_data' directory with one-hot encoded labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
def weight_variable(shape):
    """Return a weight variable of the given shape.

    The initial values are drawn from a truncated normal distribution with
    a standard deviation of 0.1.
    """
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
def bias_variable(shape):
    """Return a bias variable of the given shape, initialised to the constant 0.1."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
def conv2d(x, W):
    """Convolve x with the kernel W using stride 1 and 'SAME' padding.

    In `strides`, positions 0 and 3 are fixed to 1; the two middle values
    are the vertical and horizontal steps of the convolution window.
    """
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
    """Apply max pooling with a 2x2 window and stride 2, using 'SAME' padding.

    `ksize` is the size of the pooling window; `strides` has the same
    layout as in `tf.nn.conv2d`.
    """
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
# Inputs: flattened images (784 values each) and one-hot labels over 10 classes.
x = tf.placeholder("float", shape=[None, 784])
y_ = tf.placeholder("float", shape=[None, 10])
# Reshape the images into a 4-D tensor: the first dimension is the number of
# samples (-1 = inferred), the next two are the image size, and the last one
# is the number of channels.
x_image = tf.reshape(x, [-1, 28, 28, 1])

# First convolution + pooling layer.
# Kernel shape: the first two values are the patch size, the third is the
# number of input channels, the fourth is the number of kernels (i.e. how
# many feature maps are produced).
w_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, w_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Second convolution + pooling layer: multi-channel convolution producing 64 feature maps.
w_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# The 28*28 input is reduced to 14*14 (32 maps) by the first stage and to
# 7*7 (64 maps) by the second one.
w_fc1 = weight_variable([7*7*64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])  # flatten; -1 = number of samples
f_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)

# Dropout to reduce over-fitting.
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(f_fc1, keep_prob)

w_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, w_fc2) + b_fc2)

# Cross-entropy loss. The probabilities are clipped away from 0 so that
# log(0) = -inf cannot turn the loss (and then every weight) into NaN.
cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y_conv, 1e-10, 1.0)))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Fraction of samples whose most probable class matches the label.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
sess = tf.InteractiveSession()
# tf.initialize_all_variables() is deprecated in favour of
# tf.global_variables_initializer().
sess.run(tf.global_variables_initializer())
for i in range(2000):
    batch = mnist.train.next_batch(50)
    if i % 100 == 0:
        # Evaluate with dropout disabled (keep_prob = 1.0).
        train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
# Only the first 500 test images are evaluated.
print("test accuracy %g" % accuracy.eval(
    feed_dict={x: mnist.test.images[0:500], y_: mnist.test.labels[0:500], keep_prob: 1.0}))
其他相关:
循环神经网络(RNN)。
推荐阅读:
循环神经网络打开手册
tensorflow rnn代码
TensorFlow练手项目一:使用循环神经网络(RNN)实现影评情感分类
对应的代码↓
import tensorflow as tf
import numpy as np
import random
# Network dimensions.
num_of_input_nodes = 1  # features per time step
num_of_hidden_nodes = 80  # number of units of the LSTM cell
num_of_output_nodes = 1  # width of the regression output
length_of_sequences = 10  # time steps per input sequence
# Training configuration.
num_of_training_epochs = 5000  # iterations of the training loop (one mini-batch each)
size_of_mini_batch = 100  # sequences per training step
num_of_prediction_epochs = 100  # number of fresh samples used by calc_accuracy
learning_rate = 0.01  # step size of the gradient-descent optimizer
forget_bias = 0.8  # forget_bias passed to the LSTM cell
num_of_sample = 1000  # size of the generated training set
def get_batch(batch_size, X, t):
    """Draw a random mini-batch (with replacement) from the data set.

    Args:
        batch_size: number of sequences to draw.
        X: 2-D array-like of sequences, one row per sample.
        t: 1-D array-like of targets, aligned with the rows of X.

    Returns:
        A tuple ``(xs, ts)`` where ``xs`` has shape
        ``(batch_size, sequence_len, 1)`` and ``ts`` has shape
        ``(batch_size, 1)``.
    """
    indices = [random.randint(0, len(X) - 1) for _ in range(batch_size)]
    # Append a trailing axis so every time step is a 1-element feature vector.
    xs = np.asarray(X)[indices][..., np.newaxis]
    ts = np.asarray(t)[indices].reshape(-1, 1)
    return xs, ts
def create_data(nb_of_samples, sequence_len):
    """Generate random binary sequences and their sums.

    Args:
        nb_of_samples: number of sequences to generate.
        sequence_len: length of each sequence.

    Returns:
        A tuple ``(X, t)``: ``X`` is a float array of shape
        ``(nb_of_samples, sequence_len)`` holding only 0.0 and 1.0, and
        ``t`` holds the sum of each row (the number of ones).
    """
    # Rounding uniform samples from [0, 1) yields 0.0 or 1.0. A single 2-D
    # draw consumes the random stream in the same row-major order as drawing
    # one row at a time.
    X = np.around(np.random.rand(nb_of_samples, sequence_len))
    # Create the targets for each sequence
    t = np.sum(X, axis=1)
    return X, t
def make_prediction(nb_of_samples):
    """Generate a fresh evaluation set shaped for the network placeholders.

    Args:
        nb_of_samples: number of sequences to generate.

    Returns:
        A tuple ``(inputs, targets)`` with shapes
        ``(nb_of_samples, length_of_sequences, 1)`` and ``(nb_of_samples, 1)``.
    """
    # Use the module-level sequence length rather than a literal 10, so the
    # data always matches the input placeholder.
    xs, ts = create_data(nb_of_samples, length_of_sequences)
    return xs[..., np.newaxis], ts[:, np.newaxis]
def inference(input_ph, istate_ph):
    """Build the LSTM regression network.

    Args:
        input_ph: input tensor; expected as
            [batch, length_of_sequences, num_of_input_nodes].
        istate_ph: initial LSTM state. Because the cell is created with
            ``state_is_tuple=False``, it is a single tensor of width
            ``2 * num_of_hidden_nodes``.

    Returns:
        A tuple ``(output_op, states_op, results)``: the prediction computed
        from the last time step, the final RNN state, and the list of the
        four trainable variables ``[weight1, weight2, bias1, bias2]``.
    """
    with tf.name_scope("inference"):
        weight1_var = tf.Variable(tf.truncated_normal(
            [num_of_input_nodes, num_of_hidden_nodes], stddev=0.1), name="weight1")
        weight2_var = tf.Variable(tf.truncated_normal(
            [num_of_hidden_nodes, num_of_output_nodes], stddev=0.1), name="weight2")
        bias1_var = tf.Variable(tf.truncated_normal([num_of_hidden_nodes], stddev=0.1), name="bias1")
        bias2_var = tf.Variable(tf.truncated_normal([num_of_output_nodes], stddev=0.1), name="bias2")

        # Put time on the leading axis, flatten to 2-D, project the inputs to
        # the hidden width, then split into one tensor per time step as
        # static_rnn expects.
        in1 = tf.transpose(input_ph, [1, 0, 2])
        in2 = tf.reshape(in1, [-1, num_of_input_nodes])
        in3 = tf.matmul(in2, weight1_var) + bias1_var
        in4 = tf.split(in3, length_of_sequences, 0)

        cell = tf.nn.rnn_cell.BasicLSTMCell(num_of_hidden_nodes, forget_bias=forget_bias, state_is_tuple=False)
        rnn_output, states_op = tf.contrib.rnn.static_rnn(cell, in4, initial_state=istate_ph)
        # Only the output of the last time step is used for the prediction.
        output_op = tf.matmul(rnn_output[-1], weight2_var) + bias2_var

        # Add summary ops to collect data
        tf.summary.histogram("weights1", weight1_var)
        tf.summary.histogram("weights2", weight2_var)
        tf.summary.histogram("biases1", bias1_var)
        tf.summary.histogram("biases2", bias2_var)
        tf.summary.histogram("output", output_op)

        results = [weight1_var, weight2_var, bias1_var, bias2_var]
    return output_op, states_op, results
def loss(output_op, supervisor_ph):
    """Return the mean squared error between predictions and targets.

    A scalar summary named "loss" is also registered for the returned tensor.
    """
    with tf.name_scope("loss"):
        loss_op = tf.reduce_mean(tf.square(output_op - supervisor_ph))
        tf.summary.scalar("loss", loss_op)
    return loss_op
def training(loss_op):
    """Return the op that minimises ``loss_op`` with the module-level ``optimizer``."""
    with tf.name_scope("training"):
        training_op = optimizer.minimize(loss_op)
    return training_op
def calc_accuracy(output_op, prints=False):
    """Evaluate the network on freshly generated data and print its accuracy.

    A prediction counts as correct when it is within 0.05 of the target.
    Relies on the module-level ``sess`` and placeholders.

    Args:
        output_op: prediction tensor to evaluate.
        prints: when True, also print every input sequence with its
            predicted and expected value.

    Returns:
        The raw result of ``sess.run([output_op], ...)``, i.e. a one-element
        list holding the prediction array.
    """
    inputs, ts = make_prediction(num_of_prediction_epochs)
    pred_dict = {
        input_ph: inputs,
        supervisor_ph: ts,
        istate_ph: np.zeros((num_of_prediction_epochs, num_of_hidden_nodes * 2)),
    }
    output = sess.run([output_op], feed_dict=pred_dict)
    predictions = output[0]

    if prints:
        for sequence, predicted, target in zip(inputs, predictions, ts):
            for step in sequence:
                print(list(step)[0])
            print("output: %f, correct: %d" % (predicted[0], target[0]))

    abs_error = np.abs(predictions - ts)
    total = sum(1 for err in abs_error if err[0] < 0.05)
    print("accuracy %f" % (total / float(len(ts))))
    return output
# Seed the Python and NumPy generators so data generation is reproducible.
random.seed(0)
np.random.seed(0)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
X, t = create_data(num_of_sample, length_of_sequences)

with tf.Graph().as_default():
    # The graph-level seed applies to the current default graph only, so it
    # has to be set after the new graph has become the default.
    tf.set_random_seed(0)

    input_ph = tf.placeholder(tf.float32, [None, length_of_sequences, num_of_input_nodes], name="input")
    supervisor_ph = tf.placeholder(tf.float32, [None, num_of_output_nodes], name="supervisor")
    # LSTM state placeholder, twice the hidden width.
    istate_ph = tf.placeholder(tf.float32, [None, num_of_hidden_nodes * 2], name="istate")

    output_op, states_op, datas_op = inference(input_ph, istate_ph)
    loss_op = loss(output_op, supervisor_ph)
    training_op = training(loss_op)

    summary_op = tf.summary.merge_all()
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        saver = tf.train.Saver()
        summary_writer = tf.summary.FileWriter("/tmp/tensorflow_log", graph=sess.graph)
        sess.run(init)

        for epoch in range(num_of_training_epochs):
            inputs, supervisors = get_batch(size_of_mini_batch, X, t)
            train_dict = {
                input_ph: inputs,
                supervisor_ph: supervisors,
                istate_ph: np.zeros((size_of_mini_batch, num_of_hidden_nodes * 2)),
            }
            sess.run(training_op, feed_dict=train_dict)

            if epoch % 100 == 0:
                summary_str, train_loss = sess.run([summary_op, loss_op], feed_dict=train_dict)
                print("train#%d, train loss: %e" % (epoch, train_loss))
                summary_writer.add_summary(summary_str, epoch)
                if epoch % 500 == 0:
                    calc_accuracy(output_op)

        calc_accuracy(output_op, prints=True)
        datas = sess.run(datas_op)
        saver.save(sess, "model.ckpt")
        # Flush pending summaries to disk.
        summary_writer.close()
长短期记忆网络, RNN的变种,主要用于解决梯度消失/爆炸的问题。
参考阅读:tensorflow笔记:多层LSTM代码分析
对应的代码
# -*-coding:utf-8-*-
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
tf.reset_default_graph()

# Hyper Parameters
learning_rate = 0.01
n_steps = 28  # time steps per sample (the input is reshaped to [-1, n_steps, n_inputs])
n_inputs = 28  # features per time step
n_hiddens = 64  # units per LSTM layer
n_layers = 3  # number of stacked LSTM layers
n_classes = 10

# data
mnist = input_data.read_data_sets(r"C:\Users\ilike\PycharmProjects\untitled1\MNIST_data", one_hot=True)
test_x = mnist.test.images
test_y = mnist.test.labels

# tensor placeholder
with tf.name_scope('inputs'):
    x = tf.placeholder(tf.float32, [None, n_steps * n_inputs], name='x_input')
    y = tf.placeholder(tf.float32, [None, n_classes], name='y_input')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob_input')
    # The batch size is fed at run time because it sizes the zero initial state of the RNN.
    batch_size = tf.placeholder(tf.int32, [], name='batch_size_input')

# weights and biases of the output layer
with tf.name_scope('weights'):
    Weights = tf.Variable(tf.truncated_normal([n_hiddens, n_classes], stddev=0.1), dtype=tf.float32, name='W')
    tf.summary.histogram('output_layer_weights', Weights)
with tf.name_scope('biases'):
    biases = tf.Variable(tf.random_normal([n_classes]), name='b')
    tf.summary.histogram('output_layer_biases', biases)
# RNN structure
def RNN_LSTM(x, Weights, biases):
    """Build a multi-layer LSTM classifier and return its class probabilities.

    Args:
        x: flat input batch; reshaped here to [-1, n_steps, n_inputs].
        Weights: output-layer weight matrix applied to the last LSTM output.
        biases: output-layer bias vector.

    Returns:
        The softmax of the output layer applied to the LSTM output at the
        last time step.
    """
    x = tf.reshape(x, [-1, n_steps, n_inputs])

    def attn_cell():
        """Create one LSTM cell wrapped with output dropout."""
        lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hiddens)
        with tf.name_scope('lstm_dropout'):
            return tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob)

    # Each layer needs its own cell instance.
    enc_cells = [attn_cell() for _ in range(n_layers)]
    with tf.name_scope('lstm_cells_layers'):
        mlstm_cell = tf.contrib.rnn.MultiRNNCell(enc_cells, state_is_tuple=True)
    _init_state = mlstm_cell.zero_state(batch_size, dtype=tf.float32)
    outputs, states = tf.nn.dynamic_rnn(mlstm_cell, x, initial_state=_init_state, dtype=tf.float32, time_major=False)
    # Classify from the output of the last time step only.
    return tf.nn.softmax(tf.matmul(outputs[:, -1, :], Weights) + biases)
with tf.name_scope('output_layer'):
    # pred holds class probabilities (RNN_LSTM already applies softmax).
    pred = RNN_LSTM(x, Weights, biases)
    tf.summary.histogram('outputs', pred)

# cost
with tf.name_scope('loss'):
    # Cross-entropy on the probabilities. They are clipped away from 0 so
    # that log(0) cannot produce NaN.
    cost = tf.reduce_mean(
        -tf.reduce_sum(y * tf.log(tf.clip_by_value(pred, 1e-10, 1.0)), reduction_indices=[1]))
    tf.summary.scalar('loss', cost)

# optimizer
with tf.name_scope('train'):
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

with tf.name_scope('accuracy'):
    # Accuracy of the fed batch only. The update op of tf.metrics.accuracy is
    # a streaming metric that keeps accumulating over every train and test
    # evaluation, so it does not report the accuracy of the data being fed.
    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

merged = tf.summary.merge_all()
# The local initializer is kept so `init` still initialises any local variables.
init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
with tf.Session() as sess:
    sess.run(init)
    # NOTE(review): the train writer uses a root-relative path while the test
    # writer uses an absolute one -- confirm this is intended.
    train_writer = tf.summary.FileWriter(r"\MNIST_data\train", sess.graph)
    test_writer = tf.summary.FileWriter(r"C:\Users\ilike\PycharmProjects\untitled1\MNIST_data\test", sess.graph)

    # training
    _batch_size = 128
    for i in range(2000):
        batch_x, batch_y = mnist.train.next_batch(_batch_size)
        sess.run(train_op, feed_dict={x: batch_x, y: batch_y, keep_prob: 0.5, batch_size: _batch_size})
        if (i + 1) % 100 == 0:
            # Record summaries with dropout disabled, for the current training
            # batch and for the whole test set.
            train_result = sess.run(merged, feed_dict={x: batch_x, y: batch_y, keep_prob: 1.0, batch_size: _batch_size})
            test_result = sess.run(merged,
                                   feed_dict={x: test_x, y: test_y, keep_prob: 1.0, batch_size: test_x.shape[0]})
            train_writer.add_summary(train_result, i + 1)
            test_writer.add_summary(test_result, i + 1)
    print("Optimization Finished!")

    # prediction
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={x: test_x, y: test_y, keep_prob: 1.0, batch_size: test_x.shape[0]}))

    # Flush pending summaries to disk.
    train_writer.close()
    test_writer.close()
其他相关:
支持向量机,入门机器学习的内容。
对应的代码↓
import numpy as np
from sklearn import svm
from sklearn.multiclass import OneVsRestClassifier
import loadData
# Load the training and test data through the project-local loadData module.
image_train, label_train = loadData.readfile()
image_test,label_test = loadData.read_test_file()
# Select 1000 samples from the training set (per the original note; loadData is not shown here).
train_image = loadData.get_image(image_train, 1000)
train_label = loadData.get_label(label_train, 1000)
# Select 100 samples from the test set (per the original note).
test_image = loadData.get_image(image_test, 100)
test_label = loadData.get_label(label_test, 100)
# kernel and degree are the parameters of the model.
# kernel is the kernel method; common choices are 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'.
# In this example, changing degree or the kernel type changes the accuracy.
#svc = svm.SVC(kernel='linear')
#svc = svm.SVC(kernel='poly',degree=2)
svc = svm.SVC(kernel='poly',degree=2) # configure the classifier
svc.fit(train_image, train_label) # fit on the training samples
res = svc.predict(test_image) # predict the test samples
wrongNum = np.sum(res != test_label)# number of wrong predictions
num = len(test_image)# total number of test images
acc = 1-wrongNum/float(num)
print ("准确率:", acc)# print the accuracy
© 2020 GitHub, Inc.
Terms
Privacy
Security
Status
Help