经过前期自己写全连接层(FC),归一化层(Batch Normalization),Relu层,Pool层,softmax以及svm的loss层,(还没有写dropout层,以及conv层),对Deeplearning有了更清晰的了解。
终于来到深度学习架构的学习,考虑到以后工作可能不是搞学术研究(偏学术的推荐pytorch,大佬推荐),选择了跨平台性好的tensorflow。
主要是写tensorflow.ipynb作业的学习记录,包括很多查阅的资料。
创建虚拟python环境:
virtualenv -p python3 .env
source .env/bin/activate
所创建的虚拟环境只包含一个干净的 Python 解释器(即使系统里已经装了 tensorflow,在虚拟环境中也无法导入),所以可以在虚拟环境里随便发挥,安装任何需要的库。
安装tensorflow请参考安装tensorflow
import tensorflow as tf
import numpy as np
import math
import timeit
import matplotlib.pyplot as plt
%matplotlib inline
from cs231n.data_utils import load_CIFAR10
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=10000):
    """
    Load CIFAR-10 from disk, carve out train/validation/test subsets and
    centre every subset on the mean training image.

    The first `num_training` training examples become the training split, the
    next `num_validation` examples become the validation split, and the first
    `num_test` test examples become the test split.

    Returns:
        (X_train, y_train, X_val, y_val, X_test, y_test)
    """
    # Raw arrays as returned by the course helper.
    data_root = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(data_root)

    # Index ranges for each split; validation must be taken before X_train
    # is overwritten with its own subset.
    val_rows = range(num_training, num_training + num_validation)
    train_rows = range(num_training)
    test_rows = range(num_test)

    X_val, y_val = X_train[val_rows], y_train[val_rows]
    X_train, y_train = X_train[train_rows], y_train[train_rows]
    X_test, y_test = X_test[test_rows], y_test[test_rows]

    # Subtract the per-pixel mean of the training split from every split.
    # NOTE(review): the in-place subtraction presumably relies on
    # load_CIFAR10 returning float arrays -- confirm.
    mean_image = np.mean(X_train, axis=0)
    for split in (X_train, X_val, X_test):
        split -= mean_image

    return X_train, y_train, X_val, y_val, X_test, y_test
# Load the preprocessed splits, then report the shape of every array.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
for _label, _array in (
    ('Train data shape: ', X_train),
    ('Train labels shape: ', y_train),
    ('Validation data shape: ', X_val),
    ('Validation labels shape: ', y_val),
    ('Test data shape: ', X_test),
    ('Test labels shape: ', y_test),
):
    print(_label, _array.shape)
# Discard whatever is in the default graph before defining a new model.
tf.reset_default_graph()

# Graph inputs that change on every batch. The leading dimension is None so
# that it is set automatically from the size of the batch that is fed in.
X = tf.placeholder(dtype=tf.float32, shape=[None, 32, 32, 3])
y = tf.placeholder(dtype=tf.int64, shape=[None])
is_training = tf.placeholder(dtype=tf.bool)
def simple_model(X,y):
    """Build a conv -> ReLU -> affine graph and return the class scores.

    X is the image placeholder; y is accepted but not used when building
    the graph. The returned tensor has 10 columns (one score per class).
    """
    # Trainable parameters: one 7x7 convolution with 32 filters over 3 input
    # channels, followed by a single affine layer onto 10 scores.
    conv_kernel = tf.get_variable("Wconv1", shape=[7, 7, 3, 32])
    conv_bias = tf.get_variable("bconv1", shape=[32])
    fc_weights = tf.get_variable("W1", shape=[5408, 10])
    fc_bias = tf.get_variable("b1", shape=[10])

    # Forward pass. With the 32x32 inputs declared by the placeholder, a 7x7
    # VALID convolution at stride 2 yields 13x13 maps, so 13 * 13 * 32 = 5408
    # features per example after flattening.
    conv_out = tf.nn.conv2d(X, conv_kernel, strides=[1, 2, 2, 1], padding='VALID') + conv_bias
    activated = tf.nn.relu(conv_out)
    flattened = tf.reshape(activated, [-1, 5408])
    return tf.matmul(flattened, fc_weights) + fc_bias
# Forward graph for the simple model.
y_out = simple_model(X, y)

# Loss: hinge loss of the scores against one-hot encoded labels.
total_loss = tf.losses.hinge_loss(labels=tf.one_hot(y, 10), logits=y_out)
mean_loss = tf.reduce_mean(total_loss)

# Optimizer: Adam with a learning rate of 5e-4.
optimizer = tf.train.AdamOptimizer(learning_rate=5e-4)
train_step = optimizer.minimize(mean_loss)
tf.nn.conv2d()
X and Wconv1 are Tensors, while strides is a list of four ints. For example, Wconv1’s shape is [filter_height, filter_width, in_channels, out_channels]
strides[0] = strides[3] = 1 (no striding over the batch and channel dimensions)
padding, two choices, [‘VALID’ ‘SAME’]
calculate output_width:(height is the same way)
‘VALID’: output_width = np.ceil((input_width - kernel_width + 1)/strides[2]) (for the height use kernel_height and strides[1])
‘SAME’: output_width = np.ceil(input_width/strides[2]) (for the height use strides[1])
tf.one_hot(y,10)
transform (N,) into (N,10).Rules: (i, y[i]) = 1, others = 0
tf.losses.hinge_loss()
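Similar to, but not exactly, the multiclass SVM loss: every entry of the (N,10) one-hot labels is treated as a separate binary label (0/1 converted to -1/+1), the hinge max(0, 1 - label * logit) is computed element-wise, and the result is reduced to a scalar.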
def run_model(session, predict, loss_val, Xd, yd,
              epochs=1, batch_size=64, print_every=100,
              training=None, plot_losses=False):
    """Run `epochs` passes over (Xd, yd), optionally training, and report stats.

    The function feeds the module-level placeholders `X`, `y` and
    `is_training`, so it must be called after they have been defined for the
    graph that `predict` and `loss_val` belong to.

    Args:
        session: session used to evaluate the graph.
        predict: score tensor; the predicted class is its argmax over axis 1.
        loss_val: loss tensor evaluated on every minibatch.
        Xd: input examples, indexed along the first axis.
        yd: labels aligned with `Xd`.
        epochs: number of passes over the data.
        batch_size: maximum number of examples per minibatch.
        print_every: while training, print minibatch stats every this many
            iterations.
        training: optional training op; when given it is run on every
            minibatch and `is_training` is fed as True.
        plot_losses: when True, plot the per-minibatch loss after each epoch.

    Returns:
        (total_loss, total_correct) for the last epoch: the example-weighted
        mean loss and the fraction of correctly classified examples.
        Both are 0.0 when `epochs` is 0.
    """
    # have tensorflow compute accuracy
    correct_prediction = tf.equal(tf.argmax(predict, 1), y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # shuffle indices once; every epoch visits the data in this order
    num_examples = Xd.shape[0]
    train_indicies = np.arange(num_examples)
    np.random.shuffle(train_indicies)

    training_now = training is not None

    # Tensors/ops evaluated per minibatch. The loss comes from the `loss_val`
    # argument (not from a global), so the function evaluates whatever loss
    # the caller passes in. When training, the accuracy slot is replaced by
    # the training op so that a step is taken on every minibatch.
    variables = [loss_val, correct_prediction, accuracy]
    if training_now:
        variables[-1] = training

    num_batches = int(math.ceil(num_examples / batch_size))

    # Defaults so that epochs == 0 returns cleanly instead of raising.
    total_loss, total_correct = 0.0, 0.0

    # counter
    iter_cnt = 0
    for e in range(epochs):
        # keep track of losses and accuracy
        correct = 0
        weighted_losses = []  # loss * batch size, for the epoch average
        batch_losses = []     # raw per-minibatch loss, for plotting
        # make sure we iterate over the dataset once
        for i in range(num_batches):
            # generate indices for the batch (the last one may be shorter)
            start_idx = i * batch_size
            idx = train_indicies[start_idx:start_idx + batch_size]

            # create a feed dictionary for this batch
            feed_dict = {X: Xd[idx, :],
                         y: yd[idx],
                         is_training: training_now}
            # get batch size
            actual_batch_size = yd[idx].shape[0]

            # have tensorflow compute loss and correct predictions
            # and (if given) perform a training step
            loss, corr, _ = session.run(variables, feed_dict=feed_dict)

            # aggregate performance stats
            batch_losses.append(loss)
            weighted_losses.append(loss * actual_batch_size)
            correct += np.sum(corr)

            # print every now and then
            if training_now and (iter_cnt % print_every) == 0:
                print("Iteration {0}: with minibatch training loss = {1:.3g} and accuracy of {2:.2g}"
                      .format(iter_cnt, loss, np.sum(corr) / actual_batch_size))
            iter_cnt += 1
        total_correct = correct / num_examples
        total_loss = np.sum(weighted_losses) / num_examples
        print("Epoch {2}, Overall loss = {0:.3g} and accuracy of {1:.3g}"
              .format(total_loss, total_correct, e + 1))
        if plot_losses:
            # Plot the raw minibatch loss so the curve matches its axis label
            # (not the loss scaled by the batch size).
            plt.plot(batch_losses)
            plt.grid(True)
            plt.title('Epoch {} Loss'.format(e + 1))
            plt.xlabel('minibatch number')
            plt.ylabel('minibatch loss')
            plt.show()
    return total_loss, total_correct
# Train the simple model for one epoch, then score it on the validation set.
# The device string may be "/cpu:0" or "/gpu:0".
with tf.Session() as sess, tf.device("/cpu:0"):
    sess.run(tf.global_variables_initializer())

    print('Training')
    run_model(sess, y_out, mean_loss, X_train, y_train,
              epochs=1, batch_size=64, print_every=100,
              training=train_step, plot_losses=True)

    print('Validation')
    run_model(sess, y_out, mean_loss, X_val, y_val,
              epochs=1, batch_size=64)
tf.cast
change data type among tf.float32, tf.float64, tf.int32
tf.cast
tf.reduce_mean
calculate mean value for the tensor or some axis of the tensor
np.arange(N)
make np.array([0,1,2…N-1])
np.random.shuffle()
shuffles the given array in place (here the array of training indices), so that minibatches are drawn in random order
np.random.shuffle
“loss = {1:.3g}”.format()
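{1:.3g} inserts the argument at index 1 of format() (i.e. the second argument) and formats it with 3 significant digits in general format, not with 3 digits after the decimal point.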
In this section, we’re going to specify a model for you to construct. The goal here isn’t to get good performance (that’ll be next), but instead to get comfortable with understanding the TensorFlow documentation and configuring your own model.
Using the code provided above as guidance, and using the following TensorFlow documentation, specify a model with the following architecture:
# Discard whatever is in the default graph before defining a new model.
tf.reset_default_graph()

# Graph inputs that change on every batch. The leading dimension is None so
# that it is set automatically from the size of the batch that is fed in.
X = tf.placeholder(dtype=tf.float32, shape=[None, 32, 32, 3])
y = tf.placeholder(dtype=tf.int64, shape=[None])
is_training = tf.placeholder(dtype=tf.bool, name='is_training')
# define model
def complex_model(X,y,is_training):
    """Build conv -> ReLU -> batch norm -> max pool -> affine -> ReLU -> affine.

    X is the image placeholder and is_training a boolean tensor selecting the
    batch-norm branch; y is accepted but not used when building the graph.
    Returns the tensor of 10 class scores per example.
    """
    # Trainable parameters, created in a fixed order.
    conv_kernel = tf.get_variable("Wconv1", shape=[7, 7, 3, 32])
    conv_bias = tf.get_variable("bconv1", shape=[32])
    hidden_weights = tf.get_variable("W1", shape=[5408, 1024])
    hidden_bias = tf.get_variable("b1", shape=[1024])
    out_weights = tf.get_variable("W2", shape=[1024, 10])
    out_bias = tf.get_variable("b2", shape=[10])

    # 7x7 VALID convolution at stride 1, followed by ReLU.
    conv_out = tf.nn.conv2d(X, conv_kernel, strides=[1, 1, 1, 1], padding="VALID") + conv_bias
    activated = tf.nn.relu(conv_out)

    # Batch norm: both branches use the 'bn' scope; the inference branch
    # passes reuse=True so it picks up the variables of the training branch.
    def _bn_training():
        return tf.contrib.layers.batch_norm(activated, scale=True, updates_collections=None, is_training=True, reuse=None, scope='bn')

    def _bn_inference():
        return tf.contrib.layers.batch_norm(activated, scale=True, updates_collections=None, is_training=False, reuse=True, scope='bn')

    normalized = tf.cond(is_training, _bn_training, _bn_inference)

    # 2x2 max pooling at stride 2. With the 32x32 inputs declared by the
    # placeholder this leaves 13x13x32 = 5408 features per example.
    pooled = tf.nn.max_pool(normalized, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
    flattened = tf.reshape(pooled, [-1, 5408])

    # Hidden affine layer with ReLU, then the affine output layer.
    hidden = tf.nn.relu(tf.matmul(flattened, hidden_weights) + hidden_bias)
    return tf.matmul(hidden, out_weights) + out_bias
# Forward graph for the complex model.
y_out = complex_model(X, y, is_training)

# Loss: softmax cross-entropy of the scores against one-hot encoded labels.
total_loss = tf.losses.softmax_cross_entropy(onehot_labels=tf.one_hot(y, 10), logits=y_out)
mean_loss = tf.reduce_mean(total_loss)

# Optimizer: RMSProp with a learning rate of 1e-3.
optimizer = tf.train.RMSPropOptimizer(learning_rate=1e-3)

# Make the training step depend on whatever ops are registered in the
# UPDATE_OPS collection (the extra dependency batch normalization requires).
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(extra_update_ops):
    train_step = optimizer.minimize(mean_loss)
# Train the complex model for one epoch, then score it on the validation set.
# The session is used as a context manager so that its resources are released
# when the block ends instead of being leaked when `sess` is rebound later.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    print('Training')
    run_model(sess, y_out, mean_loss, X_train, y_train, 1, 64, 100, train_step)

    print('Validation')
    run_model(sess, y_out, mean_loss, X_val, y_val, 1, 64)
tf.layers
我选择搭建的网络架构是[conv-relu-pool]*N + [fc-relu-dropout]*M + relu + softmax
def my_model(X,y,is_training):
    """Build [conv-relu-pool] x 2 -> [fc-relu-dropout] -> fc and return scores.

    X is the image placeholder and is_training a boolean tensor that switches
    dropout on; y is accepted but not used when building the graph.
    Returns the tensor of 10 class scores per example (unnormalized; the
    softmax is applied by the loss).
    """
    # Two conv-relu-pool stages. With the 32x32 inputs declared by the
    # placeholder the spatial size goes 32 -> 28 -> 14 -> 12 -> 6.
    conv1 = tf.layers.conv2d(inputs=X, filters=64, kernel_size=5, strides=(1, 1), padding='VALID', activation=tf.nn.relu)
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=2, strides=2, padding='VALID')
    conv2 = tf.layers.conv2d(inputs=pool1, filters=64, kernel_size=3, strides=(1, 1), padding='VALID', activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=2, strides=2, padding='VALID')

    # 6 * 6 * 64 = 2304 features per example.
    pool2_flat = tf.reshape(pool2, [-1, 2304])

    # fc-relu-dropout. The ReLU is required: without a nonlinearity here the
    # two dense layers would only be separated by dropout, i.e. two stacked
    # linear maps.
    dense1 = tf.layers.dense(inputs=pool2_flat, units=100, activation=tf.nn.relu)
    # rate is the fraction of units dropped (see tf.layers.dropout docs);
    # dropout is only active when is_training is True.
    dropout1 = tf.layers.dropout(inputs=dense1, rate=0.6, training=is_training)

    # Output layer: one score per class.
    y_out = tf.layers.dense(inputs=dropout1, units=10)
    return y_out
# Start from an empty default graph and declare the per-batch inputs.
tf.reset_default_graph()
X = tf.placeholder(dtype=tf.float32, shape=[None, 32, 32, 3])
y = tf.placeholder(dtype=tf.int64, shape=[None])
is_training = tf.placeholder(dtype=tf.bool)

# Forward graph and loss; the sparse variant takes integer class labels
# directly, so no one-hot encoding is needed.
y_out = my_model(X, y, is_training)
total_loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_out)
mean_loss = tf.reduce_mean(total_loss)

# Learning-rate schedule: start at 1e-3 and multiply by 0.9 every 1000
# steps (staircase). global_step is a non-trainable counter that the
# optimizer increments through minimize().
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 1e-3
learning_rate = tf.train.exponential_decay(
    starter_learning_rate,
    global_step=global_step,
    decay_steps=1000,
    decay_rate=0.9,
    staircase=True,
)

optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
train_step = optimizer.minimize(mean_loss, global_step=global_step)
# Train the custom model for 15 epochs (plotting the loss after each one),
# then score it on the validation set. The session is used as a context
# manager so that its resources are released when the block ends.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    print('Training')
    run_model(sess, y_out, mean_loss, X_train, y_train, 15, 64, 100, train_step, True)

    print('Validation')
    run_model(sess, y_out, mean_loss, X_val, y_val, 1, 64)
https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/python/layers/convolutional.py
http://docs.w3cub.com/tensorflow~python/tf/layers/conv2d/
https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/python/layers/pooling.py
http://docs.w3cub.com/tensorflow~python/tf/layers/max_pooling2d/
tf.layers.dense:
dense(inputs, units, activation=None, use_bias=True, kernel_initializer=None,
bias_initializer=init_ops.zeros_initializer(), kernel_regularizer=None, bias_regularizer=None,
activity_regularizer=None, kernel_constraint=None, bias_constraint=None,
trainable=True, name=None, reuse=None)
dropout(inputs, rate=0.5, noise_shape=None, seed=None, training=False, name=None)
There is no need to apply tf.nn.softmax before tf.losses.softmax_cross_entropy: the loss takes the unnormalized scores (logits) and applies the softmax internally.
查阅了很多资料,熟悉了tensorflow部分api接口。
tf.nn是比较基础的接口
tf.layers是比较高级的封装
感受到了架构带来的强大便利,比如最让我头大的反向梯度求导可以在深度学习的架构里自动运算,果然是站在巨人的肩膀上。
还需要进一步掌握网络如何设计、优化策略如何选取、参数如何调试以及了解卷积网络到底学习了什么样的特征。