CS231n Assignment2 tensorflow.ipynb

1. 前言

经过前期自己写全连接层(FC),归一化层(Batch Normalization),Relu层,Pool层,softmax以及svm的loss层,(还没有写dropout层,以及conv层),对Deeplearning有了更清晰的了解。

终于来到深度学习架构的学习,考虑到以后工作可能不是搞学术研究(偏学术的推荐pytorch,大佬推荐),选择了跨平台性好的tensorflow。

主要是写tensorflow.ipynb作业的学习记录,包括很多查阅的资料。

关于虚拟的python环境和tensorflow安装

创建虚拟python环境:

virtualenv -p python3 .env
source .env/bin/activate

所创建的虚拟环境只用了裸的python和编译器(即使系统里装了tensorflow也不会认识),所以可以在虚拟环境随便发挥,装任何需要的库。
安装tensorflow请参考安装tensorflow

2. 正文

import tensorflow as tf
import numpy as np
import math
import timeit
import matplotlib.pyplot as plt
%matplotlib inline

from cs231n.data_utils import load_CIFAR10

def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=10000):
    """
    Read CIFAR-10 from disk, split it into train / validation / test subsets
    and subtract the mean image of the training subset from all three.

    The first `num_training` training images form the training set, the next
    `num_validation` training images form the validation set, and the first
    `num_test` test images form the test set.

    Returns:
        X_train, y_train, X_val, y_val, X_test, y_test
    """
    # Raw data as returned by the course helper.
    data_root = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(data_root)

    # Index ranges of each subset; validation starts right after training.
    val_idx = range(num_training, num_training + num_validation)
    train_idx = range(num_training)
    test_idx = range(num_test)

    # Validation must be taken before X_train / y_train are narrowed down.
    X_val, y_val = X_train[val_idx], y_train[val_idx]
    X_train, y_train = X_train[train_idx], y_train[train_idx]
    X_test, y_test = X_test[test_idx], y_test[test_idx]

    # Center every subset on the mean image of the training subset.
    mean_image = np.mean(X_train, axis=0)
    for subset in (X_train, X_val, X_test):
        subset -= mean_image

    return X_train, y_train, X_val, y_val, X_test, y_test


# Load the preprocessed subsets and report the shape of every array.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
for caption, array in (
        ('Train data shape: ', X_train),
        ('Train labels shape: ', y_train),
        ('Validation data shape: ', X_val),
        ('Validation labels shape: ', y_val),
        ('Test data shape: ', X_test),
        ('Test labels shape: ', y_test)):
    print(caption, array.shape)


# Clear old variables by resetting the default graph.
tf.reset_default_graph()

# Inputs that change with every batch. The leading dimension is None, so it
# is set automatically from the batch size that is fed in.
X = tf.placeholder(dtype=tf.float32, shape=[None, 32, 32, 3])
y = tf.placeholder(dtype=tf.int64, shape=[None])
is_training = tf.placeholder(dtype=tf.bool)

def simple_model(X,y):
    """
    Build a single-convolution classifier graph and return its class scores.

    X goes through a 7x7, stride-2 convolution with 32 filters, a ReLU, and
    one affine layer that yields 10 scores per example. `y` is accepted but
    not used inside this function.
    """
    # Trainable parameters, created in this fixed order.
    conv_w = tf.get_variable("Wconv1", shape=[7, 7, 3, 32])
    conv_b = tf.get_variable("bconv1", shape=[32])
    fc_w = tf.get_variable("W1", shape=[5408, 10])
    fc_b = tf.get_variable("b1", shape=[10])

    # Convolution followed by ReLU.
    conv_out = tf.nn.conv2d(X, conv_w, strides=[1,2,2,1], padding='VALID') + conv_b
    activated = tf.nn.relu(conv_out)

    # Flatten each example: 13 * 13 * 32 = 5408 features.
    flattened = tf.reshape(activated, [-1, 5408])

    # Affine layer producing the class scores.
    return tf.matmul(flattened, fc_w) + fc_b

# Class scores of the simple model for the placeholder inputs.
y_out = simple_model(X,y)

# Hinge loss against one-hot encoded labels, followed by its mean.
one_hot_labels = tf.one_hot(y, 10)
total_loss = tf.losses.hinge_loss(labels=one_hot_labels, logits=y_out)
mean_loss = tf.reduce_mean(total_loss)

# Adam optimizer with a learning rate of 5e-4 minimizing the mean loss.
optimizer = tf.train.AdamOptimizer(learning_rate=5e-4)
train_step = optimizer.minimize(mean_loss)

Comments

  • tf.nn.conv2d()
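    X and Wconv1 are Tensors; strides is a list of four ints. For example, Wconv1’s shape is [filter_height, filter_width, in_channels, out_channels]
    strides[0] = strides[3] = 1 (the batch and channel dimensions are not strided); with the default NHWC layout strides[1] is the height stride and strides[2] is the width stride
    padding has two choices: ‘VALID’ or ‘SAME’
    computing output_width (output_height is computed the same way, using the input height, kernel height and strides[1]):
    ‘VALID’: output_width = np.ceil((input_width - kernel_width + 1)/strides[2])
    ‘SAME’: output_width = np.ceil(input_width/strides[2])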

  • tf.one_hot(y,10)
    transforms labels of shape (N,) into a one-hot matrix of shape (N,10). Rule: entry (i, y[i]) = 1, all other entries = 0

  • tf.losses.hinge_loss()
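    a hinge (SVM-style) loss, but not the multiclass SVM loss from the CS231n notes: labels are expected to be 0/1 (hence tf.one_hot), are converted internally to -1/+1, and the hinge max(0, 1 - label * logit) is applied to each logit independently before the result is reduced to a scalar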

Reference

  • tf.nn.conv2d
  • conv2d
  • padding
  • tf_losses_hinge_loss

def run_model(session, predict, loss_val, Xd, yd,
              epochs=1, batch_size=64, print_every=100,
              training=None, plot_losses=False):
    """
    Run the model over a dataset in minibatches, optionally training it.

    The function feeds the module-level placeholders `X`, `y` and
    `is_training`, so those must belong to the same graph as `predict`,
    `loss_val` and `training`.

    Args:
        session: session used to run the graph.
        predict: tensor of class scores; the argmax over axis 1 is compared
            with the labels to count correct predictions.
        loss_val: loss tensor evaluated on every minibatch.
        Xd: input data, indexed along its first axis.
        yd: labels aligned with `Xd`.
        epochs: number of passes over the data.
        batch_size: number of examples per minibatch (the last minibatch of
            an epoch may be smaller).
        print_every: when training, print minibatch stats every this many
            iterations.
        training: optional op run together with every minibatch (e.g. an
            optimizer step); None means the data is only evaluated.
        plot_losses: if True, plot the recorded losses after every epoch.

    Returns:
        (total_loss, total_correct) of the last epoch: the sum of
        batch-size-weighted minibatch losses divided by the number of
        examples, and the fraction of correctly classified examples.
    """
    # have tensorflow compute accuracy
    correct_prediction = tf.equal(tf.argmax(predict,1), y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # shuffle indices once; every epoch walks the same shuffled order
    train_indicies = np.arange(Xd.shape[0])
    np.random.shuffle(train_indicies)

    training_now = training is not None

    # setting up variables we want to compute (and optimizing)
    # Use the loss tensor passed by the caller (not a module-level global),
    # so the function evaluates whatever loss it was actually given.
    variables = [loss_val,correct_prediction,accuracy]
    # if we have a training function, run it in place of the accuracy op
    if training_now:
        variables[-1] = training

    # counter
    iter_cnt = 0
    for e in range(epochs):
        # keep track of losses and accuracy
        correct = 0
        losses = []
        # make sure we iterate over the dataset once
        for i in range(int(math.ceil(Xd.shape[0]/batch_size))):
            # generate indices for the batch
            start_idx = (i*batch_size)%Xd.shape[0]
            idx = train_indicies[start_idx:start_idx+batch_size]

            # create a feed dictionary for this batch
            feed_dict = {X: Xd[idx,:],
                         y: yd[idx],
                         is_training: training_now }
            # get batch size (the final batch can be shorter than batch_size)
            actual_batch_size = yd[idx].shape[0]

            # have tensorflow compute loss and correct predictions
            # and (if given) perform a training step
            loss, corr, _ = session.run(variables,feed_dict=feed_dict)

            # aggregate performance stats; losses are stored weighted by the
            # batch size so that their sum divided by N is a per-example mean
            losses.append(loss*actual_batch_size)
            correct += np.sum(corr)

            # print every now and then
            if training_now and (iter_cnt % print_every) == 0:
                print("Iteration {0}: with minibatch training loss = {1:.3g} and accuracy of {2:.2g}"\
                      .format(iter_cnt,loss,np.sum(corr)/actual_batch_size))
            iter_cnt += 1
        total_correct = correct/Xd.shape[0]
        total_loss = np.sum(losses)/Xd.shape[0]
        print("Epoch {2}, Overall loss = {0:.3g} and accuracy of {1:.3g}"\
              .format(total_loss,total_correct,e+1))
        if plot_losses:
            # note: the plotted values are the batch-size-weighted losses
            plt.plot(losses)
            plt.grid(True)
            plt.title('Epoch {} Loss'.format(e+1))
            plt.xlabel('minibatch number')
            plt.ylabel('minibatch loss')
            plt.show()
    return total_loss,total_correct

 # Open a session (device string may be "/cpu:0" or "/gpu:0"), initialize the
 # variables, train for one epoch and then evaluate on the validation data.
 with tf.Session() as sess, tf.device("/cpu:0"):
     sess.run(tf.global_variables_initializer())
     print('Training')
     run_model(sess, y_out, mean_loss, X_train, y_train,
               epochs=1, batch_size=64, print_every=100,
               training=train_step, plot_losses=True)
     print('Validation')
     run_model(sess, y_out, mean_loss, X_val, y_val,
               epochs=1, batch_size=64)

Comments

  • tf.cast
    casts a tensor to another data type, e.g. tf.float32, tf.float64 or tf.int32
    tf.cast

  • tf.reduce_mean
    calculate mean value for the tensor or some axis of the tensor

  • np.arange(N)
    make np.array([0,1,2…N-1])

  • np.random.shuffle()
    shuffles an array in place; here it randomizes the order of the training indices
    np.random.shuffle

  • “loss = {1:.3g}”.format()
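    loss = {1:.3g} inserts the argument at index 1 of format() (indices start at 0), formatted with 3 significant digits (the g format counts significant digits, not digits after the decimal point).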


训练一个指定的模型

In this section, we’re going to specify a model for you to construct. The goal here isn’t to get good performance (that’ll be next), but instead to get comfortable with understanding the TensorFlow documentation and configuring your own model.

Using the code provided above as guidance, and using the following TensorFlow documentation, specify a model with the following architecture:

  • 7x7 Convolutional Layer with 32 filters and stride of 1
  • ReLU Activation Layer
  • Spatial Batch Normalization Layer (trainable parameters, with scale and centering)
  • 2x2 Max Pooling layer with a stride of 2
  • Affine layer with 1024 output units
  • ReLU Activation Layer
  • Affine layer from 1024 input units to 10 outputs
# Clear old variables by resetting the default graph.
tf.reset_default_graph()

# Inputs that change with every batch. The leading dimension is None, so it
# is set automatically from the batch size that is fed in.
X = tf.placeholder(dtype=tf.float32, shape=[None, 32, 32, 3])
y = tf.placeholder(dtype=tf.int64, shape=[None])
is_training = tf.placeholder(dtype=tf.bool, name='is_training')

# define model
def complex_model(X,y,is_training):
    """
    Build the conv -> relu -> batch norm -> max pool -> affine -> relu ->
    affine graph and return 10 class scores per example.

    `is_training` is a boolean tensor that selects, via tf.cond, between the
    training-mode and the inference-mode batch-norm branch; both branches use
    the scope 'bn'. `y` is accepted but not used inside this function.
    """
    # Trainable parameters, created in this fixed order.
    conv_w = tf.get_variable("Wconv1", shape=[7, 7, 3, 32])
    conv_b = tf.get_variable("bconv1", shape=[32])

    fc1_w = tf.get_variable("W1", shape=[5408, 1024])
    fc1_b = tf.get_variable("b1", shape=[1024])

    fc2_w = tf.get_variable("W2", shape=[1024, 10])
    fc2_b = tf.get_variable("b2", shape=[10])

    # 7x7 convolution with stride 1, then ReLU.
    conv_out = tf.nn.conv2d(X, conv_w, strides=[1,1,1,1], padding="VALID") + conv_b
    conv_act = tf.nn.relu(conv_out)

    # The two batch-norm branches: the first call creates the 'bn' scope,
    # the second one reuses it.
    def bn_training_branch():
        return tf.contrib.layers.batch_norm(conv_act, scale=True, updates_collections=None, is_training=True, reuse=None, scope='bn')

    def bn_inference_branch():
        return tf.contrib.layers.batch_norm(conv_act, scale=True, updates_collections=None, is_training=False, reuse=True, scope='bn')

    normalized = tf.cond(is_training, bn_training_branch, bn_inference_branch)

    # 2x2 max pooling with stride 2, then flatten to 5408 features.
    pooled = tf.nn.max_pool(normalized, ksize=[1,2,2,1], strides=[1,2,2,1], padding='VALID')
    flattened = tf.reshape(pooled, [-1, 5408])

    # Hidden affine layer with ReLU.
    hidden = tf.nn.relu(tf.matmul(flattened, fc1_w) + fc1_b)

    # Output affine layer producing the class scores.
    return tf.matmul(hidden, fc2_w) + fc2_b


# Class scores of the model for the placeholder inputs.
y_out = complex_model(X,y,is_training)

# Softmax cross-entropy against one-hot labels, optimized with RMSProp.
one_hot_labels = tf.one_hot(y, 10)
total_loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels, logits=y_out)
mean_loss = tf.reduce_mean(total_loss)
optimizer = tf.train.RMSPropOptimizer(learning_rate=1e-3)

# Batch normalization in tensorflow requires this extra dependency: the
# training step is made to depend on the ops collected under UPDATE_OPS.
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(extra_update_ops):
    train_step = optimizer.minimize(mean_loss)

# Create the session and initialize all variables.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

print('Training')
run_model(sess, y_out, mean_loss, X_train, y_train,
          epochs=1, batch_size=64, print_every=100, training=train_step)

print('Validation')
run_model(sess, y_out, mean_loss, X_val, y_val, epochs=1, batch_size=64)

Comments

  • $\text{normalized} = \gamma \, \frac{x - \mu}{\sigma} + \beta$
  • tf.nn.moments
  • tf.nn.batch_normalization
  • tf.cond
  • batch_normalization_stackoverflow
  • tf.contrib.layers.batch_norm [API’s source code]
  • name_scope with reuse
  • tf.nn.max_pool

训练自己的模型:

  • Filter size: Above we used 7x7; this makes pretty pictures but smaller filters may be more efficient
  • Number of filters: Above we used 32 filters. Do more or fewer do better?
  • Pooling vs Strided Convolution: Do you use max pooling or just stride convolutions?
  • Batch normalization: Try adding spatial batch normalization after convolution layers and vanilla batch normalization after affine layers. Do your networks train faster?
  • Network architecture: The network above has two layers of trainable parameters. Can you do better with a deep network? Good architectures to try include:
    • [conv-relu-pool]xN -> [affine]xM -> [softmax or SVM]
    • [conv-relu-conv-relu-pool]xN -> [affine]xM -> [softmax or SVM]
    • [batchnorm-relu-conv]xN -> [affine]xM -> [softmax or SVM]
  • Use TensorFlow Scope: Use TensorFlow scope and/or tf.layers to make it easier to write deeper networks. See this tutorial for how to use tf.layers.
  • Use Learning Rate Decay: As the notes point out, decaying the learning rate might help the model converge. Feel free to decay every epoch, when loss doesn’t change over an entire epoch, or any other heuristic you find appropriate. See the Tensorflow documentation for learning rate decay.
  • Global Average Pooling: Instead of flattening and then having multiple affine layers, perform convolutions until your image gets small (7x7 or so) and then perform an average pooling operation to get to a 1x1 image picture (1, 1 , Filter#), which is then reshaped into a (Filter#) vector. This is used in Google’s Inception Network (See Table 1 for their architecture).
  • Regularization: Add l2 weight regularization, or perhaps use Dropout as in the TensorFlow MNIST tutorial

我选择搭建的网络架构是[conv-relu-pool]*N + [fc-relu-dropout]*M + relu + softmax

def my_model(X,y,is_training):
    """
    Build a [conv-relu-pool] x 2 -> [fc-relu-dropout] -> fc classifier graph.

    Args:
        X: batch of input images. The flatten size of 2304 below matches
            32x32 inputs: 32 -> 28 (5x5 VALID conv) -> 14 (pool) -> 12
            (3x3 VALID conv) -> 6 (pool), and 6 * 6 * 64 = 2304.
        y: labels; accepted but not used inside this function.
        is_training: passed to dropout as its `training` argument.

    Returns:
        Tensor with 10 class scores (logits) per example.
    """
    # First conv-relu-pool stage.
    conv1 = tf.layers.conv2d(inputs=X, filters=64, kernel_size=5, strides=(1, 1), padding='VALID', activation=tf.nn.relu)
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=2, strides=2, padding='VALID')

    # Second conv-relu-pool stage.
    conv2 = tf.layers.conv2d(inputs=pool1, filters=64, kernel_size=3, strides=(1, 1), padding='VALID', activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=2, strides=2, padding='VALID')

    pool2_flat = tf.reshape(pool2, [-1, 2304])

    # The hidden layer needs a non-linearity: without it, the two dense
    # layers compose into a single linear map (apart from the dropout noise
    # during training), so the hidden layer adds no representational power.
    dense1 = tf.layers.dense(inputs=pool2_flat, units=100, activation=tf.nn.relu)
    dropout1 = tf.layers.dropout(inputs=dense1, rate=0.6, training=is_training)

    # Output layer: raw logits, no activation (the loss applies softmax).
    y_out = tf.layers.dense(inputs=dropout1, units=10)

    return y_out

# Start from a fresh default graph for the custom model.
tf.reset_default_graph()

# Inputs that change with every batch; the leading dimension is left open.
X = tf.placeholder(dtype=tf.float32, shape=[None, 32, 32, 3])
y = tf.placeholder(dtype=tf.int64, shape=[None])
is_training = tf.placeholder(dtype=tf.bool)

# Class scores of the custom model.
y_out = my_model(X,y,is_training)

# Cross-entropy computed directly from the integer labels.
total_loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_out)
mean_loss = tf.reduce_mean(total_loss)

# Learning rate starting at 1e-3 and multiplied by 0.9 every 1000 steps
# (staircase), driven by a non-trainable step counter.
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 1e-3
learning_rate = tf.train.exponential_decay(
    learning_rate=starter_learning_rate,
    global_step=global_step,
    decay_steps=1000,
    decay_rate=0.9,
    staircase=True)

# RMSProp step; passing global_step makes minimize() increment the counter.
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
train_step = optimizer.minimize(mean_loss, global_step=global_step)

# Create the session and initialize all variables.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

print('Training')
run_model(sess, y_out, mean_loss, X_train, y_train,
          epochs=15, batch_size=64, print_every=100,
          training=train_step, plot_losses=True)
print('Validation')
run_model(sess, y_out, mean_loss, X_val, y_val, epochs=1, batch_size=64)

Comments

  • https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/python/layers/convolutional.py

  • http://docs.w3cub.com/tensorflow~python/tf/layers/conv2d/

  • https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/python/layers/pooling.py

  • http://docs.w3cub.com/tensorflow~python/tf/layers/max_pooling2d/

  • tf.layers.dense:

dense(inputs, units, activation=None, use_bias=True, kernel_initializer=None,
    bias_initializer=init_ops.zeros_initializer(), kernel_regularizer=None, bias_regularizer=None,
    activity_regularizer=None, kernel_constraint=None, bias_constraint=None,
    trainable=True, name=None, reuse=None)
  • tf.layers.dropout:
dropout(inputs, rate=0.5, noise_shape=None, seed=None, training=False, name=None)

There is no need to apply tf.nn.softmax before tf.losses.softmax_cross_entropy: the loss takes the raw logits and applies softmax internally.

  • http://docs.w3cub.com/tensorflow~python/tf/losses/softmax_cross_entropy/
  • http://docs.w3cub.com/tensorflow~python/tf/nn/softmax_cross_entropy_with_logits/

3. 小结

查阅了很多资料,熟悉了tensorflow部分api接口。

tf.nn是比较基础的接口
tf.layers是比较高级的封装

感受到了架构带来的强大便利,比如最让我头大的反向梯度求导可以在深度学习的架构里自动运算,果然是站在巨人的肩膀上。

还需要进一步掌握网络如何设计、优化策略如何选取、参数如何调试以及了解卷积网络到底学习了什么样的特征。

你可能感兴趣的:(机器视觉)