TensorFlow训练神经网络

《TensorFlow:实战Google深度学习框架》中的章节

import os
os.getcwd()  # show the current working directory
# Out: 'E:\\深度学习\\mnist'
# Switch the working directory to E:\深度学习\mnist. A raw string keeps the
# Windows backslashes literal instead of relying on unrecognised escape
# sequences such as "\m" being passed through unchanged.
os.chdir(r"E:\深度学习\mnist")

1. 导入相应的模块

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

2. 设置输入、输出节点数目以及神经网络相应的参数

# --- Network layout ---
# Number of input nodes.
INPUT_NODE = 784
# Number of hidden-layer nodes.
LAYER1_NODE = 500
# Number of output nodes.
OUTPUT_NODE = 10

# --- Mini-batch ---
# Number of samples packed into each batch.
BATCH_SIZE = 100

# --- Model / optimisation parameters ---
# Base learning rate.
LEARNING_RATE_BASE = 0.8
# Decay rate applied to the learning rate.
LEARNING_RATE_DECAY = 0.99
# Coefficient placed in front of the regularization term.
REGULARAZTION_RATE = 0.0001
# Number of training steps.
TRAINING_STEPS = 5000
# Decay rate of the moving average.
MOVING_AVERAGE_DECAY = 0.99

3. 定义辅助函数来计算前向传播结果,使用ReLU作为激活函数

def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    """Compute the forward pass of the two-layer network with a ReLU hidden layer.

    Args:
        input_tensor: Input that is multiplied with ``weights1``.
        avg_class: ``None`` to use the parameters as given; otherwise an
            object whose ``average(var)`` method supplies the value used in
            place of each parameter (e.g. the moving-average object built in
            ``train``).
        weights1: Hidden-layer weights.
        biases1: Hidden-layer biases.
        weights2: Output-layer weights.
        biases2: Output-layer biases.

    Returns:
        ``relu(input_tensor @ weights1 + biases1) @ weights2 + biases2``; no
        softmax is applied to the result.
    """
    # Identity check: ``None`` is a singleton, and ``==`` could be intercepted
    # by a custom ``__eq__`` on the object passed in.
    if avg_class is not None:
        # Swap every parameter for its averaged counterpart.
        weights1 = avg_class.average(weights1)
        biases1 = avg_class.average(biases1)
        weights2 = avg_class.average(weights2)
        biases2 = avg_class.average(biases2)

    layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
    return tf.matmul(layer1, weights2) + biases2

4. 定义训练过程

def train(mnist):
    """Build the two-layer network graph, train it and print accuracy figures.

    The graph is trained with gradient descent on the mean cross-entropy plus
    L2 regularization of both weight matrices, using an exponentially decayed
    learning rate. Accuracy is measured with the moving-average copies of the
    parameters. Validation accuracy is printed every 1000 steps and test
    accuracy once after the last step.

    Args:
        mnist: Dataset object. The code reads ``mnist.train.num_examples``,
            ``mnist.train.next_batch(batch_size)``, and the ``images`` /
            ``labels`` attributes of ``mnist.validation`` and ``mnist.test``.
            Labels are assumed to be one-hot rows of width ``OUTPUT_NODE``
            (they are reduced with ``tf.argmax(..., 1)``).

    Returns:
        None. Results are reported through ``print``.
    """
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name="x-input")
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name="y-input")

    # Hidden-layer parameters.
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    # Output-layer parameters.
    weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    # Forward pass with the current parameters (no moving averages).
    y = inference(x, None, weights1, biases1, weights2, biases2)

    # Step counter (marked non-trainable so it is not itself averaged or
    # optimised) and the moving-average object applied to every trainable
    # variable in the graph.
    global_step = tf.Variable(0, trainable=False)
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # Forward pass with the averaged parameters; used for accuracy below.
    average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)

    # Per-example cross-entropy. The sparse variant takes class indices, so
    # the label rows are converted with argmax.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    # Mean cross-entropy over the examples in the current batch.
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # L2 regularization is applied to the weight matrices only, not the biases.
    regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
    regularaztion = regularizer(weights1) + regularizer(weights2)
    # Total loss = cross-entropy loss + regularization loss.
    loss = cross_entropy_mean + regularaztion

    # Exponentially decayed learning rate.
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,                     # base rate the decay starts from
        global_step,                            # current step
        mnist.train.num_examples / BATCH_SIZE,  # steps needed to go through all training data
        LEARNING_RATE_DECAY,                    # decay rate
        staircase=True)

    # Minimise the total loss; minimize() also increments global_step.
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

    # One op that runs both the parameter update and the moving-average
    # update. The original author notes an alternative formulation:
    #     with tf.control_dependencies([train_step, variables_averages_op]):
    #         train_op = tf.no_op(name="train")
    train_op = tf.group(train_step, variables_averages_op)

    # Accuracy: compare the index of the largest averaged-model output in each
    # row with the index of the largest label entry.
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Open a session and run the training loop.
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}

        for i in range(TRAINING_STEPS):
            # Every 1000 steps, report accuracy on the validation set.
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy using average model is %g " % (i, validate_acc))

            # Fetch this step's batch of training data and run one update.
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})

        # After training, measure accuracy on the test set.
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("After %d training step(s), test accuracy using average model is %g" % (TRAINING_STEPS, test_acc))

5. 主程序入口

## Note: the number of training steps is set to 5000 here (TRAINING_STEPS).
def main(argv=None):
    """Load the MNIST data with one-hot labels and run ``train`` on it.

    Args:
        argv: Accepted for call compatibility; not used by this function.
    """
    data_dir = "../../datasets/MNIST_data"
    train(input_data.read_data_sets(data_dir, one_hot=True))

# Run training only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Extracting ../../datasets/MNIST_data\train-images-idx3-ubyte.gz
Extracting ../../datasets/MNIST_data\train-labels-idx1-ubyte.gz
Extracting ../../datasets/MNIST_data\t10k-images-idx3-ubyte.gz
Extracting ../../datasets/MNIST_data\t10k-labels-idx1-ubyte.gz
After 0 training step(s), validation accuracy using average model is 0.0614 
After 1000 training step(s), validation accuracy using average model is 0.9778 
After 2000 training step(s), validation accuracy using average model is 0.9832 
After 3000 training step(s), validation accuracy using average model is 0.9836 
After 4000 training step(s), validation accuracy using average model is 0.9826 
After 5000 training step(s), test accuracy using average model is 0.9835