TensorFlow Notes
These notes cover learning and using TensorFlow in the following environment:
- Windows 10
- PyCharm
1. Introduction to TensorFlow
1.1 Basic TensorFlow concepts
1.1.1 Tensors
A tensor is a multi-dimensional array (list); the number of dimensions of a tensor is called its rank.
- Rank-0 tensor: a scalar, a single number, e.g. S=123
- Rank-1 tensor: a vector, a one-dimensional array, e.g. V=[1,2,3]
- Rank-2 tensor: a matrix, a two-dimensional array with i rows and j columns, where every element is indexed by its row and column numbers, e.g. m=[[1, 2, 3], [4, 5, 6], [7, 8, 9]]
- Rank-n tensor: to tell a tensor's rank, count the square brackets at its left edge: 0 brackets means rank 0, n brackets means rank n. A tensor can therefore represent arrays (lists) of rank 0 through n. A code sketch follows this list.
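A minimal sketch that checks ranks and shapes in code (the variable names are mine, for illustration only):

import tensorflow as tf
s = tf.constant(123)               # rank 0: scalar, shape ()
v = tf.constant([1, 2, 3])         # rank 1: vector, shape (3,)
m = tf.constant([[1, 2], [3, 4]])  # rank 2: matrix, shape (2, 2)
print(s.shape, v.shape, m.shape)   # () (3,) (2, 2)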
1.2 Constants
Define constants and print the result:

import tensorflow as tf          # import the module
x = tf.constant([[1.0, 2.0]])    # define a rank-2 tensor equal to [[1.0, 2.0]]
w = tf.constant([[3.0], [4.0]])  # define a rank-2 tensor equal to [[3.0], [4.0]]
y = tf.matmul(x, w)              # matrix-multiply x by w
print(y)                         # prints the tensor's description, not its value
with tf.Session() as sess:
    print(sess.run(y))           # run the session and print the computed result
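Before the session runs, print(y) only shows the node's metadata, e.g. something like Tensor("MatMul:0", shape=(1, 1), dtype=float32); sess.run(y) then prints the computed value [[11.]], since 1.0*3.0 + 2.0*4.0 = 11.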
1.3 Common functions for generating random numbers/arrays
#generate normally distributed random numbers
tf.random_normal()
#generate normally distributed random numbers with large-deviation points removed (truncated)
tf.truncated_normal()
#generate uniformly distributed random numbers
tf.random_uniform()
#generate an all-zeros array
tf.zeros
#generate an all-ones array
tf.ones
#generate an array filled with a given constant
tf.fill
#generate an array with directly specified values
tf.constant
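A usage sketch with example shapes and arguments (all of the values here are mine, chosen only to illustrate the signatures):

import tensorflow as tf
a = tf.random_normal([2, 3], mean=0.0, stddev=1.0, seed=1)  # normal distribution
b = tf.truncated_normal([2, 3], stddev=1.0)  # values more than 2 stddev from the mean are re-drawn
c = tf.random_uniform([2, 3], minval=0.0, maxval=1.0)       # uniform distribution
d = tf.zeros([3, 2])        # all zeros
e = tf.ones([3, 2])         # all ones
f = tf.fill([3, 2], 6.0)    # filled with 6.0
g = tf.constant([3, 2, 1])  # the given values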
1.4 Variable initialization/printing
import tensorflow as tf  # import the module
#truncated normal distribution (large deviations removed)
w = tf.Variable(tf.truncated_normal([2, 3], stddev=2, mean=0, seed=1))
with tf.Session() as sess:
    #variables must be initialized before use
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    print(sess.run(w))  # run the session and print w
1.5 Placeholders and feeding data
Feed one sample of data:

import tensorflow as tf  # import the module
x = tf.placeholder(tf.float32, shape=(1, 2))
w1 = tf.Variable(tf.random_normal([2, 3], stddev=1, seed=1))
y = tf.matmul(x, w1)
with tf.Session() as sess:
    #initialize the variables
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    print(sess.run(y, feed_dict={x: [[0.5, 0.6]]}))
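Since x has shape (1, 2) and w1 has shape (2, 3), the fetched result is a (1, 3) array.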
Feed N samples of data:

import tensorflow as tf  # import the module
x = tf.placeholder(tf.float32, shape=(None, 2))  # None: the batch size is fixed only at feed time
w1 = tf.Variable(tf.random_normal([2, 3], stddev=1, seed=1))
y = tf.matmul(x, w1)
with tf.Session() as sess:
    #initialize the variables
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    print(sess.run(y, feed_dict={x: [[0.1, 0.2], [0.2, 0.3], [0.3, 0.4], [0.4, 0.5]]}))
1.6 Loss functions
Mean squared error (MSE):

$MSE(y\_, y) = \dfrac{\sum_{i=1}^{n}(y - y\_)^2}{n}$

import tensorflow as tf  # import the module
loss = tf.reduce_mean(tf.square(y_ - y))
Cross entropy (CE), which measures the distance between two probability distributions:

$H(y\_, y) = -\sum y\_ \cdot \log(y)$

import tensorflow as tf  # import the module
#clip y into [1e-12, 1.0] so that log(0) can never occur
ce = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-12, 1.0)))
The softmax function: when the n outputs $(y_1, y_2, \ldots, y_n)$ of an n-way classifier pass through $softmax()$, they satisfy the requirements of a probability distribution:

$\forall x,\ P(X=x) \in [0, 1], \quad \sum_{x} P(X=x) = 1$

$softmax(y_i) = \dfrac{e^{y_i}}{\sum_{j=1}^{n} e^{y_j}}$

import tensorflow as tf  # import the module
ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
cem = tf.reduce_mean(ce)
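tf.nn.sparse_softmax_cross_entropy_with_logits applies softmax to the raw logits y and computes cross entropy against integer class indices in a single step, which is why the one-hot labels y_ are first converted with tf.argmax(y_, 1).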
1.7 Learning rate
The learning rate sets how far the parameters move at each training update:

$w_{n+1} = w_n - learning\_rate \cdot \nabla$

where $\nabla$ is the gradient of the loss with respect to $w$. If the learning rate is set too large, the model oscillates and does not converge; if it is set too small, the model converges slowly.
Exponentially decaying learning rate:

import tensorflow as tf  # import the module
#learning_rate_base, learning_rate_step and learning_rate_decay are user-chosen hyperparameters
global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(learning_rate_base, global_step, learning_rate_step, learning_rate_decay, staircase=True)
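With staircase=True the rate drops in discrete steps; following the tf.train.exponential_decay documentation, the effective rate is

$learning\_rate = learning\_rate\_base \times learning\_rate\_decay^{\lfloor global\_step / learning\_rate\_step \rfloor}$

(without staircase=True the exponent is not floored and the decay is smooth).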
1.8 Moving average (shadow values)
The moving average records, for every parameter, an average of its values over recent training history, which improves the model's generalization; it is applied to all trainable parameters, both w and b.
import tensorflow as tf  # import the module
#MOVING_AVERAGE_DECAY is a user-chosen hyperparameter; train_step is the optimizer op
ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
ema_op = ema.apply(tf.trainable_variables())
with tf.control_dependencies([train_step, ema_op]):
    train_op = tf.no_op(name='train')
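Per the ExponentialMovingAverage documentation, each shadow value is updated as

$shadow = decay \times shadow + (1 - decay) \times variable$

and when a global_step is passed in, the effective decay is $\min(MOVING\_AVERAGE\_DECAY,\ \frac{1 + step}{10 + step})$, so early in training the averages track the variables closely and stabilize later.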
1.9 Regularization
Regularization adds a weight-dependent penalty term to the loss function to suppress overfitting and improve the model's generalization; it is normally applied to the weights w only, not the biases b.
import tensorflow as tf  # import the module
tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
loss_total = cem + tf.add_n(tf.get_collection('losses'))
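Here tf.contrib.layers.l2_regularizer(regularizer)(w) evaluates to regularizer * tf.nn.l2_loss(w), i.e. $regularizer \times \frac{1}{2}\sum_i w_i^2$; tf.add_n then sums the collected penalties of all weight tensors into the total loss.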
1.10 Backpropagation training methods
Training minimizes the loss value; common optimization methods include gradient descent, the Momentum optimizer, and the Adam optimizer.
Gradient descent:

import tensorflow as tf  # import the module
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
Momentum optimizer:

import tensorflow as tf  # import the module
train_step = tf.train.MomentumOptimizer(learning_rate, momentum).minimize(loss)
Adam optimizer:

import tensorflow as tf  # import the module
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
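A minimal end-to-end sketch tying the pieces above together, fitting y = x1 + x2 with gradient descent (the data, shapes, and hyperparameters are made up for illustration):

import tensorflow as tf
import numpy as np

# made-up data: learn y = x1 + x2
X = np.random.rand(32, 2).astype(np.float32)
Y = np.sum(X, axis=1, keepdims=True)

x = tf.placeholder(tf.float32, shape=(None, 2))
y_ = tf.placeholder(tf.float32, shape=(None, 1))
w = tf.Variable(tf.random_normal([2, 1], stddev=1, seed=1))
y = tf.matmul(x, w)

loss = tf.reduce_mean(tf.square(y_ - y))  # MSE loss
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(300):
        sess.run(train_step, feed_dict={x: X, y_: Y})
    print(sess.run(w))  # should approach [[1.], [1.]]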
2. A TensorFlow MNIST handwritten-digit recognition example
mnist_forward.py
import tensorflow as tf

INPUT_NODE = 784    # 28x28 input pixels, flattened
OUTPUT_NODE = 10    # 10 digit classes
LAYER1_NODE = 500   # hidden-layer width

def get_weight(shape, regularizer):
    w = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    if regularizer is not None:
        tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
    return w

def get_bias(shape):
    b = tf.Variable(tf.zeros(shape))
    return b

def forward(x, regularizer):
    w1 = get_weight([INPUT_NODE, LAYER1_NODE], regularizer)
    b1 = get_bias([LAYER1_NODE])
    y1 = tf.nn.relu(tf.matmul(x, w1) + b1)
    w2 = get_weight([LAYER1_NODE, OUTPUT_NODE], regularizer)
    b2 = get_bias([OUTPUT_NODE])
    y = tf.matmul(y1, w2) + b2  # raw logits; softmax is applied inside the loss
    return y
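mnist_forward.py defines a two-layer fully connected network: 784 input pixels, one 500-node ReLU hidden layer, and 10 output logits, with optional L2 penalties collected into the 'losses' collection for the backward pass to sum.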
mnist_backward.py
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import mnist_forward
import os

BATCH_SIZE = 200
LEARNING_RATE_BASE = 0.1
LEARNING_RATE_DECAY = 0.99
REGULARIZER = 0.0001
STEPS = 50000
MOVING_AVERAGE_DECAY = 0.99
MODEL_SAVE_PATH = "./model/"
MODEL_NAME = "mnist_model"

def backward(mnist):
    x = tf.placeholder(tf.float32, [None, mnist_forward.INPUT_NODE])
    y_ = tf.placeholder(tf.float32, [None, mnist_forward.OUTPUT_NODE])
    y = mnist_forward.forward(x, REGULARIZER)
    global_step = tf.Variable(0, trainable=False)

    # cross-entropy loss plus the collected L2 regularization losses
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cem = tf.reduce_mean(ce)
    loss = cem + tf.add_n(tf.get_collection('losses'))

    # exponentially decaying learning rate
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)

    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

    # maintain moving averages of all trainable variables
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    ema_op = ema.apply(tf.trainable_variables())
    with tf.control_dependencies([train_step, ema_op]):
        train_op = tf.no_op(name='train')

    saver = tf.train.Saver()

    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        # resume from the latest checkpoint if one exists
        ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        for i in range(STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: xs, y_: ys})
            if i % 1000 == 0:
                print("After %d training step(s), loss on training batch is %g." % (step, loss_value))
                saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)

def main():
    mnist = input_data.read_data_sets("./data/", one_hot=True)
    backward(mnist)

if __name__ == '__main__':
    main()
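Because backward() restores the latest checkpoint before the training loop, rerunning mnist_backward.py resumes from where the previous run stopped; checkpoints are written to ./model/ every 1000 steps.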
mnist_test.py
# coding:utf-8
import time
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import mnist_forward
import mnist_backward

TEST_INTERVAL_SECS = 5

def test(mnist):
    with tf.Graph().as_default() as g:
        x = tf.placeholder(tf.float32, [None, mnist_forward.INPUT_NODE])
        y_ = tf.placeholder(tf.float32, [None, mnist_forward.OUTPUT_NODE])
        y = mnist_forward.forward(x, None)  # no regularization at test time

        # restore the moving-average (shadow) weights instead of the raw ones
        ema = tf.train.ExponentialMovingAverage(mnist_backward.MOVING_AVERAGE_DECAY)
        ema_restore = ema.variables_to_restore()
        saver = tf.train.Saver(ema_restore)

        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        while True:
            with tf.Session() as sess:
                ckpt = tf.train.get_checkpoint_state(mnist_backward.MODEL_SAVE_PATH)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # recover the step count from the checkpoint file name
                    global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                    accuracy_score = sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels})
                    print("After %s training step(s), test accuracy = %g" % (global_step, accuracy_score))
                else:
                    print('No checkpoint file found')
                    return
            time.sleep(TEST_INTERVAL_SECS)

def main():
    mnist = input_data.read_data_sets("./data/", one_hot=True)
    test(mnist)

if __name__ == '__main__':
    main()
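mnist_test.py can run alongside training: every TEST_INTERVAL_SECS seconds it reloads the newest checkpoint, restoring the shadow (moving-average) variables via variables_to_restore(), and reports accuracy on the full test set.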
mnist_app.py
# coding:utf-8
import tensorflow as tf
import numpy as np
from PIL import Image
import mnist_backward
import mnist_forward

def restore_model(testPicArr):
    with tf.Graph().as_default() as tg:
        x = tf.placeholder(tf.float32, [None, mnist_forward.INPUT_NODE])
        y = mnist_forward.forward(x, None)
        preValue = tf.argmax(y, 1)  # predicted class index

        # restore the moving-average (shadow) weights
        variable_averages = tf.train.ExponentialMovingAverage(mnist_backward.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(mnist_backward.MODEL_SAVE_PATH)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                preValue = sess.run(preValue, feed_dict={x: testPicArr})
                return preValue
            else:
                print("No checkpoint file found")
                return -1

def pre_pic(picName):
    img = Image.open(picName)
    reIm = img.resize((28, 28), Image.ANTIALIAS)
    im_arr = np.array(reIm.convert('L'))
    threshold = 50
    for i in range(28):
        for j in range(28):
            # invert to white-on-black, then binarize
            im_arr[i][j] = 255 - im_arr[i][j]
            if im_arr[i][j] < threshold:
                im_arr[i][j] = 0
            else:
                im_arr[i][j] = 255
    nm_arr = im_arr.reshape([1, 784])
    nm_arr = nm_arr.astype(np.float32)
    img_ready = np.multiply(nm_arr, 1.0 / 255.0)  # scale pixels to [0, 1]
    return img_ready

def application():
    testNum = int(input("input the number of test pictures:"))
    for i in range(testNum):
        testPic = input("the path of test picture:")
        testPicArr = pre_pic(testPic)
        preValue = restore_model(testPicArr)
        print("The prediction number is:", preValue)

def main():
    application()

if __name__ == '__main__':
    main()
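Usage: mnist_app.py asks for a picture count and then a file path for each picture; pre_pic() resizes the image to 28x28, converts it to grayscale, inverts it to white-on-black, binarizes it at a threshold of 50, and scales pixels to [0, 1] so the input matches the MNIST training format.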