TensorFlow Notes
These notes cover learning and using TensorFlow in the following environment:
- Windows 10
- PyCharm
1. Introduction to TensorFlow
1.1 Basic TensorFlow concepts
1.1.1 Tensors
A tensor is a multi-dimensional array (list); the number of dimensions of a tensor is called its rank.
- Rank-0 tensor: a scalar, a single number, e.g. S=123
- Rank-1 tensor: a vector, a one-dimensional array, e.g. V=[1,2,3]
- Rank-2 tensor: a matrix, a two-dimensional array with i rows and j columns, where every element is indexed by its row and column numbers, e.g. m=[[1, 2, 3], [4, 5, 6], [7, 8, 9]]
- Rank-n tensor: to tell a tensor's rank, count the square brackets at its left edge: 0 brackets means rank 0, n brackets means rank n. A tensor can therefore represent arrays (lists) of rank 0 through n. A code sketch follows this list.
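A minimal sketch that checks ranks and shapes in code (the variable names are mine, for illustration only):

import tensorflow as tf
s = tf.constant(123)               # rank 0: scalar, shape ()
v = tf.constant([1, 2, 3])         # rank 1: vector, shape (3,)
m = tf.constant([[1, 2], [3, 4]])  # rank 2: matrix, shape (2, 2)
print(s.shape, v.shape, m.shape)   # () (3,) (2, 2)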
1.2 Constants
Define constants and print the result:

import tensorflow as tf          # import the module
x = tf.constant([[1.0, 2.0]])    # define a rank-2 tensor equal to [[1.0, 2.0]]
w = tf.constant([[3.0], [4.0]])  # define a rank-2 tensor equal to [[3.0], [4.0]]
y = tf.matmul(x, w)              # matrix-multiply x by w
print(y)                         # prints the tensor's description, not its value
with tf.Session() as sess:
    print(sess.run(y))           # run the session and print the computed result
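Before the session runs, print(y) only shows the node's metadata, e.g. something like Tensor("MatMul:0", shape=(1, 1), dtype=float32); sess.run(y) then prints the computed value [[11.]], since 1.0*3.0 + 2.0*4.0 = 11.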
1.3 Common functions for generating random numbers/arrays
#generate normally distributed random numbers
tf.random_normal()
#generate normally distributed random numbers with large-deviation points removed (truncated)
tf.truncated_normal()
#generate uniformly distributed random numbers
tf.random_uniform()
#generate an all-zeros array
tf.zeros
#generate an all-ones array
tf.ones
#generate an array filled with a given constant
tf.fill
#generate an array with directly specified values
tf.constant
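A usage sketch with example shapes and arguments (all of the values here are mine, chosen only to illustrate the signatures):

import tensorflow as tf
a = tf.random_normal([2, 3], mean=0.0, stddev=1.0, seed=1)  # normal distribution
b = tf.truncated_normal([2, 3], stddev=1.0)  # values more than 2 stddev from the mean are re-drawn
c = tf.random_uniform([2, 3], minval=0.0, maxval=1.0)       # uniform distribution
d = tf.zeros([3, 2])        # all zeros
e = tf.ones([3, 2])         # all ones
f = tf.fill([3, 2], 6.0)    # filled with 6.0
g = tf.constant([3, 2, 1])  # the given values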
1.4 Variable initialization/printing
import tensorflow as tf  # import the module
#truncated normal distribution (large deviations removed)
w = tf.Variable(tf.truncated_normal([2, 3], stddev=2, mean=0, seed=1))
with tf.Session() as sess:
    #variables must be initialized before use
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    print(sess.run(w))  # run the session and print w
1.5 Placeholders and feeding data
Feed one sample of data:

import tensorflow as tf  # import the module
x = tf.placeholder(tf.float32, shape=(1, 2))
w1 = tf.Variable(tf.random_normal([2, 3], stddev=1, seed=1))
y = tf.matmul(x, w1)
with tf.Session() as sess:
    #initialize the variables
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    print(sess.run(y, feed_dict={x: [[0.5, 0.6]]}))
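Since x has shape (1, 2) and w1 has shape (2, 3), the fetched result is a (1, 3) array.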
Feed N samples of data:

import tensorflow as tf  # import the module
x = tf.placeholder(tf.float32, shape=(None, 2))  # None: the batch size is fixed only at feed time
w1 = tf.Variable(tf.random_normal([2, 3], stddev=1, seed=1))
y = tf.matmul(x, w1)
with tf.Session() as sess:
    #initialize the variables
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    print(sess.run(y, feed_dict={x: [[0.1, 0.2], [0.2, 0.3], [0.3, 0.4], [0.4, 0.5]]}))
1.6 Loss functions
Mean squared error (MSE):

$MSE(y\_, y) = \dfrac{\sum_{i=1}^{n}(y - y\_)^2}{n}$

import tensorflow as tf  # import the module
loss = tf.reduce_mean(tf.square(y_ - y))
Cross entropy (CE), which measures the distance between two probability distributions:

$H(y\_, y) = -\sum y\_ \cdot \log(y)$

import tensorflow as tf  # import the module
#clip y into [1e-12, 1.0] so that log(0) can never occur
ce = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-12, 1.0)))
The softmax function: when the n outputs $(y_1, y_2, \ldots, y_n)$ of an n-way classifier pass through $softmax()$, they satisfy the requirements of a probability distribution:

$\forall x,\ P(X=x) \in [0, 1], \quad \sum_{x} P(X=x) = 1$

$softmax(y_i) = \dfrac{e^{y_i}}{\sum_{j=1}^{n} e^{y_j}}$

import tensorflow as tf  # import the module
ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
cem = tf.reduce_mean(ce)
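tf.nn.sparse_softmax_cross_entropy_with_logits applies softmax to the raw logits y and computes cross entropy against integer class indices in a single step, which is why the one-hot labels y_ are first converted with tf.argmax(y_, 1).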
1.7 Learning rate
The learning rate sets how far the parameters move at each training update:

$w_{n+1} = w_n - learning\_rate \cdot \nabla$

where $\nabla$ is the gradient of the loss with respect to $w$. If the learning rate is set too large, the model oscillates and does not converge; if it is set too small, the model converges slowly.
Exponentially decaying learning rate:

import tensorflow as tf  # import the module
#learning_rate_base, learning_rate_step and learning_rate_decay are user-chosen hyperparameters
global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(learning_rate_base, global_step, learning_rate_step, learning_rate_decay, staircase=True)
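With staircase=True the rate drops in discrete steps; following the tf.train.exponential_decay documentation, the effective rate is

$learning\_rate = learning\_rate\_base \times learning\_rate\_decay^{\lfloor global\_step / learning\_rate\_step \rfloor}$

(without staircase=True the exponent is not floored and the decay is smooth).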
1.8 Moving average (shadow values)
The moving average records, for every parameter, an average of its values over recent training history, which improves the model's generalization; it is applied to all trainable parameters, both w and b.
import tensorflow as tf  # import the module
#MOVING_AVERAGE_DECAY is a user-chosen hyperparameter; train_step is the optimizer op
ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
ema_op = ema.apply(tf.trainable_variables())
with tf.control_dependencies([train_step, ema_op]):
    train_op = tf.no_op(name='train')
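Per the ExponentialMovingAverage documentation, each shadow value is updated as

$shadow = decay \times shadow + (1 - decay) \times variable$

and when a global_step is passed in, the effective decay is $\min(MOVING\_AVERAGE\_DECAY,\ \frac{1 + step}{10 + step})$, so early in training the averages track the variables closely and stabilize later.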
1.9 Regularization
Regularization adds a weight-dependent penalty term to the loss function to suppress overfitting and improve the model's generalization; it is normally applied to the weights w only, not the biases b.
import tensorflow as tf  # import the module
tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
loss_total = cem + tf.add_n(tf.get_collection('losses'))
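Here tf.contrib.layers.l2_regularizer(regularizer)(w) evaluates to regularizer * tf.nn.l2_loss(w), i.e. $regularizer \times \frac{1}{2}\sum_i w_i^2$; tf.add_n then sums the collected penalties of all weight tensors into the total loss.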
1.10 Backpropagation training methods
Training minimizes the loss value; common optimization methods include gradient descent, the Momentum optimizer, and the Adam optimizer.
Gradient descent:

import tensorflow as tf  # import the module
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
Momentum optimizer:

import tensorflow as tf  # import the module
train_step = tf.train.MomentumOptimizer(learning_rate, momentum).minimize(loss)
Adam optimizer:

import tensorflow as tf  # import the module
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
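A minimal end-to-end sketch tying the pieces above together, fitting y = x1 + x2 with gradient descent (the data, shapes, and hyperparameters are made up for illustration):

import tensorflow as tf
import numpy as np

# made-up data: learn y = x1 + x2
X = np.random.rand(32, 2).astype(np.float32)
Y = np.sum(X, axis=1, keepdims=True)

x = tf.placeholder(tf.float32, shape=(None, 2))
y_ = tf.placeholder(tf.float32, shape=(None, 1))
w = tf.Variable(tf.random_normal([2, 1], stddev=1, seed=1))
y = tf.matmul(x, w)

loss = tf.reduce_mean(tf.square(y_ - y))  # MSE loss
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(300):
        sess.run(train_step, feed_dict={x: X, y_: Y})
    print(sess.run(w))  # should approach [[1.], [1.]]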
2. A TensorFlow MNIST handwritten-digit recognition example
mnist_forward.py
import tensorflow as tf

INPUT_NODE = 784    # 28x28 input pixels, flattened
OUTPUT_NODE = 10    # 10 digit classes
LAYER1_NODE = 500   # hidden-layer width

def get_weight(shape, regularizer):
    w = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    if regularizer is not None:
        tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
    return w

def get_bias(shape):
    b = tf.Variable(tf.zeros(shape))
    return b

def forward(x, regularizer):
    w1 = get_weight([INPUT_NODE, LAYER1_NODE], regularizer)
    b1 = get_bias([LAYER1_NODE])
    y1 = tf.nn.relu(tf.matmul(x, w1) + b1)
    w2 = get_weight([LAYER1_NODE, OUTPUT_NODE], regularizer)
    b2 = get_bias([OUTPUT_NODE])
    y = tf.matmul(y1, w2) + b2  # raw logits; softmax is applied inside the loss
    return y
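mnist_forward.py defines a two-layer fully connected network: 784 input pixels, one 500-node ReLU hidden layer, and 10 output logits, with optional L2 penalties collected into the 'losses' collection for the backward pass to sum.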
mnist_backward.py
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import mnist_forward
import os

BATCH_SIZE = 200
LEARNING_RATE_BASE = 0.1
LEARNING_RATE_DECAY = 0.99
REGULARIZER = 0.0001
STEPS = 50000
MOVING_AVERAGE_DECAY = 0.99
MODEL_SAVE_PATH = "./model/"
MODEL_NAME = "mnist_model"

def backward(mnist):
    x = tf.placeholder(tf.float32, [None, mnist_forward.INPUT_NODE])
    y_ = tf.placeholder(tf.float32, [None, mnist_forward.OUTPUT_NODE])
    y = mnist_forward.forward(x, REGULARIZER)
    global_step = tf.Variable(0, trainable=False)

    # cross-entropy loss plus the collected L2 regularization losses
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cem = tf.reduce_mean(ce)
    loss = cem + tf.add_n(tf.get_collection('losses'))

    # exponentially decaying learning rate
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)

    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

    # maintain moving averages of all trainable variables
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    ema_op = ema.apply(tf.trainable_variables())
    with tf.control_dependencies([train_step, ema_op]):
        train_op = tf.no_op(name='train')

    saver = tf.train.Saver()

    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        # resume from the latest checkpoint if one exists
        ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        for i in range(STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: xs, y_: ys})
            if i % 1000 == 0:
                print("After %d training step(s), loss on training batch is %g." % (step, loss_value))
                saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)

def main():
    mnist = input_data.read_data_sets("./data/", one_hot=True)
    backward(mnist)

if __name__ == '__main__':
    main()
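Because backward() restores the latest checkpoint before the training loop, rerunning mnist_backward.py resumes from where the previous run stopped; checkpoints are written to ./model/ every 1000 steps.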
mnist_test.py
# coding:utf-8
import time
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import mnist_forward
import mnist_backward

TEST_INTERVAL_SECS = 5

def test(mnist):
    with tf.Graph().as_default() as g:
        x = tf.placeholder(tf.float32, [None, mnist_forward.INPUT_NODE])
        y_ = tf.placeholder(tf.float32, [None, mnist_forward.OUTPUT_NODE])
        y = mnist_forward.forward(x, None)  # no regularization at test time

        # restore the moving-average (shadow) weights instead of the raw ones
        ema = tf.train.ExponentialMovingAverage(mnist_backward.MOVING_AVERAGE_DECAY)
        ema_restore = ema.variables_to_restore()
        saver = tf.train.Saver(ema_restore)

        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        while True:
            with tf.Session() as sess:
                ckpt = tf.train.get_checkpoint_state(mnist_backward.MODEL_SAVE_PATH)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # recover the step count from the checkpoint file name
                    global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                    accuracy_score = sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels})
                    print("After %s training step(s), test accuracy = %g" % (global_step, accuracy_score))
                else:
                    print('No checkpoint file found')
                    return
            time.sleep(TEST_INTERVAL_SECS)

def main():
    mnist = input_data.read_data_sets("./data/", one_hot=True)
    test(mnist)

if __name__ == '__main__':
    main()
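mnist_test.py can run alongside training: every TEST_INTERVAL_SECS seconds it reloads the newest checkpoint, restoring the shadow (moving-average) variables via variables_to_restore(), and reports accuracy on the full test set.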
mnist_app.py
# coding:utf-8
import tensorflow as tf
import numpy as np
from PIL import Image
import mnist_backward
import mnist_forward

def restore_model(testPicArr):
    with tf.Graph().as_default() as tg:
        x = tf.placeholder(tf.float32, [None, mnist_forward.INPUT_NODE])
        y = mnist_forward.forward(x, None)
        preValue = tf.argmax(y, 1)  # predicted class index

        # restore the moving-average (shadow) weights
        variable_averages = tf.train.ExponentialMovingAverage(mnist_backward.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(mnist_backward.MODEL_SAVE_PATH)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                preValue = sess.run(preValue, feed_dict={x: testPicArr})
                return preValue
            else:
                print("No checkpoint file found")
                return -1

def pre_pic(picName):
    img = Image.open(picName)
    reIm = img.resize((28, 28), Image.ANTIALIAS)
    im_arr = np.array(reIm.convert('L'))
    threshold = 50
    for i in range(28):
        for j in range(28):
            # invert to white-on-black, then binarize
            im_arr[i][j] = 255 - im_arr[i][j]
            if im_arr[i][j] < threshold:
                im_arr[i][j] = 0
            else:
                im_arr[i][j] = 255
    nm_arr = im_arr.reshape([1, 784])
    nm_arr = nm_arr.astype(np.float32)
    img_ready = np.multiply(nm_arr, 1.0 / 255.0)  # scale pixels to [0, 1]
    return img_ready

def application():
    testNum = int(input("input the number of test pictures:"))
    for i in range(testNum):
        testPic = input("the path of test picture:")
        testPicArr = pre_pic(testPic)
        preValue = restore_model(testPicArr)
        print("The prediction number is:", preValue)

def main():
    application()

if __name__ == '__main__':
    main()
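Usage: mnist_app.py asks for a picture count and then a file path for each picture; pre_pic() resizes the image to 28x28, converts it to grayscale, inverts it to white-on-black, binarizes it at a threshold of 50, and scales pixels to [0, 1] so the input matches the MNIST training format.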