TensorFlow Tutorial

Notes and summaries of day-to-day TensorFlow usage.

TensorFlow 1.x

Variable Initialization

tf.ones

Creates a tensor of the given shape with every element set to 1.

a = tf.ones([3, 3], dtype=tf.float32)

tf.zeros

Creates a tensor of the given shape with every element set to 0.

a = tf.zeros([3, 3], dtype=tf.float32)

tf.ones_like

Creates a tensor with the same shape as the given tensor, with every element set to 1.

a = tf.zeros([3, 3], dtype=tf.float32)
b = tf.ones_like(a, dtype=tf.float32)

tf.zeros_like

Creates a tensor with the same shape as the given tensor, with every element set to 0.

a = tf.zeros([3, 3], dtype=tf.float32)
b = tf.zeros_like(a, dtype=tf.float32)

tf.fill

Creates a tensor of the given dims with every element set to value.

a = tf.fill([3, 3], value=4.2)

tf.constant

Creates a tensor of the given shape with every element set to value.

a = tf.constant(2.43, shape=[3, 3], dtype=tf.float32)

tf.random_normal

Creates a tensor of the given shape whose elements are drawn from a normal distribution with mean mean and standard deviation stddev.

a = tf.random_normal([3, 3], mean=0.0, stddev=0.01, dtype=tf.float32)

tf.truncated_normal

Similar to tf.random_normal, except that only values inside \([mean - 2 \times stddev, mean + 2 \times stddev]\) are kept (values outside the range are re-drawn).

a = tf.truncated_normal([3, 3], mean=0.0, stddev=0.01, dtype=tf.float32)

tf.random_uniform

Creates a tensor of the given shape whose elements are drawn from a uniform distribution over \([minval, maxval)\) (the upper bound is exclusive).

a = tf.random_uniform([3, 3], minval=-1, maxval=1, dtype=tf.float32)

tf.lin_space

Creates an evenly spaced sequence of num values from start to stop, endpoints included.

a = tf.lin_space(0.0, 6.0, 5)

tf.range

Creates an arithmetic sequence that starts at start, advances by delta, and stops before the limit stop (the endpoint is exclusive).

a = tf.range(0, 6, 1, dtype=tf.float32)

Initializing a variable with a custom matrix

value = [0, 1, 2, 3, 4, 5, 6, 7]
init = tf.constant_initializer(value)
x = tf.get_variable("x", shape=[8], initializer=init)
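
To check the result, initialize and evaluate the variable in a session:

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(x))  # [0. 1. 2. 3. 4. 5. 6. 7.]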

Gradients

Computing gradients

tvars = tf.trainable_variables()
grads = tf.gradients(loss, tvars)
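
The gradients still have to be applied to update the variables. A minimal sketch of the usual follow-up, with global-norm clipping added (the optimizer, learning rate, and clip threshold are illustrative choices, not part of the original snippet):

tvars = tf.trainable_variables()
grads = tf.gradients(loss, tvars)
# Clip by global norm to guard against exploding gradients.
clipped_grads, _ = tf.clip_by_global_norm(grads, clip_norm=5.0)
train_op = tf.train.AdamOptimizer(1e-3).apply_gradients(zip(clipped_grads, tvars))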

Computing gradients for specific parameters

# Train only the embedding layer
tvars = [v for v in tf.trainable_variables() if "embedding" in v.name]
grads = tf.gradients(loss, tvars)
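
If the raw gradients are not needed, the filtered variable list can equivalently be handed straight to the optimizer (the optimizer and learning rate here are illustrative):

# Train only the embedding layer via var_list
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss, var_list=tvars)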

Controlling parameter gradients with tf.stop_gradient()

import tensorflow as tf

# Train only the first row of the embedding table
embedding_table = tf.get_variable("embedding_table", [2, 2])
embedding_table = tf.stack([embedding_table[0], tf.stop_gradient(embedding_table[1])], axis=0)

print(tf.trainable_variables())

embedding_token = tf.nn.embedding_lookup(embedding_table, [0, 1])

result = tf.ones_like(embedding_token)

loss = tf.losses.mean_squared_error(result, embedding_token)

train_op = tf.train.AdamOptimizer(0.1).minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

for i in range(100):
    _, loss_val, embedding_table_val = sess.run([train_op, loss, embedding_table])
    print(loss_val)
    print(embedding_table_val)
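
Because tf.stop_gradient blocks the gradient through the second row, only embedding_table[0] moves toward the all-ones target during training; embedding_table[1] stays at its random initialization.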

Saving Models

Saving a checkpoint

saver = tf.train.Saver(max_to_keep=3)
saver.save(sess, "model.ckpt", global_step=step)
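
Restoring from a checkpoint later is symmetric; a minimal sketch (the directory "./" is a placeholder for wherever the checkpoints were written):

saver = tf.train.Saver()
ckpt = tf.train.latest_checkpoint("./")  # directory containing model.ckpt-*
saver.restore(sess, ckpt)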

Saving a pb model

constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, output_node_names=["probs"])
tf.train.write_graph(constant_graph, logdir="", name="model.pb", as_text=False)
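
Note that output_node_names must match op names that actually exist in the graph, so the output has to be named when the graph is built, for example (logits here stands in for your model's output):

probs = tf.nn.softmax(logits, name="probs")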

Loading Models

Loading a checkpoint

import collections

init_vars = tf.train.list_variables(init_checkpoint_name)
assignment_map = collections.OrderedDict()
for (name, var) in init_vars:
    assignment_map[name] = name

tf.train.init_from_checkpoint(init_checkpoint_name, assignment_map)
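
The assignment_map maps names in the checkpoint to variables in the current graph, so it can also skip or rename variables. A common guard (used e.g. in the BERT reference code) is to load only the names that exist in the current graph; a sketch:

name_to_variable = {v.name.split(":")[0]: v for v in tf.global_variables()}
assignment_map = collections.OrderedDict()
for (name, var) in tf.train.list_variables(init_checkpoint_name):
    if name in name_to_variable:
        assignment_map[name] = name

tf.train.init_from_checkpoint(init_checkpoint_name, assignment_map)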

Loading a pb model

sess = tf.Session()
with tf.gfile.FastGFile("model.pb", "rb") as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

with sess.graph.as_default():
    output = tf.import_graph_def(
        graph_def,
        input_map={"input_ids:0": input_ids},
        return_elements=["probs:0"]
    )
sess.run(output, feed_dict=feed_dict)
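
The return value of tf.import_graph_def is a list matching return_elements, so output[0] is the probs tensor. Imported tensors can also be fetched by name; note that import_graph_def prefixes op names with "import" by default:

probs = sess.graph.get_tensor_by_name("import/probs:0")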

Single-machine multi-GPU

import tensorflow as tf
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2'  # adjust to your machine
gpus = [0, 1, 2]

# From the official TensorFlow code; it appears in most open-source repos on GitHub.
def average_gradients(tower_grads):
    average_grads = []
    # grad_and_vars: ((grad0_gpu0, var0_gpu0), ..., (grad0_gpuN, var0_gpuN))
    for grad_and_vars in zip(*tower_grads):
        grads = []
        for g, _ in grad_and_vars:
            expanded_g = tf.expand_dims(g, 0)
            grads.append(expanded_g)
        # Average over the 'tower' dimension.
        grad = tf.concat(axis=0, values=grads)
        grad = tf.reduce_mean(grad, 0)
        # Variables are shared across towers, so the first tower's pointer to
        # the variable is enough.
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads

# Core code
def train_mult():
    # Assume 224x224 inputs.
    input_image_size = 224
    # Input placeholder
    inputs = tf.placeholder(shape=[None, input_image_size, input_image_size, 3], dtype=tf.float32, name='inputs')
    # Label placeholders for the shape and color tasks
    shape_label = tf.placeholder(shape=[None], dtype=tf.int64, name='shape_label')
    color_label = tf.placeholder(shape=[None], dtype=tf.int64, name='color_label')
    # A non-trainable variable used as global_step
    global_step = tf.Variable(0, name='global_step', trainable=False)
    # Optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)

    tower_grads = []  # collects the grads from each GPU
    tower_loss = []   # collects the total loss from each GPU

    # This is data-parallel training, so the batch is split into one shard per GPU.
    images_splits = tf.split(inputs, num_or_size_splits=len(gpus), axis=0)
    color_label_splits = tf.split(color_label, num_or_size_splits=len(gpus), axis=0)
    shape_label_splits = tf.split(shape_label, num_or_size_splits=len(gpus), axis=0)

    for i in range(len(gpus)):  # build the net once per GPU; variables are shared
        with tf.device('/gpu:%d' % i):  # pin this tower to a GPU
            # Open a global variable_scope. reuse=tf.AUTO_REUSE means a variable is
            # created if it does not exist yet and reused (shared) if it does.
            with tf.variable_scope('my_net', reuse=tf.AUTO_REUSE):
                # Define your own model net; here it returns two logits.
                logits_shape, logits_color = my_net(images_splits[i])

                # Compute the loss of the two classification tasks; the loss functions
                # can be your own or TensorFlow's built-in ops.
                color_loss = compute_color_loss(logits_color, color_label_splits[i])
                tf.summary.scalar('color_loss_gpu%d' % i, color_loss)  # optionally log each GPU's loss to TensorBoard
                shape_loss = compute_shape_loss(logits_shape, shape_label_splits[i])
                tf.summary.scalar('shape_loss_gpu%d' % i, shape_loss)

                # Multi-task, so sum the losses.
                sum_loss = color_loss + shape_loss
                tf.summary.scalar('sum_loss_gpu%d' % i, sum_loss)

                # compute_gradients on each GPU
                grads = optimizer.compute_gradients(sum_loss, var_list=tf.trainable_variables())
                # Collect the grads. Some of them may be None and must be filtered out,
                # otherwise average_gradients fails (unless you filter there instead).
                tower_grads.append([x for x in grads if x[0] is not None])

                # Collect each GPU's loss.
                tower_loss.append(sum_loss)

    # Average total loss
    avg_tower_loss = tf.reduce_mean(tower_loss, axis=0)
    tf.summary.scalar('avg_tower_loss', avg_tower_loss)

    # Average the gradients
    grads_avg = average_gradients(tower_grads)

    merged_summary = tf.summary.merge_all()
    update = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    # Apply the gradients
    with tf.control_dependencies(update):
        train_op = optimizer.apply_gradients(grads_avg, global_step)
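
A sketch of how this graph might be driven, assuming train_mult is changed to return the train op, the averaged loss, and the placeholders (next_batch is a hypothetical data feeder, and the batch size must be divisible by len(gpus) for tf.split to succeed):

train_op, avg_tower_loss, inputs, shape_label, color_label = train_mult()
# allow_soft_placement lets ops without a GPU kernel fall back to the CPU.
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        batch_images, batch_shapes, batch_colors = next_batch()  # hypothetical feeder
        _, loss_val = sess.run(
            [train_op, avg_tower_loss],
            feed_dict={inputs: batch_images,
                       shape_label: batch_shapes,
                       color_label: batch_colors})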

TensorFlow 2.x