2017年2月15日，谷歌举办了首届TensorFlow Dev Summit，并且发布了TensorFlow 1.0 正式版。 3月18号，上海的谷歌开发者社区（GDG）组织了针对峰会的专场回顾活动。本文是我在活动上分享的一些回顾，主要介绍了在流利说我们是如何使用TensorFlow来构建学生模型并应用在自适应系统里面的。首发于流利说技术团队公众号 原文链接

## 一、应用背景

### Deep Knowledge Tracing

$$L = \sum_{t}\ell\left(y_{t}^{T}\,\delta(q_{t+1}),\; a_{t+1}\right) \tag{1}$$

## 二、模型构建

# encoding:utf-8
import tensorflow as tf
class TensorFlowDKT(object):
    """Deep Knowledge Tracing (DKT) model.

    A multi-layer LSTM reads a batch of student answer sequences (each step is
    a one-hot style encoding of question id + correctness, hence
    input_size == num_skills * 2) and predicts, at every time step, the
    probability of answering each question in the bank correctly.
    """

    def __init__(self, config):
        """Build the full training/inference graph.

        Args:
            config: dict with keys
                "hidden_neurons"  - list of LSTM layer sizes,
                "num_skills"      - number of distinct questions in the bank,
                "input_size"      - input width (num_skills * 2),
                "batch_size"      - fixed batch size,
                "keep_prob"       - dropout keep probability used in training,
                "max_grad_norm"   - global-norm gradient clipping threshold.
        """
        self.hidden_neurons = hidden_neurons = config["hidden_neurons"]
        self.num_skills = num_skills = config["num_skills"]  # number of questions in the bank
        self.input_size = input_size = config["input_size"]  # input width, equals num_skills * 2
        self.batch_size = batch_size = config["batch_size"]
        self.keep_prob_value = config["keep_prob"]

        # inputs
        self.input_data = tf.placeholder(tf.float32, [batch_size, None, input_size])  # answer history
        self.sequence_len = tf.placeholder(tf.int32, [batch_size])  # valid length of each sequence in the batch
        self.max_steps = tf.placeholder(tf.int32)  # max seq length of current batch.
        self.keep_prob = tf.placeholder(tf.float32)  # dropout keep prob

        # labels
        self.target_id = tf.placeholder(tf.int32, [batch_size, None])  # id of the question answered at each step
        self.target_correctness = tf.placeholder(tf.float32, [batch_size, None])  # 1.0 / 0.0 correctness

        # create rnn cell: stacked LSTM, each layer wrapped with output dropout
        hidden_layers = []
        for idx, hidden_size in enumerate(hidden_neurons):
            lstm_layer = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
            hidden_layer = tf.contrib.rnn.DropoutWrapper(cell=lstm_layer,
                                                         output_keep_prob=self.keep_prob)
            hidden_layers.append(hidden_layer)
        self.hidden_cell = tf.contrib.rnn.MultiRNNCell(cells=hidden_layers, state_is_tuple=True)

        # dynamic rnn: sequence_length masks the padded tail of each sequence
        state_series, self.current_state = tf.nn.dynamic_rnn(cell=self.hidden_cell,
                                                             inputs=self.input_data,
                                                             sequence_length=self.sequence_len,
                                                             dtype=tf.float32)

        # output layer: project each step's hidden state onto the question bank
        output_w = tf.get_variable("W", [hidden_neurons[-1], num_skills])
        output_b = tf.get_variable("b", [num_skills])
        self.state_series = tf.reshape(state_series, [batch_size * self.max_steps, hidden_neurons[-1]])
        self.logits = tf.matmul(self.state_series, output_w) + output_b
        self.mat_logits = tf.reshape(self.logits, [batch_size, self.max_steps, num_skills])
        self.pred_all = tf.sigmoid(self.mat_logits)  # per-question probabilities for every step

        # compute loss: from the flattened logits, gather only the logit of the
        # question actually answered at each step (row offset * num_skills + id)
        flat_logits = tf.reshape(self.logits, [-1])
        flat_target_correctness = tf.reshape(self.target_correctness, [-1])
        flat_base_target_index = tf.range(batch_size * self.max_steps) * num_skills
        flat_bias_target_id = tf.reshape(self.target_id, [-1])
        flat_target_id = flat_bias_target_id + flat_base_target_index
        flat_target_logits = tf.gather(flat_logits, flat_target_id)
        self.pred = tf.sigmoid(tf.reshape(flat_target_logits, [batch_size, self.max_steps]))
        self.binary_pred = tf.cast(tf.greater_equal(self.pred, 0.5), tf.int32)
        loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=flat_target_correctness,
                                                       logits=flat_target_logits)
        self.loss = tf.reduce_sum(loss)

        # optimizer: plain SGD with an externally assigned learning rate and
        # global-norm gradient clipping.
        # FIX: `self.grads` and `optimizer` were referenced below but never
        # defined; restored per the companion repo (lingochamp/tensorflow-dkt).
        self.lr = tf.Variable(0.0, trainable=False)
        trainable_vars = tf.trainable_variables()
        self.grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, trainable_vars),
                                               config["max_grad_norm"])
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(self.grads, trainable_vars))

    def step(self, sess, input_x, target_id, target_correctness, sequence_len, is_train):
        """Run one batch through the graph.

        In training mode, applies dropout and one optimizer step and returns
        the batch loss; in evaluation mode, disables dropout and returns
        (binary_pred, pred, pred_all).
        """
        _, max_steps, _ = input_x.shape
        input_feed = {self.input_data: input_x,
                      self.target_id: target_id,
                      self.target_correctness: target_correctness,
                      self.max_steps: max_steps,
                      self.sequence_len: sequence_len}
        if is_train:
            input_feed[self.keep_prob] = self.keep_prob_value
            train_loss, _, _ = sess.run([self.loss, self.train_op, self.current_state], input_feed)
            return train_loss
        else:
            input_feed[self.keep_prob] = 1  # no dropout at evaluation time
            bin_pred, pred, pred_all = sess.run([self.binary_pred, self.pred, self.pred_all], input_feed)
            return bin_pred, pred, pred_all

    def assign_lr(self, session, lr_value):
        """Set the learning rate variable (call before each epoch / on decay)."""
        session.run(tf.assign(self.lr, lr_value))

# process data ...
# config and create model
# config = ...
model = TensorFlowDKT(config)
# create session and init all variables
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for i in range(num_epoch):
    # train
    overall_loss = 0.0  # FIX: accumulator must be initialized before `+=` below
    # lr_value = ...
    model.assign_lr(sess, lr_value)  # assign learning rate
    train_generator.shuffle()  # random shuffle before each epoch
    while not train_generator.end:
        input_x, target_id, target_correctness, seqs_len, _ = train_generator.next_batch()
        overall_loss += model.step(sess, input_x, target_id, target_correctness, seqs_len,
                                   is_train=True)
    # test
    # NOTE(review): the generators' `end` flags presumably reset between
    # epochs inside shuffle()/next_batch() -- confirm against the data code.
    while not test_generator.end:
        input_x, target_id, target_correctness, seqs_len, _ = test_generator.next_batch()
        binary_pred, pred, _ = model.step(sess, input_x, target_id, target_correctness, seqs_len,
                                          is_train=False)
    # calculate metrics ...

Demo 的完整代码，见 https://github.com/lingochamp/tensorflow-dkt

## Truncated BPTT

### 多GPU加速

def multi_gpu_model(num_gpus=1, model_config=None):
    """Build one TensorFlowDKT tower per GPU with shared variables.

    NOTE(review): this is an illustrative skeleton from the article -- the
    per-tower gradient collection, cross-tower gradient averaging, and the
    construction of the returned `train_op` are elided (`train_op` is not
    defined in the visible code). The towers are presumably also added to the
    "train_model" collection that generate_feed_dict() reads -- confirm
    against the full implementation.
    """
    for i in range(num_gpus):
        with tf.device("/gpu:%d" % i):
            with tf.name_scope("tower_%d" % i):  # name_scope keeps each tower's ops distinct
                model = TensorFlowDKT(model_config)  # one model instance per GPU
                tf.get_variable_scope().reuse_variables()  # share weights across towers
    with tf.device("cpu:0"):
        # ... average tower gradients and build the apply op on the CPU ...
        return train_op

def generate_feed_dict(data_generator):
    """Build one combined feed dict for all GPU towers.

    Retrieves every model registered in the "train_model" collection, draws a
    separate batch from `data_generator` for each, and maps that batch onto
    the model's placeholders (dropout keep prob set to its training value).
    """
    feed_dict = {}
    towers = tf.get_collection("train_model")  # fetch the per-GPU model instances
    for tower in towers:
        inputs, target_id, target_correctness, seqs_len, max_len = data_generator.next_batch()
        feed_dict.update({
            tower.input_data: inputs,
            tower.target_id: target_id,
            tower.target_correctness: target_correctness,
            tower.max_steps: max_len,
            tower.sequence_len: seqs_len,
            tower.keep_prob: tower.keep_prob_value,
        })
    return feed_dict

# allow_soft_placement lets TF fall back to another device (e.g. CPU) when an
# op pinned by tf.device has no kernel for that device -- needed for multi-GPU
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

## 结语

TensorFlow是一个十分简单易用的机器学习框架，也是目前最流行的深度学习框架，它可以让机器学习研究者更少地关注底层的问题，而专注于问题的解决和算法的优化上。流利说算法团队从16年初开始就将TensorFlow应用到内部的机器学习项目里面，积累了很多相关的使用经验，从而帮助我们用更智能的算法来服务用户。

### References

1. Piech, Chris, et al. "Deep Knowledge Tracing." *Advances in Neural Information Processing Systems*. 2015.