作者 | Aymeric Damien
编辑 | 奇予纪
出品 | 磐创AI团队
W o rd2Vec (Word Embedding)
使用TensorFlow 2.0实现Word2Vec算法计 算单词的向量表示,这 个例子是使用一小部分维基百科文章来训练的。
更多信息请查看论文:
Mikolov, Tomas et al. “Efficient Estimation of Word Representations in Vector Space.”, 2013 [1]
from __future__ import division, print_function, absolute_import import collections import os import random import urllib import zipfile import numpy as np import tensorflow as tf
learning_rate = 0.1 batch_size = 128 num_steps = 3000000 display_step = 10000 eval_step = 200000 # 训练参数 learning_rate = 0.1 batch_size = 128 num_steps = 3000000 display_step = 10000 eval_step = 200000 # 评估参数 eval_words = ['five', 'of', 'going', 'hardware', 'american', 'britain'] # Word2Vec 参数 embedding_size = 200 # 嵌入向量的维度 vector. max_vocabulary_size = 50000 # 词汇表中不同单词的总数words in the vocabulary. min_occurrence = 10 # 删除出现小于n次的所有单词 skip_window = 3 # 左右各要考虑多少个单词 num_skips = 2 # 重复使用输入生成标签的次数 num_sampled = 64 # 负采样数量
# 下载一小部分维基百科文章集 url = 'http://mattmahoney.net/dc/text8.zip' data_path = 'text8.zip' if not os.path.exists(data_path): print("Downloading the dataset... (It may take some time)") filename, _ = urllib.urlretrieve(url, data_path) print("Done!") # 解压数据集文件,文本已处理完毕 with zipfile.ZipFile(data_path) as f: text_words = f.read(f.namelist()[0]).lower().split()
# 构建词典并用 UNK 标记替换频数较低的词 count = [('UNK', -1)] # 检索最常见的单词 count.extend(collections.Counter(text_words).most_common(max_vocabulary_size - 1)) # 删除少于'min_occurrence'次数的样本 for i in range(len(count) - 1, -1, -1): if count[i][1] < min_occurrence: count.pop(i) else: #该集合是有序的,因此在当出现小于'min_occurrence'时停止 break # 计算单词表单词个数 vocabulary_size = len(count) # 为每一个词分配id word2id = dict() for i, (word, _)in enumerate(count): word2id[word] = i data = list() unk_count = 0 for word in text_words: # 检索单词id,或者如果不在字典中则为其指定索引0('UNK') index = word2id.get(word, 0) if index == 0: unk_count += 1 data.append(index) count[0] = ('UNK', unk_count) id2word = dict(zip(word2id.values(), word2id.keys())) print("Words count:", len(text_words)) print("Unique words:", len(set(text_words))) print("Vocabulary size:", vocabulary_size) print("Most common words:", count[:10])
output:
Words count: 17005207 Unique words: 253854 Vocabulary size: 47135 Most common words: [('UNK', 444176), ('the', 1061396), ('of', 593677), ('and', 416629), ('one', 411764), ('in', 372201), ('a', 325873), ('to', 316376), ('zero', 264975), ('nine', 250430)]
data_index = 0 # 为skip-gram模型生成训练批次 def next_batch(batch_size, num_skips, skip_window): global data_index assert batch_size % num_skips == 0 assert num_skips <= 2 * skip_window batch = np.ndarray(shape=(batch_size), dtype=np.int32) labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32) # 得到窗口长度( 当前单词左边和右边 + 当前单词) span = 2 * skip_window + 1 buffer = collections.deque(maxlen=span) if data_index + span > len(data): data_index = 0 buffer.extend(data[data_index:data_index + span]) data_index += span for i in range(batch_size // num_skips): context_words = [w for w in range(span) if w != skip_window] words_to_use = random.sample(context_words, num_skips) for j, context_word in enumerate(words_to_use): batch[i * num_skips + j] = buffer[skip_window] labels[i * num_skips + j, 0] = buffer[context_word] if data_index == len(data): buffer.extend(data[0:span]) data_index = span else: buffer.append(data[data_index]) data_index += 1 #回溯一点,以避免在批处理结束时跳过单词 data_index = (data_index + len(data) - span) % len(data) return batch, labels
# 确保在CPU上分配以下操作和变量 # (某些操作在GPU上不兼容) with tf.device('/cpu:0'): # 创建嵌入变量(每一行代表一个词嵌入向量) embedding vector). embedding = tf.Variable(tf.random.normal([vocabulary_size, embedding_size])) # 构造NCE损失的变量 nce_weights = tf.Variable(tf.random.normal([vocabulary_size, embedding_size])) nce_biases = tf.Variable(tf.zeros([vocabulary_size])) def get_embedding(x): with tf.device('/cpu:0'): # 对于X中的每一个样本查找对应的嵌入向量 x_embed = tf.nn.embedding_lookup(embedding, x) return x_embed def nce_loss(x_embed, y): with tf.device('/cpu:0'): # 计算批处理的平均NCE损失 y = tf.cast(y, tf.int64) loss = tf.reduce_mean( tf.nn.nce_loss(weights=nce_weights, biases=nce_biases, labels=y, inputs=x_embed, num_sampled=num_sampled, num_classes=vocabulary_size)) return loss # 评估 def evaluate(x_embed): with tf.device('/cpu:0'): # 计算输入数据嵌入与每个嵌入向量之间的余弦相似度 x_embed = tf.cast(x_embed, tf.float32) x_embed_norm = x_embed / tf.sqrt(tf.reduce_sum(tf.square(x_embed))) embedding_norm = embedding / tf.sqrt(tf.reduce_sum(tf.square(embedding), 1, keepdims=True), tf.float32) cosine_sim_op = tf.matmul(x_embed_norm, embedding_norm, transpose_b=True) return cosine_sim_op # 定义优化器 optimizer = tf.optimizers.SGD(learning_rate)
# 优化过程 def run_optimization(x, y): with tf.device('/cpu:0'): # 将计算封装在GradientTape中以实现自动微分 with tf.GradientTape() as g: emb = get_embedding(x) loss = nce_loss(emb, y) # 计算梯度 gradients = g.gradient(loss, [embedding, nce_weights, nce_biases]) # 按gradients更新 W 和 b optimizer.apply_gradients(zip(gradients, [embedding, nce_weights, nce_biases]))
# 用于测试的单词 x_test = np.array([word2id[w] for w in eval_words]) # 针对给定步骤数进行训练 for step in xrange(1, num_steps + 1): batch_x, batch_y = next_batch(batch_size, num_skips, skip_window) run_optimization(batch_x, batch_y) if step % display_step == 0 or step == 1: loss = nce_loss(get_embedding(batch_x), batch_y) print("step: %i, loss: %f" % (step, loss)) # 评估 if step % eval_step == 0 or step == 1: print("Evaluation...") sim = evaluate(get_embedding(x_test)).numpy() for i in xrange(len(eval_words)): top_k = 8 # 最相似的单词数量 nearest = (-sim[i, :]).argsort()[1:top_k + 1] log_str = '"%s" nearest neighbors:' % eval_words[i] for k in xrange(top_k): log_str = '%s %s,' % (log_str, id2word[nearest[k]]) print(log_str)
step: 1, loss: 504.444214 Evaluation... "five" nearest neighbors: censure, stricken, anglicanism, stick, streetcars, shrines, horrified, sparkle, "of" nearest neighbors: jolly, weary, clinicians, kerouac, economist, owls, safe, playoff, "going" nearest neighbors: filament, platforms, moderately, micheal, despotic, krag, disclosed, your, "hardware" nearest neighbors: occupants, paraffin, vera, reorganized, rename, declares, prima, condoned, "american" nearest neighbors: portfolio, rhein, aalto, angle, lifeson, tucker, sexton, dench, "britain" nearest neighbors: indivisible, disbelief, scripture, pepsi, scriptores, sighting, napalm, strike, step: 10000, loss: 117.166962 step: 20000, loss: 65.478333 step: 30000, loss: 46.580460 step: 40000, loss: 25.563128 step: 50000, loss: 50.924446 step: 60000, loss: 51.696526 step: 70000, loss: 17.272142 step: 80000, loss: 32.579414 step: 90000, loss: 68.372032 step: 100000, loss: 36.026573 step: 110000, loss: 22.502020 step: 120000, loss: 15.788742 step: 130000, loss: 31.832420 step: 140000, loss: 25.096617 step: 150000, loss: 12.013027 step: 160000, loss: 20.574780 step: 170000, loss: 12.201975 step: 180000, loss: 20.983793 step: 190000, loss: 11.366720 step: 200000, loss: 19.431549 Evaluation... "five" nearest neighbors: three, four, eight, six, two, seven, nine, zero, "of" nearest neighbors: the, a, and, first, with, on, but, from, "going" nearest neighbors: have, more, used, out, be, with, on, however, "hardware" nearest neighbors: be, known, system, apollo, and, a, such, used, "american" nearest neighbors: UNK, and, from, s, at, in, after, about, "britain" nearest neighbors: of, and, many, the, as, used, but, such, step: 210000, loss: 16.361233 step: 220000, loss: 17.529526 step: 230000, loss: 16.805817 step: 240000, loss: 6.365625 step: 250000, loss: 8.083097 step: 260000, loss: 11.262514 step: 270000, loss: 9.842708 step: 280000, loss: 6.363440 step: 290000, loss: 8.732617 step: 300000, loss: 10.484728 step: 310000, loss: 12.099487 step: 320000, loss: 11.496288 step: 330000, loss: 9.283813 step: 340000, loss: 10.777218 step: 350000, loss: 16.310440 step: 360000, loss: 7.495782 step: 370000, loss: 9.287696 step: 380000, loss: 6.982735 step: 390000, loss: 8.549622 step: 400000, loss: 8.388112 Evaluation... "five" nearest neighbors: four, three, six, two, seven, eight, one, zero, "of" nearest neighbors: the, a, with, also, for, and, which, by, "going" nearest neighbors: have, are, both, called, being, a, of, had, "hardware" nearest neighbors: may, de, some, have, so, which, other, also, "american" nearest neighbors: s, british, UNK, from, in, including, first, see, "britain" nearest neighbors: against, include, including, both, british, other, an, most, step: 410000, loss: 8.757725 step: 420000, loss: 12.303110 step: 430000, loss: 12.325478 step: 440000, loss: 7.659882 step: 450000, loss: 6.028089 step: 460000, loss: 12.700299 step: 470000, loss: 7.063077 step: 480000, loss: 18.004183 step: 490000, loss: 7.510474 step: 500000, loss: 10.089376 step: 510000, loss: 11.404436 step: 520000, loss: 9.494527 step: 530000, loss: 7.797963 step: 540000, loss: 7.390718 step: 550000, loss: 13.911215 step: 560000, loss: 6.975731 step: 570000, loss: 6.179163 step: 580000, loss: 7.066525 step: 590000, loss: 6.487288 step: 600000, loss: 5.361528 Evaluation... "five" nearest neighbors: four, six, three, seven, two, one, eight, zero, "of" nearest neighbors: the, and, from, with, a, including, in, include, "going" nearest neighbors: have, even, they, term, who, many, which, were, "hardware" nearest neighbors: include, computer, an, which, other, each, than, may, "american" nearest neighbors: english, french, s, german, from, in, film, see, "britain" nearest neighbors: several, first, modern, part, government, german, was, were, step: 610000, loss: 4.144980 step: 620000, loss: 5.865635 step: 630000, loss: 6.826498 step: 640000, loss: 8.376097 step: 650000, loss: 7.117930 step: 660000, loss: 7.639544 step: 670000, loss: 5.973255 step: 680000, loss: 4.908459 step: 690000, loss: 6.164993 step: 700000, loss: 7.360281 step: 710000, loss: 12.693079 step: 720000, loss: 6.410182 step: 730000, loss: 7.499201 step: 740000, loss: 6.509094 step: 750000, loss: 10.625893 step: 760000, loss: 7.177696 step: 770000, loss: 12.639092 step: 780000, loss: 8.441635 step: 790000, loss: 7.529139 step: 800000, loss: 6.579177 Evaluation... "five" nearest neighbors: four, three, six, seven, eight, two, one, zero, "of" nearest neighbors: and, with, in, the, its, from, by, including, "going" nearest neighbors: have, they, how, include, people, however, also, their, "hardware" nearest neighbors: computer, large, include, may, or, which, other, there, "american" nearest neighbors: born, french, british, english, german, b, john, d, "britain" nearest neighbors: country, including, include, general, part, various, several, by, step: 810000, loss: 6.934138 step: 820000, loss: 5.686094 step: 830000, loss: 7.310243 step: 840000, loss: 5.028157 step: 850000, loss: 7.079705 step: 860000, loss: 6.768996 step: 870000, loss: 5.604030 step: 880000, loss: 8.208309 step: 890000, loss: 6.301597 step: 900000, loss: 5.733234 step: 910000, loss: 6.577081 step: 920000, loss: 6.774826 step: 930000, loss: 7.068932 step: 940000, loss: 6.694956 step: 950000, loss: 7.944673 step: 960000, loss: 5.988618 step: 970000, loss: 6.651366 step: 980000, loss: 4.595577 step: 990000, loss: 6.564834 step: 1000000, loss: 4.327858 Evaluation... "five" nearest neighbors: four, three, seven, six, eight, two, nine, zero, "of" nearest neighbors: the, first, and, became, from, under, at, with, "going" nearest neighbors: others, has, then, have, how, become, had, also, "hardware" nearest neighbors: computer, large, systems, these, different, either, include, using, "american" nearest neighbors: b, born, d, UNK, nine, english, german, french, "britain" nearest neighbors: government, island, local, country, by, including, control, within, step: 1010000, loss: 5.841236 step: 1020000, loss: 5.805200 step: 1030000, loss: 9.962063 step: 1040000, loss: 6.281199 step: 1050000, loss: 7.147995 step: 1060000, loss: 5.721184 step: 1070000, loss: 7.080662 step: 1080000, loss: 6.638658 step: 1090000, loss: 5.814178 step: 1100000, loss: 5.195928 step: 1110000, loss: 6.724787 step: 1120000, loss: 6.503905 step: 1130000, loss: 5.762966 step: 1140000, loss: 5.790243 step: 1150000, loss: 5.958191 step: 1160000, loss: 5.997983 step: 1170000, loss: 7.065348 step: 1180000, loss: 6.073387 step: 1190000, loss: 6.644097 step: 1200000, loss: 5.934450 Evaluation... "five" nearest neighbors: three, four, six, eight, seven, two, nine, zero, "of" nearest neighbors: the, and, including, in, its, with, from, on, "going" nearest neighbors: others, then, through, has, had, another, people, when, "hardware" nearest neighbors: computer, control, systems, either, these, large, small, other, "american" nearest neighbors: born, german, john, d, british, b, UNK, french, "britain" nearest neighbors: local, against, british, island, country, general, including, within, step: 1210000, loss: 5.832344 step: 1220000, loss: 6.453851 step: 1230000, loss: 6.583966 step: 1240000, loss: 5.571673 step: 1250000, loss: 5.720917 step: 1260000, loss: 7.663424 step: 1270000, loss: 6.583741 step: 1280000, loss: 8.503859 step: 1290000, loss: 5.540640 step: 1300000, loss: 6.703249 step: 1310000, loss: 5.274101 step: 1320000, loss: 5.846446 step: 1330000, loss: 5.438172 step: 1340000, loss: 6.367691 step: 1350000, loss: 6.558622 step: 1360000, loss: 9.822924 step: 1370000, loss: 4.982378 step: 1380000, loss: 6.159739 step: 1390000, loss: 5.819083 step: 1400000, loss: 7.775135 Evaluation... "five" nearest neighbors: four, three, six, seven, two, eight, one, zero, "of" nearest neighbors: and, the, in, with, its, within, for, including, "going" nearest neighbors: others, through, while, has, to, how, particularly, their, "hardware" nearest neighbors: computer, systems, large, control, research, using, information, either, "american" nearest neighbors: english, french, german, born, film, british, s, former, "britain" nearest neighbors: british, country, europe, local, military, island, against, western, step: 1410000, loss: 8.214248 step: 1420000, loss: 4.696859 step: 1430000, loss: 5.873761 step: 1440000, loss: 5.971557 step: 1450000, loss: 4.992722 step: 1460000, loss: 5.197714 step: 1470000, loss: 6.916918 step: 1480000, loss: 6.441984 step: 1490000, loss: 5.443647 step: 1500000, loss: 5.178482 step: 1510000, loss: 6.060414 step: 1520000, loss: 6.373306 step: 1530000, loss: 5.098322 step: 1540000, loss: 6.674916 step: 1550000, loss: 6.712685 step: 1560000, loss: 5.280202 step: 1570000, loss: 6.454964 step: 1580000, loss: 4.896697 step: 1590000, loss: 6.239226 step: 1600000, loss: 5.709726 Evaluation... "five" nearest neighbors: three, four, two, six, seven, eight, one, zero, "of" nearest neighbors: the, and, including, in, with, within, its, following, "going" nearest neighbors: others, people, who, they, that, far, were, have, "hardware" nearest neighbors: computer, systems, include, high, research, some, information, large, "american" nearest neighbors: born, english, french, british, german, d, john, b, "britain" nearest neighbors: country, military, china, europe, against, local, central, british, step: 1610000, loss: 6.334940 step: 1620000, loss: 5.093616 step: 1630000, loss: 6.119366 step: 1640000, loss: 4.975187 step: 1650000, loss: 6.490408 step: 1660000, loss: 7.464082 step: 1670000, loss: 4.977184 step: 1680000, loss: 5.658133 step: 1690000, loss: 5.352454 step: 1700000, loss: 6.810776 step: 1710000, loss: 5.687447 step: 1720000, loss: 5.992206 step: 1730000, loss: 5.513011 step: 1740000, loss: 5.548522 step: 1750000, loss: 6.200248 step: 1760000, loss: 13.070073 step: 1770000, loss: 4.621058 step: 1780000, loss: 5.301342 step: 1790000, loss: 4.777030 step: 1800000, loss: 6.912136 Evaluation... "five" nearest neighbors: three, four, six, seven, eight, two, nine, zero, "of" nearest neighbors: the, in, first, from, became, and, following, under, "going" nearest neighbors: others, their, through, which, therefore, open, how, that, "hardware" nearest neighbors: computer, systems, include, research, standard, different, system, small, "american" nearest neighbors: b, d, born, actor, UNK, english, nine, german, "britain" nearest neighbors: china, country, europe, against, canada, military, island, including, step: 1810000, loss: 5.584600 step: 1820000, loss: 5.619820 step: 1830000, loss: 6.078709 step: 1840000, loss: 5.052518 step: 1850000, loss: 5.430106 step: 1860000, loss: 7.396770 step: 1870000, loss: 5.344787 step: 1880000, loss: 5.937998 step: 1890000, loss: 5.706491 step: 1900000, loss: 5.140662 step: 1910000, loss: 5.607048 step: 1920000, loss: 5.407231 step: 1930000, loss: 6.238531 step: 1940000, loss: 5.567973 step: 1950000, loss: 4.894245 step: 1960000, loss: 6.104193 step: 1970000, loss: 5.282631 step: 1980000, loss: 6.189069 step: 1990000, loss: 6.169409 step: 2000000, loss: 6.470152 Evaluation... "five" nearest neighbors: four, three, six, seven, eight, two, nine, zero, "of" nearest neighbors: the, its, in, with, and, including, within, against, "going" nearest neighbors: others, only, therefore, will, how, a, far, though, "hardware" nearest neighbors: computer, systems, for, network, software, program, research, system, "american" nearest neighbors: born, actor, d, italian, german, john, robert, b, "britain" nearest neighbors: china, country, europe, canada, british, former, island, france, step: 2010000, loss: 5.298714 step: 2020000, loss: 5.494207 step: 2030000, loss: 5.410875 step: 2040000, loss: 6.228232 step: 2050000, loss: 5.044596 step: 2060000, loss: 4.624638 step: 2070000, loss: 4.919327 step: 2080000, loss: 4.639625 step: 2090000, loss: 4.865627 step: 2100000, loss: 4.951073 step: 2110000, loss: 5.973768 step: 2120000, loss: 7.366824 step: 2130000, loss: 5.149571 step: 2140000, loss: 7.846234 step: 2150000, loss: 5.449315 step: 2160000, loss: 5.359211 step: 2170000, loss: 5.171029 step: 2180000, loss: 6.106437 step: 2190000, loss: 6.043995 step: 2200000, loss: 5.642351 Evaluation... "five" nearest neighbors: four, three, six, two, eight, seven, zero, one, "of" nearest neighbors: the, and, its, see, for, in, with, including, "going" nearest neighbors: others, therefore, how, even, them, your, have, although, "hardware" nearest neighbors: computer, systems, system, network, program, research, software, include, "american" nearest neighbors: english, french, german, canadian, british, film, author, italian, "britain" nearest neighbors: europe, china, country, germany, british, england, france, throughout, step: 2210000, loss: 4.427110 step: 2220000, loss: 6.240989 step: 2230000, loss: 5.184978 step: 2240000, loss: 8.035570 step: 2250000, loss: 5.793781 step: 2260000, loss: 4.908427 step: 2270000, loss: 8.807668 step: 2280000, loss: 6.083229 step: 2290000, loss: 5.773360 step: 2300000, loss: 5.613671 step: 2310000, loss: 6.080076 step: 2320000, loss: 5.288568 step: 2330000, loss: 5.949232 step: 2340000, loss: 5.479994 step: 2350000, loss: 7.717686 step: 2360000, loss: 5.163609 step: 2370000, loss: 5.989407 step: 2380000, loss: 5.785729 step: 2390000, loss: 5.345478 step: 2400000, loss: 6.627133 Evaluation... "five" nearest neighbors: three, four, six, two, seven, eight, zero, nine, "of" nearest neighbors: the, in, and, including, from, within, its, with, "going" nearest neighbors: therefore, people, they, out, only, according, your, now, "hardware" nearest neighbors: computer, systems, network, program, system, software, run, design, "american" nearest neighbors: author, born, actor, english, canadian, british, italian, d, "britain" nearest neighbors: china, europe, country, throughout, france, canada, england, western, step: 2410000, loss: 5.666146 step: 2420000, loss: 5.316198 step: 2430000, loss: 5.129625 step: 2440000, loss: 5.247949 step: 2450000, loss: 5.741394 step: 2460000, loss: 5.833083 step: 2470000, loss: 7.704844 step: 2480000, loss: 5.398345 step: 2490000, loss: 5.089633 step: 2500000, loss: 5.620508 step: 2510000, loss: 4.976034 step: 2520000, loss: 5.884676 step: 2530000, loss: 6.649922 step: 2540000, loss: 5.002588 step: 2550000, loss: 5.072144 step: 2560000, loss: 5.165375 step: 2570000, loss: 5.310089 step: 2580000, loss: 5.481957 step: 2590000, loss: 6.104440 step: 2600000, loss: 5.339644 Evaluation... "five" nearest neighbors: three, four, six, seven, eight, nine, two, zero, "of" nearest neighbors: the, first, from, with, became, in, following, and, "going" nearest neighbors: how, therefore, back, will, through, always, your, make, "hardware" nearest neighbors: computer, systems, system, network, program, technology, design, software, "american" nearest neighbors: actor, singer, born, b, author, d, english, writer, "britain" nearest neighbors: europe, china, throughout, great, england, france, country, india, step: 2610000, loss: 7.754117 step: 2620000, loss: 5.979313 step: 2630000, loss: 5.394362 step: 2640000, loss: 4.866740 step: 2650000, loss: 5.219806 step: 2660000, loss: 6.074809 step: 2670000, loss: 6.216953 step: 2680000, loss: 5.944881 step: 2690000, loss: 5.863350 step: 2700000, loss: 6.128705 step: 2710000, loss: 5.502523 step: 2720000, loss: 5.300839 step: 2730000, loss: 6.358493 step: 2740000, loss: 6.058306 step: 2750000, loss: 4.689510 step: 2760000, loss: 6.032880 step: 2770000, loss: 5.844904 step: 2780000, loss: 5.385874 step: 2790000, loss: 5.370956 step: 2800000, loss: 4.912577 Evaluation... "five" nearest neighbors: four, six, three, eight, seven, two, nine, one, "of" nearest neighbors: in, the, and, from, including, following, with, under, "going" nearest neighbors: your, then, through, will, how, so, back, even, "hardware" nearest neighbors: computer, systems, program, network, design, standard, physical, software, "american" nearest neighbors: actor, singer, born, author, writer, canadian, italian, d, "britain" nearest neighbors: europe, china, england, throughout, france, india, great, germany, step: 2810000, loss: 5.897756 step: 2820000, loss: 7.194932 step: 2830000, loss: 7.430175 step: 2840000, loss: 7.258231 step: 2850000, loss: 5.837617 step: 2860000, loss: 5.496673 step: 2870000, loss: 6.173716 step: 2880000, loss: 6.095749 step: 2890000, loss: 6.064944 step: 2900000, loss: 5.560488 step: 2910000, loss: 4.966107 step: 2920000, loss: 5.789579 step: 2930000, loss: 4.525987 step: 2940000, loss: 6.704808 step: 2950000, loss: 4.506433 step: 2960000, loss: 6.251270 step: 2970000, loss: 5.588204 step: 2980000, loss: 5.423235 step: 2990000, loss: 5.613834 step: 3000000, loss: 5.137326 Evaluation... "five" nearest neighbors: four, three, six, seven, eight, two, zero, one, "of" nearest neighbors: the, including, and, with, in, its, includes, within, "going" nearest neighbors: how, they, when, them, make, always, your, though, "hardware" nearest neighbors: computer, systems, network, program, physical, design, technology, software, "american" nearest neighbors: canadian, english, australian, british, german, film, italian, author, "britain" nearest neighbors: europe, england, china, throughout, india, france, great, british,
[1] : https://arxiv.org/pdf/1301.3781.pdf
Be First to Comment