NLP - Pretrained Models - 2018 - BERT - Walkthrough: BertForMaskedLM
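The script below uses BertForMaskedLM with a local bert-base-chinese checkpoint to score a Chinese sentence: each character is masked in turn, the model predicts the masked position, and the log-probability it assigns to the original character is accumulated into a sentence-level score, which is then converted into a (pseudo-)perplexity.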
import numpy as np
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertForMaskedLM

with torch.no_grad():
    # Load the pre-trained tokenizer (vocabulary) and model (weights)
    tokenizer = BertTokenizer.from_pretrained(r'D:\Pretrained_Model\bert-base-chinese')
    model = BertForMaskedLM.from_pretrained(r'D:\Pretrained_Model\bert-base-chinese')
    model.eval()

    sentence = "我不会忘记和你一起奋斗的时光。"
    # The sentence is scored as-is, without adding [CLS]/[SEP]
    tokenize_input = tokenizer.tokenize(sentence)
    print('tokenize_input = ', tokenize_input)
    tensor_input = torch.tensor([tokenizer.convert_tokens_to_ids(tokenize_input)])
    sen_len = len(tokenize_input)
    sentence_loss = 0.

    for idx, word in enumerate(tokenize_input):
        print('\n\n idx = {0}'.format(idx))
        # Replace the idx-th character of the sentence with [MASK]
        tokenize_input[idx] = '[MASK]'
        mask_input = torch.tensor([tokenizer.convert_tokens_to_ids(tokenize_input)])
        print('\t mask_input = {0}'.format(mask_input))
        output = model(mask_input)
        print('\n\t output = {0}'.format(output))
        # output[0] holds the logits, shape (batch_size, seq_len, vocab_size)
        prediction_scores = output[0]
        print('\n\t prediction_scores = output[0] = {0}'.format(prediction_scores))
        # Log-probabilities over the vocabulary at the masked position
        softmax = nn.Softmax(dim=0)
        ps = softmax(prediction_scores[0, idx]).log()
        print('\n\t ps = {0}'.format(ps))
        # Log-probability assigned to the original character
        word_loss = ps[tensor_input[0, idx]]
        print('\n\t word_loss = {0}'.format(word_loss))
        sentence_loss += word_loss.item()
        # Restore the original character before masking the next position
        tokenize_input[idx] = word

    # Pseudo-perplexity: exponentiated negative mean log-probability
    ppl = np.exp(-sentence_loss / sen_len)
    print("sentence_loss = {0};ppl = {1}".format(sentence_loss, ppl))
Output:
tokenize_input = ['我', '不', '会', '忘', '记', '和', '你', '一', '起', '奋', '斗', '的', '时', '光', '。']
idx = 0
mask_input = tensor([[ 103, 679, 833, 2563, 6381, 1469, 872, 671, 6629, 1939, 3159, 4638,
3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-10.0067, -9.9702, -10.3403, ..., -7.0367, -7.9918, -7.8884],
[ -8.9250, -8.6627, -8.8329, ..., -5.6988, -5.0543, -7.3196],
[-17.5815, -16.8282, -17.5551, ..., -11.2575, -8.4464, -15.8063],
...,
[-17.7271, -17.4097, -18.3814, ..., -12.5380, -14.9620, -13.0537],
[-14.8090, -15.5407, -14.8516, ..., -9.6344, -8.9355, -11.3215],
[-10.2498, -10.0447, -10.2479, ..., -5.7584, -4.9482, -5.1695]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-10.0067, -9.9702, -10.3403, ..., -7.0367, -7.9918, -7.8884],
[ -8.9250, -8.6627, -8.8329, ..., -5.6988, -5.0543, -7.3196],
[-17.5815, -16.8282, -17.5551, ..., -11.2575, -8.4464, -15.8063],
...,
[-17.7271, -17.4097, -18.3814, ..., -12.5380, -14.9620, -13.0537],
[-14.8090, -15.5407, -14.8516, ..., -9.6344, -8.9355, -11.3215],
[-10.2498, -10.0447, -10.2479, ..., -5.7584, -4.9482, -5.1695]]])
ps = tensor([-20.2204, -20.1840, -20.5541, ..., -17.2505, -18.2055, -18.1022])
word_loss = -4.207489013671875
idx = 1
mask_input = tensor([[2769, 103, 833, 2563, 6381, 1469, 872, 671, 6629, 1939, 3159, 4638,
3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-14.2572, -13.5664, -13.7818, ..., -12.4880, -11.4941, -11.0043],
[-13.3572, -12.7593, -13.1295, ..., -8.9165, -6.9501, -8.0928],
[-18.3267, -17.2391, -16.6626, ..., -9.1351, -8.5136, -10.8610],
...,
[-16.8631, -15.9635, -16.3637, ..., -11.8876, -12.6025, -10.4363],
[-14.1836, -14.0044, -13.6275, ..., -9.0348, -10.7950, -9.2346],
[-16.2714, -15.7472, -15.5543, ..., -9.3256, -9.7824, -7.4806]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-14.2572, -13.5664, -13.7818, ..., -12.4880, -11.4941, -11.0043],
[-13.3572, -12.7593, -13.1295, ..., -8.9165, -6.9501, -8.0928],
[-18.3267, -17.2391, -16.6626, ..., -9.1351, -8.5136, -10.8610],
...,
[-16.8631, -15.9635, -16.3637, ..., -11.8876, -12.6025, -10.4363],
[-14.1836, -14.0044, -13.6275, ..., -9.0348, -10.7950, -9.2346],
[-16.2714, -15.7472, -15.5543, ..., -9.3256, -9.7824, -7.4806]]])
ps = tensor([-27.0073, -26.4094, -26.7796, ..., -22.5666, -20.6002, -21.7429])
word_loss = -3.4179904460906982
idx = 2
mask_input = tensor([[2769, 679, 103, 2563, 6381, 1469, 872, 671, 6629, 1939, 3159, 4638,
3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-10.4260, -9.7421, -10.0949, ..., -9.1981, -9.3232, -9.0737],
[-11.1497, -10.3329, -10.3952, ..., -6.6423, -5.8855, -7.4425],
[-10.2441, -9.8596, -10.0538, ..., -6.8899, -6.3872, -7.1557],
...,
[-14.8344, -13.9255, -14.6416, ..., -11.8463, -11.3034, -9.4505],
[-13.0585, -12.7334, -12.5315, ..., -9.1430, -9.0249, -8.6625],
[-10.8999, -10.1885, -10.4381, ..., -6.9490, -6.5864, -5.2088]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-10.4260, -9.7421, -10.0949, ..., -9.1981, -9.3232, -9.0737],
[-11.1497, -10.3329, -10.3952, ..., -6.6423, -5.8855, -7.4425],
[-10.2441, -9.8596, -10.0538, ..., -6.8899, -6.3872, -7.1557],
...,
[-14.8344, -13.9255, -14.6416, ..., -11.8463, -11.3034, -9.4505],
[-13.0585, -12.7334, -12.5315, ..., -9.1430, -9.0249, -8.6625],
[-10.8999, -10.1885, -10.4381, ..., -6.9490, -6.5864, -5.2088]]])
ps = tensor([-23.9556, -23.5712, -23.7654, ..., -20.6015, -20.0987, -20.8673])
word_loss = -3.0778353214263916
idx = 3
mask_input = tensor([[2769, 679, 833, 103, 6381, 1469, 872, 671, 6629, 1939, 3159, 4638,
3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-11.1854, -10.8186, -10.8980, ..., -10.0304, -6.8312, -10.1228],
[-18.3292, -17.1635, -18.1168, ..., -12.8976, -6.5055, -10.3133],
[-18.9977, -17.6461, -18.6712, ..., -12.0834, -9.4692, -13.3222],
...,
[-15.9868, -15.1038, -15.7956, ..., -11.8385, -8.8921, -11.2440],
[-13.2753, -13.0012, -12.8868, ..., -8.5294, -7.7151, -9.6861],
[-14.0791, -13.6179, -13.8650, ..., -9.9380, -8.0259, -6.6505]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-11.1854, -10.8186, -10.8980, ..., -10.0304, -6.8312, -10.1228],
[-18.3292, -17.1635, -18.1168, ..., -12.8976, -6.5055, -10.3133],
[-18.9977, -17.6461, -18.6712, ..., -12.0834, -9.4692, -13.3222],
...,
[-15.9868, -15.1038, -15.7956, ..., -11.8385, -8.8921, -11.2440],
[-13.2753, -13.0012, -12.8868, ..., -8.5294, -7.7151, -9.6861],
[-14.0791, -13.6179, -13.8650, ..., -9.9380, -8.0259, -6.6505]]])
ps = tensor([-28.6803, -28.3364, -28.7086, ..., -26.4609, -23.3448, -25.8600])
word_loss = -0.024608036503195763
idx = 4
mask_input = tensor([[2769, 679, 833, 2563, 103, 1469, 872, 671, 6629, 1939, 3159, 4638,
3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-11.6949, -11.0920, -11.4218, ..., -10.2302, -9.3920, -10.9836],
[-18.6331, -17.9585, -18.3607, ..., -12.7316, -10.2360, -14.0741],
[-19.6247, -18.4559, -19.2653, ..., -12.6368, -11.0657, -15.6243],
...,
[-15.9810, -15.1353, -15.9852, ..., -12.4308, -12.2341, -10.9428],
[-13.4082, -13.1908, -13.3454, ..., -10.0117, -10.6251, -10.7604],
[-13.8807, -13.1495, -13.6315, ..., -9.3678, -9.9106, -7.1275]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-11.6949, -11.0920, -11.4218, ..., -10.2302, -9.3920, -10.9836],
[-18.6331, -17.9585, -18.3607, ..., -12.7316, -10.2360, -14.0741],
[-19.6247, -18.4559, -19.2653, ..., -12.6368, -11.0657, -15.6243],
...,
[-15.9810, -15.1353, -15.9852, ..., -12.4308, -12.2341, -10.9428],
[-13.4082, -13.1908, -13.3454, ..., -10.0117, -10.6251, -10.7604],
[-13.8807, -13.1495, -13.6315, ..., -9.3678, -9.9106, -7.1275]]])
ps = tensor([-30.6680, -30.0711, -30.5083, ..., -28.1964, -25.7133, -29.4577])
word_loss = -0.021782301366329193
idx = 5
mask_input = tensor([[2769, 679, 833, 2563, 6381, 103, 872, 671, 6629, 1939, 3159, 4638,
3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-10.8215, -10.1308, -10.5400, ..., -9.4374, -9.1841, -9.7690],
[-16.6464, -15.7021, -16.0986, ..., -9.1416, -7.5447, -8.9926],
[-18.4551, -17.0224, -17.3103, ..., -8.7594, -8.8654, -10.6732],
...,
[-14.8322, -13.5759, -14.5636, ..., -10.8961, -10.6665, -8.9241],
[-12.3797, -11.8117, -11.9058, ..., -8.7238, -9.1733, -9.1059],
[-12.6140, -11.4767, -11.6919, ..., -8.0748, -9.4955, -5.7950]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-10.8215, -10.1308, -10.5400, ..., -9.4374, -9.1841, -9.7690],
[-16.6464, -15.7021, -16.0986, ..., -9.1416, -7.5447, -8.9926],
[-18.4551, -17.0224, -17.3103, ..., -8.7594, -8.8654, -10.6732],
...,
[-14.8322, -13.5759, -14.5636, ..., -10.8961, -10.6665, -8.9241],
[-12.3797, -11.8117, -11.9058, ..., -8.7238, -9.1733, -9.1059],
[-12.6140, -11.4767, -11.6919, ..., -8.0748, -9.4955, -5.7950]]])
ps = tensor([-20.0339, -19.5133, -20.0343, ..., -17.0866, -17.4351, -15.4161])
word_loss = -2.464529037475586
idx = 6
mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 103, 671, 6629, 1939, 3159, 4638,
3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-11.6927, -10.7244, -10.8993, ..., -8.0539, -8.4719, -9.0431],
[-14.6502, -14.0066, -14.4193, ..., -7.7190, -5.6522, -8.8189],
[-17.8192, -16.0978, -17.0802, ..., -8.5008, -7.9125, -11.5379],
...,
[-15.0797, -14.0576, -14.8092, ..., -10.5593, -11.1677, -9.6744],
[-12.6444, -12.2899, -12.1446, ..., -8.7772, -9.4889, -9.6838],
[-11.8326, -11.0902, -11.1104, ..., -7.6406, -8.1461, -6.2924]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-11.6927, -10.7244, -10.8993, ..., -8.0539, -8.4719, -9.0431],
[-14.6502, -14.0066, -14.4193, ..., -7.7190, -5.6522, -8.8189],
[-17.8192, -16.0978, -17.0802, ..., -8.5008, -7.9125, -11.5379],
...,
[-15.0797, -14.0576, -14.8092, ..., -10.5593, -11.1677, -9.6744],
[-12.6444, -12.2899, -12.1446, ..., -8.7772, -9.4889, -9.6838],
[-11.8326, -11.0902, -11.1104, ..., -7.6406, -8.1461, -6.2924]]])
ps = tensor([-17.8420, -17.7343, -17.7814, ..., -15.6324, -16.8942, -15.6699])
word_loss = -3.217534065246582
idx = 7
mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 103, 6629, 1939, 3159, 4638,
3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-12.0652, -11.1372, -11.7658, ..., -10.3255, -9.5978, -10.2930],
[-17.4623, -16.3227, -17.0211, ..., -10.0448, -8.8320, -11.6701],
[-19.7825, -18.2467, -18.9617, ..., -10.4417, -10.0575, -13.2705],
...,
[-16.7194, -15.7009, -16.5568, ..., -11.9396, -12.9538, -9.1279],
[-14.1858, -13.9772, -14.0763, ..., -9.9030, -10.4625, -8.7678],
[-14.0998, -13.0324, -13.3418, ..., -8.7676, -10.0443, -6.4476]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-12.0652, -11.1372, -11.7658, ..., -10.3255, -9.5978, -10.2930],
[-17.4623, -16.3227, -17.0211, ..., -10.0448, -8.8320, -11.6701],
[-19.7825, -18.2467, -18.9617, ..., -10.4417, -10.0575, -13.2705],
...,
[-16.7194, -15.7009, -16.5568, ..., -11.9396, -12.9538, -9.1279],
[-14.1858, -13.9772, -14.0763, ..., -9.9030, -10.4625, -8.7678],
[-14.0998, -13.0324, -13.3418, ..., -8.7676, -10.0443, -6.4476]]])
ps = tensor([-29.0154, -28.9152, -28.5686, ..., -23.7333, -25.4041, -24.8862])
word_loss = -0.006231430917978287
idx = 8
mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 671, 103, 1939, 3159, 4638,
3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-12.3327, -11.7290, -12.1774, ..., -10.6400, -9.2812, -10.8762],
[-17.4025, -16.3325, -17.3093, ..., -9.6641, -8.0054, -10.9477],
[-19.8157, -18.1812, -19.2325, ..., -10.3199, -9.6911, -13.2068],
...,
[-15.4990, -14.1986, -15.4210, ..., -10.8605, -11.1951, -9.2175],
[-13.5214, -13.1154, -13.2580, ..., -9.1551, -8.5442, -8.5556],
[-13.9661, -12.7296, -13.4830, ..., -7.9905, -9.4974, -5.5795]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-12.3327, -11.7290, -12.1774, ..., -10.6400, -9.2812, -10.8762],
[-17.4025, -16.3325, -17.3093, ..., -9.6641, -8.0054, -10.9477],
[-19.8157, -18.1812, -19.2325, ..., -10.3199, -9.6911, -13.2068],
...,
[-15.4990, -14.1986, -15.4210, ..., -10.8605, -11.1951, -9.2175],
[-13.5214, -13.1154, -13.2580, ..., -9.1551, -8.5442, -8.5556],
[-13.9661, -12.7296, -13.4830, ..., -7.9905, -9.4974, -5.5795]]])
ps = tensor([-26.1031, -25.4673, -25.6910, ..., -23.7415, -24.6235, -23.6001])
word_loss = -0.4470815658569336
idx = 9
mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 671, 6629, 103, 3159, 4638,
3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-12.9872, -12.3978, -12.9848, ..., -11.8125, -12.0875, -12.0079],
[-17.5210, -16.8555, -17.3870, ..., -10.8851, -9.9333, -12.9947],
[-19.9390, -18.8892, -19.5466, ..., -12.1456, -11.2809, -13.7224],
...,
[-14.5711, -13.7166, -14.6204, ..., -10.1978, -11.9384, -9.0040],
[-13.0610, -12.8815, -12.9802, ..., -9.4830, -9.4141, -10.5692],
[-14.2910, -13.4047, -14.0815, ..., -8.9679, -11.1636, -6.8003]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-12.9872, -12.3978, -12.9848, ..., -11.8125, -12.0875, -12.0079],
[-17.5210, -16.8555, -17.3870, ..., -10.8851, -9.9333, -12.9947],
[-19.9390, -18.8892, -19.5466, ..., -12.1456, -11.2809, -13.7224],
...,
[-14.5711, -13.7166, -14.6204, ..., -10.1978, -11.9384, -9.0040],
[-13.0610, -12.8815, -12.9802, ..., -9.4830, -9.4141, -10.5692],
[-14.2910, -13.4047, -14.0815, ..., -8.9679, -11.1636, -6.8003]]])
ps = tensor([-23.7871, -23.3177, -23.7469, ..., -19.6898, -21.1636, -19.3445])
word_loss = -1.905866265296936
idx = 10
mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 671, 6629, 1939, 103, 4638,
3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-10.2435, -9.4682, -9.9029, ..., -8.6173, -7.9944, -9.5463],
[-14.7155, -14.1531, -14.7035, ..., -7.7060, -7.0066, -8.7167],
[-17.8262, -16.8357, -17.2724, ..., -9.3416, -9.6015, -11.3678],
...,
[-13.5025, -12.6059, -13.4680, ..., -9.6887, -10.2040, -7.5718],
[-11.8572, -11.8200, -11.6956, ..., -8.0838, -8.2098, -7.9838],
[-11.4906, -10.7753, -11.1489, ..., -6.4764, -8.7700, -4.7994]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-10.2435, -9.4682, -9.9029, ..., -8.6173, -7.9944, -9.5463],
[-14.7155, -14.1531, -14.7035, ..., -7.7060, -7.0066, -8.7167],
[-17.8262, -16.8357, -17.2724, ..., -9.3416, -9.6015, -11.3678],
...,
[-13.5025, -12.6059, -13.4680, ..., -9.6887, -10.2040, -7.5718],
[-11.8572, -11.8200, -11.6956, ..., -8.0838, -8.2098, -7.9838],
[-11.4906, -10.7753, -11.1489, ..., -6.4764, -8.7700, -4.7994]]])
ps = tensor([-23.3028, -23.2676, -24.0384, ..., -20.8967, -21.3373, -20.7125])
word_loss = -0.3310864269733429
idx = 11
mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 671, 6629, 1939, 3159, 103,
3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-11.1587, -10.6746, -11.6326, ..., -9.9938, -8.8795, -10.4635],
[-14.6800, -14.1649, -14.6931, ..., -7.1853, -6.1263, -11.4231],
[-17.5996, -16.4610, -17.1693, ..., -7.9229, -7.0681, -13.4018],
...,
[-13.6107, -12.4848, -13.5183, ..., -9.4305, -9.1442, -7.4951],
[-11.5701, -11.2959, -11.3109, ..., -7.2745, -7.0823, -8.4521],
[-13.5606, -12.9446, -13.3137, ..., -8.5220, -9.7932, -7.7482]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-11.1587, -10.6746, -11.6326, ..., -9.9938, -8.8795, -10.4635],
[-14.6800, -14.1649, -14.6931, ..., -7.1853, -6.1263, -11.4231],
[-17.5996, -16.4610, -17.1693, ..., -7.9229, -7.0681, -13.4018],
...,
[-13.6107, -12.4848, -13.5183, ..., -9.4305, -9.1442, -7.4951],
[-11.5701, -11.2959, -11.3109, ..., -7.2745, -7.0823, -8.4521],
[-13.5606, -12.9446, -13.3137, ..., -8.5220, -9.7932, -7.7482]]])
ps = tensor([-24.5581, -24.6442, -24.8213, ..., -21.0443, -21.8916, -20.5020])
word_loss = -0.0409548319876194
idx = 12
mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 671, 6629, 1939, 3159, 4638,
103, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-11.2262, -11.1173, -11.6287, ..., -10.3565, -9.4391, -11.4225],
[-15.2089, -14.6585, -15.3925, ..., -7.9105, -6.7598, -10.2716],
[-17.7514, -16.8604, -17.4242, ..., -8.0904, -8.6169, -12.3799],
...,
[-11.9148, -11.5928, -12.1447, ..., -7.0739, -9.0568, -7.8991],
[-10.9299, -10.9160, -10.9438, ..., -5.0096, -7.1774, -7.3603],
[-14.6292, -14.3548, -14.3348, ..., -6.8946, -10.3034, -8.7604]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-11.2262, -11.1173, -11.6287, ..., -10.3565, -9.4391, -11.4225],
[-15.2089, -14.6585, -15.3925, ..., -7.9105, -6.7598, -10.2716],
[-17.7514, -16.8604, -17.4242, ..., -8.0904, -8.6169, -12.3799],
...,
[-11.9148, -11.5928, -12.1447, ..., -7.0739, -9.0568, -7.8991],
[-10.9299, -10.9160, -10.9438, ..., -5.0096, -7.1774, -7.3603],
[-14.6292, -14.3548, -14.3348, ..., -6.8946, -10.3034, -8.7604]]])
ps = tensor([-26.6420, -26.3200, -26.8719, ..., -21.8011, -23.7840, -22.6264])
word_loss = -0.2741313576698303
idx = 13
mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 671, 6629, 1939, 3159, 4638,
3198, 103, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-12.9708, -12.1014, -12.6502, ..., -10.6858, -10.9495, -11.6393],
[-17.4693, -16.4352, -17.2923, ..., -10.1345, -9.2979, -12.3043],
[-19.2976, -17.8839, -18.8252, ..., -11.4233, -10.9146, -13.9556],
...,
[-14.2439, -13.8837, -14.3827, ..., -10.8131, -9.7626, -10.4449],
[-11.0731, -11.4156, -11.2104, ..., -8.5579, -9.0104, -8.7935],
[-13.5802, -13.1632, -13.3280, ..., -9.2640, -10.9600, -8.3216]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-12.9708, -12.1014, -12.6502, ..., -10.6858, -10.9495, -11.6393],
[-17.4693, -16.4352, -17.2923, ..., -10.1345, -9.2979, -12.3043],
[-19.2976, -17.8839, -18.8252, ..., -11.4233, -10.9146, -13.9556],
...,
[-14.2439, -13.8837, -14.3827, ..., -10.8131, -9.7626, -10.4449],
[-11.0731, -11.4156, -11.2104, ..., -8.5579, -9.0104, -8.7935],
[-13.5802, -13.1632, -13.3280, ..., -9.2640, -10.9600, -8.3216]]])
ps = tensor([-26.7180, -27.0605, -26.8553, ..., -24.2028, -24.6553, -24.4384])
word_loss = -2.3570048809051514
idx = 14
mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 671, 6629, 1939, 3159, 4638,
3198, 1045, 103]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-11.3928, -10.5590, -11.3276, ..., -8.2870, -7.0281, -9.3417],
[-15.1979, -14.2848, -14.9167, ..., -8.0477, -3.3199, -9.9085],
[-16.9765, -15.7591, -16.4064, ..., -7.3844, -3.6073, -10.5002],
...,
[-14.3350, -13.5203, -14.7181, ..., -9.1939, -8.4368, -6.3008],
[-11.5855, -11.6669, -11.5224, ..., -6.1303, -7.0456, -5.4713],
[ -9.3767, -9.1142, -9.3964, ..., -5.2297, -5.3290, -3.2478]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-11.3928, -10.5590, -11.3276, ..., -8.2870, -7.0281, -9.3417],
[-15.1979, -14.2848, -14.9167, ..., -8.0477, -3.3199, -9.9085],
[-16.9765, -15.7591, -16.4064, ..., -7.3844, -3.6073, -10.5002],
...,
[-14.3350, -13.5203, -14.7181, ..., -9.1939, -8.4368, -6.3008],
[-11.5855, -11.6669, -11.5224, ..., -6.1303, -7.0456, -5.4713],
[ -9.3767, -9.1142, -9.3964, ..., -5.2297, -5.3290, -3.2478]]])
ps = tensor([-20.6789, -20.4164, -20.6986, ..., -16.5319, -16.6312, -14.5500])
word_loss = -1.3718788623809814
sentence_loss = -23.16600384376943;ppl = 4.685160888290345
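As a sanity check, the fifteen word_loss values above sum to -23.166, and exp(23.166 / 15) ≈ 4.685, matching the printed ppl. Note how characters that complete a common word, such as 记 in 忘记 (idx = 4) or 一 in 一起 (idx = 7), incur near-zero loss, while the sentence-initial 我 (idx = 0) is the hardest to predict.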