NLP - Pre-trained Models - 2018 - BERT - Analysis: BertForMaskedLM

梦里梦外; 2023-10-01 20:21
import numpy as np
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertForMaskedLM

# Load the pre-trained tokenizer (vocabulary) and model (weights)
tokenizer = BertTokenizer.from_pretrained(r'D:\Pretrained_Model\bert-base-chinese')
model = BertForMaskedLM.from_pretrained(r'D:\Pretrained_Model\bert-base-chinese')
model.eval()

sentence = "我不会忘记和你一起奋斗的时光。"
tokenize_input = tokenizer.tokenize(sentence)
print('tokenize_input = ', tokenize_input)
tensor_input = torch.tensor([tokenizer.convert_tokens_to_ids(tokenize_input)])
sen_len = len(tokenize_input)
sentence_loss = 0.

with torch.no_grad():
    for idx, word in enumerate(tokenize_input):
        print('\n\n idx = {0}'.format(idx))
        # Mask the idx-th character of the sentence
        tokenize_input[idx] = '[MASK]'
        mask_input = torch.tensor([tokenizer.convert_tokens_to_ids(tokenize_input)])
        print('\t mask_input = {0}'.format(mask_input))
        output = model(mask_input)
        print('\n\t output = {0}'.format(output))
        prediction_scores = output[0]  # logits, shape [batch, seq_len, vocab_size]
        print('\n\t prediction_scores = output[0] = {0}'.format(prediction_scores))
        softmax = nn.Softmax(dim=0)
        # Log-probabilities over the vocabulary at the masked position
        ps = softmax(prediction_scores[0, idx]).log()
        print('\n\t ps = {0}'.format(ps))
        # Log-probability the model assigns to the original character
        word_loss = ps[tensor_input[0, idx]]
        print('\n\t word_loss = {0}'.format(word_loss))
        sentence_loss += word_loss.item()
        # Restore the original character before masking the next position
        tokenize_input[idx] = word

ppl = np.exp(-sentence_loss / sen_len)
print("sentence_loss = {0}; ppl = {1}".format(sentence_loss, ppl))
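The loop computes what the final two lines turn into a perplexity: every character is masked in turn, the model predicts a distribution over the vocabulary at the masked position, and the log-probability assigned to the original character is accumulated. A minimal formulation of the quantity being computed (often called pseudo-perplexity; the label is mine, not the post's):

\[
\mathrm{PPL} = \exp\left(-\frac{1}{N}\sum_{i=1}^{N}\log p\left(w_i \mid w_1,\ldots,w_{i-1},\mathrm{[MASK]},w_{i+1},\ldots,w_N\right)\right)
\]

Here N is the number of characters (15 for this sentence) and each probability comes from the softmax over BERT's logits at the masked position i.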

Output:

tokenize_input = ['我', '不', '会', '忘', '记', '和', '你', '一', '起', '奋', '斗', '的', '时', '光', '。']

idx = 0
mask_input = tensor([[ 103, 679, 833, 2563, 6381, 1469, 872, 671, 6629, 1939, 3159, 4638,
        3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-10.0067, -9.9702, -10.3403, ..., -7.0367, -7.9918, -7.8884],
        [ -8.9250, -8.6627, -8.8329, ..., -5.6988, -5.0543, -7.3196],
        [-17.5815, -16.8282, -17.5551, ..., -11.2575, -8.4464, -15.8063],
        ...,
        [-17.7271, -17.4097, -18.3814, ..., -12.5380, -14.9620, -13.0537],
        [-14.8090, -15.5407, -14.8516, ..., -9.6344, -8.9355, -11.3215],
        [-10.2498, -10.0447, -10.2479, ..., -5.7584, -4.9482, -5.1695]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-10.0067, -9.9702, -10.3403, ..., -7.0367, -7.9918, -7.8884],
        [ -8.9250, -8.6627, -8.8329, ..., -5.6988, -5.0543, -7.3196],
        [-17.5815, -16.8282, -17.5551, ..., -11.2575, -8.4464, -15.8063],
        ...,
        [-17.7271, -17.4097, -18.3814, ..., -12.5380, -14.9620, -13.0537],
        [-14.8090, -15.5407, -14.8516, ..., -9.6344, -8.9355, -11.3215],
        [-10.2498, -10.0447, -10.2479, ..., -5.7584, -4.9482, -5.1695]]])
ps = tensor([-20.2204, -20.1840, -20.5541, ..., -17.2505, -18.2055, -18.1022])
word_loss = -4.207489013671875

idx = 1
mask_input = tensor([[2769, 103, 833, 2563, 6381, 1469, 872, 671, 6629, 1939, 3159, 4638,
        3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-14.2572, -13.5664, -13.7818, ..., -12.4880, -11.4941, -11.0043],
        [-13.3572, -12.7593, -13.1295, ..., -8.9165, -6.9501, -8.0928],
        [-18.3267, -17.2391, -16.6626, ..., -9.1351, -8.5136, -10.8610],
        ...,
        [-16.8631, -15.9635, -16.3637, ..., -11.8876, -12.6025, -10.4363],
        [-14.1836, -14.0044, -13.6275, ..., -9.0348, -10.7950, -9.2346],
        [-16.2714, -15.7472, -15.5543, ..., -9.3256, -9.7824, -7.4806]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-14.2572, -13.5664, -13.7818, ..., -12.4880, -11.4941, -11.0043],
        [-13.3572, -12.7593, -13.1295, ..., -8.9165, -6.9501, -8.0928],
        [-18.3267, -17.2391, -16.6626, ..., -9.1351, -8.5136, -10.8610],
        ...,
        [-16.8631, -15.9635, -16.3637, ..., -11.8876, -12.6025, -10.4363],
        [-14.1836, -14.0044, -13.6275, ..., -9.0348, -10.7950, -9.2346],
        [-16.2714, -15.7472, -15.5543, ..., -9.3256, -9.7824, -7.4806]]])
ps = tensor([-27.0073, -26.4094, -26.7796, ..., -22.5666, -20.6002, -21.7429])
word_loss = -3.4179904460906982

idx = 2
mask_input = tensor([[2769, 679, 103, 2563, 6381, 1469, 872, 671, 6629, 1939, 3159, 4638,
        3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-10.4260, -9.7421, -10.0949, ..., -9.1981, -9.3232, -9.0737],
        [-11.1497, -10.3329, -10.3952, ..., -6.6423, -5.8855, -7.4425],
        [-10.2441, -9.8596, -10.0538, ..., -6.8899, -6.3872, -7.1557],
        ...,
        [-14.8344, -13.9255, -14.6416, ..., -11.8463, -11.3034, -9.4505],
        [-13.0585, -12.7334, -12.5315, ..., -9.1430, -9.0249, -8.6625],
        [-10.8999, -10.1885, -10.4381, ..., -6.9490, -6.5864, -5.2088]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-10.4260, -9.7421, -10.0949, ..., -9.1981, -9.3232, -9.0737],
        [-11.1497, -10.3329, -10.3952, ..., -6.6423, -5.8855, -7.4425],
        [-10.2441, -9.8596, -10.0538, ..., -6.8899, -6.3872, -7.1557],
        ...,
        [-14.8344, -13.9255, -14.6416, ..., -11.8463, -11.3034, -9.4505],
        [-13.0585, -12.7334, -12.5315, ..., -9.1430, -9.0249, -8.6625],
        [-10.8999, -10.1885, -10.4381, ..., -6.9490, -6.5864, -5.2088]]])
ps = tensor([-23.9556, -23.5712, -23.7654, ..., -20.6015, -20.0987, -20.8673])
word_loss = -3.0778353214263916

idx = 3
mask_input = tensor([[2769, 679, 833, 103, 6381, 1469, 872, 671, 6629, 1939, 3159, 4638,
        3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-11.1854, -10.8186, -10.8980, ..., -10.0304, -6.8312, -10.1228],
        [-18.3292, -17.1635, -18.1168, ..., -12.8976, -6.5055, -10.3133],
        [-18.9977, -17.6461, -18.6712, ..., -12.0834, -9.4692, -13.3222],
        ...,
        [-15.9868, -15.1038, -15.7956, ..., -11.8385, -8.8921, -11.2440],
        [-13.2753, -13.0012, -12.8868, ..., -8.5294, -7.7151, -9.6861],
        [-14.0791, -13.6179, -13.8650, ..., -9.9380, -8.0259, -6.6505]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-11.1854, -10.8186, -10.8980, ..., -10.0304, -6.8312, -10.1228],
        [-18.3292, -17.1635, -18.1168, ..., -12.8976, -6.5055, -10.3133],
        [-18.9977, -17.6461, -18.6712, ..., -12.0834, -9.4692, -13.3222],
        ...,
        [-15.9868, -15.1038, -15.7956, ..., -11.8385, -8.8921, -11.2440],
        [-13.2753, -13.0012, -12.8868, ..., -8.5294, -7.7151, -9.6861],
        [-14.0791, -13.6179, -13.8650, ..., -9.9380, -8.0259, -6.6505]]])
ps = tensor([-28.6803, -28.3364, -28.7086, ..., -26.4609, -23.3448, -25.8600])
word_loss = -0.024608036503195763

idx = 4
mask_input = tensor([[2769, 679, 833, 2563, 103, 1469, 872, 671, 6629, 1939, 3159, 4638,
        3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-11.6949, -11.0920, -11.4218, ..., -10.2302, -9.3920, -10.9836],
        [-18.6331, -17.9585, -18.3607, ..., -12.7316, -10.2360, -14.0741],
        [-19.6247, -18.4559, -19.2653, ..., -12.6368, -11.0657, -15.6243],
        ...,
        [-15.9810, -15.1353, -15.9852, ..., -12.4308, -12.2341, -10.9428],
        [-13.4082, -13.1908, -13.3454, ..., -10.0117, -10.6251, -10.7604],
        [-13.8807, -13.1495, -13.6315, ..., -9.3678, -9.9106, -7.1275]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-11.6949, -11.0920, -11.4218, ..., -10.2302, -9.3920, -10.9836],
        [-18.6331, -17.9585, -18.3607, ..., -12.7316, -10.2360, -14.0741],
        [-19.6247, -18.4559, -19.2653, ..., -12.6368, -11.0657, -15.6243],
        ...,
        [-15.9810, -15.1353, -15.9852, ..., -12.4308, -12.2341, -10.9428],
        [-13.4082, -13.1908, -13.3454, ..., -10.0117, -10.6251, -10.7604],
        [-13.8807, -13.1495, -13.6315, ..., -9.3678, -9.9106, -7.1275]]])
ps = tensor([-30.6680, -30.0711, -30.5083, ..., -28.1964, -25.7133, -29.4577])
word_loss = -0.021782301366329193

idx = 5
mask_input = tensor([[2769, 679, 833, 2563, 6381, 103, 872, 671, 6629, 1939, 3159, 4638,
        3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-10.8215, -10.1308, -10.5400, ..., -9.4374, -9.1841, -9.7690],
        [-16.6464, -15.7021, -16.0986, ..., -9.1416, -7.5447, -8.9926],
        [-18.4551, -17.0224, -17.3103, ..., -8.7594, -8.8654, -10.6732],
        ...,
        [-14.8322, -13.5759, -14.5636, ..., -10.8961, -10.6665, -8.9241],
        [-12.3797, -11.8117, -11.9058, ..., -8.7238, -9.1733, -9.1059],
        [-12.6140, -11.4767, -11.6919, ..., -8.0748, -9.4955, -5.7950]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-10.8215, -10.1308, -10.5400, ..., -9.4374, -9.1841, -9.7690],
        [-16.6464, -15.7021, -16.0986, ..., -9.1416, -7.5447, -8.9926],
        [-18.4551, -17.0224, -17.3103, ..., -8.7594, -8.8654, -10.6732],
        ...,
        [-14.8322, -13.5759, -14.5636, ..., -10.8961, -10.6665, -8.9241],
        [-12.3797, -11.8117, -11.9058, ..., -8.7238, -9.1733, -9.1059],
        [-12.6140, -11.4767, -11.6919, ..., -8.0748, -9.4955, -5.7950]]])
ps = tensor([-20.0339, -19.5133, -20.0343, ..., -17.0866, -17.4351, -15.4161])
word_loss = -2.464529037475586

idx = 6
mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 103, 671, 6629, 1939, 3159, 4638,
        3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-11.6927, -10.7244, -10.8993, ..., -8.0539, -8.4719, -9.0431],
        [-14.6502, -14.0066, -14.4193, ..., -7.7190, -5.6522, -8.8189],
        [-17.8192, -16.0978, -17.0802, ..., -8.5008, -7.9125, -11.5379],
        ...,
        [-15.0797, -14.0576, -14.8092, ..., -10.5593, -11.1677, -9.6744],
        [-12.6444, -12.2899, -12.1446, ..., -8.7772, -9.4889, -9.6838],
        [-11.8326, -11.0902, -11.1104, ..., -7.6406, -8.1461, -6.2924]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-11.6927, -10.7244, -10.8993, ..., -8.0539, -8.4719, -9.0431],
        [-14.6502, -14.0066, -14.4193, ..., -7.7190, -5.6522, -8.8189],
        [-17.8192, -16.0978, -17.0802, ..., -8.5008, -7.9125, -11.5379],
        ...,
        [-15.0797, -14.0576, -14.8092, ..., -10.5593, -11.1677, -9.6744],
        [-12.6444, -12.2899, -12.1446, ..., -8.7772, -9.4889, -9.6838],
        [-11.8326, -11.0902, -11.1104, ..., -7.6406, -8.1461, -6.2924]]])
ps = tensor([-17.8420, -17.7343, -17.7814, ..., -15.6324, -16.8942, -15.6699])
word_loss = -3.217534065246582

idx = 7
mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 103, 6629, 1939, 3159, 4638,
        3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-12.0652, -11.1372, -11.7658, ..., -10.3255, -9.5978, -10.2930],
        [-17.4623, -16.3227, -17.0211, ..., -10.0448, -8.8320, -11.6701],
        [-19.7825, -18.2467, -18.9617, ..., -10.4417, -10.0575, -13.2705],
        ...,
        [-16.7194, -15.7009, -16.5568, ..., -11.9396, -12.9538, -9.1279],
        [-14.1858, -13.9772, -14.0763, ..., -9.9030, -10.4625, -8.7678],
        [-14.0998, -13.0324, -13.3418, ..., -8.7676, -10.0443, -6.4476]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-12.0652, -11.1372, -11.7658, ..., -10.3255, -9.5978, -10.2930],
        [-17.4623, -16.3227, -17.0211, ..., -10.0448, -8.8320, -11.6701],
        [-19.7825, -18.2467, -18.9617, ..., -10.4417, -10.0575, -13.2705],
        ...,
        [-16.7194, -15.7009, -16.5568, ..., -11.9396, -12.9538, -9.1279],
        [-14.1858, -13.9772, -14.0763, ..., -9.9030, -10.4625, -8.7678],
        [-14.0998, -13.0324, -13.3418, ..., -8.7676, -10.0443, -6.4476]]])
ps = tensor([-29.0154, -28.9152, -28.5686, ..., -23.7333, -25.4041, -24.8862])
word_loss = -0.006231430917978287

idx = 8
mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 671, 103, 1939, 3159, 4638,
        3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-12.3327, -11.7290, -12.1774, ..., -10.6400, -9.2812, -10.8762],
        [-17.4025, -16.3325, -17.3093, ..., -9.6641, -8.0054, -10.9477],
        [-19.8157, -18.1812, -19.2325, ..., -10.3199, -9.6911, -13.2068],
        ...,
        [-15.4990, -14.1986, -15.4210, ..., -10.8605, -11.1951, -9.2175],
        [-13.5214, -13.1154, -13.2580, ..., -9.1551, -8.5442, -8.5556],
        [-13.9661, -12.7296, -13.4830, ..., -7.9905, -9.4974, -5.5795]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-12.3327, -11.7290, -12.1774, ..., -10.6400, -9.2812, -10.8762],
        [-17.4025, -16.3325, -17.3093, ..., -9.6641, -8.0054, -10.9477],
        [-19.8157, -18.1812, -19.2325, ..., -10.3199, -9.6911, -13.2068],
        ...,
        [-15.4990, -14.1986, -15.4210, ..., -10.8605, -11.1951, -9.2175],
        [-13.5214, -13.1154, -13.2580, ..., -9.1551, -8.5442, -8.5556],
        [-13.9661, -12.7296, -13.4830, ..., -7.9905, -9.4974, -5.5795]]])
ps = tensor([-26.1031, -25.4673, -25.6910, ..., -23.7415, -24.6235, -23.6001])
word_loss = -0.4470815658569336

idx = 9
mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 671, 6629, 103, 3159, 4638,
        3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-12.9872, -12.3978, -12.9848, ..., -11.8125, -12.0875, -12.0079],
        [-17.5210, -16.8555, -17.3870, ..., -10.8851, -9.9333, -12.9947],
        [-19.9390, -18.8892, -19.5466, ..., -12.1456, -11.2809, -13.7224],
        ...,
        [-14.5711, -13.7166, -14.6204, ..., -10.1978, -11.9384, -9.0040],
        [-13.0610, -12.8815, -12.9802, ..., -9.4830, -9.4141, -10.5692],
        [-14.2910, -13.4047, -14.0815, ..., -8.9679, -11.1636, -6.8003]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-12.9872, -12.3978, -12.9848, ..., -11.8125, -12.0875, -12.0079],
        [-17.5210, -16.8555, -17.3870, ..., -10.8851, -9.9333, -12.9947],
        [-19.9390, -18.8892, -19.5466, ..., -12.1456, -11.2809, -13.7224],
        ...,
        [-14.5711, -13.7166, -14.6204, ..., -10.1978, -11.9384, -9.0040],
        [-13.0610, -12.8815, -12.9802, ..., -9.4830, -9.4141, -10.5692],
        [-14.2910, -13.4047, -14.0815, ..., -8.9679, -11.1636, -6.8003]]])
ps = tensor([-23.7871, -23.3177, -23.7469, ..., -19.6898, -21.1636, -19.3445])
word_loss = -1.905866265296936

idx = 10
mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 671, 6629, 1939, 103, 4638,
        3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-10.2435, -9.4682, -9.9029, ..., -8.6173, -7.9944, -9.5463],
        [-14.7155, -14.1531, -14.7035, ..., -7.7060, -7.0066, -8.7167],
        [-17.8262, -16.8357, -17.2724, ..., -9.3416, -9.6015, -11.3678],
        ...,
        [-13.5025, -12.6059, -13.4680, ..., -9.6887, -10.2040, -7.5718],
        [-11.8572, -11.8200, -11.6956, ..., -8.0838, -8.2098, -7.9838],
        [-11.4906, -10.7753, -11.1489, ..., -6.4764, -8.7700, -4.7994]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-10.2435, -9.4682, -9.9029, ..., -8.6173, -7.9944, -9.5463],
        [-14.7155, -14.1531, -14.7035, ..., -7.7060, -7.0066, -8.7167],
        [-17.8262, -16.8357, -17.2724, ..., -9.3416, -9.6015, -11.3678],
        ...,
        [-13.5025, -12.6059, -13.4680, ..., -9.6887, -10.2040, -7.5718],
        [-11.8572, -11.8200, -11.6956, ..., -8.0838, -8.2098, -7.9838],
        [-11.4906, -10.7753, -11.1489, ..., -6.4764, -8.7700, -4.7994]]])
ps = tensor([-23.3028, -23.2676, -24.0384, ..., -20.8967, -21.3373, -20.7125])
word_loss = -0.3310864269733429

idx = 11
mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 671, 6629, 1939, 3159, 103,
        3198, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-11.1587, -10.6746, -11.6326, ..., -9.9938, -8.8795, -10.4635],
        [-14.6800, -14.1649, -14.6931, ..., -7.1853, -6.1263, -11.4231],
        [-17.5996, -16.4610, -17.1693, ..., -7.9229, -7.0681, -13.4018],
        ...,
        [-13.6107, -12.4848, -13.5183, ..., -9.4305, -9.1442, -7.4951],
        [-11.5701, -11.2959, -11.3109, ..., -7.2745, -7.0823, -8.4521],
        [-13.5606, -12.9446, -13.3137, ..., -8.5220, -9.7932, -7.7482]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-11.1587, -10.6746, -11.6326, ..., -9.9938, -8.8795, -10.4635],
        [-14.6800, -14.1649, -14.6931, ..., -7.1853, -6.1263, -11.4231],
        [-17.5996, -16.4610, -17.1693, ..., -7.9229, -7.0681, -13.4018],
        ...,
        [-13.6107, -12.4848, -13.5183, ..., -9.4305, -9.1442, -7.4951],
        [-11.5701, -11.2959, -11.3109, ..., -7.2745, -7.0823, -8.4521],
        [-13.5606, -12.9446, -13.3137, ..., -8.5220, -9.7932, -7.7482]]])
ps = tensor([-24.5581, -24.6442, -24.8213, ..., -21.0443, -21.8916, -20.5020])
word_loss = -0.0409548319876194

idx = 12
mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 671, 6629, 1939, 3159, 4638,
        103, 1045, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-11.2262, -11.1173, -11.6287, ..., -10.3565, -9.4391, -11.4225],
        [-15.2089, -14.6585, -15.3925, ..., -7.9105, -6.7598, -10.2716],
        [-17.7514, -16.8604, -17.4242, ..., -8.0904, -8.6169, -12.3799],
        ...,
        [-11.9148, -11.5928, -12.1447, ..., -7.0739, -9.0568, -7.8991],
        [-10.9299, -10.9160, -10.9438, ..., -5.0096, -7.1774, -7.3603],
        [-14.6292, -14.3548, -14.3348, ..., -6.8946, -10.3034, -8.7604]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-11.2262, -11.1173, -11.6287, ..., -10.3565, -9.4391, -11.4225],
        [-15.2089, -14.6585, -15.3925, ..., -7.9105, -6.7598, -10.2716],
        [-17.7514, -16.8604, -17.4242, ..., -8.0904, -8.6169, -12.3799],
        ...,
        [-11.9148, -11.5928, -12.1447, ..., -7.0739, -9.0568, -7.8991],
        [-10.9299, -10.9160, -10.9438, ..., -5.0096, -7.1774, -7.3603],
        [-14.6292, -14.3548, -14.3348, ..., -6.8946, -10.3034, -8.7604]]])
ps = tensor([-26.6420, -26.3200, -26.8719, ..., -21.8011, -23.7840, -22.6264])
word_loss = -0.2741313576698303

idx = 13
mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 671, 6629, 1939, 3159, 4638,
        3198, 103, 511]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-12.9708, -12.1014, -12.6502, ..., -10.6858, -10.9495, -11.6393],
        [-17.4693, -16.4352, -17.2923, ..., -10.1345, -9.2979, -12.3043],
        [-19.2976, -17.8839, -18.8252, ..., -11.4233, -10.9146, -13.9556],
        ...,
        [-14.2439, -13.8837, -14.3827, ..., -10.8131, -9.7626, -10.4449],
        [-11.0731, -11.4156, -11.2104, ..., -8.5579, -9.0104, -8.7935],
        [-13.5802, -13.1632, -13.3280, ..., -9.2640, -10.9600, -8.3216]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-12.9708, -12.1014, -12.6502, ..., -10.6858, -10.9495, -11.6393],
        [-17.4693, -16.4352, -17.2923, ..., -10.1345, -9.2979, -12.3043],
        [-19.2976, -17.8839, -18.8252, ..., -11.4233, -10.9146, -13.9556],
        ...,
        [-14.2439, -13.8837, -14.3827, ..., -10.8131, -9.7626, -10.4449],
        [-11.0731, -11.4156, -11.2104, ..., -8.5579, -9.0104, -8.7935],
        [-13.5802, -13.1632, -13.3280, ..., -9.2640, -10.9600, -8.3216]]])
ps = tensor([-26.7180, -27.0605, -26.8553, ..., -24.2028, -24.6553, -24.4384])
word_loss = -2.3570048809051514

idx = 14
mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 671, 6629, 1939, 3159, 4638,
        3198, 1045, 103]])
output = MaskedLMOutput(loss=None, logits=tensor([[[-11.3928, -10.5590, -11.3276, ..., -8.2870, -7.0281, -9.3417],
        [-15.1979, -14.2848, -14.9167, ..., -8.0477, -3.3199, -9.9085],
        [-16.9765, -15.7591, -16.4064, ..., -7.3844, -3.6073, -10.5002],
        ...,
        [-14.3350, -13.5203, -14.7181, ..., -9.1939, -8.4368, -6.3008],
        [-11.5855, -11.6669, -11.5224, ..., -6.1303, -7.0456, -5.4713],
        [ -9.3767, -9.1142, -9.3964, ..., -5.2297, -5.3290, -3.2478]]]), hidden_states=None, attentions=None)
prediction_scores = output[0] = tensor([[[-11.3928, -10.5590, -11.3276, ..., -8.2870, -7.0281, -9.3417],
        [-15.1979, -14.2848, -14.9167, ..., -8.0477, -3.3199, -9.9085],
        [-16.9765, -15.7591, -16.4064, ..., -7.3844, -3.6073, -10.5002],
        ...,
        [-14.3350, -13.5203, -14.7181, ..., -9.1939, -8.4368, -6.3008],
        [-11.5855, -11.6669, -11.5224, ..., -6.1303, -7.0456, -5.4713],
        [ -9.3767, -9.1142, -9.3964, ..., -5.2297, -5.3290, -3.2478]]])
ps = tensor([-20.6789, -20.4164, -20.6986, ..., -16.5319, -16.6312, -14.5500])
word_loss = -1.3718788623809814

sentence_loss = -23.16600384376943; ppl = 4.685160888290345

Process finished with exit code 0
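As a quick sanity check on the final line, the perplexity can be recomputed directly from the accumulated loss. A minimal sketch (the numbers are copied from the run above):

import numpy as np

sentence_loss = -23.16600384376943  # sum of per-character log-probabilities
sen_len = 15                        # number of characters in the sentence
ppl = np.exp(-sentence_loss / sen_len)
print(ppl)                          # ≈ 4.685160888290345, matching the script's output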



References:
Data Augmentation Based on BertForMaskedLM (Language Model)
