转:pytorch版的bilstm+crf实现sequence label

在理解CRF的时候费了一些功夫,将一些难以理解的地方稍微做了下标注,隔三差五看看加强记忆, 代码是pytorch文档上的example

import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.optim as optim

def to_scalar(var): #var是Variable,维度是1
    # returns a python float
    return var.view(-1).data.tolist()[0]

def argmax(vec):
    # return the argmax as a python int
    _, idx = torch.max(vec, 1)
    return to_scalar(idx)

def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    tensor = torch.LongTensor(idxs)
    return autograd.Variable(tensor)

# Compute log sum exp in a numerically stable way for the forward algorithm
def log_sum_exp(vec): #vec是1*5, type是Variable

    max_score = vec[0, argmax(vec)]
    #max_score维度是1, max_score.view(1,-1)维度是1*1,max_score.view(1, -1).expand(1, vec.size()[1])的维度是1*5
    max_score_broadcast = max_score.view(1, -1).expand(1, vec.size()[1]) # vec.size()维度是1*5
    return max_score + torch.log(torch.sum(torch.exp(vec - max_score_broadcast)))#为什么指数之后再求和,而后才log呢

class BiLSTM_CRF(nn.Module):
    def