import torch
import torch.nn as nn

torch.cuda.is_available()  # whether CUDA is available

class MyModule(nn.Module):
    def __init__(self, in_features, out_features):  # argument names are illustrative
        super().__init__()
        # initialization
        self.fc = nn.Linear(in_features, out_features)
        self.fc.weight.data.uniform_(-0.5, 0.5)
        self.fc.bias.data.zero_()

    def forward(self, x):
        return self.fc(x)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = MyModule(in_features, out_features).to(device)  # pass your own sizes
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=4.0)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.9)

def train(model, iterator, optimizer, criterion):  # renamed `nn`, which shadowed torch.nn
    model.train()
    for i, batch in enumerate(iterator):
        optimizer.zero_grad()  # clear gradients once per batch
        pred = model(batch.text)
        loss = criterion(pred, batch.label)
        loss.backward()
        optimizer.step()

def evaluate(model, iterator, criterion):  # renamed from `eval`, which shadows the builtin
    model.eval()
    with torch.no_grad():
        for batch in iterator:
            pred = model(batch.text)
            loss = criterion(pred, batch.label)
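
A minimal sketch of the outer loop tying these together; train_iter and valid_iter are assumed batch iterators with .text / .label fields, not part of the original notes:

for epoch in range(10):
    train(model, train_iter, optimizer, criterion)
    evaluate(model, valid_iter, criterion)
    scheduler.step()  # apply the StepLR decay once per epoch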


Transforms

torch.triu(tensor) returns the upper-triangular part of tensor (entries below the diagonal set to 0)
tensor.masked_fill(mask, value) fills the positions where mask is True with value
slicing x::y: take every y-th element starting from index x
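
A quick check of all three on toy tensors (values chosen only for illustration):

t = torch.arange(1, 10, dtype=torch.float).reshape(3, 3)
upper = torch.triu(t)              # entries below the diagonal become 0
filled = t.masked_fill(t > 5, 0.)  # positions where the mask is True become 0.
v = torch.arange(10)
print(v[1::3])                     # tensor([1, 4, 7]): start at 1, step 3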

Operations

model.parameters(): returns a generator over all of the model's parameters
tensor.numel(): returns the number of elements in the tensor; summed over model.parameters(), it gives the total parameter count
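
Combining the two calls above into the usual parameter-count one-liner:

n_params = sum(p.numel() for p in model.parameters())
n_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"total: {n_params}, trainable: {n_trainable}")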

optim

torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)  # momentum defaults to 0

layer

nn.Embedding(vocab_size, embedding_dim)
maps word indices to word vectors
vocab_size: size of the vocabulary
input: a 1-D tensor of indices
output: word vectors of dimension embedding_dim
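
A minimal sketch (the sizes are arbitrary):

emb = nn.Embedding(1000, 32)    # vocabulary of 1000 words, 32-dim vectors
idx = torch.tensor([5, 42, 7])  # a 1-D tensor of word indices
print(emb(idx).shape)           # torch.Size([3, 32])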

nn.LSTM(embedding_dim, hidden_dim, num_layers=2)
LSTM layer
dropout: defaults to 0
bidirectional: defaults to False

nn.Linear(in_size, out_size)
fully connected layer

torch.cat([hidden[-2], hidden[-1]], dim=1)  # concatenate the final forward/backward hidden states of a bidirectional LSTM
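
A sketch of the pattern these layers add up to: embed, run a bidirectional LSTM, then concatenate the final forward and backward hidden states before the classifier (all sizes are illustrative):

emb = nn.Embedding(1000, 32)
lstm = nn.LSTM(32, 64, num_layers=2, bidirectional=True)
fc = nn.Linear(64 * 2, 5)

x = torch.randint(0, 1000, (20, 8))             # [seq_len, batch] word indices
out, (hidden, cell) = lstm(emb(x))              # hidden: [num_layers * 2, batch, 64]
h = torch.cat([hidden[-2], hidden[-1]], dim=1)  # [batch, 128], forward + backward
logits = fc(h)                                  # [batch, 5]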

Parallelism

if local_rank != 0:
    torch.distributed.barrier()  # non-main processes wait here
# only the main process runs the code between the two barriers
if local_rank == 0:
    torch.distributed.barrier()  # the main process releases the others
# from here on, all processes run
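
A sketch of how this pattern is typically used, e.g. letting rank 0 prepare data exactly once; download_dataset and load_dataset are hypothetical helpers, and the init call is an assumption:

import torch.distributed as dist

dist.init_process_group(backend="nccl")
local_rank = dist.get_rank()

if local_rank != 0:
    dist.barrier()          # everyone except rank 0 waits here
if local_rank == 0:
    download_dataset()      # hypothetical helper: runs exactly once
    dist.barrier()          # rank 0 is done, release the others
data = load_dataset()       # hypothetical helper: now safe on every rank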

model = torch.nn.DataParallel(model)  # multi-GPU data parallelism
torch.cuda.device_count()  # number of available GPUs
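A common guard combining the two (single-GPU machines skip the wrapper):

if torch.cuda.device_count() > 1:
    model = torch.nn.DataParallel(model)  # replicate the model, split each batch across GPUs
model = model.to(device)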
