import torch
import torch.nn as nn

# Imports come first so that the availability check below can resolve `torch`.
torch.cuda.is_available()  # True when a usable CUDA device is present
class MyModule(nn.Module):
    """Minimal module template wrapping a single fully connected layer.

    Args:
        in_features: Size of the last dimension of the input.
        out_features: Size of the last dimension of the output.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        # `in` is a reserved keyword in Python, so the layer sizes are named
        # in_features / out_features instead.
        self.fc = nn.Linear(in_features, out_features)
        # Initialization: weights uniform in [-0.5, 0.5], bias set to zero.
        nn.init.uniform_(self.fc.weight, -0.5, 0.5)
        nn.init.zeros_(self.fc.bias)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return its output."""
        return self.fc(x)
device = torch.device("cuda")
model = MyModule(<args>).to(device)
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=4.0)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.9)
def train(nn, iterator, optimizer, criteon):
    """Run one training pass over ``iterator`` and return the mean batch loss.

    Args:
        nn: Model to train; called as ``nn(batch.text)``. NOTE: inside this
            function the parameter name shadows any module-level ``nn`` alias.
        iterator: Iterable of batches exposing ``.text`` and ``.label``.
        optimizer: Optimizer whose ``zero_grad`` and ``step`` run once per batch.
        criteon: Loss callable invoked as ``criteon(pred, batch.label)``.

    Returns:
        The mean of the per-batch loss values as a float, or ``0.0`` when
        ``iterator`` yields no batches.
    """
    nn.train()
    total_loss = 0.0
    num_batches = 0
    for batch in iterator:
        # Clear stale gradients once, before the forward/backward pass.
        optimizer.zero_grad()
        pred = nn(batch.text)
        loss = criteon(pred, batch.label)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1
    return total_loss / num_batches if num_batches else 0.0
def eval(nn, iterator, criteon):
    """Evaluate the model over ``iterator`` and return the mean batch loss.

    NOTE: the function name shadows the built-in ``eval`` and the ``nn``
    parameter shadows any module-level ``nn`` alias; both names are kept
    so existing callers continue to work.

    Args:
        nn: Model to evaluate; called as ``nn(batch.text)``.
        iterator: Iterable of batches exposing ``.text`` and ``.label``.
        criteon: Loss callable invoked as ``criteon(pred, batch.label)``.

    Returns:
        The mean of the per-batch loss values as a float, or ``0.0`` when
        ``iterator`` yields no batches.
    """
    # Switch the model passed in (not an unrelated global) to eval mode.
    nn.eval()
    total_loss = 0.0
    num_batches = 0
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for batch in iterator:
            pred = nn(batch.text)
            loss = criteon(pred, batch.label)
            total_loss += loss.item()
            num_batches += 1
    return total_loss / num_batches if num_batches else 0.0
变换
torch.triu(tensor)
返回 tensor 的上三角部分(主对角线以下的元素置 0),不修改原 tensor
tensor.masked_fill(mask, value)
将 mask 为 True 位置用 value 填充
下标索引x::y
:从x开始间隔y取一个
操作
model.parameters()
: 返回模型所有参数的generator
tensor.numel()
:返回该 tensor 中元素的总数;模型参数总量可用 sum(p.numel() for p in model.parameters()) 计算
optim
torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
layer
nn.Embedding(vocab_size, embedding_dim)
单词到word vector
vocab_size : 词汇量大小
输入:任意形状的 index 索引张量(整数类型)
输出:在输入形状末尾增加一维 embedding_dim 的 word vector
nn.LSTM(embedding_dim, hidden_dim, num_layers = 2)
LSTM层
dropout
:默认0
bidirectional
:默认False
nn.Linear(in_size, out_size)
全连接层
torch.cat([hidden[-2], hidden[-1]], dim=1)  # 沿特征维拼接最后两个 hidden state(如双向 LSTM 的正向与反向);不指定 dim 时默认 dim=0
并行
# Non-main processes wait here until the main process (local_rank == 0)
# reaches its own barrier call below.
if local_rank != 0:
    torch.distributed.barrier()
# Code placed here is run by the main process first; the other processes run
# it only after they have been released from the barrier above.
if local_rank == 0:
    torch.distributed.barrier()
# From here on, all processes execute.
# Wrap the model for multi-GPU data parallelism exactly once, and only when
# more than one GPU is visible (torch.cuda.device_count() gives the GPU count).
if torch.cuda.device_count() > 1:
    model = torch.nn.DataParallel(model)