DataLoader:批量加载数据

import torch
from torch.utils.data import TensorDataset, DataLoader

# Wrap the tensors in a TensorDataset:
# 1000 samples with 10 random features each, and one 0/1 label per sample.
X = torch.randn(1000, 10)
y = torch.randint(0, 2, (1000,))

dataset = TensorDataset(X, y)
# NOTE: with num_workers > 0 on platforms that start worker processes via
# "spawn" (Windows, macOS), run this under `if __name__ == "__main__":`.
loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=2)

# Look at the first batch only.
for xb, yb in loader:
    print(xb.shape, yb.shape)    # torch.Size([32, 10]) torch.Size([32])
    break

DataLoader 自动分批、打乱、并行加载——比手写循环健壮。

自定义 Dataset

import pandas as pd
import torch
from torch.utils.data import Dataset


class CSVDataset(Dataset):
    """Map-style dataset backed by a CSV file loaded fully into memory.

    One column holds the integer class label; every other column is
    converted to a float32 feature.

    Args:
        csv_path: Path of the CSV file (or any other source accepted by
            ``pandas.read_csv``).
        label_col: Name of the label column. Defaults to ``"label"``.

    Raises:
        KeyError: If ``label_col`` is not a column of the CSV.
    """

    def __init__(self, csv_path, label_col="label"):
        df = pd.read_csv(csv_path)
        features = df.drop(columns=label_col)
        labels = df[label_col]
        # float32 features / int64 labels are what nn.CrossEntropyLoss expects.
        self.X = torch.tensor(features.to_numpy(), dtype=torch.float32)
        self.y = torch.tensor(labels.to_numpy(), dtype=torch.long)

    def __len__(self):
        """Return the number of samples (rows of the CSV)."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair at position ``idx``."""
        return self.X[idx], self.y[idx]

完整训练循环(模板)

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

# Use the GPU when CUDA is available; otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The model is moved to the device once, up front.
model = MyModel().to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)

# Only the training loader shuffles its samples.
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
val_loader = DataLoader(val_set, batch_size=64)

for epoch in range(10):
    # ---- training pass ----
    model.train()
    train_loss = 0
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        batch_loss = loss_fn(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()

        # Weight the per-batch loss by the batch size so the final division
        # by the dataset size gives a per-sample average.
        batch_size = inputs.size(0)
        train_loss += batch_loss.item() * batch_size
    train_loss = train_loss / len(train_set)

    # ---- validation pass (no gradient tracking) ----
    model.eval()
    val_loss, correct = 0, 0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            logits = model(inputs)
            val_loss += loss_fn(logits, targets).item() * inputs.size(0)
            # The predicted class is the index of the largest score along dim 1.
            hits = logits.argmax(1) == targets
            correct += hits.sum().item()
    n_val = len(val_set)
    val_loss /= n_val
    val_acc = correct / n_val

    print(f"Epoch {epoch+1}: train={train_loss:.4f}, val={val_loss:.4f}, acc={val_acc:.3f}")

这套模板在 99% 的监督学习项目中都能直接套用。

关键细节

1. .to(device) 移到 GPU

模型只需在训练开始前 .to(device) 一次;每个 batch 的数据则要在循环里移到与模型相同的设备上,否则会因设备不一致而报错。

2. zero_grad / backward / step 三连

每个 batch 必须按这个顺序:

optimizer.zero_grad()   # reset the gradients to zero
loss.backward()          # backpropagate to compute the gradients
optimizer.step()         # update the parameters using the gradients

3. 验证 with no_grad

# Gradient tracking is disabled for everything inside this block.
with torch.no_grad():
    ...

验证时不记录梯度、不构建计算图——省显存,也快很多。注意它不能代替 model.eval():后者负责切换 Dropout / BatchNorm 等层的行为,两者要一起用。

4. .item() 把 0 维 tensor 变 Python 数字

loss.item() 把 0 维张量的值取成 Python float——累加或存进 list 时不会连带保留计算图,避免内存越积越多。

学习率调度

from torch.optim.lr_scheduler import StepLR, CosineAnnealingLR

# Two alternative schedulers: keep only one of these lines, because the
# second assignment replaces the first.
scheduler = StepLR(optimizer, step_size=5, gamma=0.5)    # multiply lr by 0.5 every 5 epochs
scheduler = CosineAnnealingLR(optimizer, T_max=10)        # cosine annealing

# Step the scheduler once at the end of every epoch
for epoch in range(epochs):
    train_one_epoch()
    scheduler.step()

早停(避免过拟合)

patience = 5                     # epochs to wait without improvement before stopping
best_val_loss = float("inf")     # lowest validation loss seen so far
bad_epochs = 0                   # consecutive epochs without improvement

for epoch in range(100):
    train_one_epoch()
    val_loss = evaluate()

    if not (val_loss < best_val_loss):
        # No improvement this epoch: stop once patience is exhausted.
        bad_epochs += 1
        if bad_epochs >= patience:
            print("早停")
            break
        continue

    # New best: checkpoint the weights and reset the counter.
    best_val_loss = val_loss
    torch.save(model.state_dict(), "best.pth")
    bad_epochs = 0

现代化:Lightning

Lightning(pip install lightning,旧包名 pytorch-lightning)把这些样板代码封装进 Trainer,训练只需一行:

import lightning as L

class LitModel(L.LightningModule):
    """Minimal LightningModule skeleton: one training step plus an optimizer."""

    def training_step(self, batch, idx):
        # Placeholder: compute the loss for `batch` here and return it.
        ...

    def configure_optimizers(self):
        # AdamW over this module's parameters with lr=1e-3.
        return torch.optim.AdamW(self.parameters(), lr=1e-3)


# Trainer.fit expects a LightningModule, so build a LitModel instance first.
model = LitModel()
trainer = L.Trainer(max_epochs=10)
trainer.fit(model, train_loader, val_loader)

写真实项目可以试 Lightning——少写一半代码。

下一篇是真实例子:MNIST 手写识别。