image-20220731205814540

一、从零实现

1
2
3
4
import torch
import torchvision
from torch.utils import data
from torchvision import transforms

1、示例数据

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
## The dataset is Fashion-MNIST: 10 clothing categories with their images
def load_data_fashion_minist(batch_size):
    """Download Fashion-MNIST and build the training and test data loaders.

    Args:
        batch_size: Number of samples per mini-batch, used for both loaders.

    Returns:
        A ``(train_iter, test_iter)`` tuple of ``DataLoader`` objects. The
        training loader is shuffled; the test loader is not.
    """
    # Convert every image into a tensor
    trans = transforms.ToTensor()
    mnist_train = torchvision.datasets.FashionMNIST(
        root="./data", train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="./data", train=False, transform=trans, download=True)
    # Iterator over the training split
    train_iter = data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
    # Iterator over the held-out test split. It must wrap mnist_test:
    # wrapping mnist_train here would report training accuracy as test accuracy.
    test_iter = data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False)
    return train_iter, test_iter
# batch_size = 256
# train_iter, test_iter = load_data_fashion_minist(batch_size)
# X, y = next(iter(train_iter))
# X.shape
# # torch.Size([256, 1, 28, 28])

2、定义模型

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
def net(X):
    """Forward pass of softmax regression.

    Reads the module-level parameters ``W`` (784 x 10) and ``b`` (10).

    Args:
        X: Batch of inputs; every sample is flattened to 784 values.

    Returns:
        The result of applying ``softmax`` to the linear layer's output.
    """
    # Flatten every sample into one 784-element row
    flat = X.reshape((-1, 784))
    # Fully connected layer
    logits = torch.matmul(flat, W) + b
    # Activation: turn the scores into per-class probabilities
    return softmax(logits)
def softmax(X):
    """Row-wise softmax of a 2-D tensor.

    Args:
        X: Tensor of scores with one row per sample and one column per class.

    Returns:
        Tensor of the same shape whose entries are non-negative and whose
        rows each sum to 1.
    """
    # Subtracting the per-row maximum does not change the mathematical result
    # but keeps exp() from overflowing to inf (and the ratio from becoming NaN).
    shifted = X - X.max(dim=1, keepdim=True).values
    # The exponential makes every entry non-negative
    X_exp = torch.exp(shifted)
    partition = X_exp.sum(dim=1, keepdim=True)
    # Normalise: the class predictions of each sample sum to 1
    return X_exp / partition
# W = torch.normal(0, 0.01, (784, 10), requires_grad = True)
# b = torch.zeros(10, requires_grad = True)
# y_hat = net(X)
# y_hat.shape
# # torch.Size([256, 10])

3、定义损失函数

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
def cross_entropy(y_hat, y):
    """Per-sample cross-entropy loss.

    Args:
        y_hat: Predicted class probabilities, one row per sample.
        y: Index of the true class for every sample.

    Returns:
        1-D tensor holding the negative log of the probability predicted
        for each sample's true class.
    """
    sample_rows = range(len(y_hat))
    # Probability the model assigned to each sample's true class
    true_class_prob = y_hat[sample_rows, y]
    # The negative log turns "maximise the probability" into a quantity to minimise
    return -torch.log(true_class_prob)
# cross_entropy(y_hat, y).shape
# # torch.Size([256])

## Classification-accuracy metric
# Number of correctly classified samples in one batch
def accuracy(y_hat, y):
    """Count the samples whose highest-scoring class equals the label.

    Args:
        y_hat: Per-class scores, one row per sample.
        y: True class index for every sample.

    Returns:
        The number of correct predictions in the batch, as a Python number.
    """
    # Predicted class = column with the largest score, cast to the label dtype
    predicted = y_hat.argmax(1).type(y.dtype)
    # True where the prediction matches the label, False elsewhere
    hits = (predicted == y)
    n_correct = hits.sum()
    return n_correct.item()
# accuracy(y_hat, y)
# # 8

# Running-sum counter: accumulates several quantities over all batches of an epoch
class Accumulator:
    """Keep ``n`` running sums that are always updated together."""

    def __init__(self, n):
        # e.g. n == 2 -> [0.0, 0.0]
        self.data = [0.0] * n

    def add(self, *args):
        """Add one value to each running sum, position by position.

        Raises:
            ValueError: If the number of values differs from the number of
                sums. ``zip`` would otherwise silently drop the unmatched
                slots and shrink the accumulator.
        """
        if len(args) != len(self.data):
            raise ValueError(
                f"expected {len(self.data)} values, got {len(args)}")
        # [0, 0] + [1, 2] = [1, 2]
        self.data = [total + value for total, value in zip(self.data, args)]

    def reset(self):
        """Set every running sum back to zero."""
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        """Return the running sum stored at position ``idx``."""
        return self.data[idx]

# Classification accuracy over a whole data iterator (e.g. the test set)
def evaluate_accuracy(net, data_iter):
    """Return the fraction of samples in ``data_iter`` that ``net`` gets right.

    Args:
        net: Callable mapping a batch of inputs to per-class scores.
        data_iter: Iterable yielding ``(X, y)`` batches.

    Returns:
        Correct predictions divided by the number of samples seen.
    """
    correct, seen = 0.0, 0.0
    # Evaluation needs no gradients
    with torch.no_grad():
        for features, labels in data_iter:
            correct = correct + accuracy(net(features), labels)
            seen = seen + len(labels)
    return correct / seen

# evaluate_accuracy(net, test_iter)
# # 0.0257

4、定义优化算法

1
2
3
4
5
6
# (Same as in linear regression)
def sgd(params, lr, batch_size):
    """Apply one mini-batch stochastic-gradient-descent step in place.

    Args:
        params: Iterable of tensors to update.
        lr: Learning rate.
        batch_size: Divisor applied to each gradient.

    Each updated parameter has its gradient reset to zero afterwards.
    Parameters whose ``.grad`` is ``None`` are left untouched.
    """
    # The update itself must not be recorded by autograd
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                # No gradient has been computed for this parameter; skip it
                continue
            param -= lr * param.grad / batch_size
            param.grad.zero_()

5、训练模型

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# Model parameters: small random weights and a zero bias
W = torch.normal(0, 0.01, (784, 10), requires_grad=True)
b = torch.zeros(10, requires_grad=True)
lr = 0.01
batch_size = 256
# The loaders have to exist before the loop below iterates over them
train_iter, test_iter = load_data_fashion_minist(batch_size)
# One [train_loss, train_acc, test_acc] row is appended per epoch
epoch_metric = []
num_epochs = 10

for epoch in range(num_epochs):
    # Running sums for this epoch: summed loss, correct predictions, samples seen
    train_metric = Accumulator(3)
    for X, y in train_iter:
        y_hat = net(X)
        l = cross_entropy(y_hat, y)
        # Gradient of the summed loss; sgd() divides it by the batch length
        l.sum().backward()
        # Use the real batch length: the last batch can be shorter than batch_size
        sgd([W, b], lr, len(y))
        train_metric.add(l.sum().item(), accuracy(y_hat, y), len(y))
    acc_avg = train_metric[1] / train_metric[2]
    loss_avg = train_metric[0] / train_metric[2]
    test_acc_avg = evaluate_accuracy(net, test_iter)
    epoch_metric.append([loss_avg, acc_avg, test_acc_avg])
    print(f'epoch {epoch + 1},train loss {loss_avg:.3f} | train acc {acc_avg:.3f} | test acc {test_acc_avg:.3f}')

import pandas as pd

# One row per epoch; draw the three metrics as lines on a single chart
epoch_metric_df = pd.DataFrame(
    data=epoch_metric,
    columns=["train_loss", "train_acc", "test_acc"],
)
epoch_metric_df.plot.line()
image-20220731211211079

二、torch框架

1
2
3
4
5
import torch
import torchvision
from torch import nn
from torch.utils import data
from torchvision import transforms
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
## (1) Example data
def load_data_fashion_minist(batch_size):
    """Download Fashion-MNIST and build the training and test data loaders.

    Args:
        batch_size: Number of samples per mini-batch, used for both loaders.

    Returns:
        A ``(train_iter, test_iter)`` tuple of ``DataLoader`` objects. The
        training loader is shuffled; the test loader is not.
    """
    # Convert every image into a tensor
    trans = transforms.ToTensor()
    mnist_train = torchvision.datasets.FashionMNIST(
        root="./data", train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="./data", train=False, transform=trans, download=True)
    train_iter = data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
    # The test loader must wrap mnist_test: wrapping mnist_train here would
    # report training accuracy as test accuracy.
    test_iter = data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False)
    return train_iter, test_iter

# batch_size = 256
# train_iter, test_iter = load_data_fashion_minist(batch_size)
# X, y = next(iter(train_iter))
# X.shape
# # torch.Size([256, 1, 28, 28])

## (2) Define the model
# Flatten each sample into a vector, then apply a single 784 -> 10 linear layer
net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=784, out_features=10),
)
# len(net)
# # 2
# net[1].weight
# net[1].bias
# y_hat = net(X)
# y_hat.shape
# # torch.Size([256, 10])

## (3) Define the loss function
# reduction="none" keeps one loss value per sample instead of reducing them
loss = nn.CrossEntropyLoss(
    reduction="none",
)
# loss(y_hat, y).shape
# # torch.Size([256])
# Number of correctly classified samples in one batch
def accuracy(y_hat, y):
    """Count the samples whose highest-scoring class equals the label.

    Args:
        y_hat: Per-class scores, one row per sample.
        y: True class index for every sample.

    Returns:
        The number of correct predictions in the batch, as a Python number.
    """
    # Predicted class = column with the largest score, cast to the label dtype
    predicted = y_hat.argmax(1).type(y.dtype)
    # True where the prediction matches the label, False elsewhere
    hits = (predicted == y)
    n_correct = hits.sum()
    return n_correct.item()
# accuracy(y_hat, y)
# # 13
# Running-sum counter: accumulates several quantities over all batches of an epoch
class Accumulator:
    """Keep ``n`` running sums that are always updated together."""

    def __init__(self, n):
        # e.g. n == 2 -> [0.0, 0.0]
        self.data = [0.0] * n

    def add(self, *args):
        """Add one value to each running sum, position by position.

        Raises:
            ValueError: If the number of values differs from the number of
                sums. ``zip`` would otherwise silently drop the unmatched
                slots and shrink the accumulator.
        """
        if len(args) != len(self.data):
            raise ValueError(
                f"expected {len(self.data)} values, got {len(args)}")
        # [0, 0] + [1, 2] = [1, 2]
        self.data = [total + value for total, value in zip(self.data, args)]

    def reset(self):
        """Set every running sum back to zero."""
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        """Return the running sum stored at position ``idx``."""
        return self.data[idx]
# Classification accuracy over a whole data iterator (e.g. the test set)
def evaluate_accuracy(net, data_iter):
    """Return the fraction of samples in ``data_iter`` that ``net`` gets right.

    Args:
        net: Callable mapping a batch of inputs to per-class scores.
        data_iter: Iterable yielding ``(X, y)`` batches.

    Returns:
        Correct predictions divided by the number of samples seen.
    """
    correct, seen = 0.0, 0.0
    # Evaluation needs no gradients
    with torch.no_grad():
        for features, labels in data_iter:
            correct = correct + accuracy(net(features), labels)
            seen = seen + len(labels)
    return correct / seen

# evaluate_accuracy(net, test_iter)
# # 0.0515

## (4) Define the optimisation algorithm
# Build a new model and hand its parameters to the SGD optimiser
net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 10),
)
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.1)

## (5) Train the model
batch_size=256
train_iter, test_iter = load_data_fashion_minist(batch_size)
# One [train_loss, train_acc, test_acc] row is appended per epoch
epoch_metric = []
num_epochs = 10

for epoch in range(num_epochs):
    # Running sums for this epoch: summed loss, correct predictions, samples seen
    train_metric = Accumulator(3)
    net.train()
    for X, y in train_iter:
        y_hat = net(X)
        # Per-sample losses (the loss object was created with reduction='none')
        l = loss(y_hat, y)
        # Clear the gradients left over from the previous step
        optimizer.zero_grad()
        # Back-propagate the mean loss of the batch
        l.mean().backward()
        optimizer.step()
        train_metric.add(l.sum().item(), accuracy(y_hat, y), len(y))
    # Epoch averages: divide the running sums by the number of samples seen
    acc_avg = train_metric[1]/train_metric[2]
    loss_avg = train_metric[0]/train_metric[2]
    # Switch to evaluation mode before measuring accuracy on test_iter
    net.eval()
    test_acc_avg = evaluate_accuracy(net, test_iter)
    epoch_metric.append([loss_avg, acc_avg, test_acc_avg])
    print(f'epoch {epoch + 1},train loss {loss_avg:.3f} | train acc {acc_avg:.3f} | test acc {test_acc_avg:.3f}')


import pandas as pd

# One row per epoch; draw the three metrics as lines on a single chart
epoch_metric_df = pd.DataFrame(
    data=epoch_metric,
    columns=["train_loss", "train_acc", "test_acc"],
)
epoch_metric_df.plot.line()
image-20220731211938373

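值得注意的是,在使用 torch 框架时,模型 `net` 并没有像从零实现那样显式地进行指数函数(softmax)归一化转换:这是因为 `nn.CrossEntropyLoss` 内部已经包含了 log-softmax 运算,所以模型只需输出未归一化的得分(logits)即可。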