1. Loading libraries

```python
import pandas as pd
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils import data
import itertools
```

2. Example data

The Kaggle house-price data can be downloaded from:

- http://d2l-data.s3-accelerate.amazonaws.com/kaggle_house_pred_train.csv
- http://d2l-data.s3-accelerate.amazonaws.com/kaggle_house_pred_test.csv

```python
train_data = pd.read_csv("../data/kaggle_house_pred_train.csv")
test_data = pd.read_csv("../data/kaggle_house_pred_test.csv")
train_data.shape, test_data.shape

# Drop the Id column (and SalePrice from the training set), then pool the
# features of both sets so they receive identical preprocessing.
all_features = pd.concat((train_data.iloc[:, 1:-1], test_data.iloc[:, 1:]))

# Standardize the numeric columns, then fill missing values with 0
# (the mean after standardization).
num_features = all_features.dtypes[all_features.dtypes != "object"].index
all_features[num_features] = all_features[num_features].apply(
    lambda x: (x - x.mean()) / x.std()
)
all_features[num_features] = all_features[num_features].fillna(0)

# One-hot encode the categorical columns; dtype=float keeps the dummy
# columns numeric so torch.tensor below does not receive bool/object data.
all_features = pd.get_dummies(all_features, dummy_na=True, dtype=float)
all_features.shape

n_train = train_data.shape[0]
train_feats = torch.tensor(all_features[:n_train].values, dtype=torch.float32)
test_feats = torch.tensor(all_features[n_train:].values, dtype=torch.float32)
train_labels = torch.tensor(train_data.SalePrice.values.reshape((-1, 1)),
                            dtype=torch.float32)
```

3. Defining the model

```python
class MLP(nn.Module):
    def __init__(self, in_feats, hidden_feats, dropout):
        super().__init__()
        self.hidden = nn.Linear(in_feats, hidden_feats)
        self.out = nn.Linear(hidden_feats, 1)
        self.dropout = nn.Dropout(dropout)

    def forward(self, X):
        hiddens = F.relu(self.hidden(X))
        output = self.out(self.dropout(hiddens))
        return output
```

Torch model basics

```python
model = MLP(10, 6, 0.1)
model

# Inspect the parameters of each layer as initialized by torch's defaults
model.state_dict()
model.state_dict().keys()
model.state_dict()['hidden.bias']
model.hidden.bias.data
model.out.weight.grad is None  # no gradients before the first backward pass

# Custom parameter initialization
def init_normal(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, mean=0, std=0.01)
        nn.init.zeros_(m.bias)

model.apply(init_normal)
model.state_dict()

def init_xavier(m):
    if type(m) == nn.Linear:
        nn.init.xavier_uniform_(m.weight)

model.apply(init_xavier)
model.state_dict()

# Saving and loading model parameters
torch.save(model.state_dict(), "mlp.params")
new_model = MLP(10, 6, 0.1)
new_model.load_state_dict(torch.load("mlp.params"))
```

GPU acceleration. The first two commands are run in a shell, not in Python:

```bash
nvidia-smi                    # show the GPUs on the current system
watch -n 0.1 -d nvidia-smi    # refresh the view every 0.1 s
```

```python
torch.cuda.is_available()   # is a GPU available?
torch.cuda.device_count()   # number of visible GPUs

# Move the data and the model onto the same GPU
def try_gpu(i=0):
    if torch.cuda.device_count() >= i + 1:
        return torch.device(f'cuda:{i}')
    return torch.device("cpu")

X = torch.ones(2, 3, device=try_gpu(0))
model.to("cuda:0")  # or model.to(try_gpu()) to fall back to CPU safely
```

4. Loss function and evaluation metric

```python
loss = nn.MSELoss()

# Relative error on the log scale: clamp predictions to [1, inf) so that
# taking the logarithm is always well defined.
def log_rmse(model, feature, labels):
    clipped_preds = torch.clamp(model(feature), 1, float('inf'))
    rmse = torch.sqrt(loss(torch.log(clipped_preds), torch.log(labels)))
    return rmse.item()
```
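To see why the clamp matters, here is a quick smoke test (a minimal sketch; the toy model and random tensors are made up for illustration and are not part of the original post). An untrained network can output non-positive values, and without the clamp `torch.log` would produce NaN or -inf:

```python
# Sanity check of log_rmse with illustrative values only.
torch.manual_seed(0)
toy_model = MLP(in_feats=5, hidden_feats=4, dropout=0.0)
toy_X = torch.randn(8, 5)
toy_y = torch.rand(8, 1) * 100 + 1   # fake positive "prices"
# Raw predictions of the untrained net may be <= 0; clamping to [1, inf)
# keeps torch.log finite, so this prints a finite number.
print(log_rmse(toy_model, toy_X, toy_y))
```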
5. Mini-batch training loop

```python
def load_array(data_arrays, batch_size, is_train=True):
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)

def train(model, train_feats, train_labels, test_feats, test_labels,
          num_epochs, lr, weight_decay, batch_size):
    train_ls, test_ls = [], []  # per-epoch training/test performance
    train_iter = load_array((train_feats, train_labels), batch_size)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                 weight_decay=weight_decay)
    for epoch in range(num_epochs):
        for X, y in train_iter:
            optimizer.zero_grad()
            l = loss(model(X), y)
            l.backward()
            optimizer.step()
        train_ls.append(log_rmse(model, train_feats, train_labels))
        if test_labels is not None:
            test_ls.append(log_rmse(model, test_feats, test_labels))
    return train_ls, test_ls
```

6. K-fold cross-validation

```python
def get_k_fold_data(k, i, X, y):
    assert k > 1
    fold_size = X.shape[0] // k
    X_train, y_train = None, None
    for j in range(k):
        idx = slice(j * fold_size, (j + 1) * fold_size)
        X_part, y_part = X[idx, :], y[idx]
        if j == i:
            X_valid, y_valid = X_part, y_part  # fold i is the validation set
        elif X_train is None:
            X_train, y_train = X_part, y_part
        else:
            X_train = torch.cat([X_train, X_part], 0)
            y_train = torch.cat([y_train, y_part], 0)
    return X_train, y_train, X_valid, y_valid

def k_fold(k, X_train, y_train, num_epochs, learning_rate, weight_decay,
           batch_size, in_feats, hidden_feats, dropout):
    train_l_sum, valid_l_sum = 0, 0
    for i in range(k):
        # named fold_data so the torch.utils.data import is not shadowed
        fold_data = get_k_fold_data(k, i, X_train, y_train)
        model = MLP(in_feats, hidden_feats, dropout)
        train_ls, valid_ls = train(model, *fold_data, num_epochs,
                                   learning_rate, weight_decay, batch_size)
        # use the last epoch's performance as the model's final performance
        train_l_sum += train_ls[-1]
        valid_l_sum += valid_ls[-1]
        # print(f'Fold-{i+1}, train log rmse {float(train_ls[-1]):f}, '
        #       f'valid log rmse {float(valid_ls[-1]):f}')
    return train_l_sum / k, valid_l_sum / k

# k, num_epochs, learning_rate, weight_decay, batch_size = 10, 100, 5, 0, 64
# in_feats, hidden_feats, dropout = train_feats.shape[1], 64, 0.5
# train_l, valid_l = k_fold(k, train_feats, train_labels,
#                           num_epochs, learning_rate, weight_decay, batch_size,
#                           in_feats, hidden_feats, dropout)
```

7. Hyperparameter grid search

```python
k, num_epochs = 5, 100
in_feats = [train_feats.shape[1]]
learning_rate = [0.1, 1, 3, 5]
weight_decay = [0, 0.001]
batch_size = [32, 64]
hidden_feats = [16, 64, 128]
dropout = [0, 0.1]

# Materialize the grid once: an itertools.product iterator would be
# exhausted after len(list(...)) and could not be looped over again.
grids = list(itertools.product(learning_rate, weight_decay, batch_size,
                               in_feats, hidden_feats, dropout))
len_grids = len(grids)

grid_train_l, grid_valid_l = [], []
for j, args in enumerate(grids):
    print(f'{j+1}/{len_grids}: {args}')
    train_l, valid_l = k_fold(k, train_feats, train_labels, num_epochs, *args)
    grid_train_l.append(train_l)
    grid_valid_l.append(valid_l)
    print(f'---- valid rmse {valid_l:.2f}')
```
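The post stops at the grid search. A natural follow-up, sketched below and not part of the original code, is to pick the configuration with the lowest validation log RMSE, retrain it on the full training set, and write a Kaggle submission file (names such as final_model and submission.csv are made up for illustration):

```python
# Sketch: retrain the best configuration on all training data.
best_j = grid_valid_l.index(min(grid_valid_l))
lr_b, wd_b, bs_b, in_b, hid_b, dp_b = grids[best_j]

final_model = MLP(in_b, hid_b, dp_b)
# Passing None for the test split: train() only records test metrics
# when test_labels is not None.
train(final_model, train_feats, train_labels, None, None,
      num_epochs, lr_b, wd_b, bs_b)

final_model.eval()  # disable dropout for prediction
preds = final_model(test_feats).detach().numpy().squeeze()
submission = pd.DataFrame({'Id': test_data.Id, 'SalePrice': preds})
submission.to_csv('submission.csv', index=False)
```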