思路

加载数据集

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
class DiabetesDataset(Dataset):
    """Map-style dataset backed by a comma-delimited numeric file.

    Every row is one sample: the last column is the label and all
    preceding columns are the features. The whole file is read into
    memory once, as float32, when the dataset is constructed.
    """

    def __init__(self, filepath):
        """Load ``filepath`` and split it into feature and label tensors.

        Args:
            filepath: Location of the CSV file, passed straight to
                ``np.loadtxt``.
        """
        # ndmin=2 keeps the array two-dimensional even when the file holds
        # a single row; without it the column slicing below would raise.
        xy = np.loadtxt(filepath, delimiter=',', dtype=np.float32, ndmin=2)
        self.len = xy.shape[0]  # number of samples (rows)
        self.x_data = torch.from_numpy(xy[:, :-1])
        # Indexing with the list [-1] keeps the labels as a column of
        # shape (N, 1) rather than a flat vector of shape (N,).
        self.y_data = torch.from_numpy(xy[:, [-1]])

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.len

# Read the CSV into a dataset, then wrap it in a loader that serves
# shuffled mini-batches of 32 samples.
dataset = DiabetesDataset('diabetes.csv')
train_loader = DataLoader(
    dataset=dataset,
    batch_size=32,
    shuffle=True,
    num_workers=0,
)
  • filepath是外部传入的参数(数据文件路径)
  • np.loadtxt从CSV文件加载数据,返回NumPy数组
  • xy.shape[0]表示数据的第一维大小,即数据集的样本总数(行数)
  • torch.from_numpy()将NumPy数组转换为PyTorch Tensor
  • __getitem__
    • 允许对象支持下标访问(如dataset[i])
    • 参数index表示要访问的数据索引
    • 返回一个元组(x, y),即特征和标签
  • shuffle=True表示在每个epoch开始时打乱数据顺序;设为False则不打乱
  • num_workers表示用于数据加载的子进程数量;num_workers=0表示不使用子进程,数据在主进程中加载。

模型定义

1
2
3
4
5
6
7
8
9
10
11
12
13
class Model(torch.nn.Module):
    """Fully connected network mapping 8 input features to 1 output.

    Three linear layers (8 -> 6 -> 4 -> 1), each followed by a sigmoid,
    so the final output is also squashed by a sigmoid.
    """

    def __init__(self):
        """Create the three linear layers and the shared sigmoid."""
        super().__init__()
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        # Sigmoid has no parameters, so one instance serves every layer.
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through the layers and return the sigmoid output.

        Args:
            x: Tensor whose last dimension has 8 entries.

        Returns:
            Tensor with the same leading dimensions and a last
            dimension of size 1.
        """
        x = self.sigmoid(self.linear1(x))
        x = self.sigmoid(self.linear2(x))
        x = self.sigmoid(self.linear3(x))
        return x

损失函数与优化器

1
2
# Binary cross-entropy, averaged over the elements of each batch.
criterion = torch.nn.BCELoss(reduction='mean')
# Stochastic gradient descent over all parameters of the model.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.01,
)

训练循环

1
2
3
4
5
6
7
8
9
10
11
for epoch in range(100):
    # enumerate(..., 0) numbers the mini-batches of each epoch from 0.
    for i, data in enumerate(train_loader, 0):
        # Each batch is an (inputs, labels) pair.
        inputs, labels = data

        # Forward pass and loss for this mini-batch.
        y_pred = model(inputs)
        loss = criterion(y_pred, labels)
        print(epoch, i, loss.item())

        # Clear the gradients left over from the previous step, then
        # backpropagate the current loss.
        optimizer.zero_grad()
        loss.backward()

        # Apply the parameter update.
        optimizer.step()
  • for i, data in enumerate(train_loader, 0)
    • train_loader表示PyTorch的DataLoader对象,用于按批次加载数据
    • 0表示索引从0开始计数
    • 返回迭代器,i表示当前批次的索引,data表示当前批次的数据
  • inputs, labels = data表示元组解包

代码实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import torch
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

class DiabetesDataset(Dataset):
    """Map-style dataset backed by a comma-delimited numeric file.

    Every row is one sample: the last column is the label and all
    preceding columns are the features. The whole file is read into
    memory once, as float32, when the dataset is constructed.
    """

    def __init__(self, filepath):
        """Load ``filepath`` and split it into feature and label tensors.

        Args:
            filepath: Location of the CSV file, passed straight to
                ``np.loadtxt``.
        """
        # ndmin=2 keeps the array two-dimensional even when the file holds
        # a single row; without it the column slicing below would raise.
        xy = np.loadtxt(filepath, delimiter=',', dtype=np.float32, ndmin=2)
        self.len = xy.shape[0]  # number of samples (rows)
        self.x_data = torch.from_numpy(xy[:, :-1])
        # Indexing with the list [-1] keeps the labels as a column of
        # shape (N, 1) rather than a flat vector of shape (N,).
        self.y_data = torch.from_numpy(xy[:, [-1]])

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.len

# Read the CSV into a dataset, then wrap it in a loader that serves
# shuffled mini-batches of 32 samples.
dataset = DiabetesDataset('diabetes.csv')
train_loader = DataLoader(
    dataset=dataset,
    batch_size=32,
    shuffle=True,
    num_workers=0,
)

class Model(torch.nn.Module):
    """Fully connected network mapping 8 input features to 1 output.

    Three linear layers (8 -> 6 -> 4 -> 1), each followed by a sigmoid,
    so the final output is also squashed by a sigmoid.
    """

    def __init__(self):
        """Create the three linear layers and the shared sigmoid."""
        super().__init__()
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        # Sigmoid has no parameters, so one instance serves every layer.
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through the layers and return the sigmoid output.

        Args:
            x: Tensor whose last dimension has 8 entries.

        Returns:
            Tensor with the same leading dimensions and a last
            dimension of size 1.
        """
        x = self.sigmoid(self.linear1(x))
        x = self.sigmoid(self.linear2(x))
        x = self.sigmoid(self.linear3(x))
        return x

# The network to be trained.
model = Model()

# Binary cross-entropy, averaged over the elements of each batch.
criterion = torch.nn.BCELoss(reduction='mean')
# Stochastic gradient descent over all parameters of the model.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.01,
)

# Train only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    for epoch in range(100):
        # enumerate(..., 0) numbers the mini-batches of each epoch from 0.
        for i, data in enumerate(train_loader, 0):
            # Each batch is an (inputs, labels) pair.
            inputs, labels = data

            # Forward pass and loss for this mini-batch.
            y_pred = model(inputs)
            loss = criterion(y_pred, labels)
            print(epoch, i, loss.item())

            # Clear the gradients left over from the previous step, then
            # backpropagate the current loss.
            optimizer.zero_grad()
            loss.backward()

            # Apply the parameter update.
            optimizer.step()