今天讲一讲VGG块,看下图VGGNet系列模型结构
图
首先来看A列,也就是VGG11(8层卷积,3层全连接)。表中每一格后面都接了一个maxpool,这里每一格就是一个VGG块。为什么只用5个VGG块呢?因为输入图像的高宽是224*224,每经过一个VGG块(其中的maxpool)高宽减半,经过5个VGG块后就是7*7了,7是奇数,无法再整除减半,所以不再继续加块。为什么全部使用3*3的小卷积核呢?因为论文中提到,2个(3*3)卷积核堆叠的感受野相当于1个(5*5)的卷积核,3个(3*3)相当于1个(7*7),而且这样做参数更少,同时增加了网络深度,总之效果更好。
import torch
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
# Preprocessing shared by both splits: convert each image to a tensor, then
# normalize it with mean 0.1307 and standard deviation 0.3081.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# download=False: the MNIST files are expected to already exist under ./data.
train_dataset = datasets.MNIST('./data', train=True, download=False, transform=mnist_transform)
test_dataset = datasets.MNIST('./data', train=False, download=False, transform=mnist_transform)

# Training hyperparameters.
batch_size = 64
epochs = 10
learning_rate = 0.001

# Use the GPU when one is available; otherwise fall back to the CPU so the
# script still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# The evaluation metrics do not depend on sample order, so the test split is
# iterated in its stored order.
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
def plot_curve(data, name):
    """Draw ``data`` as a blue line on the current matplotlib axes.

    Args:
        data: Sequence of values to plot; each value is drawn at the x
            position equal to its index in the sequence.
        name: Text used as the legend entry for the curve.
    """
    axes = plt.gca()
    steps = range(len(data))
    axes.plot(steps, data, color='blue')
    axes.legend([name], loc='upper right')
    axes.set_xlabel('step')
    axes.set_ylabel('value')
class ResBlock(nn.Module):
    """Residual block: two 3x3 conv stages plus a 1x1-projected shortcut.

    All convolutions use stride 1 (with padding 1 on the 3x3 kernels), so the
    spatial size of the input is kept; only the channel count changes from
    ``in_channels`` to ``out_channels``.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the block.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Main path, stage 1: 3x3 conv -> batch norm -> ReLU. The batch norm
        # keeps activations in a stable range so training does not become
        # unstable when values grow large.
        first_conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.conv1 = nn.Sequential(first_conv, nn.BatchNorm2d(out_channels), nn.ReLU())
        # Main path, stage 2: 3x3 conv -> batch norm. The activation is
        # applied only after the shortcut has been added in forward().
        second_conv = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Sequential(second_conv, nn.BatchNorm2d(out_channels))
        # Shortcut path: a 1x1 conv + batch norm that maps the input to
        # ``out_channels`` so it can be added to the main-path output.
        projection = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1)
        self.extra = nn.Sequential(projection, nn.BatchNorm2d(out_channels))

    def forward(self, x):
        """Return ``relu(extra(x) + conv2(conv1(x)))``."""
        main_path = self.conv2(self.conv1(x))
        shortcut = self.extra(x)
        return F.relu(shortcut + main_path)
class Net(nn.Module):
    """Small residual classifier: conv stem, three ResBlocks, linear head.

    The stem (7x7 conv with stride 2, batch norm, 2x2 max pool) is followed
    by three residual blocks that widen the features 64 -> 128 -> 256 -> 512.
    The result is globally average-pooled to 1x1 and fed to one linear layer.

    A fourth block (512 -> 1024) used to be constructed here although
    forward() never called it; it only allocated unused parameters and has
    been removed. The linear head therefore consumes the 512 channels
    produced by ``block3``.

    Args:
        in_channels: Number of channels of the input images (default 1).
        num_classes: Number of output logits (default 10).
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2, 2),
        )
        self.block1 = ResBlock(64, 128)
        self.block2 = ResBlock(128, 256)
        self.block3 = ResBlock(256, 512)
        self.fc1 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for a batch ``x``."""
        x = self.conv(x)
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        # Global average pooling collapses each channel to a single value, so
        # the head does not depend on the spatial size of the feature map.
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = x.view(x.size(0), -1)
        return self.fc1(x)
# Build the model, loss function and optimizer.
model = Net().to(device)
criteon = nn.CrossEntropyLoss()
opt = optim.SGD(model.parameters(), lr=learning_rate)

train_loss = []      # mean batch loss of each epoch
train_100_loss = []  # loss of every 100th batch within each epoch

for epoch in range(epochs):
    model.train()
    epoch_loss_sum = 0.0
    batch_count = 0
    for batch_idx, (x, y) in enumerate(train_dataloader):
        x, y = x.to(device), y.to(device)
        out = model(x)
        loss = criteon(out, y)

        opt.zero_grad()
        loss.backward()
        opt.step()

        # Read the scalar once per step and reuse it for all bookkeeping.
        loss_value = loss.item()
        epoch_loss_sum += loss_value
        batch_count += 1

        if batch_idx % 100 == 0:
            train_100_loss.append(loss_value)
        if batch_idx % 10 == 0:
            print(epoch, batch_idx, loss_value)

    # Record the true mean loss over the epoch. Dividing a single batch loss
    # by a running counter of logging steps does not yield an average and
    # makes the curve shrink regardless of how training is going.
    if batch_count:
        train_loss.append(epoch_loss_sum / batch_count)
# Show the two recorded loss series side by side in one figure.
plt.figure(figsize=(12, 6))
loss_panels = [(train_loss, 'train loss'), (train_100_loss, 'train_100_loss')]
for panel_index, (series, label) in enumerate(loss_panels, start=1):
    plt.subplot(1, 2, panel_index)
    plot_curve(series, label)
plt.show()
# Model evaluation: collect predictions and labels over the whole test
# loader, then report weighted classification metrics.
model.eval()
pred_all = []
y_all = []
with torch.no_grad():
    for images, labels in test_dataloader:
        logits = model(images.to(device))
        # The predicted class is the index of the largest logit in each row.
        batch_pred = logits.argmax(dim=1).cpu()
        pred_all.extend(batch_pred.numpy())
        y_all.extend(labels.numpy())

test_accurary = accuracy_score(y_all, pred_all)
test_precision = precision_score(y_all, pred_all, average='weighted')
test_recall = recall_score(y_all, pred_all, average='weighted')
test_f1 = f1_score(y_all, pred_all, average='weighted')

summary_template = "[test] accurary:{:.4f} precision:{:.4f} recall:{:.4f} f1:{:.4f}"
print(summary_template.format(test_accurary, test_precision, test_recall, test_f1))
print(classification_report(y_all, pred_all))
以上就是我的网络代码(注意:代码里实际搭建的是带残差连接的 ResBlock 网络,而不是 VGG 块)。不想写了,下一篇再来做代码解释。
图