今天讲一讲VGG块,看下图VGGNet系列模型结构

首先来看A列,也就是VGG11(8层卷积,3层全连接)。图中每一格后面都接了一个maxpool,每一格就是一个VGG块。为什么只用5个VGG块呢?因为输入图像的高宽是224*224,每个VGG块末尾的maxpool会把高宽减半,经过5个VGG块后就变成7*7了(224/2^5=7),7是奇数,没法再整除减半,所以就不再加块了。为什么要用多个3*3卷积堆叠呢?是因为论文里说过,2个(3*3)的卷积核堆叠起来感受野相当于1个(5*5)的卷积核,3个(3*3)相当于1个(7*7),而且这样做增加了深度,总之效果更好。

import torch
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Shared preprocessing for both splits: convert the image to a tensor, then
# normalize its single channel with mean 0.1307 and std 0.3081.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# download=False: the MNIST files are expected to already be present under ./data.
train_dataset = datasets.MNIST('./data', train=True, download=False, transform=mnist_transform)
test_dataset = datasets.MNIST('./data', train=False, download=False, transform=mnist_transform)

batch_size = 64
epochs = 10
learning_rate = 0.001
# Use the GPU when one is available; otherwise fall back to the CPU instead of
# failing on the first .to(device) call.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# The evaluation metrics do not depend on sample order, so the test split is
# iterated without shuffling.
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


def plot_curve(data, name):
    """Draw ``data`` as a blue line on the current matplotlib axes.

    Args:
        data: Sized sequence of values; each value is plotted against its
            index (0 .. len(data) - 1).
        name: Text used as the single legend entry (placed upper right).
    """
    steps = range(len(data))
    plt.plot(steps, data, color='blue')
    plt.xlabel('step')
    plt.ylabel('value')
    plt.legend([name], loc='upper right')


class ResBlock(nn.Module):
    """Residual block: two 3x3 convolutions plus a 1x1-projected shortcut.

    The main path is conv-BN-ReLU followed by conv-BN. The shortcut path is a
    1x1 conv-BN applied to the block input. Both paths are summed and passed
    through a final ReLU. All convolutions use stride 1 (and padding 1 for the
    3x3 ones), so the spatial size is unchanged; only the channel count goes
    from ``in_channels`` to ``out_channels``.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the block.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        # Batch normalization keeps activations from growing too large and
        # destabilizing the network.
        first_conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.conv1 = nn.Sequential(first_conv, nn.BatchNorm2d(out_channels), nn.ReLU())

        second_conv = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Sequential(second_conv, nn.BatchNorm2d(out_channels))

        # Project the raw input so its channel count matches the main path's
        # output and the two can be added.
        projection = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1)
        self.extra = nn.Sequential(projection, nn.BatchNorm2d(out_channels))

    def forward(self, x):
        """Return ``relu(extra(x) + conv2(conv1(x)))``."""
        main_path = self.conv2(self.conv1(x))
        shortcut = self.extra(x)
        return F.relu(shortcut + main_path)


class Net(nn.Module):
    """Small residual classifier: conv stem, three ResBlocks, linear head.

    The stem is a 7x7 stride-2 convolution with batch norm followed by 2x2 max
    pooling. Three residual blocks then widen the channels 64 -> 128 -> 256 ->
    512 without changing the spatial size. Global average pooling reduces each
    feature map to one value, and a linear layer maps the 512 features to class
    scores.

    A fourth 512 -> 1024 block used to be constructed here but was never called
    in ``forward``, so it only added unused parameters. It has been removed. To
    add a fourth stage, create the block, call it in ``forward`` and change the
    input size of ``fc1`` to the new channel count.

    Args:
        in_channels: Number of channels of the input images (default 1).
        num_classes: Number of output classes (default 10).
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()
        self.conv = nn.Sequential(nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3),
                                  nn.BatchNorm2d(64), nn.MaxPool2d(2, 2))
        self.block1 = ResBlock(64, 128)
        self.block2 = ResBlock(128, 256)
        self.block3 = ResBlock(256, 512)

        # Input size must equal the channel count produced by the last block.
        self.fc1 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return unnormalized class scores of shape (batch, num_classes)."""
        x = self.conv(x)
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        # Global average pooling makes the head independent of spatial size.
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        return x


model = Net().to(device)
criteon = nn.CrossEntropyLoss().to(device)
opt = optim.SGD(model.parameters(), lr=learning_rate)
train_loss = []      # mean batch loss of each epoch (one entry per epoch)
train_100_loss = []  # loss of every 100th batch within each epoch
for epoch in range(epochs):
    model.train()
    epoch_loss_sum = 0.0
    epoch_batches = 0
    for batch_idx, (x, y) in enumerate(train_dataloader):
        x, y = x.to(device), y.to(device)
        out = model(x)
        loss = criteon(out, y)

        opt.zero_grad()
        loss.backward()
        opt.step()

        # Read the scalar once per step and reuse it for logging and averaging.
        loss_value = loss.item()
        epoch_loss_sum += loss_value
        epoch_batches += 1

        if batch_idx % 100 == 0:
            train_100_loss.append(loss_value)

        if batch_idx % 10 == 0:
            print(epoch, batch_idx, loss_value)

    # Record the average loss over this epoch's batches. The previous code
    # divided the last batch's loss by a step counter that kept growing across
    # epochs, which does not correspond to any meaningful quantity.
    if epoch_batches:
        train_loss.append(epoch_loss_sum / epoch_batches)

# One figure with two side-by-side panels: the per-epoch series on the left
# and the every-100th-batch series on the right.
plt.figure(figsize=(12, 6))
for panel, (series, label) in enumerate(
        [(train_loss, 'train loss'), (train_100_loss, 'train_100_loss')], start=1):
    plt.subplot(1, 2, panel)
    plot_curve(series, label)
plt.show()

# Model evaluation: gather predictions for the whole test loader, then report
# accuracy plus weighted precision / recall / F1 and a per-class report.
model.eval()
pred_all = []
y_all = []
with torch.no_grad():
    for x, y in test_dataloader:
        logits = model(x.to(device))
        # The predicted class is the index of the largest score in each row.
        pred = logits.argmax(dim=1).cpu()
        pred_all.extend(pred.numpy())
        y_all.extend(y.numpy())

test_accurary = accuracy_score(y_all, pred_all)
test_precision, test_recall, test_f1 = [
    metric(y_all, pred_all, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
]

summary_template = "[test] accurary:{:.4f} precision:{:.4f} recall:{:.4f} f1:{:.4f}"
print(summary_template.format(test_accurary, test_precision, test_recall, test_f1))
print(classification_report(y_all, pred_all))

以上就是我的网络代码(注意:代码里实际用的是带残差连接的ResBlock,而不是纯粹的VGG块),今天不想写了,下一篇再来做代码解释。