In the previous part we introduced the dataset, defined the data-loading function, and defined the network structure. In this part we train the network.
The full table of contents for this PyTorch C++ API series is:
- "PyTorch C++ API Series 1: Recognizing MNIST with VGG-16"
- "PyTorch C++ API Series 2: Using a Custom Dataset"
- "PyTorch C++ API Series 3: Training the Network"
- "PyTorch C++ API Series 4: Implementing a Cat vs. Dog Classifier (Part 1)"
- "PyTorch C++ API Series 5: Implementing a Cat vs. Dog Classifier (Part 2)"
Previously we used a VGG-16-style network with an extra fully-connected layer at the end for classification. Here we change the network and its input so that it can be trained quickly on a CPU:
- The input image size becomes 64x64x3.
- Only 2 convolutional layers and 2 max-pooling layers are used.
These changes will certainly cost some accuracy, but the goal of this tutorial is simply to show how to use PyTorch's C++ API.
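The resizing itself belongs to the data-loading code from the previous part. As a rough sketch, assuming OpenCV is used for image loading as in that part (the helper name `load_image_64` and the exact conversion here are illustrative, not the series' actual code):

```cpp
#include <opencv2/opencv.hpp>
#include <torch/torch.h>

// Illustrative helper: read an image, resize it to 64x64x3, and convert
// it to a CHW float tensor suitable for the network below.
torch::Tensor load_image_64(const std::string& path) {
  cv::Mat img = cv::imread(path);          // HxWx3, BGR, 8-bit
  cv::resize(img, img, cv::Size(64, 64));  // Force the 64x64 input size
  auto t = torch::from_blob(img.data, {64, 64, 3}, torch::kUInt8);
  // HWC -> CHW, cast to float, and clone so the tensor owns its memory
  return t.permute({2, 0, 1}).to(torch::kF32).clone();
}
```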
Network Architecture
Below is the original network we defined in the previous part, reproduced here for reference:
```cpp
struct NetImpl : public torch::nn::Module {
  NetImpl() {
    // Initialize the network
    // On how to pass strides and padding: https://github.com/pytorch/pytorch/issues/12649#issuecomment-430156160
    conv1_1 = register_module("conv1_1", torch::nn::Conv2d(torch::nn::Conv2dOptions(1, 10, 3).padding(1)));
    conv1_2 = register_module("conv1_2", torch::nn::Conv2d(torch::nn::Conv2dOptions(10, 20, 3).padding(1)));
    // Insert pool layer
    conv2_1 = register_module("conv2_1", torch::nn::Conv2d(torch::nn::Conv2dOptions(20, 30, 3).padding(1)));
    conv2_2 = register_module("conv2_2", torch::nn::Conv2d(torch::nn::Conv2dOptions(30, 40, 3).padding(1)));
    // Insert pool layer
    conv3_1 = register_module("conv3_1", torch::nn::Conv2d(torch::nn::Conv2dOptions(40, 50, 3).padding(1)));
    conv3_2 = register_module("conv3_2", torch::nn::Conv2d(torch::nn::Conv2dOptions(50, 60, 3).padding(1)));
    conv3_3 = register_module("conv3_3", torch::nn::Conv2d(torch::nn::Conv2dOptions(60, 70, 3).padding(1)));
    // Insert pool layer
    conv4_1 = register_module("conv4_1", torch::nn::Conv2d(torch::nn::Conv2dOptions(70, 80, 3).padding(1)));
    conv4_2 = register_module("conv4_2", torch::nn::Conv2d(torch::nn::Conv2dOptions(80, 90, 3).padding(1)));
    conv4_3 = register_module("conv4_3", torch::nn::Conv2d(torch::nn::Conv2dOptions(90, 100, 3).padding(1)));
    // Insert pool layer
    conv5_1 = register_module("conv5_1", torch::nn::Conv2d(torch::nn::Conv2dOptions(100, 110, 3).padding(1)));
    conv5_2 = register_module("conv5_2", torch::nn::Conv2d(torch::nn::Conv2dOptions(110, 120, 3).padding(1)));
    conv5_3 = register_module("conv5_3", torch::nn::Conv2d(torch::nn::Conv2dOptions(120, 130, 3).padding(1)));
    // Insert pool layer
    fc1 = register_module("fc1", torch::nn::Linear(130*6*6, 2000));
    fc2 = register_module("fc2", torch::nn::Linear(2000, 1000));
    fc3 = register_module("fc3", torch::nn::Linear(1000, 100));
    fc4 = register_module("fc4", torch::nn::Linear(100, 2));
  }

  // Implement Algorithm
  torch::Tensor forward(torch::Tensor x) {
    x = torch::relu(conv1_1->forward(x));
    x = torch::relu(conv1_2->forward(x));
    x = torch::max_pool2d(x, 2);

    x = torch::relu(conv2_1->forward(x));
    x = torch::relu(conv2_2->forward(x));
    x = torch::max_pool2d(x, 2);

    x = torch::relu(conv3_1->forward(x));
    x = torch::relu(conv3_2->forward(x));
    x = torch::relu(conv3_3->forward(x));
    x = torch::max_pool2d(x, 2);

    x = torch::relu(conv4_1->forward(x));
    x = torch::relu(conv4_2->forward(x));
    x = torch::relu(conv4_3->forward(x));
    x = torch::max_pool2d(x, 2);

    x = torch::relu(conv5_1->forward(x));
    x = torch::relu(conv5_2->forward(x));
    x = torch::relu(conv5_3->forward(x));
    x = torch::max_pool2d(x, 2);

    x = x.view({-1, 130*6*6});

    x = torch::relu(fc1->forward(x));
    x = torch::relu(fc2->forward(x));
    x = torch::relu(fc3->forward(x));
    x = fc4->forward(x);

    return torch::log_softmax(x, 1);
  }

  // Declare layers
  torch::nn::Conv2d conv1_1{nullptr};
  torch::nn::Conv2d conv1_2{nullptr};
  torch::nn::Conv2d conv2_1{nullptr};
  torch::nn::Conv2d conv2_2{nullptr};
  torch::nn::Conv2d conv3_1{nullptr};
  torch::nn::Conv2d conv3_2{nullptr};
  torch::nn::Conv2d conv3_3{nullptr};
  torch::nn::Conv2d conv4_1{nullptr};
  torch::nn::Conv2d conv4_2{nullptr};
  torch::nn::Conv2d conv4_3{nullptr};
  torch::nn::Conv2d conv5_1{nullptr};
  torch::nn::Conv2d conv5_2{nullptr};
  torch::nn::Conv2d conv5_3{nullptr};
  torch::nn::Linear fc1{nullptr}, fc2{nullptr}, fc3{nullptr}, fc4{nullptr};
};
```
As you can see, this network has 13 convolutional layers, 5 max-pooling layers, and 4 fully-connected layers.
The new network is as follows:
```cpp
struct NetworkImpl : public torch::nn::Module {
  NetworkImpl(int64_t channels, int64_t height, int64_t width) {
    conv1 = register_module("conv1",
        torch::nn::Conv2d(torch::nn::Conv2dOptions(3, 50, 5).stride(2)));
    conv2 = register_module("conv2",
        torch::nn::Conv2d(torch::nn::Conv2dOptions(50, 100, 7).stride(2)));

    // Compute the flattened output size of the convolutional part
    n = get_output_shape(channels, height, width);

    fc1 = register_module("fc1", torch::nn::Linear(n, 120));
    fc2 = register_module("fc2", torch::nn::Linear(120, 100));
    fc3 = register_module("fc3", torch::nn::Linear(100, 2));
  }

  // Implement forward pass of each batch through the network
  torch::Tensor forward(torch::Tensor x) {
    x = torch::relu(torch::max_pool2d(conv1(x), 2));
    x = torch::relu(torch::max_pool2d(conv2(x), 2));

    // Flatten
    x = x.view({-1, n});

    x = torch::relu(fc1(x));
    x = torch::relu(fc2(x));
    x = torch::log_softmax(fc3(x), 1);
    return x;
  }

  // Calculate output size of the input tensor after the convolutional layers
  int64_t get_output_shape(int64_t channels, int64_t height, int64_t width) {
    // Run a zero tensor of the input shape through the conv/pool stack
    torch::Tensor x_sample = torch::zeros({1, channels, height, width});
    x_sample = torch::max_pool2d(conv1(x_sample), 2);
    x_sample = torch::max_pool2d(conv2(x_sample), 2);
    // numel() = batch_size (here, 1) * channels * height * width of x_sample
    return x_sample.numel();
  }

  // Declare layers and the flattened feature size
  torch::nn::Conv2d conv1{nullptr}, conv2{nullptr};
  torch::nn::Linear fc1{nullptr}, fc2{nullptr}, fc3{nullptr};
  int64_t n;
};
TORCH_MODULE(Network); // Creates the module holder `Network`
using ConvNet = Network; // Alias used by the train/test functions below
```
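As a quick sanity check of the `get_output_shape()` trick, a dummy forward pass like the one below should report one row of two log-probabilities per image (my own sketch, not part of the original tutorial):

```cpp
#include <torch/torch.h>
#include <iostream>

int main() {
  // Build the network for 3-channel 64x64 inputs
  Network net(3, 64, 64);

  // A dummy batch of 4 images: {batch, channels, height, width}
  torch::Tensor dummy = torch::rand({4, 3, 64, 64});
  torch::Tensor out = net->forward(dummy);

  // Expect {4, 2}: log-probabilities for the two classes per image
  std::cout << out.sizes() << std::endl;
  return 0;
}
```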
The new network contains only 2 convolutional layers, 2 max-pooling layers, and 3 fully-connected layers. For our experimental purposes, that is enough.
Training the Network
Here is the rough flow for training the network:
- Set the network to train mode with `net->train()`.
- Loop over each batch of data:
  - Get the data and the corresponding labels.
  - Clear the gradients.
  - Run the forward pass.
  - Compute the loss.
  - Run the backward pass.
  - Update the parameters.
- Compute the training accuracy and the mean loss (the code names it `mse`, but it is the mean of the NLL loss, not a mean squared error).
- Save the trained model.
The code for this flow is:
```cpp
template <typename DataLoader>
void train(ConvNet& net, DataLoader& data_loader, torch::optim::Optimizer& optimizer,
           size_t dataset_size, int epoch) {
  /*
   This function trains the network on our data loader using the optimizer.

   Parameters
   ==================
   ConvNet& net: Network struct
   DataLoader& data_loader: Training data loader
   torch::optim::Optimizer& optimizer: Optimizer like Adam, SGD etc.
   size_t dataset_size: Size of training dataset
   int epoch: Current epoch number (for logging)
  */

  net->train();

  size_t batch_index = 0;
  float mse = 0; // Running sum of batch losses (NLL loss, despite the name)
  float Acc = 0.0;

  for (auto& batch : *data_loader) {
    auto data = batch.data;
    auto target = batch.target.squeeze(); // Should be of length: batch_size

    data = data.to(torch::kF32);
    target = target.to(torch::kInt64);

    optimizer.zero_grad();

    auto output = net->forward(data);
    auto loss = torch::nll_loss(output, target);

    loss.backward();
    optimizer.step();

    auto acc = output.argmax(1).eq(target).sum();

    Acc += acc.template item<float>();
    mse += loss.template item<float>();

    batch_index += 1;
  }

  mse = mse / float(batch_index); // Take mean of loss over the epoch

  std::cout << "Epoch: " << epoch << ", "
            << "Accuracy: " << Acc / dataset_size << ", "
            << "MSE: " << mse << std::endl;

  torch::save(net, "best_model_try.pt");
}
```
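The tutorial itself doesn't show the surrounding driver code in this part, but a minimal sketch might look like the following. `CustomDataset` is a placeholder for the dataset class built in part 2 of the series; the batch size and learning rate here are arbitrary choices:

```cpp
#include <torch/torch.h>

int main() {
  // Placeholder: the dataset class from part 2, stacked into batches
  auto train_set = CustomDataset(/* image paths, labels */)
                       .map(torch::data::transforms::Stack<>());
  size_t dataset_size = train_set.size().value();

  // Shuffled, batched loader over the training set
  auto data_loader = torch::data::make_data_loader<
      torch::data::samplers::RandomSampler>(
      std::move(train_set),
      torch::data::DataLoaderOptions().batch_size(64));

  Network net(3, 64, 64);
  torch::optim::Adam optimizer(net->parameters(),
                               torch::optim::AdamOptions(1e-3));

  for (int epoch = 1; epoch <= 100; ++epoch) {
    // train() iterates *data_loader, so pass the loader handle itself
    train(net, data_loader, optimizer, dataset_size, epoch);
  }
  return 0;
}
```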
Similarly, our `test` function is as follows:
```cpp
template <typename DataLoader>
void test(ConvNet& network, DataLoader& loader, size_t data_size) {
  network->eval();
  torch::NoGradGuard no_grad; // No gradients needed during evaluation

  float Loss = 0, Acc = 0;

  for (const auto& batch : *loader) {
    auto data = batch.data;
    auto targets = batch.target.view({-1});

    data = data.to(torch::kF32);
    targets = targets.to(torch::kInt64);

    auto output = network->forward(data);
    auto loss = torch::nll_loss(output, targets);
    auto acc = output.argmax(1).eq(targets).sum();

    Loss += loss.template item<float>();
    Acc += acc.template item<float>();
  }

  std::cout << "Test Loss: " << Loss / data_size
            << ", Acc: " << Acc / data_size << std::endl;
}
```
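Because `train()` saves the model with `torch::save()`, getting a prediction back out later might look like this sketch (it reuses the illustrative `load_image_64` helper from earlier; which class index means cat vs. dog depends on how the labels were assigned in part 2):

```cpp
#include <torch/torch.h>
#include <iostream>

int main() {
  // Rebuild the architecture, then load the saved weights into it
  Network net(3, 64, 64);
  torch::load(net, "best_model_try.pt");
  net->eval();
  torch::NoGradGuard no_grad; // No gradients needed for inference

  // unsqueeze(0) turns the single CHW image into a batch of one
  torch::Tensor img = load_image_64("some_image.jpg").unsqueeze(0);
  int64_t pred = net->forward(img).argmax(1).item<int64_t>();

  std::cout << "Predicted class index: " << pred << std::endl;
  return 0;
}
```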
Training Results
After training for 100 epochs, we obtained the following accuracies:
- Best training accuracy: 99.82%
- Best testing accuracy: 82.43%
Let's look at a few of the results:
Correctly classified examples: a dog and a cat (images omitted).
Misclassified examples: a dog and a cat (images omitted).
That's it for this part.