在上一节中,我们介绍了怎样使用 PyTorch C++ API 实现自定义数据集读取并且完成网络训练。这一节,我们将实现一个具体的实例:实现猫狗分类器。
完整的 PyTorch C++ 系列教程目录如下(或者点击这里查看):
- 《PyTorch C++ API 系列 1:用 VGG-16 识别 MNIST》
- 《PyTorch C++ API 系列 2:使用自定义数据集》
- 《PyTorch C++ API 系列 3:训练网络》
- 《PyTorch C++ API 系列 4:实现猫狗分类器(一)》
- 《PyTorch C++ API 系列 5:实现猫狗分类器(二)》
数据集
这次的猫狗数据集来自 Kaggle,链接在此,需要的朋友可以下载到本地实验。
数据集的训练数据包含 25k 张图片,都是猫或狗的照片。例如:
数据读取
我们把猫的图片标记为 0,把狗的图片标记为 1。数据分为两个压缩文件:
- train.zip:所有训练集
- test.zip:所有测试集
在训练集里,图片的命名方式为 `<class>.<number>.jpg`,其中:
- `class` 是 `cat` 或 `dog`(分别对应标签 0 和 1)
- `number` 代表序列号
我们将所有猫的图片放到 `train/cat` 文件夹里,所有狗的图片放到 `train/dog` 文件夹里。这一步操作可以用 Python 的 `shutil` 模块解决:
import os
import shutil


def sort_images(src_dir='train/'):
    """Copy the cat/dog images found in ``src_dir`` into per-class sub-folders.

    Files named ``cat*.jpg`` are copied to ``<src_dir>/cat/<k>.jpg`` and files
    named ``dog*.jpg`` to ``<src_dir>/dog/<k>.jpg``, where ``k`` is a running
    1-based counter kept separately for each class. Any other directory entry
    is ignored.

    Args:
        src_dir: Folder holding the extracted training images.

    Returns:
        A ``(count_cat, count_dog)`` tuple with the number of images copied
        for each class.
    """
    cat_dir = os.path.join(src_dir, 'cat')
    dog_dir = os.path.join(src_dir, 'dog')
    # shutil.copy does not create missing destination folders.
    os.makedirs(cat_dir, exist_ok=True)
    os.makedirs(dog_dir, exist_ok=True)

    count_cat = 0  # Number of cat images copied so far
    count_dog = 0  # Number of dog images copied so far

    # Sorted so that the numbering is reproducible between runs.
    for file in sorted(os.listdir(src_dir)):
        if not file.endswith('jpg'):
            continue
        if file.startswith('cat'):
            count_cat += 1
            shutil.copy(os.path.join(src_dir, file),
                        os.path.join(cat_dir, str(count_cat) + '.jpg'))
        elif file.startswith('dog'):
            count_dog += 1
            # Dog images live in the same source folder as the cat images.
            shutil.copy(os.path.join(src_dir, file),
                        os.path.join(dog_dir, str(count_dog) + '.jpg'))

    return count_cat, count_dog


if __name__ == '__main__':
    sort_images()
然后我们就可以定义读取数据的函数了,我们之前在这里讲过,需要的可以回去参考。这里我们主要的函数有以下几个:
- `load_data_from_folder`:读取文件路径和对应的 label,文件路径为 string,label 为 int。
- `process_images`:这个函数主要是处理图像,包括读取、调整大小、转换成 tensor 等,然后返回这些 tensor。
- `process_labels`:这个函数主要返回 label 的 tensor。
这里是代码:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 | torch::Tensor read_data(std::string loc) { // Read Image from the location of image cv::Mat img = cv::imread(loc, 0); cv::resize(img, img, cv::Size(200, 200), cv::INTER_CUBIC); std::cout << "Sizes: " << img.size() << std::endl; torch::Tensor img_tensor = torch::from_blob(img.data, {img.rows, img.cols, 1}, torch::kByte); img_tensor = img_tensor.permute({2, 0, 1}); // Channels x Height x Width return img_tensor.clone(); }; torch::Tensor read_label(int label) { torch::Tensor label_tensor = torch::full({1}, label); return label_tensor.clone(); } vector<torch::Tensor> process_images(vector<string> list_images) { cout << "Reading images..." << endl; vector<torch::Tensor> states; for (std::vector<string>::iterator it = list_images.begin(); it != list_images.end(); ++it) { cout << "Location being read: " << *it << endl; torch::Tensor img = read_data(*it); states.push_back(img); } cout << "Reading and Processing images done!" << endl; return states; } vector<torch::Tensor> process_labels(vector<int> list_labels) { cout << "Reading labels..." << endl; vector<torch::Tensor> labels; for (std::vector<int>::iterator it = list_labels.begin(); it != list_labels.end(); ++it) { torch::Tensor label = read_label(*it); labels.push_back(label); } cout << "Labels reading done!" 
<< endl; return labels; } /* This function returns a pair of vector of images paths (strings) and labels (integers) */ std::pair<vector<string>,vector<int>> load_data_from_folder(vector<string> folders_name) { vector<string> list_images; vector<int> list_labels; int label = 0; for(auto const& value: folders_name) { string base_name = value + "/"; cout << "Reading from: " << base_name << endl; DIR* dir; struct dirent *ent; if((dir = opendir(base_name.c_str())) != NULL) { while((ent = readdir(dir)) != NULL) { string filename = ent->d_name; if(filename.length() > 4 && filename.substr(filename.length() - 3) == "jpg") { cout << base_name + ent->d_name << endl; // cv::Mat temp = cv::imread(base_name + "/" + ent->d_name, 1); list_images.push_back(base_name + ent->d_name); list_labels.push_back(label); } } closedir(dir); } else { cout << "Could not open directory" << endl; // return EXIT_FAILURE; } label += 1; } return std::make_pair(list_images, list_labels); } |
然后,我们就可以初始化 Dataset 了:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 | int main(int argc, char const *argv[]) { // Load the model. // Read Data vector<string> folders_name; folders_name.push_back("/home/krshrimali/Documents/data-dogs-cats/train/cat"); folders_name.push_back("/home/krshrimali/Documents/data-dogs-cats/train/dog"); std::pair<vector<string>, vector<int>> pair_images_labels = load_data_from_folder(folders_name); vector<string> list_images = pair_images_labels.first; vector<int> list_labels = pair_images_labels.second; auto custom_dataset = CustomDataset(list_images, list_labels).map(torch::data::transforms::Stack<>()); } |
网络结构
这里是我们用到的网络结构:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 | struct NetImpl: public torch::nn::Module { NetImpl() { // Initialize the network // On how to pass strides and padding: https://github.com/pytorch/pytorch/issues/12649#issuecomment-430156160 conv1_1 = register_module("conv1_1", torch::nn::Conv2d(torch::nn::Conv2dOptions(1, 10, 3).padding(1))); conv1_2 = register_module("conv1_2", torch::nn::Conv2d(torch::nn::Conv2dOptions(10, 20, 3).padding(1))); // Insert pool layer conv2_1 = register_module("conv2_1", torch::nn::Conv2d(torch::nn::Conv2dOptions(20, 30, 3).padding(1))); conv2_2 = register_module("conv2_2", torch::nn::Conv2d(torch::nn::Conv2dOptions(30, 40, 3).padding(1))); // Insert pool layer conv3_1 = register_module("conv3_1", torch::nn::Conv2d(torch::nn::Conv2dOptions(40, 50, 3).padding(1))); conv3_2 = register_module("conv3_2", torch::nn::Conv2d(torch::nn::Conv2dOptions(50, 60, 3).padding(1))); conv3_3 = register_module("conv3_3", torch::nn::Conv2d(torch::nn::Conv2dOptions(60, 70, 3).padding(1))); // Insert pool layer conv4_1 = register_module("conv4_1", torch::nn::Conv2d(torch::nn::Conv2dOptions(70, 80, 3).padding(1))); conv4_2 = register_module("conv4_2", torch::nn::Conv2d(torch::nn::Conv2dOptions(80, 90, 3).padding(1))); conv4_3 = register_module("conv4_3", torch::nn::Conv2d(torch::nn::Conv2dOptions(90, 100, 3).padding(1))); // Insert pool layer conv5_1 = register_module("conv5_1", torch::nn::Conv2d(torch::nn::Conv2dOptions(100, 110, 3).padding(1))); conv5_2 = register_module("conv5_2", torch::nn::Conv2d(torch::nn::Conv2dOptions(110, 120, 3).padding(1))); conv5_3 = register_module("conv5_3", torch::nn::Conv2d(torch::nn::Conv2dOptions(120, 130, 3).padding(1))); // Insert pool layer fc1 = register_module("fc1", torch::nn::Linear(130*6*6, 2000)); fc2 = register_module("fc2", 
torch::nn::Linear(2000, 1000)); fc3 = register_module("fc3", torch::nn::Linear(1000, 100)); fc4 = register_module("fc4", torch::nn::Linear(100, 2)); } // Implement Algorithm torch::Tensor forward(torch::Tensor x) { x = torch::relu(conv1_1->forward(x)); x = torch::relu(conv1_2->forward(x)); x = torch::max_pool2d(x, 2); x = torch::relu(conv2_1->forward(x)); x = torch::relu(conv2_2->forward(x)); x = torch::max_pool2d(x, 2); x = torch::relu(conv3_1->forward(x)); x = torch::relu(conv3_2->forward(x)); x = torch::relu(conv3_3->forward(x)); x = torch::max_pool2d(x, 2); x = torch::relu(conv4_1->forward(x)); x = torch::relu(conv4_2->forward(x)); x = torch::relu(conv4_3->forward(x)); x = torch::max_pool2d(x, 2); x = torch::relu(conv5_1->forward(x)); x = torch::relu(conv5_2->forward(x)); x = torch::relu(conv5_3->forward(x)); x = torch::max_pool2d(x, 2); x = x.view({-1, 130*6*6}); x = torch::relu(fc1->forward(x)); x = torch::relu(fc2->forward(x)); x = torch::relu(fc3->forward(x)); x = fc4->forward(x); return torch::log_softmax(x, 1); } // Declare layers torch::nn::Conv2d conv1_1{nullptr}; torch::nn::Conv2d conv1_2{nullptr}; torch::nn::Conv2d conv2_1{nullptr}; torch::nn::Conv2d conv2_2{nullptr}; torch::nn::Conv2d conv3_1{nullptr}; torch::nn::Conv2d conv3_2{nullptr}; torch::nn::Conv2d conv3_3{nullptr}; torch::nn::Conv2d conv4_1{nullptr}; torch::nn::Conv2d conv4_2{nullptr}; torch::nn::Conv2d conv4_3{nullptr}; torch::nn::Conv2d conv5_1{nullptr}; torch::nn::Conv2d conv5_2{nullptr}; torch::nn::Conv2d conv5_3{nullptr}; torch::nn::Linear fc1{nullptr}, fc2{nullptr}, fc3{nullptr}, fc4{nullptr}; }; |
然后在训练过程中我们初始化这个网络并且传入我们的训练数据:
1 | auto net = std::make_shared<NetImpl>(); |
训练
接下来就是训练网络了,这一部分我们放在下一节讲解。
本站微信群、QQ群(三群号 726282629):
