Custom PyTorch DataLoader that returns the image name during iteration

PyTorch's official examples load data through predefined datasets and loaders. How do you load your own local images and labels?

The data format looks like:

image1 label1

image2 label2

...

imagen labeln

In my experiment the data format is shown below: each image file name corresponds to one label, and each label is a 9-dimensional vector (the trailing column on each line is not used):

1_-2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.304295635957 0.952577642997 0.0614006041909 0.0938333659301 -0.995587916479 0.126405046864 -0.999368204665 0.0355414055005 0.382030624629 0.0

1_0_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.271224474168 0.962516121742 0.061399602839 0.128727689658 -0.991679979588 0.126495313272 -0.999999890616 0.000467726796359 0.381981952872 0.0

1_2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.237868729379 0.971297311632 0.0614713240576 0.163626102983 -0.986522426721 0.1265439964 -0.999400990041 -0.0346072406472 0.382020891324 0.0

1.1_-2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.303575822293 0.95280728383 0.0675229548933 0.0939225945957 -0.995579502714 0.138745857429 -0.999376861795 0.0352971402251 0.410670255038 0.1

1.1_0_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.270745576918 0.962650940154 0.0674654115238 0.128659340525 -0.991688849436 0.138685653232 -0.999999909615 0.000425170029598 0.410739827476 0.1

1.1_2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.23757921143 0.971368168253 0.0674866175928 0.16322766122 -0.986588430204 0.138789623782 -0.999406504329 -0.0344476284471 0.410661183171 0.1

1.2_-2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.305474635089 0.952200213882 0.0736939767933 0.0939968709874 -0.995572492712 0.150981626608 -0.999370773952 0.0354690875311 0.437620875774 0.2

1.2_0_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.270346113421 0.962763199836 0.073518963401 0.128433455959 -0.991718129002 0.150964425444 -0.999999924062 0.000389711583812 0.437667827367 0.2

1.2_2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.237337349604 0.971427291403 0.0734898449879 0.162895476227 -0.986643331617 0.150931800731 -0.999411541516 -0.0343011761519 0.437608139736 0.2

1.3_-2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.305514664536 0.952187371137 0.0795990377393 0.0941741911595 -0.995555735115 0.162914965783 -0.999378340534 0.0352552474342 0.462816755558 0.3

1.3_0_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.272366931798 0.962193459998 0.0796135882128 0.128398130503 -0.991722703221 0.162940731132 -0.999999935257 0.000359841646368 0.462733965419 0.3

...
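Before the full program, here is a minimal parsing sketch (the line below is abbreviated and purely illustrative) showing how one such row splits into an image name plus a 9-dimensional label, with the trailing column ignored:

# Stand-alone illustration; in the real code this parsing happens inside the Dataset's __init__.
line = "1_-2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.30 0.95 0.06 0.09 -0.99 0.13 -0.99 0.04 0.38 0.0"
parts = line.split()
fn = parts[0]                                 # image file name
label = tuple(float(v) for v in parts[1:-1])  # 9 floats; the last column is dropped
print(fn, len(label))                         # prints the file name and 9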

The full program is as follows:

import torch
import torch.nn as nn
import math
import os
from PIL import Image
import random
from torchvision import datasets, transforms
import torch.utils.data as data
from torch.autograd import Variable

torch.cuda.set_device(0)
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"
kwargs = {'num_workers': 1, 'pin_memory': True}
batch_size = 8


# load the data: split the label file into train / test line lists
def random_choose_data(label_path):
    random.seed(1)  # fixed seed, so the same split is reproduced on every run
    with open(label_path) as f:
        lines = f.readlines()
    slice_initial = random.sample(lines, 200000)
    slice = list(set(lines) - set(slice_initial))
    random.shuffle(slice)

    train_label = slice[:150000]
    test_label = slice[150000:200000]
    return train_label, test_label  # return the line lists and pass them to the custom Dataset


# default image loader: return the data as a PIL image object
def default_loader(path):
    return Image.open(path).convert('RGB')


class myImageFloder(data.Dataset):  # subclass of torch.utils.data.Dataset
    def __init__(self, root, label, transform=None, target_transform=None, loader=default_loader):
        c = 0
        imgs = []
        class_names = ['regression']
        for line in label:  # label is a list of text lines
            cls = line.split()  # file name followed by the numeric columns
            fn = cls.pop(0)
            if os.path.isfile(os.path.join(root, fn)):
                imgs.append((fn, tuple([float(v) for v in cls[:len(cls)-1]])))
                # drop the last column so each label is a 9-dimensional tuple;
                # imgs is a list of (file name, label tuple) pairs
            c = c + 1
        print('the total number of images is', c)
        print(class_names)
        self.root = root
        self.imgs = imgs
        self.classes = class_names
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        fn, label = self.imgs[index]  # unpack the (file name, label tuple) pair
        img = self.loader(os.path.join(self.root, fn))
        if self.transform is not None:
            img = self.transform(img)
        return img, torch.Tensor(label), fn  # return the image, its label and its file name

    def __len__(self):
        return len(self.imgs)

    def getName(self):
        return self.classes


mytransform = transforms.Compose([transforms.ToTensor()])  # scale pixel values from [0, 255] to [0, 1]
test_data_root = "/home/ying/data/google_streetview_train_test1"
data_label = "/home/ying/data/google_streetview_train_test1/label.txt"
# test_label="/home/ying/data/google_streetview_train_test1/label.txt"
train_label, test_label = random_choose_data(data_label)
test_loader = torch.utils.data.DataLoader(
    myImageFloder(root=test_data_root, label=test_label, transform=mytransform),
    batch_size=batch_size, shuffle=True, **kwargs)


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)  # reduce the channels, keep the spatial size
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=9):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)  # halves the spatial size
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # halves the spatial size again
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7)
        # self.fc = nn.Linear(512 * block.expansion, num_classes)
        self.fc = nn.Linear(2048, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        # block: block class, planes: base output channels, blocks: number of blocks in this layer
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion  # the input channel count grows by the expansion factor
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


def resnet50(pretrained=True):
    """Constructs a ResNet-50 model.

    Args:
        pretrained (bool): if True, load the locally saved state dict
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3])
    # model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
    if pretrained:
        model.load_state_dict(torch.load('./resnet50_20170907_state_dict.pth'))
    return model


cnn = resnet50(pretrained=True)  # the output dimension is 9
cnn.cuda()
cnn.eval()
criterion = nn.MSELoss().cuda()

for i, (test_images, test_labels, fn) in enumerate(test_loader):  # i is the batch index; the tuple holds what __getitem__ returns
    test_images = Variable(test_images.cuda())
    test_labels = Variable(test_labels.cuda())
    outputs = cnn(test_images)
    print(outputs.data[0])
    print(fn)
    loss = criterion(outputs, test_labels)
    print("Iter [%d/%d] Test_Loss: %.4f" % (i + 1, len(test_loader), loss.data[0]))

Now focus on the code that defines the Dataset and returns the image name:

def random_choose_data(label_path):
    random.seed(1)  # fixed seed, so the same split is reproduced on every run
    with open(label_path) as f:
        lines = f.readlines()
    slice_initial = random.sample(lines, 200000)
    slice = list(set(lines) - set(slice_initial))
    random.shuffle(slice)

    train_label = slice[:150000]
    test_label = slice[150000:200000]
    return train_label, test_label  # return the line lists and pass them to the custom Dataset


# default image loader: return the data as a PIL image object
def default_loader(path):
    return Image.open(path).convert('RGB')


class myImageFloder(data.Dataset):  # subclass of torch.utils.data.Dataset
    def __init__(self, root, label, transform=None, target_transform=None, loader=default_loader):
        c = 0
        imgs = []
        class_names = ['regression']
        for line in label:  # label is a list of text lines
            cls = line.split()  # file name followed by the numeric columns
            fn = cls.pop(0)
            if os.path.isfile(os.path.join(root, fn)):
                imgs.append((fn, tuple([float(v) for v in cls[:len(cls)-1]])))
                # drop the last column so each label is a 9-dimensional tuple;
                # imgs is a list of (file name, label tuple) pairs
            c = c + 1
        print('the total number of images is', c)
        print(class_names)
        self.root = root
        self.imgs = imgs
        self.classes = class_names
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        fn, label = self.imgs[index]  # unpack the (file name, label tuple) pair
        img = self.loader(os.path.join(self.root, fn))
        if self.transform is not None:
            img = self.transform(img)
        return img, torch.Tensor(label), fn  # return the image, its label and its file name

    def __len__(self):
        return len(self.imgs)

    def getName(self):
        return self.classes

In essence, you subclass Dataset and override its two methods, __getitem__ and __len__; whatever __getitem__ returns (here the image tensor, the label tensor, and the file-name string) is what the DataLoader batches and yields.
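As a minimal, hypothetical illustration of that pattern (the class and variable names below are made up, and the image is a dummy tensor), any class with these two methods can be handed to a DataLoader:

import torch
import torch.utils.data as data

class NameDataset(data.Dataset):
    def __init__(self, samples):
        # samples: list of (file_name, label_tuple) pairs
        self.samples = samples

    def __getitem__(self, index):
        name, label = self.samples[index]
        img = torch.zeros(3, 224, 224)  # stand-in for a loaded and transformed image
        return img, torch.Tensor(label), name  # tensors get batched; the string is passed through

    def __len__(self):
        return len(self.samples)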

Next, look at how the DataLoader is defined and how the data is iterated over:

mytransform = transforms.Compose([transforms.ToTensor()])  # scale pixel values from [0, 255] to [0, 1]
test_data_root = "/home/ying/data/google_streetview_train_test1"
data_label = "/home/ying/data/google_streetview_train_test1/label.txt"
# test_label="/home/ying/data/google_streetview_train_test1/label.txt"
train_label, test_label = random_choose_data(data_label)
test_loader = torch.utils.data.DataLoader(
    myImageFloder(root=test_data_root, label=test_label, transform=mytransform),
    batch_size=batch_size, shuffle=True, **kwargs)
...
for i, (test_images, test_labels, fn) in enumerate(test_loader):  # i is the batch index; the tuple holds what __getitem__ returns
    test_images = Variable(test_images.cuda())
    test_labels = Variable(test_labels.cuda())
    outputs = cnn(test_images)
    print(outputs.data[0])
    print(fn)
    loss = criterion(outputs, test_labels)
    print("Iter [%d/%d] Test_Loss: %.4f" % (i + 1, len(test_loader), loss.data[0]))

The __getitem__ defined above in myImageFloder is exactly what produces the items unpacked in for i, (test_images, test_labels, fn) in enumerate(test_loader):, where the first variable i is the batch index supplied by enumerate.
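Note that although __getitem__ returns fn as a single string, the DataLoader's default collate function groups the per-sample strings of a batch into a sequence, so inside the loop fn holds batch_size file names, e.g.:

# Sketch of inspecting fn inside the test loop: test_images and test_labels are
# batched tensors, while fn is a sequence of batch_size file-name strings.
print(len(fn))  # 8, i.e. batch_size
print(fn[0])    # file name of the first sample in this batch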

This makes it possible, when testing the model, to see which samples have large errors and to print out their file names.
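For example, a sketch (not part of the original program) of using the returned names to log the error of each individual sample:

# Inside the test loop: print the loss of every sample next to its file name,
# so poorly predicted images can be identified by name.
for j, name in enumerate(fn):
    sample_loss = criterion(outputs[j:j+1], test_labels[j:j+1])
    print(name, sample_loss.data[0])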