用tensorflow迁移学习猫狗分类
笔者这几天在跟着莫烦学习TensorFlow,正好学到迁移学习(至于什么是迁移学习,看这篇),莫烦老师做的是预测猫和老虎尺寸大小的学习。作为一个有为的学生,笔者当然不能再预测猫啊狗啊的大小啦,正好之前做过猫狗大战数据集的图像分类,做好的数据都还在,二话不说,开撸。
既然是VGG16模型,当然首先上模型代码了:
def conv_layers_simple_api(net_in):
    """Stack the VGG16 convolutional layers on top of ``net_in``.

    The input layer's outputs are mean-centred in place, then passed through
    five stages (conv1..conv5). Every stage is a run of 3x3, stride-1,
    ReLU, SAME-padded convolutions followed by a 2x2, stride-2 max pool.

    Args:
        net_in: Input layer whose ``outputs`` hold RGB images with pixel
            values in the 0-255 range.

    Returns:
        The layer produced by the last pooling step (``pool5``).
    """
    with tf.name_scope('preprocess'):
        # Subtract the mean image values (calculated over the entire
        # ImageNet training set) from the 0-255 RGB input.
        mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32, shape=[1, 1, 1, 3], name='img_mean')
        net_in.outputs = net_in.outputs - mean

    # (number of filters, number of conv layers) for stages conv1..conv5.
    stages = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    network = net_in
    for stage_no, (n_filter, n_convs) in enumerate(stages, start=1):
        for conv_no in range(1, n_convs + 1):
            network = Conv2d(network, n_filter=n_filter, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu,
                             padding='SAME', name='conv%d_%d' % (stage_no, conv_no))
        network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME',
                            name='pool%d' % stage_no)
    return network
笔者偷懒直接用的是TensorLayer库中的Vgg16模型,至于什么是tensorlayer请移步这里
按照莫烦老师的教程,改写最后的全连接层做二分类学习:
def fc_layers(net):
    """Attach the two-class fully connected head to ``net``.

    Args:
        net: Layer holding the convolutional features.

    Returns:
        The final dense layer with two output units and no activation.
    """
    # Flatten the feature maps before the dense layers.
    flat = FlattenLayer(net, name='flatten')
    hidden = DenseLayer(flat, n_units=256, act=tf.nn.relu, name='fc1_relu')
    # The 4096-unit 'fc2_relu' layer is intentionally left out of this head.
    return DenseLayer(hidden, n_units=2, act=tf.identity, name='fc3_relu')
定义输入、输出、损失函数以及学习步骤:
# Input images.
x = tf.placeholder(tf.float32, [None, 224, 224, 3])
# Target class ids.
y_ = tf.placeholder(tf.int32, shape=[None, ], name='y_')

net_in = InputLayer(x, name='input')
# net_cnn = conv_layers(net_in)  # professional CNN APIs
net_cnn = conv_layers_simple_api(net_in)  # simplified CNN APIs
network = fc_layers(net_cnn)
y = network.outputs

# Predicted class id, taken from the softmax of the network outputs.
probs = tf.nn.softmax(y)
y_op = tf.argmax(probs, 1)

cost = tl.cost.cross_entropy(y, y_, name='cost')

# Accuracy: fraction of samples whose arg-max output equals the target.
predicted = tf.cast(tf.argmax(y, 1), tf.float32)
expected = tf.cast(y_, tf.float32)
correct_prediction = tf.equal(predicted, expected)
acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Define the optimizer. Only the parameters after the first 26 are trained
# (presumably the 13 conv layers' weights and biases come first -- confirm
# with network.print_params()).
train_params = network.all_params[26:]
# print(train_params)
global_step = tf.Variable(0)
# Disabled: exponentially decaying learning rate (staircase=True truncates
# global_step / decay_steps to an integer).
# learning_rate = tf.train.exponential_decay(1e-2, global_step, decay_steps=1000, decay_rate=0.98, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0.9, beta2=0.999,
                                   epsilon=1e-08, use_locking=False)
train_op = optimizer.minimize(cost, var_list=train_params)
读取训练、验证数据,加载模型参数:
img, label = read_and_decode("F:\\001-python\\train.tfrecords")
img_v, label_v = read_and_decode("F:\\001-python\\val.tfrecords")
# shuffle_batch randomly shuffles the queued inputs.
X_train, y_train = tf.train.shuffle_batch([img, label],
                                          batch_size=30, capacity=400,
                                          min_after_dequeue=300)
X_Val, y_val = tf.train.shuffle_batch([img_v, label_v],
                                      batch_size=30, capacity=400,
                                      min_after_dequeue=300)
tl.layers.initialize_global_variables(sess)
network.print_params()
network.print_layers()

# Load the pretrained weights for the convolutional part only.
# The 13 conv layers own 26 arrays (one W and one b each), which matches the
# `network.all_params[26:]` slice used for the trainable parameters. The
# previous `[0:25]` slice dropped the last conv bias, so it was never restored.
# NOTE(review): assumes the archive keys sort with every conv* entry ahead of
# the fc* entries -- confirm against the keys of vgg16_weights.npz.
npz = np.load('vgg16_weights.npz')
params = [val[1] for val in sorted(npz.items())[:26]]
# print("  Loading %s" % str(val[1].shape))

# Assign the pretrained arrays to the first len(params) network parameters.
tl.files.assign_params(sess, params, network)
加载好之后,开始训练,200个epoch:
for epoch in range(n_epoch):
    start_time = time.time()
    # Pull one shuffled batch from the input queue and train on it in
    # minibatches of `batch_size`.
    epoch_images, epoch_labels = sess.run([X_train, y_train])
    for X_train_a, y_train_a in tl.iterate.minibatches(epoch_images, epoch_labels, batch_size, shuffle=True):
        sess.run(train_op, feed_dict={x: X_train_a, y_: y_train_a})

    # Report only on the first epoch and on every fifth one.
    epoch_no = epoch + 1
    if epoch_no != 1 and epoch_no % 5 != 0:
        continue

    print("Epoch %d of %d took %fs" % (epoch_no, n_epoch, time.time() - start_time))
    # Re-evaluate loss and accuracy on the same data that was just trained on.
    losses, accuracies = [], []
    for X_train_a, y_train_a in tl.iterate.minibatches(epoch_images, epoch_labels, batch_size, shuffle=True):
        err, ac = sess.run([cost, acc], feed_dict={x: X_train_a, y_: y_train_a})
        losses.append(err)
        accuracies.append(ac)
    n_batch = len(losses)
    print(" train loss: %f" % (sum(losses) / n_batch))
    print(" train acc: %f" % (sum(accuracies) / n_batch))
保存训练的参数:
# Write every parameter of the network to model.npz.
trained_params = network.all_params
tl.files.save_npz(trained_params, name='model.npz', sess=sess)
下面就是开始训练啦,笔者很高兴的拿着自己的笔记本显卡呼呼的跑了一遍:
~~~~~~~~~~~~~~~~~~~~~~~~下面是漫长的等待
....... [TL] Epoch 138 of 150 took 0.999402s [TL] val loss: 0.687194 [TL] val acc: 0.562500 [TL] Epoch 140 of 150 took 3.782207s [TL] val loss: 0.619966 [TL] val acc: 0.750000 [TL] Epoch 142 of 150 took 0.983802s [TL] val loss: 0.685686 [TL] val acc: 0.562500 [TL] Epoch 144 of 150 took 0.986604s [TL] val loss: 0.661224 [TL] val acc: 0.687500 [TL] Epoch 146 of 150 took 1.022403s [TL] val loss: 0.675885 [TL] val acc: 0.687500 [TL] Epoch 148 of 150 took 0.991802s [TL] val loss: 0.682124 [TL] val acc: 0.625000 [TL] Epoch 150 of 150 took 3.487811s [TL] val loss: 0.674932 [TL] val acc: 0.687500 [TL] Total training time: 319.859640s [TL] [*] model.npz saved
额~~~~~~~~~~~~~~~~~
0.68的正确率,群里一位朋友看了之后说:跟猜差不多了(一脸黑线)。问题出哪儿呢?难道是笔者训练的次数不够多?莫烦老师可是100次就能出很好的结果啊
不管怎么样,要试试,笔者于是加载刚刚保存的model.npz参数继续跑100个epoch
~~~~~~~~~~~~~~~~~~~~~~~~又是漫长的等待
[TL] Epoch 1 of 100 took 8.477617s [TL] val loss: 0.685957 [TL] val acc: 0.562500 [TL] Epoch 2 of 100 took 0.999402s [TL] val loss: 0.661529 [TL] val acc: 0.625000 ...... [TL] Epoch 94 of 100 took 0.992208s [TL] val loss: 0.708815 [TL] val acc: 0.562500 [TL] Epoch 96 of 100 took 0.998406s [TL] val loss: 0.710636 [TL] val acc: 0.562500 [TL] Epoch 98 of 100 took 0.992807s [TL] val loss: 0.621505 [TL] val acc: 0.687500 [TL] Epoch 100 of 100 took 0.986405s [TL] val loss: 0.670647 [TL] val acc: 0.625000 [TL] Total training time: 156.734633s [TL] [*] model.npz saved
坑爹啊这是,还不如之前的结果。
笔者陷入深深的沉思中,难道是改了全连接层导致的?于是笔者又把之前去掉的全连接层加上:
def fc_layers(net):
    """Attach a three-layer fully connected head for two-class output.

    Args:
        net: Layer holding the convolutional features.

    Returns:
        The final dense layer with two output units and no activation.
    """
    # Flatten the feature maps before the dense layers.
    network = FlattenLayer(net, name='flatten')
    # Two hidden layers of 256 ReLU units each.
    for layer_name in ('fc1_relu', 'fc2_relu'):
        network = DenseLayer(network, n_units=256, act=tf.nn.relu, name=layer_name)
    return DenseLayer(network, n_units=2, act=tf.identity, name='fc3_relu')
接着训练
~~~~~~~~~~~~~~~~~~~~~~~~下面又是漫长的等待
1 [TL] Epoch 1 of 100 took 8.477229s 2 [TL] val loss: 2.370650 3 [TL] val acc: 0.562500 4 ... 5 [TL] Epoch 100 of 100 took 1.016002s 6 [TL] val loss: 0.762171 7 [TL] val acc: 0.437500 8 [TL] Total training time: 156.836465s 9 [TL] [*] model.npz saved
还是一样,笔者已崩溃了,一定是哪儿不对啊啊啊....于是笔者去翻莫烦老师的代码,一点点对下来,每一层参数肯定不会有错,那就是在训练设置的参数有问题。
1 self.train_op = tf.train.RMSPropOptimizer(0.001).minimize(self.loss) #莫烦的代码 2 train_op = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0.9, beta2=0.999, 3 epsilon=1e-08, use_locking=False).minimize(cost, var_list=train_params)#笔者的
看到train_params难道是这个train_params?笔者只优化了最后的全连接层参数而莫烦老师优化的是全部参数
已经深夜了,笔者表示即使不睡觉也要跑一遍试试,于是改成
1 # 定义 optimizer 2 train_params = network.all_params 3 ~~~~~~~~~~~~~~~~~~~~~~~~于是又是漫长的等待 4 5 [TL] Epoch 1 of 100 took 20.286640s 6 [TL] val loss: 11.938850 7 [TL] val acc: 0.312500 8 [TL] Epoch 2 of 100 took 3.091806s 9 [TL] val loss: 2.890055 10 [TL] val acc: 0.625000 11 [TL] Epoch 4 of 100 took 3.074205s 12 [TL] val loss: 24.055895 13 [TL] val acc: 0.687500 14 [TL] .... 15 [TL] val loss: 0.699907 16 [TL] val acc: 0.500000 17 [TL] Epoch 98 of 100 took 3.089206s 18 [TL] val loss: 0.683627 19 [TL] val acc: 0.562500 20 [TL] Epoch 100 of 100 took 3.091806s 21 [TL] val loss: 0.708496 22 [TL] val acc: 0.562500 23 [TL] Total training time: 375.727307s 24 [TL] [*] model.npz saved
效果变得更差了....
排除参数的问题,已经深夜1点了,明天还要上班,不得不睡啦。
继续崩溃第三天~~~
第四天~~~
第五天,今天供应商过来公司调试机器,来的正好是一个学图像处理的小伙子,我提到这个问题说:我训练了这么多代,为啥还是像猜一样的概率....?小伙儿说:莫不是过拟合了吧?我说:不可能啊,现成的数据、现成的模型和参数,不应该的啊!
不过我还是得检查一下数据处理的代码
# Build the TFRecord data file.
def create_record(filelist, image_size=(224, 224)):
    """Write the images named in ``filelist`` to the TFRecord at ``recordpath``.

    Each image is read from ``file_dir``, converted to RGB, resized to
    ``image_size`` and stored as raw bytes together with an integer label:
    0 when the part of the file name before the first '.' is ``cat``,
    1 otherwise.

    Args:
        filelist: List of image file names; shuffled in place.
        image_size: ``(width, height)`` stored in the record. The default
            matches the ``[224, 224, 3]`` reshape in ``read_and_decode``;
            the previous hard-coded 240x240 produced records that could
            not be reshaped when read back.
    """
    random.shuffle(filelist)
    writer = tf.python_io.TFRecordWriter(recordpath)
    try:
        for count, file in enumerate(filelist, start=1):
            name = file.split(sep='.')
            label_val = 0 if name[0] == 'cat' else 1
            img_path = file_dir + file
            with Image.open(img_path) as src:
                # Force three channels so the byte count always equals
                # width * height * 3, whatever the source image mode is.
                img_raw = src.convert('RGB').resize(image_size).tobytes()
            # Wrap label and image bytes in one Example.
            example = tf.train.Example(features=tf.train.Features(feature={
                "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label_val])),
                'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])),
            }))
            writer.write(example.SerializeToString())
            print(name[1])
            print(label_val)
            print(count)
    finally:
        # Close the record file even if an image fails to load.
        writer.close()


# Read the file through an input queue.
def read_and_decode(filename):
    """Return ``(img, label)`` tensors for one example read from ``filename``.

    Args:
        filename: Path of the TFRecord file to read.

    Returns:
        A float32 image tensor of shape [224, 224, 3] and an int32 label
        tensor.
    """
    # Build a filename queue and read one serialized example from it.
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'label': tf.FixedLenFeature([], tf.int64),
                                           'img_raw': tf.FixedLenFeature([], tf.string),
                                       })
    img = tf.decode_raw(features['img_raw'], tf.uint8)
    img = tf.reshape(img, [224, 224, 3])
    # NOTE: this rescales the 0-255 pixels to [-0.5, 0.5], whereas the
    # preprocessing in conv_layers_simple_api subtracts the ImageNet mean
    # from 0-255 values. This is the line examined in the text that follows.
    img = tf.cast(img, tf.float32) * (1. / 255) - 0.5
    label = tf.cast(features['label'], tf.int32)
    return img, label
img = tf.cast(img, tf.float32) * (1. / 255) - 0.5 难道是这一步处理多余?注释掉之后,训练模型
1 Epoch 85 of 200 took 1.234071s 2 train loss: 14.689816 3 train acc: 0.900000 4 [TL] [*] model3.npz saved 5 Epoch 90 of 200 took 1.241071s 6 train loss: 17.104382 7 train acc: 0.800000 8 [TL] [*] model3.npz saved 9 Epoch 95 of 200 took 1.236071s 10 train loss: 11.190630 11 train acc: 0.850000 12 [TL] [*] model3.npz saved 13 Epoch 100 of 200 took 1.238071s 14 train loss: 0.000000 15 train acc: 1.000000 16 [TL] [*] model3.npz saved 17 Epoch 105 of 200 took 1.236071s 18 train loss: 7.622324 19 train acc: 0.900000 20 [TL] [*] model3.npz saved 21 Epoch 110 of 200 took 1.234071s 22 train loss: 2.164670 23 train acc: 0.950000 24 [TL] [*] model3.npz saved 25 Epoch 115 of 200 took 1.237071s 26 train loss: 0.000000 27 train acc: 1.000000 28 [TL] [*] model3.npz saved
准确度1,停停停...不用跑完了,Perfect!
原来如此,必须要真实的像素值.......心好累......,笔者已经不记得哪儿抄来的这一行了。
嗯,VGG16模型的迁移学习到此结束,代码见github
- 上一篇 »java 8-6 抽象的练习
- 下一篇 »java8-3 多态的好处和弊端以及多态的理解