TensorFlow - CIFAR-10のチュートリアルを試してみる
- 原文
- 翻訳
http://qiita.com/KojiOhki/items/e218f36840df10ae358d
所感
注意点
実験結果
$ wget https://tensorflow.googlesource.com/tensorflow/+archive/0.6.0/tensorflow/models/image/cifar10.tar.gz
$ tar xzvf cifar10.tar.gz
$ python cifar10_train.py  # 時間がかかるので,同ファイルのmax_stepsの値を下げたほうが無難かも.
$ python cifar10_eval.py
チュートリアルを実行するだけなら上のコマンドで大丈夫です.(最新版だとcifar10_eval.pyで下記のエラーを吐くのでv0.6.0を利用しています.)
AttributeError: 'ExponentialMovingAverage' object has no attribute 'variables_to_restore'
cifar10_train.pyではモデルの評価をしないので,分類精度を知りたい場合はcifar10_eval.pyを実行する必要があります.
cifar10_train.pyでは反復処理1000回ごとに一度,モデルの全パラメータを保存したcheckpointを生成してくれるので,3000回程度反復させてから精度を調べてみると良いかもしれません.
コード
また,MNISTのチュートリアルのコードを基に,保存処理や視覚化,GPU演算などを取り払って,チュートリアルの訓練部分を書き直してみたのが下記のコードです.
# cifar10_train.py import os import tensorflow as tf import cPickle import numpy as np import cv2 import time IMAGE_SIZE = 24 NUM_CLASSES = 10 def unpickle(filename): with open(filename, 'rb') as fp: return cPickle.load(fp) def maybe_download(): if not os.path.exists("cifar-10-batches-py"): os.system("curl http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz -o cifar-10-python.tar.gz") os.system("tar xzvf cifar-10-python.tar.gz") def shuffle(images, labels): perm = np.arange(len(labels)) np.random.shuffle(perm) return np.array(images)[perm], np.array(labels)[perm] def dense_to_one_hot(labels_dense, num_classes=10): num_labels = labels_dense.shape[0] index_offset = np.arange(num_labels) * num_classes labels_one_hot = np.zeros((num_labels, num_classes)) labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1 return labels_one_hot def crop_to_bounding_box(image, offset_height, offset_width, target_height, target_width): return image[offset_width:offset_width+target_width, offset_height:offset_height+target_height] def random_contrast(image, lower, upper, seed=None): contrast_factor = np.random.uniform(lower, upper) avg = np.mean(image) return (image - avg) * contrast_factor + avg def random_brightness(image, max_delta, seed=None): delta = np.random.randint(-max_delta, max_delta) return image - delta def per_image_whitening(image): return (image - np.mean(image)) / np.std(image) def random_flip_left_right(image): if np.random.random() < 0.5: image = cv2.flip(image, 1) return image def random_crop(image, size): W, H, D = image.shape w, h, d = size left, top = np.random.randint(W - w + 1), np.random.randint(H - h + 1) return image[left:left+w, top:top+h] def distort(images, is_train=True): for i, image in enumerate(images): image = np.array(image) image = image.astype(float) if is_train: image = random_crop(image, (24, 24, 3)) image = random_flip_left_right(image) image = random_brightness(image, max_delta=63) image = random_contrast(image, lower=0.2, upper=1.8) 
else: image = crop_to_bounding_box(image, 4, 4, 24, 24) images[i] = per_image_whitening(image) return images def load(is_train=True): images, labels = [], [] if is_train: for j in range(1, 6): cifar10 = unpickle("cifar-10-batches-py/data_batch_%d" % j) for i in range(len(cifar10["labels"])): image = np.reshape(cifar10["data"][i], (3, 32, 32)) image = np.transpose(image, (1, 2, 0)) images.append(image) labels.append(cifar10["labels"][i]) else: cifar10 = unpickle("cifar-10-batches-py/test_batch") for i in range(len(cifar10["labels"])): image = np.reshape(cifar10["data"][i], (3, 32, 32)) image = np.transpose(image, (1, 2, 0)) images.append(image) labels.append(cifar10["labels"][i]) images = distort(images, is_train) one_hot_labels = dense_to_one_hot(np.array(labels)) return shuffle(images, one_hot_labels) def _variable_on_cpu(name, shape, initializer): with tf.device('/cpu:0'): var = tf.get_variable(name, shape, initializer=initializer) return var def _variable_with_weight_decay(name, shape, stddev, wd): var = _variable_on_cpu(name, shape, tf.truncated_normal_initializer(stddev=stddev)) if wd is not None: weight_decay = tf.mul(tf.nn.l2_loss(var), wd, name='weight_loss') tf.add_to_collection('losses', weight_decay) return var def conv2d(x, W): return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME') def inference(images, keep_prob): with tf.variable_scope('conv1') as scope: dim1 = 64 W1 = _variable_with_weight_decay('weights', shape=[5, 5, 3, dim1], stddev=1e-4, wd=0.0) b1 = _variable_on_cpu('biases', [dim1], tf.constant_initializer(0.0)) conv1 = tf.nn.relu(conv2d(images, W1) + b1) pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool1') h1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1') with tf.variable_scope('conv2') as scope: dim2 = 64 W2 = _variable_with_weight_decay('weights', shape=[5, 5, dim1, dim2], stddev=1e-4, wd=0.0) b2 = _variable_on_cpu('biases', [dim2], 
tf.constant_initializer(0.1)) conv2 = tf.nn.relu(conv2d(h1, W2) + b2) pool2 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool2') h2 = tf.nn.lrn(pool2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2') with tf.variable_scope('local3') as scope: dim3 = 384 W3 = _variable_with_weight_decay('weights', shape=[IMAGE_SIZE*IMAGE_SIZE*dim2 / 16, dim3], stddev=0.04, wd=0.004) b3 = _variable_on_cpu('biases', [dim3], tf.constant_initializer(0.1)) h3 = tf.nn.relu(tf.matmul(tf.reshape(h2, [-1, IMAGE_SIZE*IMAGE_SIZE*dim2 / 16]), W3) + b3) with tf.variable_scope('local4') as scope: dim4 = 192 W4 = _variable_with_weight_decay('weights', shape=[dim3, dim4], stddev=0.04, wd=0.004) b4 = _variable_on_cpu('biases', [dim4], tf.constant_initializer(0.1)) h4 = tf.nn.relu(tf.matmul(h3, W4) + b4) with tf.variable_scope('softmax_linear') as scope: W5 = _variable_with_weight_decay('weights', shape=[dim4, NUM_CLASSES], stddev=1/192.0, wd=0.0) b5 = _variable_on_cpu('biases', [NUM_CLASSES], tf.constant_initializer(0.0)) y = tf.nn.softmax(tf.matmul(h4, W5) + b5) return y def loss(labels, logits): return -tf.reduce_mean(labels * tf.log(tf.clip_by_value(logits, 1e-10, 1.0))) def train(total_loss): return tf.train.AdamOptimizer(1e-4).minimize(total_loss) def accuracy_score(labels, logits): correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float")) return accuracy def main(argv): maybe_download() train_images, train_labels = load(is_train=True) test_images, test_labels = load(is_train=False) max_epoch, batch_size = 200, 50 with tf.Session() as sess: images = tf.placeholder("float", shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3]) labels = tf.placeholder("float", shape=[None, NUM_CLASSES]) keep_prob = tf.placeholder("float") is_train = tf.placeholder("bool") logits = inference(images, keep_prob) total_loss = loss(labels, logits) if is_train: train_op = train(total_loss) accuracy = 
accuracy_score(labels, logits) sess.run(tf.initialize_all_variables()) feed_dict={images: test_images, labels: test_labels, keep_prob: 1.0, is_train: False} test_logits, test_total_loss, test_acc = sess.run(fetches=[logits, total_loss, accuracy], feed_dict=feed_dict) best_test_acc, best_test_total_loss, num_keep = test_acc, test_total_loss, 0 print "total test loss:%.1f, test accuracy:%.2f" % (best_test_total_loss, best_test_acc) for epoch in range(max_epoch): train_images, train_labels = load(is_train=True) start_time = time.time() for i in range(0, len(train_images), batch_size): batch_images, batch_labels = train_images[i:i+batch_size], train_labels[i:i+batch_size] feed_dict={images: batch_images, labels: batch_labels, keep_prob: 0.5, is_train: True} _, train_logits, train_total_loss, train_acc = sess.run(fetches=[train_op, logits, total_loss, accuracy], feed_dict=feed_dict) duration = time.time() - start_time feed_dict={images: test_images, labels: test_labels, keep_prob: 1.0, is_train: False} test_logits, test_total_loss, test_acc = sess.run(fetches=[logits, total_loss, accuracy], feed_dict=feed_dict) examples_per_sec = (len(train_images) / duration) print "[%d][cifar10]train-loss:%.3f, train-accuracy:%.2f," % (epoch, train_total_loss, 100 * train_acc), print "test-loss:%.3f(best: %.3f), test-accuracy:%.2f(best: %.2f)" % (test_total_loss, best_test_total_loss, 100 * test_acc, 100 * best_test_acc), print '(%.1f examples/sec)' % examples_per_sec if best_test_acc < test_acc: print "[BEST] Acc: %.3f -> %.3f, Loss: %.3f -> %.3f" % (best_test_acc, test_acc, best_test_total_loss, test_total_loss) best_test_acc, best_test_total_loss = test_acc, test_total_loss if __name__ == "__main__": tf.app.run()
実行結果
$ time python cifar10_train.py
I tensorflow/stream_executor/dso_loader.cc:105] successfully opened CUDA library libcublas.so locally
I tensorflow/stream_executor/dso_loader.cc:105] successfully opened CUDA library libcudnn.so locally
I tensorflow/stream_executor/dso_loader.cc:105] successfully opened CUDA library libcufft.so locally
I tensorflow/stream_executor/dso_loader.cc:105] successfully opened CUDA library libcuda.so.1 locally
I tensorflow/stream_executor/dso_loader.cc:105] successfully opened CUDA library libcurand.so locally
I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:900] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
I tensorflow/core/common_runtime/gpu/gpu_init.cc:102] Found device 0 with properties:
name: GeForce GTX TITAN X
major: 5 minor: 2 memoryClockRate (GHz) 1.076
...
total test loss:0.2, test accuracy:0.10
[0][cifar10]train-loss:0.177, train-accuracy:34.00, test-loss:0.160(best: 0.230), test-accuracy:40.74(best: 10.00) (563.9 examples/sec)
[BEST] Acc: 0.100 -> 0.407, Loss: 0.230 -> 0.160
[1][cifar10]train-loss:0.152, train-accuracy:56.00, test-loss:0.151(best: 0.160), test-accuracy:44.96(best: 40.74) (545.4 examples/sec)
[BEST] Acc: 0.407 -> 0.450, Loss: 0.160 -> 0.151
[2][cifar10]train-loss:0.162, train-accuracy:40.00, test-loss:0.137(best: 0.151), test-accuracy:51.04(best: 44.96) (552.1 examples/sec)
[BEST] Acc: 0.450 -> 0.510, Loss: 0.151 -> 0.137
[3][cifar10]train-loss:0.121, train-accuracy:62.00, test-loss:0.128(best: 0.137), test-accuracy:54.03(best: 51.04) (549.5 examples/sec)
[BEST] Acc: 0.510 -> 0.540, Loss: 0.137 -> 0.128
...