From 8e0effd8adfb638ced00dbbd3725a13c6f8195f2 Mon Sep 17 00:00:00 2001 From: Sergey Karayev Date: Sun, 23 Feb 2014 13:02:32 -0800 Subject: [PATCH 1/3] Adding GPU coverage to the DataLayer test. --- src/caffe/test/test_data_layer.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/caffe/test/test_data_layer.cpp b/src/caffe/test/test_data_layer.cpp index 66e9956838b..719c50eb058 100644 --- a/src/caffe/test/test_data_layer.cpp +++ b/src/caffe/test/test_data_layer.cpp @@ -81,6 +81,7 @@ TYPED_TEST(DataLayerTest, TestRead) { EXPECT_EQ(this->blob_top_label_->channels(), 1); EXPECT_EQ(this->blob_top_label_->height(), 1); EXPECT_EQ(this->blob_top_label_->width(), 1); + // Go through the data 100 times for (int iter = 0; iter < 100; ++iter) { layer.Forward(this->blob_bottom_vec_, &this->blob_top_vec_); @@ -94,6 +95,21 @@ TYPED_TEST(DataLayerTest, TestRead) { } } } + + // Same test, in GPU mode. + Caffe::set_mode(Caffe::GPU); + for (int iter = 0; iter < 100; ++iter) { + layer.Forward(this->blob_bottom_vec_, &this->blob_top_vec_); + for (int i = 0; i < 5; ++i) { + EXPECT_EQ(i, this->blob_top_label_->cpu_data()[i]); + } + for (int i = 0; i < 5; ++i) { + for (int j = 0; j < 24; ++j) { + EXPECT_EQ(i, this->blob_top_data_->cpu_data()[i * 24 + j]) + << "debug: i " << i << " j " << j; + } + } + } } } From e3db0c063a86a84e7573f0d5b5aa4e348eb2005a Mon Sep 17 00:00:00 2001 From: Sergey Karayev Date: Sun, 23 Feb 2014 12:29:14 -0800 Subject: [PATCH 2/3] HDF5DataLayer, with test. 
--- Makefile | 9 +- include/caffe/util/io.hpp | 9 ++ include/caffe/vision_layers.hpp | 30 ++++ src/caffe/layer_factory.cpp | 2 + src/caffe/layers/hdf5_data_layer.cpp | 107 ++++++++++++++ .../test/test_data/generate_sample_data.py | 17 +++ src/caffe/test/test_data/sample_data.h5 | Bin 0 -> 2504 bytes src/caffe/test/test_hdf5data_layer.cpp | 130 ++++++++++++++++++ src/caffe/util/io.cpp | 38 +++++ 9 files changed, 340 insertions(+), 2 deletions(-) create mode 100644 src/caffe/layers/hdf5_data_layer.cpp create mode 100644 src/caffe/test/test_data/generate_sample_data.py create mode 100644 src/caffe/test/test_data/sample_data.h5 create mode 100644 src/caffe/test/test_hdf5data_layer.cpp diff --git a/Makefile b/Makefile index dc3d51d8da9..3ebec0d3ca7 100644 --- a/Makefile +++ b/Makefile @@ -69,8 +69,13 @@ MKL_LIB_DIR := $(MKL_DIR)/lib $(MKL_DIR)/lib/intel64 INCLUDE_DIRS += ./src ./include $(CUDA_INCLUDE_DIR) $(MKL_INCLUDE_DIR) LIBRARY_DIRS += $(CUDA_LIB_DIR) $(MKL_LIB_DIR) -LIBRARIES := cudart cublas curand mkl_rt pthread \ - glog protobuf leveldb snappy boost_system \ +LIBRARIES := cudart cublas curand \ + mkl_rt \ + pthread \ + glog protobuf leveldb \ + snappy \ + boost_system \ + hdf5 hdf5_hl \ opencv_core opencv_highgui opencv_imgproc PYTHON_LIBRARIES := boost_python python2.7 WARNINGS := -Wall diff --git a/include/caffe/util/io.hpp b/include/caffe/util/io.hpp index 3174fd0d5fc..b2d489ce720 100644 --- a/include/caffe/util/io.hpp +++ b/include/caffe/util/io.hpp @@ -5,6 +5,10 @@ #include +#include +#include "hdf5.h" +#include "hdf5_hl.h" + #include #include "caffe/blob.hpp" @@ -48,6 +52,11 @@ inline bool ReadImageToDatum(const string& filename, const int label, return ReadImageToDatum(filename, label, 0, 0, datum); } +template +void load_2d_dataset( + hid_t file_id, const char* dataset_name_, + boost::scoped_ptr* array, hsize_t* dims); + } // namespace caffe #endif // CAFFE_UTIL_IO_H_ diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp index 
5dd3c9076d6..b6f607a1d9f 100644 --- a/include/caffe/vision_layers.hpp +++ b/include/caffe/vision_layers.hpp @@ -5,6 +5,9 @@ #include #include +#include + +#include "hdf5.h" #include @@ -351,6 +354,33 @@ class DataLayer : public Layer { }; +template +class HDF5DataLayer : public Layer { + public: + explicit HDF5DataLayer(const LayerParameter& param) + : Layer(param) {} + virtual ~HDF5DataLayer(); + virtual void SetUp(const vector*>& bottom, + vector*>* top); + + protected: + virtual void Forward_cpu(const vector*>& bottom, + vector*>* top); + virtual void Forward_gpu(const vector*>& bottom, + vector*>* top); + virtual Dtype Backward_cpu(const vector*>& top, + const bool propagate_down, vector*>* bottom); + virtual Dtype Backward_gpu(const vector*>& top, + const bool propagate_down, vector*>* bottom); + + boost::scoped_ptr data; + boost::scoped_ptr label; + hsize_t data_dims[2]; + hsize_t label_dims[2]; + hsize_t current_row; +}; + + template class SoftmaxLayer : public Layer { public: diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp index 8733ff863e6..d7798ea9053 100644 --- a/src/caffe/layer_factory.cpp +++ b/src/caffe/layer_factory.cpp @@ -27,6 +27,8 @@ Layer* GetLayer(const LayerParameter& param) { return new ConvolutionLayer(param); } else if (type == "data") { return new DataLayer(param); + } else if (type == "hdf5_data") { + return new HDF5DataLayer(param); } else if (type == "dropout") { return new DropoutLayer(param); } else if (type == "euclidean_loss") { diff --git a/src/caffe/layers/hdf5_data_layer.cpp b/src/caffe/layers/hdf5_data_layer.cpp new file mode 100644 index 00000000000..eac01d56c68 --- /dev/null +++ b/src/caffe/layers/hdf5_data_layer.cpp @@ -0,0 +1,107 @@ +/* +TODO: +- only load parts of the file, in accordance with a prototxt param "max_mem" +- How does Dtype affect the loading of the data, which is always float? 
+*/
+
+#include
+#include
+#include
+#include
+
+#include "hdf5.h"
+#include "hdf5_hl.h"
+
+#include "caffe/layer.hpp"
+#include "caffe/util/io.hpp"
+#include "caffe/vision_layers.hpp"
+
+using std::string;
+
+namespace caffe {
+
+template
+HDF5DataLayer::~HDF5DataLayer() { }
+
+template
+void HDF5DataLayer::SetUp(const vector*>& bottom,
+      vector*>* top) {
+  CHECK_EQ(bottom.size(), 0) << "HDF5DataLayer takes no input blobs.";
+  CHECK_EQ(top->size(), 2) << "HDF5DataLayer takes two blobs as output.";
+  // Load both datasets fully into host memory and rewind the row cursor.
+  const char* hdf_filename = this->layer_param_.source().c_str();
+  LOG(INFO) << "Loading HDF5 file " << hdf_filename;
+  hid_t file_id = H5Fopen(hdf_filename, H5F_ACC_RDONLY, H5P_DEFAULT);
+  CHECK_GE(file_id, 0) << "Failed to open HDF5 file " << hdf_filename;
+  load_2d_dataset(file_id, "data", &data, data_dims);
+  load_2d_dataset(file_id, "label", &label, label_dims);
+  CHECK_GE(H5Fclose(file_id), 0) << "Failed to close " << hdf_filename;
+  CHECK_EQ(data_dims[0], label_dims[0]);  // One label row per data row.
+  current_row = 0;
+
+  // Reshape blobs: (batchsize, columns, 1, 1) for both data and label.
+  (*top)[0]->Reshape(this->layer_param_.batchsize(), data_dims[1], 1, 1);
+  (*top)[1]->Reshape(this->layer_param_.batchsize(), label_dims[1], 1, 1);
+  LOG(INFO) << "output data size: " << (*top)[0]->num() << ","
+      << (*top)[0]->channels() << "," << (*top)[0]->height() << ","
+      << (*top)[0]->width();
+}
+
+template
+void HDF5DataLayer::Forward_cpu(const vector*>& bottom,
+      vector*>* top) {
+  const int batchsize = this->layer_param_.batchsize();
+  for (int i = 0; i < batchsize; ++i, ++current_row) {
+    if (current_row == data_dims[0]) {  // Past the last row: wrap around.
+      current_row = 0;
+    }
+
+    memcpy(&(*top)[0]->mutable_cpu_data()[i * data_dims[1]],
+           &(data.get()[current_row * data_dims[1]]),
+           sizeof(Dtype) * data_dims[1]);
+
+    memcpy(&(*top)[1]->mutable_cpu_data()[i * label_dims[1]],
+           &(label.get()[current_row * label_dims[1]]),
+           sizeof(Dtype) * label_dims[1]);
+  }
+}
+
+template
+void HDF5DataLayer::Forward_gpu(const vector*>& bottom,
+      vector*>* top) {
+  const int batchsize = this->layer_param_.batchsize();
+  for (int i = 0; i < batchsize; ++i, ++current_row) {
+    if (current_row == data_dims[0]) {  // Past the last row: wrap around.
+      current_row = 0;
+    }
+
+    CUDA_CHECK(cudaMemcpy(
+        &(*top)[0]->mutable_gpu_data()[i * data_dims[1]],
+        &(data.get()[current_row * data_dims[1]]),
+        sizeof(Dtype) * data_dims[1],
+        cudaMemcpyHostToDevice));
+
+    CUDA_CHECK(cudaMemcpy(
+        &(*top)[1]->mutable_gpu_data()[i * label_dims[1]],
+        &(label.get()[current_row * label_dims[1]]),
+        sizeof(Dtype) * label_dims[1],
+        cudaMemcpyHostToDevice));
+  }
+}
+
+// The backward operations are dummy - they do not carry any computation.
+template
+Dtype HDF5DataLayer::Backward_cpu(const vector*>& top,
+      const bool propagate_down, vector*>* bottom) {
+  return Dtype(0.);
+}
+
+template
+Dtype HDF5DataLayer::Backward_gpu(const vector*>& top,
+      const bool propagate_down, vector*>* bottom) {
+  return Dtype(0.);
+}
+
+INSTANTIATE_CLASS(HDF5DataLayer);
+
+}  // namespace caffe
diff --git a/src/caffe/test/test_data/generate_sample_data.py b/src/caffe/test/test_data/generate_sample_data.py
new file mode 100644
index 00000000000..1f16fad9160
--- /dev/null
+++ b/src/caffe/test/test_data/generate_sample_data.py
@@ -0,0 +1,17 @@
+"""
+Generate data used in the HDF5DataLayer test.
+""" + +import numpy as np +import h5py + +num_cols = 8 +num_rows = 10 +data = np.arange(num_cols * num_rows).reshape(num_rows, num_cols) +label = np.arange(num_rows)[:, np.newaxis] +print data +print label + +with h5py.File('./sample_data.h5', 'w') as f: + f['data'] = data.astype('float32') + f['label'] = label.astype('float32') diff --git a/src/caffe/test/test_data/sample_data.h5 b/src/caffe/test/test_data/sample_data.h5 new file mode 100644 index 0000000000000000000000000000000000000000..fe581afaa72bf3acb536682456e3104d0400f0fa GIT binary patch literal 2504 zcmeIyF)Twt7{KxGy_P%;Nr{Ar#2X9-LkELF;#FGdAT$^ZhE_-nO&AOYiAF?9L>dth z5$R|&8jT%|Mx)Val-FQJ8p4JtuFMQkZ#JohhwZC2CznwQboy`g}U3^#TZIX)H*6VNQM*r*Iz5}^z zI&aoErweuYRM$Ln>t8Ic6-~QTTv#emyS$7{=NF16%{GPx@?@Y9#q({4Jvba4js5mI zM&_HD?O`8h%al;M0_oapv_+b@(~SGR4H)SBm{-t{k$sTP@6aci&XuXejL5Q%d`~qq zIhFCme_3xBd;huayskIBKku)Nem1Y%kMHuzU4K2VYWh!Z^VseCPv@2U9{=@k+=22a ztBQD|j2f!Z1b +#include + +#include + +#include "gtest/gtest.h" +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/vision_layers.hpp" +#include "caffe/proto/caffe.pb.h" +#include "caffe/test/test_caffe_main.hpp" + +using std::string; + +namespace caffe { + +extern cudaDeviceProp CAFFE_TEST_CUDA_PROP; + +template +class HDF5DataLayerTest : public ::testing::Test { + protected: + HDF5DataLayerTest() + : blob_top_data_(new Blob()), + blob_top_label_(new Blob()), + filename(NULL) {}; + virtual void SetUp() { + blob_top_vec_.push_back(blob_top_data_); + blob_top_vec_.push_back(blob_top_label_); + + // TODO: generate sample HDF5 file on the fly. + // For now, use example HDF5 file. + // TODO: how to best deal with the relativeness of the path? 
+    filename = "src/caffe/test/test_data/sample_data.h5";
+    LOG(INFO) << "Using sample HDF5 data file " << filename;
+  };
+
+  virtual ~HDF5DataLayerTest() {
+    delete blob_top_data_;
+    delete blob_top_label_;
+  }
+
+  const char* filename;  // Points at a string literal, hence const.
+  Blob* const blob_top_data_;
+  Blob* const blob_top_label_;
+  vector*> blob_bottom_vec_;
+  vector*> blob_top_vec_;
+};
+
+typedef ::testing::Types Dtypes;
+TYPED_TEST_CASE(HDF5DataLayerTest, Dtypes);
+
+TYPED_TEST(HDF5DataLayerTest, TestRead) {
+  // Create LayerParameter with the known parameters.
+  // The data file we are reading has 10 rows and 8 columns,
+  // with values from 0 to 10*8 reshaped in row-major order.
+  LayerParameter param;
+  int batchsize = 5;
+  param.set_batchsize(batchsize);
+  param.set_source(this->filename);
+  int num_rows = 10;
+  int num_cols = 8;
+  HDF5DataLayer layer(param);
+
+  // Test that the layer setup got the correct parameters.
+  layer.SetUp(this->blob_bottom_vec_, &this->blob_top_vec_);
+  EXPECT_EQ(this->blob_top_data_->num(), batchsize);
+  EXPECT_EQ(this->blob_top_data_->channels(), num_cols);
+  EXPECT_EQ(this->blob_top_data_->height(), 1);
+  EXPECT_EQ(this->blob_top_data_->width(), 1);
+
+  EXPECT_EQ(this->blob_top_label_->num(), batchsize);
+  EXPECT_EQ(this->blob_top_label_->channels(), 1);
+  EXPECT_EQ(this->blob_top_label_->height(), 1);
+  EXPECT_EQ(this->blob_top_label_->width(), 1);
+
+  Caffe::set_mode(Caffe::CPU);  // Do not inherit GPU mode from earlier tests.
+  for (int iter = 0; iter < 100; ++iter) {  // 100 batches = 50 passes.
+    layer.Forward(this->blob_bottom_vec_, &this->blob_top_vec_);
+
+    // On even iterations, we're reading the first half of the data.
+    // On odd iterations, we're reading the second half of the data.
+    int label_offset = (iter % 2 == 0) ? 0 : batchsize;
+    int data_offset = (iter % 2 == 0) ? 0 : batchsize * num_cols;
+
+    for (int i = 0; i < batchsize; ++i) {
+      EXPECT_EQ(
+        label_offset + i,
+        this->blob_top_label_->cpu_data()[i]);
+    }
+    for (int i = 0; i < batchsize; ++i) {
+      for (int j = 0; j < num_cols; ++j) {
+        EXPECT_EQ(
+          data_offset + i * num_cols + j,
+          this->blob_top_data_->cpu_data()[i * num_cols + j])
+          << "debug: i " << i << " j " << j;
+      }
+    }
+  }
+
+  // Exact same test in GPU mode.
+  Caffe::set_mode(Caffe::GPU);
+  // Read another 100 batches (50 passes over the 10 rows).
+  for (int iter = 0; iter < 100; ++iter) {
+    layer.Forward(this->blob_bottom_vec_, &this->blob_top_vec_);
+
+    // On even iterations, we're reading the first half of the data.
+    // On odd iterations, we're reading the second half of the data.
+    int label_offset = (iter % 2 == 0) ? 0 : batchsize;
+    int data_offset = (iter % 2 == 0) ? 0 : batchsize * num_cols;
+
+    for (int i = 0; i < batchsize; ++i) {
+      EXPECT_EQ(
+        label_offset + i,
+        this->blob_top_label_->cpu_data()[i]);
+    }
+    for (int i = 0; i < batchsize; ++i) {
+      for (int j = 0; j < num_cols; ++j) {
+        EXPECT_EQ(
+          data_offset + i * num_cols + j,
+          this->blob_top_data_->cpu_data()[i * num_cols + j])
+          << "debug: i " << i << " j " << j;
+      }
+    }
+  }
+}
+
+}  // namespace caffe
diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp
index 0f0060aeb1c..2ed8127d27b 100644
--- a/src/caffe/util/io.cpp
+++ b/src/caffe/util/io.cpp
@@ -102,4 +102,42 @@ bool ReadImageToDatum(const string& filename, const int label,
   return true;
 }
 
+template <>
+void load_2d_dataset(hid_t file_id, const char* dataset_name_,
+        boost::scoped_ptr* array, hsize_t* dims) {
+  // Verify rank and type class before sizing the buffer from dims.
+  int ndims;
+  herr_t status = H5LTget_dataset_ndims(file_id, dataset_name_, &ndims);
+  assert(status >= 0 && ndims == 2);
+
+  H5T_class_t class_;
+  status = H5LTget_dataset_info(
+      file_id, dataset_name_, dims, &class_, NULL);
+  assert(status >= 0 && class_ == H5T_FLOAT);  // Class, not a native type id.
+
+  array->reset(new float[dims[0] * dims[1]]);
+  status = H5LTread_dataset_float(
+      file_id, dataset_name_, array->get());
+  assert(status >= 0);
+}
+
+template <>
+void load_2d_dataset(hid_t file_id, const char* dataset_name_,
+        boost::scoped_ptr* array, hsize_t* dims) {
+  // Verify rank and type class before sizing the buffer from dims.
+  int ndims;
+  herr_t status = H5LTget_dataset_ndims(file_id, dataset_name_, &ndims);
+  assert(status >= 0 && ndims == 2);
+
+  H5T_class_t class_;
+  status = H5LTget_dataset_info(
+      file_id, dataset_name_, dims, &class_, NULL);
+  assert(status >= 0 && class_ == H5T_FLOAT);  // Class, not a native type id.
+
+  array->reset(new double[dims[0] * dims[1]]);
+  status = H5LTread_dataset_double(
+      file_id, dataset_name_, array->get());
+  assert(status >= 0);
+}
+
 }  // namespace caffe

From e21cea021ded3c470e32885a23ca8351f9309bef Mon Sep 17 00:00:00 2001
From: Sergey Karayev
Date: Mon, 24 Feb 2014 22:35:33 -0800
Subject: [PATCH 3/3] minor comment edit

---
 src/caffe/layers/hdf5_data_layer.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/caffe/layers/hdf5_data_layer.cpp b/src/caffe/layers/hdf5_data_layer.cpp
index eac01d56c68..0f8f68e9d6d 100644
--- a/src/caffe/layers/hdf5_data_layer.cpp
+++ b/src/caffe/layers/hdf5_data_layer.cpp
@@ -1,7 +1,6 @@
 /*
 TODO:
 - only load parts of the file, in accordance with a prototxt param "max_mem"
-- How does Dtype affect the loading of the data, which is always float?
 */
 
 #include