Skip to content

Commit

Permalink
Add Spectrogram Layer
Browse files Browse the repository at this point in the history
  • Loading branch information
aidangomez committed Oct 15, 2015
1 parent 815aee5 commit e4c7c79
Show file tree
Hide file tree
Showing 4 changed files with 179 additions and 1 deletion.
41 changes: 41 additions & 0 deletions include/caffe/common_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,47 @@ class SoftmaxLayer : public Layer<Dtype> {
Blob<Dtype> scale_;
};

#ifdef USE_AUDIO
/**
 * @brief Computes a spectrogram of input data.
 *
 * Takes exactly two bottom blobs and produces exactly two top blobs:
 *   - bottom[0]: the signal. Only the width axis of channel 0 / row 0 of each
 *     item is read; it is cut into windows of `window_size` samples taken
 *     every `step_size` samples.
 *   - bottom[1]: passed through; top[1] is reshaped like bottom[1] and the
 *     CPU forward pass copies bottom[1] into it.
 *   - top[0]: shape (N, 1, H, window_size / 2) with
 *     H = (W - window_size + step_size) / step_size, where N and W are the
 *     num and width of bottom[0]. Each row holds the FFT output of one window.
 *
 * The backward pass is not implemented for either device.
 */
template <typename Dtype>
class SpectrogramLayer : public Layer<Dtype> {
public:
explicit SpectrogramLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
// Reads window_size and step_size from the layer's spectrogram_param.
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
// Shapes top[0] as (N, 1, H, window_size / 2) and top[1] like bottom[1].
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "Spectrogram"; }
// Two bottoms (signal, pass-through blob) and two tops (spectrogram,
// pass-through blob) are required.
virtual inline int ExactNumBottomBlobs() const { return 2; }
virtual inline int ExactNumTopBlobs() const { return 2; }

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
// No gradient is defined for this layer; both backward passes abort via
// NOT_IMPLEMENTED.
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) {
NOT_IMPLEMENTED;
}
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) {
NOT_IMPLEMENTED;
}

// Number of input samples handed to each FFT call.
int window_size_;
// Distance, in samples, between the starts of consecutive windows.
int step_size_;
};
#endif

#ifdef USE_CUDNN
/**
* @brief cuDNN implementation of SoftmaxLayer.
Expand Down
83 changes: 83 additions & 0 deletions src/caffe/layers/spectrogram_layer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#ifdef USE_AUDIO
#include <vector>

#include "caffe/common_layers.hpp"
#include "caffe/util/fft.hpp"

namespace caffe {

/**
 * @brief Reads the window and step sizes from the layer's spectrogram_param.
 *
 * Both values must be strictly positive: step_size_ is later used as a
 * divisor when the number of windows is computed, and window_size_ sizes the
 * FFT, so zero or negative values are rejected here instead of failing later.
 *
 * @param bottom input blobs (not inspected here)
 * @param top    output blobs (not inspected here)
 */
template <typename Dtype>
void SpectrogramLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  window_size_ = this->layer_param_.spectrogram_param().window_size();
  step_size_ = this->layer_param_.spectrogram_param().step_size();
  CHECK_GT(window_size_, 0) << "window_size must be positive.";
  CHECK_GT(step_size_, 0) << "step_size must be positive.";
}

/**
 * @brief CPU forward pass: copies bottom[1] to top[1] and fills top[0] with
 *        one FFT per window of bottom[0].
 *
 * For every item i, window j starts at sample j * step_size_ on the width
 * axis (channel 0, row 0) of bottom[0] and its transform is written to row j
 * of top[0].
 *
 * @param bottom bottom[0] is the signal, bottom[1] is passed through
 * @param top    top[0] receives the spectrogram, top[1] the copy of bottom[1]
 */
template <typename Dtype>
void SpectrogramLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Pass the second blob through unchanged.
  caffe_copy(bottom[1]->count(), bottom[1]->cpu_data(),
             top[1]->mutable_cpu_data());

  const int num = bottom[0]->num();
  const int width = bottom[0]->width();
  const int num_bins = window_size_ / 2;
  const int num_windows = (width - window_size_ + step_size_) / step_size_;

  // Size the output *before* taking its data pointer so the pointer can never
  // refer to a buffer that Reshape replaced.
  vector<int> top_shape(4);
  top_shape[0] = num;
  top_shape[1] = 1;
  top_shape[2] = num_windows;
  top_shape[3] = num_bins;
  top[0]->Reshape(top_shape);

  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();

  FastFourierTransform_cpu<Dtype> fft(window_size_);

  for (int i = 0; i < num; ++i) {
    for (int j = 0; j < num_windows; ++j) {
      // Offsets must be computed for the current (i, j) before the transform
      // runs; computing them afterwards would process each window one
      // iteration late and skip the final window.
      const int bottom_offset = bottom[0]->offset(i, 0, 0, j * step_size_);
      const int top_offset = top[0]->offset(i, 0, j, 0);
      // process() takes a non-const input pointer, hence the const_cast.
      fft.process(const_cast<Dtype*>(bottom_data + bottom_offset),
                  top_data + top_offset, window_size_);
    }
  }
}

/**
 * @brief Shapes the outputs: top[1] mirrors bottom[1]; top[0] becomes
 *        (num, 1, windows, window_size_ / 2).
 *
 * The number of windows is (width - window_size_ + step_size_) / step_size_,
 * using integer division, where num and width come from bottom[0].
 */
template <typename Dtype>
void SpectrogramLayer<Dtype>::Reshape(const vector<Blob<Dtype> *> &bottom,
    const vector<Blob<Dtype> *> &top) {
  // The pass-through output always has the shape of its input.
  top[1]->ReshapeLike(*bottom[1]);

  const int batch = bottom[0]->num();
  const int samples = bottom[0]->width();
  const int bins_per_window = window_size_ / 2;
  const int windows = (samples - window_size_ + step_size_) / step_size_;

  vector<int> spectrogram_shape;
  spectrogram_shape.push_back(batch);
  spectrogram_shape.push_back(1);
  spectrogram_shape.push_back(windows);
  spectrogram_shape.push_back(bins_per_window);

  top[0]->Reshape(spectrogram_shape);
}

// In CPU-only builds the .cu file is not compiled, so Forward_gpu needs a
// definition from here. Only the forward pass is stubbed because Backward_gpu
// is already defined inline in the header.
// NOTE(review): presumably the stub reports that GPU support is unavailable --
// confirm against the STUB_GPU_FORWARD definition.
#ifdef CPU_ONLY
STUB_GPU_FORWARD(SpectrogramLayer, Forward);
#endif

// Instantiate the layer template and make it creatable by type name.
// NOTE(review): assumes the usual Caffe float/double instantiation and that
// the registered name matches type() == "Spectrogram" -- confirm against the
// macro definitions.
INSTANTIATE_CLASS(SpectrogramLayer);
REGISTER_LAYER_CLASS(Spectrogram);

} // namespace caffe
#endif // USE_AUDIO
48 changes: 48 additions & 0 deletions src/caffe/layers/spectrogram_layer.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#ifdef USE_AUDIO
#include <vector>

#include "caffe/common_layers.hpp"
#include "caffe/util/fft.hpp"

namespace caffe {

/**
 * @brief GPU forward pass: copies bottom[1] to top[1] and fills top[0] with
 *        one FFT per window of bottom[0].
 *
 * Mirrors the CPU forward pass: for every item i, window j starts at sample
 * j * step_size_ on the width axis (channel 0, row 0) of bottom[0] and its
 * transform is written to row j of top[0].
 *
 * @param bottom bottom[0] is the signal, bottom[1] is passed through
 * @param top    top[0] receives the spectrogram, top[1] the copy of bottom[1]
 */
template <typename Dtype>
void SpectrogramLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Pass the second blob through unchanged, as the CPU path does; without
  // this top[1] would never be populated when running on the GPU.
  caffe_copy(bottom[1]->count(), bottom[1]->gpu_data(),
             top[1]->mutable_gpu_data());

  const int num = bottom[0]->num();
  const int width = bottom[0]->width();
  const int num_bins = window_size_ / 2;
  const int num_windows = (width - window_size_ + step_size_) / step_size_;

  // Note: `vector<int> top_shape();` would declare a function, not a vector.
  // Size the output *before* taking its data pointer so the pointer can never
  // refer to a buffer that Reshape replaced.
  vector<int> top_shape(4);
  top_shape[0] = num;
  top_shape[1] = 1;
  top_shape[2] = num_windows;
  top_shape[3] = num_bins;
  top[0]->Reshape(top_shape);

  const Dtype* bottom_data = bottom[0]->gpu_data();
  Dtype* top_data = top[0]->mutable_gpu_data();

  FastFourierTransform_gpu<Dtype> fft(window_size_);

  for (int i = 0; i < num; ++i) {
    for (int j = 0; j < num_windows; ++j) {
      // Offsets must be computed for the current (i, j) before the transform
      // runs; computing them afterwards would process each window one
      // iteration late and skip the final window.
      const int bottom_offset = bottom[0]->offset(i, 0, 0, j * step_size_);
      const int top_offset = top[0]->offset(i, 0, j, 0);
      // process() takes a non-const input pointer, hence the const_cast.
      fft.process(const_cast<Dtype*>(bottom_data + bottom_offset),
                  top_data + top_offset, window_size_);
    }
  }
}

// Only Forward_gpu is defined in this file (Backward_gpu is defined inline in
// the header), so only the forward pass is explicitly instantiated here.
// NOTE(review): assumes the macro instantiates Forward_gpu for float and
// double -- confirm against its definition.
INSTANTIATE_LAYER_GPU_FORWARD(SpectrogramLayer);

} // namespace caffe
#endif // USE_AUDIO
8 changes: 7 additions & 1 deletion src/caffe/proto/caffe.proto
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ message ParamSpec {
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 140 (last added: audio_data_param)
// LayerParameter next available layer-specific ID: 141 (last added: spectrogram_param)
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
Expand Down Expand Up @@ -380,6 +380,7 @@ message LayerParameter {
optional ReshapeParameter reshape_param = 133;
optional SigmoidParameter sigmoid_param = 124;
optional SoftmaxParameter softmax_param = 125;
optional SpectrogramParameter spectrogram_param = 140;
optional SPPParameter spp_param = 132;
optional SliceParameter slice_param = 126;
optional TanHParameter tanh_param = 127;
Expand Down Expand Up @@ -953,6 +954,11 @@ message SoftmaxParameter {
optional int32 axis = 2 [default = 1];
}

// Message that stores parameters used by SpectrogramLayer.
message SpectrogramParameter {
// Number of input samples passed to each FFT call; the layer's output has
// window_size / 2 values per window.
required int32 window_size = 1;
// Number of samples between the starts of consecutive windows. Used as a
// divisor when the layer computes the number of windows.
required int32 step_size = 2;
}

message TanHParameter {
enum Engine {
DEFAULT = 0;
Expand Down

0 comments on commit e4c7c79

Please sign in to comment.