-
Notifications
You must be signed in to change notification settings - Fork 6
/
range_loss_layer.hpp
136 lines (119 loc) · 5.19 KB
/
range_loss_layer.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#ifndef CAFFE_RANGE_LOSS_LAYER_HPP_
#define CAFFE_RANGE_LOSS_LAYER_HPP_
#include <map>
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/layers/loss_layer.hpp"
namespace caffe {
/**
* @brief Computes the Range (L2) loss @f$
* E = \frac{1}{2N} \sum\limits_{n=1}^N \left| \left| \hat{y}_n - y_n
* \right| \right|_2^2 @f$ for real-valued regression tasks.
*
* @param bottom input Blob vector (length 2)
* -# @f$ (N \times C \times H \times W) @f$
* the predictions @f$ \hat{y} \in [-\infty, +\infty]@f$
* -# @f$ (N \times C \times H \times W) @f$
* the targets @f$ y \in [-\infty, +\infty]@f$
* @param top output Blob vector (length 1)
* -# @f$ (1 \times 1 \times 1 \times 1) @f$
* the computed Range loss: @f$ E =
* \frac{1}{2N} \sum\limits_{n=1}^N \left| \left| \hat{y}_n - y_n
* \right| \right|_2^2 @f$
*
* This can be used for least-squares regression tasks. An InnerProductLayer
* input to a RangeLossLayer exactly formulates a linear least squares
* regression problem. With non-zero weight decay the problem becomes one of
* ridge regression -- see src/caffe/test/test_sgd_solver.cpp for a concrete
* example wherein we check that the gradients computed for a Net with exactly
* this structure match hand-computed gradient formulas for ridge regression.
*
* (Note: Caffe, and SGD in general, is certainly \b not the best way to solve
* linear least squares problems! We use it only as an instructive example.)
*/
template <typename Dtype>
class RangeLossLayer : public LossLayer<Dtype> {
 public:
  /**
   * @brief Constructs the layer from its LayerParameter.
   *
   * All scalar members are zero-initialized here so that none of them holds
   * an indeterminate value before the setup code assigns it. Initializers
   * are listed in member declaration order.
   */
  explicit RangeLossLayer(const LayerParameter& param)
      : LossLayer<Dtype>(param), diff_(),
        K_(0), M_(0), choose_k_(0),
        margin_(0), inter_weight_(0), intra_weight_(0),
        loss_intra(0), loss_inter(0) {}

  /// Layer-specific setup; defined in the implementation file.
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  /// Shape handling for the top blob(s); defined in the implementation file.
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  /// Returns the layer type name, "RangeLoss".
  virtual inline const char* type() const { return "RangeLoss"; }

  /**
   * Unlike most loss layers, in the RangeLossLayer we can backpropagate
   * to both inputs -- override to return true and always allow force_backward.
   *
   * @param bottom_index index of the bottom blob being queried (ignored; the
   *        answer is true for every index).
   */
  virtual inline bool AllowForceBackward(const int bottom_index) const {
    return true;
  }

 protected:
  /// @copydoc RangeLossLayer
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // No GPU forward override is declared for this layer.
  // NOTE(review): presumably the base Layer then falls back to the CPU path
  // -- confirm against caffe/layer.hpp.
  // virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
  //     const vector<Blob<Dtype>*>& top);

  /**
   * @brief Computes the Range error gradient w.r.t. the inputs.
   *
   * Unlike other children of LossLayer, RangeLossLayer \b can compute
   * gradients with respect to the label inputs bottom[1] (but still only will
   * if propagate_down[1] is set, due to being produced by learnable parameters
   * or if force_backward is set). In fact, this layer is "commutative" -- the
   * result is the same regardless of the order of the two bottoms.
   *
   * @param top output Blob vector (length 1), providing the error gradient with
   *      respect to the outputs
   *   -# @f$ (1 \times 1 \times 1 \times 1) @f$
   *      This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$,
   *      as @f$ \lambda @f$ is the coefficient of this layer's output
   *      @f$\ell_i@f$ in the overall Net loss
   *      @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence
   *      @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$.
   *      (*Assuming that this top Blob is not used as a bottom (input) by any
   *      other layer of the Net.)
   * @param propagate_down see Layer::Backward.
   * @param bottom input Blob vector (length 2)
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the predictions @f$\hat{y}@f$; Backward fills their diff with
   *      gradients @f$
   *        \frac{\partial E}{\partial \hat{y}} =
   *            \frac{1}{N} \sum\limits_{n=1}^N (\hat{y}_n - y_n)
   *      @f$ if propagate_down[0]
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the targets @f$y@f$; Backward fills their diff with gradients
   *      @f$ \frac{\partial E}{\partial y} =
   *          \frac{1}{N} \sum\limits_{n=1}^N (y_n - \hat{y}_n)
   *      @f$ if propagate_down[1]
   */
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  // No GPU backward override is declared for this layer (see Forward_gpu).
  // virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
  //     const vector<bool>& propagate_down,
  //     const vector<Blob<Dtype>*>& bottom);

  // NOTE(review): the formulas documented above describe a plain Euclidean
  // (L2) loss, while the state below (per-class intra distances, inter-class
  // distances, margin, separate intra/inter weights) suggests a different
  // objective -- confirm the documentation against the implementation file.

  /// Scratch blob; presumably holds prediction/target differences -- confirm.
  Blob<Dtype> diff_;

  /**
   * @brief A distance value tagged with the indices of the two points it was
   *        measured between.
   *
   * Ordering (operator< / operator>) compares the distance only, so the type
   * can be used directly with standard sorting and selection algorithms.
   * Deliberately kept as a plain aggregate with no constructor: the fields
   * are not initialized on default construction.
   */
  class Dist {
   public:
    int ind1, ind2;  // indices of the two points
    Dtype dist;      // distance between them
    bool operator <(const Dist& other) const { return dist < other.dist; }
    bool operator >(const Dist& other) const { return dist > other.dist; }
  };

  /// Maps a class label (key) to the feature indices (value) of that class.
  std::map<int, std::vector<int> > map_class;
  /// Intra-class distances, one inner vector per class.
  std::vector<std::vector<Dist > > intra_distance;
  /// Inter-class distances.
  std::vector<Dist> inter_distance;
  /// Class labels; presumably the distinct labels seen in a batch -- confirm.
  std::vector<int> class_label_;
  // NOTE(review): by Caffe convention K_ / M_ are usually the feature
  // dimension and the batch size -- confirm in the implementation file.
  int K_, M_;
  /// Presumably how many of the largest intra-class distances are used
  /// -- confirm.
  int choose_k_;
  /// Margin; presumably applied to the inter-class term -- confirm.
  Dtype margin_;
  /// Weights of the inter-class and intra-class loss terms.
  Dtype inter_weight_, intra_weight_;
  /// Presumably per-class feature centers -- confirm.
  Blob<Dtype> center_;
  // NOTE(review): purpose of distance_ and S_ is not visible from this
  // header -- see the implementation file.
  Blob<Dtype> distance_;
  Blob<Dtype> S_;
  /// Intra-class loss value.
  Dtype loss_intra;
  /// Inter-class loss value.
  Dtype loss_inter;
};
} // namespace caffe
#endif // CAFFE_RANGE_LOSS_LAYER_HPP_