learn_old.m
function W = learn_old(X, Y, lambda)
%LEARN_OLD  Learn a one-vs-all logistic regression model.
%   W = LEARN_OLD(X, Y, lambda) fits one binary logistic regression
%   classifier per class by gradient descent with a backtracking line
%   search, and returns the learned weights as the columns of W.
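%
%   Usage sketch (the example's assumptions are not part of the original
%   file: labels Y are the integers 1..T, and the last column of X is an
%   all-ones intercept column, since the last weight is left
%   unregularized below):
%       W = learn_old(Xtrain, Ytrain, 0.1);
%       scores = Xtest * W;              % one column of scores per class
%       [~, pred] = max(scores, [], 2);  % predicted class per test row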
T = length(unique(Y));  % number of classes
d = size(X, 2);         % number of features
m = size(X, 1);         % number of training samples
W = zeros(d, T);        % model parameters, one column per class
alpha = 5;              % initial learning rate
tol = 0.001;            % stopping tolerance on the gradient norm
Maxiter = 15;           % max backtracking halvings per step
Out_maxiter = 8000;     % max gradient-descent iterations per class
parfor t = 1:T
    y = zeros(m, 1);    % binary labels for the one-vs-all subproblem
    y(Y == t) = 1;
    % Regularized logistic (cross-entropy) cost for this subproblem;
    % the last weight is the bias and is excluded from the penalty.
    cost = @(w) -(1/m) * (y' * log(sigmoid(X*w)) + (1-y)' * log(1 - sigmoid(X*w))) ...
                + lambda/(2*m) * (w(1:end-1)' * w(1:end-1));
    % Gradient descent with backtracking line search for class t.
    W_grad = ones(d, 1);  % placeholder so the loop condition passes once
    W_t = zeros(d, 1);
    Out_iter = 1;
    while norm(W_grad) > tol && Out_iter < Out_maxiter
        alpha_temp = alpha;  % reset the step size for each new search direction
        J_old = cost(W_t);
        W_grad = (1/m) * X' * (sigmoid(X*W_t) - y) + (lambda/m) * [W_t(1:end-1); 0];
        W_before = W_t;
        W_t = W_t - alpha_temp * W_grad;
        J_new = cost(W_t);
        % Backtracking line search: halve the step until the Armijo
        % sufficient-decrease condition
        %     J_new <= J_old - 0.1 * alpha_temp * ||W_grad||^2
        % holds, or Maxiter halvings have been tried.
        iter = 1;
        while J_new > J_old - 0.1 * alpha_temp * (W_grad' * W_grad) && iter < Maxiter
            alpha_temp = 0.5 * alpha_temp;
            W_t = W_before - alpha_temp * W_grad;
            J_new = cost(W_t);
            iter = iter + 1;
        end
        Out_iter = Out_iter + 1;
    end
    W(:, t) = W_t;  % store the learned weights for class t
end
% Alternative: solve each one-vs-all subproblem with fmincg instead of
% the hand-rolled gradient descent above:
% for k = 1:T
%     init_W = zeros(d, 1);
%     options = optimset('GradObj', 'on', 'MaxIter', 100);
%     theta = fmincg(@(t) lrCostFunction(t, X, (Y == k), lambda), init_W, options);
%     W(:, k) = theta;
% end
end
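
% The code above relies on a sigmoid() helper. If the repository does not
% already provide one on the path (an assumption; this local function is
% not part of the original file), this minimal definition makes the file
% self-contained:
function g = sigmoid(z)
%SIGMOID  Elementwise logistic function 1 ./ (1 + exp(-z)).
g = 1 ./ (1 + exp(-z));
end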