-
Notifications
You must be signed in to change notification settings - Fork 9
/
pca_analysis.m
52 lines (40 loc) · 1.32 KB
/
pca_analysis.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
%Process gene expression data by PCA analysis.
function pca_analysis(dataset)
[dataFile processDataMat processDataTxt PCAdataFile dataFolder resultsDir intermediate_filesDir figuresDir] = initialization(dataset);
load(processDataMat);
% Added to avoid conflict with the pca function of drtoolbox
present_dir = pwd;
if verLessThan('matlab','8') % older versions do not have pca
cd(fullfile(toolboxdir('stats')))
handle_pca = @princomp;
else
cd(fullfile(toolboxdir('stats'),'stats'))
handle_pca = @pca;
end
cd(present_dir)
%PCA analysis using all samples
ncell = size(pro.expr, 1);
ngene = size(pro.expr, 2);
npca = min(ncell-1,ngene); %meaningful number of PCs
mu = mean(pro.expr, 1);
expr = pro.expr - repmat(mu, ncell, 1);
[c, s] = handle_pca(expr);
expr_all = pro.expr - repmat(mu, ncell, 1);
s_all = expr_all * c;
X = s_all(:, 1:npca); %reduced data
pro.pca = X;
pro.weight = c;
%PCA analysis using samples from the last cell stage.
I = find(pro.cell_stage == max(pro.cell_stage));
npca2 = min(length(I)-1,ngene); %meaningful number of PCs
mu = mean(pro.expr(I, :), 1);
nI = length(I);
expr = pro.expr(I,:) - repmat(mu, nI, 1);
[c, s] = handle_pca(expr);
expr_all = pro.expr - repmat(mu, ncell, 1);
s_all = expr_all * c;
X = s_all(:, 1:npca2); %reduced data
pro.pca2 = X;
pro.weight2 = c;
save(PCAdataFile, 'pro');
end