Skip to content

Commit

Permalink
Several small fixes
Browse files Browse the repository at this point in the history
- bugs resulting from char/string incompatibilities
- bug concerning Neuromorphometrics atlas
- bug concerning "phenotype" field when using custom data
- missing n_rois field when using custom parcellation
  • Loading branch information
LeonDLotter committed Apr 21, 2022
1 parent ab2343c commit bc345d0
Show file tree
Hide file tree
Showing 13 changed files with 100 additions and 46 deletions.
104 changes: 69 additions & 35 deletions ABAnnotate.m
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@
'or data (opt.phenotype_data)!']);
end

% get directory & start logging
% get directories & start logging
wd = fileparts(which('ABAnnotate'));
tempdir = fullfile(wd, 'temp')
tempdir = fullfile(wd, 'temp');
if ~exist(tempdir, 'dir')
mkdir(tempdir)
mkdir(tempdir);
end
logfile = fullfile(tempdir, 'logfile_temp.txt');
diary(logfile);
Expand All @@ -41,19 +41,23 @@
if ~isfield(opt, 'analysis_name') && isfield(opt, 'phenotype')
[~, opt.analysis_name, ~] = fileparts(opt.phenotype);
elseif ~isfield(opt, 'analysis_name')
opt.analysis_name = ['enrich_' date];
opt.analysis_name = sprintf('enrich_%s', date);
else
opt.analysis_name = char(opt.analysis_name);
end
print_section(['Starting ABAnnotate: ' opt.analysis_name]);
print_section(sprintf('Starting ABAnnotate: %s', opt.analysis_name));


%% ------------------------------------------------------------------------
% Prepare imaging files & null maps
% -------------------------------------------------------------------------

% set atlas & aba data ---------------------------------------------------
% set atlas & aba data ----------------------------------------------------

% default: SchaeferTian-116
if ~isfield(opt, 'atlas'); opt.atlas = 'Schaefer100-7_TianS1'; end
% choose atlas - currently only SchaeferTian116, Schaefer100, and Neuromorphometrics defined
opt.atlas = char(opt.atlas);
switch opt.atlas
case {'SchaeferTian', 'Schaefer100-7_TianS1'}
atlas = 'Schaefer100-7_TianS1';
Expand All @@ -67,8 +71,10 @@
otherwise
atlas_type = 'user';
end

% get atlas
switch atlas_type

case 'integrated'
atlas_dir = fullfile(wd, 'atlas', atlas);
if ~exist(atlas_dir, 'dir')
Expand All @@ -82,15 +88,17 @@
delete(atlas_zip);
end
% set atlas
opt.atlas = fullfile(wd, 'atlas', atlas, [atlas '_atlas.nii']);
opt.aba_mat = fullfile(wd, 'atlas', atlas, [atlas '_expression.mat']);
opt.atlas = fullfile(wd, 'atlas', atlas, sprintf('%s_atlas.nii', atlas));
opt.aba_mat = fullfile(wd, 'atlas', atlas, sprintf('%s_expression.mat', atlas));
load(opt.aba_mat, 'expression_matrix')
opt.n_rois = height(expression_matrix);
clear('expression_matrix');
fprintf('Setting atlas & ABA data to %s - %u parcels.\n', opt.atlas, opt.n_rois)
fprintf('Setting atlas & ABA data to %s - %u parcels.\n', opt.atlas, opt.n_rois);

case 'user'
% check if custom volume. if yes, unzip if ending *.nii.gz
try
% load & unzip atlas
[~, ~, atlas_ext] = fileparts(opt.atlas);
if strcmp(atlas_ext, '.gz')
atlas_unzip = gunzip(opt.atlas, fullfile(wd, 'temp'));
Expand All @@ -99,18 +107,35 @@
else
error('Filetype of %s is not supported!', opt.phenotype);
end
disp(['Using custom provided atlas: ' opt.atlas]);
if ~isfield(opt, 'aba_mat')
error('If you provide a custom atlas, you must also provide matching ABA data!')
end
catch
error(['Atlas not valid. Choose one of the integrated atlases or ', ...
'provide a path to a custom atlas volume! ', ...
'Run <abannotate_get_datasets("parcellation");> for a list of available atlases.']);
end
% get roi number
opt.n_rois = get_number_of_rois(opt.atlas);
fprintf('Using custom provided atlas: %s - %u parcels.\n', opt.atlas, opt.n_rois);
% check associated ABA expression matrix
if ~isfield(opt, 'aba_mat')
error('If you provide a custom atlas, you must also provide matching ABA data!')
end
try
load(opt.aba_mat, 'expression_matrix', 'gene_symbols');
catch
error('Problems loading variables "expression_matrix" and "gene_symbols" from opt.aba_mat!');
end
% compare numbers
if opt.n_rois~=height(expression_matrix)
error('Number of ROIs in atlas (%u) does not match number of values in expression matrix (%u)!', opt.n_rois, height(expression_matrix))
end
if length(gene_symbols)~=width(expression_matrix)
error('Length of gene_symbols width of expression_matrix do not match!');
end
clear('expression_matrix', 'gene_symbols');
end

% parcellate pheno volume -------------------------------------------------

% if pheno data provided, will use data and ignore volume!
if ~isfield(opt, 'phenotype_data')
fprintf('Applying parcellation to %s.\n', opt.phenotype);
Expand All @@ -119,25 +144,34 @@
if strcmp(phenotype_ext, '.gz')
phenotype_unzip = gunzip(opt.phenotype, fullfile(wd, 'temp'));
elseif strcmp(phenotype_ext, '.nii')
phenotype_unzip = {opt.phenotype};
phenotype_unzip = cellstr(opt.phenotype);
else
error('Filetype of %s is not supported!', opt.phenotype);
end
% parcellate
[phenotype_parc, ~] = mean_time_course(phenotype_unzip, opt.atlas, 1:opt.n_rois);
opt.phenotype_data = phenotype_parc';

% else check if input data is provided
elseif isfield(opt, 'phenotype_data')
if size(opt.phenotype_data) ~= [opt.n_rois 1]
error('Phenotype data has to be opt.n_rois x 1 vector!');
disp('Phenotype data array provided, overriding opt.phenotype with "data".');
opt.phenotype = 'data';
if ~isequal(size(opt.phenotype_data), [opt.n_rois 1])
error('Phenotype data has to be vector of size [%d, 1]!', opt.n_rois);
end
disp('Phenotype data array provided, ignoring opt.phenotype');

% else something did not work
else
error('It seems you did not provide opt.phenotype or opt.phenotype_data!');
end
end

% check for NaNs
if any(isnan(opt.phenotype_data))
warning('Parcellated phenotype data contains NaNs! Respective regions will be ignored.');
end

% get phenotype nulls -----------------------------------------------------

% number of phenotype nulls
if ~isfield(opt, 'n_nulls'); opt.n_nulls = 1000; end

Expand Down Expand Up @@ -204,22 +238,22 @@

% Dataset -----------------------------------------------------------------
% default to Gene Ontology - Biological Process
% case 1: one of integrated datasets
if isfield(opt.GCEA, 'dataset') && ~strcmp(opt.GCEA.dataset, 'custom')
% case 1: none set, default to GO-BP
if ~isfield(opt.GCEA, 'dataset') && ~isfield(opt.GCEA, 'dataset_mat')
opt.GCEA.dataset = 'GO-biologicalProcessDirect-discrete';
fprintf('No dataset provided, defaulting to: %s\n', opt.GCEA.dataset);
% get path or download
opt.GCEA.dataset_mat = get_gcea_dataset(opt.GCEA.dataset);
% case 2: one of integrated datasets
elseif isfield(opt.GCEA, 'dataset') && ~strcmp(opt.GCEA.dataset, 'custom')
% get path or download
opt.GCEA.dataset_mat = get_gcea_dataset(opt.GCEA.dataset);
disp(['GCEA dataset: ' opt.GCEA.dataset]);
fprintf('GCEA dataset: %s\n', opt.GCEA.dataset);
validate_gcea_dataset(opt.GCEA.dataset_mat);
% case 2: a custom mat file
% case 3: a custom mat file
elseif strcmp(opt.GCEA.dataset, 'custom') && isfield(opt.GCEA, 'dataset_mat')
disp(['Custom GCEA dataset: ' opt.GCEA.dataset_mat]);
fprintf('Custom GCEA dataset: %s\n', opt.GCEA.dataset_mat);
validate_gcea_dataset(opt.GCEA.dataset_mat);
% case 3: none set, default to GO-BP
elseif ~isfield(opt.GCEA, 'dataset') && ~isfield(opt.GCEA, 'dataset_mat')
opt.GCEA.dataset = 'GO-biologicalProcessDirect-discrete';
% get path or download
opt.GCEA.dataset_mat = get_gcea_dataset(opt.GCEA.dataset);
disp(['No dataset provided, defaulting to: ' opt.GCEA.dataset]);
% case 4: other
else
disp(['You must provide ether a valid dataset name or set <opt.gcea.dataset> to ', ...
Expand Down Expand Up @@ -277,26 +311,26 @@
cTablePheno_csv = cTablePheno;
cTablePheno_csv = removevars(cTablePheno_csv, {'cWeights', 'cScoresNull'});
cTablePheno_csv.cGenes = cellfun(@(x) strjoin(x, ', '), cTablePheno.cGenes, 'UniformOutput', false);
writetable(cTablePheno_csv, fullfile(opt.dir_result, ['GCEA_' opt.analysis_name '.csv']));
writetable(cTablePheno_csv, fullfile(opt.dir_result, sprintf('GCEA_%s.csv', opt.analysis_name)));

% write mat
save(fullfile(opt.dir_result, ['GCEA_' opt.analysis_name '.mat']), 'cTablePheno', 'opt');
save(fullfile(opt.dir_result, sprintf('GCEA_%s.mat', opt.analysis_name)), 'cTablePheno', 'opt');

% write setting xml
writestruct(opt, fullfile(opt.dir_result, ['GCEA_' opt.analysis_name '_settings.xml']))
writestruct(opt, fullfile(opt.dir_result, sprintf('GCEA_%s_settings.xml', opt.analysis_name)))

% copy logfile
copyfile(logfile, fullfile(opt.dir_result, ['GCEA_' opt.analysis_name '_log.txt']));
copyfile(logfile, fullfile(opt.dir_result, sprintf('GCEA_%s_log.txt', opt.analysis_name)));

% print
fprintf('Saved results to %s.\n', fullfile(opt.dir_result, ['GCEA_' opt.analysis_name '*.*']));
fprintf('Saved results to %s.\n', fullfile(opt.dir_result, sprintf('GCEA_%s*.*', opt.analysis_name)));
end

% delete temp data
diary('off');
delete(fullfile(tempdir, '*'));

print_section(['Finished ABAnnotate: ' opt.analysis_name]);
print_section(sprintf('Finished ABAnnotate: %s', opt.analysis_name));

end % function

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

The method basically consists of the following steps:

1. An input volume ("phenotype") is parcellated according to given parcellation and null models corrected for spatial-autocorrelation are generated.
1. An input volume ("phenotype") is parcellated according to given parcellation and null models corrected for spatial autocorrelation are generated (if autocorrelation was detected in the data).
2. For each null phenotype and each gene category a "category score" is obtained by correlating the null phenotype with the spatial mRNA expression pattern of each gene and averaging the z-transformed correlation coefficients off all genes annotated to a certain category within each category (= null categories).
3. The generated null category scores are then compared to the "real" category score obtained by correlating the "real" phenotype with all genes in each category and averaging the correlation coefficients per category.
4. One-sided p values for each category are obtained from the estimated null distribution of category scores and the resulting p-values are FDR-corrected.
Expand Down
Binary file modified example/example_customization.mlx
Binary file not shown.
2 changes: 1 addition & 1 deletion scripts/abannotate_delete_datasets.m
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
if ismember(del, {'Y', 'y', 'yes', 'Yes', 'YES'})
for i=1:length(files_to_delete)
file_to_delete = files_to_delete{i};
disp(['Deleting file: ' file_to_delete]);
fprintf('Deleting file: %s\n', file_to_delete);
if endsWith(file_to_delete, '.mat')
delete(file_to_delete);
else
Expand Down
2 changes: 1 addition & 1 deletion scripts/abannotate_delete_null_data.m
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
if ismember(del, {'Y', 'y', 'yes', 'Yes', 'YES'})
for i=1:length(files_to_delete)
fullfile_to_delete = fullfile(null_dir, files_to_delete{i});
disp(['Deleting file: ' fullfile_to_delete]);
fprintf('Deleting file: %s\n', fullfile_to_delete);
delete(fullfile_to_delete);
end
else
Expand Down
2 changes: 2 additions & 0 deletions scripts/abannotate_get_datasets.m
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

if ~exist('dataset_type', 'var')
dataset_type = 'gcea_dataset';
else
dataset_type = char(dataset_type);
end
if ~exist('disp_names', 'var')
disp_names = true;
Expand Down
2 changes: 1 addition & 1 deletion scripts/aggregate_scores.m
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
case 'absweightedmean'
category_score = nansum(abs(gene_scores) .* gene_weights, 1) ./ nansum(gene_weights, 1);
otherwise
error('Unknown aggregation option: ''%s''', aggregation_method);
error('Unknown aggregation option: "%s"', aggregation_method);
end

end
2 changes: 1 addition & 1 deletion scripts/ensemble_enrichment.m
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@
% Give a basic output about significance
fprintf(1,'%u significant categories at p < %.3f derived from gaussian distributions.\n', ...
sum(cTablePheno.pValZCorr < gcea_opt.p_thresh), gcea_opt.p_thresh);
fprintf(1,'%u significant categories at p < %.3f derived from null distributions. \n', ...
fprintf(1,'%u significant categories at p < %.3f derived from null distributions.\n', ...
sum(cTablePheno.pValPermCorr < gcea_opt.p_thresh), gcea_opt.p_thresh);

end
4 changes: 2 additions & 2 deletions scripts/generate_category_nulls.m
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@
fprintf('\n');

%--------------------------------------------------------------------------
% Agglomerate gene scores by GO category
% Agglomerate gene scores by gene category
%--------------------------------------------------------------------------
category_scores = cell(n_categories, 1);
for i = 1:n_categories
Expand Down Expand Up @@ -151,7 +151,7 @@
% Save results to .mat file
save_path = gcea_opt.category_nulls;
if save_result
fprintf(1,'Saving nulls from %u iterations to ''%s''\n', n_null, save_path);
fprintf(1,'Saving nulls from %u iterations to "%s"\n', n_null, save_path);
save(save_path, 'cTable', 'gcea_opt', '-v7');
end

Expand Down
3 changes: 2 additions & 1 deletion scripts/get_gcea_dataset.m
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
% Gets path to ABAnnotate GCEA dataset or downloads it from OSF

wd = fileparts(which('Abannotate'));
dataset_name = char(dataset_name);

% get list of datasets
dataset_list = abannotate_get_datasets('gcea_dataset', false);
Expand All @@ -20,7 +21,7 @@
if ~exist(dataset_path, 'file')
disp('Dataset not found. Downloading from OSF...')
abannotate_download_osf(osf_id, dataset_path);
disp(['Saved to: ' dataset_path]);
fprintf('Saved to: %s\n', dataset_path);
end

end
Expand Down
15 changes: 15 additions & 0 deletions scripts/get_number_of_rois.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
function [n_rois] = get_number_of_rois(vol)
%function [n_rois] = get_number_of_rois(vol)

vol_hdr = spm_vol(vol);
vol_data = spm_read_vols(vol_hdr);
roi_idc = unique(vol_data);
roi_idc(roi_idc==0) = [];

if ~isequal(roi_idc, round(roi_idc))
warning('Parcellation volume seems to contain non-integers!')
end

n_rois = length(roi_idc);

end
6 changes: 4 additions & 2 deletions scripts/mean_time_course.m
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
function [D, Reg_all] = mean_time_course(data_mean, data_ROI, numberROIs)
%[D,Reg_all] = mean_time_course(data_for_mean, mask_ROIs,numberROIs)

%[D, Reg_all] = mean_time_course(data_mean, data_ROI, numberROIs)
%
% Extract region-wise data from a volume {data_mean} and a parcellation
% {data_ROI}. {numberROIs} must be a vector of length 1:number of rois.

size_data = size(data_mean);
[ROI_matrix] = resize_img_useTemp_imcalc(data_ROI,data_mean{1});
Expand Down
2 changes: 1 addition & 1 deletion scripts/resize_img_useTemp_imcalc.m
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
function [img3d] = resize_img_useTemp_imcalc(file,temp)
% function [img3d] = resize_img_useTemp_imcalc(file,temp,opt_save)
% function [img3d] = resize_img_useTemp_imcalc(file,temp)
%-----------------------------------------------------------------------
% Job saved on 15-Jul-2015 09:33:17 by cfg_util (rev $Rev: 6134 $)
% spm SPM - SPM12 (6225)
Expand Down

0 comments on commit bc345d0

Please sign in to comment.