-
Notifications
You must be signed in to change notification settings - Fork 16
/
NSPN_CorticalMyelination_AnalysisWrapper.py
317 lines (264 loc) · 15.5 KB
/
NSPN_CorticalMyelination_AnalysisWrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
#!/usr/bin/env python
'''
This is the analyses wrapper for the NSPN_CorticalMyelination analyses that
are reported in Adolescent Consolidation of Assocation Cortical Hubs of the
Human Brain Connectome.
Created by Kirstie Whitaker in October 2015
Contact kw401@cam.ac.uk
'''
#==============================================================================
# IMPORTS
#==============================================================================
import itertools as it
import os
import pickle
import sys
#==============================================================================
# DEFINE DIRECTORIES
#------------------------------------------------------------------------------
# These are a bunch of useful directory path names with the locations of:
# FS_SUBJECTS - the directory containing the raw MRI data and the
# fsaverage directory that contains the 500 parcellation
# DATA - the directory containing the freesurfer extracted values
# for each of the three cohorts, demographic data and
# input data files for genetic analyses
# SCRIPTS - the directory containing all the analysis code
# (except this wrapper)
# CT_MT_ANALYSES - where all the analysis outputs for this set of analyses
# are saved
#==============================================================================
study_dir = os.path.dirname(os.path.abspath(__file__))
sub_data_dir = os.path.join(study_dir, 'FS_SUBJECTS')
fsaverage_dir = os.path.join(sub_data_dir, 'fsaverageSubP')
data_dir = os.path.join(study_dir, 'DATA')
scripts_dir = os.path.join(study_dir, 'SCRIPTS')
paper_dir = os.path.join(study_dir, 'CT_MT_ANALYSES')
matlab_dir = os.path.join('/usr','local','MATLAB','R2012b','bin','matlab') ##### EDIT HERE FOR YOUR VERSION OF MATLAB!
#==============================================================================
# LOAD THE ANALYSIS CODE MODULES
#==============================================================================
sys.path.append(scripts_dir)
from read_fsaverage_info import *
from make_corr_matrices import *
from fill_measure_dict import *
from make_graphs import *
from call_pls import *
from make_pysurfer_pictures import *
from make_tables import *
from make_figures import *
from make_demo_table import *
from report_stats_tables import *
#==============================================================================
# GET SOME USEFUL INFORMATION FROM FSAVERAGE DIRECTORY
#------------------------------------------------------------------------------
# Names of each region in the parcellation (aparc_names)
# Coordinates of the center of mass of these regions (centroids)
# Pairs of coordiantes for sagital, coronal and axial projections
# Von economo and lobe labels for each region
#==============================================================================
fsaverage_dict = read_fsaverage_info(fsaverage_dir)
#==============================================================================
# DEFINE THE COVARIATES YOU CARE ABOUT
#------------------------------------------------------------------------------
# While the network analyses in the paper are presented without correction for
# any covariates in order to aid interpretation, it is important to check
# the influence of certain demographic measures on the results.
# Therefor all analyses are repeated with the following combinations of
# covariates removed before the structural covariance network is created
# ONES ----------------- only the intercept is removed
# AGE ------------------ each participant's age at time of scan
# GENDER --------------- adding an covariate for boys controls for systematic difference
# in mean values between genders
# SITE ----------------- there are three scanner locations in this study
# so adding separate intercepts for WBIC and UCL controls
# for systematic differences in the mean values between
# scanner locations
# AGE and GENDER ------- combination of age and male covariates
# AGE and SITE --------- combination of age, wbic and ucl covariates
# GENDER and SITE ------ combination of male, wbic and ucl covariates
# AGE, GENDER and SITE - combination of age, male, wbic and ucl covariates
#==============================================================================
# Define the covars dictionary
'''
covars_dict = { 'ones' : ['ones'],
'age' : ['age'],
'gender' : ['male'],
'site' : ['wbic', 'ucl'],
'age_gender' : ['age', 'male'],
'age_site' : ['age', 'wbic', 'ucl'],
'gender_site' : ['male', 'wbic', 'ucl'],
'age_gender_site' : ['age', 'male', 'wbic', 'ucl'] }
'''
covars_dict = { 'ones' : ['ones'] } ##### EDIT HERE FOR FULL RUNNING!
#==============================================================================
# DEFINE THE COHORTS YOU CARE ABOUT
#------------------------------------------------------------------------------
# We have three cohorts, all of which are evenly balanced with respect
# to age and gender in each of 5 age bins (14-15, 16-17, 18-19, 20-21, 22-24)
# DISCOVERY - the inital 100 participants who were
# sequestered to ensure that analyses
# were not overfit
# VALIDATION - the additional 200 participants (197 usable)
# COMPLETE - all 300 participants as the study was designed (297 usable)
#==============================================================================
cohort_list = [ 'DISCOVERY', 'VALIDATION', 'COMPLETE' ]
#==============================================================================
# CREATE A MEASURE_DICT_DICT THAT WILL CONTAIN ALL THE USEFUL VALUES
#------------------------------------------------------------------------------
# Basically, this is a dictionary of dictionaries that will let us compare
# across cohorts later on. If the file already exists you can read it in and
# then the calculations will not be re-done (this saves you a bunch of time)
#==============================================================================
measure_dict_dict_file = os.path.join(paper_dir, "measure_dict_dict.p")
if os.path.isfile(measure_dict_dict_file):
measure_dict_dict = pickle.load(open(measure_dict_dict_file, 'rb'))
else:
measure_dict_dict = {}
#measure_dict_dict = {} ##### UNCOMMENT HERE TO RE-WRITE DICTIONARY
#==============================================================================
# START THE ANALYSIS LOOP FOR EACH COMBINATION OF PARTICIPANTS
# AND COVARIATES
#==============================================================================
for mpm, cohort in it.product(['MT'], cohort_list):
#==========================================================================
# Define some useful locations
#--------------------------------------------------------------------------
# Although these are the directory names used for the analyses you can
# change these if you'd like. Just makes sure the files in the data_dir
# are copied over or replaced with your own data
#==========================================================================
cohort_dir = os.path.join(paper_dir, cohort)
cohort_data_dir = os.path.join(data_dir, cohort)
pls_dir = os.path.join(cohort_dir, 'PLS')
pysurfer_dir = os.path.join(cohort_dir, 'PYSURFER_IMAGES')
figures_dir = os.path.join(cohort_dir, 'FIGS')
corrmat_dir = os.path.join(cohort_dir, 'CORR_MATS')
graph_dir = os.path.join(cohort_dir, 'GRAPHS')
tables_dir = os.path.join(cohort_dir, 'TABLES')
for d in [ pls_dir, pysurfer_dir, figures_dir, corrmat_dir, graph_dir, tables_dir ]:
if not os.path.isdir(d):
os.makedirs(d)
#==========================================================================
# Print the cohort name to screen so you know where you're up to
#==========================================================================
print 'COHORT: {}'.format(cohort)
#==========================================================================
# The CT data file is important for a lot of analyses so we're going
# to set explicitly here
#==========================================================================
ct_data_file = os.path.join(cohort_data_dir, 'PARC_500aparc_thickness_behavmerge.csv')
#==========================================================================
# Make the correlation matrices
#--------------------------------------------------------------------------
# These files can take a little while to make, so they've already been
# created. If you'd like to re-make them you can either delete the files
# that are already there, or re-name the corrmat_dir (above)
#
# The matrices are saved in dictionary (mat_dict) for future use within
# this wrapper code
#==========================================================================
mat_dict = make_corr_matrices(corrmat_dir,
covars_dict,
ct_data_file,
fsaverage_dict['aparc_names'])
#==========================================================================
# Save a bunch of useful measures
#--------------------------------------------------------------------------
# So that you can easily access the output of a bunch of different analyses
# from a bunch of different files (such as the different MPM depths) we're
# going to save them all to a dictionary that can easily be passed to
# the reporting functions such as those that make figures and tables.
# It takes a bit of time to calculate all these values, so if the dictionary
# already exists we'll just read it in, and not bother making a new one,
# otherwise we'll create an empty one and fill it up.
#--------------------------------------------------------------------------
# This is the part of the code that is doing all the statistical
# tests! So it's kind of a big deal!
# There are too many measures to outline here, but please see the
# documentation for the fill_measure_dict function for all the details
#==========================================================================
measure_dict = measure_dict_dict.get('{}_{}'.format(cohort, mpm), {})
measure_dict = fill_measure_dict_part1(measure_dict,
mpm,
cohort_data_dir,
fsaverage_dict,
n_perm=1000)
#==========================================================================
# Save the measure dictionary in a dictionary of dictionaries
#--------------------------------------------------------------------------
# This is so you can compare different cohorts and different MPM measures
# later on, and also so you don't have to re-calculate all the stats over
# and over again :)
#==========================================================================
measure_dict_dict['{}_{}'.format(cohort, mpm)] = measure_dict
pickle.dump(measure_dict_dict, open(measure_dict_dict_file, "wb" ) )
#==========================================================================
# Run PLS
#--------------------------------------------------------------------------
# The PLS code is written in matlab so we need to call those commands
#==========================================================================
measure_dict = pls_commands(measure_dict,
mpm,
pls_dir,
scripts_dir,
matlab_dir,
data_dir)
#==========================================================================
# Make the graphs
#--------------------------------------------------------------------------
# These files can take a little while to make, so they've already been
# created. If you'd like to re-make them you can either delete the files
# that are already there, or re-name the graph_dir (above)
#
# The graphs and nodal and global measures are saved in dictionary
# (graph_dict) for future use within this wrapper code
#
# n is the number of random graphs to create for comparison to the actual
# graph
#==========================================================================
graph_dict = make_graphs(graph_dir,
mat_dict,
fsaverage_dict['centroids'],
fsaverage_dict['aparc_names'],
n_rand=1000)
#==========================================================================
# Save a bunch of useful measures (part 2)
#==========================================================================
measure_dict = fill_measure_dict_part2(measure_dict,
graph_dict,
n_perm=1000)
#==========================================================================
# Save the measure dictionary in a dictionary of dictionaries (again)
#==========================================================================
measure_dict_dict['{}_{}'.format(cohort, mpm)] = measure_dict
pickle.dump(measure_dict_dict, open(measure_dict_dict_file, "wb" ) )
#==========================================================================
# Make the tables
#--------------------------------------------------------------------------
# These tables summarise the regional measures and are sorted by dMT
#==========================================================================
make_tables(measure_dict, tables_dir, cohort_name=cohort.title())
#==========================================================================
# Make the figures
#--------------------------------------------------------------------------
# We've done the statistics so now we're going to make some pretty pictures
#==========================================================================
make_pysurfer_figures(measure_dict,
pysurfer_dir,
sub_data_dir,
scripts_dir,
paper_dir,
overwrite=False)
make_figures(measure_dict, figures_dir, pysurfer_dir, data_dir, graph_dict)
#==========================================================================
# Youre done with the cohorts separately!
#--------------------------------------------------------------------------
# Now just make the summary stats tables and combined figures and relax :)
#==========================================================================
make_demo_table(data_dir, paper_dir)
make_stats_tables(measure_dict_dict, paper_dir)
make_combo_figures(measure_dict_dict, paper_dir)
#==========================================================================
# Aaaaaaand.....you're done!! Thanks for playing along :)
# Kx
#==========================================================================