Skip to content

ENH: Add cosine-basis high-pass-filter to CompCor #2107

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Jul 12, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
231 changes: 196 additions & 35 deletions nipype/algorithms/confounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,21 +323,34 @@ class CompCorInputSpec(BaseInterfaceInputSpec):
desc=('Position of mask in `mask_files` to use - '
'first is the default.'))
components_file = traits.Str('components_file.txt', usedefault=True,
desc='Filename to store physiological components')
desc='Filename to store physiological components')
num_components = traits.Int(6, usedefault=True) # 6 for BOLD, 4 for ASL
use_regress_poly = traits.Bool(True, usedefault=True,
pre_filter = traits.Enum('polynomial', 'cosine', False, usedefault=True,
desc='Detrend time series prior to component '
'extraction')
use_regress_poly = traits.Bool(True,
deprecated='0.15.0', new_name='pre_filter',
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@satra I set this to deprecate at 0.15.0, two minor versions in the future. Should that be 3.0.0, instead? It's pretty unclear what a deprecation schedule should look like.

desc=('use polynomial regression '
'pre-component extraction'))
regress_poly_degree = traits.Range(low=1, default=1, usedefault=True,
desc='the degree polynomial to use')
header_prefix = traits.Str(desc=('the desired header for the output tsv '
'file (one column). If undefined, will '
'default to "CompCor"'))
high_pass_cutoff = traits.Float(
128, usedefault=True,
desc='Cutoff (in seconds) for "cosine" pre-filter')
repetition_time = traits.Float(
desc='Repetition time (TR) of series - derived from image header if '
'unspecified')
save_pre_filter = traits.Either(
traits.Bool, File, desc='Save pre-filter basis as text file')


class CompCorOutputSpec(TraitedSpec):
components_file = File(exists=True,
desc='text file containing the noise components')
pre_filter_file = File(desc='text file containing high-pass filter basis')


class CompCor(BaseInterface):
Expand All @@ -351,7 +364,7 @@ class CompCor(BaseInterface):
>>> ccinterface.inputs.realigned_file = 'functional.nii'
>>> ccinterface.inputs.mask_files = 'mask.nii'
>>> ccinterface.inputs.num_components = 1
>>> ccinterface.inputs.use_regress_poly = True
>>> ccinterface.inputs.pre_filter = 'polynomial'
>>> ccinterface.inputs.regress_poly_degree = 2

"""
Expand Down Expand Up @@ -383,17 +396,20 @@ def _run_interface(self, runtime):
self.inputs.merge_method,
self.inputs.mask_index)

if self.inputs.use_regress_poly:
self.inputs.pre_filter = 'polynomial'

# Degree 0 == remove mean; see compute_noise_components
degree = (self.inputs.regress_poly_degree if
self.inputs.use_regress_poly else 0)
self.inputs.pre_filter == 'polynomial' else 0)

imgseries = nb.load(self.inputs.realigned_file,
mmap=NUMPY_MMAP)
imgseries = nb.load(self.inputs.realigned_file, mmap=NUMPY_MMAP)

if len(imgseries.shape) != 4:
raise ValueError('tCompCor expected a 4-D nifti file. Input {} has '
'{} dimensions (shape {})'.format(
self.inputs.realigned_file, len(imgseries.shape),
imgseries.shape))
raise ValueError('{} expected a 4-D nifti file. Input {} has '
'{} dimensions (shape {})'.format(
self._header, self.inputs.realigned_file,
len(imgseries.shape), imgseries.shape))

if len(mask_images) == 0:
img = nb.Nifti1Image(np.ones(imgseries.shape[:3], dtype=np.bool),
Expand All @@ -403,13 +419,41 @@ def _run_interface(self, runtime):

mask_images = self._process_masks(mask_images, imgseries.get_data())

components = compute_noise_components(imgseries.get_data(),
mask_images, degree,
self.inputs.num_components)
TR = 0
if self.inputs.pre_filter == 'cosine':
if isdefined(self.inputs.repetition_time):
TR = self.inputs.repetition_time
else:
# Derive TR from NIfTI header, if possible
try:
TR = imgseries.header.get_zooms()[3]
if imgseries.get_xyzt_units()[1] == 'msec':
TR /= 1000
except (AttributeError, IndexError):
TR = 0

if TR == 0:
raise ValueError(
'{} cannot detect repetition time from image - '
'Set the repetition_time input'.format(self._header))

components, filter_basis = compute_noise_components(
imgseries.get_data(), mask_images, self.inputs.num_components,
self.inputs.pre_filter, degree, self.inputs.high_pass_cutoff, TR)

components_file = os.path.join(os.getcwd(), self.inputs.components_file)
np.savetxt(components_file, components, fmt=b"%.10f", delimiter='\t',
header=self._make_headers(components.shape[1]), comments='')

if self.inputs.pre_filter and self.inputs.save_pre_filter:
pre_filter_file = self._list_outputs()['pre_filter_file']
ftype = {'polynomial': 'poly',
'cosine': 'cos'}[self.inputs.pre_filter]
ncols = filter_basis.shape[1] if filter_basis.size > 0 else 0
header = ['{}{:02d}'.format(ftype, i) for i in range(ncols)]
np.savetxt(pre_filter_file, filter_basis, fmt=b'%.10f',
delimiter='\t', header='\t'.join(header), comments='')

return runtime

def _process_masks(self, mask_images, timeseries=None):
Expand All @@ -418,14 +462,19 @@ def _process_masks(self, mask_images, timeseries=None):
def _list_outputs(self):
outputs = self._outputs().get()
outputs['components_file'] = os.path.abspath(self.inputs.components_file)

save_pre_filter = self.inputs.save_pre_filter
if save_pre_filter:
if isinstance(save_pre_filter, bool):
save_pre_filter = os.path.abspath('pre_filter.tsv')
outputs['pre_filter_file'] = save_pre_filter

return outputs

def _make_headers(self, num_col):
headers = []
header = self.inputs.header_prefix if \
isdefined(self.inputs.header_prefix) else self._header
for i in range(num_col):
headers.append(header + '{:02d}'.format(i))
headers = ['{}{:02d}'.format(header, i) for i in range(num_col)]
return '\t'.join(headers)


Expand Down Expand Up @@ -473,7 +522,7 @@ class TCompCor(CompCor):
>>> ccinterface.inputs.realigned_file = 'functional.nii'
>>> ccinterface.inputs.mask_files = 'mask.nii'
>>> ccinterface.inputs.num_components = 1
>>> ccinterface.inputs.use_regress_poly = True
>>> ccinterface.inputs.pre_filter = 'polynomial'
>>> ccinterface.inputs.regress_poly_degree = 2
>>> ccinterface.inputs.percentile_threshold = .03

Expand All @@ -494,7 +543,7 @@ def _process_masks(self, mask_images, timeseries=None):
for i, img in enumerate(mask_images):
mask = img.get_data().astype(np.bool)
imgseries = timeseries[mask, :]
imgseries = regress_poly(2, imgseries)
imgseries = regress_poly(2, imgseries)[0]
tSTD = _compute_tSTD(imgseries, 0, axis=-1)
threshold_std = np.percentile(tSTD, np.round(100. *
(1. - self.inputs.percentile_threshold)).astype(int))
Expand Down Expand Up @@ -569,7 +618,7 @@ def _run_interface(self, runtime):
data = data.astype(np.float32)

if isdefined(self.inputs.regress_poly):
data = regress_poly(self.inputs.regress_poly, data, remove_mean=False)
data = regress_poly(self.inputs.regress_poly, data, remove_mean=False)[0]
img = nb.Nifti1Image(data, img.affine, header)
nb.save(img, op.abspath(self.inputs.detrended_file))

Expand Down Expand Up @@ -618,7 +667,7 @@ def _run_interface(self, runtime):
global_signal = in_nii.get_data()[:,:,:,:50].mean(axis=0).mean(axis=0).mean(axis=0)

self._results = {
'n_volumes_to_discard': _is_outlier(global_signal)
'n_volumes_to_discard': is_outlier(global_signal)
}

return runtime
Expand Down Expand Up @@ -685,9 +734,10 @@ def compute_dvars(in_file, in_mask, remove_zerovariance=False,
func_sd = func_sd[func_sd != 0]

# Compute (non-robust) estimate of lag-1 autocorrelation
ar1 = np.apply_along_axis(AR_est_YW, 1,
regress_poly(0, mfunc, remove_mean=True).astype(
np.float32), 1)[:, 0]
ar1 = np.apply_along_axis(
AR_est_YW, 1,
regress_poly(0, mfunc, remove_mean=True)[0].astype(np.float32),
1)[:, 0]

# Compute (predicted) standard deviation of temporal difference time series
diff_sdhat = np.squeeze(np.sqrt(((1 - ar1) * 2).tolist())) * func_sd
Expand Down Expand Up @@ -794,6 +844,27 @@ def is_outlier(points, thresh=3.5):
return timepoints_to_discard


def cosine_filter(data, timestep, period_cut, remove_mean=True, axis=-1):
    """High-pass filter ``data`` with a discrete-cosine (DCT) basis.

    Parameters
    ----------
    data: array
        Time series, with time along ``axis``
    timestep: float
        Sampling interval (TR) in seconds
    period_cut: float
        Minimum period (in sec) retained by the high-pass filter
    remove_mean: bool
        If True, the constant regressor is kept in the fit, so the
        mean is removed from the residuals as well
    axis: int
        Axis of ``data`` holding the time dimension

    Returns
    -------
    residuals: array of the same shape as ``data``
    non_constant_regressors: the cosine basis columns (constant excluded)
    """
    original_shape = data.shape
    n_timepoints = original_shape[axis]

    flat = data.reshape((-1, n_timepoints))

    scan_times = timestep * np.arange(n_timepoints)
    basis = _full_rank(_cosine_drift(period_cut, scan_times))[0]
    drift_columns = basis[:, :-1] if basis.shape[1] > 1 else np.array([])

    coefficients = np.linalg.lstsq(basis, flat.T)[0]

    if not remove_mean:
        # Preserve the mean: drop the constant regressor (last column)
        # and its coefficient before reconstructing the fitted signal.
        basis = basis[:, :-1]
        coefficients = coefficients[:-1]

    residuals = flat - basis.dot(coefficients).T

    return residuals.reshape(original_shape), drift_columns


def regress_poly(degree, data, remove_mean=True, axis=-1):
"""
Returns data with degree polynomial regressed out.
Expand All @@ -817,6 +888,8 @@ def regress_poly(degree, data, remove_mean=True, axis=-1):
value_array = np.linspace(-1, 1, timepoints)
X = np.hstack((X, polynomial_func(value_array)[:, np.newaxis]))

non_constant_regressors = X[:, :-1] if X.shape[1] > 1 else np.array([])

# Calculate coefficients
betas = np.linalg.pinv(X).dot(data.T)

Expand All @@ -828,7 +901,7 @@ def regress_poly(degree, data, remove_mean=True, axis=-1):
regressed_data = data - datahat

# Back to original shape
return regressed_data.reshape(datashape)
return regressed_data.reshape(datashape), non_constant_regressors


def combine_mask_files(mask_files, mask_method=None, mask_index=None):
Expand Down Expand Up @@ -886,37 +959,57 @@ def combine_mask_files(mask_files, mask_method=None, mask_index=None):
return [img]


def compute_noise_components(imgseries, mask_images, degree, num_components):
def compute_noise_components(imgseries, mask_images, num_components,
filter_type, degree, period_cut,
repetition_time):
"""Compute the noise components from the imgseries for each mask

imgseries: a nibabel img
mask_images: a list of nibabel images
degree: order of polynomial used to remove trends from the timeseries
num_components: number of noise components to return
filter_type: type off filter to apply to time series before computing
noise components.
'polynomial' - Legendre polynomial basis
'cosine' - Discrete cosine (DCT) basis
False - None (mean-removal only)

Filter options:

degree: order of polynomial used to remove trends from the timeseries
period_cut: minimum period (in sec) for DCT high-pass filter
repetition_time: time (in sec) between volume acquisitions

returns:

components: a numpy array
basis: a numpy array containing the (non-constant) filter regressors

"""
components = None
basis = np.array([])
for img in mask_images:
mask = img.get_data().astype(np.bool)
if imgseries.shape[:3] != mask.shape:
raise ValueError('Inputs for CompCor, timeseries and mask, '
'do not have matching spatial dimensions '
'({} and {}, respectively)'.format(
imgseries.shape[:3], mask.shape))
raise ValueError(
'Inputs for CompCor, timeseries and mask, do not have '
'matching spatial dimensions ({} and {}, respectively)'.format(
imgseries.shape[:3], mask.shape))

voxel_timecourses = imgseries[mask, :]

# Zero-out any bad values
voxel_timecourses[np.isnan(np.sum(voxel_timecourses, axis=1)), :] = 0

# from paper:
# "The constant and linear trends of the columns in the matrix M were
# removed [prior to ...]"
voxel_timecourses = regress_poly(degree, voxel_timecourses)
# Currently support Legendre-polynomial or cosine or detrending
# With no filter, the mean is nonetheless removed (poly w/ degree 0)
if filter_type == 'cosine':
voxel_timecourses, basis = cosine_filter(
voxel_timecourses, repetition_time, period_cut)
elif filter_type in ('polynomial', False):
# from paper:
# "The constant and linear trends of the columns in the matrix M were
# removed [prior to ...]"
voxel_timecourses, basis = regress_poly(degree, voxel_timecourses)

# "Voxel time series from the noise ROI (either anatomical or tSTD) were
# placed in a matrix M of size Nxm, with time along the row dimension
Expand All @@ -936,7 +1029,7 @@ def compute_noise_components(imgseries, mask_images, degree, num_components):
u[:, :num_components]))
if components is None and num_components > 0:
raise ValueError('No components found')
return components
return components, basis


def _compute_tSTD(M, x, axis=0):
Expand All @@ -945,3 +1038,71 @@ def _compute_tSTD(M, x, axis=0):
stdM[stdM == 0] = x
stdM[np.isnan(stdM)] = x
return stdM


# _cosine_drift and _full_rank copied from nipy/modalities/fmri/design_matrix
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a big disadvantage to depending on nipy here instead of copying the code?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Originally I did it simply to avoid adding a dependency. I ended up augmenting it to set a minimum order of 1.

#
# Nipy release: 0.4.1
# Modified for smooth integration in CompCor classes

def _cosine_drift(period_cut, frametimes):
"""Create a cosine drift matrix with periods greater or equals to period_cut

Parameters
----------
period_cut: float
Cut period of the low-pass filter (in sec)
frametimes: array of shape(nscans)
The sampling times (in sec)

Returns
-------
cdrift: array of shape(n_scans, n_drifts)
cosin drifts plus a constant regressor at cdrift[:,0]

Ref: http://en.wikipedia.org/wiki/Discrete_cosine_transform DCT-II
"""
len_tim = len(frametimes)
n_times = np.arange(len_tim)
hfcut = 1. / period_cut # input parameter is the period

# frametimes.max() should be (len_tim-1)*dt
dt = frametimes[1] - frametimes[0]
# hfcut = 1/(2*dt) yields len_time
# If series is too short, return constant regressor
order = max(int(np.floor(2*len_tim*hfcut*dt)), 1)
cdrift = np.zeros((len_tim, order))
nfct = np.sqrt(2.0/len_tim)

for k in range(1, order):
cdrift[:, k-1] = nfct * np.cos((np.pi / len_tim) * (n_times + .5) * k)

cdrift[:, order-1] = 1. # or 1./sqrt(len_tim) to normalize
return cdrift


def _full_rank(X, cmax=1e15):
"""
This function possibly adds a scalar matrix to X
to guarantee that the condition number is smaller than a given threshold.

Parameters
----------
X: array of shape(nrows, ncols)
cmax=1.e-15, float tolerance for condition number

Returns
-------
X: array of shape(nrows, ncols) after regularization
cmax=1.e-15, float tolerance for condition number
"""
U, s, V = np.linalg.svd(X, 0)
smax, smin = s.max(), s.min()
c = smax / smin
if c < cmax:
return X, c
IFLOG.warn('Matrix is singular at working precision, regularizing...')
lda = (smax - cmax * smin) / (cmax - 1)
s = s + lda
X = np.dot(U, np.dot(np.diag(s), V))
return X, cmax
Loading