Skip to content

Commit

Permalink
[ENH, MRG] Add Automatic Muscle Artifact ICA Component Detection (#10534
Browse files Browse the repository at this point in the history
)
  • Loading branch information
alexrockhill authored Apr 21, 2022
1 parent ac45cbe commit 5adb552
Show file tree
Hide file tree
Showing 7 changed files with 263 additions and 9 deletions.
2 changes: 2 additions & 0 deletions doc/changes/latest.inc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ Enhancements

- The :func:`mne.make_forward_dipole` function can now take a list of dipoles to make a multi-dipole forward models (:gh:`10464` by `Marijn van Vliet`_)

- Add :meth:`mne.preprocessing.ICA.find_bads_muscle` to find muscle-related ICA components with an example, :ref:`ex-muscle-ica` (:gh:`10534` by `Alex Rockhill`_)

- Add example of Xfit-style ECD modeling using multiple dipoles (:gh:`10464` by `Marijn van Vliet`_)

- It is now possible to compute inverse solutions with restricted source orientations using discrete forward models (:gh:`10464` by `Marijn van Vliet`_)
Expand Down
28 changes: 28 additions & 0 deletions doc/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,21 @@ @article{DestrieuxEtAl2010
year = {2010}
}

@inproceedings{DharmapraniEtAl2016,
address = {Orlando, FL, USA},
title = {A comparison of independent component analysis algorithms and measures to discriminate between {EEG} and artifact components},
isbn = {978-1-4577-0220-4},
url = {http://ieeexplore.ieee.org/document/7590828/},
doi = {10.1109/EMBC.2016.7590828},
urldate = {2022-04-13},
booktitle = {2016 38th {Annual} {International} {Conference} of the {IEEE} {Engineering} in {Medicine} and {Biology} {Society} ({EMBC})},
publisher = {IEEE},
author = {Dharmaprani, Dhani and Nguyen, Hoang K. and Lewis, Trent W. and DeLosAngeles, Dylan and Willoughby, John O. and Pope, Kenneth J.},
year = {2016},
pages = {825--828},
}


@article{DufauEtAl2015,
author = {Dufau, Stéphane and Grainger, Jonathan and Midgley, Katherine J. and Holcomb, Phillip J.},
doi = {10.1177/0956797615603934},
Expand Down Expand Up @@ -1842,6 +1857,19 @@ @article{WheatEtAl2010
year = {2010}
}

@article{WhithamEtAl2007,
title = {Scalp electrical recording during paralysis: quantitative evidence that {EEG} frequencies above 20 {Hz} are contaminated by {EMG}},
volume = {118},
issn = {1388-2457},
doi = {10.1016/j.clinph.2007.04.027},
number = {8},
journal = {Clinical Neurophysiology: Official Journal of the International Federation of Clinical Neurophysiology},
author = {Whitham, Emma M. and Pope, Kenneth J. and Fitzgibbon, Sean P. and Lewis, Trent and Clark, C. Richard and Loveless, Stephen and Broberg, Marita and Wallace, Angus and DeLosAngeles, Dylan and Lillie, Peter and Hardy, Andrew and Fronsko, Rik and Pulbrook, Alyson and Willoughby, John O.},
year = {2007},
pmid = {17574912},
pages = {1877--1888},
}

@article{WidmannEtAl2015,
author = {Widmann, Andreas and Schröger, Erich and Maess, Burkhard},
doi = {10.1016/j.jneumeth.2014.08.002},
Expand Down
3 changes: 0 additions & 3 deletions examples/decoding/decoding_csp_eeg.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,6 @@
montage = make_standard_montage('standard_1005')
raw.set_montage(montage)

# strip channel names of "." characters
raw.rename_channels(lambda x: x.strip('.'))

# Apply band-pass filter
raw.filter(7., 30., fir_design='firwin', skip_by_annotation='edge')

Expand Down
117 changes: 117 additions & 0 deletions examples/preprocessing/muscle_ica.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# -*- coding: utf-8 -*-
"""
.. _ex-muscle-ica:
==============================
Removing muscle ICA components
==============================
Gross movements produce widespread high-frequency activity across all channels
that is usually not recoverable and so the epoch must be rejected as shown in
:ref:`ex-muscle-artifacts`. More ubiquitously than gross movements, muscle
artifact is produced during postural maintenance. This is more appropriately
removed by ICA otherwise there wouldn't be any epochs left! Note that muscle
artifacts of this kind are much more pronounced in EEG than they are in MEG.
"""
# Authors: Alex Rockhill <aprockhill@mailbox.org>
#
# License: BSD-3-Clause

# %%

import os.path as op
import mne

data_path = mne.datasets.sample.data_path()
raw_fname = op.join(data_path, 'MEG', 'sample', 'sample_audvis_raw.fif')
raw = mne.io.read_raw_fif(raw_fname)
raw.crop(tmin=100, tmax=130) # take 30 seconds for speed

# pick only EEG channels, muscle artifact is basically not picked up by MEG
# if you have a simulatenous recording, you may want to do ICA on MEG and EEG
# separately
raw.pick_types(eeg=True)

# ICA works best with a highpass filter applied
raw.load_data()
raw.filter(l_freq=1., h_freq=None)

# %%
# Run ICA
ica = mne.preprocessing.ICA(
n_components=15, method='picard', max_iter='auto', random_state=97)
ica.fit(raw)

# %%
# Remove components with postural muscle artifact using ICA
ica.plot_sources(raw)

# %%
# By inspection, let's select out the muscle-artifact components based on
# :footcite:`DharmapraniEtAl2016` manually.
#
# The criteria are:
#
# - Positive slope of log-log power spectrum between 7 and 75 Hz
# (here just flat because it's not in log-log)
# - Peripheral focus or dipole/multi-pole foci (the blue and red
# blobs in the topomap are far from the vertex where the most
# muscle is)
# - Single focal point (low spatial smoothness; there is just one focus
# of the topomap compared to components like the first ones that are
# more likely neural which spread across the topomap)
#
# The other attribute worth noting is that the time course in
# :func:`mne.preprocessing.ICA.plot_sources` looks like EMG; you can
# see spikes when each motor unit fires so that the time course looks fuzzy
# and sometimes has large spikes that are often at regular intervals.
#
# ICA component 13 is a textbook example of what muscle artifact looks like.
# The focus of the topomap for this component is right on the temporalis
# muscle near the ears. There is also a minimum in the power spectrum at around
# 10 Hz, then a maximum at around 25 Hz, then a slow linear dropoff (relative
# to non-muscle components) in non-log-log units (this slope is positive in
# log-log units) this is a very typical pattern for muscle artifact.

muscle_idx = [6, 7, 8, 9, 10, 11, 12, 13, 14]
ica.plot_properties(raw, picks=muscle_idx)

# first, remove blinks and heartbeat to compare
blink_idx = [0]
heartbeat_idx = [5]
ica.apply(raw, exclude=blink_idx + heartbeat_idx)
ica.plot_overlay(raw, exclude=muscle_idx)

# %%
# Finally, let's try an automated algorithm to find muscle components
# and ensure that it gets the same components we did manually.
muscle_idx_auto, scores = ica.find_bads_muscle(raw)
ica.plot_scores(scores, exclude=muscle_idx_auto)
print(f'Manually found muscle artifact ICA components: {muscle_idx}\n'
f'Automatically found muscle artifact ICA components: {muscle_idx_auto}')

# %%
# Let's now replicate this on the EEGBCI dataset
# ----------------------------------------------

for sub in (1, 2):
raw = mne.io.read_raw_edf(
mne.datasets.eegbci.load_data(subject=sub, runs=(1,))[0], preload=True)
mne.datasets.eegbci.standardize(raw) # set channel names
montage = mne.channels.make_standard_montage('standard_1005')
raw.set_montage(montage)
raw.filter(l_freq=1., h_freq=None)

# Run ICA
ica = mne.preprocessing.ICA(
n_components=15, method='picard', max_iter='auto', random_state=97)
ica.fit(raw)
ica.plot_sources(raw)
muscle_idx_auto, scores = ica.find_bads_muscle(raw)
ica.plot_properties(raw, picks=muscle_idx_auto)
ica.plot_scores(scores, exclude=muscle_idx_auto)

print(f'Manually found muscle artifact ICA components: {muscle_idx}\n'
'Automatically found muscle artifact ICA components: '
f'{muscle_idx_auto}')
109 changes: 104 additions & 5 deletions mne/preprocessing/ica.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
from ..viz.topomap import _plot_corrmap

from ..channels.channels import _contains_ch_type
from ..channels.layout import _find_topomap_coords
from ..time_frequency import psd_welch, psd_multitaper
from ..io.write import start_and_end_file, write_id
from ..utils import (logger, check_fname, _check_fname, verbose,
_reject_data_segments, check_random_state, _validate_type,
Expand Down Expand Up @@ -1351,7 +1353,7 @@ def find_bads_ecg(self, inst, ch_name=None, threshold='auto', start=None,
See Also
--------
find_bads_eog, find_bads_ref
find_bads_eog, find_bads_ref, find_bads_muscle
Notes
-----
Expand Down Expand Up @@ -1486,7 +1488,7 @@ def find_bads_ref(self, inst, ch_name=None, threshold=3.0, start=None,
See Also
--------
find_bads_ecg, find_bads_eog
find_bads_ecg, find_bads_eog, find_bads_muscle
Notes
-----
Expand Down Expand Up @@ -1575,12 +1577,109 @@ def find_bads_ref(self, inst, ch_name=None, threshold=3.0, start=None,
normrats = np.linalg.norm(weights[ref_picks], axis=0) \
/ np.linalg.norm(weights[meg_picks], axis=0)
scores = np.log(normrats)
self.labels_['ref_meg'] = list(_find_outliers(scores,
threshold=threshold,
tail=1))
self.labels_['ref_meg'] = list(_find_outliers(
scores, threshold=threshold, tail=1))

return self.labels_['ref_meg'], scores

@verbose
def find_bads_muscle(self, inst, threshold=0.5, start=None,
stop=None, l_freq=7, h_freq=45, sphere=None,
verbose=None):
"""Detect muscle related components.
Detection is based on :footcite:`DharmapraniEtAl2016` which uses
data from a subject who has been temporarily paralyzed
:footcite:`WhithamEtAl2007`. The criteria are threefold:
1) Positive log-log spectral slope from 7 to 45 Hz
2) Peripheral component power (farthest away from the vertex)
3) A single focal point measured by low spatial smoothness
The threshold is relative to the slope, focal point and smoothness
of a typical muscle-related ICA component. Note the high frequency
of the power spectral density slope was 75 Hz in the reference but
has been modified to 45 Hz as a default based on the criteria being
more accurate in practice.
Parameters
----------
inst : instance of Raw, Epochs or Evoked
Object to compute sources from.
threshold : float | str
Value above which a component should be marked as muscle-related,
relative to a typical muscle component.
start : int | float | None
First sample to include. If float, data will be interpreted as
time in seconds. If None, data will be used from the first sample.
stop : int | float | None
Last sample to not include. If float, data will be interpreted as
time in seconds. If None, data will be used to the last sample.
l_freq : float
Low frequency for muscle-related power.
h_freq : float
High frequency for msucle related power.
%(sphere_topomap_auto)s
%(verbose)s
Returns
-------
muscle_idx : list of int
The indices of EOG related components, sorted by score.
scores : np.ndarray of float, shape (``n_components_``) | list of array
The correlation scores.
See Also
--------
find_bads_ecg, find_bads_eog, find_bads_ref
Notes
-----
.. versionadded:: 1.1
"""
from scipy.spatial.distance import pdist, squareform
_validate_type(threshold, 'numeric', 'threshold')

sources = self.get_sources(inst, start=start, stop=stop)
components = self.get_components()

# compute metric #1: slope of the log-log psd
psd_func = psd_welch if isinstance(inst, BaseRaw) else psd_multitaper
psds, freqs = psd_func(sources, fmin=l_freq, fmax=h_freq, picks='misc')
slopes = np.polyfit(np.log10(freqs), np.log10(psds).T, 1)[0]

# compute metric #2: distance from the vertex of focus
components_norm = abs(components) / np.max(abs(components), axis=0)
pos = _find_topomap_coords(inst.info, picks='data', sphere=sphere)
assert pos.shape[0] == components.shape[0] # pos for each sensor
pos -= pos.mean(axis=0) # center
dists = np.linalg.norm(pos, axis=1)
dists /= dists.max()
focus_dists = np.dot(dists, components_norm)

# compute metric #3: smoothness
smoothnesses = np.zeros((components.shape[1],))
dists = squareform(pdist(pos))
dists = 1 - (dists / dists.max()) # invert
for idx, comp in enumerate(components.T):
comp_dists = squareform(pdist(comp[:, np.newaxis]))
comp_dists /= comp_dists.max()
smoothnesses[idx] = np.multiply(dists, comp_dists).sum()

# typical muscle slope is ~0.15, non-muscle components negative
# so logistic with shift -0.5 and slope 0.25 so -0.5 -> 0.5 and 0->1
# focus distance is ~65% of max electrode distance with 10% slope
# (assumes typical head size)
# smoothnessness is around 150 for muscle and 450 otherwise
# so use reversed logistic centered at 300 with 100 slope
# multiply so that all three components must be present
scores = (1 / (1 + np.exp(-(slopes + 0.5) / 0.25))) * \
(1 / (1 + np.exp(-(focus_dists - 0.65) / 0.1))) * \
(1 - (1 / (1 + np.exp(-(smoothnesses - 300) / 100))))
# scale the threshold by the use of three metrics
self.labels_['muscle'] = [idx for idx, score in enumerate(scores)
if score > threshold**3]
return self.labels_['muscle'], scores

@verbose
def find_bads_eog(self, inst, ch_name=None, threshold=3.0, start=None,
stop=None, l_freq=1, h_freq=10,
Expand Down
8 changes: 7 additions & 1 deletion mne/preprocessing/tests/test_ica.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,8 @@ def test_ica_noop(n_components, n_pca_components, tmp_path):

@requires_sklearn
@pytest.mark.parametrize("method, max_iter_default", [("fastica", 1000),
("infomax", 500), ("picard", 500)])
("infomax", 500),
("picard", 500)])
def test_ica_max_iter_(method, max_iter_default):
"""Test that ICA.max_iter is set to the right defaults."""
_skip_check_picard(method)
Expand Down Expand Up @@ -1321,6 +1322,11 @@ def test_ica_labels():
for key in ('ecg', 'eog', 'ref_meg', 'ecg/ECG-MAG'):
assert key in ica.labels_

scores = ica.find_bads_muscle(raw)[1]
assert 'muscle' in ica.labels_
assert ica.labels_['muscle'] == [0]
assert_allclose(scores, [0.56, 0.01, 0.03, 0.00], atol=0.03)


@requires_sklearn
@testing.requires_testing_data
Expand Down
5 changes: 5 additions & 0 deletions tutorials/preprocessing/40_artifact_correction_ica.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,11 @@
# of automated approaches like `~mne.preprocessing.ICA.find_bads_ecg`
# before accepting them.

# %%
# For EEG, activation of muscles for postural control of the head and neck
# contaminate the signal as well. This is usually not detected by MEG. For
# an example showing how to remove these components, see :ref:`ex-muscle-ica`.

# clean up memory before moving on
del raw, ica, new_ica

Expand Down

0 comments on commit 5adb552

Please sign in to comment.