fixed several errors, checked with eegbci

mne-tools · Apr 18, 2022 · 05bd4e5 · 05bd4e5
1 parent 0b4d180
commit 05bd4e5
Show file tree

Hide file tree

Showing 3 changed files with 56 additions and 32 deletions.
diff --git a/examples/decoding/decoding_csp_eeg.py b/examples/decoding/decoding_csp_eeg.py
@@ -52,9 +52,6 @@
 montage = make_standard_montage('standard_1005')
 raw.set_montage(montage)
 
-# strip channel names of "." characters
-raw.rename_channels(lambda x: x.strip('.'))
-
 # Apply band-pass filter
 raw.filter(7., 30., fir_design='firwin', skip_by_annotation='edge')
 

diff --git a/examples/preprocessing/muscle_ica.py b/examples/preprocessing/muscle_ica.py
@@ -44,7 +44,7 @@
 ica.fit(raw)
 
 # %%
-# Remove postural ICA
+# Remove components with postural muscle artifact using ICA
 ica.plot_sources(raw)
 
 # %%
@@ -87,5 +87,33 @@
 # and ensure that it gets the same components we did manually.
 muscle_idx_auto, scores = ica.find_bads_muscle(raw)
 ica.plot_scores(scores)
-print(f'Manually found muscle artifact ICA components: {muscle_idx}',
+print(f'Manually found muscle artifact ICA components:      {muscle_idx}\n'
       f'Automatically found muscle artifact ICA components: {muscle_idx_auto}')
+
+# %%
+# Compare to EEGBCI dataset
+# Note that the ICA for the third subject got
+
+muscle_idxs = {1: [9], 2: [5, 10, 12, 13, 14]}
+for sub in (1, 2, 3):
+    raw = mne.io.read_raw_edf(
+        mne.datasets.eegbci.load_data(subject=sub, runs=(1,))[0], preload=True)
+    mne.datasets.eegbci.standardize(raw)  # set channel names
+    montage = mne.channels.make_standard_montage('standard_1005')
+    raw.set_montage(montage)
+    raw.filter(l_freq=1., h_freq=None)
+
+    # %%
+    # Run ICA
+    ica = mne.preprocessing.ICA(
+        n_components=15, method='picard', max_iter='auto', random_state=97)
+    ica.fit(raw)
+    ica.plot_sources(raw)
+    muscle_idx = muscle_idxs[sub]
+    ica.plot_properties(raw, picks=muscle_idx)
+    muscle_idx_auto, scores = ica.find_bads_muscle(raw)
+    ica.plot_scores(scores)
+
+    print(f'Manually found muscle artifact ICA components:      {muscle_idx}\n'
+          'Automatically found muscle artifact ICA components: '
+          f'{muscle_idx_auto}')
diff --git a/mne/preprocessing/ica.py b/mne/preprocessing/ica.py
@@ -48,7 +48,7 @@
 
 from ..channels.channels import _contains_ch_type
 from ..channels.layout import _find_topomap_coords
-from ..time_frequency import psd_multitaper
+from ..time_frequency import psd_welch, psd_multitaper
 from ..io.write import start_and_end_file, write_id
 from ..utils import (logger, check_fname, _check_fname, verbose,
                      _reject_data_segments, check_random_state, _validate_type,
@@ -1584,14 +1584,15 @@ def find_bads_ref(self, inst, ch_name=None, threshold=3.0, start=None,
 
     @verbose
     def find_bads_muscle(self, inst, threshold=0.5, start=None,
-                         stop=None, l_freq=7, h_freq=75, sphere=None,
+                         stop=None, l_freq=7, h_freq=45, sphere=None,
                          verbose=None):
         """Detect muscle related components.
 
         Detection is based on :footcite:`DharmapraniEtAl2016` which uses
         data from a subject who has been temporarily paralyzed
         :footcite:`WhithamEtAl2007`. The criteria are threefold:
-        1) Positive log-log spectral slope from 7 to 75 Hz
+        1) Positive log-log spectral slope from 7 to 45 Hz
+           (modified from 75 Hz)
         2) Peripheral component power (farthest away from the vertex)
         3) A single focal point measured by low spatial smoothness
 
@@ -1633,47 +1634,45 @@ def find_bads_muscle(self, inst, threshold=0.5, start=None,
         -----
         .. versionadded:: 1.1
         """
+        from scipy.spatial.distance import pdist, squareform
         _validate_type(threshold, 'numeric', 'threshold')
 
         sources = self.get_sources(inst, start=start, stop=stop)
         components = self.get_components()
-        components_norm = abs(components) / np.sum(abs(components), axis=0)
 
         # compute metric #1: slope of the log-log psd
-        psds, freqs = psd_multitaper(
-            sources, fmin=l_freq, fmax=h_freq, picks='misc')
-        slopes = np.polyfit(np.log10(freqs), np.log10(psds).T, 1)[1]
+        psd_func = psd_welch if isinstance(inst, BaseRaw) else psd_multitaper
+        psds, freqs = psd_func(sources, fmin=l_freq, fmax=h_freq, picks='misc')
+        slopes = np.polyfit(np.log10(freqs), np.log10(psds).T, 1)[0]
 
         # compute metric #2: distance from the vertex of focus
+        components_norm = abs(components) / np.max(abs(components), axis=0)
         pos = _find_topomap_coords(inst.info, picks='data', sphere=sphere)
         assert pos.shape[0] == components.shape[0]  # pos for each component
+        pos -= pos.mean(axis=0)  # center
         dists = np.linalg.norm(pos, axis=1)
         dists /= dists.max()
         focus_dists = np.dot(dists, components_norm)
 
-        # compute metric #3: smoothness (inverse of focalness)
-        # the formula from the paper (below but with a sum) didn't work nearly
-        # as well, so the focalness of the tomomap was computed with KL
-        # divergence
-        focalnesses = np.array([np.sum(
-            comp * np.log(comp * comp.size)) for comp in components_norm.T])
-        # from scipy.spatial.distance import pdist, squareform
-        # dists = squareform(pdist(pos))
-        # dists /= dists.max()
-        # focalnesses = np.array([np.max(np.multiply(
-        #    dists, squareform(pdist(comp[:, np.newaxis]))))
-        #    for comp in components_norm.T])
-
-        # typical muscle component slope is 0.3, non-muscle components negative
-        # take logistic with slope 1 / 0.15 so that 0.3 -> 1
+        # compute metric #3: smoothness
+        dists = squareform(pdist(pos))
+        dists = 1 - (dists / dists.max())  # invert
+        smoothnesses = np.zeros((components.shape[1],))
+        for idx, comp in enumerate(components.T):
+            comp_dists = squareform(pdist(comp[:, np.newaxis]))
+            comp_dists /= comp_dists.max()
+            smoothnesses[idx] = np.multiply(dists, comp_dists).sum()
+
+        # typical muscle slope is ~0.15, non-muscle components negative
+        # so logistic with shift -0.5 and slope 0.25 so -0.5 -> 0.5 and 0->1
         # focus distance is ~65% of max electrode distance with 10% slope
         # (assumes typical head size)
-        # focalness is around 0.5 - 1 for muscle components and < 0.5 otherwise
-        # so use logistic centered at 0.1 with 0.1 slope
+        # smoothnessness is around 150 for muscle and 450 otherwise
+        # so use reversed logistic centered at 300 with 100 slope
         # multiply so that all three components must be present
-        scores = 1 / (1 + np.exp(-slopes / 0.15)) * \
-            1 / (1 + np.exp(-(focus_dists - 0.65) / 0.1)) * \
-            1 / (1 + np.exp(-(focalnesses - 0.5) / 0.1))
+        scores = (1 / (1 + np.exp(-(slopes + 0.5) / 0.25))) * \
+            (1 / (1 + np.exp(-(focus_dists - 0.65) / 0.1))) * \
+            (1 - (1 / (1 + np.exp(-(smoothnesses - 300) / 100))))
         # scale the threshold by the use of three metrics
         self.labels_['muscle'] = [idx for idx, score in enumerate(scores)
                                   if score > threshold**3]