Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SourceTFR #6543

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
305 changes: 305 additions & 0 deletions mne/source_tfr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,305 @@
# -*- coding: utf-8 -*-
#
# Authors: Dirk Gütlin <dirk.guetlin@stud.sbg.ac.at>
# TODO: This file uses a lot of stuff from io/base.py and source_estimate.py
# TODO: Maybe name these persons as authors too?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to me, the authors are the list of people that would know something about that file. It is the people I'm going to send them an email when things go south.

People who wrote the file or closely reviewed.

#
# License: BSD (3-clause)

import copy
import numpy as np

from .filter import resample
from .utils import (_check_subject, verbose, _time_mask)
from .io.base import ToDataFrameMixin, TimeMixin
from .externals.h5io import write_hdf5


class SourceTFR(ToDataFrameMixin, TimeMixin):
"""Class for time-frequency transformed source level data.

Parameters
----------
data : array, shape (n_dipoles, n_freqs, n_times) | tuple, shape (2,)
Time-frequency transformed data in source space. The data can either
be a single array or a tuple with two arrays: "kernel" shape
(n_vertices, n_sensors) and "sens_data" shape (n_sensors, n_freqs,
n_times). In this case, the source space data corresponds to
"numpy.dot(kernel, sens_data)".
vertices : array | list of array
Vertex numbers corresponding to the data.
tmin : float
Time point of the first sample in data.
tstep : float
Time step between successive samples in data.
subject : str | None
The subject name. While not necessary, it is safer to set the
subject parameter to avoid analysis errors.
%(verbose)s

Attributes
----------
subject : str | None
The subject name.
times : array, shape (n_times,)
The time vector.
vertices : array | list of array of shape (n_dipoles,)
The indices of the dipoles in the different source spaces. Can
be an array if there is only one source space (e.g., for volumes).
data : array of shape (n_dipoles, n_times)
The data in source space.
shape : tuple
The shape of the data. A tuple of int (n_dipoles, n_times).
"""

@verbose
def __init__(self, data, vertices=None, tmin=None, tstep=None,
Copy link
Contributor

@massich massich Jul 12, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

break the __init__ with functions. I know that some of our inits are huge, but they should be short easy to read in a glance.

def __init__(foo, bar, data, vertices,  bla):
  _check_param(foo, ...)
  self._data = _prepare_data(data)
  self._vercies = _foo(vertices)
  ...

to me init should be just assigning stuff to the attributes. If _prepare_data has a signature like _prepare_data(data=data, n_col=vertices.shape[0], moon_color=foo) it tels me at a glance how data is related to the other things. Sure I would not know the details but I least I would know the possible relations. If I have to read the code and strip all the value errors out on my mind the task of reading the code is much harder.

keep in mind: code is written once but read endless times and when you read code you want to skim it. Not digest it.

subject=None, verbose=None): # noqa: D102

if not (isinstance(vertices, np.ndarray) or
isinstance(vertices, list)):
raise ValueError('Vertices must be a numpy array or a list of '
'arrays')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we have functions for checking parameters.


self._data_ndim = 3
self._src_type = 'SourceTFR'

kernel, sens_data = None, None
if isinstance(data, tuple):
if len(data) != 2:
raise ValueError('If data is a tuple it has to be length 2')
kernel, sens_data = data
data = None
if kernel.shape[1] != sens_data.shape[0]:
raise ValueError('kernel and sens_data have invalid '
'dimensions')
if sens_data.ndim != self._data_ndim:
raise ValueError('The sensor data must have %s dimensions, got'
' %s' % (self._data_ndim, sens_data.ndim,))

if isinstance(vertices, list):
vertices = [np.asarray(v, int) for v in vertices]
if any(np.any(np.diff(v.astype(int)) <= 0) for v in vertices):
raise ValueError('Vertices must be ordered in increasing '
'order.')

n_src = sum([len(v) for v in vertices])

if len(vertices) == 1:
vertices = vertices[0]
elif isinstance(vertices, np.ndarray):
n_src = len(vertices)

# safeguard the user against doing something silly
if data is not None:
if data.shape[0] != n_src:
raise ValueError('Number of vertices (%i) and stfr.shape[0] '
'(%i) must match' % (n_src, data.shape[0]))
if data.ndim == self._data_ndim - 1: # allow upbroadcasting
data = data[..., np.newaxis]
if data.ndim != self._data_ndim:
raise ValueError('Data (shape %s) must have %s dimensions for '
'%s' % (data.shape, self._data_ndim,
self.__class__.__name__))

self._data = data
self._tmin = tmin
self._tstep = tstep
self.vertices = vertices
self.verbose = verbose
self._kernel = kernel
self._sens_data = sens_data
self._kernel_removed = False
self._times = None
self._update_times()
self.subject = _check_subject(None, subject, False)

def __repr__(self): # noqa: D105
Copy link
Contributor

@massich massich Jul 12, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't use '%' to parse strings (we are no longer in the 90s ;) ).

fstrings are much more readable and they are the way to go (see doc). However, we cannot use them until we drop python 3.5 support. Meanwhile, we could be using 'my name is {name}'.format(name='massich')

https://realpython.com/python-f-strings/

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We still use % all over the code base and have not agreed to change to / prefer .format -- I think % is fine for now

s = "%d vertices" % (sum(len(v) for v in self._vertices_list),)
if self.subject is not None:
s += ", subject : %s" % self.subject
s += ", tmin : %s (ms)" % (1e3 * self.tmin)
s += ", tmax : %s (ms)" % (1e3 * self.times[-1])
s += ", tstep : %s (ms)" % (1e3 * self.tstep)
s += ", data shape : %s" % (self.shape,)
return "<%s | %s>" % (type(self).__name__, s)

@property
def _vertices_list(self):
return self.vertices

@verbose
def save(self, fname, ftype='h5', verbose=None):
"""Save the full SourceTFR to an HDF5 file.

Parameters
----------
fname : string
The file name to write the SourceTFR to, should end in
'-stfr.h5'.
ftype : string
File format to use. Currently, the only allowed values is "h5".
%(verbose_meth)s
"""
if ftype != 'h5':
raise ValueError('%s objects can only be written as HDF5 files.'
% (self.__class__.__name__,))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use checks

if not fname.endswith('.h5'):
fname += '-stfr.h5'
Copy link
Contributor

@massich massich Jul 12, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

avoid ifs without elses as much as possible. See this awesome talk by @ Katrina Owen

in a case like this one, I would do something like:

fname = fname if fname.endswith('h5') else '{}-stfr.h5'.format(fname)

or any way you like to build the fname. (I like this one because it has no spaces but maybe fname + '-stfr.h5' is a better choice)

write_hdf5(fname,
dict(vertices=self.vertices, data=self.data, tmin=self.tmin,
tstep=self.tstep, subject=self.subject,
src_type=self._src_type),
title='mnepython', overwrite=True)

@property
def sfreq(self):
"""Sample rate of the data."""
return 1. / self.tstep

def _remove_kernel_sens_data_(self):
"""Remove kernel and sensor space data and compute self._data."""
if self._kernel is not None or self._sens_data is not None:
self._kernel_removed = True
self._data = np.tensordot(self._kernel, self._sens_data,
axes=([-1], [0]))
self._kernel = None
self._sens_data = None

def crop(self, tmin=None, tmax=None):
"""Restrict SourceTFR to a time interval.

Parameters
----------
tmin : float | None
The first time point in seconds. If None the first present is used.
tmax : float | None
The last time point in seconds. If None the last present is used.
"""
mask = _time_mask(self.times, tmin, tmax, sfreq=self.sfreq)
self.tmin = self.times[np.where(mask)[0][0]]
if self._kernel is not None and self._sens_data is not None:
self._sens_data = self._sens_data[..., mask]
else:
self.data = self.data[..., mask]

return self # return self for chaining methods

@verbose
def resample(self, sfreq, npad='auto', window='boxcar', n_jobs=1,
verbose=None):
"""Resample data.

Parameters
----------
sfreq : float
New sample rate to use.
npad : int | str
Amount to pad the start and end of the data.
Can also be "auto" to use a padding that will result in
a power-of-two size (can be much faster).
window : string or tuple
Window to use in resampling. See scipy.signal.resample.
%(n_jobs)s
%(verbose_meth)s

Notes
-----
For some data, it may be more accurate to use npad=0 to reduce
artifacts. This is dataset dependent -- check your data!

Note that the sample rate of the original data is inferred from tstep.
"""
# resampling in sensor instead of source space gives a somewhat
# different result, so we don't allow it
self._remove_kernel_sens_data_()

o_sfreq = 1.0 / self.tstep
self.data = resample(self.data, sfreq, o_sfreq, npad, n_jobs=n_jobs)

# adjust indirectly affected variables
self.tstep = 1.0 / sfreq
return self

@property
def data(self):
"""Numpy array of SourceTFR data."""
if self._data is None:
# compute the solution the first time the data is accessed and
# remove the kernel and sensor data
self._remove_kernel_sens_data_()
return self._data

@data.setter
def data(self, value):
value = np.asarray(value)
if self._data is not None and value.ndim != self._data.ndim:
raise ValueError('Data array should have %d dimensions.' %
self._data.ndim)

# vertices can be a single number, so cast to ndarray
if isinstance(self.vertices, list):
n_verts = sum([len(v) for v in self.vertices])
elif isinstance(self.vertices, np.ndarray):
n_verts = len(self.vertices)
else:
raise ValueError('Vertices must be a list or numpy array')

if value.shape[0] != n_verts:
raise ValueError('The first dimension of the data array must '
'match the number of vertices (%d != %d)' %
(value.shape[0], n_verts))

self._data = value
self._update_times()

@property
def shape(self):
"""Shape of the data."""
if self._data is not None:
return self._data.shape
return (self._kernel.shape[0],
self._sens_data.shape[1],
self._sens_data.shape[2])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use else!!!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

document what returns.

does this return always the same kind of tuple?


@property
def tmin(self):
"""The first timestamp."""
return self._tmin

@tmin.setter
def tmin(self, value):
self._tmin = float(value)
self._update_times()

@property
def tstep(self):
"""The change in time between two consecutive samples (1 / sfreq)."""
return self._tstep

@tstep.setter
def tstep(self, value):
if value <= 0:
raise ValueError('.tstep must be greater than 0.')
self._tstep = float(value)
self._update_times()

@property
def times(self):
"""A timestamp for each sample."""
return self._times

@times.setter
def times(self, value):
raise ValueError('You cannot write to the .times attribute directly. '
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this a value error? isn't it a runtime?

Is there any other way to prevent the setter? maybe not.

'This property automatically updates whenever '
'.tmin, .tstep or .data changes.')

def _update_times(self):
"""Update the times attribute after changing tmin, tmax, or tstep."""
self._times = self.tmin + (self.tstep * np.arange(self.shape[-1]))
self._times.flags.writeable = False

def copy(self):
"""Return copy of SourceTFR instance."""
return copy.deepcopy(self)
Loading