From 5d0ff0b720820074ec26e0a4b66c3688915850c4 Mon Sep 17 00:00:00 2001
From: Taylor Salo <tsalo006@fiu.edu>
Date: Fri, 1 Jan 2021 19:12:45 -0500
Subject: [PATCH 01/14] Add update_bids_name

---
 phys2bids/bids.py | 98 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

diff --git a/phys2bids/bids.py b/phys2bids/bids.py
index ccaa8d386..1b2cbb8a7 100644
--- a/phys2bids/bids.py
+++ b/phys2bids/bids.py
@@ -1,6 +1,7 @@
 """BIDS functions for phys2bids package."""
 import logging
 import os
+import re
 from csv import reader
 from pathlib import Path
 
@@ -290,3 +291,100 @@ def readme_file(outdir):
         LGR.warning('phys2bids could not find README,'
                     'generating it EMPTY, please fill in the necessary info')
         utils.write_file(file_path, '', text)
+
+
+def update_bids_name(basename, **kwargs):
+    """Add elements to a BIDS filename while retaining BIDS compatibility.
+
+    Parameters
+    ----------
+    basename : str
+        Name of valid BIDS file.
+    kwargs : dict
+        Keyword arguments indicating entities and/or suffix and/or extension
+        to update/add to the BIDS filename. If a keyword's value is None, then
+        the entity will be removed from the filename.
+
+    Returns
+    -------
+    outname : str
+        Valid BIDS filename with updated entities/suffix/extension.
+    """
+    # This is hardcoded, but would be nice to use the yaml-fied BIDS entity
+    # table when that's up and running.
+    ENTITY_ORDER = [
+        "sub",
+        "ses",
+        "task",
+        "acq",
+        "ce",
+        "rec",
+        "dir",
+        "run",
+        "mod",
+        "echo",
+        "fa",
+        "inv",
+        "mt",
+        "part",
+        "ch",
+        "recording",
+        "proc",
+        "space",
+        "split",
+    ]
+
+    outdir = os.path.dirname(basename)
+    outname = os.path.basename(basename)
+
+    # Determine scan suffix (should always be physio)
+    suffix = outname.split("_")[-1].split(".")[0]
+    extension = "." + ".".join(outname.split("_")[-1].split(".")[1:])
+    filetype = suffix + extension
+
+    for key, val in kwargs.items():
+        if key == "suffix":
+            if not val.startswith("_"):
+                val = "_" + val
+
+            if not val.endswith("."):
+                val = val + "."
+
+            outname = outname.replace("_" + suffix + ".", val)
+        elif key == "extension":
+            # add leading . if not ''
+            if val and not val.startswith("."):
+                val = "." + val
+            outname = outname.replace(extension, val)
+        else:
+            # entities
+            if key not in ENTITY_ORDER:
+                raise ValueError("Key {} not understood.".format(key))
+
+            if val is None:
+                # remove entity from filename if kwarg val is None
+                regex = "_{}-[0-9a-zA-Z]+".format(key)
+                outname = re.sub(regex, "", outname)
+            elif "_{}-{}".format(key, val) in basename:
+                LGR.warning(
+                    "Key-value pair {}/{} already found in "
+                    "basename {}. Skipping.".format(key, val, basename)
+                )
+            elif "_{}-".format(key) in basename:
+                LGR.warning(
+                    "Key {} already found in basename {}. "
+                    "Overwriting.".format(key, basename)
+                )
+                regex = "_{}-[0-9a-zA-Z]+".format(key)
+                outname = re.sub(regex, "_{}-{}".format(key, val), outname)
+            else:
+                loc = ENTITY_ORDER.index(key)
+                entities_to_check = ENTITY_ORDER[loc:]
+                entities_to_check = ["_{}-".format(etc) for etc in entities_to_check]
+                entities_to_check.append("_{}".format(filetype))
+                for etc in entities_to_check:
+                    if etc in outname:
+                        outname = outname.replace(etc, "_{}-{}{}".format(key, val, etc))
+                        break
+    outname = os.path.join(outdir, outname)
+    return outname

From 6ab2115ba2f3c701008477f3e7f6319c791471d0 Mon Sep 17 00:00:00 2001
From: Taylor Salo <tsalo006@fiu.edu>
Date: Fri, 1 Jan 2021 19:16:55 -0500
Subject: [PATCH 02/14] Add slice4phys.slice_phys.

---
 phys2bids/slice4phys.py | 119 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)

diff --git a/phys2bids/slice4phys.py b/phys2bids/slice4phys.py
index 7d0305c58..8e9e11114 100644
--- a/phys2bids/slice4phys.py
+++ b/phys2bids/slice4phys.py
@@ -4,6 +4,9 @@
 from copy import deepcopy
 
 import numpy as np
+from scipy.signal import resample
+
+from phys2bids.bids import update_bids_name
 
 LGR = logging.getLogger(__name__)
 
@@ -106,6 +109,122 @@ def find_runs(phys_in, ntp_list, tr_list, thr=None, padding=9):
     return run_timestamps
 
 
+def slice_phys(phys, run_timestamps, padding=9, update_trigger=False):
+    """Slice a physio object based on run-/file-wise onsets and offsets.
+
+    Parameters
+    ----------
+    phys : BlueprintInput
+    run_timestamps : dict
+        Each key is a run-wise filename and value is a tuple of (onset, offset),
+        where onset and offset are integers corresponding to index values of
+        the trigger channel.
+    padding : float or tuple, optional
+        Amount of time before and after run to keep in physio time series, in seconds.
+        May be a single value (in which case the time before and after is the same) or
+        a two-item tuple (which case the first item is time before and the second is
+        time after).
+        These values will be automatically reduced in cases where the pad would extend
+        before or after the physio acquisition.
+    update_trigger : bool, optional
+        Whether to update the trigger channel time series based on estimated scan onsets from
+        the BIDS dataset (True) or to leave it as-is (False). Default is False.
+
+    Returns
+    -------
+    phys_in_slices : dict
+        Each key is a run-wise filename (possibly further split by frequency)
+        and each value is a BlueprintInput object.
+
+    Notes
+    -----
+    The goal of this function is to abstract out the general slicing procedure
+    from slice4phys.
+    """
+    phys = deepcopy(phys)
+    if update_trigger:
+        phys.freq.append(phys.freq[phys.trigger_idx])
+        phys.units.append(phys.units[phys.trigger_idx])
+        phys.timeseries.append(phys.timeseries[phys.trigger_idx].copy())
+        phys.ch_name.append('original trigger')
+
+        # Fix up the trigger time series
+        phys.timeseries[phys.trigger_idx][:] = 0
+        for ons, off in run_timestamps.values():
+            phys.timeseries[phys.trigger_idx][ons:off] = 1
+
+    if not isinstance(padding, tuple):
+        time_before, time_after = padding, padding
+    else:
+        time_before, time_after = padding
+
+    phys_in_slices = {}
+    for i_run, fname in enumerate(run_timestamps.keys()):
+        run_attributes = run_timestamps[fname]  # tmp variable to collect run's info
+        trigger_onset, trigger_offset = run_attributes
+        min_onset, max_offset = 0, phys.timeseries[phys.trigger_idx].shape[0]
+
+        unique_frequencies = np.unique(phys.freq)
+        trigger_freq = phys.freq[phys.trigger_idx]
+
+        # Limit padding based on beginning and end of physio recording.
+        # We could also limit padding to prevent overlap between scans, if desired.
+        run_time_before = np.minimum(time_before, (trigger_onset - min_onset) / trigger_freq)
+        run_time_after = np.minimum(time_after, (max_offset - trigger_offset) / trigger_freq)
+
+        for freq in unique_frequencies:
+            to_subtract = int(run_time_before * freq)
+            to_add = int(run_time_after * freq)
+
+            run_onset_idx = int(trigger_onset * freq / trigger_freq) - to_subtract
+            run_offset_idx = int(trigger_offset * freq / trigger_freq) + to_add
+
+            # Split into frequency-specific object limited to onset-offset
+            temp_phys_in = deepcopy(phys[run_onset_idx:run_offset_idx])
+
+            if temp_phys_in.freq[temp_phys_in.trigger_idx] != freq:
+                example_ts = phys.timeseries[phys.freq.index(freq)]
+
+                # Determine time
+                new_time = (np.arange(example_ts.shape[0]) / temp_phys_in.timeseries[
+                    phys.freq.index(freq)].freq
+                )
+                new_time += np.min(temp_phys_in.timeseries[0])
+                temp_phys_in.timeseries[0] = new_time
+                temp_phys_in.freq[0] = freq
+
+                # Resample trigger
+                new_trigger = temp_phys_in.timeseries[temp_phys_in.trigger_idx]
+                new_trigger = resample(new_trigger, example_ts.shape[0], window=10)
+                temp_phys_in.timeseries[temp_phys_in.trigger_idx] = new_trigger
+                temp_phys_in.freq[temp_phys_in.trigger_idx] = freq
+
+            if len(unique_frequencies) > 1:
+                run_fname = update_bids_name(fname, recording=str(freq) + 'Hz')
+
+                # Drop other frequency channels
+                channel_idx = np.arange(temp_phys_in.ch_amount)
+                nonfreq_channels = [i for i in channel_idx if temp_phys_in.freq[i] != freq]
+                freq_channels = [i for i in channel_idx if temp_phys_in.freq[i] == freq]
+                temp_phys_in = temp_phys_in.delete_at_index(nonfreq_channels)
+
+                # Update trigger channel index around dropped channels
+                new_trigger_idx = freq_channels.index(temp_phys_in.trigger_idx)
+                temp_phys_in.trigger_idx = new_trigger_idx
+            else:
+                run_fname = fname
+
+            # zero out time
+            temp_phys_in.timeseries[0] = (
+                temp_phys_in.timeseries[0] - np.min(temp_phys_in.timeseries[0])
+            )
+            # Now take out the time before the scan starts
+            temp_phys_in.timeseries[0] = temp_phys_in.timeseries[0] - time_before
+
+            phys_in_slices[run_fname] = temp_phys_in
+    return phys_in_slices
+
+
 def slice4phys(phys_in, ntp_list, tr_list, thr, padding=9):
     """
     Slice runs for phys2bids.

From a7a31c4302b5632bbe0c12e26259d49c72c4f16b Mon Sep 17 00:00:00 2001
From: Taylor Salo <tsalo006@fiu.edu>
Date: Fri, 1 Jan 2021 19:17:07 -0500
Subject: [PATCH 03/14] Add synchronization module.

---
 phys2bids/synchronization.py | 457 +++++++++++++++++++++++++++++++++++
 1 file changed, 457 insertions(+)
 create mode 100644 phys2bids/synchronization.py

diff --git a/phys2bids/synchronization.py b/phys2bids/synchronization.py
new file mode 100644
index 000000000..354a28422
--- /dev/null
+++ b/phys2bids/synchronization.py
@@ -0,0 +1,457 @@
+"""Functions for synchronizing multi-run physio files with BIDS imaging data."""
+import os.path as op
+from operator import itemgetter
+from itertools import groupby
+import logging
+import matplotlib.pyplot as plt
+
+from bids import BIDSLayout
+import pandas as pd
+import numpy as np
+import nibabel as nib
+
+from .bids import update_bids_name
+from .slice4phys import slice_phys
+from .physio_obj import BlueprintOutput
+
+LGR = logging.getLogger(__name__)
+
+
+def load_scan_data(layout, sub, ses=None):
+    """
+    Extract subject- and session-specific scan onsets and durations from BIDSLayout.
+
+    Parameters
+    ----------
+    layout : BIDSLayout
+    sub : str
+    ses : str or None, optional
+
+    Returns
+    -------
+    df : pandas.DataFrame
+        DataFrame with the following columns: 'filename', 'original_filename',
+        'acq_time', 'duration', 'onset'.
+        'filename' is the name of the BIDS file minus within-acquisition entities.
+        'original_filename' is the actual, existing BIDS file.
+        'acq_time' is the acquisition time in datetime format.
+        'duration' is the scan duration in seconds.
+        'onset' is the scan onset in seconds, starting with zero at the first scan.
+    """
+    # This is the strategy we'll use in the future. Commented out for now.
+    # scans_file = layout.get(extension='.tsv', suffix='scans', subject=sub, session=ses)
+    # df = pd.read_table(scans_file)
+
+    # Collect acquisition times
+    # NOTE: Will be replaced with scans file when heudiconv makes the change
+    img_files = layout.get(
+        datatype="func",
+        suffix="bold",
+        extension=[".nii.gz", ".nii"],
+        subject=sub,
+        session=ses,
+    )
+    df = pd.DataFrame(
+        columns=["original_filename", "acq_time"],
+    )
+    for i, img_file in enumerate(img_files):
+        df.loc[i, "original_filename"] = img_file.path
+        df.loc[i, "acq_time"] = img_file.get_metadata()["AcquisitionTime"]
+
+    # Get generic filenames (without within-acquisition entities like echo)
+    df["filename"] = df["original_filename"].apply(
+        update_bids_name, echo=None, fa=None, inv=None, mt=None, part=None, ch=None
+    )
+
+    # Get "first" scan from multi-file acquisitions
+    df["acq_time"] = pd.to_datetime(df["acq_time"])
+    df = df.sort_values(by="acq_time")
+    df = df.drop_duplicates(subset="filename", keep="first", ignore_index=True)
+
+    # Now back to general-purpose code
+    df = determine_scan_durations(layout, df, sub=sub, ses=ses)
+    df = df.dropna(subset=["duration"])  # limit to relevant scans
+
+    # Convert scan times to relative onsets (first scan is at 0 seconds)
+    df["onset"] = df["acq_time"] - df["acq_time"].min()
+    df["onset"] = df["onset"].dt.total_seconds()
+    return df
+
+
+def determine_scan_durations(layout, scan_df, sub, ses=None):
+    """
+    Determine scan durations.
+
+    Extract scan durations by loading fMRI files/metadata and
+    multiplying TR by number of volumes. This can be used to determine the
+    endpoints for the physio files.
+
+    Parameters
+    ----------
+    layout : bids.layout.BIDSLayout
+        Dataset layout. Used to identify functional scans and load them to
+        determine scan durations.
+    scan_df : pandas.DataFrame
+        Scans DataFrame containing functional scan filenames and onset times.
+    sub : str
+        Subject ID.
+    ses : str or None, optional
+        Session ID. If None, then no session.
+
+    Returns
+    -------
+    scan_df : pandas.DataFrame
+        Updated DataFrame with new "duration" column. Calculated durations are
+        in seconds.
+    """
+    func_files = layout.get(
+        datatype="func",
+        suffix="bold",
+        extension=[".nii.gz", ".nii"],
+        subject=sub,
+        session=ses,
+    )
+    scan_df["duration"] = None
+    for func_file in func_files:
+        filename = func_file.path
+        if filename in scan_df["original_filename"].values:
+            n_vols = nib.load(filename).shape[3]
+            tr = func_file.get_metadata()["RepetitionTime"]
+            duration = n_vols * tr
+            scan_df.loc[scan_df["original_filename"] == filename, "duration"] = duration
+        else:
+            LGR.info("Skipping {}".format(op.basename(filename)))
+    return scan_df
+
+
+def extract_physio_onsets(trigger_timeseries, freq, threshold=0.5):
+    """
+    Collect onsets from physio file, both in terms of seconds and time series indices.
+
+    Parameters
+    ----------
+    trigger_timeseries : 1D numpy.ndarray
+        Trigger time series.
+    freq : float
+        Frequency of trigger time series, in Hertz.
+    threshold : float, optional
+        Threshold to apply to binarize trigger time series.
+
+    Returns
+    -------
+    df : pandas.DataFrame
+        DataFrame with one row for each trigger period and three columns:
+        onset (in seconds), index (onsets in index of time series array),
+        and duration (in seconds).
+    """
+    samplerate = 1.0 / freq
+    scan_idx = np.where(trigger_timeseries > 0)[0]
+    # Get groups of consecutive numbers in index
+    groups = []
+    for k, g in groupby(enumerate(scan_idx), lambda x: x[0] - x[1]):
+        groups.append(list(map(itemgetter(1), g)))
+
+    # Extract onsets
+    onsets = np.array([g[0] for g in groups])
+    onsets_in_sec = onsets * samplerate
+    durations = np.array([g[-1] - g[0] for g in groups])
+    durations_in_sec = durations * samplerate
+    df = pd.DataFrame(
+        columns=["onset"],
+        data=onsets_in_sec,
+    )
+    df["index"] = onsets
+    df["duration"] = durations_in_sec
+    return df
+
+
+def synchronize_onsets(phys_df, scan_df):
+    """
+    Find matching scans and physio trigger periods from separate DataFrames.
+
+    Uses time differences within each DataFrame.
+    There can be fewer physios than scans (task failed to trigger physio)
+    or fewer scans than physios (aborted scans are not retained in BIDS dataset).
+
+    Onsets are in seconds. The baseline (i.e., absolute timing) doesn't matter.
+    Relative timing is all that matters.
+
+    Parameters
+    ----------
+    phys_df : pandas.DataFrame
+        DataFrame with onsets of physio trigger periods, in seconds. The
+        baseline does not matter, so it is reasonable for the onsets to start
+        with zero. The following columns are required: 'onset', 'index'.
+    scan_df : pandas.DataFrame
+        DataFrame with onsets and names of functional scans from BIDS dataset,
+        in seconds. The baseline does not matter, so it is reasonable for the
+        onsets to start with zero. The following columns are required: 'onset',
+        'duration'.
+
+    Returns
+    -------
+    scan_df : pandas.DataFrame
+        Updated scan DataFrame, now with columns for predicted physio onsets in
+        seconds and in indices of the physio trigger channel, as well as scan
+        duration in units of the physio trigger channel.
+    """
+    phys_df = phys_df.sort_values(by=["onset"])
+    scan_df = scan_df.sort_values(by=["onset"])
+
+    # Get difference between each physio trigger onset and each scan onset
+    onset_diffs = np.zeros((scan_df.shape[0], phys_df.shape[0]))
+    for i, i_scan in scan_df.iterrows():
+        for j, j_phys in phys_df.iterrows():
+            onset_diff = j_phys["onset"] - i_scan["onset"]
+            onset_diffs[i, j] = onset_diff
+
+    # Find the delay that gives the smallest difference between scan onsets
+    # and physio onsets
+    selected = (None, None)
+    thresh = 1000
+    for i_scan in range(onset_diffs.shape[0]):
+        for j_phys in range(onset_diffs.shape[1]):
+            test_offset = onset_diffs[i_scan, j_phys]
+            diffs_from_phys_onset = onset_diffs - test_offset
+            diffs_from_abs = np.abs(diffs_from_phys_onset)
+            min_diff_row_idx = np.argmin(diffs_from_abs, axis=0)
+            min_diff_col_idx = np.arange(len(min_diff_row_idx))
+            min_diffs = diffs_from_abs[min_diff_row_idx, min_diff_col_idx]
+            min_diff_sum = np.sum(min_diffs)
+            if min_diff_sum < thresh:
+                selected = (i_scan, j_phys)
+                thresh = min_diff_sum
+
+    offset = onset_diffs[selected[0], selected[1]]
+
+    # Isolate close, but negative relative onsets, to ensure scan onsets are
+    # always before or at physio triggers.
+    close_thresh = 2  # threshold for "close" onsets, in seconds
+    diffs_from_phys_onset = onset_diffs - offset
+    min_diff_row_idx = np.argmin(np.abs(diffs_from_phys_onset), axis=0)
+    min_diff_col_idx = np.arange(len(min_diff_row_idx))
+    min_diffs = diffs_from_phys_onset[min_diff_row_idx, min_diff_col_idx]
+    min_diffs_tmp = min_diffs[abs(min_diffs) <= close_thresh]
+    min_val = min(min_diffs_tmp)
+    min_diffs += min_val
+    offset += min_val
+    LGR.info(
+        "Scan onsets should be adjusted forward by {} seconds to best "
+        "match physio onsets.".format(offset)
+    )
+
+    # Get onset of each scan in terms of the physio time series
+    scan_df["phys_onset"] = scan_df["onset"] + offset
+    rise = phys_df.loc[1, "index"] - phys_df.loc[0, "index"]
+    run = phys_df.loc[1, "onset"] - phys_df.loc[0, "onset"]
+    samplerate = rise / run
+    scan_df["index_onset"] = (scan_df["phys_onset"] * samplerate).astype(int)
+    scan_df["index_duration"] = (scan_df["duration"] * samplerate).astype(int)
+    scan_df["index_offset"] = scan_df["index_onset"] + scan_df["index_duration"]
+    return scan_df
+
+
+def plot_sync(scan_df, physio_df):
+    """
+    Plot unsynchronized and synchonized scan and physio onsets and durations.
+
+    Parameters
+    ----------
+    scan_df : pandas.DataFrame
+        DataFrame with timing associated with scan files. Must have the
+        following columns: onset (scan onsets in seconds), duration (scan
+        durations in seconds), and phys_onset (scan onsets after being matches
+        with physio onsets, in seconds).
+    physio_df : pandas.DataFrame
+        DataFrame with timing associated with physio trigger periods. Must have
+        the following columns: onset (trigger period onsets in seconds) and
+        duration (trigger period durations in seconds).
+
+    Returns
+    -------
+    fig : matplotlib.pyplot.Figure
+        Figure from plot.
+    axes : list of two matplotlib.pyplot.Axis objects
+        Axis objects for top and bottom subplots of figure. Top subplot has
+        unsynchronized onsets and bottom has synchronized ones.
+    """
+    fig, axes = plt.subplots(nrows=2, figsize=(20, 6), sharex=True)
+
+    # get max (onset time + duration) rounded up to nearest 1000
+    max_ = int(
+        1000
+        * np.ceil(
+            max(
+                (
+                    (physio_df["onset"] + physio_df["duration"]).max(),
+                    (scan_df["onset"] + scan_df["duration"]).max(),
+                    (scan_df["phys_onset"] + scan_df["duration"]).max(),
+                )
+            )
+            / 1000
+        )
+    )
+
+    # get x-axis values
+    scalar = 10
+    x = np.linspace(0, max_, (max_ * scalar) + 1)
+
+    # first plot the onsets and durations of the raw scan and physio runs in
+    # the top axis
+    physio_timeseries = np.zeros(x.shape)
+    func_timeseries = np.zeros(x.shape)
+    for i, row in scan_df.iterrows():
+        func_timeseries[
+            int(row["onset"] * scalar): int((row["onset"] + row["duration"]) * scalar)
+        ] = 1
+
+    for i, row in physio_df.iterrows():
+        physio_timeseries[
+            int(row["onset"] * scalar): int((row["onset"] + row["duration"]) * scalar)
+        ] = 0.5
+
+    axes[0].fill_between(
+        x,
+        func_timeseries,
+        where=func_timeseries >= 0,
+        interpolate=True,
+        color="red",
+        alpha=0.3,
+        label="Functional scans",
+    )
+    axes[0].fill_between(
+        x,
+        physio_timeseries,
+        where=physio_timeseries >= 0,
+        interpolate=True,
+        color="blue",
+        alpha=0.3,
+        label="Physio triggers",
+    )
+
+    # now plot the adjusted onsets (and original durations) in the bottom axis
+    physio_timeseries = np.zeros(x.shape)
+    func_timeseries = np.zeros(x.shape)
+    for i, row in scan_df.iterrows():
+        func_timeseries[
+            int(row["phys_onset"] * scalar): int(
+                (row["phys_onset"] + row["duration"]) * scalar
+            )
+        ] = 1
+
+    for i, row in physio_df.iterrows():
+        physio_timeseries[
+            int(row["onset"] * scalar): int((row["onset"] + row["duration"]) * scalar)
+        ] = 0.5
+
+    axes[1].fill_between(
+        x,
+        func_timeseries,
+        where=func_timeseries >= 0,
+        interpolate=True,
+        color="red",
+        alpha=0.3,
+        label="Functional scans",
+    )
+    axes[1].fill_between(
+        x,
+        physio_timeseries,
+        where=physio_timeseries >= 0,
+        interpolate=True,
+        color="blue",
+        alpha=0.3,
+        label="Physio triggers",
+    )
+
+    axes[0].set_xlim((min(x), max(x)))
+    axes[0].set_ylim((0, None))
+    axes[1].set_xlabel("Time (s)")
+    axes[0].legend()
+    return fig, axes
+
+
+def workflow(physio, bids_dir, sub, ses=None, padding=9, update_trigger=False):
+    """
+    Run physio/scan onset synchronization and BIDSification.
+
+    This workflow writes out physio files to a BIDS dataset.
+
+    Parameters
+    ----------
+    physio : BlueprintInput
+        Object *must* contain multiple physio trigger periods associated with scans.
+    bids_dir : str
+        Path to BIDS dataset
+    sub : str
+        Subject ID. Used to search the BIDS dataset for relevant scans.
+    ses : str or None, optional
+        Session ID. Used to search the BIDS dataset for relevant scans in
+        longitudinal studies. Default is None.
+    padding : float or tuple, optional
+        Amount of time before and after run to keep in physio time series, in seconds.
+        May be a single value (in which case the time before and after is the same) or
+        a two-item tuple (which case the first item is time before and the second is
+        time after).
+        These values will be automatically reduced in cases where the pad would extend
+        before or after the physio acquisition.
+    update_trigger : bool, optional
+        Whether to update the trigger channel time series based on estimated scan onsets from
+        the BIDS dataset (True) or to leave it as-is (False). Default is False.
+
+    Returns
+    -------
+    out : list of BlueprintOutput
+
+    Notes
+    -----
+    The workflow works as follows:
+    1. Extract trigger period onsets from physio file.
+    2. Extract scan times from BIDS dataset, to get the following information:
+        - Scan name
+        - Onset with subsecond resolution as close to trigger pulse as possible
+        - Duration (from TR * data dimension)
+    3. Calculate difference in time between each scan onset and each physio
+       trigger onset.
+    4. Use differences to identify similar values across as many physio/scan
+       pairs as possible. Physio may be missing if trigger failed.
+       Physio may be delayed if task was manually triggered after scan began,
+       or if there's a bug in the task script. Scan may be missing if it wasn't
+       converted (e.g., a scan stopped early and re-run).
+    5. Infer physio periods from scan periods, once physio and scan onsets
+       are matched.
+    6. Assign scan names to physio periods, infer times for other scan times
+       in cases where trigger failed, and ignore trigger periods associated with
+       scans that weren't kept in the BIDS dataset.
+    7. Split physio file based on scan-specific onsets and offsets.
+    8. Write out scan-associated physio files.
+    9. Generate and return scan/physio onsets figure for manual QC.
+    """
+    layout = BIDSLayout(bids_dir)
+    scan_df = load_scan_data(layout, sub=sub, ses=ses)
+
+    trigger_timeseries = physio.timeseries[physio.trigger_idx]
+    freq = physio.freq[physio.trigger_idx]
+    physio_df = extract_physio_onsets(trigger_timeseries, freq=freq)
+    scan_df = synchronize_onsets(physio_df, scan_df)
+
+    # we should do something better with this figure, but it's nice to have for QC
+    fig, axes = plot_sync(scan_df, physio_df)
+    fig.savefig("synchronization_results.png")
+
+    run_dict = {}
+    # could probably be replaced with apply() followed by to_dict()
+    for _, row in scan_df.iterrows():
+        base_fname = update_bids_name(row["filename"], suffix="physio", extension="")
+        split_times = (row["index_onset"], row["index_offset"])
+        run_dict[base_fname] = split_times
+    phys_dict = slice_phys(
+        physio, run_dict, padding=padding, update_trigger=update_trigger
+    )
+    outputs = []
+    for k, v in phys_dict.items():
+        output = BlueprintOutput.init_from_blueprint(v)
+        output.filename = k
+        outputs.append(output)
+        # Let's not actually save files until we're more confident.
+        # output.save()
+    return outputs

From a3107c1ecd23ba1d992f17bf84ce8be1727d7ee1 Mon Sep 17 00:00:00 2001
From: Taylor Salo <tsalo006@fiu.edu>
Date: Fri, 1 Jan 2021 19:17:14 -0500
Subject: [PATCH 04/14] Update dependencies.

---
 setup.cfg | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/setup.cfg b/setup.cfg
index cafcfd943..acb8dd3f6 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -25,6 +25,9 @@ install_requires =
     numpy >=1.9.3, !=*rc*
     matplotlib >=3.1.1, !=3.3.0rc1
     PyYAML >=5.1, !=*rc*
+    scipy
+    pybids
+    pandas
 tests_require =
     pytest >=5.3
 test_suite = pytest

From 7842adc5dac8e211ac3f5fdacd7c35277df51b6e Mon Sep 17 00:00:00 2001
From: Taylor Salo <tsalo006@fiu.edu>
Date: Fri, 1 Jan 2021 19:51:40 -0500
Subject: [PATCH 05/14] Add test for update_bids_name.

---
 phys2bids/tests/test_bids.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/phys2bids/tests/test_bids.py b/phys2bids/tests/test_bids.py
index c6f3c8f39..880d7c496 100644
--- a/phys2bids/tests/test_bids.py
+++ b/phys2bids/tests/test_bids.py
@@ -27,6 +27,29 @@ def test_bidsify_units():
         assert bids.bidsify_units(unit_key) == bids.UNIT_ALIASES[unit_key]
 
 
+def test_update_bids_name():
+    """Unit test for update_bids_name."""
+    f = "sub-01_bold.nii.gz"
+    new_file = bids.update_bids_name(f, echo=1)
+    assert new_file == "sub-01_echo-1_bold.nii.gz"
+    new_file = bids.update_bids_name(f, echo=1, acq="lowres")
+    assert new_file == "sub-01_acq-lowres_echo-1_bold.nii.gz"
+
+    f = "sub-01_echo-1_bold.nii.gz"
+    new_file = bids.update_bids_name(f, echo=2)
+    assert new_file == "sub-01_echo-2_bold.nii.gz"
+    new_file = bids.update_bids_name(f, echo=2, acq="lowres", suffix="cbv", extension="json")
+    assert new_file == "sub-01_acq-lowres_echo-2_cbv.json"
+
+    # Documenting poor performance.
+    # Hopefully there'd be an exception here in the future.
+    f = "noncompliant_bids_file.nii.gz"
+    new_file = bids.update_bids_name(f, echo=1)
+    assert new_file == "noncompliant_bids_echo-1_file.nii.gz"
+    new_file = bids.update_bids_name(f, echo=1, acq="lowres")
+    assert new_file == "noncompliant_bids_acq-lowres_echo-1_file.nii.gz"
+
+
 @pytest.mark.parametrize('test_sub', ['SBJ01', 'sub-006', '006'])
 @pytest.mark.parametrize('test_ses', ['', 'S05', 'ses-42', '42'])
 def test_use_heuristic(tmpdir, test_sub, test_ses):

From 75e7e0114d6c27f2979aec58242651f0c71a289b Mon Sep 17 00:00:00 2001
From: Taylor Salo <tsalo006@fiu.edu>
Date: Sat, 2 Jan 2021 11:51:14 -0500
Subject: [PATCH 06/14] Use f-strings to match package convention.

---
 phys2bids/bids.py | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/phys2bids/bids.py b/phys2bids/bids.py
index 1b2cbb8a7..eb693976e 100644
--- a/phys2bids/bids.py
+++ b/phys2bids/bids.py
@@ -310,8 +310,7 @@ def update_bids_name(basename, **kwargs):
     outname : str
         Valid BIDS filename with updated entities/suffix/extension.
     """
-    # This is hardcoded, but would be nice to use the yaml-fied BIDS entity
-    # table when that's up and running.
+    # This is hardcoded, but would be nice to use BIDS spec's YAML files.
     ENTITY_ORDER = [
         "sub",
         "ses",
@@ -359,32 +358,32 @@ def update_bids_name(basename, **kwargs):
         else:
             # entities
             if key not in ENTITY_ORDER:
-                raise ValueError("Key {} not understood.".format(key))
+                raise ValueError(f"Key {key} not understood.")
 
             if val is None:
                 # remove entity from filename if kwarg val is None
-                regex = "_{}-[0-9a-zA-Z]+".format(key)
+                regex = f"_{key}-[0-9a-zA-Z]+"
                 outname = re.sub(regex, "", outname)
-            elif "_{}-{}".format(key, val) in basename:
+            elif f"_{key}-{val}" in basename:
                 LGR.warning(
-                    "Key-value pair {}/{} already found in "
-                    "basename {}. Skipping.".format(key, val, basename)
+                    f"Key-value pair {key}/{val} already found in "
+                    f"basename {basename}. Skipping."
                 )
-            elif "_{}-".format(key) in basename:
+            elif f"_{key}-" in basename:
                 LGR.warning(
-                    "Key {} already found in basename {}. "
-                    "Overwriting.".format(key, basename)
+                    f"Key {key} already found in basename {basename}. "
+                    "Overwriting."
                 )
-                regex = "_{}-[0-9a-zA-Z]+".format(key)
-                outname = re.sub(regex, "_{}-{}".format(key, val), outname)
+                regex = f"_{key}-[0-9a-zA-Z]+"
+                outname = re.sub(regex, f"_{key}-{val}", outname)
             else:
                 loc = ENTITY_ORDER.index(key)
                 entities_to_check = ENTITY_ORDER[loc:]
-                entities_to_check = ["_{}-".format(etc) for etc in entities_to_check]
-                entities_to_check.append("_{}".format(filetype))
+                entities_to_check = [f"_{etc}-" for etc in entities_to_check]
+                entities_to_check.append(f"_{filetype}")
                 for etc in entities_to_check:
                     if etc in outname:
-                        outname = outname.replace(etc, "_{}-{}{}".format(key, val, etc))
+                        outname = outname.replace(etc, f"_{key}-{val}{etc}")
                         break
     outname = os.path.join(outdir, outname)
     return outname

From f442c81e9e9994a65bb097995da920c71aecb166 Mon Sep 17 00:00:00 2001
From: Taylor Salo <tsalo006@fiu.edu>
Date: Sat, 2 Jan 2021 11:51:31 -0500
Subject: [PATCH 07/14] Improve docstrings.

Also try to use scans.tsv file when available.
---
 phys2bids/slice4phys.py      |   1 +
 phys2bids/synchronization.py | 107 +++++++++++++++++++----------------
 2 files changed, 58 insertions(+), 50 deletions(-)

diff --git a/phys2bids/slice4phys.py b/phys2bids/slice4phys.py
index 8e9e11114..19111619f 100644
--- a/phys2bids/slice4phys.py
+++ b/phys2bids/slice4phys.py
@@ -115,6 +115,7 @@ def slice_phys(phys, run_timestamps, padding=9, update_trigger=False):
     Parameters
     ----------
     phys : BlueprintInput
+        Multi-run physio data in BlueprintInput object.
     run_timestamps : dict
         Each key is a run-wise filename and value is a tuple of (onset, offset),
         where onset and offset are integers corresponding to index values of
diff --git a/phys2bids/synchronization.py b/phys2bids/synchronization.py
index 354a28422..957e33e67 100644
--- a/phys2bids/synchronization.py
+++ b/phys2bids/synchronization.py
@@ -18,14 +18,17 @@
 
 
 def load_scan_data(layout, sub, ses=None):
-    """
-    Extract subject- and session-specific scan onsets and durations from BIDSLayout.
+    """Extract subject- and session-specific scan onsets and durations from BIDSLayout.
 
     Parameters
     ----------
-    layout : BIDSLayout
+    layout : bids.layout.BIDSLayout
+        Dataset layout. Used to identify functional scans and load them to
+        determine scan onsets.
     sub : str
+        Subject ID.
     ses : str or None, optional
+        Session ID. If None, then no session.
 
     Returns
     -------
@@ -38,25 +41,31 @@ def load_scan_data(layout, sub, ses=None):
         'duration' is the scan duration in seconds.
         'onset' is the scan onset in seconds, starting with zero at the first scan.
     """
-    # This is the strategy we'll use in the future. Commented out for now.
-    # scans_file = layout.get(extension='.tsv', suffix='scans', subject=sub, session=ses)
-    # df = pd.read_table(scans_file)
-
-    # Collect acquisition times
-    # NOTE: Will be replaced with scans file when heudiconv makes the change
-    img_files = layout.get(
-        datatype="func",
-        suffix="bold",
-        extension=[".nii.gz", ".nii"],
-        subject=sub,
-        session=ses,
-    )
-    df = pd.DataFrame(
-        columns=["original_filename", "acq_time"],
-    )
-    for i, img_file in enumerate(img_files):
-        df.loc[i, "original_filename"] = img_file.path
-        df.loc[i, "acq_time"] = img_file.get_metadata()["AcquisitionTime"]
+    # Use the scans.tsv file directly if it is available
+    scans_file = layout.get(extension=".tsv", suffix="scans", subject=sub, session=ses)
+    if scans_file:
+        df = pd.read_table(scans_file)
+        df["original_filename"] = df["filename"]
+    else:
+        LGR.info(
+            "No scans.tsv detected. "
+            "Attempting to extract relevant metadata from sidecar jsons."
+        )
+        # Collect acquisition times
+        # TODO: Make this search more general.
+        img_files = layout.get(
+            datatype="func",
+            suffix="bold",
+            extension=[".nii.gz", ".nii"],
+            subject=sub,
+            session=ses,
+        )
+        df = pd.DataFrame(
+            columns=["original_filename", "acq_time"],
+        )
+        for i_row, img_file in enumerate(img_files):
+            df.loc[i_row, "original_filename"] = img_file.path
+            df.loc[i_row, "acq_time"] = img_file.get_metadata()["AcquisitionTime"]
 
     # Get generic filenames (without within-acquisition entities like echo)
     df["filename"] = df["original_filename"].apply(
@@ -68,7 +77,7 @@ def load_scan_data(layout, sub, ses=None):
     df = df.sort_values(by="acq_time")
     df = df.drop_duplicates(subset="filename", keep="first", ignore_index=True)
 
-    # Now back to general-purpose code
+    # Determine the duration (in seconds) of each scan.
     df = determine_scan_durations(layout, df, sub=sub, ses=ses)
     df = df.dropna(subset=["duration"])  # limit to relevant scans
 
@@ -79,8 +88,7 @@ def load_scan_data(layout, sub, ses=None):
 
 
 def determine_scan_durations(layout, scan_df, sub, ses=None):
-    """
-    Determine scan durations.
+    """Determine scan durations.
 
     Extract scan durations by loading fMRI files/metadata and
     multiplying TR by number of volumes. This can be used to determine the
@@ -101,9 +109,10 @@ def determine_scan_durations(layout, scan_df, sub, ses=None):
     Returns
     -------
     scan_df : pandas.DataFrame
-        Updated DataFrame with new "duration" column. Calculated durations are
-        in seconds.
+        Updated DataFrame with new "duration" column.
+        Calculated durations are in seconds.
     """
+    # TODO: Make this search more general.
     func_files = layout.get(
         datatype="func",
         suffix="bold",
@@ -120,13 +129,12 @@ def determine_scan_durations(layout, scan_df, sub, ses=None):
             duration = n_vols * tr
             scan_df.loc[scan_df["original_filename"] == filename, "duration"] = duration
         else:
-            LGR.info("Skipping {}".format(op.basename(filename)))
+            LGR.info(f"Skipping {op.basename(filename)}")
     return scan_df
 
 
 def extract_physio_onsets(trigger_timeseries, freq, threshold=0.5):
-    """
-    Collect onsets from physio file, both in terms of seconds and time series indices.
+    """Collect onsets from physio file in terms of seconds and time series indices.
 
     Parameters
     ----------
@@ -147,6 +155,8 @@ def extract_physio_onsets(trigger_timeseries, freq, threshold=0.5):
     samplerate = 1.0 / freq
     scan_idx = np.where(trigger_timeseries > 0)[0]
     # Get groups of consecutive numbers in index
+    # This assumes that the trigger time series is block-like
+    # (i.e., scan runs constitute consecutive periods of non-zero values).
     groups = []
     for k, g in groupby(enumerate(scan_idx), lambda x: x[0] - x[1]):
         groups.append(list(map(itemgetter(1), g)))
@@ -166,15 +176,14 @@ def extract_physio_onsets(trigger_timeseries, freq, threshold=0.5):
 
 
 def synchronize_onsets(phys_df, scan_df):
-    """
-    Find matching scans and physio trigger periods from separate DataFrames.
+    """Find matching scans and physio trigger periods from separate DataFrames.
 
     Uses time differences within each DataFrame.
     There can be fewer physios than scans (task failed to trigger physio)
     or fewer scans than physios (aborted scans are not retained in BIDS dataset).
 
     Onsets are in seconds. The baseline (i.e., absolute timing) doesn't matter.
-    Relative timing is all that matters.
+    Relative timing of the onsets is all that matters.
 
     Parameters
     ----------
@@ -205,8 +214,7 @@ def synchronize_onsets(phys_df, scan_df):
             onset_diff = j_phys["onset"] - i_scan["onset"]
             onset_diffs[i, j] = onset_diff
 
-    # Find the delay that gives the smallest difference between scan onsets
-    # and physio onsets
+    # Find the delay that gives the smallest difference between scan onsets and physio onsets
     selected = (None, None)
     thresh = 1000
     for i_scan in range(onset_diffs.shape[0]):
@@ -224,8 +232,9 @@ def synchronize_onsets(phys_df, scan_df):
 
     offset = onset_diffs[selected[0], selected[1]]
 
-    # Isolate close, but negative relative onsets, to ensure scan onsets are
-    # always before or at physio triggers.
+    # Isolate close, but negative, relative onsets, to ensure scan onsets are
+    # always before or at physio triggers (because there can be a delay between
+    # the start of the scan and when the physio is triggered).
     close_thresh = 2  # threshold for "close" onsets, in seconds
     diffs_from_phys_onset = onset_diffs - offset
     min_diff_row_idx = np.argmin(np.abs(diffs_from_phys_onset), axis=0)
@@ -236,8 +245,8 @@ def synchronize_onsets(phys_df, scan_df):
     min_diffs += min_val
     offset += min_val
     LGR.info(
-        "Scan onsets should be adjusted forward by {} seconds to best "
-        "match physio onsets.".format(offset)
+        f"Scan onsets should be adjusted forward by {offset} seconds to best "
+        "match physio onsets."
     )
 
     # Get onset of each scan in terms of the physio time series
@@ -252,15 +261,14 @@ def synchronize_onsets(phys_df, scan_df):
 
 
 def plot_sync(scan_df, physio_df):
-    """
-    Plot unsynchronized and synchonized scan and physio onsets and durations.
+    """Plot unsynchronized and synchonized scan and physio onsets and durations.
 
     Parameters
     ----------
     scan_df : pandas.DataFrame
         DataFrame with timing associated with scan files. Must have the
         following columns: onset (scan onsets in seconds), duration (scan
-        durations in seconds), and phys_onset (scan onsets after being matches
+        durations in seconds), and phys_onset (scan onsets after being matched
         with physio onsets, in seconds).
     physio_df : pandas.DataFrame
         DataFrame with timing associated with physio trigger periods. Must have
@@ -277,7 +285,7 @@ def plot_sync(scan_df, physio_df):
     """
     fig, axes = plt.subplots(nrows=2, figsize=(20, 6), sharex=True)
 
-    # get max (onset time + duration) rounded up to nearest 1000
+    # get max (onset time + duration) rounded up to nearest 1000 for limit of x-axis
     max_ = int(
         1000
         * np.ceil(
@@ -296,8 +304,7 @@ def plot_sync(scan_df, physio_df):
     scalar = 10
     x = np.linspace(0, max_, (max_ * scalar) + 1)
 
-    # first plot the onsets and durations of the raw scan and physio runs in
-    # the top axis
+    # first plot the onsets and durations of the raw scan and physio runs in the top axis
     physio_timeseries = np.zeros(x.shape)
     func_timeseries = np.zeros(x.shape)
     for i, row in scan_df.iterrows():
@@ -371,8 +378,7 @@ def plot_sync(scan_df, physio_df):
 
 
 def workflow(physio, bids_dir, sub, ses=None, padding=9, update_trigger=False):
-    """
-    Run physio/scan onset synchronization and BIDSification.
+    """Run physio/scan onset synchronization and BIDSification.
 
     This workflow writes out physio files to a BIDS dataset.
 
@@ -390,7 +396,7 @@ def workflow(physio, bids_dir, sub, ses=None, padding=9, update_trigger=False):
     padding : float or tuple, optional
         Amount of time before and after run to keep in physio time series, in seconds.
         May be a single value (in which case the time before and after is the same) or
-        a two-item tuple (which case the first item is time before and the second is
+        a two-item tuple (in which case the first item is time before and the second is
         time after).
         These values will be automatically reduced in cases where the pad would extend
         before or after the physio acquisition.
@@ -401,6 +407,7 @@ def workflow(physio, bids_dir, sub, ses=None, padding=9, update_trigger=False):
     Returns
     -------
     out : list of BlueprintOutput
+        A list of run-specific BlueprintOutput objects.
 
     Notes
     -----
@@ -436,10 +443,10 @@ def workflow(physio, bids_dir, sub, ses=None, padding=9, update_trigger=False):
 
     # we should do something better with this figure, but it's nice to have for QC
     fig, axes = plot_sync(scan_df, physio_df)
-    fig.savefig("synchronization_results.png")
+    fig.savefig("synchronization_results.svg")
 
     run_dict = {}
-    # could probably be replaced with apply() followed by to_dict()
+    # TODO: could probably be replaced with apply() followed by to_dict()
     for _, row in scan_df.iterrows():
         base_fname = update_bids_name(row["filename"], suffix="physio", extension="")
         split_times = (row["index_onset"], row["index_offset"])

From 3fb1656a6c87553b9d30580b1ff3589e41a9a548 Mon Sep 17 00:00:00 2001
From: Taylor Salo <tsalo006@fiu.edu>
Date: Thu, 7 Jan 2021 18:29:06 -0500
Subject: [PATCH 08/14] Run isort.

---
 phys2bids/synchronization.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/phys2bids/synchronization.py b/phys2bids/synchronization.py
index 957e33e67..9fc088c5c 100644
--- a/phys2bids/synchronization.py
+++ b/phys2bids/synchronization.py
@@ -1,18 +1,19 @@
 """Functions for synchronizing multi-run physio files with BIDS imaging data."""
+import logging
 import os.path as op
-from operator import itemgetter
 from itertools import groupby
-import logging
+from operator import itemgetter
+
 import matplotlib.pyplot as plt
+import nibabel as nib
+import numpy as np
+import pandas as pd
 
 from bids import BIDSLayout
-import pandas as pd
-import numpy as np
-import nibabel as nib
 
 from .bids import update_bids_name
-from .slice4phys import slice_phys
 from .physio_obj import BlueprintOutput
+from .slice4phys import slice_phys
 
 LGR = logging.getLogger(__name__)
 

From 4e6a637d6d393eb737c49ba0b915630716745ebd Mon Sep 17 00:00:00 2001
From: Taylor Salo <tsalo006@fiu.edu>
Date: Thu, 7 Jan 2021 18:45:09 -0500
Subject: [PATCH 09/14] Drop scipy.

---
 phys2bids/slice4phys.py | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/phys2bids/slice4phys.py b/phys2bids/slice4phys.py
index 19111619f..875650a37 100644
--- a/phys2bids/slice4phys.py
+++ b/phys2bids/slice4phys.py
@@ -170,8 +170,12 @@ def slice_phys(phys, run_timestamps, padding=9, update_trigger=False):
 
         # Limit padding based on beginning and end of physio recording.
         # We could also limit padding to prevent overlap between scans, if desired.
-        run_time_before = np.minimum(time_before, (trigger_onset - min_onset) / trigger_freq)
-        run_time_after = np.minimum(time_after, (max_offset - trigger_offset) / trigger_freq)
+        run_time_before = np.minimum(
+            time_before, (trigger_onset - min_onset) / trigger_freq
+        )
+        run_time_after = np.minimum(
+            time_after, (max_offset - trigger_offset) / trigger_freq
+        )
 
         for freq in unique_frequencies:
             to_subtract = int(run_time_before * freq)
@@ -187,25 +191,33 @@ def slice_phys(phys, run_timestamps, padding=9, update_trigger=False):
                 example_ts = phys.timeseries[phys.freq.index(freq)]
 
                 # Determine time
-                new_time = (np.arange(example_ts.shape[0]) / temp_phys_in.timeseries[
-                    phys.freq.index(freq)].freq
+                new_time = (
+                    np.arange(example_ts.shape[0])
+                    / temp_phys_in.timeseries[phys.freq.index(freq)].freq
                 )
                 new_time += np.min(temp_phys_in.timeseries[0])
                 temp_phys_in.timeseries[0] = new_time
                 temp_phys_in.freq[0] = freq
 
                 # Resample trigger
-                new_trigger = temp_phys_in.timeseries[temp_phys_in.trigger_idx]
-                new_trigger = resample(new_trigger, example_ts.shape[0], window=10)
+                cur_xvals = np.arange(len(new_trigger))
+                new_xvals = np.linspace(0, len(new_trigger), example_ts.shape[0])
+                new_trigger = np.interp(
+                    new_xvals,
+                    cur_xvals,
+                    temp_phys_in.timeseries[temp_phys_in.trigger_idx],
+                )
                 temp_phys_in.timeseries[temp_phys_in.trigger_idx] = new_trigger
                 temp_phys_in.freq[temp_phys_in.trigger_idx] = freq
 
             if len(unique_frequencies) > 1:
-                run_fname = update_bids_name(fname, recording=str(freq) + 'Hz')
+                run_fname = update_bids_name(fname, recording=str(freq) + "Hz")
 
                 # Drop other frequency channels
                 channel_idx = np.arange(temp_phys_in.ch_amount)
-                nonfreq_channels = [i for i in channel_idx if temp_phys_in.freq[i] != freq]
+                nonfreq_channels = [
+                    i for i in channel_idx if temp_phys_in.freq[i] != freq
+                ]
                 freq_channels = [i for i in channel_idx if temp_phys_in.freq[i] == freq]
                 temp_phys_in = temp_phys_in.delete_at_index(nonfreq_channels)
 
@@ -216,8 +228,8 @@ def slice_phys(phys, run_timestamps, padding=9, update_trigger=False):
                 run_fname = fname
 
             # zero out time
-            temp_phys_in.timeseries[0] = (
-                temp_phys_in.timeseries[0] - np.min(temp_phys_in.timeseries[0])
+            temp_phys_in.timeseries[0] = temp_phys_in.timeseries[0] - np.min(
+                temp_phys_in.timeseries[0]
             )
             # Now take out the time before the scan starts
             temp_phys_in.timeseries[0] = temp_phys_in.timeseries[0] - time_before

From 7e443adc4963e8a427c64155dde058285226d0c4 Mon Sep 17 00:00:00 2001
From: Taylor Salo <tsalo006@fiu.edu>
Date: Thu, 7 Jan 2021 18:58:25 -0500
Subject: [PATCH 10/14] Drop scipy.

---
 phys2bids/slice4phys.py      | 1 -
 phys2bids/synchronization.py | 1 -
 setup.cfg                    | 1 -
 3 files changed, 3 deletions(-)

diff --git a/phys2bids/slice4phys.py b/phys2bids/slice4phys.py
index 875650a37..511181706 100644
--- a/phys2bids/slice4phys.py
+++ b/phys2bids/slice4phys.py
@@ -4,7 +4,6 @@
 from copy import deepcopy
 
 import numpy as np
-from scipy.signal import resample
 
 from phys2bids.bids import update_bids_name
 
diff --git a/phys2bids/synchronization.py b/phys2bids/synchronization.py
index 9fc088c5c..b617aca16 100644
--- a/phys2bids/synchronization.py
+++ b/phys2bids/synchronization.py
@@ -8,7 +8,6 @@
 import nibabel as nib
 import numpy as np
 import pandas as pd
-
 from bids import BIDSLayout
 
 from .bids import update_bids_name
diff --git a/setup.cfg b/setup.cfg
index acb8dd3f6..e11312c5a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -25,7 +25,6 @@ install_requires =
     numpy >=1.9.3, !=*rc*
     matplotlib >=3.1.1, !=3.3.0rc1
     PyYAML >=5.1, !=*rc*
-    scipy
     pybids
     pandas
 tests_require =

From abad85a14918056f4ed1ab693daa08a69e52fc70 Mon Sep 17 00:00:00 2001
From: Taylor Salo <tsalo006@fiu.edu>
Date: Thu, 7 Jan 2021 19:06:22 -0500
Subject: [PATCH 11/14] Fix.

---
 phys2bids/slice4phys.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/phys2bids/slice4phys.py b/phys2bids/slice4phys.py
index 511181706..23b443e4e 100644
--- a/phys2bids/slice4phys.py
+++ b/phys2bids/slice4phys.py
@@ -199,8 +199,9 @@ def slice_phys(phys, run_timestamps, padding=9, update_trigger=False):
                 temp_phys_in.freq[0] = freq
 
                 # Resample trigger
-                cur_xvals = np.arange(len(new_trigger))
-                new_xvals = np.linspace(0, len(new_trigger), example_ts.shape[0])
+                trigger = temp_phys_in.timeseries[temp_phys_in.trigger_idx]
+                cur_xvals = np.arange(len(trigger))
+                new_xvals = np.linspace(0, len(trigger), example_ts.shape[0])
                 new_trigger = np.interp(
                     new_xvals,
                     cur_xvals,

From 0c1ade03366350a7f8f9191c76efb6b1397b4c51 Mon Sep 17 00:00:00 2001
From: Taylor Salo <tsalo006@fiu.edu>
Date: Fri, 8 Jan 2021 12:25:12 -0500
Subject: [PATCH 12/14] Add sync setup option.

---
 phys2bids/synchronization.py | 8 ++++++--
 setup.cfg                    | 7 +++++--
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/phys2bids/synchronization.py b/phys2bids/synchronization.py
index b617aca16..a61611c94 100644
--- a/phys2bids/synchronization.py
+++ b/phys2bids/synchronization.py
@@ -7,8 +7,6 @@
 import matplotlib.pyplot as plt
 import nibabel as nib
 import numpy as np
-import pandas as pd
-from bids import BIDSLayout
 
 from .bids import update_bids_name
 from .physio_obj import BlueprintOutput
@@ -41,6 +39,8 @@ def load_scan_data(layout, sub, ses=None):
         'duration' is the scan duration in seconds.
         'onset' is the scan onset in seconds, starting with zero at the first scan.
     """
+    import pandas as pd
+
     # Use the scans.tsv file directly if it is available
     scans_file = layout.get(extension=".tsv", suffix="scans", subject=sub, session=ses)
     if scans_file:
@@ -152,6 +152,8 @@ def extract_physio_onsets(trigger_timeseries, freq, threshold=0.5):
         onset (in seconds), index (onsets in index of time series array),
         and duration (in seconds).
     """
+    import pandas as pd
+
     samplerate = 1.0 / freq
     scan_idx = np.where(trigger_timeseries > 0)[0]
     # Get groups of consecutive numbers in index
@@ -433,6 +435,8 @@ def workflow(physio, bids_dir, sub, ses=None, padding=9, update_trigger=False):
     8. Write out scan-associated physio files.
     9. Generate and return scan/physio onsets figure for manual QC.
     """
+    from bids import BIDSLayout
+
     layout = BIDSLayout(bids_dir)
     scan_df = load_scan_data(layout, sub=sub, ses=ses)
 
diff --git a/setup.cfg b/setup.cfg
index e11312c5a..9ae5b5e00 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -25,8 +25,6 @@ install_requires =
     numpy >=1.9.3, !=*rc*
     matplotlib >=3.1.1, !=3.3.0rc1
     PyYAML >=5.1, !=*rc*
-    pybids
-    pandas
 tests_require =
     pytest >=5.3
 test_suite = pytest
@@ -48,6 +46,9 @@ doc =
 style =
     flake8 >=3.7
     flake8-docstrings >=1.5
+sync =
+    pybids
+    pandas
 interfaces =
     %(acq)s
     %(mat)s
@@ -56,11 +57,13 @@ test =
     pytest-cov
     %(interfaces)s
     %(style)s
+    %(sync)s
 all =
     %(doc)s
     %(duecredit)s
     %(interfaces)s
     %(style)s
+    %(sync)s
     %(test)s
 
 [options.package_data]

From cb6654c7dc713b3a8d3779365dde9d52c8536171 Mon Sep 17 00:00:00 2001
From: Taylor Salo <tsalo006@fiu.edu>
Date: Thu, 14 Jan 2021 14:39:27 -0500
Subject: [PATCH 13/14] Add warning when updating the trigger.

---
 phys2bids/slice4phys.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/phys2bids/slice4phys.py b/phys2bids/slice4phys.py
index 23b443e4e..0f2d10571 100644
--- a/phys2bids/slice4phys.py
+++ b/phys2bids/slice4phys.py
@@ -143,10 +143,13 @@ def slice_phys(phys, run_timestamps, padding=9, update_trigger=False):
     """
     phys = deepcopy(phys)
     if update_trigger:
+        LGR.warning(
+            "Overwriting the trigger channel. The original signal will be retained in a 'original trigger' channel."
+        )
         phys.freq.append(phys.freq[phys.trigger_idx])
         phys.units.append(phys.units[phys.trigger_idx])
         phys.timeseries.append(phys.timeseries[phys.trigger_idx].copy())
-        phys.ch_name.append('original trigger')
+        phys.ch_name.append("original trigger")
 
         # Fix up the trigger time series
         phys.timeseries[phys.trigger_idx][:] = 0

From cab25c78a7b1d15d3764ffadff7d42cb817ba321 Mon Sep 17 00:00:00 2001
From: Taylor Salo <tsalo006@fiu.edu>
Date: Thu, 14 Jan 2021 14:41:37 -0500
Subject: [PATCH 14/14] Fix line length.

---
 phys2bids/slice4phys.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/phys2bids/slice4phys.py b/phys2bids/slice4phys.py
index 0f2d10571..b270a8a39 100644
--- a/phys2bids/slice4phys.py
+++ b/phys2bids/slice4phys.py
@@ -144,7 +144,8 @@ def slice_phys(phys, run_timestamps, padding=9, update_trigger=False):
     phys = deepcopy(phys)
     if update_trigger:
         LGR.warning(
-            "Overwriting the trigger channel. The original signal will be retained in a 'original trigger' channel."
+            "Overwriting the trigger channel. "
+            "The original signal will be retained in a 'original trigger' channel."
         )
         phys.freq.append(phys.freq[phys.trigger_idx])
         phys.units.append(phys.units[phys.trigger_idx])