OpenCV bug. Ensuring None for edges #1

Closed · wants to merge 7 commits · Changes from 5 commits
192 changes: 131 additions & 61 deletions dlc2nwb/utils.py
@@ -13,7 +13,7 @@
from ndx_pose import PoseEstimationSeries, PoseEstimation


def get_movie_timestamps(movie_file, VARIABILITYBOUND=1000):
def get_movie_timestamps(movie_file, VARIABILITYBOUND=1000, infer_timestamps=True):
"""
Return numpy array of the timestamps for a video.

@@ -37,6 +37,24 @@ def get_movie_timestamps(movie_file, VARIABILITYBOUND=1000):
"Variability of timestamps suspiciously small. See: https://github.com/DeepLabCut/DLC2NWB/issues/1"
)

if any(timestamps[1:] == 0):
@h-mayorquin commented on Aug 17, 2022:
Only two small suggestions then:

  1. I think the imputation should be a function of its own that takes timestamps and returns timestamps.
  2. Some suggestions for your code; not very important, as your comments are clear enough, but maybe you will find them useful:
bad_timestamps_mask = np.isnan(timestamps)
bad_timestamps_indexes = np.argwhere(bad_timestamps_mask)[:, 0]
estimated_frame_interval = np.mean(np.diff(timestamps[~bad_timestamps_mask]))  # approx. 1/fps

timestamps[bad_timestamps_mask] = bad_timestamps_indexes * estimated_frame_interval

The PR author replied:
Thanks for the suggestions! I can see how the way you've written it is cleaner and doesn't need commenting. It's not clear to me why this block should be a separate function. What's your threshold there?

@h-mayorquin replied:

To me it is just part of the general arguments for modularization. Of all those arguments, I think two are strong here (the usual one of re-usability probably does not apply within this repository):

  1. If somebody wants to change the specific operation of inference / imputation, they know exactly what to modify and where. Making it a function makes explicit that this piece of code is not coupled to the rest of the function. The specific imputation routine inside could change, but its role would still be the one of "inferring bad or missing timestamps". It is a good abstraction in that way.
  2. If someone is taking a first glance at how get_movie_timestamps works, the details of the specific imputation algorithm are noise at that level. Having it in a function makes the big picture more evident. There are several things going on here: 1) you build a cv2 reader, 2) you extract the data, 3) you check the variability of the timestamps, 4) you infer wrong/missing timestamps. Having one or more of those steps codified as a function makes this big picture clearer, in my opinion.

All that said, those are heuristics, and I am mostly following my intuition / internal model of what people will find easier to understand, read, and maintain in the future. Here is an article that argues against indirection in some cases and delineates where this model breaks down:

https://matthewrocklin.com/blog/work/2019/06/23/avoid-indirection

Thanks for all the hard work and good luck with the other PR.
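
For concreteness, a minimal sketch of the kind of helper being suggested here, assuming a NumPy array of timestamps in which bad frames are marked as NaN (the name infer_missing_timestamps is illustrative, not part of this PR):

import numpy as np

def infer_missing_timestamps(timestamps):
    """Return timestamps with NaN entries replaced by values inferred from the mean frame interval."""
    bad_timestamps_mask = np.isnan(timestamps)
    bad_timestamps_indexes = np.argwhere(bad_timestamps_mask)[:, 0]
    estimated_frame_interval = np.mean(np.diff(timestamps[~bad_timestamps_mask]))  # approx. 1/fps
    timestamps[bad_timestamps_mask] = bad_timestamps_indexes * estimated_frame_interval
    return timestamps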

        # Infers times when OpenCV provides 0s
        warning_msg = "Removing"
        timestamp_zero_count = np.count_nonzero(timestamps == 0)
        timestamps[timestamps == 0] = np.nan  # replace 0s with nan
        if infer_timestamps:
            warning_msg = "Replacing"
            timestamps[np.isnan(timestamps)] = (  # replace nans w/
                np.isnan(timestamps).nonzero()[0]  # nans idx multiplied by
                * np.mean(np.diff(  # average frame difference
                    timestamps[~np.isnan(timestamps)])  # (approx equal 1/fps)
                )
            )
        warnings.warn(  # warns user of percent of 0 frames
            "%s cv2 timestamps returned as 0: %f%%"
            % (warning_msg, (timestamp_zero_count / len(timestamps) * 100))
        )

return timestamps
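
As a quick standalone illustration of this inference step (not part of the diff; the values below are made up for a roughly 30 fps video):

import numpy as np

# OpenCV returned 0 for the second frame; the first frame legitimately starts at 0
timestamps = np.array([0.0, 0.0, 0.0667, 0.1000])
timestamps[timestamps == 0] = np.nan  # mark zeros as missing
frame_interval = np.mean(np.diff(timestamps[~np.isnan(timestamps)]))  # ~1/30 s
timestamps[np.isnan(timestamps)] = np.isnan(timestamps).nonzero()[0] * frame_interval
# timestamps is now approximately [0.0, 0.0333, 0.0667, 0.1]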


@@ -51,36 +69,11 @@ def _ensure_individuals_in_header(df, dummy_name):
return df


def convert_h5_to_nwb(config, h5file, individual_name="ind1"):
"""
Convert a DeepLabCut (DLC) video prediction h5 data file to Neurodata Without Borders (NWB). Also
takes project config, to store relevant metadata.

Parameters
----------
config : str
Path to a project config.yaml file

h5file : str
Path to a h5 data file

individual_name : str
Name of the subject (whose pose is predicted) for single-animal DLC project.
For multi-animal projects, the names from the DLC project will be used directly.

TODO: allow one to overwrite those names, with a mapping?

Returns
-------
list of str
List of paths to the newly created NWB data files.
By default NWB files are stored in the same folder as the h5file.

"""
def _get_pes_args(config_file, h5file, individual_name, infer_timestamps=True):
if "DLC" not in h5file or not h5file.endswith(".h5"):
raise IOError("The file passed in is not a DeepLabCut h5 data file.")

cfg = auxiliaryfunctions.read_config(config)
cfg = auxiliaryfunctions.read_config(config_file)

vidname, scorer = os.path.split(h5file)[-1].split("DLC")
scorer = "DLC" + os.path.splitext(scorer)[0]
@@ -122,38 +115,117 @@ def convert_h5_to_nwb(config, h5file, individual_name="ind1"):
df.index.tolist()
) # setting timestamps to dummy TODO: extract timestamps in DLC?
else:
timestamps = get_movie_timestamps(video[0])
timestamps = get_movie_timestamps(video[0], infer_timestamps=infer_timestamps)
return scorer, df, video, paf_graph, timestamps, cfg


def _write_pes_to_nwbfile(nwbfile, animal, df_animal, scorer, video, paf_graph, timestamps,
                          exclude_nans):
    pose_estimation_series = []
    for kpt, xyp in df_animal.groupby(level="bodyparts", axis=1, sort=False):
        data = xyp.to_numpy()

        timestamps_cleaned = timestamps  # default: keep all timestamps
        if exclude_nans:
            # exclude_nans is the inverse of infer_timestamps; if timestamps were not inferred, there may be NaNs
            data = data[~np.isnan(timestamps)]
            timestamps_cleaned = timestamps[~np.isnan(timestamps)]

        pes = PoseEstimationSeries(
            name=f"{animal}_{kpt}",
            description=f"Keypoint {kpt} from individual {animal}.",
            data=data[:, :2],
            unit="pixels",
            reference_frame="(0,0) corresponds to the bottom left corner of the video.",
            timestamps=timestamps_cleaned,
            confidence=data[:, 2],
            confidence_definition="Softmax output of the deep neural network.",
        )
        pose_estimation_series.append(pes)

    pe = PoseEstimation(
        pose_estimation_series=pose_estimation_series,
        description="2D keypoint coordinates estimated using DeepLabCut.",
        original_videos=[video[0]],
        # TODO: check whether this is a mandatory arg in ndx-pose (can skip if video is not found)
        dimensions=[list(map(int, video[1].split(",")))[1::2]],
        scorer=scorer,
        source_software="DeepLabCut",
        source_software_version=__version__,
        nodes=[pes.name for pes in pose_estimation_series],
        edges=paf_graph if paf_graph else None,
    )
    if "behavior" in nwbfile.processing:
        behavior_pm = nwbfile.processing["behavior"]
    else:
        behavior_pm = nwbfile.create_processing_module(
            name="behavior", description="processed behavioral data"
        )
    behavior_pm.add(pe)
    return nwbfile


def write_subject_to_nwb(nwbfile, h5file, individual_name, config_file):
    """
    Given a subject name, write the h5 file to an existing NWBFile.

    Parameters
    ----------
    nwbfile : pynwb.NWBFile
        NWBFile to which the subject-specific pose estimation series will be written.
    h5file : str
        Path to a h5 data file.
    individual_name : str
        Name of the subject (whose pose is predicted) for a single-animal DLC project.
        For multi-animal projects, the names from the DLC project will be used directly.
    config_file : str
        Path to a project config.yaml file.

    Returns
    -------
    nwbfile : pynwb.NWBFile
        NWBFile with the pose estimation series written to the behavior processing module.
    """
    scorer, df, video, paf_graph, timestamps, _ = _get_pes_args(config_file, h5file, individual_name)
    df_animal = df.groupby(level="individuals", axis=1).get_group(individual_name)
    return _write_pes_to_nwbfile(nwbfile, individual_name, df_animal, scorer, video, paf_graph,
                                 timestamps, exclude_nans=False)  # timestamps were inferred above, so nothing to exclude
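
For context, a minimal usage sketch of write_subject_to_nwb as defined above; the session metadata, file paths, and subject name below are placeholders, not values from the PR:

from datetime import datetime

from pynwb import NWBFile, NWBHDF5IO
from dlc2nwb.utils import write_subject_to_nwb

# Placeholder session metadata and paths; the h5 filename must contain "DLC" and end in ".h5"
nwbfile = NWBFile(
    session_description="DLC pose estimation session",
    identifier="session-001",
    session_start_time=datetime.now().astimezone(),
)
nwbfile = write_subject_to_nwb(
    nwbfile,
    h5file="videos/mouseDLC_resnet50_demoFeb1shuffle1_100000.h5",
    individual_name="ind1",
    config_file="project/config.yaml",
)
with NWBHDF5IO("session-001.nwb", mode="w") as io:
    io.write(nwbfile)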


def convert_h5_to_nwb(config, h5file, individual_name="ind1", infer_timestamps=True):
"""
Convert a DeepLabCut (DLC) video prediction h5 data file to Neurodata Without Borders (NWB) format. Also
takes the project config, to store relevant metadata.

Parameters
----------
config : str
Path to a project config.yaml file

h5file : str
Path to a h5 data file

individual_name : str
Name of the subject (whose pose is predicted) for single-animal DLC project.
For multi-animal projects, the names from the DLC project will be used directly.

infer_timestamps : bool
Default True. Uses the frame rate to infer timestamps that OpenCV returns as 0.
If False, these frames are excluded from the resulting NWB file.

TODO: allow one to overwrite those names, with a mapping?

Returns
-------
list of str
List of paths to the newly created NWB data files.
By default NWB files are stored in the same folder as the h5file.

"""
scorer, df, video, paf_graph, timestamps, cfg = _get_pes_args(config, h5file, individual_name,
                                                              infer_timestamps=infer_timestamps)
output_paths = []
for animal, df_ in df.groupby(level="individuals", axis=1):
pose_estimation_series = []
for kpt, xyp in df_.groupby(level="bodyparts", axis=1, sort=False):
data = xyp.to_numpy()

pes = PoseEstimationSeries(
name=f"{animal}_{kpt}",
description=f"Keypoint {kpt} from individual {animal}.",
data=data[:, :2],
unit="pixels",
reference_frame="(0,0) corresponds to the bottom left corner of the video.",
timestamps=timestamps,
confidence=data[:, 2],
confidence_definition="Softmax output of the deep neural network.",
)
pose_estimation_series.append(pes)

pe = PoseEstimation(
pose_estimation_series=pose_estimation_series,
description="2D keypoint coordinates estimated using DeepLabCut.",
original_videos=[video[0]],
dimensions=[list(map(int, video[1].split(",")))[1::2]],
scorer=scorer,
source_software="DeepLabCut",
source_software_version=__version__,
nodes=[pes.name for pes in pose_estimation_series],
edges=paf_graph,
)

nwbfile = NWBFile(
session_description=cfg["Task"],
experimenter=cfg["scorer"],
@@ -162,10 +234,8 @@ def convert_h5_to_nwb(config, h5file, individual_name="ind1"):
)

# TODO Store the test_pose_config as well?
behavior_pm = nwbfile.create_processing_module(
name="behavior", description="processed behavioral data"
)
behavior_pm.add(pe)
nwbfile = _write_pes_to_nwbfile(nwbfile, animal, df_, scorer, video, paf_graph, timestamps,
                                exclude_nans=(not infer_timestamps))
output_path = h5file.replace(".h5", f"_{animal}.nwb")
with warnings.catch_warnings(), NWBHDF5IO(output_path, mode="w") as io:
warnings.filterwarnings("ignore", category=DtypeConversionWarning)
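
For reference, a minimal usage sketch of the updated convert_h5_to_nwb signature; the paths below are placeholders, not values from the PR:

from dlc2nwb.utils import convert_h5_to_nwb

# Placeholder paths; the h5 filename must contain "DLC" and end in ".h5"
nwb_paths = convert_h5_to_nwb(
    config="project/config.yaml",
    h5file="videos/mouseDLC_resnet50_demoFeb1shuffle1_100000.h5",
    individual_name="ind1",
    infer_timestamps=True,  # set to False to drop frames whose timestamps OpenCV returned as 0
)
print(nwb_paths)  # list of paths to the newly created NWB files, one per animal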