Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ticket/psb-171: Various fixes for VBO release #2707

Merged
merged 1 commit into from
Aug 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions allensdk/brain_observatory/behavior/behavior_ophys_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,13 +611,17 @@ def cell_specimen_table(self) -> pd.DataFrame:
which image plane an ROI resides on. Overlapping
ROIs are stored on different mask image planes
max_correction_down: (float)
max motion correction in down direction in pixels
max motion correction in down direction in pixels. Defines
the motion border at the top of the image.
max_correction_left: (float)
max motion correction in left direction in pixels
max motion correction in left direction in pixels. Defines
the motion border at the right of the image.
max_correction_right: (float)
max motion correction in right direction in pixels
max motion correction in right direction in pixels. Defines
the motion border at the left of the image.
max_correction_up: (float)
max motion correction in up direction in pixels
max motion correction in up direction in pixels. Defines
the motion border at the bottom of the image.
roi_mask: (array of bool)
an image array that displays the location of the
roi mask in the field of view
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def _get_ophys_session_table(self):
df["date_of_acquisition"] = pd.to_datetime(
df["date_of_acquisition"], utc="True"
)
df = enforce_df_int_typing(df, INTEGER_COLUMNS)
df = enforce_df_int_typing(df, INTEGER_COLUMNS, True)
self._ophys_session_table = df.set_index("ophys_session_id")

def get_ophys_session_table(self) -> pd.DataFrame:
Expand All @@ -188,7 +188,7 @@ def _get_behavior_session_table(self):
df["date_of_acquisition"] = pd.to_datetime(
df["date_of_acquisition"], utc="True"
)
df = enforce_df_int_typing(df, INTEGER_COLUMNS)
df = enforce_df_int_typing(df, INTEGER_COLUMNS, True)

self._behavior_session_table = df.set_index("behavior_session_id")

Expand Down Expand Up @@ -218,7 +218,7 @@ def _get_ophys_experiment_table(self):
df["date_of_acquisition"] = pd.to_datetime(
df["date_of_acquisition"], utc="True"
)
df = enforce_df_int_typing(df, INTEGER_COLUMNS)
df = enforce_df_int_typing(df, INTEGER_COLUMNS, True)
self._ophys_experiment_table = df.set_index("ophys_experiment_id")

def _get_ophys_cells_table(self):
Expand Down
2 changes: 1 addition & 1 deletion allensdk/brain_observatory/behavior/behavior_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -1149,7 +1149,7 @@ def stimulus_templates(self) -> Optional[pd.DataFrame]:
return None

@property
def stimulus_fingerprint_movie_template(self) -> Optional[pd.DataFrame]:
def stimulus_natural_movie_template(self) -> Optional[pd.DataFrame]:
"""Get stimulus templates movie for the behavior session.

Returns None if no stimulus movie is available.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def from_stimulus_file(
'stop_time': stop_time,
'start_frame': start_frame,
'end_frame': end_frame,
'repeat': repeat,
'movie_repeat': repeat,
'duration': stop_time - start_time
})
table = pd.DataFrame(res)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def __init__(
"is_change",
"is_image_novel",
"omitted",
"repeat",
"movie_repeat",
"flashes_since_change",
"trials_id",
],
Expand All @@ -104,7 +104,7 @@ def __init__(
"flashes_since_change",
"image_index",
"movie_frame_index",
"repeat",
"movie_repeat",
"stimulus_index",
],
)
Expand Down Expand Up @@ -290,6 +290,8 @@ def from_stimulus_file(
data, stimulus_timestamps.value
)
raw_stim_pres_df = raw_stim_pres_df.drop(columns=["index"])
raw_stim_pres_df = cls._check_for_errant_omitted_stimulus(
input_df=raw_stim_pres_df)

# Fill in nulls for image_name
# This makes two assumptions:
Expand Down Expand Up @@ -540,17 +542,37 @@ def _postprocess(
)
}
)
# Check if the first entry in the DataFrame is an omitted stimulus.
# This shouldn't happen and likely reflects some sort of camstim error
# with appending frames to the omitted flash frame log. See
# explanation here:
# https://github.com/AllenInstitute/AllenSDK/issues/2577
if "omitted" in df.columns and len(df) > 0:
first_row = df.iloc[0]
df = cls._check_for_errant_omitted_stimulus(input_df=df)
return df

@staticmethod
def _check_for_errant_omitted_stimulus(
input_df: pd.DataFrame
) -> pd.DataFrame:
"""Check if the first entry in the DataFrame is an omitted stimulus.

This shouldn't happen and likely reflects some sort of camstim error
with appending frames to the omitted flash frame log. See
explanation here:
https://github.com/AllenInstitute/AllenSDK/issues/2577

Parameters
----------
input_df : DataFrame
Input stimulus table to check for "omitted" stimulus.

Returns
-------
modified_df : DataFrame
Dataframe with omitted stimulus removed from first row or if not
found, return input_df unmodified.
"""
if "omitted" in input_df.columns and len(input_df) > 0:
first_row = input_df.iloc[0]
if not pd.isna(first_row["omitted"]):
Copy link
Contributor

@mikejhuang mikejhuang Aug 9, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any value of first_row["omitted"] for which `not pd.isna(first_row["omitted"])` is True but `if first_row["omitted"]` is False?

Edit: 0 and False would qualify.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Omitted can be nan for certain sessions. This is basically just a cautious test, as the behavior differs between numpy and pandas NA/nan values (`if pd.NA` raises an error because its truth value is ambiguous, whereas `if np.nan` evaluates as true). If the omitted value is nan we can continue, as it's not "True".

if first_row["omitted"]:
df = df.drop(first_row.name, axis=0)
return df
input_df = input_df.drop(first_row.name, axis=0)
return input_df

@staticmethod
def _fill_missing_values_for_omitted_flashes(
Expand Down
14 changes: 11 additions & 3 deletions allensdk/core/dataframe_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ def enforce_df_column_order(


def enforce_df_int_typing(input_df: pd.DataFrame,
int_columns: List[str]) -> pd.DataFrame:
int_columns: List[str],
use_pandas_type=False) -> pd.DataFrame:
"""Enforce integer typing for columns that may have lost int typing when
combined into the final DataFrame.

Expand All @@ -138,6 +139,10 @@ def enforce_df_int_typing(input_df: pd.DataFrame,
Columns to enforce int typing and fill any NaN/None values with the
value set in INT_NULL in this file. Requested columns not in the
dataframe are ignored.
use_pandas_type : bool
Instead of filling with the value INT_NULL to enforce integer typing,
use the pandas type Int64. This type can have issues converting to
numpy/array type values.

Returns
-------
Expand All @@ -147,6 +152,9 @@ def enforce_df_int_typing(input_df: pd.DataFrame,
"""
for col in int_columns:
if col in input_df.columns:
input_df[col] = \
input_df[col].fillna(INT_NULL).astype(int)
if use_pandas_type:
input_df[col] = input_df[col].astype('Int64')
else:
input_df[col] = \
input_df[col].fillna(INT_NULL).astype(int)
return input_df
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def mock_cache(request, tmpdir):
"ophys_session_table": pd.DataFrame(
{
"ophys_session_id": pd.Series([10, 11, 12, 13],
dtype='int64'),
dtype='Int64'),
"mouse_id": ["1"] * 4,
"date_of_acquisition": pd.to_datetime(["2021-01-01"] * 4),
"ophys_experiment_id": [4, 5, 6, [7, 8, 9]],
Expand Down
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ def test_behavior_ophys_experiment_list_data_attributes_and_methods(
"segmentation_mask_image",
"stimulus_presentations",
"stimulus_templates",
'stimulus_fingerprint_movie_template',
'stimulus_natural_movie_template',
"stimulus_timestamps",
"task_parameters",
"trials",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def dummy_init(self):
'running_speed',
'stimulus_presentations',
'stimulus_templates',
'stimulus_fingerprint_movie_template',
'stimulus_natural_movie_template',
'stimulus_timestamps',
'task_parameters',
'trials',
Expand Down
Loading