Skip to content

Commit

Permalink
Check if all values in a groupby column are NaN or not NaN (#526)
Browse files Browse the repository at this point in the history
  • Loading branch information
andersy005 authored Sep 18, 2022
1 parent b3220bd commit 3e959d1
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 22 deletions.
32 changes: 32 additions & 0 deletions intake_esm/cat.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import enum
import functools
import json
import os
import typing
Expand All @@ -12,6 +13,28 @@
from ._search import search, search_apply_require_all_on


def _allnan_or_nonan(df, column: str) -> bool:
    """Check that a column is either entirely NaN or entirely free of NaN.

    Parameters
    ----------
    df
        Dataframe-like object; ``df[column]`` must support ``.isnull()``.
    column : str
        Name of the column to inspect.

    Returns
    -------
    bool
        ``True`` when the column contains no NaN values, ``False`` when
        every value in the column is NaN. An empty column also yields
        ``False``, because ``all()`` over an empty mask is true.

    Raises
    ------
    ValueError
        When the column has a mix of NaN and non-NaN values.
    """
    # Build the null mask once; both checks below reuse it.
    null_mask = df[column].isnull()
    if null_mask.all():
        return False
    if null_mask.any():
        raise ValueError(
            f'The data in the {column} column should either be all NaN or there should be no NaNs'
        )
    return True


class AggregationType(str, enum.Enum):
join_new = 'join_new'
join_existing = 'join_existing'
Expand Down Expand Up @@ -286,6 +309,15 @@ def _cast_agg_columns_with_iterables(self) -> None:

@property
def grouped(self) -> typing.Union[pd.core.groupby.DataFrameGroupBy, pd.DataFrame]:

if self.aggregation_control.groupby_attrs:
self.aggregation_control.groupby_attrs = list(
filter(
functools.partial(_allnan_or_nonan, self.df),
self.aggregation_control.groupby_attrs,
)
)

if self.aggregation_control.groupby_attrs and set(
self.aggregation_control.groupby_attrs
) != set(self.df.columns):
Expand Down
22 changes: 0 additions & 22 deletions intake_esm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,6 @@
import sys


def _allnan_or_nonan(df, column: str) -> bool:
    """Check that a column is either entirely NaN or entirely free of NaN.

    Parameters
    ----------
    df
        Dataframe-like object; ``df[column]`` must support ``.isnull()``.
    column : str
        Name of the column to inspect.

    Returns
    -------
    bool
        ``True`` when the column contains no NaN values, ``False`` when
        every value in the column is NaN. An empty column also yields
        ``False``, because ``all()`` over an empty mask is true.

    Raises
    ------
    ValueError
        When the column has a mix of NaN and non-NaN values.
    """
    # Build the null mask once; both checks below reuse it.
    null_mask = df[column].isnull()
    if null_mask.all():
        return False
    if null_mask.any():
        raise ValueError(
            f'The data in the {column} column should either be all NaN or there should be no NaNs'
        )
    return True


def show_versions(file=sys.stdout): # pragma: no cover
"""print the versions of intake-esm and its dependencies.
Adapted from xarray/util/print_versions.py
Expand Down

0 comments on commit 3e959d1

Please sign in to comment.