diff --git a/skchange/base.py b/skchange/base.py index 797186d3..824c83f2 100644 --- a/skchange/base.py +++ b/skchange/base.py @@ -600,8 +600,10 @@ def dense_to_sparse(y_dense: pd.Series) -> pd.Series[pd.Interval]: class ChangepointDetector(BaseDetector): """Base class for changepoint detectors. - Changepoint detectors detect the point in time where a change in the data occurs. - A changepoint is defined as the index of the last element before a change. + Changepoint detectors detect points in time where a change in the data occurs. + Data between two changepoints is a segment where the data is considered to be + homogeneous, i.e., of the same distribution. A changepoint is defined as the + location of the last element of a segment. Output format of the predict method: See the dense_to_sparse method. Output format of the transform method: See the sparse_to_dense method. @@ -638,10 +640,6 @@ def sparse_to_dense(y_sparse: pd.Series, index: pd.Index) -> pd.Series[int]: ------- pd.Series """ - # TODO: Use segment labels as dense output or changepoint indicator? - # Segment labels probably more useful. - # y_dense = pd.Series(0, index=index, name="changepoint", dtype="int64") - # y_dense.iloc[y_sparse.values] = 1 changepoints = y_sparse.to_list() n = len(index) changepoints = [-1] + changepoints + [n - 1] @@ -667,10 +665,6 @@ def dense_to_sparse(y_dense: pd.Series) -> pd.Series[int]: ------- pd.Series """ - # TODO: Use segment labels as dense output or changepoint indicator? - # Segment labels probably more useful. - # y_dense = y_dense.reset_index(drop=True) - # y_sparse = y_dense.iloc[y_dense.values == 1].index y_dense = y_dense.reset_index(drop=True) # changepoint = end of segment, so the label diffs > 0 must be shiftet by -1. is_changepoint = np.roll(y_dense.diff().abs() > 0, -1)