Skip to content

Commit

Permalink
Monthly windstats (#172)
Browse files Browse the repository at this point in the history
* windstats with monthly features

Add post rules feature to windstats runner.

* windstats runner comments

* Add new feature to return only anomaly scores

* re-check pr

* Update numpy version

Avoid using numpy v2.0 to avoid version conflicts. numpy v2.0 was published on 2024-6-17.

* numpy version

Keep numpy version under v2.0, which was released on 2024-06-17 and will lead to conflict.
  • Loading branch information
shi-yu-wang authored Jun 20, 2024
1 parent bb7c349 commit d56fddd
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 22 deletions.
8 changes: 4 additions & 4 deletions merlion/models/anomaly/windstats_monthly.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
logger = logging.getLogger(__name__)


class WindStatsConfig(DetectorConfig):
class MonthlyWindStatsConfig(DetectorConfig):
"""
Config class for `MonthlyWindStats`.
"""
Expand Down Expand Up @@ -61,15 +61,15 @@ class MonthlyWindStats(DetectorBase):
minimum of the scores is returned.
"""

config_class = WindStatsConfig
config_class = MonthlyWindStatsConfig

def __init__(self, config: WindStatsConfig = None):
def __init__(self, config: MonthlyWindStatsConfig = None):
"""
config.wind_sz: the window size in minutes, default is 30 minute window
config.max_days: maximum number of days stored in memory (only mean and std of each window are stored), default is 4 days
here the days are first bucketized and then bucketized by window id.
"""
super().__init__(WindStatsConfig() if config is None else config)
super().__init__(MonthlyWindStatsConfig() if config is None else config)
self.table = {}

@property
Expand Down
67 changes: 50 additions & 17 deletions merlion/models/anomaly/windstats_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,50 +5,83 @@
For the implementation of only weekly/monthly seasonality, specify "enable_weekly" or "enable_monthly" arguments of RunWindStats().
"""

from windstats import WindStats, WindStatsConfig
from windstats_monthly import MonthlyWindStats, MonthlyWindStatsConfig
from ts_datasets.anomaly import NAB
from merlion.models.anomaly.windstats import WindStats, WindStatsConfig
from merlion.models.anomaly.windstats_monthly import MonthlyWindStats, MonthlyWindStatsConfig
from merlion.utils import TimeSeries
from merlion.post_process.threshold import AggregateAlarms

class RunWindStats:
    """
    Run weekly and/or monthly WindStats models on a time series and combine
    their outputs.

    Each enabled model is trained on the input to produce anomaly scores. The
    scores are thresholded into 0/1 labels, and when both models are enabled
    the combined output is the element-wise product of the weekly and monthly
    results.
    """

    @staticmethod
    def _default_weekly_config():
        # Build a fresh default config for the weekly model.
        return WindStatsConfig()

    @staticmethod
    def _default_monthly_config():
        # Build a fresh default config for the monthly model. This lives in a
        # helper because __init__ has a parameter named MonthlyWindStatsConfig
        # that shadows the class of the same name inside __init__.
        return MonthlyWindStatsConfig()

    def __init__(
        self,
        threshold,
        enable_weekly=True,
        enable_monthly=True,
        post_rule_on_anom_score=False,
        WeeklyWindStatsConfig=None,
        MonthlyWindStatsConfig=None,
        return_score=True,
    ):
        """
        Users can customize the configuration for weekly or monthly-based
        windstats. If not, then the default configuration will apply.

        :param threshold: cutoff applied to the absolute anomaly score; rows
            whose absolute score exceeds it are labeled 1, rows at or below it
            are labeled 0.
        :param enable_weekly: whether to run the weekly model (``WindStats``).
        :param enable_monthly: whether to run the monthly model
            (``MonthlyWindStats``).
        :param post_rule_on_anom_score: whether to pass the raw scores through
            each model's ``post_rule`` before thresholding.
        :param WeeklyWindStatsConfig: config for the weekly model. ``None``
            (default) creates a fresh ``WindStatsConfig()`` per instance.
        :param MonthlyWindStatsConfig: config for the monthly model. ``None``
            (default) creates a fresh default monthly config per instance.
        :param return_score: if True, the third element returned by `run` is
            built from scores; otherwise it is built from 0/1 labels.
        :raises AssertionError: if both ``enable_weekly`` and
            ``enable_monthly`` are false.
        """
        # Raised explicitly (rather than via an ``assert`` statement) so the
        # check still runs under ``python -O``; the exception type is kept as
        # AssertionError for backward compatibility.
        if not (enable_weekly or enable_monthly):
            raise AssertionError("Must enable either weekly or monthly seasonality, or both!")

        self.enable_weekly = enable_weekly
        self.enable_monthly = enable_monthly
        self.return_score = return_score

        # Threshold on identifying anomaly based on anomaly score.
        self.threshold = threshold
        # Whether to apply the models' post rules to the anomaly scores.
        self.post_rule = post_rule_on_anom_score

        # Initialize the corresponding model for each enabled analysis.
        # Defaults are created here, per instance, so that no config object is
        # shared between RunWindStats instances.
        if self.enable_weekly:
            if WeeklyWindStatsConfig is None:
                WeeklyWindStatsConfig = self._default_weekly_config()
            self.model_weekly = WindStats(WeeklyWindStatsConfig)

        if self.enable_monthly:
            if MonthlyWindStatsConfig is None:
                MonthlyWindStatsConfig = self._default_monthly_config()
            self.model_monthly = MonthlyWindStats(MonthlyWindStatsConfig)

    def anomalyByScore(self, scores, threshold):
        """
        Identify anomalies based on the hard threshold.

        :param scores: pandas DataFrame with an ``"anom_score"`` column.
        :param threshold: cutoff on the absolute anomaly score.
        :return: a new DataFrame in which rows with ``|anom_score| > threshold``
            are set to 1, rows with ``|anom_score| <= threshold`` are set to 0,
            and the ``"anom_score"`` column is renamed to ``"anomaly"``. The
            input ``scores`` is left unmodified.
        """
        # Both masks are computed from the untouched input so that the second
        # assignment never sees values already overwritten by the first.
        magnitude = scores["anom_score"].abs()
        is_normal = magnitude <= threshold
        is_anomaly = magnitude > threshold

        labels = scores.copy()
        labels.loc[is_normal] = 0
        labels.loc[is_anomaly] = 1
        labels.rename(columns={"anom_score": "anomaly"}, inplace=True)
        return labels

    def get_anomaly_label(self, model, ts):
        """
        Filter anomaly scores based on post rules. Same as "get_anomaly_label"
        in WindStats.

        :param model: detector exposing ``train(ts)`` and a ``post_rule``
            attribute.
        :param ts: the time series to train on.
        :return: ``model.post_rule`` applied to the training scores, or the raw
            training scores when ``model.post_rule`` is None.
        """
        scores = model.train(ts)
        if model.post_rule is None:
            return scores
        return model.post_rule(scores)

    def _score_and_label(self, model, ts):
        # Train one model on ts and return (scores DataFrame, 0/1 labels DataFrame).
        if self.post_rule:
            scores = self.get_anomaly_label(model, ts).to_pd()
        else:
            scores = model.train(ts).to_pd()
        return scores, self.anomalyByScore(scores, self.threshold)

    def run(self, ts):
        """
        Train the enabled model(s) on ``ts`` and return their outputs.

        :param ts: the time series passed to each enabled model's ``train``.
        :return: a tuple ``(scores_weekly, scores_monthly, combined)``. The
            entry of a disabled model is None. ``combined`` is built from
            scores when ``return_score`` is True and from thresholded labels
            otherwise; when both models are enabled it is the element-wise
            product of the weekly and monthly results.
        """
        scores_weekly = labels_weekly = None
        scores_monthly = labels_monthly = None

        if self.enable_weekly:
            scores_weekly, labels_weekly = self._score_and_label(self.model_weekly, ts)

        if self.enable_monthly:
            scores_monthly, labels_monthly = self._score_and_label(self.model_monthly, ts)

        # Anomaly is identified if and only if it's detected in both weekly and
        # monthly patterns (product of the 0/1 labels).
        if self.enable_weekly and self.enable_monthly:
            if self.return_score:
                combined = scores_weekly * scores_monthly
            else:
                combined = labels_weekly * labels_monthly
        elif self.enable_weekly:
            combined = scores_weekly if self.return_score else labels_weekly
        else:
            combined = scores_monthly if self.return_score else labels_monthly

        return scores_weekly, scores_monthly, combined
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def read_file(fname):
"py4j",
"matplotlib",
"plotly>=4.13",
"numpy>=1.21", # 1.21 remediates a security risk
"numpy>=1.21,<2.0", # 1.21 remediates a security risk
"packaging",
"pandas>=1.1.0", # >=1.1.0 for origin kwarg to df.resample()
"prophet>=1.1", # 1.1 removes dependency on pystan
Expand Down

0 comments on commit d56fddd

Please sign in to comment.