From 50f04ca1d251373bb1131e49b1cb223272af86db Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Wed, 26 Jun 2019 00:29:17 -0700 Subject: [PATCH 01/21] forecast API. --- .../Transforms/TimeSeries/Forecasting.cs | 6 +- .../ForecastingWithConfidenceInterval.cs | 6 +- ...AdaptiveSingularSpectrumSequenceModeler.cs | 2681 ++++++++--------- .../ExtensionsCatalog.cs | 10 +- src/Microsoft.ML.TimeSeries/Forecast.cs | 6 +- .../PredictionFunction.cs | 70 +- src/Microsoft.ML.TimeSeries/SSaForecasting.cs | 304 ++ .../SequentialForecastingTransformBase.cs | 255 ++ .../SequentialTransformBase.cs | 18 + .../SequentialTransformerBase.cs | 16 +- .../SsaAnomalyDetectionBase.cs | 4 +- .../SsaForecastingBase.cs | 310 ++ .../TimeSeriesDirectApi.cs | 139 +- 13 files changed, 2400 insertions(+), 1425 deletions(-) create mode 100644 src/Microsoft.ML.TimeSeries/SSaForecasting.cs create mode 100644 src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs create mode 100644 src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs index 8ea98c8d99..f69f8ff8d8 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs @@ -12,7 +12,7 @@ public static class Forecasting // does forecasting. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + /*// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, // as well as the source of randomness. var ml = new MLContext(); @@ -47,7 +47,7 @@ public static void Example() // Instantiate the forecasting model. var model = ml.Forecasting.AdaptiveSingularSpectrumSequenceModeler(inputColumnName, data.Count, SeasonalitySize + 1, SeasonalitySize, - 1, AdaptiveSingularSpectrumSequenceModeler.RankSelectionMethod.Exact, null, SeasonalitySize / 2, false, false); + 1, AdaptiveSingularSpectrumSequenceForecasting.RankSelectionMethod.Exact, null, SeasonalitySize / 2, false, false); // Train. model.Train(dataView); @@ -77,7 +77,7 @@ public static void Example() // Forecast with the original model(that was checkpointed to disk). forecast = model.Forecast(5); Console.WriteLine("[{0}]", string.Join(", ", forecast)); - // [0.8681176, 0.8185108, 0.8069275, 0.84405, 0.9455081] + // [0.8681176, 0.8185108, 0.8069275, 0.84405, 0.9455081]*/ } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs index b5c3c5c9c2..de546e043f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs @@ -12,7 +12,7 @@ public static class ForecastingWithConfidenceInternal // does forecasting. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + /*// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, // as well as the source of randomness. var ml = new MLContext(); @@ -47,7 +47,7 @@ public static void Example() // Instantiate forecasting model. var model = ml.Forecasting.AdaptiveSingularSpectrumSequenceModeler(inputColumnName, data.Count, SeasonalitySize + 1, SeasonalitySize, - 1, AdaptiveSingularSpectrumSequenceModeler.RankSelectionMethod.Exact, null, SeasonalitySize / 2, shouldComputeForecastIntervals: true, false); + 1, AdaptiveSingularSpectrumSequenceForecasting.RankSelectionMethod.Exact, null, SeasonalitySize / 2, shouldComputeForecastIntervals: true, false); // Train. model.Train(dataView); @@ -97,7 +97,7 @@ static void PrintForecastValuesAndIntervals(float[] forecast, float[] confidence Console.WriteLine($"Confidence intervals:"); for (int index = 0; index < forecast.Length; index++) Console.Write($"[{confidenceIntervalLowerBounds[index]} - {confidenceIntervalUpperBounds[index]}] "); - Console.WriteLine(); + Console.WriteLine();*/ } class TimeSeriesData diff --git a/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs b/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs index da14de2bcb..4bc50590a9 100644 --- a/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs +++ b/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs @@ -5,1700 +5,1595 @@ using System; using System.Collections.Generic; using System.Numerics; -using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Internal.CpuMath; using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Runtime; -using Microsoft.ML.TimeSeries; -using Microsoft.ML.Transforms.TimeSeries; - -[assembly: LoadableClass(typeof(AdaptiveSingularSpectrumSequenceModeler.AdaptiveSingularSpectrumSequenceModelerInternal), - typeof(AdaptiveSingularSpectrumSequenceModeler.AdaptiveSingularSpectrumSequenceModelerInternal), null, typeof(SignatureLoadModel), - "SSA Sequence Modeler", - AdaptiveSingularSpectrumSequenceModeler.AdaptiveSingularSpectrumSequenceModelerInternal.LoaderSignature)] - -[assembly: LoadableClass(typeof(AdaptiveSingularSpectrumSequenceModeler), - typeof(AdaptiveSingularSpectrumSequenceModeler), null, typeof(SignatureLoadModel), - "SSA Sequence Modeler Wrapper", - AdaptiveSingularSpectrumSequenceModeler.LoaderSignature)] namespace Microsoft.ML.Transforms.TimeSeries { /// - /// This class implements basic Singular Spectrum Analysis (SSA) model for modeling univariate time-series. - /// For the details of the model, refer to http://arxiv.org/pdf/1206.6910.pdf. + /// Ranking selection method for the signal. + /// + public enum RankSelectionMethod + { + Fixed, + Exact, + Fast + } + + /// + /// Growth ratio. Defined as Growth^(1/TimeSpan). /// - public sealed class AdaptiveSingularSpectrumSequenceModeler : ICanForecast + public struct GrowthRatio { + private int _timeSpan; + private Double _growth; + /// - /// Ranking selection method for the signal. + /// Time span of growth ratio. Must be strictly positive. /// - public enum RankSelectionMethod + public int TimeSpan { - Fixed, - Exact, - Fast + get + { + return _timeSpan; + } + set + { + Contracts.CheckParam(value > 0, nameof(TimeSpan), "The time span must be strictly positive."); + _timeSpan = value; + } } /// - /// Growth ratio. Defined as Growth^(1/TimeSpan). + /// Growth. Must be non-negative. /// - public struct GrowthRatio + public Double Growth { - private int _timeSpan; - private Double _growth; - - /// - /// Time span of growth ratio. Must be strictly positive. - /// - public int TimeSpan - { - get - { - return _timeSpan; - } - set - { - Contracts.CheckParam(value > 0, nameof(TimeSpan), "The time span must be strictly positive."); - _timeSpan = value; - } - } - - /// - /// Growth. Must be non-negative. - /// - public Double Growth + get { - get - { - return _growth; - } - set - { - Contracts.CheckParam(value >= 0, nameof(Growth), "The growth must be non-negative."); - _growth = value; - } + return _growth; } - - public GrowthRatio(int timeSpan = 1, double growth = Double.PositiveInfinity) + set { - Contracts.CheckParam(timeSpan > 0, nameof(TimeSpan), "The time span must be strictly positive."); - Contracts.CheckParam(growth >= 0, nameof(Growth), "The growth must be non-negative."); - - _growth = growth; - _timeSpan = timeSpan; + Contracts.CheckParam(value >= 0, nameof(Growth), "The growth must be non-negative."); + _growth = value; } - - public Double Ratio { get { return Math.Pow(_growth, 1d / _timeSpan); } } } - private AdaptiveSingularSpectrumSequenceModelerInternal _modeler; + public GrowthRatio(int timeSpan = 1, double growth = Double.PositiveInfinity) + { + Contracts.CheckParam(timeSpan > 0, nameof(TimeSpan), "The time span must be strictly positive."); + Contracts.CheckParam(growth >= 0, nameof(Growth), "The growth must be non-negative."); - private readonly string _inputColumnName; + _growth = growth; + _timeSpan = timeSpan; + } - internal const string LoaderSignature = "ForecastModel"; + public Double Ratio { get { return Math.Pow(_growth, 1d / _timeSpan); } } + } - private readonly IHost _host; + /// + /// This class implements basic Singular Spectrum Analysis (SSA) model for modeling univariate time-series. + /// For the details of the model, refer to http://arxiv.org/pdf/1206.6910.pdf. + /// + internal sealed class AdaptiveSingularSpectrumSequenceModelerInternal : SequenceModelerBase + { + internal const string LoaderSignature = "SSAModel"; - private static VersionInfo GetVersionInfo() + internal sealed class SsaForecastResult : ForecastResultBase { - return new VersionInfo( - modelSignature: "SSAMODLW", - verWrittenCur: 0x00010001, // Initial - verReadableCur: 0x00010001, - verWeCanReadBack: 0x00010001, - loaderSignature: LoaderSignature, - loaderAssemblyName: typeof(AdaptiveSingularSpectrumSequenceModeler).Assembly.FullName); - } + public VBuffer ForecastStandardDeviation; + public VBuffer UpperBound; + public VBuffer LowerBound; + public Single ConfidenceLevel; - internal AdaptiveSingularSpectrumSequenceModeler(IHostEnvironment env, string inputColumnName, int trainSize, int seriesLength, int windowSize, Single discountFactor = 1, - RankSelectionMethod rankSelectionMethod = RankSelectionMethod.Exact, int? rank = null, int? maxRank = null, - bool shouldComputeForecastIntervals = true, bool shouldstablize = true, bool shouldMaintainInfo = false, GrowthRatio? maxGrowth = null) - { - Contracts.CheckValue(env, nameof(env)); - _host = env.Register(LoaderSignature); - _host.CheckParam(!string.IsNullOrEmpty(inputColumnName), nameof(inputColumnName)); + internal bool CanComputeForecastIntervals; + internal Single BoundOffset; - _inputColumnName = inputColumnName; - _modeler = new AdaptiveSingularSpectrumSequenceModelerInternal(env, trainSize, seriesLength, windowSize, discountFactor, - rankSelectionMethod, rank, maxRank, shouldComputeForecastIntervals, shouldstablize, shouldMaintainInfo, maxGrowth); + public bool IsVarianceValid { get { return CanComputeForecastIntervals; } } } - internal AdaptiveSingularSpectrumSequenceModeler(IHostEnvironment env, ModelLoadContext ctx) + public sealed class ModelInfo { - Contracts.CheckValue(env, nameof(env)); - _host = env.Register(LoaderSignature); - _inputColumnName = ctx.Reader.ReadString(); - ctx.LoadModel(_host, out _modeler, "ForecastWrapper"); - } + /// + /// The singular values of the SSA of the input time-series + /// + public Single[] Spectrum; - /// - /// This class implements basic Singular Spectrum Analysis (SSA) model for modeling univariate time-series. - /// For the details of the model, refer to http://arxiv.org/pdf/1206.6910.pdf. - /// - internal sealed class AdaptiveSingularSpectrumSequenceModelerInternal : SequenceModelerBase - { - internal const string LoaderSignature = "SSAModel"; + /// + /// The roots of the characteristic polynomial after stabilization (meaningful only if the model is stabilized.) + /// + public Complex[] RootsAfterStabilization; - internal sealed class SsaForecastResult : ForecastResultBase - { - public VBuffer ForecastStandardDeviation; - public VBuffer UpperBound; - public VBuffer LowerBound; - public Single ConfidenceLevel; + /// + /// The roots of the characteristic polynomial before stabilization (meaningful only if the model is stabilized.) + /// + public Complex[] RootsBeforeStabilization; - internal bool CanComputeForecastIntervals; - internal Single BoundOffset; + /// + /// The rank of the model + /// + public int Rank; - public bool IsVarianceValid { get { return CanComputeForecastIntervals; } } - } + /// + /// The window size used to compute the SSA model + /// + public int WindowSize; - public sealed class ModelInfo - { - /// - /// The singular values of the SSA of the input time-series - /// - public Single[] Spectrum; - - /// - /// The roots of the characteristic polynomial after stabilization (meaningful only if the model is stabilized.) - /// - public Complex[] RootsAfterStabilization; - - /// - /// The roots of the characteristic polynomial before stabilization (meaningful only if the model is stabilized.) - /// - public Complex[] RootsBeforeStabilization; - - /// - /// The rank of the model - /// - public int Rank; - - /// - /// The window size used to compute the SSA model - /// - public int WindowSize; - - /// - /// The auto-regressive coefficients learned by the model - /// - public Single[] AutoRegressiveCoefficients; - - /// - /// The flag indicating whether the model has been trained - /// - public bool IsTrained; - - /// - /// The flag indicating a naive model is trained instead of SSA - /// - public bool IsNaiveModelTrained; - - /// - /// The flag indicating whether the learned model has an exponential trend (meaningful only if the model is stabilized.) - /// - public bool IsExponentialTrendPresent; - - /// - /// The flag indicating whether the learned model has a polynomial trend (meaningful only if the model is stabilized.) - /// - public bool IsPolynomialTrendPresent; - - /// - /// The flag indicating whether the learned model has been stabilized - /// - public bool IsStabilized; - - /// - /// The flag indicating whether any artificial seasonality (a seasonality with period greater than the window size) is removed - /// (meaningful only if the model is stabilized.) - /// - public bool IsArtificialSeasonalityRemoved; - - /// - /// The exponential trend magnitude (meaningful only if the model is stabilized.) - /// - public Double ExponentialTrendFactor; - } + /// + /// The auto-regressive coefficients learned by the model + /// + public Single[] AutoRegressiveCoefficients; - private ModelInfo _info; + /// + /// The flag indicating whether the model has been trained + /// + public bool IsTrained; /// - /// Returns the meta information about the learned model. + /// The flag indicating a naive model is trained instead of SSA /// - public ModelInfo Info - { - get { return _info; } - } + public bool IsNaiveModelTrained; - private Single[] _alpha; - private Single[] _state; - private readonly FixedSizeQueue _buffer; - private CpuAlignedVector _x; - private CpuAlignedVector _xSmooth; - private int _windowSize; - private readonly int _seriesLength; - private readonly RankSelectionMethod _rankSelectionMethod; - private readonly Single _discountFactor; - private readonly int _trainSize; - private int _maxRank; - private readonly Double _maxTrendRatio; - private readonly bool _shouldStablize; - private readonly bool _shouldMaintainInfo; - - private readonly IHost _host; - - private CpuAlignedMatrixRow _wTrans; - private Single _observationNoiseVariance; - private Single _observationNoiseMean; - private Single _autoregressionNoiseVariance; - private Single _autoregressionNoiseMean; - - private int _rank; - private CpuAlignedVector _y; - private Single _nextPrediction; + /// + /// The flag indicating whether the learned model has an exponential trend (meaningful only if the model is stabilized.) + /// + public bool IsExponentialTrendPresent; /// - /// Determines whether the confidence interval required for forecasting. + /// The flag indicating whether the learned model has a polynomial trend (meaningful only if the model is stabilized.) /// - public bool ShouldComputeForecastIntervals { get; set; } + public bool IsPolynomialTrendPresent; /// - /// Returns the rank of the subspace used for SSA projection (parameter r). + /// The flag indicating whether the learned model has been stabilized /// - public int Rank { get { return _rank; } } + public bool IsStabilized; /// - /// Returns the smoothed (via SSA projection) version of the last series observation fed to the model. + /// The flag indicating whether any artificial seasonality (a seasonality with period greater than the window size) is removed + /// (meaningful only if the model is stabilized.) /// - public Single LastSmoothedValue { get { return _state[_windowSize - 2]; } } + public bool IsArtificialSeasonalityRemoved; /// - /// Returns the last series observation fed to the model. + /// The exponential trend magnitude (meaningful only if the model is stabilized.) /// - public Single LastValue { get { return _buffer.Count > 0 ? _buffer[_buffer.Count - 1] : Single.NaN; } } + public Double ExponentialTrendFactor; + } - private static VersionInfo GetVersionInfo() - { - return new VersionInfo( - modelSignature: "SSAMODLR", - //verWrittenCur: 0x00010001, // Initial - verWrittenCur: 0x00010002, // Added saving _state and _nextPrediction - verReadableCur: 0x00010002, - verWeCanReadBack: 0x00010001, - loaderSignature: LoaderSignature, - loaderAssemblyName: typeof(AdaptiveSingularSpectrumSequenceModelerInternal).Assembly.FullName); - } + private ModelInfo _info; - private const int VersionSavingStateAndPrediction = 0x00010002; + /// + /// Returns the meta information about the learned model. + /// + public ModelInfo Info + { + get { return _info; } + } - /// - /// The constructor for Adaptive SSA model. - /// - /// The exception context. - /// The length of series from the begining used for training. - /// The length of series that is kept in buffer for modeling (parameter N). - /// The length of the window on the series for building the trajectory matrix (parameter L). - /// The discount factor in [0,1] used for online updates (default = 1). - /// The rank selection method (default = Exact). - /// The desired rank of the subspace used for SSA projection (parameter r). This parameter should be in the range in [1, windowSize]. - /// If set to null, the rank is automatically determined based on prediction error minimization. (default = null) - /// The maximum rank considered during the rank selection process. If not provided (i.e. set to null), it is set to windowSize - 1. - /// The flag determining whether the confidence bounds for the point forecasts should be computed. (default = true) - /// The flag determining whether the model should be stabilized. - /// The flag determining whether the meta information for the model needs to be maintained. - /// The maximum growth on the exponential trend - public AdaptiveSingularSpectrumSequenceModelerInternal(IHostEnvironment env, int trainSize, int seriesLength, int windowSize, Single discountFactor = 1, - RankSelectionMethod rankSelectionMethod = RankSelectionMethod.Exact, int? rank = null, int? maxRank = null, - bool shouldComputeForecastIntervals = true, bool shouldstablize = true, bool shouldMaintainInfo = false, GrowthRatio? maxGrowth = null) - : base() - { - Contracts.CheckValue(env, nameof(env)); - _host = env.Register(LoaderSignature); - _host.CheckParam(windowSize >= 2, nameof(windowSize), "The window size should be at least 2."); // ...because otherwise we have nothing to autoregress on - _host.CheckParam(seriesLength > windowSize, nameof(seriesLength), "The series length should be greater than the window size."); - _host.Check(trainSize > 2 * windowSize, "The input series length for training should be greater than twice the window size."); - _host.CheckParam(0 <= discountFactor && discountFactor <= 1, nameof(discountFactor), "The discount factor should be in [0,1]."); - - if (maxRank != null) - { - _maxRank = (int)maxRank; - _host.CheckParam(1 <= _maxRank && _maxRank < windowSize, nameof(maxRank), - "The max rank should be in [1, windowSize)."); - } - else - _maxRank = windowSize - 1; + private Single[] _alpha; + private Single[] _state; + private readonly FixedSizeQueue _buffer; + private CpuAlignedVector _x; + private CpuAlignedVector _xSmooth; + private int _windowSize; + private readonly int _seriesLength; + private readonly RankSelectionMethod _rankSelectionMethod; + private readonly Single _discountFactor; + private readonly int _trainSize; + private int _maxRank; + private readonly Double _maxTrendRatio; + private readonly bool _shouldStablize; + private readonly bool _shouldMaintainInfo; - _rankSelectionMethod = rankSelectionMethod; - if (_rankSelectionMethod == RankSelectionMethod.Fixed) - { - if (rank != null) - { - _rank = (int)rank; - _host.CheckParam(1 <= _rank && _rank < windowSize, nameof(rank), "The rank should be in [1, windowSize)."); - } - else - _rank = _maxRank; - } + private readonly IHost _host; - _seriesLength = seriesLength; - _windowSize = windowSize; - _trainSize = trainSize; - _discountFactor = discountFactor; + private CpuAlignedMatrixRow _wTrans; + private Single _observationNoiseVariance; + private Single _observationNoiseMean; + private Single _autoregressionNoiseVariance; + private Single _autoregressionNoiseMean; - _buffer = new FixedSizeQueue(seriesLength); + private int _rank; + private CpuAlignedVector _y; + private Single _nextPrediction; - _alpha = new Single[windowSize - 1]; - _state = new Single[windowSize - 1]; - _x = new CpuAlignedVector(windowSize, CpuMathUtils.GetVectorAlignment()); - _xSmooth = new CpuAlignedVector(windowSize, CpuMathUtils.GetVectorAlignment()); - ShouldComputeForecastIntervals = shouldComputeForecastIntervals; + /// + /// Determines whether the confidence interval required for forecasting. + /// + public bool ShouldComputeForecastIntervals { get; set; } - _observationNoiseVariance = 0; - _autoregressionNoiseVariance = 0; - _observationNoiseMean = 0; - _autoregressionNoiseMean = 0; - _shouldStablize = shouldstablize; - _maxTrendRatio = maxGrowth == null ? Double.PositiveInfinity : ((GrowthRatio)maxGrowth).Ratio; + /// + /// Returns the rank of the subspace used for SSA projection (parameter r). + /// + public int Rank { get { return _rank; } } - _shouldMaintainInfo = shouldMaintainInfo; - if (_shouldMaintainInfo) - { - _info = new ModelInfo(); - _info.WindowSize = _windowSize; - } - } + /// + /// Returns the smoothed (via SSA projection) version of the last series observation fed to the model. + /// + public Single LastSmoothedValue { get { return _state[_windowSize - 2]; } } - /// - /// The copy constructor. - /// - /// An object whose contents are copied to the current object. - private AdaptiveSingularSpectrumSequenceModelerInternal(AdaptiveSingularSpectrumSequenceModelerInternal model) - { - _host = model._host.Register(LoaderSignature); - _host.Assert(model._windowSize >= 2); - _host.Assert(model._seriesLength > model._windowSize); - _host.Assert(model._trainSize > 2 * model._windowSize); - _host.Assert(0 <= model._discountFactor && model._discountFactor <= 1); - _host.Assert(1 <= model._rank && model._rank < model._windowSize); - - _rank = model._rank; - _maxRank = model._maxRank; - _rankSelectionMethod = model._rankSelectionMethod; - _seriesLength = model._seriesLength; - _windowSize = model._windowSize; - _trainSize = model._trainSize; - _discountFactor = model._discountFactor; - ShouldComputeForecastIntervals = model.ShouldComputeForecastIntervals; - _observationNoiseVariance = model._observationNoiseVariance; - _autoregressionNoiseVariance = model._autoregressionNoiseVariance; - _observationNoiseMean = model._observationNoiseMean; - _autoregressionNoiseMean = model._autoregressionNoiseMean; - _nextPrediction = model._nextPrediction; - _maxTrendRatio = model._maxTrendRatio; - _shouldStablize = model._shouldStablize; - _shouldMaintainInfo = model._shouldMaintainInfo; - _info = model._info; - _buffer = model._buffer.Clone(); - _alpha = new Single[_windowSize - 1]; - Array.Copy(model._alpha, _alpha, _windowSize - 1); - _state = new Single[_windowSize - 1]; - Array.Copy(model._state, _state, _windowSize - 1); + /// + /// Returns the last series observation fed to the model. + /// + public Single LastValue { get { return _buffer.Count > 0 ? _buffer[_buffer.Count - 1] : Single.NaN; } } + + private static VersionInfo GetVersionInfo() + { + return new VersionInfo( + modelSignature: "SSAMODLR", + //verWrittenCur: 0x00010001, // Initial + verWrittenCur: 0x00010002, // Added saving _state and _nextPrediction + verReadableCur: 0x00010002, + verWeCanReadBack: 0x00010001, + loaderSignature: LoaderSignature, + loaderAssemblyName: typeof(AdaptiveSingularSpectrumSequenceModelerInternal).Assembly.FullName); + } - _x = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); - _xSmooth = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); + private const int VersionSavingStateAndPrediction = 0x00010002; - if (model._wTrans != null) - { - _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); - _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); - _wTrans.CopyFrom(model._wTrans); - } + /// + /// The constructor for Adaptive SSA model. + /// + /// The exception context. + /// The length of series from the begining used for training. + /// The length of series that is kept in buffer for modeling (parameter N). + /// The length of the window on the series for building the trajectory matrix (parameter L). + /// The discount factor in [0,1] used for online updates (default = 1). + /// The rank selection method (default = Exact). + /// The desired rank of the subspace used for SSA projection (parameter r). This parameter should be in the range in [1, windowSize]. + /// If set to null, the rank is automatically determined based on prediction error minimization. (default = null) + /// The maximum rank considered during the rank selection process. If not provided (i.e. set to null), it is set to windowSize - 1. + /// The flag determining whether the confidence bounds for the point forecasts should be computed. (default = true) + /// The flag determining whether the model should be stabilized. + /// The flag determining whether the meta information for the model needs to be maintained. + /// The maximum growth on the exponential trend + public AdaptiveSingularSpectrumSequenceModelerInternal(IHostEnvironment env, int trainSize, int seriesLength, int windowSize, Single discountFactor = 1, + RankSelectionMethod rankSelectionMethod = RankSelectionMethod.Exact, int? rank = null, int? maxRank = null, + bool shouldComputeForecastIntervals = true, bool shouldstablize = true, bool shouldMaintainInfo = false, GrowthRatio? maxGrowth = null) + : base() + { + Contracts.CheckValue(env, nameof(env)); + _host = env.Register(LoaderSignature); + _host.CheckParam(windowSize >= 2, nameof(windowSize), "The window size should be at least 2."); // ...because otherwise we have nothing to autoregress on + _host.CheckParam(seriesLength > windowSize, nameof(seriesLength), "The series length should be greater than the window size."); + _host.Check(trainSize > 2 * windowSize, "The input series length for training should be greater than twice the window size."); + _host.CheckParam(0 <= discountFactor && discountFactor <= 1, nameof(discountFactor), "The discount factor should be in [0,1]."); + + if (maxRank != null) + { + _maxRank = (int)maxRank; + _host.CheckParam(1 <= _maxRank && _maxRank < windowSize, nameof(maxRank), + "The max rank should be in [1, windowSize)."); } + else + _maxRank = windowSize - 1; - public AdaptiveSingularSpectrumSequenceModelerInternal(IHostEnvironment env, ModelLoadContext ctx) + _rankSelectionMethod = rankSelectionMethod; + if (_rankSelectionMethod == RankSelectionMethod.Fixed) { - Contracts.CheckValue(env, nameof(env)); - _host = env.Register(LoaderSignature); - - // *** Binary format *** - // int: _seriesLength - // int: _windowSize - // int: _trainSize - // int: _rank - // float: _discountFactor - // RankSelectionMethod: _rankSelectionMethod - // bool: isWeightSet - // float[]: _alpha - // float[]: _state - // bool: ShouldComputeForecastIntervals - // float: _observationNoiseVariance - // float: _autoregressionNoiseVariance - // float: _observationNoiseMean - // float: _autoregressionNoiseMean - // float: _nextPrediction - // int: _maxRank - // bool: _shouldStablize - // bool: _shouldMaintainInfo - // double: _maxTrendRatio - // float[]: _wTrans (only if _isWeightSet == true) - - _seriesLength = ctx.Reader.ReadInt32(); - // Do an early check. We'll have the stricter check later. - _host.CheckDecode(_seriesLength > 2); - - _windowSize = ctx.Reader.ReadInt32(); - _host.CheckDecode(_windowSize >= 2); - _host.CheckDecode(_seriesLength > _windowSize); - - _trainSize = ctx.Reader.ReadInt32(); - _host.CheckDecode(_trainSize > 2 * _windowSize); - - _rank = ctx.Reader.ReadInt32(); - _host.CheckDecode(1 <= _rank && _rank < _windowSize); - - _discountFactor = ctx.Reader.ReadSingle(); - _host.CheckDecode(0 <= _discountFactor && _discountFactor <= 1); - - byte temp; - temp = ctx.Reader.ReadByte(); - _rankSelectionMethod = (RankSelectionMethod)temp; - bool isWeightSet = ctx.Reader.ReadBoolByte(); - - _alpha = ctx.Reader.ReadFloatArray(); - _host.CheckDecode(Utils.Size(_alpha) == _windowSize - 1); - - if (ctx.Header.ModelVerReadable >= VersionSavingStateAndPrediction) + if (rank != null) { - _state = ctx.Reader.ReadFloatArray(); - _host.CheckDecode(Utils.Size(_state) == _windowSize - 1); + _rank = (int)rank; + _host.CheckParam(1 <= _rank && _rank < windowSize, nameof(rank), "The rank should be in [1, windowSize)."); } else - _state = new Single[_windowSize - 1]; - - ShouldComputeForecastIntervals = ctx.Reader.ReadBoolByte(); - - _observationNoiseVariance = ctx.Reader.ReadSingle(); - _host.CheckDecode(_observationNoiseVariance >= 0); + _rank = _maxRank; + } - _autoregressionNoiseVariance = ctx.Reader.ReadSingle(); - _host.CheckDecode(_autoregressionNoiseVariance >= 0); + _seriesLength = seriesLength; + _windowSize = windowSize; + _trainSize = trainSize; + _discountFactor = discountFactor; - _observationNoiseMean = ctx.Reader.ReadSingle(); - _autoregressionNoiseMean = ctx.Reader.ReadSingle(); - if (ctx.Header.ModelVerReadable >= VersionSavingStateAndPrediction) - _nextPrediction = ctx.Reader.ReadSingle(); + _buffer = new FixedSizeQueue(seriesLength); - _maxRank = ctx.Reader.ReadInt32(); - _host.CheckDecode(1 <= _maxRank && _maxRank <= _windowSize - 1); + _alpha = new Single[windowSize - 1]; + _state = new Single[windowSize - 1]; + _x = new CpuAlignedVector(windowSize, CpuMathUtils.GetVectorAlignment()); + _xSmooth = new CpuAlignedVector(windowSize, CpuMathUtils.GetVectorAlignment()); + ShouldComputeForecastIntervals = shouldComputeForecastIntervals; - _shouldStablize = ctx.Reader.ReadBoolByte(); - _shouldMaintainInfo = ctx.Reader.ReadBoolByte(); - if (_shouldMaintainInfo) - { - _info = new ModelInfo(); - _info.AutoRegressiveCoefficients = new Single[_windowSize - 1]; - Array.Copy(_alpha, _info.AutoRegressiveCoefficients, _windowSize - 1); + _observationNoiseVariance = 0; + _autoregressionNoiseVariance = 0; + _observationNoiseMean = 0; + _autoregressionNoiseMean = 0; + _shouldStablize = shouldstablize; + _maxTrendRatio = maxGrowth == null ? Double.PositiveInfinity : ((GrowthRatio)maxGrowth).Ratio; - _info.IsStabilized = _shouldStablize; - _info.Rank = _rank; - _info.WindowSize = _windowSize; - } + _shouldMaintainInfo = shouldMaintainInfo; + if (_shouldMaintainInfo) + { + _info = new ModelInfo(); + _info.WindowSize = _windowSize; + } + } - _maxTrendRatio = ctx.Reader.ReadDouble(); - _host.CheckDecode(_maxTrendRatio >= 0); + /// + /// The copy constructor. + /// + /// An object whose contents are copied to the current object. + private AdaptiveSingularSpectrumSequenceModelerInternal(AdaptiveSingularSpectrumSequenceModelerInternal model) + { + _host = model._host.Register(LoaderSignature); + _host.Assert(model._windowSize >= 2); + _host.Assert(model._seriesLength > model._windowSize); + _host.Assert(model._trainSize > 2 * model._windowSize); + _host.Assert(0 <= model._discountFactor && model._discountFactor <= 1); + _host.Assert(1 <= model._rank && model._rank < model._windowSize); + + _rank = model._rank; + _maxRank = model._maxRank; + _rankSelectionMethod = model._rankSelectionMethod; + _seriesLength = model._seriesLength; + _windowSize = model._windowSize; + _trainSize = model._trainSize; + _discountFactor = model._discountFactor; + ShouldComputeForecastIntervals = model.ShouldComputeForecastIntervals; + _observationNoiseVariance = model._observationNoiseVariance; + _autoregressionNoiseVariance = model._autoregressionNoiseVariance; + _observationNoiseMean = model._observationNoiseMean; + _autoregressionNoiseMean = model._autoregressionNoiseMean; + _nextPrediction = model._nextPrediction; + _maxTrendRatio = model._maxTrendRatio; + _shouldStablize = model._shouldStablize; + _shouldMaintainInfo = model._shouldMaintainInfo; + _info = model._info; + _buffer = model._buffer.Clone(); + _alpha = new Single[_windowSize - 1]; + Array.Copy(model._alpha, _alpha, _windowSize - 1); + _state = new Single[_windowSize - 1]; + Array.Copy(model._state, _state, _windowSize - 1); + + _x = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); + _xSmooth = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); + + if (model._wTrans != null) + { + _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); + _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); + _wTrans.CopyFrom(model._wTrans); + } + } - if (isWeightSet) - { - var tempArray = ctx.Reader.ReadFloatArray(); - _host.CheckDecode(Utils.Size(tempArray) == _rank * _windowSize); - _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); - int i = 0; - _wTrans.CopyFrom(tempArray, ref i); - tempArray = ctx.Reader.ReadFloatArray(); - i = 0; - _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); - _y.CopyFrom(tempArray, ref i); - } + public AdaptiveSingularSpectrumSequenceModelerInternal(IHostEnvironment env, ModelLoadContext ctx) + { + Contracts.CheckValue(env, nameof(env)); + _host = env.Register(LoaderSignature); - _buffer = TimeSeriesUtils.DeserializeFixedSizeQueueSingle(ctx.Reader, _host); - _x = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); - _xSmooth = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); + // *** Binary format *** + // int: _seriesLength + // int: _windowSize + // int: _trainSize + // int: _rank + // float: _discountFactor + // RankSelectionMethod: _rankSelectionMethod + // bool: isWeightSet + // float[]: _alpha + // float[]: _state + // bool: ShouldComputeForecastIntervals + // float: _observationNoiseVariance + // float: _autoregressionNoiseVariance + // float: _observationNoiseMean + // float: _autoregressionNoiseMean + // float: _nextPrediction + // int: _maxRank + // bool: _shouldStablize + // bool: _shouldMaintainInfo + // double: _maxTrendRatio + // float[]: _wTrans (only if _isWeightSet == true) + + _seriesLength = ctx.Reader.ReadInt32(); + // Do an early check. We'll have the stricter check later. + _host.CheckDecode(_seriesLength > 2); + + _windowSize = ctx.Reader.ReadInt32(); + _host.CheckDecode(_windowSize >= 2); + _host.CheckDecode(_seriesLength > _windowSize); + + _trainSize = ctx.Reader.ReadInt32(); + _host.CheckDecode(_trainSize > 2 * _windowSize); + + _rank = ctx.Reader.ReadInt32(); + _host.CheckDecode(1 <= _rank && _rank < _windowSize); + + _discountFactor = ctx.Reader.ReadSingle(); + _host.CheckDecode(0 <= _discountFactor && _discountFactor <= 1); + + byte temp; + temp = ctx.Reader.ReadByte(); + _rankSelectionMethod = (RankSelectionMethod)temp; + bool isWeightSet = ctx.Reader.ReadBoolByte(); + + _alpha = ctx.Reader.ReadFloatArray(); + _host.CheckDecode(Utils.Size(_alpha) == _windowSize - 1); + + if (ctx.Header.ModelVerReadable >= VersionSavingStateAndPrediction) + { + _state = ctx.Reader.ReadFloatArray(); + _host.CheckDecode(Utils.Size(_state) == _windowSize - 1); } + else + _state = new Single[_windowSize - 1]; - private protected override void SaveModel(ModelSaveContext ctx) - { - _host.CheckValue(ctx, nameof(ctx)); - ctx.CheckAtModel(); - ctx.SetVersionInfo(GetVersionInfo()); - - _host.Assert(_windowSize >= 2); - _host.Assert(_seriesLength > _windowSize); - _host.Assert(_trainSize > 2 * _windowSize); - _host.Assert(0 <= _discountFactor && _discountFactor <= 1); - _host.Assert(1 <= _rank && _rank < _windowSize); - _host.Assert(Utils.Size(_alpha) == _windowSize - 1); - _host.Assert(_observationNoiseVariance >= 0); - _host.Assert(_autoregressionNoiseVariance >= 0); - _host.Assert(1 <= _maxRank && _maxRank <= _windowSize - 1); - _host.Assert(_maxTrendRatio >= 0); - - // *** Binary format *** - // int: _seriesLength - // int: _windowSize - // int: _trainSize - // int: _rank - // float: _discountFactor - // RankSelectionMethod: _rankSelectionMethod - // bool: _isWeightSet - // float[]: _alpha - // float[]: _state - // bool: ShouldComputeForecastIntervals - // float: _observationNoiseVariance - // float: _autoregressionNoiseVariance - // float: _observationNoiseMean - // float: _autoregressionNoiseMean - // float: _nextPrediction - // int: _maxRank - // bool: _shouldStablize - // bool: _shouldMaintainInfo - // double: _maxTrendRatio - // float[]: _wTrans (only if _isWeightSet == true) - - ctx.Writer.Write(_seriesLength); - ctx.Writer.Write(_windowSize); - ctx.Writer.Write(_trainSize); - ctx.Writer.Write(_rank); - ctx.Writer.Write(_discountFactor); - ctx.Writer.Write((byte)_rankSelectionMethod); - ctx.Writer.WriteBoolByte(_wTrans != null); - ctx.Writer.WriteSingleArray(_alpha); - ctx.Writer.WriteSingleArray(_state); - ctx.Writer.WriteBoolByte(ShouldComputeForecastIntervals); - ctx.Writer.Write(_observationNoiseVariance); - ctx.Writer.Write(_autoregressionNoiseVariance); - ctx.Writer.Write(_observationNoiseMean); - ctx.Writer.Write(_autoregressionNoiseMean); - ctx.Writer.Write(_nextPrediction); - ctx.Writer.Write(_maxRank); - ctx.Writer.WriteBoolByte(_shouldStablize); - ctx.Writer.WriteBoolByte(_shouldMaintainInfo); - ctx.Writer.Write(_maxTrendRatio); - - if (_wTrans != null) - { - // REVIEW: this may not be the most efficient way for serializing an aligned matrix. - var tempArray = new Single[_rank * _windowSize]; - int iv = 0; - _wTrans.CopyTo(tempArray, ref iv); - ctx.Writer.WriteSingleArray(tempArray); - tempArray = new float[_rank]; - iv = 0; - _y.CopyTo(tempArray, ref iv); - ctx.Writer.WriteSingleArray(tempArray); - } + ShouldComputeForecastIntervals = ctx.Reader.ReadBoolByte(); - TimeSeriesUtils.SerializeFixedSizeQueue(_buffer, ctx.Writer); - } + _observationNoiseVariance = ctx.Reader.ReadSingle(); + _host.CheckDecode(_observationNoiseVariance >= 0); - private static void ReconstructSignal(TrajectoryMatrix tMat, Single[] singularVectors, int rank, Single[] output) - { - Contracts.Assert(tMat != null); - Contracts.Assert(1 <= rank && rank <= tMat.WindowSize); - Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * rank); - Contracts.Assert(Utils.Size(output) >= tMat.SeriesLength); + _autoregressionNoiseVariance = ctx.Reader.ReadSingle(); + _host.CheckDecode(_autoregressionNoiseVariance >= 0); - var k = tMat.SeriesLength - tMat.WindowSize + 1; - Contracts.Assert(k > 0); + _observationNoiseMean = ctx.Reader.ReadSingle(); + _autoregressionNoiseMean = ctx.Reader.ReadSingle(); + if (ctx.Header.ModelVerReadable >= VersionSavingStateAndPrediction) + _nextPrediction = ctx.Reader.ReadSingle(); - var v = new Single[k]; - int i; + _maxRank = ctx.Reader.ReadInt32(); + _host.CheckDecode(1 <= _maxRank && _maxRank <= _windowSize - 1); - for (i = 0; i < tMat.SeriesLength; ++i) - output[i] = 0; + _shouldStablize = ctx.Reader.ReadBoolByte(); + _shouldMaintainInfo = ctx.Reader.ReadBoolByte(); + if (_shouldMaintainInfo) + { + _info = new ModelInfo(); + _info.AutoRegressiveCoefficients = new Single[_windowSize - 1]; + Array.Copy(_alpha, _info.AutoRegressiveCoefficients, _windowSize - 1); - for (i = 0; i < rank; ++i) - { - // Reconstructing the i-th eigen triple component and adding it to output - tMat.MultiplyTranspose(singularVectors, v, false, tMat.WindowSize * i, 0); - tMat.RankOneHankelization(singularVectors, v, 1, output, true, tMat.WindowSize * i, 0, 0); - } + _info.IsStabilized = _shouldStablize; + _info.Rank = _rank; + _info.WindowSize = _windowSize; } - private static void ReconstructSignalTailFast(Single[] series, TrajectoryMatrix tMat, Single[] singularVectors, int rank, Single[] output) - { - Contracts.Assert(tMat != null); - Contracts.Assert(1 <= rank && rank <= tMat.WindowSize); - Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * rank); + _maxTrendRatio = ctx.Reader.ReadDouble(); + _host.CheckDecode(_maxTrendRatio >= 0); - int len = 2 * tMat.WindowSize - 1; - Contracts.Assert(Utils.Size(output) >= len); + if (isWeightSet) + { + var tempArray = ctx.Reader.ReadFloatArray(); + _host.CheckDecode(Utils.Size(tempArray) == _rank * _windowSize); + _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); + int i = 0; + _wTrans.CopyFrom(tempArray, ref i); + tempArray = ctx.Reader.ReadFloatArray(); + i = 0; + _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); + _y.CopyFrom(tempArray, ref i); + } - Single v; - /*var k = tMat.SeriesLength - tMat.WindowSize + 1; - Contracts.Assert(k > 0); - var v = new Single[k];*/ + _buffer = TimeSeriesUtils.DeserializeFixedSizeQueueSingle(ctx.Reader, _host); + _x = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); + _xSmooth = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); + } - Single temp; - int i; - int j; - int offset1 = tMat.SeriesLength - 2 * tMat.WindowSize + 1; - int offset2 = tMat.SeriesLength - tMat.WindowSize; + private protected override void SaveModel(ModelSaveContext ctx) + { + _host.CheckValue(ctx, nameof(ctx)); + ctx.CheckAtModel(); + ctx.SetVersionInfo(GetVersionInfo()); - for (i = 0; i < len; ++i) - output[i] = 0; + _host.Assert(_windowSize >= 2); + _host.Assert(_seriesLength > _windowSize); + _host.Assert(_trainSize > 2 * _windowSize); + _host.Assert(0 <= _discountFactor && _discountFactor <= 1); + _host.Assert(1 <= _rank && _rank < _windowSize); + _host.Assert(Utils.Size(_alpha) == _windowSize - 1); + _host.Assert(_observationNoiseVariance >= 0); + _host.Assert(_autoregressionNoiseVariance >= 0); + _host.Assert(1 <= _maxRank && _maxRank <= _windowSize - 1); + _host.Assert(_maxTrendRatio >= 0); + + // *** Binary format *** + // int: _seriesLength + // int: _windowSize + // int: _trainSize + // int: _rank + // float: _discountFactor + // RankSelectionMethod: _rankSelectionMethod + // bool: _isWeightSet + // float[]: _alpha + // float[]: _state + // bool: ShouldComputeForecastIntervals + // float: _observationNoiseVariance + // float: _autoregressionNoiseVariance + // float: _observationNoiseMean + // float: _autoregressionNoiseMean + // float: _nextPrediction + // int: _maxRank + // bool: _shouldStablize + // bool: _shouldMaintainInfo + // double: _maxTrendRatio + // float[]: _wTrans (only if _isWeightSet == true) + + ctx.Writer.Write(_seriesLength); + ctx.Writer.Write(_windowSize); + ctx.Writer.Write(_trainSize); + ctx.Writer.Write(_rank); + ctx.Writer.Write(_discountFactor); + ctx.Writer.Write((byte)_rankSelectionMethod); + ctx.Writer.WriteBoolByte(_wTrans != null); + ctx.Writer.WriteSingleArray(_alpha); + ctx.Writer.WriteSingleArray(_state); + ctx.Writer.WriteBoolByte(ShouldComputeForecastIntervals); + ctx.Writer.Write(_observationNoiseVariance); + ctx.Writer.Write(_autoregressionNoiseVariance); + ctx.Writer.Write(_observationNoiseMean); + ctx.Writer.Write(_autoregressionNoiseMean); + ctx.Writer.Write(_nextPrediction); + ctx.Writer.Write(_maxRank); + ctx.Writer.WriteBoolByte(_shouldStablize); + ctx.Writer.WriteBoolByte(_shouldMaintainInfo); + ctx.Writer.Write(_maxTrendRatio); + + if (_wTrans != null) + { + // REVIEW: this may not be the most efficient way for serializing an aligned matrix. + var tempArray = new Single[_rank * _windowSize]; + int iv = 0; + _wTrans.CopyTo(tempArray, ref iv); + ctx.Writer.WriteSingleArray(tempArray); + tempArray = new float[_rank]; + iv = 0; + _y.CopyTo(tempArray, ref iv); + ctx.Writer.WriteSingleArray(tempArray); + } - for (i = 0; i < rank; ++i) - { - // Reconstructing the i-th eigen triple component and adding it to output - v = 0; - for (j = offset1; j < offset1 + tMat.WindowSize; ++j) - v += series[j] * singularVectors[tMat.WindowSize * i - offset1 + j]; + TimeSeriesUtils.SerializeFixedSizeQueue(_buffer, ctx.Writer); + } - for (j = 0; j < tMat.WindowSize - 1; ++j) - output[j] += v * singularVectors[tMat.WindowSize * i + j]; + private static void ReconstructSignal(TrajectoryMatrix tMat, Single[] singularVectors, int rank, Single[] output) + { + Contracts.Assert(tMat != null); + Contracts.Assert(1 <= rank && rank <= tMat.WindowSize); + Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * rank); + Contracts.Assert(Utils.Size(output) >= tMat.SeriesLength); - temp = v * singularVectors[tMat.WindowSize * (i + 1) - 1]; + var k = tMat.SeriesLength - tMat.WindowSize + 1; + Contracts.Assert(k > 0); - v = 0; - for (j = offset2; j < offset2 + tMat.WindowSize; ++j) - v += series[j] * singularVectors[tMat.WindowSize * i - offset2 + j]; + var v = new Single[k]; + int i; - for (j = tMat.WindowSize; j < 2 * tMat.WindowSize - 1; ++j) - output[j] += v * singularVectors[tMat.WindowSize * (i - 1) + j + 1]; + for (i = 0; i < tMat.SeriesLength; ++i) + output[i] = 0; - temp += v * singularVectors[tMat.WindowSize * i]; - output[tMat.WindowSize - 1] += (temp / 2); - } + for (i = 0; i < rank; ++i) + { + // Reconstructing the i-th eigen triple component and adding it to output + tMat.MultiplyTranspose(singularVectors, v, false, tMat.WindowSize * i, 0); + tMat.RankOneHankelization(singularVectors, v, 1, output, true, tMat.WindowSize * i, 0, 0); } + } - private static void ComputeNoiseMoments(Single[] series, Single[] signal, Single[] alpha, out Single observationNoiseVariance, out Single autoregressionNoiseVariance, - out Single observationNoiseMean, out Single autoregressionNoiseMean, int startIndex = 0) - { - Contracts.Assert(Utils.Size(alpha) > 0); - Contracts.Assert(Utils.Size(signal) > 2 * Utils.Size(alpha)); // To assure that the autoregression noise variance is unbiased. - Contracts.Assert(Utils.Size(series) >= Utils.Size(signal) + startIndex); + private static void ReconstructSignalTailFast(Single[] series, TrajectoryMatrix tMat, Single[] singularVectors, int rank, Single[] output) + { + Contracts.Assert(tMat != null); + Contracts.Assert(1 <= rank && rank <= tMat.WindowSize); + Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * rank); - var signalLength = Utils.Size(signal); - var windowSize = Utils.Size(alpha) + 1; - var k = signalLength - windowSize + 1; - Contracts.Assert(k > 0); + int len = 2 * tMat.WindowSize - 1; + Contracts.Assert(Utils.Size(output) >= len); - var y = new Single[k]; - int i; + Single v; + /*var k = tMat.SeriesLength - tMat.WindowSize + 1; + Contracts.Assert(k > 0); + var v = new Single[k];*/ - observationNoiseMean = 0; - observationNoiseVariance = 0; - autoregressionNoiseMean = 0; - autoregressionNoiseVariance = 0; + Single temp; + int i; + int j; + int offset1 = tMat.SeriesLength - 2 * tMat.WindowSize + 1; + int offset2 = tMat.SeriesLength - tMat.WindowSize; - // Computing the observation noise moments - for (i = 0; i < signalLength; ++i) - observationNoiseMean += (series[i + startIndex] - signal[i]); - observationNoiseMean /= signalLength; + for (i = 0; i < len; ++i) + output[i] = 0; - for (i = 0; i < signalLength; ++i) - observationNoiseVariance += (series[i + startIndex] - signal[i]) * (series[i + startIndex] - signal[i]); - observationNoiseVariance /= signalLength; - observationNoiseVariance -= (observationNoiseMean * observationNoiseMean); + for (i = 0; i < rank; ++i) + { + // Reconstructing the i-th eigen triple component and adding it to output + v = 0; + for (j = offset1; j < offset1 + tMat.WindowSize; ++j) + v += series[j] * singularVectors[tMat.WindowSize * i - offset1 + j]; - // Computing the auto-regression noise moments - TrajectoryMatrix xTM = new TrajectoryMatrix(null, signal, windowSize - 1, signalLength - 1); - xTM.MultiplyTranspose(alpha, y); + for (j = 0; j < tMat.WindowSize - 1; ++j) + output[j] += v * singularVectors[tMat.WindowSize * i + j]; - for (i = 0; i < k; ++i) - autoregressionNoiseMean += (signal[windowSize - 1 + i] - y[i]); - autoregressionNoiseMean /= k; + temp = v * singularVectors[tMat.WindowSize * (i + 1) - 1]; - for (i = 0; i < k; ++i) - { - autoregressionNoiseVariance += (signal[windowSize - 1 + i] - y[i] - autoregressionNoiseMean) * - (signal[windowSize - 1 + i] - y[i] - autoregressionNoiseMean); - } + v = 0; + for (j = offset2; j < offset2 + tMat.WindowSize; ++j) + v += series[j] * singularVectors[tMat.WindowSize * i - offset2 + j]; - autoregressionNoiseVariance /= (k - windowSize + 1); - Contracts.Assert(autoregressionNoiseVariance >= 0); + for (j = tMat.WindowSize; j < 2 * tMat.WindowSize - 1; ++j) + output[j] += v * singularVectors[tMat.WindowSize * (i - 1) + j + 1]; + + temp += v * singularVectors[tMat.WindowSize * i]; + output[tMat.WindowSize - 1] += (temp / 2); } + } - private static int DetermineSignalRank(Single[] series, TrajectoryMatrix tMat, Single[] singularVectors, Single[] singularValues, - Single[] outputSignal, int maxRank) + private static void ComputeNoiseMoments(Single[] series, Single[] signal, Single[] alpha, out Single observationNoiseVariance, out Single autoregressionNoiseVariance, + out Single observationNoiseMean, out Single autoregressionNoiseMean, int startIndex = 0) + { + Contracts.Assert(Utils.Size(alpha) > 0); + Contracts.Assert(Utils.Size(signal) > 2 * Utils.Size(alpha)); // To assure that the autoregression noise variance is unbiased. + Contracts.Assert(Utils.Size(series) >= Utils.Size(signal) + startIndex); + + var signalLength = Utils.Size(signal); + var windowSize = Utils.Size(alpha) + 1; + var k = signalLength - windowSize + 1; + Contracts.Assert(k > 0); + + var y = new Single[k]; + int i; + + observationNoiseMean = 0; + observationNoiseVariance = 0; + autoregressionNoiseMean = 0; + autoregressionNoiseVariance = 0; + + // Computing the observation noise moments + for (i = 0; i < signalLength; ++i) + observationNoiseMean += (series[i + startIndex] - signal[i]); + observationNoiseMean /= signalLength; + + for (i = 0; i < signalLength; ++i) + observationNoiseVariance += (series[i + startIndex] - signal[i]) * (series[i + startIndex] - signal[i]); + observationNoiseVariance /= signalLength; + observationNoiseVariance -= (observationNoiseMean * observationNoiseMean); + + // Computing the auto-regression noise moments + TrajectoryMatrix xTM = new TrajectoryMatrix(null, signal, windowSize - 1, signalLength - 1); + xTM.MultiplyTranspose(alpha, y); + + for (i = 0; i < k; ++i) + autoregressionNoiseMean += (signal[windowSize - 1 + i] - y[i]); + autoregressionNoiseMean /= k; + + for (i = 0; i < k; ++i) { - Contracts.Assert(tMat != null); - Contracts.Assert(Utils.Size(series) >= tMat.SeriesLength); - Contracts.Assert(Utils.Size(outputSignal) >= tMat.SeriesLength); - Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * tMat.WindowSize); - Contracts.Assert(Utils.Size(singularValues) >= tMat.WindowSize); - Contracts.Assert(1 <= maxRank && maxRank <= tMat.WindowSize - 1); - - var inputSeriesLength = tMat.SeriesLength; - var k = inputSeriesLength - tMat.WindowSize + 1; - Contracts.Assert(k > 0); - - var x = new Single[inputSeriesLength]; - var y = new Single[k]; - var alpha = new Single[tMat.WindowSize - 1]; - var v = new Single[k]; - - Single nu = 0; - Double minErr = Double.MaxValue; - int minIndex = maxRank - 1; - int evaluationLength = Math.Min(Math.Max(tMat.WindowSize, 200), k); - - TrajectoryMatrix xTM = new TrajectoryMatrix(null, x, tMat.WindowSize - 1, inputSeriesLength - 1); - - int i; - int j; - int n; - Single temp; - Double error; - Double sumSingularVals = 0; - Single lambda; - Single observationNoiseMean; - - FixedSizeQueue window = new FixedSizeQueue(tMat.WindowSize - 1); - - for (i = 0; i < tMat.WindowSize; ++i) - sumSingularVals += singularValues[i]; - - for (i = 0; i < maxRank; ++i) - { - // Updating the auto-regressive coefficients - lambda = singularVectors[tMat.WindowSize * i + tMat.WindowSize - 1]; - for (j = 0; j < tMat.WindowSize - 1; ++j) - alpha[j] += lambda * singularVectors[tMat.WindowSize * i + j]; + autoregressionNoiseVariance += (signal[windowSize - 1 + i] - y[i] - autoregressionNoiseMean) * + (signal[windowSize - 1 + i] - y[i] - autoregressionNoiseMean); + } + + autoregressionNoiseVariance /= (k - windowSize + 1); + Contracts.Assert(autoregressionNoiseVariance >= 0); + } - // Updating nu - nu += lambda * lambda; + private static int DetermineSignalRank(Single[] series, TrajectoryMatrix tMat, Single[] singularVectors, Single[] singularValues, + Single[] outputSignal, int maxRank) + { + Contracts.Assert(tMat != null); + Contracts.Assert(Utils.Size(series) >= tMat.SeriesLength); + Contracts.Assert(Utils.Size(outputSignal) >= tMat.SeriesLength); + Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * tMat.WindowSize); + Contracts.Assert(Utils.Size(singularValues) >= tMat.WindowSize); + Contracts.Assert(1 <= maxRank && maxRank <= tMat.WindowSize - 1); + + var inputSeriesLength = tMat.SeriesLength; + var k = inputSeriesLength - tMat.WindowSize + 1; + Contracts.Assert(k > 0); + + var x = new Single[inputSeriesLength]; + var y = new Single[k]; + var alpha = new Single[tMat.WindowSize - 1]; + var v = new Single[k]; + + Single nu = 0; + Double minErr = Double.MaxValue; + int minIndex = maxRank - 1; + int evaluationLength = Math.Min(Math.Max(tMat.WindowSize, 200), k); + + TrajectoryMatrix xTM = new TrajectoryMatrix(null, x, tMat.WindowSize - 1, inputSeriesLength - 1); + + int i; + int j; + int n; + Single temp; + Double error; + Double sumSingularVals = 0; + Single lambda; + Single observationNoiseMean; + + FixedSizeQueue window = new FixedSizeQueue(tMat.WindowSize - 1); + + for (i = 0; i < tMat.WindowSize; ++i) + sumSingularVals += singularValues[i]; + + for (i = 0; i < maxRank; ++i) + { + // Updating the auto-regressive coefficients + lambda = singularVectors[tMat.WindowSize * i + tMat.WindowSize - 1]; + for (j = 0; j < tMat.WindowSize - 1; ++j) + alpha[j] += lambda * singularVectors[tMat.WindowSize * i + j]; - // Reconstructing the i-th eigen triple component and adding it to x - tMat.MultiplyTranspose(singularVectors, v, false, tMat.WindowSize * i, 0); - tMat.RankOneHankelization(singularVectors, v, 1, x, true, tMat.WindowSize * i, 0, 0); + // Updating nu + nu += lambda * lambda; - observationNoiseMean = 0; - for (j = 0; j < inputSeriesLength; ++j) - observationNoiseMean += (series[j] - x[j]); - observationNoiseMean /= inputSeriesLength; + // Reconstructing the i-th eigen triple component and adding it to x + tMat.MultiplyTranspose(singularVectors, v, false, tMat.WindowSize * i, 0); + tMat.RankOneHankelization(singularVectors, v, 1, x, true, tMat.WindowSize * i, 0, 0); - for (j = inputSeriesLength - evaluationLength - tMat.WindowSize + 1; j < inputSeriesLength - evaluationLength; ++j) - window.AddLast(x[j]); + observationNoiseMean = 0; + for (j = 0; j < inputSeriesLength; ++j) + observationNoiseMean += (series[j] - x[j]); + observationNoiseMean /= inputSeriesLength; - error = 0; - for (j = inputSeriesLength - evaluationLength; j < inputSeriesLength; ++j) - { - temp = 0; - for (n = 0; n < tMat.WindowSize - 1; ++n) - temp += alpha[n] * window[n]; - - temp /= (1 - nu); - temp += observationNoiseMean; - window.AddLast(temp); - error += Math.Abs(series[j] - temp); - if (error > minErr) - break; - } + for (j = inputSeriesLength - evaluationLength - tMat.WindowSize + 1; j < inputSeriesLength - evaluationLength; ++j) + window.AddLast(x[j]); - if (error < minErr) - { - minErr = error; - minIndex = i; - Array.Copy(x, outputSignal, inputSeriesLength); - } + error = 0; + for (j = inputSeriesLength - evaluationLength; j < inputSeriesLength; ++j) + { + temp = 0; + for (n = 0; n < tMat.WindowSize - 1; ++n) + temp += alpha[n] * window[n]; + + temp /= (1 - nu); + temp += observationNoiseMean; + window.AddLast(temp); + error += Math.Abs(series[j] - temp); + if (error > minErr) + break; } - return minIndex + 1; + if (error < minErr) + { + minErr = error; + minIndex = i; + Array.Copy(x, outputSignal, inputSeriesLength); + } } - internal override void InitState() - { - for (int i = 0; i < _windowSize - 2; ++i) - _state[i] = 0; + return minIndex + 1; + } - _buffer.Clear(); - } + internal override void InitState() + { + for (int i = 0; i < _windowSize - 2; ++i) + _state[i] = 0; + + _buffer.Clear(); + } - private static int DetermineSignalRankFast(Single[] series, TrajectoryMatrix tMat, Single[] singularVectors, Single[] singularValues, int maxRank) + private static int DetermineSignalRankFast(Single[] series, TrajectoryMatrix tMat, Single[] singularVectors, Single[] singularValues, int maxRank) + { + Contracts.Assert(tMat != null); + Contracts.Assert(Utils.Size(series) >= tMat.SeriesLength); + Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * tMat.WindowSize); + Contracts.Assert(Utils.Size(singularValues) >= tMat.WindowSize); + Contracts.Assert(1 <= maxRank && maxRank <= tMat.WindowSize - 1); + + var inputSeriesLength = tMat.SeriesLength; + var k = inputSeriesLength - tMat.WindowSize + 1; + Contracts.Assert(k > 0); + + var x = new Single[tMat.WindowSize - 1]; + var alpha = new Single[tMat.WindowSize - 1]; + Single v; + + Single nu = 0; + Double minErr = Double.MaxValue; + int minIndex = maxRank - 1; + int evaluationLength = Math.Min(Math.Max(tMat.WindowSize, 200), k); + + int i; + int j; + int n; + int offset; + Single temp; + Double error; + Single lambda; + Single observationNoiseMean; + + FixedSizeQueue window = new FixedSizeQueue(tMat.WindowSize - 1); + + for (i = 0; i < maxRank; ++i) { - Contracts.Assert(tMat != null); - Contracts.Assert(Utils.Size(series) >= tMat.SeriesLength); - Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * tMat.WindowSize); - Contracts.Assert(Utils.Size(singularValues) >= tMat.WindowSize); - Contracts.Assert(1 <= maxRank && maxRank <= tMat.WindowSize - 1); - - var inputSeriesLength = tMat.SeriesLength; - var k = inputSeriesLength - tMat.WindowSize + 1; - Contracts.Assert(k > 0); - - var x = new Single[tMat.WindowSize - 1]; - var alpha = new Single[tMat.WindowSize - 1]; - Single v; - - Single nu = 0; - Double minErr = Double.MaxValue; - int minIndex = maxRank - 1; - int evaluationLength = Math.Min(Math.Max(tMat.WindowSize, 200), k); - - int i; - int j; - int n; - int offset; - Single temp; - Double error; - Single lambda; - Single observationNoiseMean; - - FixedSizeQueue window = new FixedSizeQueue(tMat.WindowSize - 1); - - for (i = 0; i < maxRank; ++i) - { - // Updating the auto-regressive coefficients - lambda = singularVectors[tMat.WindowSize * i + tMat.WindowSize - 1]; - for (j = 0; j < tMat.WindowSize - 1; ++j) - alpha[j] += lambda * singularVectors[tMat.WindowSize * i + j]; + // Updating the auto-regressive coefficients + lambda = singularVectors[tMat.WindowSize * i + tMat.WindowSize - 1]; + for (j = 0; j < tMat.WindowSize - 1; ++j) + alpha[j] += lambda * singularVectors[tMat.WindowSize * i + j]; - // Updating nu - nu += lambda * lambda; + // Updating nu + nu += lambda * lambda; - // Reconstructing the i-th eigen triple component and adding it to x - v = 0; - offset = inputSeriesLength - evaluationLength - tMat.WindowSize + 1; + // Reconstructing the i-th eigen triple component and adding it to x + v = 0; + offset = inputSeriesLength - evaluationLength - tMat.WindowSize + 1; - for (j = offset; j <= inputSeriesLength - evaluationLength; ++j) - v += series[j] * singularVectors[tMat.WindowSize * i - offset + j]; + for (j = offset; j <= inputSeriesLength - evaluationLength; ++j) + v += series[j] * singularVectors[tMat.WindowSize * i - offset + j]; - for (j = 0; j < tMat.WindowSize - 1; ++j) - x[j] += v * singularVectors[tMat.WindowSize * i + j]; + for (j = 0; j < tMat.WindowSize - 1; ++j) + x[j] += v * singularVectors[tMat.WindowSize * i + j]; - // Computing the empirical observation noise mean - observationNoiseMean = 0; - for (j = offset; j < inputSeriesLength - evaluationLength; ++j) - observationNoiseMean += (series[j] - x[j - offset]); - observationNoiseMean /= (tMat.WindowSize - 1); + // Computing the empirical observation noise mean + observationNoiseMean = 0; + for (j = offset; j < inputSeriesLength - evaluationLength; ++j) + observationNoiseMean += (series[j] - x[j - offset]); + observationNoiseMean /= (tMat.WindowSize - 1); - for (j = 0; j < tMat.WindowSize - 1; ++j) - window.AddLast(x[j]); + for (j = 0; j < tMat.WindowSize - 1; ++j) + window.AddLast(x[j]); - error = 0; - for (j = inputSeriesLength - evaluationLength; j < inputSeriesLength; ++j) - { - temp = 0; - for (n = 0; n < tMat.WindowSize - 1; ++n) - temp += alpha[n] * window[n]; - - temp /= (1 - nu); - temp += observationNoiseMean; - window.AddLast(temp); - error += Math.Abs(series[j] - temp); - if (error > minErr) - break; - } + error = 0; + for (j = inputSeriesLength - evaluationLength; j < inputSeriesLength; ++j) + { + temp = 0; + for (n = 0; n < tMat.WindowSize - 1; ++n) + temp += alpha[n] * window[n]; + + temp /= (1 - nu); + temp += observationNoiseMean; + window.AddLast(temp); + error += Math.Abs(series[j] - temp); + if (error > minErr) + break; + } - if (error < minErr) - { - minErr = error; - minIndex = i; - } + if (error < minErr) + { + minErr = error; + minIndex = i; } + } + + return minIndex + 1; + } + + private class SignalComponent + { + public Double Phase; + public int Index; + public int Cluster; - return minIndex + 1; + public SignalComponent(Double phase, int index) + { + Phase = phase; + Index = index; } + } - private class SignalComponent + private bool Stabilize() + { + if (Utils.Size(_alpha) == 1) { - public Double Phase; - public int Index; - public int Cluster; + if (_shouldMaintainInfo) + _info.RootsBeforeStabilization = new[] { new Complex(_alpha[0], 0) }; - public SignalComponent(Double phase, int index) + if (_alpha[0] > 1) + _alpha[0] = 1; + else if (_alpha[0] < -1) + _alpha[0] = -1; + + if (_shouldMaintainInfo) { - Phase = phase; - Index = index; + _info.IsStabilized = true; + _info.RootsAfterStabilization = new[] { new Complex(_alpha[0], 0) }; + _info.IsExponentialTrendPresent = false; + _info.IsPolynomialTrendPresent = false; + _info.ExponentialTrendFactor = Math.Abs(_alpha[0]); } + return true; } - private bool Stabilize() + var coeff = new Double[_windowSize - 1]; + Complex[] roots = null; + bool trendFound = false; + bool polynomialTrendFound = false; + Double maxTrendMagnitude = Double.NegativeInfinity; + Double maxNonTrendMagnitude = Double.NegativeInfinity; + Double eps = 1e-9; + Double highFrequenceyBoundry = Math.PI / 2; + var sortedComponents = new List(); + var trendComponents = new List(); + int i; + + // Computing the roots of the characteristic polynomial + for (i = 0; i < _windowSize - 1; ++i) + coeff[i] = -_alpha[i]; + + if (!PolynomialUtils.FindPolynomialRoots(coeff, ref roots)) + return false; + + if (_shouldMaintainInfo) { - if (Utils.Size(_alpha) == 1) - { - if (_shouldMaintainInfo) - _info.RootsBeforeStabilization = new[] { new Complex(_alpha[0], 0) }; + _info.RootsBeforeStabilization = new Complex[_windowSize - 1]; + Array.Copy(roots, _info.RootsBeforeStabilization, _windowSize - 1); + } - if (_alpha[0] > 1) - _alpha[0] = 1; - else if (_alpha[0] < -1) - _alpha[0] = -1; + // Detecting trend components + for (i = 0; i < _windowSize - 1; ++i) + { + if (roots[i].Magnitude > 1 && (Math.Abs(roots[i].Phase) <= eps || Math.Abs(Math.Abs(roots[i].Phase) - Math.PI) <= eps)) + trendComponents.Add(i); + } - if (_shouldMaintainInfo) + // Removing unobserved seasonalities and consequently introducing polynomial trend + for (i = 0; i < _windowSize - 1; ++i) + { + if (roots[i].Phase != 0) + { + if (roots[i].Magnitude >= 1 && 2 * Math.PI / Math.Abs(roots[i].Phase) > _windowSize) { - _info.IsStabilized = true; - _info.RootsAfterStabilization = new[] { new Complex(_alpha[0], 0) }; - _info.IsExponentialTrendPresent = false; - _info.IsPolynomialTrendPresent = false; - _info.ExponentialTrendFactor = Math.Abs(_alpha[0]); + /*if (roots[i].Real > 1) + { + polynomialTrendFound = true; + roots[i] = new Complex(Math.Max(1, roots[i].Magnitude), 0); + maxPolynomialTrendMagnitude = Math.Max(maxPolynomialTrendMagnitude, roots[i].Magnitude); + } + else + roots[i] = Complex.FromPolarCoordinates(1, 0); + //roots[i] = Complex.FromPolarCoordinates(0.99, roots[i].Phase);*/ + + /* if (_maxTrendRatio > 1) + { + roots[i] = new Complex(roots[i].Real, 0); + polynomialTrendFound = true; + } + else + roots[i] = roots[i].Imaginary > 0 ? new Complex(roots[i].Real, 0) : new Complex(1, 0);*/ + + roots[i] = new Complex(roots[i].Real, 0); + polynomialTrendFound = true; + + if (_shouldMaintainInfo) + _info.IsArtificialSeasonalityRemoved = true; } - return true; + else if (roots[i].Magnitude > 1) + sortedComponents.Add(new SignalComponent(roots[i].Phase, i)); } + } - var coeff = new Double[_windowSize - 1]; - Complex[] roots = null; - bool trendFound = false; - bool polynomialTrendFound = false; - Double maxTrendMagnitude = Double.NegativeInfinity; - Double maxNonTrendMagnitude = Double.NegativeInfinity; - Double eps = 1e-9; - Double highFrequenceyBoundry = Math.PI / 2; - var sortedComponents = new List(); - var trendComponents = new List(); - int i; - - // Computing the roots of the characteristic polynomial - for (i = 0; i < _windowSize - 1; ++i) - coeff[i] = -_alpha[i]; - - if (!PolynomialUtils.FindPolynomialRoots(coeff, ref roots)) - return false; + if (_maxTrendRatio > 1) + { + // Combining the close exponential-seasonal components + if (sortedComponents.Count > 1 && polynomialTrendFound) + { + sortedComponents.Sort((a, b) => a.Phase.CompareTo(b.Phase)); + var clusterNum = 0; - if (_shouldMaintainInfo) - { - _info.RootsBeforeStabilization = new Complex[_windowSize - 1]; - Array.Copy(roots, _info.RootsBeforeStabilization, _windowSize - 1); - } - - // Detecting trend components - for (i = 0; i < _windowSize - 1; ++i) - { - if (roots[i].Magnitude > 1 && (Math.Abs(roots[i].Phase) <= eps || Math.Abs(Math.Abs(roots[i].Phase) - Math.PI) <= eps)) - trendComponents.Add(i); - } - - // Removing unobserved seasonalities and consequently introducing polynomial trend - for (i = 0; i < _windowSize - 1; ++i) - { - if (roots[i].Phase != 0) + for (i = 0; i < sortedComponents.Count - 1; ++i) { - if (roots[i].Magnitude >= 1 && 2 * Math.PI / Math.Abs(roots[i].Phase) > _windowSize) + if ((sortedComponents[i].Phase < 0 && sortedComponents[i + 1].Phase < 0) || + (sortedComponents[i].Phase > 0 && sortedComponents[i + 1].Phase > 0)) { - /*if (roots[i].Real > 1) - { - polynomialTrendFound = true; - roots[i] = new Complex(Math.Max(1, roots[i].Magnitude), 0); - maxPolynomialTrendMagnitude = Math.Max(maxPolynomialTrendMagnitude, roots[i].Magnitude); - } - else - roots[i] = Complex.FromPolarCoordinates(1, 0); - //roots[i] = Complex.FromPolarCoordinates(0.99, roots[i].Phase);*/ - - /* if (_maxTrendRatio > 1) - { - roots[i] = new Complex(roots[i].Real, 0); - polynomialTrendFound = true; - } - else - roots[i] = roots[i].Imaginary > 0 ? new Complex(roots[i].Real, 0) : new Complex(1, 0);*/ - - roots[i] = new Complex(roots[i].Real, 0); - polynomialTrendFound = true; - - if (_shouldMaintainInfo) - _info.IsArtificialSeasonalityRemoved = true; + sortedComponents[i].Cluster = clusterNum; + if (Math.Abs(sortedComponents[i + 1].Phase - sortedComponents[i].Phase) > 0.05) + clusterNum++; + sortedComponents[i + 1].Cluster = clusterNum; } - else if (roots[i].Magnitude > 1) - sortedComponents.Add(new SignalComponent(roots[i].Phase, i)); + else + clusterNum++; } - } - if (_maxTrendRatio > 1) - { - // Combining the close exponential-seasonal components - if (sortedComponents.Count > 1 && polynomialTrendFound) + int start = 0; + bool polynomialSeasonalityFound = false; + Double largestSeasonalityPhase = 0; + for (i = 1; i < sortedComponents.Count; ++i) { - sortedComponents.Sort((a, b) => a.Phase.CompareTo(b.Phase)); - var clusterNum = 0; - - for (i = 0; i < sortedComponents.Count - 1; ++i) + if (sortedComponents[i].Cluster != sortedComponents[i - 1].Cluster) { - if ((sortedComponents[i].Phase < 0 && sortedComponents[i + 1].Phase < 0) || - (sortedComponents[i].Phase > 0 && sortedComponents[i + 1].Phase > 0)) + if (i - start > 1) // There are more than one point in the cluster { - sortedComponents[i].Cluster = clusterNum; - if (Math.Abs(sortedComponents[i + 1].Phase - sortedComponents[i].Phase) > 0.05) - clusterNum++; - sortedComponents[i + 1].Cluster = clusterNum; - } - else - clusterNum++; - } + Double avgPhase = 0; + Double avgMagnitude = 0; - int start = 0; - bool polynomialSeasonalityFound = false; - Double largestSeasonalityPhase = 0; - for (i = 1; i < sortedComponents.Count; ++i) - { - if (sortedComponents[i].Cluster != sortedComponents[i - 1].Cluster) - { - if (i - start > 1) // There are more than one point in the cluster + for (var j = start; j < i; ++j) { - Double avgPhase = 0; - Double avgMagnitude = 0; - - for (var j = start; j < i; ++j) - { - avgPhase += sortedComponents[j].Phase; - avgMagnitude += roots[sortedComponents[j].Index].Magnitude; - } - avgPhase /= (i - start); - avgMagnitude /= (i - start); - - for (var j = start; j < i; ++j) - roots[sortedComponents[j].Index] = Complex.FromPolarCoordinates(avgMagnitude, - avgPhase); - - if (!polynomialSeasonalityFound && avgPhase > 0) - { - largestSeasonalityPhase = avgPhase; - polynomialSeasonalityFound = true; - } + avgPhase += sortedComponents[j].Phase; + avgMagnitude += roots[sortedComponents[j].Index].Magnitude; } + avgPhase /= (i - start); + avgMagnitude /= (i - start); - start = i; - } - } - } - - // Combining multiple exponential trends into polynomial ones - if (!polynomialTrendFound) - { - var ind1 = -1; - var ind2 = -1; + for (var j = start; j < i; ++j) + roots[sortedComponents[j].Index] = Complex.FromPolarCoordinates(avgMagnitude, + avgPhase); - foreach (var ind in trendComponents) - { - if (Math.Abs(roots[ind].Phase) <= eps) - { - ind1 = ind; - break; + if (!polynomialSeasonalityFound && avgPhase > 0) + { + largestSeasonalityPhase = avgPhase; + polynomialSeasonalityFound = true; + } } - } - for (i = 0; i < _windowSize - 1; ++i) - { - if (Math.Abs(roots[i].Phase) <= eps && 0.9 <= roots[i].Magnitude && i != ind1) - { - ind2 = i; - break; - } - } - - if (ind1 >= 0 && ind2 >= 0 && ind1 != ind2) - { - roots[ind1] = Complex.FromPolarCoordinates(1, 0); - roots[ind2] = Complex.FromPolarCoordinates(1, 0); - polynomialTrendFound = true; + start = i; } } } - if (polynomialTrendFound) // Suppress the exponential trend + // Combining multiple exponential trends into polynomial ones + if (!polynomialTrendFound) { - maxTrendMagnitude = Math.Min(1, _maxTrendRatio); + var ind1 = -1; + var ind2 = -1; + foreach (var ind in trendComponents) - roots[ind] = Complex.FromPolarCoordinates(0.99, roots[ind].Phase); - } - else - { - // Spotting the exponential trend components and finding the max trend magnitude - for (i = 0; i < _windowSize - 1; ++i) { - if (roots[i].Magnitude > 1 && Math.Abs(roots[i].Phase) <= eps) + if (Math.Abs(roots[ind].Phase) <= eps) { - trendFound = true; - maxTrendMagnitude = Math.Max(maxTrendMagnitude, roots[i].Magnitude); + ind1 = ind; + break; } - else - maxNonTrendMagnitude = Math.Max(maxNonTrendMagnitude, roots[i].Magnitude); } - if (!trendFound) - maxTrendMagnitude = 1; - - maxTrendMagnitude = Math.Min(maxTrendMagnitude, _maxTrendRatio); - } + for (i = 0; i < _windowSize - 1; ++i) + { + if (Math.Abs(roots[i].Phase) <= eps && 0.9 <= roots[i].Magnitude && i != ind1) + { + ind2 = i; + break; + } + } - // Squeezing all components below the maximum trend magnitude - var smallTrendMagnitude = Math.Min(maxTrendMagnitude, (maxTrendMagnitude + 1) / 2); - for (i = 0; i < _windowSize - 1; ++i) - { - if (roots[i].Magnitude >= maxTrendMagnitude) + if (ind1 >= 0 && ind2 >= 0 && ind1 != ind2) { - if ((highFrequenceyBoundry < roots[i].Phase && roots[i].Phase < Math.PI - eps) || - (-Math.PI + eps < roots[i].Phase && roots[i].Phase < -highFrequenceyBoundry)) - roots[i] = Complex.FromPolarCoordinates(smallTrendMagnitude, roots[i].Phase); - else - roots[i] = Complex.FromPolarCoordinates(maxTrendMagnitude, roots[i].Phase); + roots[ind1] = Complex.FromPolarCoordinates(1, 0); + roots[ind2] = Complex.FromPolarCoordinates(1, 0); + polynomialTrendFound = true; } } + } - // Correcting all the other trend components + if (polynomialTrendFound) // Suppress the exponential trend + { + maxTrendMagnitude = Math.Min(1, _maxTrendRatio); + foreach (var ind in trendComponents) + roots[ind] = Complex.FromPolarCoordinates(0.99, roots[ind].Phase); + } + else + { + // Spotting the exponential trend components and finding the max trend magnitude for (i = 0; i < _windowSize - 1; ++i) { - var phase = roots[i].Phase; - if (Math.Abs(phase) <= eps) - roots[i] = new Complex(roots[i].Magnitude, 0); - else if (Math.Abs(phase - Math.PI) <= eps) - roots[i] = new Complex(-roots[i].Magnitude, 0); - else if (Math.Abs(phase + Math.PI) <= eps) - roots[i] = new Complex(-roots[i].Magnitude, 0); + if (roots[i].Magnitude > 1 && Math.Abs(roots[i].Phase) <= eps) + { + trendFound = true; + maxTrendMagnitude = Math.Max(maxTrendMagnitude, roots[i].Magnitude); + } + else + maxNonTrendMagnitude = Math.Max(maxNonTrendMagnitude, roots[i].Magnitude); } - // Computing the characteristic polynomial from the modified roots - try - { - if (!PolynomialUtils.FindPolynomialCoefficients(roots, ref coeff)) - return false; - } - catch (OverflowException) - { - return false; - } + if (!trendFound) + maxTrendMagnitude = 1; - // Updating alpha - for (i = 0; i < _windowSize - 1; ++i) - _alpha[i] = (Single)(-coeff[i]); + maxTrendMagnitude = Math.Min(maxTrendMagnitude, _maxTrendRatio); + } - if (_shouldMaintainInfo) + // Squeezing all components below the maximum trend magnitude + var smallTrendMagnitude = Math.Min(maxTrendMagnitude, (maxTrendMagnitude + 1) / 2); + for (i = 0; i < _windowSize - 1; ++i) + { + if (roots[i].Magnitude >= maxTrendMagnitude) { - _info.RootsAfterStabilization = roots; - _info.IsStabilized = true; - _info.IsPolynomialTrendPresent = polynomialTrendFound; - _info.IsExponentialTrendPresent = maxTrendMagnitude > 1; - _info.ExponentialTrendFactor = maxTrendMagnitude; + if ((highFrequenceyBoundry < roots[i].Phase && roots[i].Phase < Math.PI - eps) || + (-Math.PI + eps < roots[i].Phase && roots[i].Phase < -highFrequenceyBoundry)) + roots[i] = Complex.FromPolarCoordinates(smallTrendMagnitude, roots[i].Phase); + else + roots[i] = Complex.FromPolarCoordinates(maxTrendMagnitude, roots[i].Phase); } + } - return true; + // Correcting all the other trend components + for (i = 0; i < _windowSize - 1; ++i) + { + var phase = roots[i].Phase; + if (Math.Abs(phase) <= eps) + roots[i] = new Complex(roots[i].Magnitude, 0); + else if (Math.Abs(phase - Math.PI) <= eps) + roots[i] = new Complex(-roots[i].Magnitude, 0); + else if (Math.Abs(phase + Math.PI) <= eps) + roots[i] = new Complex(-roots[i].Magnitude, 0); } - /// - /// Feeds the next observation on the series to the model and as a result changes the state of the model. - /// - /// The next observation on the series. - /// Determines whether the model parameters also need to be updated upon consuming the new observation (default = false). - internal override void Consume(ref Single input, bool updateModel = false) + // Computing the characteristic polynomial from the modified roots + try { - if (Single.IsNaN(input)) - return; + if (!PolynomialUtils.FindPolynomialCoefficients(roots, ref coeff)) + return false; + } + catch (OverflowException) + { + return false; + } - int i; + // Updating alpha + for (i = 0; i < _windowSize - 1; ++i) + _alpha[i] = (Single)(-coeff[i]); - if (_wTrans == null) - { - _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); - _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); - Single[] vecs = new Single[_rank * _windowSize]; + if (_shouldMaintainInfo) + { + _info.RootsAfterStabilization = roots; + _info.IsStabilized = true; + _info.IsPolynomialTrendPresent = polynomialTrendFound; + _info.IsExponentialTrendPresent = maxTrendMagnitude > 1; + _info.ExponentialTrendFactor = maxTrendMagnitude; + } - for (i = 0; i < _rank; ++i) - vecs[(_windowSize + 1) * i] = 1; + return true; + } - i = 0; - _wTrans.CopyFrom(vecs, ref i); - } + /// + /// Feeds the next observation on the series to the model and as a result changes the state of the model. + /// + /// The next observation on the series. + /// Determines whether the model parameters also need to be updated upon consuming the new observation (default = false). + internal override void Consume(ref Single input, bool updateModel = false) + { + if (Single.IsNaN(input)) + return; - // Forming vector x + int i; - if (_buffer.Count == 0) - { - for (i = 0; i < _windowSize - 1; ++i) - _buffer.AddLast(_state[i]); - } + if (_wTrans == null) + { + _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); + _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); + Single[] vecs = new Single[_rank * _windowSize]; - int len = _buffer.Count; - for (i = 0; i < _windowSize - len - 1; ++i) - _x[i] = 0; - for (i = Math.Max(0, len - _windowSize + 1); i < len; ++i) - _x[i - len + _windowSize - 1] = _buffer[i]; - _x[_windowSize - 1] = input; + for (i = 0; i < _rank; ++i) + vecs[(_windowSize + 1) * i] = 1; - // Computing y: Eq. (11) in https://hal-institut-mines-telecom.archives-ouvertes.fr/hal-00479772/file/twocolumns.pdf - CpuAligenedMathUtils.MatTimesSrc(_wTrans, _x, _y); + i = 0; + _wTrans.CopyFrom(vecs, ref i); + } - // Updating the state vector - CpuAligenedMathUtils.MatTranTimesSrc(_wTrans, _y, _xSmooth); + // Forming vector x - _nextPrediction = _autoregressionNoiseMean + _observationNoiseMean; - for (i = 0; i < _windowSize - 2; ++i) - { - _state[i] = ((_windowSize - 2 - i) * _state[i + 1] + _xSmooth[i + 1]) / (_windowSize - 1 - i); - _nextPrediction += _state[i] * _alpha[i]; - } - _state[_windowSize - 2] = _xSmooth[_windowSize - 1]; - _nextPrediction += _state[_windowSize - 2] * _alpha[_windowSize - 2]; + if (_buffer.Count == 0) + { + for (i = 0; i < _windowSize - 1; ++i) + _buffer.AddLast(_state[i]); + } - if (updateModel) - { - // REVIEW: to be implemented in the next version based on the FAPI algorithm - // in https://hal-institut-mines-telecom.archives-ouvertes.fr/hal-00479772/file/twocolumns.pdf. - } + int len = _buffer.Count; + for (i = 0; i < _windowSize - len - 1; ++i) + _x[i] = 0; + for (i = Math.Max(0, len - _windowSize + 1); i < len; ++i) + _x[i - len + _windowSize - 1] = _buffer[i]; + _x[_windowSize - 1] = input; - _buffer.AddLast(input); + // Computing y: Eq. (11) in https://hal-institut-mines-telecom.archives-ouvertes.fr/hal-00479772/file/twocolumns.pdf + CpuAligenedMathUtils.MatTimesSrc(_wTrans, _x, _y); + + // Updating the state vector + CpuAligenedMathUtils.MatTranTimesSrc(_wTrans, _y, _xSmooth); + + _nextPrediction = _autoregressionNoiseMean + _observationNoiseMean; + for (i = 0; i < _windowSize - 2; ++i) + { + _state[i] = ((_windowSize - 2 - i) * _state[i + 1] + _xSmooth[i + 1]) / (_windowSize - 1 - i); + _nextPrediction += _state[i] * _alpha[i]; } + _state[_windowSize - 2] = _xSmooth[_windowSize - 1]; + _nextPrediction += _state[_windowSize - 2] * _alpha[_windowSize - 2]; - /// - /// Train the model parameters based on a training series. - /// - /// The training time-series. - internal override void Train(FixedSizeQueue data) + if (updateModel) { - _host.CheckParam(data != null, nameof(data), "The input series for training cannot be null."); - _host.CheckParam(data.Count >= _trainSize, nameof(data), "The input series for training does not have enough points for training."); + // REVIEW: to be implemented in the next version based on the FAPI algorithm + // in https://hal-institut-mines-telecom.archives-ouvertes.fr/hal-00479772/file/twocolumns.pdf. + } - Single[] dataArray = new Single[_trainSize]; + _buffer.AddLast(input); + } - int i; - int count; - for (i = 0, count = 0; count < _trainSize && i < data.Count; ++i) - if (!Single.IsNaN(data[i])) - dataArray[count++] = data[i]; + /// + /// Train the model parameters based on a training series. + /// + /// The training time-series. + internal override void Train(FixedSizeQueue data) + { + _host.CheckParam(data != null, nameof(data), "The input series for training cannot be null."); + _host.CheckParam(data.Count >= _trainSize, nameof(data), "The input series for training does not have enough points for training."); - if (_shouldMaintainInfo) - { - _info = new ModelInfo(); - _info.WindowSize = _windowSize; - } + Single[] dataArray = new Single[_trainSize]; - if (count <= 2 * _windowSize) - { -#if !TLCSSA - using (var ch = _host.Start("Train")) - ch.Warning( - "Training cannot be completed because the input series for training does not have enough points."); -#endif - } - else - { - if (count != _trainSize) - Array.Resize(ref dataArray, count); + int i; + int count; + for (i = 0, count = 0; count < _trainSize && i < data.Count; ++i) + if (!Single.IsNaN(data[i])) + dataArray[count++] = data[i]; - TrainCore(dataArray, count); - } + if (_shouldMaintainInfo) + { + _info = new ModelInfo(); + _info.WindowSize = _windowSize; } + if (count <= 2 * _windowSize) + { #if !TLCSSA - /// - /// Train the model parameters based on a training series. - /// - /// The training time-series. - internal override void Train(RoleMappedData data) + using (var ch = _host.Start("Train")) + ch.Warning( + "Training cannot be completed because the input series for training does not have enough points."); +#endif + } + else { - _host.CheckValue(data, nameof(data)); - _host.CheckParam(data.Schema.Feature.HasValue, nameof(data), "Must have features column."); - var featureCol = data.Schema.Feature.Value; - if (featureCol.Type != NumberDataViewType.Single) - throw _host.ExceptSchemaMismatch(nameof(data), "feature", featureCol.Name, "Single", featureCol.Type.ToString()); + if (count != _trainSize) + Array.Resize(ref dataArray, count); - Single[] dataArray = new Single[_trainSize]; + TrainCore(dataArray, count); + } + } - int count = 0; - using (var cursor = data.Data.GetRowCursor(featureCol)) - { - var getVal = cursor.GetGetter(featureCol); - Single val = default; - while (cursor.MoveNext() && count < _trainSize) - { - getVal(ref val); - if (!Single.IsNaN(val)) - dataArray[count++] = val; - } - } +#if !TLCSSA + /// + /// Train the model parameters based on a training series. + /// + /// The training time-series. + internal override void Train(RoleMappedData data) + { + _host.CheckValue(data, nameof(data)); + _host.CheckParam(data.Schema.Feature.HasValue, nameof(data), "Must have features column."); + var featureCol = data.Schema.Feature.Value; + if (featureCol.Type != NumberDataViewType.Single) + throw _host.ExceptSchemaMismatch(nameof(data), "feature", featureCol.Name, "Single", featureCol.Type.ToString()); - if (_shouldMaintainInfo) - { - _info = new ModelInfo(); - _info.WindowSize = _windowSize; - } + Single[] dataArray = new Single[_trainSize]; - if (count <= 2 * _windowSize) - { - using (var ch = _host.Start("Train")) - ch.Warning("Training cannot be completed because the input series for training does not have enough points."); - } - else + int count = 0; + using (var cursor = data.Data.GetRowCursor(featureCol)) + { + var getVal = cursor.GetGetter(featureCol); + Single val = default; + while (cursor.MoveNext() && count < _trainSize) { - if (count != _trainSize) - Array.Resize(ref dataArray, count); - - TrainCore(dataArray, count); + getVal(ref val); + if (!Single.IsNaN(val)) + dataArray[count++] = val; } } -#endif - private void TrainCore(Single[] dataArray, int originalSeriesLength) + if (_shouldMaintainInfo) { - _host.Assert(Utils.Size(dataArray) > 0); - Single[] singularVals; - Single[] leftSingularVecs; - var learnNaiveModel = false; + _info = new ModelInfo(); + _info.WindowSize = _windowSize; + } - var signalLength = _rankSelectionMethod == RankSelectionMethod.Exact ? originalSeriesLength : 2 * _windowSize - 1;//originalSeriesLength; - var signal = new Single[signalLength]; + if (count <= 2 * _windowSize) + { + using (var ch = _host.Start("Train")) + ch.Warning("Training cannot be completed because the input series for training does not have enough points."); + } + else + { + if (count != _trainSize) + Array.Resize(ref dataArray, count); - int i; - // Creating the trajectory matrix for the series - TrajectoryMatrix tMat = new TrajectoryMatrix(_host, dataArray, _windowSize, originalSeriesLength); + TrainCore(dataArray, count); + } + } +#endif - // Computing the SVD of the trajectory matrix - if (!tMat.ComputeSvd(out singularVals, out leftSingularVecs)) - learnNaiveModel = true; - else + private void TrainCore(Single[] dataArray, int originalSeriesLength) + { + _host.Assert(Utils.Size(dataArray) > 0); + Single[] singularVals; + Single[] leftSingularVecs; + var learnNaiveModel = false; + + var signalLength = _rankSelectionMethod == RankSelectionMethod.Exact ? originalSeriesLength : 2 * _windowSize - 1;//originalSeriesLength; + var signal = new Single[signalLength]; + + int i; + // Creating the trajectory matrix for the series + TrajectoryMatrix tMat = new TrajectoryMatrix(_host, dataArray, _windowSize, originalSeriesLength); + + // Computing the SVD of the trajectory matrix + if (!tMat.ComputeSvd(out singularVals, out leftSingularVecs)) + learnNaiveModel = true; + else + { + for (i = 0; i < _windowSize * _maxRank; ++i) { - for (i = 0; i < _windowSize * _maxRank; ++i) + if (Single.IsNaN(leftSingularVecs[i])) { - if (Single.IsNaN(leftSingularVecs[i])) - { - learnNaiveModel = true; - break; - } + learnNaiveModel = true; + break; } } + } - // Checking for standard eigenvectors, if found reduce the window size and reset training. - if (!learnNaiveModel) + // Checking for standard eigenvectors, if found reduce the window size and reset training. + if (!learnNaiveModel) + { + for (i = 0; i < _windowSize; ++i) { - for (i = 0; i < _windowSize; ++i) + var v = leftSingularVecs[(i + 1) * _windowSize - 1]; + if (v * v == 1) { - var v = leftSingularVecs[(i + 1) * _windowSize - 1]; - if (v * v == 1) + if (_windowSize > 2) { - if (_windowSize > 2) - { - _windowSize--; - _maxRank = _windowSize / 2; - _alpha = new Single[_windowSize - 1]; - _state = new Single[_windowSize - 1]; - _x = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); - _xSmooth = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); - - TrainCore(dataArray, originalSeriesLength); - return; - } - else - { - learnNaiveModel = true; - break; - } + _windowSize--; + _maxRank = _windowSize / 2; + _alpha = new Single[_windowSize - 1]; + _state = new Single[_windowSize - 1]; + _x = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); + _xSmooth = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); + + TrainCore(dataArray, originalSeriesLength); + return; + } + else + { + learnNaiveModel = true; + break; } } } + } - // Learn the naive (averaging) model in case the eigen decomposition is not possible - if (learnNaiveModel) - { + // Learn the naive (averaging) model in case the eigen decomposition is not possible + if (learnNaiveModel) + { #if !TLCSSA - using (var ch = _host.Start("Train")) - ch.Warning("The precise SSA model cannot be trained."); + using (var ch = _host.Start("Train")) + ch.Warning("The precise SSA model cannot be trained."); #endif - _rank = 1; - var temp = (Single)(1f / Math.Sqrt(_windowSize)); - for (i = 0; i < _windowSize; ++i) - leftSingularVecs[i] = temp; - } - else - { - // Computing the signal rank - if (_rankSelectionMethod == RankSelectionMethod.Exact) - _rank = DetermineSignalRank(dataArray, tMat, leftSingularVecs, singularVals, signal, _maxRank); - else if (_rankSelectionMethod == RankSelectionMethod.Fast) - _rank = DetermineSignalRankFast(dataArray, tMat, leftSingularVecs, singularVals, _maxRank); - } + _rank = 1; + var temp = (Single)(1f / Math.Sqrt(_windowSize)); + for (i = 0; i < _windowSize; ++i) + leftSingularVecs[i] = temp; + } + else + { + // Computing the signal rank + if (_rankSelectionMethod == RankSelectionMethod.Exact) + _rank = DetermineSignalRank(dataArray, tMat, leftSingularVecs, singularVals, signal, _maxRank); + else if (_rankSelectionMethod == RankSelectionMethod.Fast) + _rank = DetermineSignalRankFast(dataArray, tMat, leftSingularVecs, singularVals, _maxRank); + } - // Setting the the y vector - _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); + // Setting the the y vector + _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); - // Setting the weight matrix - _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); - i = 0; - _wTrans.CopyFrom(leftSingularVecs, ref i); + // Setting the weight matrix + _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); + i = 0; + _wTrans.CopyFrom(leftSingularVecs, ref i); - // Setting alpha - Single nu = 0; - for (i = 0; i < _rank; ++i) - { - _y[i] = leftSingularVecs[_windowSize * (i + 1) - 1]; - nu += _y[i] * _y[i]; - } + // Setting alpha + Single nu = 0; + for (i = 0; i < _rank; ++i) + { + _y[i] = leftSingularVecs[_windowSize * (i + 1) - 1]; + nu += _y[i] * _y[i]; + } - CpuAligenedMathUtils.MatTranTimesSrc(_wTrans, _y, _xSmooth); - for (i = 0; i < _windowSize - 1; ++i) - _alpha[i] = _xSmooth[i] / (1 - nu); + CpuAligenedMathUtils.MatTranTimesSrc(_wTrans, _y, _xSmooth); + for (i = 0; i < _windowSize - 1; ++i) + _alpha[i] = _xSmooth[i] / (1 - nu); - // Stabilizing the model - if (_shouldStablize && !learnNaiveModel) + // Stabilizing the model + if (_shouldStablize && !learnNaiveModel) + { + if (!Stabilize()) { - if (!Stabilize()) - { #if !TLCSSA - using (var ch = _host.Start("Train")) - ch.Warning("The trained model cannot be stablized."); + using (var ch = _host.Start("Train")) + ch.Warning("The trained model cannot be stablized."); #endif - } } + } - // Computing the noise moments - if (ShouldComputeForecastIntervals) - { - if (_rankSelectionMethod != RankSelectionMethod.Exact) - ReconstructSignalTailFast(dataArray, tMat, leftSingularVecs, _rank, signal); - - ComputeNoiseMoments(dataArray, signal, _alpha, out _observationNoiseVariance, out _autoregressionNoiseVariance, - out _observationNoiseMean, out _autoregressionNoiseMean, originalSeriesLength - signalLength); - _observationNoiseMean = 0; - _autoregressionNoiseMean = 0; - } + // Computing the noise moments + if (ShouldComputeForecastIntervals) + { + if (_rankSelectionMethod != RankSelectionMethod.Exact) + ReconstructSignalTailFast(dataArray, tMat, leftSingularVecs, _rank, signal); - // Setting the state - _nextPrediction = _autoregressionNoiseMean + _observationNoiseMean; + ComputeNoiseMoments(dataArray, signal, _alpha, out _observationNoiseVariance, out _autoregressionNoiseVariance, + out _observationNoiseMean, out _autoregressionNoiseMean, originalSeriesLength - signalLength); + _observationNoiseMean = 0; + _autoregressionNoiseMean = 0; + } - if (_buffer.Count > 0) // Use the buffer to set the state when there are data points pushed into the buffer using the Consume() method - { - int len = _buffer.Count; - for (i = 0; i < _windowSize - len; ++i) - _x[i] = 0; - for (i = Math.Max(0, len - _windowSize); i < len; ++i) - _x[i - len + _windowSize] = _buffer[i]; - } - else // use the training data points otherwise - { - for (i = originalSeriesLength - _windowSize; i < originalSeriesLength; ++i) - _x[i - originalSeriesLength + _windowSize] = dataArray[i]; - } + // Setting the state + _nextPrediction = _autoregressionNoiseMean + _observationNoiseMean; - CpuAligenedMathUtils.MatTimesSrc(_wTrans, _x, _y); - CpuAligenedMathUtils.MatTranTimesSrc(_wTrans, _y, _xSmooth); + if (_buffer.Count > 0) // Use the buffer to set the state when there are data points pushed into the buffer using the Consume() method + { + int len = _buffer.Count; + for (i = 0; i < _windowSize - len; ++i) + _x[i] = 0; + for (i = Math.Max(0, len - _windowSize); i < len; ++i) + _x[i - len + _windowSize] = _buffer[i]; + } + else // use the training data points otherwise + { + for (i = originalSeriesLength - _windowSize; i < originalSeriesLength; ++i) + _x[i - originalSeriesLength + _windowSize] = dataArray[i]; + } - for (i = 1; i < _windowSize; ++i) - { - _state[i - 1] = _xSmooth[i]; - _nextPrediction += _state[i - 1] * _alpha[i - 1]; - } + CpuAligenedMathUtils.MatTimesSrc(_wTrans, _x, _y); + CpuAligenedMathUtils.MatTranTimesSrc(_wTrans, _y, _xSmooth); - if (_shouldMaintainInfo) - { - _info.IsTrained = true; - _info.WindowSize = _windowSize; - _info.AutoRegressiveCoefficients = new Single[_windowSize - 1]; - Array.Copy(_alpha, _info.AutoRegressiveCoefficients, _windowSize - 1); - _info.Rank = _rank; - _info.IsNaiveModelTrained = learnNaiveModel; - _info.Spectrum = singularVals; - } + for (i = 1; i < _windowSize; ++i) + { + _state[i - 1] = _xSmooth[i]; + _nextPrediction += _state[i - 1] * _alpha[i - 1]; } - /// - /// Forecasts the future values of the series up to the given horizon. - /// - /// The forecast result. - /// The forecast horizon. - internal override void Forecast(ref ForecastResultBase result, int horizon = 1) + if (_shouldMaintainInfo) { - _host.CheckParam(horizon >= 1, nameof(horizon), "The horizon parameter should be greater than 0."); - if (result == null) - result = new SsaForecastResult(); + _info.IsTrained = true; + _info.WindowSize = _windowSize; + _info.AutoRegressiveCoefficients = new Single[_windowSize - 1]; + Array.Copy(_alpha, _info.AutoRegressiveCoefficients, _windowSize - 1); + _info.Rank = _rank; + _info.IsNaiveModelTrained = learnNaiveModel; + _info.Spectrum = singularVals; + } + } - var str = "The result argument must be of type " + typeof(SsaForecastResult).ToString(); - _host.CheckParam(result is SsaForecastResult, nameof(result), str); + /// + /// Forecasts the future values of the series up to the given horizon. + /// + /// The forecast result. + /// The forecast horizon. + internal override void Forecast(ref ForecastResultBase result, int horizon = 1) + { + _host.CheckParam(horizon >= 1, nameof(horizon), "The horizon parameter should be greater than 0."); + if (result == null) + result = new SsaForecastResult(); - var output = result as SsaForecastResult; + var str = "The result argument must be of type " + typeof(SsaForecastResult).ToString(); + _host.CheckParam(result is SsaForecastResult, nameof(result), str); - var resEditor = VBufferEditor.Create(ref result.PointForecast, horizon); + var output = result as SsaForecastResult; - int i; - int j; - int k; + var resEditor = VBufferEditor.Create(ref result.PointForecast, horizon); - // Computing the point forecasts - resEditor.Values[0] = _nextPrediction; - for (i = 1; i < horizon; ++i) - { - k = 0; - resEditor.Values[i] = _autoregressionNoiseMean + _observationNoiseMean; - for (j = i; j < _windowSize - 1; ++j, ++k) - resEditor.Values[i] += _state[j] * _alpha[k]; + int i; + int j; + int k; - for (j = Math.Max(0, i - _windowSize + 1); j < i; ++j, ++k) - resEditor.Values[i] += resEditor.Values[j] * _alpha[k]; - } - - // Computing the forecast variances - if (ShouldComputeForecastIntervals) - { - var sdEditor = VBufferEditor.Create(ref output.ForecastStandardDeviation, horizon); - var lastCol = new FixedSizeQueue(_windowSize - 1); + // Computing the point forecasts + resEditor.Values[0] = _nextPrediction; + for (i = 1; i < horizon; ++i) + { + k = 0; + resEditor.Values[i] = _autoregressionNoiseMean + _observationNoiseMean; + for (j = i; j < _windowSize - 1; ++j, ++k) + resEditor.Values[i] += _state[j] * _alpha[k]; - for (i = 0; i < _windowSize - 3; ++i) - lastCol.AddLast(0); - lastCol.AddLast(1); - lastCol.AddLast(_alpha[_windowSize - 2]); - sdEditor.Values[0] = _autoregressionNoiseVariance + _observationNoiseVariance; + for (j = Math.Max(0, i - _windowSize + 1); j < i; ++j, ++k) + resEditor.Values[i] += resEditor.Values[j] * _alpha[k]; + } - for (i = 1; i < horizon; ++i) - { - Single temp = 0; - for (j = 0; j < _windowSize - 1; ++j) - temp += _alpha[j] * lastCol[j]; - lastCol.AddLast(temp); + // Computing the forecast variances + if (ShouldComputeForecastIntervals) + { + var sdEditor = VBufferEditor.Create(ref output.ForecastStandardDeviation, horizon); + var lastCol = new FixedSizeQueue(_windowSize - 1); - sdEditor.Values[i] = sdEditor.Values[i - 1] + _autoregressionNoiseVariance * temp * temp; - } + for (i = 0; i < _windowSize - 3; ++i) + lastCol.AddLast(0); + lastCol.AddLast(1); + lastCol.AddLast(_alpha[_windowSize - 2]); + sdEditor.Values[0] = _autoregressionNoiseVariance + _observationNoiseVariance; - for (i = 0; i < horizon; ++i) - sdEditor.Values[i] = (float)Math.Sqrt(sdEditor.Values[i]); + for (i = 1; i < horizon; ++i) + { + Single temp = 0; + for (j = 0; j < _windowSize - 1; ++j) + temp += _alpha[j] * lastCol[j]; + lastCol.AddLast(temp); - output.ForecastStandardDeviation = sdEditor.Commit(); + sdEditor.Values[i] = sdEditor.Values[i - 1] + _autoregressionNoiseVariance * temp * temp; } - result.PointForecast = resEditor.Commit(); - output.CanComputeForecastIntervals = ShouldComputeForecastIntervals; - output.BoundOffset = 0; - } - - /// - /// Predicts the next value on the series. - /// - /// The prediction result. - internal override void PredictNext(ref Single output) - { - output = _nextPrediction; - } + for (i = 0; i < horizon; ++i) + sdEditor.Values[i] = (float)Math.Sqrt(sdEditor.Values[i]); - internal override SequenceModelerBase Clone() - { - return new AdaptiveSingularSpectrumSequenceModelerInternal(this); + output.ForecastStandardDeviation = sdEditor.Commit(); } - /// - /// Computes the forecast intervals for the input forecast object at the given confidence level. The results are stored in the forecast object. - /// - /// The input forecast object - /// The confidence level in [0, 1) - internal static void ComputeForecastIntervals(ref SsaForecastResult forecast, Single confidenceLevel = 0.95f) - { - Contracts.CheckParam(0 <= confidenceLevel && confidenceLevel < 1, nameof(confidenceLevel), "The confidence level must be in [0, 1)."); - Contracts.CheckValue(forecast, nameof(forecast)); - Contracts.Check(forecast.CanComputeForecastIntervals, "The forecast intervals cannot be computed for this forecast object."); - - var meanForecast = forecast.PointForecast.GetValues(); - var horizon = meanForecast.Length; - var sdForecast = forecast.ForecastStandardDeviation.GetValues(); - Contracts.Check(sdForecast.Length >= horizon, "The forecast standard deviation values are not available."); + result.PointForecast = resEditor.Commit(); + output.CanComputeForecastIntervals = ShouldComputeForecastIntervals; + output.BoundOffset = 0; + } - forecast.ConfidenceLevel = confidenceLevel; - if (horizon == 0) - return; + /// + /// Predicts the next value on the series. + /// + /// The prediction result. + internal override void PredictNext(ref Single output) + { + output = _nextPrediction; + } - var upper = VBufferEditor.Create(ref forecast.UpperBound, horizon); - var lower = VBufferEditor.Create(ref forecast.LowerBound, horizon); + internal override SequenceModelerBase Clone() + { + return new AdaptiveSingularSpectrumSequenceModelerInternal(this); + } - var z = ProbabilityFunctions.Probit(0.5 + confidenceLevel / 2.0); - double temp; + /// + /// Computes the forecast intervals for the input forecast object at the given confidence level. The results are stored in the forecast object. + /// + /// The input forecast object + /// The confidence level in [0, 1) + internal static void ComputeForecastIntervals(ref SsaForecastResult forecast, Single confidenceLevel = 0.95f) + { + Contracts.CheckParam(0 <= confidenceLevel && confidenceLevel < 1, nameof(confidenceLevel), "The confidence level must be in [0, 1)."); + Contracts.CheckValue(forecast, nameof(forecast)); + Contracts.Check(forecast.CanComputeForecastIntervals, "The forecast intervals cannot be computed for this forecast object."); - for (int i = 0; i < horizon; ++i) - { - temp = z * sdForecast[i]; - upper.Values[i] = (Single)(meanForecast[i] + forecast.BoundOffset + temp); - lower.Values[i] = (Single)(meanForecast[i] + forecast.BoundOffset - temp); - } + var meanForecast = forecast.PointForecast.GetValues(); + var horizon = meanForecast.Length; + var sdForecast = forecast.ForecastStandardDeviation.GetValues(); + Contracts.Check(sdForecast.Length >= horizon, "The forecast standard deviation values are not available."); - forecast.UpperBound = upper.Commit(); - forecast.LowerBound = lower.Commit(); - } + forecast.ConfidenceLevel = confidenceLevel; + if (horizon == 0) + return; - public void Train(IDataView dataView, string inputColumnName) => Train(new RoleMappedData(dataView, null, inputColumnName)); + var upper = VBufferEditor.Create(ref forecast.UpperBound, horizon); + var lower = VBufferEditor.Create(ref forecast.LowerBound, horizon); - public float[] Forecast(int horizon) - { - ForecastResultBase result = null; - Forecast(ref result, horizon); - return result.PointForecast.GetValues().ToArray(); - } + var z = ProbabilityFunctions.Probit(0.5 + confidenceLevel / 2.0); + double temp; - public void ForecastWithConfidenceIntervals(int horizon, out float[] forecast, out float[] confidenceIntervalLowerBounds, out float[] confidenceIntervalUpperBounds, float confidenceLevel = 0.95f) + for (int i = 0; i < horizon; ++i) { - ForecastResultBase result = null; - Forecast(ref result, horizon); - SsaForecastResult ssaResult = (SsaForecastResult)result; - ComputeForecastIntervals(ref ssaResult, confidenceLevel); - forecast = result.PointForecast.GetValues().ToArray(); - confidenceIntervalLowerBounds = ssaResult.LowerBound.GetValues().ToArray(); - confidenceIntervalUpperBounds = ssaResult.UpperBound.GetValues().ToArray(); + temp = z * sdForecast[i]; + upper.Values[i] = (Single)(meanForecast[i] + forecast.BoundOffset + temp); + lower.Values[i] = (Single)(meanForecast[i] + forecast.BoundOffset - temp); } - public void Update(IDataView dataView, string inputColumnName) - { - _host.CheckParam(dataView != null, nameof(dataView), "The input series for updating cannot be null."); + forecast.UpperBound = upper.Commit(); + forecast.LowerBound = lower.Commit(); + } - var data = new RoleMappedData(dataView, null, inputColumnName); - if (data.Schema.Feature.Value.Type != NumberDataViewType.Single) - throw _host.ExceptUserArg(nameof(data.Schema.Feature.Value.Name), "The time series input column has " + - "type '{0}', but must be a float.", data.Schema.Feature.Value.Type); + public void Train(IDataView dataView, string inputColumnName) => Train(new RoleMappedData(dataView, null, inputColumnName)); - var col = data.Schema.Feature.Value; - using (var cursor = data.Data.GetRowCursor(data.Data.Schema)) - { - var getVal = cursor.GetGetter(col); - Single val = default(Single); - while (cursor.MoveNext()) - { - getVal(ref val); - if (!Single.IsNaN(val)) - Consume(ref val); - } - } - } + public float[] Forecast(int horizon) + { + ForecastResultBase result = null; + Forecast(ref result, horizon); + return result.PointForecast.GetValues().ToArray(); } - /// - /// Train a forecasting model from an . - /// - /// Reference to the - public void Train(IDataView dataView) => _modeler.Train(dataView, _inputColumnName); + public void ForecastWithConfidenceIntervals(int horizon, out float[] forecast, out float[] confidenceIntervalLowerBounds, out float[] confidenceIntervalUpperBounds, float confidenceLevel = 0.95f) + { + ForecastResultBase result = null; + Forecast(ref result, horizon); + SsaForecastResult ssaResult = (SsaForecastResult)result; + ComputeForecastIntervals(ref ssaResult, confidenceLevel); + forecast = result.PointForecast.GetValues().ToArray(); + confidenceIntervalLowerBounds = ssaResult.LowerBound.GetValues().ToArray(); + confidenceIntervalUpperBounds = ssaResult.UpperBound.GetValues().ToArray(); + } - /// - /// Update a forecasting model with the new observations in the form of an . - /// - /// Reference to the observations as an - /// Name of the input column to update from. If null then input column name specified at model initiation is taken as default. - public void Update(IDataView dataView, string inputColumnName = null) => _modeler.Update(dataView, inputColumnName ?? _inputColumnName); + public void Update(IDataView dataView, string inputColumnName) + { + _host.CheckParam(dataView != null, nameof(dataView), "The input series for updating cannot be null."); - /// - /// Perform forecasting until a particular . - /// - /// Number of values to forecast. - /// Forecasted values. - public float[] Forecast(int horizon) => _modeler.Forecast(horizon); + var data = new RoleMappedData(dataView, null, inputColumnName); + if (data.Schema.Feature.Value.Type != NumberDataViewType.Single) + throw _host.ExceptUserArg(nameof(data.Schema.Feature.Value.Name), "The time series input column has " + + "type '{0}', but must be a float.", data.Schema.Feature.Value.Type); - /// - /// For saving a model into a repository. - /// - public void Save(ModelSaveContext ctx) - { - _host.CheckValue(ctx, nameof(ctx)); - ctx.CheckAtModel(); - ctx.SetVersionInfo(GetVersionInfo()); - ctx.Writer.Write(_inputColumnName); - ctx.SaveModel(_modeler, "ForecastWrapper"); + var col = data.Schema.Feature.Value; + using (var cursor = data.Data.GetRowCursor(data.Data.Schema)) + { + var getVal = cursor.GetGetter(col); + Single val = default(Single); + while (cursor.MoveNext()) + { + getVal(ref val); + if (!Single.IsNaN(val)) + Consume(ref val); + } + } } - - /// - /// Perform forecasting until a particular and also computes confidence intervals. - /// For confidence intervals to be computed the model must be trained with - /// set to true. - /// - /// Number of values to forecast. - /// Forecasted values - /// Lower bound confidence intervals of forecasted values. - /// Upper bound confidence intervals of forecasted values. - /// Forecast confidence level. - public void ForecastWithConfidenceIntervals(int horizon, out float[] forecast, out float[] confidenceIntervalLowerBounds, out float[] confidenceIntervalUpperBounds, float confidenceLevel = 0.95f) => - _modeler.ForecastWithConfidenceIntervals(horizon, out forecast, out confidenceIntervalLowerBounds, out confidenceIntervalUpperBounds, confidenceLevel); } } diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index e36633d7b5..c6e6ec8966 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -5,7 +5,7 @@ using System; using Microsoft.ML.Data; using Microsoft.ML.Transforms.TimeSeries; -using static Microsoft.ML.Transforms.TimeSeries.AdaptiveSingularSpectrumSequenceModeler; +//using static Microsoft.ML.Transforms.TimeSeries.AdaptiveSingularSpectrumSequenceForecasting; namespace Microsoft.ML { @@ -148,7 +148,7 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog int windowSize=64, int backAddWindowSize=5, int lookaheadWindowSize=5, int averageingWindowSize=3, int judgementWindowSize=21, double threshold=0.3) => new SrCnnAnomalyEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, windowSize, backAddWindowSize, lookaheadWindowSize, averageingWindowSize, judgementWindowSize, threshold, inputColumnName); - /// + /*/// /// Singular Spectrum Analysis (SSA) model for modeling univariate time-series. /// For the details of the model, refer to http://arxiv.org/pdf/1206.6910.pdf. /// @@ -174,10 +174,10 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog /// ]]> /// /// - public static AdaptiveSingularSpectrumSequenceModeler AdaptiveSingularSpectrumSequenceModeler(this ForecastingCatalog catalog, + public static AdaptiveSingularSpectrumSequenceForecasting AdaptiveSingularSpectrumSequenceModeler(this ForecastingCatalog catalog, string inputColumnName, int trainSize, int seriesLength, int windowSize, Single discountFactor = 1, RankSelectionMethod rankSelectionMethod = RankSelectionMethod.Exact, int? rank = null, int? maxRank = null, bool shouldComputeForecastIntervals = true, bool shouldstablize = true, bool shouldMaintainInfo = false, GrowthRatio? maxGrowth = null) => - new AdaptiveSingularSpectrumSequenceModeler(CatalogUtils.GetEnvironment(catalog), inputColumnName, trainSize, seriesLength, windowSize, discountFactor, - rankSelectionMethod, rank, maxRank, shouldComputeForecastIntervals, shouldstablize, shouldMaintainInfo, maxGrowth); + new AdaptiveSingularSpectrumSequenceForecasting(CatalogUtils.GetEnvironment(catalog), inputColumnName, trainSize, seriesLength, windowSize, discountFactor, + rankSelectionMethod, rank, maxRank, shouldComputeForecastIntervals, shouldstablize, shouldMaintainInfo, maxGrowth);*/ } } diff --git a/src/Microsoft.ML.TimeSeries/Forecast.cs b/src/Microsoft.ML.TimeSeries/Forecast.cs index 9321d115f0..8c3c1a7de6 100644 --- a/src/Microsoft.ML.TimeSeries/Forecast.cs +++ b/src/Microsoft.ML.TimeSeries/Forecast.cs @@ -4,7 +4,7 @@ using System.IO; using Microsoft.ML.Data; -using static Microsoft.ML.Transforms.TimeSeries.AdaptiveSingularSpectrumSequenceModeler; +//using static Microsoft.ML.Transforms.TimeSeries.AdaptiveSingularSpectrumSequenceForecasting; namespace Microsoft.ML.TimeSeries { @@ -45,7 +45,7 @@ public interface ICanForecast : ICanSaveModel void ForecastWithConfidenceIntervals(int horizon, out T[] forecast, out float[] confidenceIntervalLowerBounds, out float[] confidenceIntervalUpperBounds, float confidenceLevel = 0.95f); } - public static class ForecastExtensions + /* public static class ForecastExtensions { /// /// Load a model. @@ -89,5 +89,5 @@ public static void SaveForecastingModel(this ModelOperationsCatalog catalog, } } } - } + }*/ } diff --git a/src/Microsoft.ML.TimeSeries/PredictionFunction.cs b/src/Microsoft.ML.TimeSeries/PredictionFunction.cs index 310be9bd36..a3b656c2d6 100644 --- a/src/Microsoft.ML.TimeSeries/PredictionFunction.cs +++ b/src/Microsoft.ML.TimeSeries/PredictionFunction.cs @@ -15,6 +15,11 @@ internal interface IStatefulRowToRowMapper : IRowToRowMapper { } + internal interface IForecastTransformer : IStatefulTransformer + { + IDataView Forecast(int horizon, bool includeConfidenceInterval = false, float confidenceLevel = 0.95f); + } + internal interface IStatefulTransformer : ITransformer { /// @@ -51,13 +56,13 @@ internal interface IStatefulRowMapper : IRowMapper /// /// The user-defined type that holds the example. /// The user-defined type that holds the prediction. - public sealed class TimeSeriesPredictionFunction : PredictionEngineBase + public class TimeSeriesPredictionFunction : PredictionEngineBase where TSrc : class where TDst : class, new() { - private Action _pinger; - private long _rowPosition; - private ITransformer InputTransformer { get; set; } + protected Action Pinger; + protected long RowPosition; + protected ITransformer InputTransformer { get; set; } /// /// Checkpoints to disk with the updated @@ -188,7 +193,7 @@ internal DataViewRow GetStatefulRows(DataViewRow input, IRowToRowMapper mapper, return result; } - private Action CreatePinger(List rows) + internal Action CreatePinger(List rows) { if (rows.Count == 0) return position => { }; @@ -204,7 +209,7 @@ private protected override void PredictionEngineCore(IHostEnvironment env, DataV List rows = new List(); DataViewRow outputRowLocal = outputRowLocal = GetStatefulRows(inputRow, mapper, mapper.OutputSchema, rows); var cursorable = TypedCursorable.Create(env, new EmptyDataView(env, mapper.OutputSchema), ignoreMissingColumns, outputSchemaDefinition); - _pinger = CreatePinger(rows); + Pinger = CreatePinger(rows); disposer = outputRowLocal.Dispose; outputRow = cursorable.GetRow(outputRowLocal); } @@ -268,15 +273,53 @@ public override void Predict(TSrc example, ref TDst prediction) prediction = new TDst(); // Update state. - _pinger(_rowPosition); + Pinger(RowPosition); // Predict. FillValues(prediction); - _rowPosition++; + RowPosition++; } } + public sealed class TimeSeriesForecasting : TimeSeriesPredictionFunction + where TSrc : class + { + public TimeSeriesForecasting(IHostEnvironment env, ITransformer transformer, bool ignoreMissingColumns, + SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) : + base(env, transformer, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition) + { + } + + private protected override void PredictionEngineCore(IHostEnvironment env, DataViewConstructionUtils.InputRow inputRow, + IRowToRowMapper mapper, bool ignoreMissingColumns, SchemaDefinition outputSchemaDefinition, out Action disposer, out IRowReadableAs outputRow) + { + List rows = new List(); + DataViewRow outputRowLocal = outputRowLocal = GetStatefulRows(inputRow, mapper, mapper.OutputSchema, rows); + Pinger = CreatePinger(rows); + disposer = outputRowLocal.Dispose; + outputRow = null; + } + + /// + /// Run prediction pipeline on one example. + /// + /// The example to run on. + public void Update(TSrc example) + { + Contracts.CheckValue(example, nameof(example)); + ExtractValues(example); + // Update state. + Pinger(RowPosition); + RowPosition++; + } + + public IDataView Forecast(int horizon, bool includeConfidenceInterval = false, float confidenceLevel = 0.95f) => + ((IForecastTransformer)InputTransformer).Forecast(horizon, includeConfidenceInterval, confidenceLevel); + + public override void Predict(TSrc example, ref object prediction) => throw new NotSupportedException(); + } + public static class PredictionFunctionExtensions { /// @@ -310,5 +353,16 @@ public static TimeSeriesPredictionFunction CreateTimeSeriesPredictio env.CheckValueOrNull(outputSchemaDefinition); return new TimeSeriesPredictionFunction(env, transformer, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition); } + + public static TimeSeriesForecasting CreateTimeSeriesForecastingEngine(this ITransformer transformer, IHostEnvironment env, + bool ignoreMissingColumns = false, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) + where TSrc : class + { + Contracts.CheckValue(env, nameof(env)); + env.CheckValue(transformer, nameof(transformer)); + env.CheckValueOrNull(inputSchemaDefinition); + env.CheckValueOrNull(outputSchemaDefinition); + return new TimeSeriesForecasting(env, transformer, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition); + } } } diff --git a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs new file mode 100644 index 0000000000..2cf0903a33 --- /dev/null +++ b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs @@ -0,0 +1,304 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections.Generic; +using System.Linq; +using Microsoft.ML; +using Microsoft.ML.CommandLine; +using Microsoft.ML.Data; +using Microsoft.ML.Runtime; +using Microsoft.ML.Transforms.TimeSeries; + +[assembly: LoadableClass(SsaForecasting.Summary, typeof(IDataTransform), typeof(SsaForecasting), typeof(SsaForecasting.Options), typeof(SignatureDataTransform), + SsaForecasting.UserName, SsaForecasting.LoaderSignature, SsaForecasting.ShortName)] + +[assembly: LoadableClass(SsaForecasting.Summary, typeof(IDataTransform), typeof(SsaForecasting), null, typeof(SignatureLoadDataTransform), + SsaForecasting.UserName, SsaForecasting.LoaderSignature)] + +[assembly: LoadableClass(SsaForecasting.Summary, typeof(SsaForecasting), null, typeof(SignatureLoadModel), + SsaForecasting.UserName, SsaForecasting.LoaderSignature)] + +[assembly: LoadableClass(typeof(IRowMapper), typeof(SsaForecasting), null, typeof(SignatureLoadRowMapper), + SsaForecasting.UserName, SsaForecasting.LoaderSignature)] + +namespace Microsoft.ML.Transforms.TimeSeries +{ + /// + /// resulting from fitting a . + /// + public sealed class SsaForecasting : SsaForecastingBaseWrapper, IStatefulTransformer, IForecastTransformer + { + internal const string Summary = "This transform forecasts using Singular Spectrum Analysis (SSA)."; + internal const string LoaderSignature = "SsaForecasting"; + internal const string UserName = "SSA Spike Detection"; + internal const string ShortName = "spike"; + + internal sealed class Options : TransformInputBase + { + [Argument(ArgumentType.Required, HelpText = "The name of the source column.", ShortName = "src", + SortOrder = 1, Purpose = SpecialPurpose.ColumnName)] + public string Source; + + [Argument(ArgumentType.Required, HelpText = "The name of the new column.", + SortOrder = 2)] + public string Name; + + [Argument(ArgumentType.Required, HelpText = "The confidence for spike detection in the range [0, 100].", + ShortName = "cnf", SortOrder = 4)] + public double Confidence = 99; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The discount factor in [0, 1]", ShortName = "disc", SortOrder = 12)] + public float DiscountFactor = 1; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The flag determing whether the model is adaptive", ShortName = "adp", SortOrder = 14)] + public bool IsAdaptive = false; + public int WindowSize; + public RankSelectionMethod RankSelectionMethod; + public int? Rank; + public int? MaxRank; + public bool ShouldComputeForecastIntervals; + public bool ShouldStablize; + public bool ShouldMaintainInfo; + public GrowthRatio? MaxGrowth; + public int SeriesLength; + public int TrainSize; + public int Horizon; + } + + private sealed class BaseArguments : SsaForecastingOptions + { + public BaseArguments(Options options) + { + Source = options.Source; + Name = options.Name; + WindowSize = options.WindowSize; + DiscountFactor = options.DiscountFactor; + IsAdaptive = options.IsAdaptive; + RankSelectionMethod = options.RankSelectionMethod; + Rank = options.Rank; + ShouldComputeForecastIntervals = options.ShouldComputeForecastIntervals; + ShouldStablize = options.ShouldStablize; + MaxGrowth = options.MaxGrowth; + SeriesLength = options.SeriesLength; + TrainSize = options.TrainSize; + Horizon = options.Horizon; + } + } + + private static VersionInfo GetVersionInfo() + { + return new VersionInfo( + modelSignature: "FRCSTRNS", + verWrittenCur: 0x00010001, // Initial + verReadableCur: 0x00010001, + verWeCanReadBack: 0x00010001, + loaderSignature: LoaderSignature, + loaderAssemblyName: typeof(SsaForecasting).Assembly.FullName); + } + + internal SsaForecasting(IHostEnvironment env, Options options, IDataView input) + : base(new BaseArguments(options), LoaderSignature, env) + { + InternalTransform.Model.Train(new RoleMappedData(input, null, InternalTransform.InputColumnName)); + } + + // Factory method for SignatureDataTransform. + private static IDataTransform Create(IHostEnvironment env, Options options, IDataView input) + { + Contracts.CheckValue(env, nameof(env)); + env.CheckValue(options, nameof(options)); + env.CheckValue(input, nameof(input)); + + return new SsaForecasting(env, options, input).MakeDataTransform(input); + } + + internal SsaForecasting(IHostEnvironment env, Options options) + : base(new BaseArguments(options), LoaderSignature, env) + { + // This constructor is empty. + } + + // Factory method for SignatureLoadDataTransform. + private static IDataTransform Create(IHostEnvironment env, ModelLoadContext ctx, IDataView input) + { + Contracts.CheckValue(env, nameof(env)); + env.CheckValue(ctx, nameof(ctx)); + env.CheckValue(input, nameof(input)); + + return new SsaForecasting(env, ctx).MakeDataTransform(input); + } + + IStatefulTransformer IStatefulTransformer.Clone() + { + var clone = (SsaForecasting)MemberwiseClone(); + clone.InternalTransform.Model = clone.InternalTransform.Model.Clone(); + clone.InternalTransform.StateRef = (SsaForecastingBase.State)clone.InternalTransform.StateRef.Clone(); + clone.InternalTransform.StateRef.InitState(clone.InternalTransform, InternalTransform.Host); + return clone; + } + + // Factory method for SignatureLoadModel. + private static SsaForecasting Create(IHostEnvironment env, ModelLoadContext ctx) + { + Contracts.CheckValue(env, nameof(env)); + env.CheckValue(ctx, nameof(ctx)); + ctx.CheckAtModel(GetVersionInfo()); + + return new SsaForecasting(env, ctx); + } + + internal SsaForecasting(IHostEnvironment env, ModelLoadContext ctx) + : base(env, ctx, LoaderSignature) + { + // *** Binary format *** + // + InternalTransform.Host.CheckDecode(InternalTransform.IsAdaptive == false); + } + + private protected override void SaveModel(ModelSaveContext ctx) + { + InternalTransform.Host.CheckValue(ctx, nameof(ctx)); + ctx.CheckAtModel(); + ctx.SetVersionInfo(GetVersionInfo()); + + InternalTransform.Host.Assert(InternalTransform.IsAdaptive == false); + + // *** Binary format *** + // + + base.SaveModel(ctx); + } + + // Factory method for SignatureLoadRowMapper. + private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, DataViewSchema inputSchema) + => Create(env, ctx).MakeRowMapper(inputSchema); + } + + /// + /// Detect spikes in time series using Singular Spectrum Analysis. + /// + /// + /// | + /// | Output column data type | 3-element vector of | + /// + /// [!include[io](~/../docs/samples/docs/api-reference/time-series-props.md)] + /// + /// [!include[io](~/../docs/samples/docs/api-reference/time-series-ssa.md)] + /// + /// [!include[io](~/../docs/samples/docs/api-reference/time-series-pvalue.md)] + /// + /// Check the See Also section for links to usage examples. + /// ]]> + /// + /// + /// + public sealed class SsaForecastingEstimator : IEstimator + { + private readonly IHost _host; + private readonly SsaForecasting.Options _options; + + /// + /// Create a new instance of + /// + /// Host Environment. + /// Name of the column resulting from the transformation of . + /// The confidence for spike detection in the range [0, 100]. + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// Name of column to transform. If set to , the value of the will be used as source. + /// The vector contains Alert, Raw Score, P-Value as first three values. + internal SsaForecastingEstimator(IHostEnvironment env, + string outputColumnName, + int confidence, + float discountFactor, + bool isAdaptive, + int windowSize, + RankSelectionMethod rankSelectionMethod, + int? rank, + int? maxRank, + bool shouldComputeForecastIntervals, + bool shouldStablize, + bool shouldMaintainInfo, + GrowthRatio? maxGrowth, + string inputColumnName = null) + : this(env, new SsaForecasting.Options + { + Source = inputColumnName ?? outputColumnName, + Name = outputColumnName, + Confidence = confidence, + DiscountFactor = discountFactor, + IsAdaptive = isAdaptive, + WindowSize = windowSize, + RankSelectionMethod = rankSelectionMethod, + Rank = rank, + MaxRank = maxRank, + ShouldComputeForecastIntervals = shouldComputeForecastIntervals, + ShouldStablize = shouldStablize, + ShouldMaintainInfo = shouldMaintainInfo, + MaxGrowth = maxGrowth, + }) + { + } + + internal SsaForecastingEstimator(IHostEnvironment env, SsaForecasting.Options options) + { + Contracts.CheckValue(env, nameof(env)); + _host = env.Register(nameof(SsaForecastingEstimator)); + + _host.CheckNonEmpty(options.Name, nameof(options.Name)); + _host.CheckNonEmpty(options.Source, nameof(options.Source)); + + _options = options; + } + + /// + /// Train and return a transformer. + /// + public SsaForecasting Fit(IDataView input) + { + _host.CheckValue(input, nameof(input)); + return new SsaForecasting(_host, _options, input); + } + + /// + /// Schema propagation for transformers. + /// Returns the output schema of the data, if the input schema is like the one provided. + /// + public SchemaShape GetOutputSchema(SchemaShape inputSchema) + { + _host.CheckValue(inputSchema, nameof(inputSchema)); + + if (!inputSchema.TryFindColumn(_options.Source, out var col)) + throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", _options.Source); + if (col.ItemType != NumberDataViewType.Single) + throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", _options.Source, "Single", col.GetTypeString()); + + var metadata = new List() { + new SchemaShape.Column(AnnotationUtils.Kinds.SlotNames, SchemaShape.Column.VectorKind.Vector, TextDataViewType.Instance, false) + }; + var resultDic = inputSchema.ToDictionary(x => x.Name); + resultDic[_options.Name] = new SchemaShape.Column( + _options.Name, SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Double, false, new SchemaShape(metadata)); + + return new SchemaShape(resultDic.Values); + } + } +} diff --git a/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs b/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs new file mode 100644 index 0000000000..62594a8dc0 --- /dev/null +++ b/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs @@ -0,0 +1,255 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.IO; +using System.Threading; +using Microsoft.ML.CommandLine; +using Microsoft.ML.Data; +using Microsoft.ML.Internal.CpuMath; +using Microsoft.ML.Internal.Utilities; +using Microsoft.ML.Runtime; + +namespace Microsoft.ML.Transforms.TimeSeries +{ + + /// + /// The base class that can be inherited by the 'Argument' classes in the derived classes containing the shared input parameters. + /// + internal abstract class ForecastingArgumentsBase + { + [Argument(ArgumentType.Required, HelpText = "The name of the source column", ShortName = "src", + SortOrder = 1, Purpose = SpecialPurpose.ColumnName)] + public string Source; + + [Argument(ArgumentType.Required, HelpText = "The name of the new column", ShortName = "name", + SortOrder = 2)] + public string Name; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The size of the sliding window for computing the p-value.", ShortName = "wnd", + SortOrder = 3)] + public int TrainSize = 1; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The size of the initial window for computing the p-value as well as" + + " training if needed. The default value is set to 0, which means there is no initial window considered.", + ShortName = "initwnd", SortOrder = 5)] + public int SeriesLength = 0; + } + + // REVIEW: This base class and its children classes generate one output column of type VBuffer to output 3 different anomaly scores as well as + // the alert flag. Ideally these 4 output information should be put in four seaparate columns instead of one VBuffer<> column. However, this is not currently + // possible due to our design restriction. This must be fixed in the next version and will potentially affect the children classes. + /// + /// The base class for sequential anomaly detection transforms that supports the p-value as well as the martingales scores computation from the sequence of + /// raw anomaly scores whose calculation is specified by the children classes. This class also provides mechanism for the threshold-based alerting on + /// the raw anomaly score, the p-value score or the martingale score. Currently, this class supports Power and Mixture martingales. + /// For more details, please refer to http://arxiv.org/pdf/1204.3251.pdf + /// + /// The type of the input sequence + /// The type of the input sequence + internal abstract class SequentialForecastingTransformBase : SequentialTransformerBase, TState> + where TState : SequentialForecastingTransformBase.ForecastingStateBase, new() + { + + // The size of the VBuffer in the dst column. + internal int OutputLength; + + private protected SequentialForecastingTransformBase(int windowSize, int initialWindowSize, string inputColumnName, string outputColumnName, string name, IHostEnvironment env) + : base(Contracts.CheckRef(env, nameof(env)).Register(name), windowSize, initialWindowSize, outputColumnName, inputColumnName, new VectorDataViewType(NumberDataViewType.Double, 0)) + { + } + + private protected SequentialForecastingTransformBase(ForecastingArgumentsBase args, string name, IHostEnvironment env) + : this(args.TrainSize, args.SeriesLength, args.Source, args.Name, name, env) + { + } + + private protected SequentialForecastingTransformBase(IHostEnvironment env, ModelLoadContext ctx, string name) + : base(Contracts.CheckRef(env, nameof(env)).Register(name), ctx) + { + // *** Binary format *** + // + } + + private protected override void SaveModel(ModelSaveContext ctx) + { + Host.CheckValue(ctx, nameof(ctx)); + ctx.CheckAtModel(); + + // *** Binary format *** + // + + base.SaveModel(ctx); + } + + internal override IStatefulRowMapper MakeRowMapper(DataViewSchema schema) => new Mapper(Host, this, schema); + + internal sealed class Mapper : IStatefulRowMapper + { + private readonly IHost _host; + private readonly SequentialForecastingTransformBase _parent; + private readonly DataViewSchema _parentSchema; + private readonly int _inputColumnIndex; + private readonly VBuffer> _slotNames; + private ForecastingStateBase State { get; set; } + + public Mapper(IHostEnvironment env, SequentialForecastingTransformBase parent, DataViewSchema inputSchema) + { + Contracts.CheckValue(env, nameof(env)); + _host = env.Register(nameof(Mapper)); + _host.CheckValue(inputSchema, nameof(inputSchema)); + _host.CheckValue(parent, nameof(parent)); + + if (!inputSchema.TryGetColumnIndex(parent.InputColumnName, out _inputColumnIndex)) + throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", parent.InputColumnName); + + var colType = inputSchema[_inputColumnIndex].Type; + if (colType != NumberDataViewType.Single) + throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", parent.InputColumnName, "Single", colType.ToString()); + + _parent = parent; + _parentSchema = inputSchema; + _slotNames = new VBuffer>(_parent.OutputLength, new[] {"".AsMemory()}); + + State = (ForecastingStateBase)_parent.StateRef; + } + + public DataViewSchema.DetachedColumn[] GetOutputColumns() + { + var meta = new DataViewSchema.Annotations.Builder(); + meta.AddSlotNames(_parent.OutputLength, GetSlotNames); + var info = new DataViewSchema.DetachedColumn[3]; + info[0] = new DataViewSchema.DetachedColumn(_parent.OutputColumnName, new VectorDataViewType(NumberDataViewType.Double, 0), meta.ToAnnotations()); + info[1] = new DataViewSchema.DetachedColumn("Min", new VectorDataViewType(NumberDataViewType.Double, 0), meta.ToAnnotations()); + info[2] = new DataViewSchema.DetachedColumn("Max", new VectorDataViewType(NumberDataViewType.Double, 0), meta.ToAnnotations()); + return info; + } + + public void GetSlotNames(ref VBuffer> dst) => _slotNames.CopyTo(ref dst, 0, _parent.OutputLength); + + public Func GetDependencies(Func activeOutput) + { + if (activeOutput(0)) + return col => col == _inputColumnIndex; + else + return col => false; + } + + void ICanSaveModel.Save(ModelSaveContext ctx) => _parent.SaveModel(ctx); + + public Delegate[] CreateGetters(DataViewRow input, Func activeOutput, out Action disposer) + { + disposer = null; + var getters = new Delegate[3]; + if (activeOutput(0)) + getters[0] = MakeGetter(input, State); + + getters[1] = getters[2] = getters[0]; + return getters; + } + + private delegate void ProcessData(ref TInput src, ref VBuffer dst); + + private Delegate MakeGetter(DataViewRow input, ForecastingStateBase state) + { + _host.AssertValue(input); + var srcGetter = input.GetGetter(input.Schema[_inputColumnIndex]); + ProcessData processData = _parent.WindowSize > 0 ? + (ProcessData)state.Process : state.ProcessWithoutBuffer; + + ValueGetter> valueGetter = (ref VBuffer dst) => + { + TInput src = default; + srcGetter(ref src); + processData(ref src, ref dst); + }; + return valueGetter; + } + + public Action CreatePinger(DataViewRow input, Func activeOutput, out Action disposer) + { + disposer = null; + Action pinger = null; + if (activeOutput(0)) + pinger = MakePinger(input, State); + + return pinger; + } + + private Action MakePinger(DataViewRow input, ForecastingStateBase state) + { + _host.AssertValue(input); + var srcGetter = input.GetGetter(input.Schema[_inputColumnIndex]); + Action pinger = (long rowPosition) => + { + TInput src = default; + srcGetter(ref src); + state.UpdateState(ref src, rowPosition, _parent.WindowSize > 0); + }; + return pinger; + } + + public void CloneState() + { + if (Interlocked.Increment(ref _parent.StateRefCount) > 1) + { + State = (ForecastingStateBase)_parent.StateRef.Clone(); + } + } + + public ITransformer GetTransformer() + { + return _parent; + } + } + /// + /// The base state class for sequential anomaly detection: this class implements the p-values and martinagle calculations for anomaly detection + /// given that the raw anomaly score calculation is specified by the derived classes. + /// + internal abstract class ForecastingStateBase : SequentialTransformerBase, TState>.StateBase + { + // A reference to the parent transform. + protected SequentialForecastingTransformBase Parent; + + private protected ForecastingStateBase() { } + + private protected override void CloneCore(TState state) + { + base.CloneCore(state); + } + + private protected ForecastingStateBase(BinaryReader reader) : base(reader) + { + } + + internal override void Save(BinaryWriter writer) + { + base.Save(writer); + } + + private protected override void SetNaOutput(ref VBuffer dst) + { + var outputLength = Parent.OutputLength; + var editor = VBufferEditor.Create(ref dst, outputLength); + + for (int i = 0; i < outputLength; ++i) + editor.Values[i] = Double.NaN; + + dst = editor.Commit(); + } + + private protected sealed override void InitializeStateCore(bool disk = false) + { + Parent = (SequentialForecastingTransformBase)ParentTransform; + Host.Assert(WindowSize >= 0); + InitializeAnomalyDetector(); + } + + /// + /// The abstract method that realizes the initialization functionality for the anomaly detector. + /// + private protected abstract void InitializeAnomalyDetector(); + } + } +} diff --git a/src/Microsoft.ML.TimeSeries/SequentialTransformBase.cs b/src/Microsoft.ML.TimeSeries/SequentialTransformBase.cs index a5657e1b56..253ad7d86f 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialTransformBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialTransformBase.cs @@ -29,6 +29,24 @@ public DataBox(T value) } } + internal sealed class DataBoxMultiple + { + public T Value; + public T Value2; + public T Value3; + + public DataBoxMultiple() + { + } + + public DataBoxMultiple(T value, T value1, T value2) + { + Value = value; + Value2 = value1; + Value3 = value2; + } + } + /// /// The base class for sequential processing transforms. This class implements the basic sliding window buffering. The derived classes need to specify the transform logic, /// the initialization logic and the learning logic via implementing the abstract methods TransformCore(), InitializeStateCore() and LearnStateFromDataCore(), respectively diff --git a/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs b/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs index 7f0c98574f..39dfed8fb2 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs @@ -26,6 +26,7 @@ namespace Microsoft.ML.Transforms.TimeSeries internal abstract class SequentialTransformerBase : IStatefulTransformer where TState : SequentialTransformerBase.StateBase, new() { + public SequentialTransformerBase() { } /// /// The base class for encapsulating the State object for sequential processing. This class implements a windowed buffer. @@ -393,7 +394,7 @@ public SequentialDataTransform(IHost host, SequentialTransformerBase 0, _parent.OutputColumnType); @@ -409,24 +410,29 @@ private static IDataTransform CreateLambdaTransform(IHost host, IDataView input, var inputSchema = SchemaDefinition.Create(typeof(DataBox)); inputSchema[0].ColumnName = inputColumnName; - var outputSchema = SchemaDefinition.Create(typeof(DataBox)); + var outputSchema = SchemaDefinition.Create(typeof(DataBoxMultiple)); outputSchema[0].ColumnName = outputColumnName; if (outputColTypeOverride != null) outputSchema[0].ColumnType = outputColTypeOverride; - Action, DataBox, TState> lambda; + outputSchema[1].ColumnName = "Min"; + outputSchema[2].ColumnName = "Max"; + + Action, DataBoxMultiple, TState> lambda; if (hasBuffer) lambda = MapFunction; else - lambda = MapFunctionWithoutBuffer; + lambda = MapFunction; return LambdaTransform.CreateMap(host, input, lambda, initFunction, inputSchema, outputSchema); } - private static void MapFunction(DataBox input, DataBox output, TState state) + private static void MapFunction(DataBox input, DataBoxMultiple output, TState state) { state.Process(ref input.Value, ref output.Value); + state.Process(ref input.Value, ref output.Value2); + state.Process(ref input.Value, ref output.Value3); } private static void MapFunctionWithoutBuffer(DataBox input, DataBox output, TState state) diff --git a/src/Microsoft.ML.TimeSeries/SsaAnomalyDetectionBase.cs b/src/Microsoft.ML.TimeSeries/SsaAnomalyDetectionBase.cs index 0c823f2de1..f93efb6aa7 100644 --- a/src/Microsoft.ML.TimeSeries/SsaAnomalyDetectionBase.cs +++ b/src/Microsoft.ML.TimeSeries/SsaAnomalyDetectionBase.cs @@ -201,8 +201,8 @@ public SsaAnomalyDetectionBase(SsaOptions options, string name, IHostEnvironment ErrorFunc = ErrorFunctionUtils.GetErrorFunction(ErrorFunction); IsAdaptive = options.IsAdaptive; // Creating the master SSA model - Model = new AdaptiveSingularSpectrumSequenceModeler.AdaptiveSingularSpectrumSequenceModelerInternal(Host, options.InitialWindowSize, SeasonalWindowSize + 1, SeasonalWindowSize, - DiscountFactor, AdaptiveSingularSpectrumSequenceModeler.RankSelectionMethod.Exact, null, SeasonalWindowSize / 2, false, false); + Model = new AdaptiveSingularSpectrumSequenceModelerInternal(Host, options.InitialWindowSize, SeasonalWindowSize + 1, SeasonalWindowSize, + DiscountFactor, RankSelectionMethod.Exact, null, SeasonalWindowSize / 2, false, false); StateRef = new State(); StateRef.InitState(WindowSize, InitialWindowSize, this, Host); diff --git a/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs b/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs new file mode 100644 index 0000000000..9c41801a00 --- /dev/null +++ b/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs @@ -0,0 +1,310 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.IO; +using Microsoft.ML.CommandLine; +using Microsoft.ML.Data; +using Microsoft.ML.Internal.Utilities; +using Microsoft.ML.Runtime; + +namespace Microsoft.ML.Transforms.TimeSeries +{ + /// + /// The wrapper to that implements the general anomaly detection transform based on Singular Spectrum modeling of the time-series. + /// For the details of the Singular Spectrum Analysis (SSA), refer to http://arxiv.org/pdf/1206.6910.pdf. + /// + public class SsaForecastingBaseWrapper : IForecastTransformer, ICanSaveModel + { + /// + /// Whether a call to should succeed, on an + /// appropriate schema. + /// + bool ITransformer.IsRowToRowMapper => ((ITransformer)InternalTransform).IsRowToRowMapper; + + /// + /// Creates a clone of the transformer. Used for taking the snapshot of the state. + /// + /// + IStatefulTransformer IStatefulTransformer.Clone() => InternalTransform.Clone(); + + /// + /// Schema propagation for transformers. + /// Returns the output schema of the data, if the input schema is like the one provided. + /// + public DataViewSchema GetOutputSchema(DataViewSchema inputSchema) => InternalTransform.GetOutputSchema(inputSchema); + + /// + /// Constructs a row-to-row mapper based on an input schema. If + /// is false, then an exception should be thrown. If the input schema is in any way + /// unsuitable for constructing the mapper, an exception should likewise be thrown. + /// + /// The input schema for which we should get the mapper. + /// The row to row mapper. + IRowToRowMapper ITransformer.GetRowToRowMapper(DataViewSchema inputSchema) + => ((ITransformer)InternalTransform).GetRowToRowMapper(inputSchema); + + /// + /// Same as but also supports mechanism to save the state. + /// + /// The input schema for which we should get the mapper. + /// The row to row mapper. + public IRowToRowMapper GetStatefulRowToRowMapper(DataViewSchema inputSchema) + => ((IStatefulTransformer)InternalTransform).GetStatefulRowToRowMapper(inputSchema); + + /// + /// Take the data in, make transformations, output the data. + /// Note that 's are lazy, so no actual transformations happen here, just schema validation. + /// + public IDataView Transform(IDataView input) => InternalTransform.Transform(input); + + /// + /// For saving a model into a repository. + /// + void ICanSaveModel.Save(ModelSaveContext ctx) => SaveModel(ctx); + + private protected virtual void SaveModel(ModelSaveContext ctx) => InternalTransform.SaveThis(ctx); + + /// + /// Creates a row mapper from Schema. + /// + internal IStatefulRowMapper MakeRowMapper(DataViewSchema schema) => InternalTransform.MakeRowMapper(schema); + + /// + /// Creates an IDataTransform from an IDataView. + /// + internal IDataTransform MakeDataTransform(IDataView input) => InternalTransform.MakeDataTransform(input); + + public IDataView Forecast(int horizon, bool includeConfidenceInterval = false, float confidenceLevel = 0.95f) + { + var model = ((AdaptiveSingularSpectrumSequenceModelerInternal)InternalTransform.Model); + + if (includeConfidenceInterval) + { + model.ForecastWithConfidenceIntervals(horizon, out float[] forecast, + out float[] confidenceIntervalLowerBounds, out float[] confidenceIntervalUpperBounds, confidenceLevel); + + return DataViewConstructionUtils.CreateFromEnumerable(InternalTransform.Host, GetForecastWithConfidenceIntervals(forecast, + confidenceIntervalLowerBounds, confidenceIntervalUpperBounds)); + } + else + return DataViewConstructionUtils.CreateFromEnumerable(InternalTransform.Host, GetForecast(model.Forecast(horizon))); + } + + private IEnumerable GetForecast(float[] forecast) + { + foreach (var value in forecast) + yield return new SsaForecast() { Forecast = value }; + } + + private IEnumerable GetForecastWithConfidenceIntervals(float[] forecast, float[] min, float[] max) + { + for (int index = 0; index < forecast.Length; index += 1) + yield return new SsaForecastWithConfidenceInterval() { Forecast = forecast[index], ConfidenceIntervalLowerBound = min[index], ConfidenceIntervalUpperBound = max[index] }; + } + + internal class SsaForecast + { + public float Forecast; + } + + internal class SsaForecastWithConfidenceInterval : SsaForecast + { + public float ConfidenceIntervalLowerBound; + public float ConfidenceIntervalUpperBound; + } + + /// + /// Options for SSA Anomaly Detection. + /// + internal abstract class SsaForecastingOptions : ForecastingArgumentsBase + { + [Argument(ArgumentType.AtMostOnce, HelpText = "The discount factor in [0, 1]", ShortName = "disc", SortOrder = 12)] + public Single DiscountFactor = 1; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The function used to compute the error between the expected and the observed value", ShortName = "err", SortOrder = 13)] + public ErrorFunction ErrorFunction = ErrorFunction.SignedDifference; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The flag determing whether the model is adaptive", ShortName = "adp", SortOrder = 14)] + public bool IsAdaptive = false; + public int WindowSize; + public RankSelectionMethod RankSelectionMethod; + public int? Rank; + public int? MaxRank; + public bool ShouldComputeForecastIntervals; + public bool ShouldStablize; + public bool ShouldMaintainInfo; + public GrowthRatio? MaxGrowth; + public int Horizon; + public bool ComputeConfidenceIntervals; + } + + internal SsaForecastingBase InternalTransform; + + internal SsaForecastingBaseWrapper(SsaForecastingOptions options, string name, IHostEnvironment env) + { + InternalTransform = new SsaForecastingBase(options, name, env, this); + } + + internal SsaForecastingBaseWrapper(IHostEnvironment env, ModelLoadContext ctx, string name) + { + InternalTransform = new SsaForecastingBase(env, ctx, name); + } + + /// + /// This base class that implements the general anomaly detection transform based on Singular Spectrum modeling of the time-series. + /// For the details of the Singular Spectrum Analysis (SSA), refer to http://arxiv.org/pdf/1206.6910.pdf. + /// + internal sealed class SsaForecastingBase : SequentialForecastingTransformBase + { + internal SsaForecastingBaseWrapper Parent; + internal readonly bool IsAdaptive; + internal readonly int Horizon; + internal readonly bool ComputeConfidenceIntervals; + internal SequenceModelerBase Model; + + public SsaForecastingBase(SsaForecastingOptions options, string name, IHostEnvironment env, SsaForecastingBaseWrapper parent) + : base(options.TrainSize, 0, options.Source, options.Name, name, env) + { + Host.CheckUserArg(0 <= options.DiscountFactor && options.DiscountFactor <= 1, nameof(options.DiscountFactor), "Must be in the range [0, 1]."); + IsAdaptive = options.IsAdaptive; + Horizon = options.Horizon; + ComputeConfidenceIntervals = options.ComputeConfidenceIntervals; + // Creating the master SSA model + Model = new AdaptiveSingularSpectrumSequenceModelerInternal(Host, options.TrainSize, options.SeriesLength, options.WindowSize, + options.DiscountFactor, options.RankSelectionMethod, options.Rank, options.MaxRank, options.ShouldComputeForecastIntervals, options.ShouldStablize, options.ShouldMaintainInfo, + options.MaxGrowth); + + StateRef = new State(); + StateRef.InitState(WindowSize, InitialWindowSize, this, Host); + Parent = parent; + } + + public SsaForecastingBase(IHostEnvironment env, ModelLoadContext ctx, string name) + : base(env, ctx, name) + { + // *** Binary format *** + // + // bool: _isAdaptive + // int32: Horizon + // bool: ComputeConfidenceIntervals + // State: StateRef + // AdaptiveSingularSpectrumSequenceModeler: _model + + Host.CheckDecode(InitialWindowSize == 0); + + IsAdaptive = ctx.Reader.ReadBoolean(); + Horizon = ctx.Reader.ReadInt32(); + ComputeConfidenceIntervals = ctx.Reader.ReadBoolean(); + StateRef = new State(ctx.Reader); + + ctx.LoadModel, SignatureLoadModel>(env, out Model, "SSA"); + Host.CheckDecode(Model != null); + StateRef.InitState(this, Host); + } + + public override DataViewSchema GetOutputSchema(DataViewSchema inputSchema) + { + Host.CheckValue(inputSchema, nameof(inputSchema)); + + if (!inputSchema.TryGetColumnIndex(InputColumnName, out var col)) + throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", InputColumnName); + + var colType = inputSchema[col].Type; + if (colType != NumberDataViewType.Single) + throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", InputColumnName, "Single", colType.ToString()); + + return Transform(new EmptyDataView(Host, inputSchema)).Schema; + } + + private protected override void SaveModel(ModelSaveContext ctx) + { + ((ICanSaveModel)Parent).Save(ctx); + } + + internal void SaveThis(ModelSaveContext ctx) + { + Host.CheckValue(ctx, nameof(ctx)); + ctx.CheckAtModel(); + + Host.Assert(InitialWindowSize == 0); + Host.Assert(Model != null); + + // *** Binary format *** + // + // bool: _isAdaptive + // int32: Horizon + // bool: ComputeConfidenceIntervals + // State: StateRef + // AdaptiveSingularSpectrumSequenceModeler: _model + + base.SaveModel(ctx); + ctx.Writer.Write(IsAdaptive); + ctx.Writer.Write(Horizon); + ctx.Writer.Write(ComputeConfidenceIntervals); + StateRef.Save(ctx.Writer); + + ctx.SaveModel(Model, "SSA"); + } + + internal sealed class State : ForecastingStateBase + { + private SequenceModelerBase _model; + private SsaForecastingBase _parentAnomalyDetector; + + public State() + { + } + + internal State(BinaryReader reader) : base(reader) + { + WindowedBuffer = TimeSeriesUtils.DeserializeFixedSizeQueueSingle(reader, Host); + InitialWindowedBuffer = TimeSeriesUtils.DeserializeFixedSizeQueueSingle(reader, Host); + } + + internal override void Save(BinaryWriter writer) + { + base.Save(writer); + TimeSeriesUtils.SerializeFixedSizeQueue(WindowedBuffer, writer); + TimeSeriesUtils.SerializeFixedSizeQueue(InitialWindowedBuffer, writer); + } + + private protected override void CloneCore(State state) + { + base.CloneCore(state); + Contracts.Assert(state is State); + var stateLocal = state as State; + stateLocal.WindowedBuffer = WindowedBuffer.Clone(); + stateLocal.InitialWindowedBuffer = InitialWindowedBuffer.Clone(); + if (_model != null) + { + _parentAnomalyDetector.Model = _parentAnomalyDetector.Model.Clone(); + _model = _parentAnomalyDetector.Model; + } + } + + private protected override void LearnStateFromDataCore(FixedSizeQueue data) + { + // This method is empty because there is no need to implement a training logic here. + } + + private protected override void InitializeAnomalyDetector() + { + _parentAnomalyDetector = (SsaForecastingBase)Parent; + _model = _parentAnomalyDetector.Model; + } + + private protected override void TransformCore(ref float input, FixedSizeQueue windowedBuffer, long iteration, ref VBuffer dst) + { + dst = new VBuffer(_parentAnomalyDetector.Horizon, + Array.ConvertAll(((AdaptiveSingularSpectrumSequenceModelerInternal)_model).Forecast(_parentAnomalyDetector.Horizon), d => (double)d)); + + } + + public override void Consume(Single input) => _model.Consume(ref input, _parentAnomalyDetector.IsAdaptive); + } + } + } +} diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index dd25deb016..884d6c3776 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -21,6 +21,10 @@ private sealed class Prediction #pragma warning disable CS0649 [VectorType(4)] public double[] Change; + [VectorType(4)] + public double[] Min; + [VectorType(4)] + public double[] Max; #pragma warning restore CS0649 } @@ -43,6 +47,13 @@ public Data(float value) } } + class ForecastResult + { +#pragma warning disable CS0649 + public float Forecast; +#pragma warning restore CS0649 + } + private sealed class TimeSeriesData { public float Value; @@ -150,6 +161,56 @@ public void ChangePointDetectionWithSeasonality() } } + [LessThanNetCore30OrNotNetCoreFact("netcoreapp3.0 output differs from Baseline")] + public void Forecast() + { + var env = new MLContext(); + const int ChangeHistorySize = 10; + const int SeasonalitySize = 10; + const int NumberOfSeasonsInTraining = 5; + + List data = new List(); + var dataView = env.Data.LoadFromEnumerable(data); + + var args = new SsaForecasting.Options() + { + Confidence = 95, + Source = "Value", + Name = "Change", + WindowSize = 10, + SeriesLength = 11, + TrainSize = 22, + Horizon = 4 + }; + + for (int j = 0; j < NumberOfSeasonsInTraining; j++) + for (int i = 0; i < SeasonalitySize; i++) + data.Add(new Data(i)); + + for (int i = 0; i < ChangeHistorySize; i++) + data.Add(new Data(i * 100)); + + // Train + var detector = new SsaForecastingEstimator(env, args).Fit(dataView); + // Transform + var output = detector.Transform(dataView); + // Get predictions + var enumerator = env.Data.CreateEnumerable(output, true).GetEnumerator(); + Prediction row = null; + List expectedValues = new List() { 0, -3.31410598754883, 0.5, 5.12000000000001E-08, 0, 1.5700820684432983, 5.2001145245395008E-07, + 0.012414560443710681, 0, 1.2854313254356384, 0.28810801662678009, 0.02038940454467935, 0, -1.0950627326965332, 0.36663890634019225, 0.026956459625565483}; + + int index = 0; + while (enumerator.MoveNext() && index < expectedValues.Count) + { + row = enumerator.Current; + Assert.Equal(expectedValues[index++], row.Change[0], precision: 7); // Alert + Assert.Equal(expectedValues[index++], row.Change[1], precision: 7); // Raw score + Assert.Equal(expectedValues[index++], row.Change[2], precision: 7); // P-Value score + Assert.Equal(expectedValues[index++], row.Change[3], precision: 7); // Martingale score + } + } + [LessThanNetCore30OrNotNetCoreFact("netcoreapp3.0 output differs from Baseline")] public void ChangePointDetectionWithSeasonalityPredictionEngineNoColumn() { @@ -296,6 +357,77 @@ public void ChangePointDetectionWithSeasonalityPredictionEngine() Assert.Equal(1.5292508189989167E-07, prediction.Change[3], precision: 5); // Martingale score } + [LessThanNetCore30OrNotNetCoreFact("netcoreapp3.0 output differs from Baseline")] + public void ForecastingPredictionEngine() + { + const int ChangeHistorySize = 10; + const int SeasonalitySize = 10; + const int NumberOfSeasonsInTraining = 5; + + List data = new List(); + + var ml = new MLContext(seed: 1); + var dataView = ml.Data.LoadFromEnumerable(data); + + var args = new SsaForecasting.Options() + { + Confidence = 95, + Source = "Value", + Name = "Change", + WindowSize = 10, + SeriesLength = 11, + TrainSize = 22, + Horizon = 4 + }; + + for (int j = 0; j < NumberOfSeasonsInTraining; j++) + for (int i = 0; i < SeasonalitySize; i++) + data.Add(new Data(i)); + + for (int i = 0; i < ChangeHistorySize; i++) + data.Add(new Data(i * 100)); + + // Train + var model = new SsaForecastingEstimator(ml, args).Fit(dataView); + + //Model 1: Prediction #1. + var engine = model.CreateTimeSeriesForecastingEngine(ml); + engine.Update(new Data(1)); + engine.Update(new Data(2)); + var forecast = engine.Forecast(horizon: 5); + var prediction = ml.Data.CreateEnumerable(forecast, reuseRowObject: false); + + + /*Assert.Equal(0, prediction.Change[0], precision: 7); // Alert + Assert.Equal(1.1661833524703979, prediction.Change[1], precision: 5); // Raw score + Assert.Equal(0.5, prediction.Change[2], precision: 7); // P-Value score + Assert.Equal(5.1200000000000114E-08, prediction.Change[3], precision: 7); // Martingale score + + //Model 1: Checkpoint. + var modelPath = "temp.zip"; + engine.CheckPoint(ml, modelPath); + + //Model 1: Prediction #2 + prediction = engine.Predict(new Data(1)); + Assert.Equal(0, prediction.Change[0], precision: 7); // Alert + Assert.Equal(0.12216401100158691, prediction.Change[1], precision: 5); // Raw score + Assert.Equal(0.14823824685192111, prediction.Change[2], precision: 5); // P-Value score + Assert.Equal(1.5292508189989167E-07, prediction.Change[3], precision: 7); // Martingale score + + // Load Model 1. + ITransformer model2 = null; + using (var file = File.OpenRead(modelPath)) + model2 = ml.Model.Load(file, out var schema); + + //Predict and expect the same result after checkpointing(Prediction #2). + engine = model2.CreateTimeSeriesPredictionFunction(ml); + prediction = engine.Predict(new Data(1)); + Assert.Equal(0, prediction.Change[0], precision: 7); // Alert + Assert.Equal(0.12216401100158691, prediction.Change[1], precision: 5); // Raw score + Assert.Equal(0.14823824685192111, prediction.Change[2], precision: 5); // P-Value score + Assert.Equal(1.5292508189989167E-07, prediction.Change[3], precision: 5); // Martingale score*/ + } + [Fact] public void AnomalyDetectionWithSrCnn() { @@ -337,6 +469,7 @@ public void AnomalyDetectionWithSrCnn() } } + /* [Fact] public void Forecasting() { @@ -354,7 +487,7 @@ public void Forecasting() // Create forecasting model. var model = ml.Forecasting.AdaptiveSingularSpectrumSequenceModeler("Value", data.Count, SeasonalitySize + 1, SeasonalitySize, - 1, AdaptiveSingularSpectrumSequenceModeler.RankSelectionMethod.Exact, null, SeasonalitySize / 2, false, false); + 1, AdaptiveSingularSpectrumSequenceForecasting.RankSelectionMethod.Exact, null, SeasonalitySize / 2, false, false); // Train. model.Train(dataView); @@ -399,7 +532,7 @@ public void ForecastingWithConfidenceInterval() // Create forecasting model. var model = ml.Forecasting.AdaptiveSingularSpectrumSequenceModeler("Value", data.Count, SeasonalitySize + 1, SeasonalitySize, - 1, AdaptiveSingularSpectrumSequenceModeler.RankSelectionMethod.Exact, null, SeasonalitySize / 2, shouldComputeForecastIntervals: true, false); + 1, AdaptiveSingularSpectrumSequenceForecasting.RankSelectionMethod.Exact, null, SeasonalitySize / 2, shouldComputeForecastIntervals: true, false); // Train. model.Train(dataView); @@ -433,6 +566,6 @@ public void ForecastingWithConfidenceInterval() Assert.Equal(forecast, forecastCopy); Assert.Equal(lowConfInterval, lowConfIntervalCopy); Assert.Equal(upperConfInterval, upperConfIntervalCopy); - } + }*/ } } From eae6a1d7ac8ab6dc388153f85513b2aabf503dbf Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Thu, 27 Jun 2019 01:22:14 -0700 Subject: [PATCH 02/21] refactor code. --- src/Microsoft.ML.TimeSeries/Forecast.cs | 93 ------------ src/Microsoft.ML.TimeSeries/SSaForecasting.cs | 57 +++++--- .../SequentialForecastingTransformBase.cs | 85 ++++++----- .../SequentialTransformBase.cs | 12 +- .../SequentialTransformerBase.cs | 132 +++++++++++++++--- .../SsaForecastingBase.cs | 40 +++--- .../TimeSeriesDirectApi.cs | 6 +- 7 files changed, 229 insertions(+), 196 deletions(-) delete mode 100644 src/Microsoft.ML.TimeSeries/Forecast.cs diff --git a/src/Microsoft.ML.TimeSeries/Forecast.cs b/src/Microsoft.ML.TimeSeries/Forecast.cs deleted file mode 100644 index 8c3c1a7de6..0000000000 --- a/src/Microsoft.ML.TimeSeries/Forecast.cs +++ /dev/null @@ -1,93 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System.IO; -using Microsoft.ML.Data; -//using static Microsoft.ML.Transforms.TimeSeries.AdaptiveSingularSpectrumSequenceForecasting; - -namespace Microsoft.ML.TimeSeries -{ - /// - /// Interface for forecasting models. - /// - /// The type of values that are forecasted. - public interface ICanForecast : ICanSaveModel - { - /// - /// Train a forecasting model from an . - /// - /// Training data. - void Train(IDataView dataView); - - /// - /// Update a forecasting model with the new observations in the form of an . - /// - /// Reference to the observations as an - /// Name of the input column to update from. - void Update(IDataView dataView, string inputColumnName = null); - - /// - /// Perform forecasting until a particular . - /// - /// Number of values to forecast. - /// Forecasted values. - T[] Forecast(int horizon); - - /// - /// Perform forecasting until a particular and also computes confidence intervals. - /// - /// Number of values to forecast. - /// Forecasted values - /// Lower bound confidence intervals of forecasted values. - /// Upper bound confidence intervals of forecasted values. - /// Forecast confidence level. - void ForecastWithConfidenceIntervals(int horizon, out T[] forecast, out float[] confidenceIntervalLowerBounds, out float[] confidenceIntervalUpperBounds, float confidenceLevel = 0.95f); - } - - /* public static class ForecastExtensions - { - /// - /// Load a model. - /// - /// The type of , usually float. - /// - /// File path to load the model from. - /// model. - public static ICanForecast LoadForecastingModel(this ModelOperationsCatalog catalog, string filePath) - { - var env = CatalogUtils.GetEnvironment(catalog); - using (var file = File.OpenRead(filePath)) - { - using (var rep = RepositoryReader.Open(file, env)) - { - ModelLoadContext.LoadModel, SignatureLoadModel>(env, out var model, rep, LoaderSignature); - return model; - } - } - } - - /// - /// Save a model to a file specified by - /// - /// - /// - /// model to save. - /// File path to save the model to. - public static void SaveForecastingModel(this ModelOperationsCatalog catalog, ICanForecast model, string filePath) - { - var env = CatalogUtils.GetEnvironment(catalog); - using (var file = File.Create(filePath)) - { - using (var ch = env.Start("Saving forecasting model.")) - { - using (var rep = RepositoryWriter.CreateNew(file, ch)) - { - ModelSaveContext.SaveModel(rep, model, LoaderSignature); - rep.Commit(); - } - } - } - } - }*/ -} diff --git a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs index 2cf0903a33..eef75a3b99 100644 --- a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs +++ b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs @@ -44,9 +44,13 @@ internal sealed class Options : TransformInputBase SortOrder = 2)] public string Name; - [Argument(ArgumentType.Required, HelpText = "The confidence for spike detection in the range [0, 100].", - ShortName = "cnf", SortOrder = 4)] - public double Confidence = 99; + [Argument(ArgumentType.Required, HelpText = "The name of the new column", ShortName = "cnfminname", + SortOrder = 2)] + public string ForecastingConfidenceIntervalMinOutputColumnName; + + [Argument(ArgumentType.Required, HelpText = "The name of the new column", ShortName = "cnfmaxnname", + SortOrder = 2)] + public string ForecastingConfidenceIntervalMaxOutputColumnName; [Argument(ArgumentType.AtMostOnce, HelpText = "The discount factor in [0, 1]", ShortName = "disc", SortOrder = 12)] public float DiscountFactor = 1; @@ -57,13 +61,13 @@ internal sealed class Options : TransformInputBase public RankSelectionMethod RankSelectionMethod; public int? Rank; public int? MaxRank; - public bool ShouldComputeForecastIntervals; public bool ShouldStablize; public bool ShouldMaintainInfo; public GrowthRatio? MaxGrowth; public int SeriesLength; public int TrainSize; public int Horizon; + public float ConfidenceLevel; } private sealed class BaseArguments : SsaForecastingOptions @@ -72,17 +76,19 @@ public BaseArguments(Options options) { Source = options.Source; Name = options.Name; + ForecastingConfidenceIntervalMinOutputColumnName = options.ForecastingConfidenceIntervalMinOutputColumnName; + ForecastingConfidenceIntervalMaxOutputColumnName = options.ForecastingConfidenceIntervalMaxOutputColumnName; WindowSize = options.WindowSize; DiscountFactor = options.DiscountFactor; IsAdaptive = options.IsAdaptive; RankSelectionMethod = options.RankSelectionMethod; Rank = options.Rank; - ShouldComputeForecastIntervals = options.ShouldComputeForecastIntervals; ShouldStablize = options.ShouldStablize; MaxGrowth = options.MaxGrowth; SeriesLength = options.SeriesLength; TrainSize = options.TrainSize; Horizon = options.Horizon; + ConfidenceLevel = options.ConfidenceLevel; } } @@ -212,7 +218,6 @@ public sealed class SsaForecastingEstimator : IEstimator /// /// Host Environment. /// Name of the column resulting from the transformation of . - /// The confidence for spike detection in the range [0, 100]. /// /// /// @@ -225,9 +230,11 @@ public sealed class SsaForecastingEstimator : IEstimator /// /// Name of column to transform. If set to , the value of the will be used as source. /// The vector contains Alert, Raw Score, P-Value as first three values. + /// + /// + /// internal SsaForecastingEstimator(IHostEnvironment env, string outputColumnName, - int confidence, float discountFactor, bool isAdaptive, int windowSize, @@ -238,23 +245,27 @@ internal SsaForecastingEstimator(IHostEnvironment env, bool shouldStablize, bool shouldMaintainInfo, GrowthRatio? maxGrowth, - string inputColumnName = null) + string inputColumnName = null, + string forecastingConfidenceIntervalMinOutputColumnName = null, + string forecastingConfidenceIntervalMaxOutputColumnName = null, + float confidenceLevel = 0.95f) : this(env, new SsaForecasting.Options - { - Source = inputColumnName ?? outputColumnName, - Name = outputColumnName, - Confidence = confidence, - DiscountFactor = discountFactor, - IsAdaptive = isAdaptive, - WindowSize = windowSize, - RankSelectionMethod = rankSelectionMethod, - Rank = rank, - MaxRank = maxRank, - ShouldComputeForecastIntervals = shouldComputeForecastIntervals, - ShouldStablize = shouldStablize, - ShouldMaintainInfo = shouldMaintainInfo, - MaxGrowth = maxGrowth, - }) + { + Source = inputColumnName ?? outputColumnName, + Name = outputColumnName, + DiscountFactor = discountFactor, + IsAdaptive = isAdaptive, + WindowSize = windowSize, + RankSelectionMethod = rankSelectionMethod, + Rank = rank, + MaxRank = maxRank, + ShouldStablize = shouldStablize, + ShouldMaintainInfo = shouldMaintainInfo, + MaxGrowth = maxGrowth, + ConfidenceLevel = confidenceLevel, + ForecastingConfidenceIntervalMinOutputColumnName = forecastingConfidenceIntervalMinOutputColumnName, + ForecastingConfidenceIntervalMaxOutputColumnName = forecastingConfidenceIntervalMaxOutputColumnName + }) { } diff --git a/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs b/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs index 62594a8dc0..5b9fa50edb 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs @@ -7,9 +7,8 @@ using System.Threading; using Microsoft.ML.CommandLine; using Microsoft.ML.Data; -using Microsoft.ML.Internal.CpuMath; -using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Runtime; +using static Microsoft.ML.DataViewSchema; namespace Microsoft.ML.Transforms.TimeSeries { @@ -27,47 +26,56 @@ internal abstract class ForecastingArgumentsBase SortOrder = 2)] public string Name; - [Argument(ArgumentType.AtMostOnce, HelpText = "The size of the sliding window for computing the p-value.", ShortName = "wnd", + [Argument(ArgumentType.Required, HelpText = "The name of the new column", ShortName = "cnfminname", + SortOrder = 2)] + public string ForecastingConfidenceIntervalMinOutputColumnName; + + [Argument(ArgumentType.Required, HelpText = "The name of the new column", ShortName = "cnfmaxnname", + SortOrder = 2)] + public string ForecastingConfidenceIntervalMaxOutputColumnName; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The size of the sliding window.", ShortName = "wnd", SortOrder = 3)] public int TrainSize = 1; - [Argument(ArgumentType.AtMostOnce, HelpText = "The size of the initial window for computing the p-value as well as" + - " training if needed. The default value is set to 0, which means there is no initial window considered.", - ShortName = "initwnd", SortOrder = 5)] + [Argument(ArgumentType.AtMostOnce, HelpText = "The size of the initial window. The default value " + + "is set to 0, which means there is no initial window considered.", ShortName = "initwnd", SortOrder = 5)] public int SeriesLength = 0; } - // REVIEW: This base class and its children classes generate one output column of type VBuffer to output 3 different anomaly scores as well as - // the alert flag. Ideally these 4 output information should be put in four seaparate columns instead of one VBuffer<> column. However, this is not currently - // possible due to our design restriction. This must be fixed in the next version and will potentially affect the children classes. /// - /// The base class for sequential anomaly detection transforms that supports the p-value as well as the martingales scores computation from the sequence of - /// raw anomaly scores whose calculation is specified by the children classes. This class also provides mechanism for the threshold-based alerting on - /// the raw anomaly score, the p-value score or the martingale score. Currently, this class supports Power and Mixture martingales. + /// The base class for forecasting transforms that also supports confidence intervals for each forecasted value. /// For more details, please refer to http://arxiv.org/pdf/1204.3251.pdf /// /// The type of the input sequence /// The type of the input sequence - internal abstract class SequentialForecastingTransformBase : SequentialTransformerBase, TState> + internal abstract class SequentialForecastingTransformBase : SequentialTransformerBase, TState> where TState : SequentialForecastingTransformBase.ForecastingStateBase, new() { // The size of the VBuffer in the dst column. - internal int OutputLength; - - private protected SequentialForecastingTransformBase(int windowSize, int initialWindowSize, string inputColumnName, string outputColumnName, string name, IHostEnvironment env) - : base(Contracts.CheckRef(env, nameof(env)).Register(name), windowSize, initialWindowSize, outputColumnName, inputColumnName, new VectorDataViewType(NumberDataViewType.Double, 0)) + private readonly int _outputLength; + + private protected SequentialForecastingTransformBase(int windowSize, int initialWindowSize, + string inputColumnName, string outputColumnName, string forecastingConfidenceIntervalMinOutputColumnName, + string forecastingConfidenceIntervalMaxOutputColumnName, string name, int outputLength, IHostEnvironment env) + : base(Contracts.CheckRef(env, nameof(env)).Register(name), windowSize, initialWindowSize, + outputColumnName, forecastingConfidenceIntervalMinOutputColumnName, + forecastingConfidenceIntervalMaxOutputColumnName, inputColumnName, new VectorDataViewType(NumberDataViewType.Single, outputLength)) { + _outputLength = outputLength; } - private protected SequentialForecastingTransformBase(ForecastingArgumentsBase args, string name, IHostEnvironment env) - : this(args.TrainSize, args.SeriesLength, args.Source, args.Name, name, env) + private protected SequentialForecastingTransformBase(ForecastingArgumentsBase args, string name, int outputLength, IHostEnvironment env) + : this(args.TrainSize, args.SeriesLength, args.Source, args.ForecastingConfidenceIntervalMinOutputColumnName, + args.ForecastingConfidenceIntervalMaxOutputColumnName, args.Name, name, outputLength, env) { } private protected SequentialForecastingTransformBase(IHostEnvironment env, ModelLoadContext ctx, string name) : base(Contracts.CheckRef(env, nameof(env)).Register(name), ctx) { + _outputLength = ctx.Reader.ReadInt32(); // *** Binary format *** // } @@ -81,6 +89,7 @@ private protected override void SaveModel(ModelSaveContext ctx) // base.SaveModel(ctx); + ctx.Writer.Write(_outputLength); } internal override IStatefulRowMapper MakeRowMapper(DataViewSchema schema) => new Mapper(Host, this, schema); @@ -91,7 +100,6 @@ internal sealed class Mapper : IStatefulRowMapper private readonly SequentialForecastingTransformBase _parent; private readonly DataViewSchema _parentSchema; private readonly int _inputColumnIndex; - private readonly VBuffer> _slotNames; private ForecastingStateBase State { get; set; } public Mapper(IHostEnvironment env, SequentialForecastingTransformBase parent, DataViewSchema inputSchema) @@ -110,24 +118,29 @@ public Mapper(IHostEnvironment env, SequentialForecastingTransformBase>(_parent.OutputLength, new[] {"".AsMemory()}); - State = (ForecastingStateBase)_parent.StateRef; } public DataViewSchema.DetachedColumn[] GetOutputColumns() { - var meta = new DataViewSchema.Annotations.Builder(); - meta.AddSlotNames(_parent.OutputLength, GetSlotNames); - var info = new DataViewSchema.DetachedColumn[3]; - info[0] = new DataViewSchema.DetachedColumn(_parent.OutputColumnName, new VectorDataViewType(NumberDataViewType.Double, 0), meta.ToAnnotations()); - info[1] = new DataViewSchema.DetachedColumn("Min", new VectorDataViewType(NumberDataViewType.Double, 0), meta.ToAnnotations()); - info[2] = new DataViewSchema.DetachedColumn("Max", new VectorDataViewType(NumberDataViewType.Double, 0), meta.ToAnnotations()); + DetachedColumn[] info; + + if (!string.IsNullOrEmpty(_parent.ForecastingConfidenceIntervalMaxOutputColumnName)) + { + info = new DetachedColumn[3]; + info[0] = new DetachedColumn(_parent.OutputColumnName, new VectorDataViewType(NumberDataViewType.Single, _parent._outputLength)); + info[1] = new DetachedColumn(_parent.ForecastingConfidenceIntervalMinOutputColumnName, new VectorDataViewType(NumberDataViewType.Single, _parent._outputLength)); + info[2] = new DetachedColumn(_parent.ForecastingConfidenceIntervalMaxOutputColumnName, new VectorDataViewType(NumberDataViewType.Single, _parent._outputLength)); + } + else + { + info = new DetachedColumn[3]; + info[0] = new DetachedColumn(_parent.OutputColumnName, new VectorDataViewType(NumberDataViewType.Single, _parent._outputLength)); + } + return info; } - public void GetSlotNames(ref VBuffer> dst) => _slotNames.CopyTo(ref dst, 0, _parent.OutputLength); - public Func GetDependencies(Func activeOutput) { if (activeOutput(0)) @@ -149,7 +162,7 @@ public Delegate[] CreateGetters(DataViewRow input, Func activeOutput, return getters; } - private delegate void ProcessData(ref TInput src, ref VBuffer dst); + private delegate void ProcessData(ref TInput src, ref VBuffer dst); private Delegate MakeGetter(DataViewRow input, ForecastingStateBase state) { @@ -158,7 +171,7 @@ private Delegate MakeGetter(DataViewRow input, ForecastingStateBase state) ProcessData processData = _parent.WindowSize > 0 ? (ProcessData)state.Process : state.ProcessWithoutBuffer; - ValueGetter> valueGetter = (ref VBuffer dst) => + ValueGetter> valueGetter = (ref VBuffer dst) => { TInput src = default; srcGetter(ref src); @@ -207,7 +220,7 @@ public ITransformer GetTransformer() /// The base state class for sequential anomaly detection: this class implements the p-values and martinagle calculations for anomaly detection /// given that the raw anomaly score calculation is specified by the derived classes. /// - internal abstract class ForecastingStateBase : SequentialTransformerBase, TState>.StateBase + internal abstract class ForecastingStateBase : SequentialTransformerBase, TState>.StateBase { // A reference to the parent transform. protected SequentialForecastingTransformBase Parent; @@ -228,13 +241,13 @@ internal override void Save(BinaryWriter writer) base.Save(writer); } - private protected override void SetNaOutput(ref VBuffer dst) + private protected override void SetNaOutput(ref VBuffer dst) { - var outputLength = Parent.OutputLength; + var outputLength = Parent._outputLength; var editor = VBufferEditor.Create(ref dst, outputLength); for (int i = 0; i < outputLength; ++i) - editor.Values[i] = Double.NaN; + editor.Values[i] = float.NaN; dst = editor.Commit(); } diff --git a/src/Microsoft.ML.TimeSeries/SequentialTransformBase.cs b/src/Microsoft.ML.TimeSeries/SequentialTransformBase.cs index 253ad7d86f..8995648fe2 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialTransformBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialTransformBase.cs @@ -29,20 +29,20 @@ public DataBox(T value) } } - internal sealed class DataBoxMultiple + internal sealed class DataBoxForecastingWithConfidenceIntervals { - public T Value; + public T Value1; public T Value2; public T Value3; - public DataBoxMultiple() + public DataBoxForecastingWithConfidenceIntervals() { } - public DataBoxMultiple(T value, T value1, T value2) + public DataBoxForecastingWithConfidenceIntervals(T value1, T value2, T value3) { - Value = value; - Value2 = value1; + Value1 = value1; + Value2 = value2; Value3 = value2; } } diff --git a/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs b/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs index 39dfed8fb2..5ae4eb00e2 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs @@ -166,6 +166,40 @@ public void UpdateStateCore(ref TInput input, bool buffer = true) } } + public void Process(ref TInput input, ref TOutput output1, ref TOutput output2, ref TOutput output3) + { + if (PreviousPosition == -1) + UpdateStateCore(ref input); + + if (InitialWindowedBuffer.Count < InitialWindowSize) + { + SetNaOutput(ref output1); + + if (InitialWindowedBuffer.Count == InitialWindowSize) + LearnStateFromDataCore(InitialWindowedBuffer); + } + else + { + TransformCore(ref input, WindowedBuffer, RowCounter - InitialWindowSize, ref output1, ref output2, ref output3); + } + } + + public void ProcessWithoutBuffer(ref TInput input, ref TOutput output1, ref TOutput output2, ref TOutput output3) + { + if (PreviousPosition == -1) + UpdateStateCore(ref input, false); + + if (InitialWindowedBuffer.Count < InitialWindowSize) + { + SetNaOutput(ref output1); + + if (InitialWindowedBuffer.Count == InitialWindowSize) + LearnStateFromDataCore(InitialWindowedBuffer); + } + else + TransformCore(ref input, WindowedBuffer, RowCounter - InitialWindowSize, ref output1, ref output2, ref output3); + } + public void Process(ref TInput input, ref TOutput output) { if (PreviousPosition == -1) @@ -218,6 +252,21 @@ private protected virtual void TransformCore(ref TInput input, FixedSizeQueue + /// The abstract method that realizes the main logic for the transform. + /// + /// A reference to the input object. + /// A reference to the dst object. + /// + /// + /// A reference to the windowed buffer. + /// A long number that indicates the number of times TransformCore has been called so far (starting value = 0). + private protected virtual void TransformCore(ref TInput input, FixedSizeQueue windowedBuffer, long iteration, + ref TOutput dst1, ref TOutput dst2, ref TOutput dst3) + { + + } + /// /// The abstract method that realizes the logic for initializing the state object. /// @@ -267,6 +316,8 @@ private protected virtual void CloneCore(TState state) internal readonly string InputColumnName; internal readonly string OutputColumnName; + internal readonly string ForecastingConfidenceIntervalMinOutputColumnName; + internal readonly string ForecastingConfidenceIntervalMaxOutputColumnName; private protected DataViewType OutputColumnType; bool ITransformer.IsRowToRowMapper => false; @@ -283,7 +334,8 @@ private protected virtual void CloneCore(TState state) /// The name of the dst column. /// The name of the input column. /// - private protected SequentialTransformerBase(IHost host, int windowSize, int initialWindowSize, string outputColumnName, string inputColumnName, DataViewType outputColType) + private protected SequentialTransformerBase(IHost host, int windowSize, int initialWindowSize, + string outputColumnName, string inputColumnName, DataViewType outputColType) { Host = host; Host.CheckParam(initialWindowSize >= 0, nameof(initialWindowSize), "Must be non-negative."); @@ -300,6 +352,15 @@ private protected SequentialTransformerBase(IHost host, int windowSize, int init WindowSize = windowSize; } + private protected SequentialTransformerBase(IHost host, int windowSize, int initialWindowSize, + string outputColumnName, string forecastingConfidenceIntervalMinOutputColumnName, + string forecastingConfidenceIntervalMaxOutputColumnName, string inputColumnName, DataViewType outputColType) : + this(host, windowSize, initialWindowSize, outputColumnName, inputColumnName, outputColType) + { + ForecastingConfidenceIntervalMinOutputColumnName = forecastingConfidenceIntervalMinOutputColumnName; + ForecastingConfidenceIntervalMaxOutputColumnName = forecastingConfidenceIntervalMaxOutputColumnName; + } + private protected SequentialTransformerBase(IHost host, ModelLoadContext ctx) { Host = host; @@ -323,6 +384,8 @@ private protected SequentialTransformerBase(IHost host, ModelLoadContext ctx) InputColumnName = inputColumnName; OutputColumnName = outputColumnName; + ForecastingConfidenceIntervalMinOutputColumnName = ctx.Reader.ReadString(); + ForecastingConfidenceIntervalMaxOutputColumnName = ctx.Reader.ReadString(); InitialWindowSize = initialWindowSize; WindowSize = windowSize; @@ -349,7 +412,8 @@ private protected virtual void SaveModel(ModelSaveContext ctx) ctx.Writer.Write(InitialWindowSize); ctx.SaveNonEmptyString(InputColumnName); ctx.SaveNonEmptyString(OutputColumnName); - + ctx.Writer.Write(ForecastingConfidenceIntervalMinOutputColumnName); + ctx.Writer.Write(ForecastingConfidenceIntervalMaxOutputColumnName); var bs = new BinarySaver(Host, new BinarySaver.Arguments()); bs.TryWriteTypeDescription(ctx.Writer.BaseStream, OutputColumnType, out int byteWritten); } @@ -397,7 +461,9 @@ public SequentialDataTransform(IHost host, SequentialTransformerBase 0, _parent.OutputColumnType); + _parent.OutputColumnName, _parent.ForecastingConfidenceIntervalMinOutputColumnName, + _parent.ForecastingConfidenceIntervalMaxOutputColumnName, InitFunction, _parent.WindowSize > 0, _parent.OutputColumnType); + _mapper = mapper; _bindings = new ColumnBindings(input.Schema, _mapper.GetOutputColumns()); } @@ -405,34 +471,59 @@ public SequentialDataTransform(IHost host, SequentialTransformerBase _mapper.CloneState(); private static IDataTransform CreateLambdaTransform(IHost host, IDataView input, string inputColumnName, - string outputColumnName, Action initFunction, bool hasBuffer, DataViewType outputColTypeOverride) + string outputColumnName, string forecastingConfidenceIntervalMinOutputColumnName, + string forecastingConfidenceIntervalMaxOutputColumnName, Action initFunction, bool hasBuffer, DataViewType outputColTypeOverride) { var inputSchema = SchemaDefinition.Create(typeof(DataBox)); inputSchema[0].ColumnName = inputColumnName; - var outputSchema = SchemaDefinition.Create(typeof(DataBoxMultiple)); - outputSchema[0].ColumnName = outputColumnName; + SchemaDefinition outputSchema; - if (outputColTypeOverride != null) - outputSchema[0].ColumnType = outputColTypeOverride; + if (forecastingConfidenceIntervalMinOutputColumnName != null) + { + outputSchema = SchemaDefinition.Create(typeof(DataBoxForecastingWithConfidenceIntervals)); + outputSchema[0].ColumnName = outputColumnName; + + if (outputColTypeOverride != null) + outputSchema[0].ColumnType = outputSchema[1].ColumnType = outputSchema[2].ColumnType = outputColTypeOverride; - outputSchema[1].ColumnName = "Min"; - outputSchema[2].ColumnName = "Max"; + outputSchema[1].ColumnName = forecastingConfidenceIntervalMinOutputColumnName; + outputSchema[2].ColumnName = forecastingConfidenceIntervalMaxOutputColumnName; - Action, DataBoxMultiple, TState> lambda; - if (hasBuffer) - lambda = MapFunction; + Action, DataBoxForecastingWithConfidenceIntervals, TState> lambda; + if (hasBuffer) + lambda = MapFunction; + else + lambda = MapFunctionWithoutBuffer; + + return LambdaTransform.CreateMap(host, input, lambda, initFunction, inputSchema, outputSchema); + } else - lambda = MapFunction; + { + outputSchema = SchemaDefinition.Create(typeof(DataBox)); + outputSchema[0].ColumnName = outputColumnName; + + if (outputColTypeOverride != null) + outputSchema[0].ColumnType = outputColTypeOverride; + + Action, DataBox, TState> lambda; + if (hasBuffer) + lambda = MapFunction; + else + lambda = MapFunctionWithoutBuffer; - return LambdaTransform.CreateMap(host, input, lambda, initFunction, inputSchema, outputSchema); + return LambdaTransform.CreateMap(host, input, lambda, initFunction, inputSchema, outputSchema); + } } - private static void MapFunction(DataBox input, DataBoxMultiple output, TState state) + private static void MapFunction(DataBox input, DataBox output, TState state) { state.Process(ref input.Value, ref output.Value); - state.Process(ref input.Value, ref output.Value2); - state.Process(ref input.Value, ref output.Value3); + } + + private static void MapFunction(DataBox input, DataBoxForecastingWithConfidenceIntervals output, TState state) + { + state.Process(ref input.Value, ref output.Value1, ref output.Value2, ref output.Value3); } private static void MapFunctionWithoutBuffer(DataBox input, DataBox output, TState state) @@ -440,6 +531,11 @@ private static void MapFunctionWithoutBuffer(DataBox input, DataBox input, DataBoxForecastingWithConfidenceIntervals output, TState state) + { + state.ProcessWithoutBuffer(ref input.Value, ref output.Value1, ref output.Value2, ref output.Value3); + } + private void InitFunction(TState state) { state.InitState(_parent.WindowSize, _parent.InitialWindowSize, _parent, _parent.Host); diff --git a/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs b/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs index 9c41801a00..d297d74a80 100644 --- a/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs +++ b/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs @@ -133,12 +133,11 @@ internal abstract class SsaForecastingOptions : ForecastingArgumentsBase public RankSelectionMethod RankSelectionMethod; public int? Rank; public int? MaxRank; - public bool ShouldComputeForecastIntervals; public bool ShouldStablize; public bool ShouldMaintainInfo; public GrowthRatio? MaxGrowth; public int Horizon; - public bool ComputeConfidenceIntervals; + public float ConfidenceLevel; } internal SsaForecastingBase InternalTransform; @@ -147,7 +146,7 @@ internal SsaForecastingBaseWrapper(SsaForecastingOptions options, string name, I { InternalTransform = new SsaForecastingBase(options, name, env, this); } - + //string forecastingConfidenceIntervalMinOutputColumnName, string forecastingConfidenceIntervalMaxOutputColumnName, int horizon, bool computeConfidenceIntervals internal SsaForecastingBaseWrapper(IHostEnvironment env, ModelLoadContext ctx, string name) { InternalTransform = new SsaForecastingBase(env, ctx, name); @@ -162,28 +161,28 @@ internal sealed class SsaForecastingBase : SequentialForecastingTransformBase Model; public SsaForecastingBase(SsaForecastingOptions options, string name, IHostEnvironment env, SsaForecastingBaseWrapper parent) - : base(options.TrainSize, 0, options.Source, options.Name, name, env) + : base(options.TrainSize, 0, options.Source, options.Name, options.ForecastingConfidenceIntervalMinOutputColumnName, + options.ForecastingConfidenceIntervalMaxOutputColumnName, name, options.Horizon, env) { Host.CheckUserArg(0 <= options.DiscountFactor && options.DiscountFactor <= 1, nameof(options.DiscountFactor), "Must be in the range [0, 1]."); IsAdaptive = options.IsAdaptive; Horizon = options.Horizon; - ComputeConfidenceIntervals = options.ComputeConfidenceIntervals; + ConfidenceLevel = options.ConfidenceLevel; // Creating the master SSA model Model = new AdaptiveSingularSpectrumSequenceModelerInternal(Host, options.TrainSize, options.SeriesLength, options.WindowSize, - options.DiscountFactor, options.RankSelectionMethod, options.Rank, options.MaxRank, options.ShouldComputeForecastIntervals, options.ShouldStablize, options.ShouldMaintainInfo, - options.MaxGrowth); + options.DiscountFactor, options.RankSelectionMethod, options.Rank, options.MaxRank, options.ShouldStablize, options.ShouldMaintainInfo, + !string.IsNullOrEmpty(options.ForecastingConfidenceIntervalMinOutputColumnName), options.MaxGrowth); StateRef = new State(); StateRef.InitState(WindowSize, InitialWindowSize, this, Host); Parent = parent; } - public SsaForecastingBase(IHostEnvironment env, ModelLoadContext ctx, string name) - : base(env, ctx, name) + public SsaForecastingBase(IHostEnvironment env, ModelLoadContext ctx, string name) : base(env, ctx, name) { // *** Binary format *** // @@ -197,7 +196,7 @@ public SsaForecastingBase(IHostEnvironment env, ModelLoadContext ctx, string nam IsAdaptive = ctx.Reader.ReadBoolean(); Horizon = ctx.Reader.ReadInt32(); - ComputeConfidenceIntervals = ctx.Reader.ReadBoolean(); + ConfidenceLevel = ctx.Reader.ReadFloat(); StateRef = new State(ctx.Reader); ctx.LoadModel, SignatureLoadModel>(env, out Model, "SSA"); @@ -236,14 +235,13 @@ internal void SaveThis(ModelSaveContext ctx) // // bool: _isAdaptive // int32: Horizon - // bool: ComputeConfidenceIntervals // State: StateRef // AdaptiveSingularSpectrumSequenceModeler: _model base.SaveModel(ctx); ctx.Writer.Write(IsAdaptive); ctx.Writer.Write(Horizon); - ctx.Writer.Write(ComputeConfidenceIntervals); + ctx.Writer.Write(ConfidenceLevel); StateRef.Save(ctx.Writer); ctx.SaveModel(Model, "SSA"); @@ -296,11 +294,21 @@ private protected override void InitializeAnomalyDetector() _model = _parentAnomalyDetector.Model; } - private protected override void TransformCore(ref float input, FixedSizeQueue windowedBuffer, long iteration, ref VBuffer dst) + private protected override void TransformCore(ref float input, FixedSizeQueue windowedBuffer, long iteration, ref VBuffer dst) + { + dst = new VBuffer(_parentAnomalyDetector.Horizon, + ((AdaptiveSingularSpectrumSequenceModelerInternal)_model).Forecast(_parentAnomalyDetector.Horizon)); + } + + private protected override void TransformCore(ref float input, FixedSizeQueue windowedBuffer, long iteration, + ref VBuffer dst1, ref VBuffer dst2, ref VBuffer dst3) { - dst = new VBuffer(_parentAnomalyDetector.Horizon, - Array.ConvertAll(((AdaptiveSingularSpectrumSequenceModelerInternal)_model).Forecast(_parentAnomalyDetector.Horizon), d => (double)d)); + ((AdaptiveSingularSpectrumSequenceModelerInternal)_model).ForecastWithConfidenceIntervals(_parentAnomalyDetector.Horizon, + out float[] forecast, out float[] min, out float[] max, _parentAnomalyDetector.ConfidenceLevel); + dst1 = new VBuffer(_parentAnomalyDetector.Horizon, forecast); + dst2 = new VBuffer(_parentAnomalyDetector.Horizon, min); + dst3 = new VBuffer(_parentAnomalyDetector.Horizon, max); } public override void Consume(Single input) => _model.Consume(ref input, _parentAnomalyDetector.IsAdaptive); diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 884d6c3776..43e2e1201c 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -2,12 +2,10 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; using System.Collections.Generic; using System.IO; using Microsoft.ML.Data; using Microsoft.ML.TestFramework.Attributes; -using Microsoft.ML.TimeSeries; using Microsoft.ML.Transforms.TimeSeries; using Xunit; @@ -174,7 +172,7 @@ public void Forecast() var args = new SsaForecasting.Options() { - Confidence = 95, + ConfidenceLevel = 0.95f, Source = "Value", Name = "Change", WindowSize = 10, @@ -371,7 +369,7 @@ public void ForecastingPredictionEngine() var args = new SsaForecasting.Options() { - Confidence = 95, + ConfidenceLevel = 0.95f, Source = "Value", Name = "Change", WindowSize = 10, From 5362fdff6b9286308eec905f7f6f25aebd7204a1 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Thu, 27 Jun 2019 01:24:01 -0700 Subject: [PATCH 03/21] refactor code. --- .../SequentialForecastingTransformBase.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs b/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs index 5b9fa50edb..44e1c289cb 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs @@ -134,7 +134,7 @@ public DataViewSchema.DetachedColumn[] GetOutputColumns() } else { - info = new DetachedColumn[3]; + info = new DetachedColumn[1]; info[0] = new DetachedColumn(_parent.OutputColumnName, new VectorDataViewType(NumberDataViewType.Single, _parent._outputLength)); } From 28ed52d07672cd8429157c42e4b76805b831a626 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Thu, 27 Jun 2019 09:50:10 -0700 Subject: [PATCH 04/21] fix. --- .../SsaForecastingBase.cs | 4 ++-- .../TimeSeriesDirectApi.cs | 18 ++++++++++++++---- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs b/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs index d297d74a80..0de478fb0c 100644 --- a/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs +++ b/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs @@ -174,8 +174,8 @@ public SsaForecastingBase(SsaForecastingOptions options, string name, IHostEnvir ConfidenceLevel = options.ConfidenceLevel; // Creating the master SSA model Model = new AdaptiveSingularSpectrumSequenceModelerInternal(Host, options.TrainSize, options.SeriesLength, options.WindowSize, - options.DiscountFactor, options.RankSelectionMethod, options.Rank, options.MaxRank, options.ShouldStablize, options.ShouldMaintainInfo, - !string.IsNullOrEmpty(options.ForecastingConfidenceIntervalMinOutputColumnName), options.MaxGrowth); + options.DiscountFactor, options.RankSelectionMethod, options.Rank, options.MaxRank, !string.IsNullOrEmpty(options.ForecastingConfidenceIntervalMinOutputColumnName), + options.ShouldStablize, options.ShouldMaintainInfo, options.MaxGrowth); StateRef = new State(); StateRef.InitState(WindowSize, InitialWindowSize, this, Host); diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 43e2e1201c..a8cdd5f4be 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -19,10 +19,18 @@ private sealed class Prediction #pragma warning disable CS0649 [VectorType(4)] public double[] Change; +#pragma warning restore CS0649 + } + + private sealed class ForecastPrediction + { +#pragma warning disable CS0649 + [VectorType(4)] + public float[] Change; [VectorType(4)] - public double[] Min; + public float[] Min; [VectorType(4)] - public double[] Max; + public float[] Max; #pragma warning restore CS0649 } @@ -175,6 +183,8 @@ public void Forecast() ConfidenceLevel = 0.95f, Source = "Value", Name = "Change", + ForecastingConfidenceIntervalMinOutputColumnName = "Min", + ForecastingConfidenceIntervalMaxOutputColumnName = "Max", WindowSize = 10, SeriesLength = 11, TrainSize = 22, @@ -193,8 +203,8 @@ public void Forecast() // Transform var output = detector.Transform(dataView); // Get predictions - var enumerator = env.Data.CreateEnumerable(output, true).GetEnumerator(); - Prediction row = null; + var enumerator = env.Data.CreateEnumerable(output, true).GetEnumerator(); + ForecastPrediction row = null; List expectedValues = new List() { 0, -3.31410598754883, 0.5, 5.12000000000001E-08, 0, 1.5700820684432983, 5.2001145245395008E-07, 0.012414560443710681, 0, 1.2854313254356384, 0.28810801662678009, 0.02038940454467935, 0, -1.0950627326965332, 0.36663890634019225, 0.026956459625565483}; From e0d79d89ef20f61011c934b75069954615449561 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Thu, 27 Jun 2019 11:04:10 -0700 Subject: [PATCH 05/21] fix. --- .../AdaptiveSingularSpectrumSequenceModeler.cs | 7 +++++++ src/Microsoft.ML.TimeSeries/SSaForecasting.cs | 4 ++-- src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs | 6 +++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs b/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs index 4bc50590a9..40c4fb0f89 100644 --- a/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs +++ b/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs @@ -5,10 +5,17 @@ using System; using System.Collections.Generic; using System.Numerics; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Internal.CpuMath; using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Runtime; +using Microsoft.ML.Transforms.TimeSeries; + +[assembly: LoadableClass(typeof(AdaptiveSingularSpectrumSequenceModelerInternal), + typeof(AdaptiveSingularSpectrumSequenceModelerInternal), null, typeof(SignatureLoadModel), + "SSA Sequence Modeler", + AdaptiveSingularSpectrumSequenceModelerInternal.LoaderSignature)] namespace Microsoft.ML.Transforms.TimeSeries { diff --git a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs index eef75a3b99..157159b3bc 100644 --- a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs +++ b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs @@ -31,8 +31,8 @@ public sealed class SsaForecasting : SsaForecastingBaseWrapper, IStatefulTransfo { internal const string Summary = "This transform forecasts using Singular Spectrum Analysis (SSA)."; internal const string LoaderSignature = "SsaForecasting"; - internal const string UserName = "SSA Spike Detection"; - internal const string ShortName = "spike"; + internal const string UserName = "SSA Forecasting"; + internal const string ShortName = "ssafcst"; internal sealed class Options : TransformInputBase { diff --git a/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs b/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs index 5ae4eb00e2..59775a2945 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs @@ -412,8 +412,8 @@ private protected virtual void SaveModel(ModelSaveContext ctx) ctx.Writer.Write(InitialWindowSize); ctx.SaveNonEmptyString(InputColumnName); ctx.SaveNonEmptyString(OutputColumnName); - ctx.Writer.Write(ForecastingConfidenceIntervalMinOutputColumnName); - ctx.Writer.Write(ForecastingConfidenceIntervalMaxOutputColumnName); + ctx.Writer.Write(ForecastingConfidenceIntervalMinOutputColumnName ?? string.Empty); + ctx.Writer.Write(ForecastingConfidenceIntervalMaxOutputColumnName ?? string.Empty); var bs = new BinarySaver(Host, new BinarySaver.Arguments()); bs.TryWriteTypeDescription(ctx.Writer.BaseStream, OutputColumnType, out int byteWritten); } @@ -479,7 +479,7 @@ private static IDataTransform CreateLambdaTransform(IHost host, IDataView input, SchemaDefinition outputSchema; - if (forecastingConfidenceIntervalMinOutputColumnName != null) + if (!string.IsNullOrEmpty(forecastingConfidenceIntervalMinOutputColumnName)) { outputSchema = SchemaDefinition.Create(typeof(DataBoxForecastingWithConfidenceIntervals)); outputSchema[0].ColumnName = outputColumnName; From 5e4e5c0a82c90e1f5a3904460470309370882706 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Thu, 27 Jun 2019 11:10:05 -0700 Subject: [PATCH 06/21] fix. --- test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index a8cdd5f4be..3d4e05dd66 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -212,10 +212,10 @@ public void Forecast() while (enumerator.MoveNext() && index < expectedValues.Count) { row = enumerator.Current; - Assert.Equal(expectedValues[index++], row.Change[0], precision: 7); // Alert + /*Assert.Equal(expectedValues[index++], row.Change[0], precision: 7); // Alert Assert.Equal(expectedValues[index++], row.Change[1], precision: 7); // Raw score Assert.Equal(expectedValues[index++], row.Change[2], precision: 7); // P-Value score - Assert.Equal(expectedValues[index++], row.Change[3], precision: 7); // Martingale score + Assert.Equal(expectedValues[index++], row.Change[3], precision: 7); // Martingale score*/ } } From 7c320cf57488068742313f6f6230ec3ddd94c57b Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Thu, 27 Jun 2019 11:37:15 -0700 Subject: [PATCH 07/21] fix. --- .../Dynamic/Transforms/TimeSeries/Forecasting.cs | 4 ++-- .../TimeSeries/ForecastingWithConfidenceInterval.cs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs index f69f8ff8d8..87bd685513 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs @@ -1,8 +1,8 @@ -using System; +/*using System; using System.Collections.Generic; using Microsoft.ML; using Microsoft.ML.Transforms.TimeSeries; -using Microsoft.ML.TimeSeries; +using Microsoft.ML.TimeSeries;*/ namespace Samples.Dynamic { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs index de546e043f..d446cbaf7b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs @@ -1,8 +1,8 @@ -using System; +/*using System; using System.Collections.Generic; using Microsoft.ML; using Microsoft.ML.Transforms.TimeSeries; -using Microsoft.ML.TimeSeries; +using Microsoft.ML.TimeSeries;*/ namespace Samples.Dynamic { From 9f04af9d2699c07a3fdc23965d1aadd1d967a228 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Thu, 27 Jun 2019 14:17:40 -0700 Subject: [PATCH 08/21] fix. --- src/Microsoft.ML.TimeSeries/SSaForecasting.cs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs index 157159b3bc..25835cfcd3 100644 --- a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs +++ b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs @@ -307,7 +307,18 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema) }; var resultDic = inputSchema.ToDictionary(x => x.Name); resultDic[_options.Name] = new SchemaShape.Column( - _options.Name, SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Double, false, new SchemaShape(metadata)); + _options.Name, SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Single, false, new SchemaShape(metadata)); + + if (!string.IsNullOrEmpty(_options.ForecastingConfidenceIntervalMaxOutputColumnName)) + { + resultDic[_options.ForecastingConfidenceIntervalMinOutputColumnName] = new SchemaShape.Column( + _options.ForecastingConfidenceIntervalMinOutputColumnName, SchemaShape.Column.VectorKind.Vector, + NumberDataViewType.Single, false, new SchemaShape(metadata)); + + resultDic[_options.ForecastingConfidenceIntervalMaxOutputColumnName] = new SchemaShape.Column( + _options.ForecastingConfidenceIntervalMaxOutputColumnName, SchemaShape.Column.VectorKind.Vector, + NumberDataViewType.Single, false, new SchemaShape(metadata)); + } return new SchemaShape(resultDic.Values); } From db4ff6a8d84b067a3b67aabc0b1e2de481ba5910 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Thu, 27 Jun 2019 16:09:02 -0700 Subject: [PATCH 09/21] Update documentation. --- src/Microsoft.ML.Data/MLContext.cs | 6 - src/Microsoft.ML.Data/TrainCatalog.cs | 27 ----- ...AdaptiveSingularSpectrumSequenceModeler.cs | 2 +- .../ExtensionsCatalog.cs | 45 +++++--- src/Microsoft.ML.TimeSeries/SSaForecasting.cs | 109 +++++++++++------- ...SequentialAnomalyDetectionTransformBase.cs | 22 ++-- 6 files changed, 105 insertions(+), 106 deletions(-) diff --git a/src/Microsoft.ML.Data/MLContext.cs b/src/Microsoft.ML.Data/MLContext.cs index fc30ba9ca2..f255ffe07a 100644 --- a/src/Microsoft.ML.Data/MLContext.cs +++ b/src/Microsoft.ML.Data/MLContext.cs @@ -46,11 +46,6 @@ public sealed class MLContext : IHostEnvironment /// public AnomalyDetectionCatalog AnomalyDetection { get; } - /// - /// Trainers and tasks specific to forecasting problems. - /// - public ForecastingCatalog Forecasting { get; } - /// /// Data processing operations. /// @@ -118,7 +113,6 @@ public MLContext(int? seed = null) Clustering = new ClusteringCatalog(_env); Ranking = new RankingCatalog(_env); AnomalyDetection = new AnomalyDetectionCatalog(_env); - Forecasting = new ForecastingCatalog(_env); Transforms = new TransformsCatalog(_env); Model = new ModelOperationsCatalog(_env); Data = new DataOperationsCatalog(_env); diff --git a/src/Microsoft.ML.Data/TrainCatalog.cs b/src/Microsoft.ML.Data/TrainCatalog.cs index 2331b51792..a1280ce090 100644 --- a/src/Microsoft.ML.Data/TrainCatalog.cs +++ b/src/Microsoft.ML.Data/TrainCatalog.cs @@ -704,31 +704,4 @@ public AnomalyDetectionMetrics Evaluate(IDataView data, string labelColumnName = return eval.Evaluate(data, labelColumnName, scoreColumnName, predictedLabelColumnName); } } - - /// - /// Class used by to create instances of forecasting components. - /// - public sealed class ForecastingCatalog : TrainCatalogBase - { - /// - /// The list of trainers for performing forecasting. - /// - public Forecasters Trainers { get; } - - internal ForecastingCatalog(IHostEnvironment env) : base(env, nameof(ForecastingCatalog)) - { - Trainers = new Forecasters(this); - } - - /// - /// Class used by to create instances of forecasting trainers. - /// - public sealed class Forecasters : CatalogInstantiatorBase - { - internal Forecasters(ForecastingCatalog catalog) - : base(catalog) - { - } - } - } } diff --git a/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs b/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs index 40c4fb0f89..c88c0529d8 100644 --- a/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs +++ b/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs @@ -257,7 +257,7 @@ private static VersionInfo GetVersionInfo() /// The flag determining whether the confidence bounds for the point forecasts should be computed. (default = true) /// The flag determining whether the model should be stabilized. /// The flag determining whether the meta information for the model needs to be maintained. - /// The maximum growth on the exponential trend + /// The maximum growth on the exponential trend. public AdaptiveSingularSpectrumSequenceModelerInternal(IHostEnvironment env, int trainSize, int seriesLength, int windowSize, Single discountFactor = 1, RankSelectionMethod rankSelectionMethod = RankSelectionMethod.Exact, int? rank = null, int? maxRank = null, bool shouldComputeForecastIntervals = true, bool shouldstablize = true, bool shouldMaintainInfo = false, GrowthRatio? maxGrowth = null) diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index c6e6ec8966..290e714a97 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -148,24 +148,30 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog int windowSize=64, int backAddWindowSize=5, int lookaheadWindowSize=5, int averageingWindowSize=3, int judgementWindowSize=21, double threshold=0.3) => new SrCnnAnomalyEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, windowSize, backAddWindowSize, lookaheadWindowSize, averageingWindowSize, judgementWindowSize, threshold, inputColumnName); - /*/// - /// Singular Spectrum Analysis (SSA) model for modeling univariate time-series. + /// + /// Singular Spectrum Analysis (SSA) model for modeling univariate time-series forecasting. /// For the details of the model, refer to http://arxiv.org/pdf/1206.6910.pdf. /// /// Catalog. - /// The name of the column on which forecasting needs to be performed. + /// Name of the column resulting from the transformation of . + /// Name of column to transform. If set to , the value of the will be used as source. + /// The vector contains Alert, Raw Score, P-Value as first three values. + /// The length of the window on the series for building the trajectory matrix (parameter L). + /// The length of series that is kept in buffer for modeling (parameter N). /// The length of series from the begining used for training. - /// The length of series that is kept in buffer for modeling (parameter N from reference papar). - /// The length of the window on the series for building the trajectory matrix (parameter L from reference papar). - /// The discount factor in [0,1] used for online updates (default = 1). - /// The rank selection method (default = Exact). - /// The desired rank of the subspace used for SSA projection (parameter r from reference papar). This parameter should be in the range in [1, ]. - /// If set to null, the rank is automatically determined based on prediction error minimization. (default = null) - /// The maximum rank considered during the rank selection process. If not provided (i.e. set to null), it is set to - 1. - /// The flag determining whether the confidence bounds for the point forecasts should be computed. (default = ) - /// The flag determining whether the model should be stabilized. + /// The number of values to forecast. + /// The flag determing whether the model is adaptive. + /// The discount factor in [0,1] used for online updates. + /// The rank selection method. + /// The desired rank of the subspace used for SSA projection (parameter r). This parameter should be in the range in [1, windowSize]. + /// If set to null, the rank is automatically determined based on prediction error minimization. + /// The maximum rank considered during the rank selection process. If not provided (i.e. set to null), it is set to windowSize - 1. + /// The flag determining whether the model should be stabilized. /// The flag determining whether the meta information for the model needs to be maintained. - /// The maximum growth on the exponential trend + /// The maximum growth on the exponential trend. + /// The name of the minimum confidence interval column. If not specified then confidence intervals will not be calculated. + /// The name of the maximum confidence interval column. If not specified then confidence intervals will not be calculated. + /// /// /// /// /// /// - public static AdaptiveSingularSpectrumSequenceForecasting AdaptiveSingularSpectrumSequenceModeler(this ForecastingCatalog catalog, - string inputColumnName, int trainSize, int seriesLength, int windowSize, Single discountFactor = 1, RankSelectionMethod rankSelectionMethod = RankSelectionMethod.Exact, - int? rank = null, int? maxRank = null, bool shouldComputeForecastIntervals = true, bool shouldstablize = true, bool shouldMaintainInfo = false, GrowthRatio? maxGrowth = null) => - new AdaptiveSingularSpectrumSequenceForecasting(CatalogUtils.GetEnvironment(catalog), inputColumnName, trainSize, seriesLength, windowSize, discountFactor, - rankSelectionMethod, rank, maxRank, shouldComputeForecastIntervals, shouldstablize, shouldMaintainInfo, maxGrowth);*/ + public static SsaForecastingEstimator ForecastBySsa( + this TransformsCatalog catalog, string outputColumnName, string inputColumnName, int windowSize, int seriesLength, int trainSize, int horizon, + bool isAdaptive = false, float discountFactor = 1, RankSelectionMethod rankSelectionMethod = RankSelectionMethod.Exact, int? rank = null, + int? maxRank = null, bool shouldStablize = true, bool shouldMaintainInfo = false, GrowthRatio? maxGrowth = null, string forecastingConfidenceIntervalMinOutputColumnName = null, + string forecastingConfidenceIntervalMaxOutputColumnName = null, float confidenceLevel = 0.95f) => + new SsaForecastingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, windowSize, seriesLength, trainSize, + horizon, isAdaptive, discountFactor, rankSelectionMethod, rank, maxRank, shouldStablize, shouldMaintainInfo, maxGrowth, forecastingConfidenceIntervalMinOutputColumnName, + forecastingConfidenceIntervalMaxOutputColumnName, confidenceLevel); } } diff --git a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs index 25835cfcd3..d026f97bb2 100644 --- a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs +++ b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs @@ -36,38 +36,57 @@ public sealed class SsaForecasting : SsaForecastingBaseWrapper, IStatefulTransfo internal sealed class Options : TransformInputBase { - [Argument(ArgumentType.Required, HelpText = "The name of the source column.", ShortName = "src", - SortOrder = 1, Purpose = SpecialPurpose.ColumnName)] + [Argument(ArgumentType.Required, HelpText = "The name of the source column.", ShortName = "src", SortOrder = 1, Purpose = SpecialPurpose.ColumnName)] public string Source; - [Argument(ArgumentType.Required, HelpText = "The name of the new column.", - SortOrder = 2)] + [Argument(ArgumentType.Required, HelpText = "The name of the new column.", SortOrder = 2)] public string Name; - [Argument(ArgumentType.Required, HelpText = "The name of the new column", ShortName = "cnfminname", - SortOrder = 2)] + [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the minimum confidence interval column.", ShortName = "cnfminname", SortOrder = 3)] public string ForecastingConfidenceIntervalMinOutputColumnName; - [Argument(ArgumentType.Required, HelpText = "The name of the new column", ShortName = "cnfmaxnname", - SortOrder = 2)] + [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the maximum confidence interval column.", ShortName = "cnfmaxnname", SortOrder = 3)] public string ForecastingConfidenceIntervalMaxOutputColumnName; - [Argument(ArgumentType.AtMostOnce, HelpText = "The discount factor in [0, 1]", ShortName = "disc", SortOrder = 12)] + [Argument(ArgumentType.AtMostOnce, HelpText = "The discount factor in [0,1] used for online updates.", ShortName = "disc", SortOrder = 5)] public float DiscountFactor = 1; - [Argument(ArgumentType.AtMostOnce, HelpText = "The flag determing whether the model is adaptive", ShortName = "adp", SortOrder = 14)] + [Argument(ArgumentType.AtMostOnce, HelpText = "The flag determing whether the model is adaptive", ShortName = "adp", SortOrder = 6)] public bool IsAdaptive = false; + + [Argument(ArgumentType.Required, HelpText = "The length of the window on the series for building the trajectory matrix (parameter L).", SortOrder = 2)] public int WindowSize; - public RankSelectionMethod RankSelectionMethod; - public int? Rank; - public int? MaxRank; - public bool ShouldStablize; - public bool ShouldMaintainInfo; - public GrowthRatio? MaxGrowth; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The rank selection method.", SortOrder = 3)] + public RankSelectionMethod RankSelectionMethod = RankSelectionMethod.Exact; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The desired rank of the subspace used for SSA projection (parameter r). This parameter should be in the range in [1, windowSize]. " + + "If set to null, the rank is automatically determined based on prediction error minimization.", SortOrder = 3)] + public int? Rank = null; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The maximum rank considered during the rank selection process. If not provided (i.e. set to null), it is set to windowSize - 1.", SortOrder = 3)] + public int? MaxRank = null; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The flag determining whether the model should be stabilized.", SortOrder = 3)] + public bool ShouldStablize = true; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The flag determining whether the meta information for the model needs to be maintained.", SortOrder = 3)] + public bool ShouldMaintainInfo = false; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The maximum growth on the exponential trend.", SortOrder = 3)] + public GrowthRatio? MaxGrowth = null; + + [Argument(ArgumentType.Required, HelpText = "The length of series that is kept in buffer for modeling (parameter N).", SortOrder = 2)] public int SeriesLength; + + [Argument(ArgumentType.Required, HelpText = "The length of series from the begining used for training.", SortOrder = 2)] public int TrainSize; + + [Argument(ArgumentType.Required, HelpText = "The number of values to forecast.", SortOrder = 2)] public int Horizon; - public float ConfidenceLevel; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The confidence level in [0, 1) for forecasting.", SortOrder = 2)] + public float ConfidenceLevel = 0.95f; } private sealed class BaseArguments : SsaForecastingOptions @@ -186,7 +205,7 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat /// /// /// | - /// | Output column data type | 3-element vector of | + /// | Output column data type | Vector of | /// /// [!include[io](~/../docs/samples/docs/api-reference/time-series-props.md)] /// /// [!include[io](~/../docs/samples/docs/api-reference/time-series-ssa.md)] /// - /// [!include[io](~/../docs/samples/docs/api-reference/time-series-pvalue.md)] - /// /// Check the See Also section for links to usage examples. /// ]]> /// @@ -218,34 +235,39 @@ public sealed class SsaForecastingEstimator : IEstimator /// /// Host Environment. /// Name of the column resulting from the transformation of . - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// /// Name of column to transform. If set to , the value of the will be used as source. /// The vector contains Alert, Raw Score, P-Value as first three values. - /// - /// + /// The length of the window on the series for building the trajectory matrix (parameter L). + /// The length of series that is kept in buffer for modeling (parameter N). + /// The length of series from the begining used for training. + /// The number of values to forecast. + /// The flag determing whether the model is adaptive. + /// The discount factor in [0,1] used for online updates. + /// The rank selection method. + /// The desired rank of the subspace used for SSA projection (parameter r). This parameter should be in the range in [1, windowSize]. + /// If set to null, the rank is automatically determined based on prediction error minimization. + /// The maximum rank considered during the rank selection process. If not provided (i.e. set to null), it is set to windowSize - 1. + /// The flag determining whether the model should be stabilized. + /// The flag determining whether the meta information for the model needs to be maintained. + /// The maximum growth on the exponential trend. + /// The name of the minimum confidence interval column. If not specified then confidence intervals will not be calculated. + /// The name of the maximum confidence interval column. If not specified then confidence intervals will not be calculated. /// internal SsaForecastingEstimator(IHostEnvironment env, string outputColumnName, - float discountFactor, - bool isAdaptive, + string inputColumnName, int windowSize, - RankSelectionMethod rankSelectionMethod, - int? rank, - int? maxRank, - bool shouldComputeForecastIntervals, - bool shouldStablize, - bool shouldMaintainInfo, - GrowthRatio? maxGrowth, - string inputColumnName = null, + int seriesLength, + int trainSize, + int horizon, + bool isAdaptive = false, + float discountFactor = 1, + RankSelectionMethod rankSelectionMethod = RankSelectionMethod.Exact, + int? rank = null, + int? maxRank = null, + bool shouldStablize = true, + bool shouldMaintainInfo = false, + GrowthRatio? maxGrowth = null, string forecastingConfidenceIntervalMinOutputColumnName = null, string forecastingConfidenceIntervalMaxOutputColumnName = null, float confidenceLevel = 0.95f) @@ -305,6 +327,7 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema) var metadata = new List() { new SchemaShape.Column(AnnotationUtils.Kinds.SlotNames, SchemaShape.Column.VectorKind.Vector, TextDataViewType.Instance, false) }; + var resultDic = inputSchema.ToDictionary(x => x.Name); resultDic[_options.Name] = new SchemaShape.Column( _options.Name, SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Single, false, new SchemaShape(metadata)); diff --git a/src/Microsoft.ML.TimeSeries/SequentialAnomalyDetectionTransformBase.cs b/src/Microsoft.ML.TimeSeries/SequentialAnomalyDetectionTransformBase.cs index 915e74ab39..72cfdf27e5 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialAnomalyDetectionTransformBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialAnomalyDetectionTransformBase.cs @@ -387,18 +387,18 @@ public Action CreatePinger(DataViewRow input, Func activeOutput return pinger; } - private Action MakePinger(DataViewRow input, AnomalyDetectionStateBase state) + private Action MakePinger(DataViewRow input, AnomalyDetectionStateBase state) + { + _host.AssertValue(input); + var srcGetter = input.GetGetter(input.Schema[_inputColumnIndex]); + Action pinger = (long rowPosition) => { - _host.AssertValue(input); - var srcGetter = input.GetGetter(input.Schema[_inputColumnIndex]); - Action pinger = (long rowPosition) => - { - TInput src = default; - srcGetter(ref src); - state.UpdateState(ref src, rowPosition, _parent.WindowSize > 0); - }; - return pinger; - } + TInput src = default; + srcGetter(ref src); + state.UpdateState(ref src, rowPosition, _parent.WindowSize > 0); + }; + return pinger; + } public void CloneState() { From 16c79201797b39a9fa505ab5ecb18194b4838666 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Thu, 27 Jun 2019 16:11:30 -0700 Subject: [PATCH 10/21] fix. --- src/Microsoft.ML.TimeSeries/SSaForecasting.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs index d026f97bb2..59da3d89db 100644 --- a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs +++ b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs @@ -205,7 +205,7 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat /// /// /// /// /// - /// + /// public sealed class SsaForecastingEstimator : IEstimator { private readonly IHost _host; From 48ac2b59a0a6299f61ecd7026402c8a150627e53 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Thu, 27 Jun 2019 16:12:14 -0700 Subject: [PATCH 11/21] Update documentation. --- src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index 290e714a97..a348e2fa2f 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -2,10 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; using Microsoft.ML.Data; using Microsoft.ML.Transforms.TimeSeries; -//using static Microsoft.ML.Transforms.TimeSeries.AdaptiveSingularSpectrumSequenceForecasting; namespace Microsoft.ML { From ac62642a03d3da1d456bb0008876409156182560 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Thu, 27 Jun 2019 22:40:09 -0700 Subject: [PATCH 12/21] Update tests. --- src/Microsoft.ML.Data/MLContext.cs | 6 ++ src/Microsoft.ML.Data/TrainCatalog.cs | 27 +++++++ .../ExtensionsCatalog.cs | 2 +- src/Microsoft.ML.TimeSeries/SSaForecasting.cs | 1 - .../SequentialForecastingTransformBase.cs | 6 +- .../SequentialTransformerBase.cs | 8 +- .../SsaForecastingBase.cs | 38 ++++++---- .../TimeSeriesDirectApi.cs | 73 +++++++------------ 8 files changed, 94 insertions(+), 67 deletions(-) diff --git a/src/Microsoft.ML.Data/MLContext.cs b/src/Microsoft.ML.Data/MLContext.cs index f255ffe07a..fc30ba9ca2 100644 --- a/src/Microsoft.ML.Data/MLContext.cs +++ b/src/Microsoft.ML.Data/MLContext.cs @@ -46,6 +46,11 @@ public sealed class MLContext : IHostEnvironment /// public AnomalyDetectionCatalog AnomalyDetection { get; } + /// + /// Trainers and tasks specific to forecasting problems. + /// + public ForecastingCatalog Forecasting { get; } + /// /// Data processing operations. /// @@ -113,6 +118,7 @@ public MLContext(int? seed = null) Clustering = new ClusteringCatalog(_env); Ranking = new RankingCatalog(_env); AnomalyDetection = new AnomalyDetectionCatalog(_env); + Forecasting = new ForecastingCatalog(_env); Transforms = new TransformsCatalog(_env); Model = new ModelOperationsCatalog(_env); Data = new DataOperationsCatalog(_env); diff --git a/src/Microsoft.ML.Data/TrainCatalog.cs b/src/Microsoft.ML.Data/TrainCatalog.cs index a1280ce090..2331b51792 100644 --- a/src/Microsoft.ML.Data/TrainCatalog.cs +++ b/src/Microsoft.ML.Data/TrainCatalog.cs @@ -704,4 +704,31 @@ public AnomalyDetectionMetrics Evaluate(IDataView data, string labelColumnName = return eval.Evaluate(data, labelColumnName, scoreColumnName, predictedLabelColumnName); } } + + /// + /// Class used by to create instances of forecasting components. + /// + public sealed class ForecastingCatalog : TrainCatalogBase + { + /// + /// The list of trainers for performing forecasting. + /// + public Forecasters Trainers { get; } + + internal ForecastingCatalog(IHostEnvironment env) : base(env, nameof(ForecastingCatalog)) + { + Trainers = new Forecasters(this); + } + + /// + /// Class used by to create instances of forecasting trainers. + /// + public sealed class Forecasters : CatalogInstantiatorBase + { + internal Forecasters(ForecastingCatalog catalog) + : base(catalog) + { + } + } + } } diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index a348e2fa2f..cc8ecc49c3 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -179,7 +179,7 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog /// /// public static SsaForecastingEstimator ForecastBySsa( - this TransformsCatalog catalog, string outputColumnName, string inputColumnName, int windowSize, int seriesLength, int trainSize, int horizon, + this ForecastingCatalog catalog, string outputColumnName, string inputColumnName, int windowSize, int seriesLength, int trainSize, int horizon, bool isAdaptive = false, float discountFactor = 1, RankSelectionMethod rankSelectionMethod = RankSelectionMethod.Exact, int? rank = null, int? maxRank = null, bool shouldStablize = true, bool shouldMaintainInfo = false, GrowthRatio? maxGrowth = null, string forecastingConfidenceIntervalMinOutputColumnName = null, string forecastingConfidenceIntervalMaxOutputColumnName = null, float confidenceLevel = 0.95f) => diff --git a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs index 59da3d89db..0604b9f2ab 100644 --- a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs +++ b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs @@ -224,7 +224,6 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat /// ]]> /// /// - /// public sealed class SsaForecastingEstimator : IEstimator { private readonly IHost _host; diff --git a/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs b/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs index 44e1c289cb..ebf1c12e9a 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs @@ -256,13 +256,13 @@ private protected sealed override void InitializeStateCore(bool disk = false) { Parent = (SequentialForecastingTransformBase)ParentTransform; Host.Assert(WindowSize >= 0); - InitializeAnomalyDetector(); + InitializeForecaster(); } /// - /// The abstract method that realizes the initialization functionality for the anomaly detector. + /// The abstract method that realizes the initialization functionality for the forecaster. /// - private protected abstract void InitializeAnomalyDetector(); + private protected abstract void InitializeForecaster(); } } } diff --git a/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs b/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs index 59775a2945..8ae346eb78 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs @@ -168,6 +168,7 @@ public void UpdateStateCore(ref TInput input, bool buffer = true) public void Process(ref TInput input, ref TOutput output1, ref TOutput output2, ref TOutput output3) { + //Using prediction engine will not evaluate the below condition to true. if (PreviousPosition == -1) UpdateStateCore(ref input); @@ -186,8 +187,9 @@ public void Process(ref TInput input, ref TOutput output1, ref TOutput output2, public void ProcessWithoutBuffer(ref TInput input, ref TOutput output1, ref TOutput output2, ref TOutput output3) { + //Using prediction engine will not evaluate the below condition to true. if (PreviousPosition == -1) - UpdateStateCore(ref input, false); + UpdateStateCore(ref input); if (InitialWindowedBuffer.Count < InitialWindowSize) { @@ -202,6 +204,7 @@ public void ProcessWithoutBuffer(ref TInput input, ref TOutput output1, ref TOut public void Process(ref TInput input, ref TOutput output) { + //Using prediction engine will not evaluate the below condition to true. if (PreviousPosition == -1) UpdateStateCore(ref input); @@ -220,8 +223,9 @@ public void Process(ref TInput input, ref TOutput output) public void ProcessWithoutBuffer(ref TInput input, ref TOutput output) { + //Using prediction engine will not evaluate the below condition to true. if (PreviousPosition == -1) - UpdateStateCore(ref input, false); + UpdateStateCore(ref input); if (InitialWindowedBuffer.Count < InitialWindowSize) { diff --git a/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs b/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs index 0de478fb0c..ce26181071 100644 --- a/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs +++ b/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs @@ -250,7 +250,7 @@ internal void SaveThis(ModelSaveContext ctx) internal sealed class State : ForecastingStateBase { private SequenceModelerBase _model; - private SsaForecastingBase _parentAnomalyDetector; + private SsaForecastingBase _parentForecaster; public State() { @@ -278,8 +278,8 @@ private protected override void CloneCore(State state) stateLocal.InitialWindowedBuffer = InitialWindowedBuffer.Clone(); if (_model != null) { - _parentAnomalyDetector.Model = _parentAnomalyDetector.Model.Clone(); - _model = _parentAnomalyDetector.Model; + _parentForecaster.Model = _parentForecaster.Model.Clone(); + _model = _parentForecaster.Model; } } @@ -288,30 +288,40 @@ private protected override void LearnStateFromDataCore(FixedSizeQueue da // This method is empty because there is no need to implement a training logic here. } - private protected override void InitializeAnomalyDetector() + private protected override void InitializeForecaster() { - _parentAnomalyDetector = (SsaForecastingBase)Parent; - _model = _parentAnomalyDetector.Model; + _parentForecaster = (SsaForecastingBase)Parent; + _model = _parentForecaster.Model; } private protected override void TransformCore(ref float input, FixedSizeQueue windowedBuffer, long iteration, ref VBuffer dst) { - dst = new VBuffer(_parentAnomalyDetector.Horizon, - ((AdaptiveSingularSpectrumSequenceModelerInternal)_model).Forecast(_parentAnomalyDetector.Horizon)); + // Forecasting is being done without prediction engine. Update the model + // with the observation. + if (PreviousPosition == -1) + Consume(input); + + dst = new VBuffer(_parentForecaster.Horizon, + ((AdaptiveSingularSpectrumSequenceModelerInternal)_model).Forecast(_parentForecaster.Horizon)); } private protected override void TransformCore(ref float input, FixedSizeQueue windowedBuffer, long iteration, ref VBuffer dst1, ref VBuffer dst2, ref VBuffer dst3) { - ((AdaptiveSingularSpectrumSequenceModelerInternal)_model).ForecastWithConfidenceIntervals(_parentAnomalyDetector.Horizon, - out float[] forecast, out float[] min, out float[] max, _parentAnomalyDetector.ConfidenceLevel); + // Forecasting is being done without prediction engine. Update the model + // with the observation. + if (PreviousPosition == -1) + Consume(input); + + ((AdaptiveSingularSpectrumSequenceModelerInternal)_model).ForecastWithConfidenceIntervals(_parentForecaster.Horizon, + out float[] forecast, out float[] min, out float[] max, _parentForecaster.ConfidenceLevel); - dst1 = new VBuffer(_parentAnomalyDetector.Horizon, forecast); - dst2 = new VBuffer(_parentAnomalyDetector.Horizon, min); - dst3 = new VBuffer(_parentAnomalyDetector.Horizon, max); + dst1 = new VBuffer(_parentForecaster.Horizon, forecast); + dst2 = new VBuffer(_parentForecaster.Horizon, min); + dst3 = new VBuffer(_parentForecaster.Horizon, max); } - public override void Consume(Single input) => _model.Consume(ref input, _parentAnomalyDetector.IsAdaptive); + public override void Consume(Single input) => _model.Consume(ref input, _parentForecaster.IsAdaptive); } } } diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 3d4e05dd66..088eda9482 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -26,11 +26,11 @@ private sealed class ForecastPrediction { #pragma warning disable CS0649 [VectorType(4)] - public float[] Change; + public float[] Forecast; [VectorType(4)] - public float[] Min; + public float[] MinCnf; [VectorType(4)] - public float[] Max; + public float[] MaxCnf; #pragma warning restore CS0649 } @@ -182,13 +182,14 @@ public void Forecast() { ConfidenceLevel = 0.95f, Source = "Value", - Name = "Change", - ForecastingConfidenceIntervalMinOutputColumnName = "Min", - ForecastingConfidenceIntervalMaxOutputColumnName = "Max", + Name = "Forecast", + ForecastingConfidenceIntervalMinOutputColumnName = "MinCnf", + ForecastingConfidenceIntervalMaxOutputColumnName = "MaxCnf", WindowSize = 10, SeriesLength = 11, TrainSize = 22, - Horizon = 4 + Horizon = 4, + IsAdaptive = true }; for (int j = 0; j < NumberOfSeasonsInTraining; j++) @@ -205,18 +206,19 @@ public void Forecast() // Get predictions var enumerator = env.Data.CreateEnumerable(output, true).GetEnumerator(); ForecastPrediction row = null; - List expectedValues = new List() { 0, -3.31410598754883, 0.5, 5.12000000000001E-08, 0, 1.5700820684432983, 5.2001145245395008E-07, - 0.012414560443710681, 0, 1.2854313254356384, 0.28810801662678009, 0.02038940454467935, 0, -1.0950627326965332, 0.36663890634019225, 0.026956459625565483}; + List expectedForecast = new List() { 0.191491723f, 2.53994083f, 5.26454258f, 7.37313938f }; + List minCnf = new List() { -3.9741993f, -2.36872721f, 0.09407653f, 2.18899345f }; + List maxCnf = new List() { 4.3571825f, 7.448609f, 10.435009f, 12.5572853f }; + enumerator.MoveNext(); + row = enumerator.Current; - int index = 0; - while (enumerator.MoveNext() && index < expectedValues.Count) + for (int localIndex = 0; localIndex < 4; localIndex++) { - row = enumerator.Current; - /*Assert.Equal(expectedValues[index++], row.Change[0], precision: 7); // Alert - Assert.Equal(expectedValues[index++], row.Change[1], precision: 7); // Raw score - Assert.Equal(expectedValues[index++], row.Change[2], precision: 7); // P-Value score - Assert.Equal(expectedValues[index++], row.Change[3], precision: 7); // Martingale score*/ + Assert.Equal(expectedForecast[localIndex], row.Forecast[localIndex], precision: 7); + Assert.Equal(minCnf[localIndex], row.MinCnf[localIndex], precision: 7); + Assert.Equal(maxCnf[localIndex], row.MaxCnf[localIndex], precision: 7); } + } [LessThanNetCore30OrNotNetCoreFact("netcoreapp3.0 output differs from Baseline")] @@ -385,7 +387,7 @@ public void ForecastingPredictionEngine() WindowSize = 10, SeriesLength = 11, TrainSize = 22, - Horizon = 4 + Horizon = 4, }; for (int j = 0; j < NumberOfSeasonsInTraining; j++) @@ -403,37 +405,16 @@ public void ForecastingPredictionEngine() engine.Update(new Data(1)); engine.Update(new Data(2)); var forecast = engine.Forecast(horizon: 5); - var prediction = ml.Data.CreateEnumerable(forecast, reuseRowObject: false); - - - /*Assert.Equal(0, prediction.Change[0], precision: 7); // Alert - Assert.Equal(1.1661833524703979, prediction.Change[1], precision: 5); // Raw score - Assert.Equal(0.5, prediction.Change[2], precision: 7); // P-Value score - Assert.Equal(5.1200000000000114E-08, prediction.Change[3], precision: 7); // Martingale score - - //Model 1: Checkpoint. - var modelPath = "temp.zip"; - engine.CheckPoint(ml, modelPath); - - //Model 1: Prediction #2 - prediction = engine.Predict(new Data(1)); - Assert.Equal(0, prediction.Change[0], precision: 7); // Alert - Assert.Equal(0.12216401100158691, prediction.Change[1], precision: 5); // Raw score - Assert.Equal(0.14823824685192111, prediction.Change[2], precision: 5); // P-Value score - Assert.Equal(1.5292508189989167E-07, prediction.Change[3], precision: 7); // Martingale score + var prediction = ml.Data.CreateEnumerable(forecast, reuseRowObject: false).GetEnumerator(); - // Load Model 1. - ITransformer model2 = null; - using (var file = File.OpenRead(modelPath)) - model2 = ml.Model.Load(file, out var schema); + int index = 0; + List expectedForecast = new List() { 3.9516573f, 6.212672f, 7.732854f, 8.125769f, 7.22453928f }; + while (prediction.MoveNext()) + { + Assert.Equal(prediction.Current.Forecast, expectedForecast[index++]); + } - //Predict and expect the same result after checkpointing(Prediction #2). - engine = model2.CreateTimeSeriesPredictionFunction(ml); - prediction = engine.Predict(new Data(1)); - Assert.Equal(0, prediction.Change[0], precision: 7); // Alert - Assert.Equal(0.12216401100158691, prediction.Change[1], precision: 5); // Raw score - Assert.Equal(0.14823824685192111, prediction.Change[2], precision: 5); // P-Value score - Assert.Equal(1.5292508189989167E-07, prediction.Change[3], precision: 5); // Martingale score*/ + Assert.Equal(5, index); } [Fact] From 8a44929a74caa23fac1618dc35842986737849f4 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Thu, 27 Jun 2019 22:46:55 -0700 Subject: [PATCH 13/21] clean up tests. --- .../TimeSeriesDirectApi.cs | 209 +++++------------- 1 file changed, 55 insertions(+), 154 deletions(-) diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 088eda9482..4a0559b032 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -167,60 +167,6 @@ public void ChangePointDetectionWithSeasonality() } } - [LessThanNetCore30OrNotNetCoreFact("netcoreapp3.0 output differs from Baseline")] - public void Forecast() - { - var env = new MLContext(); - const int ChangeHistorySize = 10; - const int SeasonalitySize = 10; - const int NumberOfSeasonsInTraining = 5; - - List data = new List(); - var dataView = env.Data.LoadFromEnumerable(data); - - var args = new SsaForecasting.Options() - { - ConfidenceLevel = 0.95f, - Source = "Value", - Name = "Forecast", - ForecastingConfidenceIntervalMinOutputColumnName = "MinCnf", - ForecastingConfidenceIntervalMaxOutputColumnName = "MaxCnf", - WindowSize = 10, - SeriesLength = 11, - TrainSize = 22, - Horizon = 4, - IsAdaptive = true - }; - - for (int j = 0; j < NumberOfSeasonsInTraining; j++) - for (int i = 0; i < SeasonalitySize; i++) - data.Add(new Data(i)); - - for (int i = 0; i < ChangeHistorySize; i++) - data.Add(new Data(i * 100)); - - // Train - var detector = new SsaForecastingEstimator(env, args).Fit(dataView); - // Transform - var output = detector.Transform(dataView); - // Get predictions - var enumerator = env.Data.CreateEnumerable(output, true).GetEnumerator(); - ForecastPrediction row = null; - List expectedForecast = new List() { 0.191491723f, 2.53994083f, 5.26454258f, 7.37313938f }; - List minCnf = new List() { -3.9741993f, -2.36872721f, 0.09407653f, 2.18899345f }; - List maxCnf = new List() { 4.3571825f, 7.448609f, 10.435009f, 12.5572853f }; - enumerator.MoveNext(); - row = enumerator.Current; - - for (int localIndex = 0; localIndex < 4; localIndex++) - { - Assert.Equal(expectedForecast[localIndex], row.Forecast[localIndex], precision: 7); - Assert.Equal(minCnf[localIndex], row.MinCnf[localIndex], precision: 7); - Assert.Equal(maxCnf[localIndex], row.MaxCnf[localIndex], precision: 7); - } - - } - [LessThanNetCore30OrNotNetCoreFact("netcoreapp3.0 output differs from Baseline")] public void ChangePointDetectionWithSeasonalityPredictionEngineNoColumn() { @@ -368,7 +314,61 @@ public void ChangePointDetectionWithSeasonalityPredictionEngine() } [LessThanNetCore30OrNotNetCoreFact("netcoreapp3.0 output differs from Baseline")] - public void ForecastingPredictionEngine() + public void SsaForecast() + { + var env = new MLContext(); + const int ChangeHistorySize = 10; + const int SeasonalitySize = 10; + const int NumberOfSeasonsInTraining = 5; + + List data = new List(); + var dataView = env.Data.LoadFromEnumerable(data); + + var args = new SsaForecasting.Options() + { + ConfidenceLevel = 0.95f, + Source = "Value", + Name = "Forecast", + ForecastingConfidenceIntervalMinOutputColumnName = "MinCnf", + ForecastingConfidenceIntervalMaxOutputColumnName = "MaxCnf", + WindowSize = 10, + SeriesLength = 11, + TrainSize = 22, + Horizon = 4, + IsAdaptive = true + }; + + for (int j = 0; j < NumberOfSeasonsInTraining; j++) + for (int i = 0; i < SeasonalitySize; i++) + data.Add(new Data(i)); + + for (int i = 0; i < ChangeHistorySize; i++) + data.Add(new Data(i * 100)); + + // Train + var detector = new SsaForecastingEstimator(env, args).Fit(dataView); + // Transform + var output = detector.Transform(dataView); + // Get predictions + var enumerator = env.Data.CreateEnumerable(output, true).GetEnumerator(); + ForecastPrediction row = null; + List expectedForecast = new List() { 0.191491723f, 2.53994083f, 5.26454258f, 7.37313938f }; + List minCnf = new List() { -3.9741993f, -2.36872721f, 0.09407653f, 2.18899345f }; + List maxCnf = new List() { 4.3571825f, 7.448609f, 10.435009f, 12.5572853f }; + enumerator.MoveNext(); + row = enumerator.Current; + + for (int localIndex = 0; localIndex < 4; localIndex++) + { + Assert.Equal(expectedForecast[localIndex], row.Forecast[localIndex], precision: 7); + Assert.Equal(minCnf[localIndex], row.MinCnf[localIndex], precision: 7); + Assert.Equal(maxCnf[localIndex], row.MaxCnf[localIndex], precision: 7); + } + + } + + [LessThanNetCore30OrNotNetCoreFact("netcoreapp3.0 output differs from Baseline")] + public void SsaForecastPredictionEngine() { const int ChangeHistorySize = 10; const int SeasonalitySize = 10; @@ -457,104 +457,5 @@ public void AnomalyDetectionWithSrCnn() k += 1; } } - - /* - [Fact] - public void Forecasting() - { - const int SeasonalitySize = 10; - const int NumberOfSeasonsInTraining = 5; - - List data = new List(); - - var ml = new MLContext(seed: 1); - var dataView = ml.Data.LoadFromEnumerable(data); - - for (int j = 0; j < NumberOfSeasonsInTraining; j++) - for (int i = 0; i < SeasonalitySize; i++) - data.Add(new Data(i)); - - // Create forecasting model. - var model = ml.Forecasting.AdaptiveSingularSpectrumSequenceModeler("Value", data.Count, SeasonalitySize + 1, SeasonalitySize, - 1, AdaptiveSingularSpectrumSequenceForecasting.RankSelectionMethod.Exact, null, SeasonalitySize / 2, false, false); - - // Train. - model.Train(dataView); - - // Forecast. - var forecast = model.Forecast(5); - - // Update with new observations. - model.Update(dataView); - - // Checkpoint. - ml.Model.SaveForecastingModel(model, "model.zip"); - - // Load the checkpointed model from disk. - var modelCopy = ml.Model.LoadForecastingModel("model.zip"); - - // Forecast with the checkpointed model loaded from disk. - var forecastCopy = modelCopy.Forecast(5); - - // Forecast with the original model(that was checkpointed to disk). - forecast = model.Forecast(5); - - // Both the forecasted values from model loaded from disk and - // already in memory model should be the same. - Assert.Equal(forecast, forecastCopy); - } - - [Fact] - public void ForecastingWithConfidenceInterval() - { - const int SeasonalitySize = 10; - const int NumberOfSeasonsInTraining = 5; - - List data = new List(); - - var ml = new MLContext(seed: 1); - var dataView = ml.Data.LoadFromEnumerable(data); - - for (int j = 0; j < NumberOfSeasonsInTraining; j++) - for (int i = 0; i < SeasonalitySize; i++) - data.Add(new Data(i)); - - // Create forecasting model. - var model = ml.Forecasting.AdaptiveSingularSpectrumSequenceModeler("Value", data.Count, SeasonalitySize + 1, SeasonalitySize, - 1, AdaptiveSingularSpectrumSequenceForecasting.RankSelectionMethod.Exact, null, SeasonalitySize / 2, shouldComputeForecastIntervals: true, false); - - // Train. - model.Train(dataView); - - // Forecast. - float[] forecast; - float[] lowConfInterval; - float[] upperConfInterval; - model.ForecastWithConfidenceIntervals(5, out forecast, out lowConfInterval, out upperConfInterval); - - // Update with new observations. - model.Update(dataView); - - // Checkpoint. - ml.Model.SaveForecastingModel(model, "model.zip"); - - // Load the checkpointed model from disk. - var modelCopy = ml.Model.LoadForecastingModel("model.zip"); - - // Forecast with the checkpointed model loaded from disk. - float[] forecastCopy; - float[] lowConfIntervalCopy; - float[] upperConfIntervalCopy; - modelCopy.ForecastWithConfidenceIntervals(5, out forecastCopy, out lowConfIntervalCopy, out upperConfIntervalCopy); - - // Forecast with the original model(that was checkpointed to disk). - model.ForecastWithConfidenceIntervals(5, out forecast, out lowConfInterval, out upperConfInterval); - - // Both the forecasted values from model loaded from disk and - // already in memory model should be the same. - Assert.Equal(forecast, forecastCopy); - Assert.Equal(lowConfInterval, lowConfIntervalCopy); - Assert.Equal(upperConfInterval, upperConfIntervalCopy); - }*/ } } From c973d48fa719e1644f2b31a1aadb44802a73ee03 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Fri, 28 Jun 2019 11:57:09 -0700 Subject: [PATCH 14/21] Update with PR review comment and entrypoint for forecasting API. --- ...AdaptiveSingularSpectrumSequenceModeler.cs | 44 +--- .../ExtensionsCatalog.cs | 12 +- .../PredictionFunction.cs | 98 ++++++-- src/Microsoft.ML.TimeSeries/SSaForecasting.cs | 36 +-- .../SequentialForecastingTransformBase.cs | 14 +- .../SequentialTransformBase.cs | 5 + .../SsaForecastingBase.cs | 6 +- .../TimeSeriesProcessing.cs | 14 ++ .../Common/EntryPoints/core_ep-list.tsv | 1 + .../Common/EntryPoints/core_manifest.json | 229 ++++++++++++++++++ .../TimeSeriesDirectApi.cs | 4 +- 11 files changed, 374 insertions(+), 89 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs b/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs index c88c0529d8..c5dd203064 100644 --- a/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs +++ b/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs @@ -6,6 +6,7 @@ using System.Collections.Generic; using System.Numerics; using Microsoft.ML; +using Microsoft.ML.CommandLine; using Microsoft.ML.Data; using Microsoft.ML.Internal.CpuMath; using Microsoft.ML.Internal.Utilities; @@ -34,51 +35,22 @@ public enum RankSelectionMethod /// public struct GrowthRatio { - private int _timeSpan; - private Double _growth; + [Argument(ArgumentType.AtMostOnce, HelpText = "The confidence level in [0, 1) for forecasting.", SortOrder = 2)] + public readonly int TimeSpan; - /// - /// Time span of growth ratio. Must be strictly positive. - /// - public int TimeSpan - { - get - { - return _timeSpan; - } - set - { - Contracts.CheckParam(value > 0, nameof(TimeSpan), "The time span must be strictly positive."); - _timeSpan = value; - } - } - - /// - /// Growth. Must be non-negative. - /// - public Double Growth - { - get - { - return _growth; - } - set - { - Contracts.CheckParam(value >= 0, nameof(Growth), "The growth must be non-negative."); - _growth = value; - } - } + [Argument(ArgumentType.AtMostOnce, HelpText = "The confidence level in [0, 1) for forecasting.", SortOrder = 2)] + public readonly Double Growth; public GrowthRatio(int timeSpan = 1, double growth = Double.PositiveInfinity) { Contracts.CheckParam(timeSpan > 0, nameof(TimeSpan), "The time span must be strictly positive."); Contracts.CheckParam(growth >= 0, nameof(Growth), "The growth must be non-negative."); - _growth = growth; - _timeSpan = timeSpan; + Growth = growth; + TimeSpan = timeSpan; } - public Double Ratio { get { return Math.Pow(_growth, 1d / _timeSpan); } } + public Double Ratio { get { return Math.Pow(Growth, 1d / TimeSpan); } } } /// diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index cc8ecc49c3..26fc49828e 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -167,8 +167,8 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog /// The flag determining whether the model should be stabilized. /// The flag determining whether the meta information for the model needs to be maintained. /// The maximum growth on the exponential trend. - /// The name of the minimum confidence interval column. If not specified then confidence intervals will not be calculated. - /// The name of the maximum confidence interval column. If not specified then confidence intervals will not be calculated. + /// The name of the confidence interval lower bound column. If not specified then confidence intervals will not be calculated. + /// The name of the confidence interval upper bound column. If not specified then confidence intervals will not be calculated. /// /// /// @@ -181,10 +181,10 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog public static SsaForecastingEstimator ForecastBySsa( this ForecastingCatalog catalog, string outputColumnName, string inputColumnName, int windowSize, int seriesLength, int trainSize, int horizon, bool isAdaptive = false, float discountFactor = 1, RankSelectionMethod rankSelectionMethod = RankSelectionMethod.Exact, int? rank = null, - int? maxRank = null, bool shouldStablize = true, bool shouldMaintainInfo = false, GrowthRatio? maxGrowth = null, string forecastingConfidenceIntervalMinOutputColumnName = null, - string forecastingConfidenceIntervalMaxOutputColumnName = null, float confidenceLevel = 0.95f) => + int? maxRank = null, bool shouldStablize = true, bool shouldMaintainInfo = false, GrowthRatio? maxGrowth = null, string forcastingConfidentLowerBoundColumnName = null, + string forcastingConfidentUpperBoundColumnName = null, float confidenceLevel = 0.95f) => new SsaForecastingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, windowSize, seriesLength, trainSize, - horizon, isAdaptive, discountFactor, rankSelectionMethod, rank, maxRank, shouldStablize, shouldMaintainInfo, maxGrowth, forecastingConfidenceIntervalMinOutputColumnName, - forecastingConfidenceIntervalMaxOutputColumnName, confidenceLevel); + horizon, isAdaptive, discountFactor, rankSelectionMethod, rank, maxRank, shouldStablize, shouldMaintainInfo, maxGrowth, forcastingConfidentLowerBoundColumnName, + forcastingConfidentUpperBoundColumnName, confidenceLevel); } } diff --git a/src/Microsoft.ML.TimeSeries/PredictionFunction.cs b/src/Microsoft.ML.TimeSeries/PredictionFunction.cs index a3b656c2d6..4b3c206dbd 100644 --- a/src/Microsoft.ML.TimeSeries/PredictionFunction.cs +++ b/src/Microsoft.ML.TimeSeries/PredictionFunction.cs @@ -15,6 +15,10 @@ internal interface IStatefulRowToRowMapper : IRowToRowMapper { } + /// + /// Used to expose standalone that allows + /// modifying transformer parameters such as horizon, confidence intervals. + /// internal interface IForecastTransformer : IStatefulTransformer { IDataView Forecast(int horizon, bool includeConfidenceInterval = false, float confidenceLevel = 0.95f); @@ -31,6 +35,7 @@ internal interface IStatefulTransformer : ITransformer /// /// Creates a clone of the transfomer. Used for taking the snapshot of the state. + /// This is used to create multiple time series with their own state. /// /// IStatefulTransformer Clone(); @@ -56,7 +61,7 @@ internal interface IStatefulRowMapper : IRowMapper /// /// The user-defined type that holds the example. /// The user-defined type that holds the prediction. - public class TimeSeriesPredictionFunction : PredictionEngineBase + public abstract class TimeSeriesPredictionEngine : PredictionEngineBase where TSrc : class where TDst : class, new() { @@ -65,7 +70,7 @@ public class TimeSeriesPredictionFunction : PredictionEngineBase - /// Checkpoints to disk with the updated + /// Checkpoints to disk with the updated /// state. /// /// Usually . @@ -88,7 +93,7 @@ public void CheckPoint(IHostEnvironment env, string modelPath) } /// - /// Checkpoints to a with the updated + /// Checkpoints to a with the updated /// state. /// /// Usually . @@ -107,14 +112,14 @@ public void CheckPoint(IHostEnvironment env, Stream stream) env.CheckParam(stream != null, nameof(stream)); if (Transformer is ITransformerChainAccessor) - { + { - new TransformerChain - (((ITransformerChainAccessor)Transformer).Transformers, - ((ITransformerChainAccessor)Transformer).Scopes).SaveTo(env, stream); - } - else - Transformer.SaveTo(env, stream); + new TransformerChain + (((ITransformerChainAccessor)Transformer).Transformers, + ((ITransformerChainAccessor)Transformer).Scopes).SaveTo(env, stream); + } + else + Transformer.SaveTo(env, stream); } private static ITransformer CloneTransformers(ITransformer transformer) @@ -140,7 +145,7 @@ private static ITransformer CloneTransformers(ITransformer transformer) /// Contructor for creating time series specific prediction engine. It allows update the time series model to be updated with the observations /// seen at prediction time via /// - public TimeSeriesPredictionFunction(IHostEnvironment env, ITransformer transformer, bool ignoreMissingColumns, + public TimeSeriesPredictionEngine(IHostEnvironment env, ITransformer transformer, bool ignoreMissingColumns, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) : base(env, CloneTransformers(transformer), ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition) { @@ -282,8 +287,35 @@ public override void Predict(TSrc example, ref TDst prediction) } } - public sealed class TimeSeriesForecasting : TimeSeriesPredictionFunction - where TSrc : class + /// + /// A class that runs the previously trained model (and the preceding transform pipeline) on the + /// in-memory data, one example at a time. + /// This can also be used with trained pipelines that do not end with a predictor: in this case, the + /// 'prediction' will be just the outcome of all the transformations. + /// Creates an anomaly detection engine for a time series pipeline + /// It updates the state of time series model with observations seen at prediction phase and allows checkpointing the model. + /// + /// The user-defined type that holds the example. + /// The user-defined type that holds the prediction. + public sealed class TimeSeriesAnomalyDetection : TimeSeriesPredictionEngine + where TSrc : class + where TDst : class, new() + { + public TimeSeriesAnomalyDetection(IHostEnvironment env, ITransformer transformer, bool ignoreMissingColumns, + SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) : + base(env, transformer, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition) + { + } + } + + /// + /// Creates forecasting engine for a time series pipeline that allows updating the state of time series forecasting model + /// with new observations and checkpointing the model. It also allows forecasting values without passing any input values and + /// dynamically configuring paramaters such as horizon and confidence interval. + /// + /// The user-defined type that holds the example. + public sealed class TimeSeriesForecasting : TimeSeriesPredictionEngine + where TSrc : class { public TimeSeriesForecasting(IHostEnvironment env, ITransformer transformer, bool ignoreMissingColumns, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) : @@ -302,9 +334,9 @@ private protected override void PredictionEngineCore(IHostEnvironment env, DataV } /// - /// Run prediction pipeline on one example. + /// Update the time series model with one observation. /// - /// The example to run on. + /// The observation to update the forecasting model with. public void Update(TSrc example) { Contracts.CheckValue(example, nameof(example)); @@ -314,16 +346,27 @@ public void Update(TSrc example) RowPosition++; } + /// + /// Standalone forecasting method. Allows dynamic configuration of forecasting parameters such as horizon + /// and confidence level. + /// + /// The number of values to forecast. + /// Whether to include confidence intervals for each forecasted value. + /// The confidence level for forecasting. + /// public IDataView Forecast(int horizon, bool includeConfidenceInterval = false, float confidenceLevel = 0.95f) => ((IForecastTransformer)InputTransformer).Forecast(horizon, includeConfidenceInterval, confidenceLevel); + /// + /// This method should not be used. + /// public override void Predict(TSrc example, ref object prediction) => throw new NotSupportedException(); } public static class PredictionFunctionExtensions { /// - /// creates a prediction function/engine for a time series pipeline + /// creates an anomaly detection engine for a time series pipeline /// It updates the state of time series model with observations seen at prediction phase and allows checkpointing the model. /// /// Class describing input schema to the model. @@ -342,7 +385,7 @@ public static class PredictionFunctionExtensions /// ]]> /// /// - public static TimeSeriesPredictionFunction CreateTimeSeriesPredictionFunction(this ITransformer transformer, IHostEnvironment env, + public static TimeSeriesAnomalyDetection CreateTimeSeriesPredictionFunction(this ITransformer transformer, IHostEnvironment env, bool ignoreMissingColumns = false, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) where TSrc : class where TDst : class, new() @@ -351,9 +394,28 @@ public static TimeSeriesPredictionFunction CreateTimeSeriesPredictio env.CheckValue(transformer, nameof(transformer)); env.CheckValueOrNull(inputSchemaDefinition); env.CheckValueOrNull(outputSchemaDefinition); - return new TimeSeriesPredictionFunction(env, transformer, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition); + return new TimeSeriesAnomalyDetection(env, transformer, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition); } + /// + /// creates a forecasting engine for a time series pipeline + /// It allows updating the state of time series forecasting model with new observations and allows checkpointing the model. + /// + /// Class describing input schema to the model. + /// The time series pipeline in the form of a . + /// Usually + /// To ignore missing columns. Default is false. + /// Input schema definition. Default is null. + /// Output schema definition. Default is null. + ///

Example code can be found by searching for TimeSeriesPredictionFunction in ML.NET.

+ /// + /// + /// + /// + /// public static TimeSeriesForecasting CreateTimeSeriesForecastingEngine(this ITransformer transformer, IHostEnvironment env, bool ignoreMissingColumns = false, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) where TSrc : class diff --git a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs index 0604b9f2ab..8fdeb5b2c8 100644 --- a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs +++ b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs @@ -42,11 +42,11 @@ internal sealed class Options : TransformInputBase [Argument(ArgumentType.Required, HelpText = "The name of the new column.", SortOrder = 2)] public string Name; - [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the minimum confidence interval column.", ShortName = "cnfminname", SortOrder = 3)] - public string ForecastingConfidenceIntervalMinOutputColumnName; + [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the confidence interval lower bound column.", ShortName = "cnfminname", SortOrder = 3)] + public string ForcastingConfidentLowerBoundColumnName; - [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the maximum confidence interval column.", ShortName = "cnfmaxnname", SortOrder = 3)] - public string ForecastingConfidenceIntervalMaxOutputColumnName; + [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the confidence interval upper bound column.", ShortName = "cnfmaxnname", SortOrder = 3)] + public string ForcastingConfidentUpperBoundColumnName; [Argument(ArgumentType.AtMostOnce, HelpText = "The discount factor in [0,1] used for online updates.", ShortName = "disc", SortOrder = 5)] public float DiscountFactor = 1; @@ -95,8 +95,8 @@ public BaseArguments(Options options) { Source = options.Source; Name = options.Name; - ForecastingConfidenceIntervalMinOutputColumnName = options.ForecastingConfidenceIntervalMinOutputColumnName; - ForecastingConfidenceIntervalMaxOutputColumnName = options.ForecastingConfidenceIntervalMaxOutputColumnName; + ForcastingConfidentLowerBoundColumnName = options.ForcastingConfidentLowerBoundColumnName; + ForcastingConfidentUpperBoundColumnName = options.ForcastingConfidentUpperBoundColumnName; WindowSize = options.WindowSize; DiscountFactor = options.DiscountFactor; IsAdaptive = options.IsAdaptive; @@ -249,8 +249,8 @@ public sealed class SsaForecastingEstimator : IEstimator /// The flag determining whether the model should be stabilized. /// The flag determining whether the meta information for the model needs to be maintained. /// The maximum growth on the exponential trend. - /// The name of the minimum confidence interval column. If not specified then confidence intervals will not be calculated. - /// The name of the maximum confidence interval column. If not specified then confidence intervals will not be calculated. + /// The name of the confidence interval lower bound column. If not specified then confidence intervals will not be calculated. + /// The name of the confidence interval upper bound column. If not specified then confidence intervals will not be calculated. /// internal SsaForecastingEstimator(IHostEnvironment env, string outputColumnName, @@ -267,8 +267,8 @@ internal SsaForecastingEstimator(IHostEnvironment env, bool shouldStablize = true, bool shouldMaintainInfo = false, GrowthRatio? maxGrowth = null, - string forecastingConfidenceIntervalMinOutputColumnName = null, - string forecastingConfidenceIntervalMaxOutputColumnName = null, + string forcastingConfidentLowerBoundColumnName = null, + string forcastingConfidentUpperBoundColumnName = null, float confidenceLevel = 0.95f) : this(env, new SsaForecasting.Options { @@ -284,8 +284,8 @@ internal SsaForecastingEstimator(IHostEnvironment env, ShouldMaintainInfo = shouldMaintainInfo, MaxGrowth = maxGrowth, ConfidenceLevel = confidenceLevel, - ForecastingConfidenceIntervalMinOutputColumnName = forecastingConfidenceIntervalMinOutputColumnName, - ForecastingConfidenceIntervalMaxOutputColumnName = forecastingConfidenceIntervalMaxOutputColumnName + ForcastingConfidentLowerBoundColumnName = forcastingConfidentLowerBoundColumnName, + ForcastingConfidentUpperBoundColumnName = forcastingConfidentUpperBoundColumnName }) { } @@ -313,6 +313,8 @@ public SsaForecasting Fit(IDataView input) /// /// Schema propagation for transformers. /// Returns the output schema of the data, if the input schema is like the one provided. + /// Creates three output columns if confidence intervals are requested otherwise + /// just one. /// public SchemaShape GetOutputSchema(SchemaShape inputSchema) { @@ -331,14 +333,14 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema) resultDic[_options.Name] = new SchemaShape.Column( _options.Name, SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Single, false, new SchemaShape(metadata)); - if (!string.IsNullOrEmpty(_options.ForecastingConfidenceIntervalMaxOutputColumnName)) + if (!string.IsNullOrEmpty(_options.ForcastingConfidentUpperBoundColumnName)) { - resultDic[_options.ForecastingConfidenceIntervalMinOutputColumnName] = new SchemaShape.Column( - _options.ForecastingConfidenceIntervalMinOutputColumnName, SchemaShape.Column.VectorKind.Vector, + resultDic[_options.ForcastingConfidentLowerBoundColumnName] = new SchemaShape.Column( + _options.ForcastingConfidentLowerBoundColumnName, SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Single, false, new SchemaShape(metadata)); - resultDic[_options.ForecastingConfidenceIntervalMaxOutputColumnName] = new SchemaShape.Column( - _options.ForecastingConfidenceIntervalMaxOutputColumnName, SchemaShape.Column.VectorKind.Vector, + resultDic[_options.ForcastingConfidentUpperBoundColumnName] = new SchemaShape.Column( + _options.ForcastingConfidentUpperBoundColumnName, SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Single, false, new SchemaShape(metadata)); } diff --git a/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs b/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs index ebf1c12e9a..873aef0a2a 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs @@ -26,15 +26,15 @@ internal abstract class ForecastingArgumentsBase SortOrder = 2)] public string Name; - [Argument(ArgumentType.Required, HelpText = "The name of the new column", ShortName = "cnfminname", + [Argument(ArgumentType.Required, HelpText = "The name of the confidence interval lower bound column.", ShortName = "cnfminname", SortOrder = 2)] - public string ForecastingConfidenceIntervalMinOutputColumnName; + public string ForcastingConfidentLowerBoundColumnName; - [Argument(ArgumentType.Required, HelpText = "The name of the new column", ShortName = "cnfmaxnname", + [Argument(ArgumentType.Required, HelpText = "The name of the confidence interval upper bound column.", ShortName = "cnfmaxnname", SortOrder = 2)] - public string ForecastingConfidenceIntervalMaxOutputColumnName; + public string ForcastingConfidentUpperBoundColumnName; - [Argument(ArgumentType.AtMostOnce, HelpText = "The size of the sliding window.", ShortName = "wnd", + [Argument(ArgumentType.AtMostOnce, HelpText = "The length of series from the begining used for training.", ShortName = "wnd", SortOrder = 3)] public int TrainSize = 1; @@ -67,8 +67,8 @@ private protected SequentialForecastingTransformBase(int windowSize, int initial } private protected SequentialForecastingTransformBase(ForecastingArgumentsBase args, string name, int outputLength, IHostEnvironment env) - : this(args.TrainSize, args.SeriesLength, args.Source, args.ForecastingConfidenceIntervalMinOutputColumnName, - args.ForecastingConfidenceIntervalMaxOutputColumnName, args.Name, name, outputLength, env) + : this(args.TrainSize, args.SeriesLength, args.Source, args.ForcastingConfidentLowerBoundColumnName, + args.ForcastingConfidentUpperBoundColumnName, args.Name, name, outputLength, env) { } diff --git a/src/Microsoft.ML.TimeSeries/SequentialTransformBase.cs b/src/Microsoft.ML.TimeSeries/SequentialTransformBase.cs index 8995648fe2..43a7005864 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialTransformBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialTransformBase.cs @@ -29,6 +29,11 @@ public DataBox(T value) } } + /// + /// The box class that is used to box the TInput and TOutput for the LambdaTransform. + /// This is for the case where there are three output columns. + /// + /// The type to be boxed, e.g. TInput or TOutput internal sealed class DataBoxForecastingWithConfidenceIntervals { public T Value1; diff --git a/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs b/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs index ce26181071..feb4bd1114 100644 --- a/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs +++ b/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs @@ -165,8 +165,8 @@ internal sealed class SsaForecastingBase : SequentialForecastingTransformBase Model; public SsaForecastingBase(SsaForecastingOptions options, string name, IHostEnvironment env, SsaForecastingBaseWrapper parent) - : base(options.TrainSize, 0, options.Source, options.Name, options.ForecastingConfidenceIntervalMinOutputColumnName, - options.ForecastingConfidenceIntervalMaxOutputColumnName, name, options.Horizon, env) + : base(options.TrainSize, 0, options.Source, options.Name, options.ForcastingConfidentLowerBoundColumnName, + options.ForcastingConfidentUpperBoundColumnName, name, options.Horizon, env) { Host.CheckUserArg(0 <= options.DiscountFactor && options.DiscountFactor <= 1, nameof(options.DiscountFactor), "Must be in the range [0, 1]."); IsAdaptive = options.IsAdaptive; @@ -174,7 +174,7 @@ public SsaForecastingBase(SsaForecastingOptions options, string name, IHostEnvir ConfidenceLevel = options.ConfidenceLevel; // Creating the master SSA model Model = new AdaptiveSingularSpectrumSequenceModelerInternal(Host, options.TrainSize, options.SeriesLength, options.WindowSize, - options.DiscountFactor, options.RankSelectionMethod, options.Rank, options.MaxRank, !string.IsNullOrEmpty(options.ForecastingConfidenceIntervalMinOutputColumnName), + options.DiscountFactor, options.RankSelectionMethod, options.Rank, options.MaxRank, !string.IsNullOrEmpty(options.ForcastingConfidentLowerBoundColumnName), options.ShouldStablize, options.ShouldMaintainInfo, options.MaxGrowth); StateRef = new State(); diff --git a/src/Microsoft.ML.TimeSeries/TimeSeriesProcessing.cs b/src/Microsoft.ML.TimeSeries/TimeSeriesProcessing.cs index a7dd59cfc9..24882236e4 100644 --- a/src/Microsoft.ML.TimeSeries/TimeSeriesProcessing.cs +++ b/src/Microsoft.ML.TimeSeries/TimeSeriesProcessing.cs @@ -124,5 +124,19 @@ public static CommonOutputs.TransformOutput SsaSpikeDetector(IHostEnvironment en OutputData = view }; } + + [TlcModule.EntryPoint(Desc = TimeSeries.SsaForecasting.Summary, + UserName = TimeSeries.SsaForecasting.UserName, + ShortName = TimeSeries.SsaForecasting.ShortName)] + internal static CommonOutputs.TransformOutput SsaForecasting(IHostEnvironment env, SsaForecasting.Options options) + { + var h = EntryPointUtils.CheckArgsAndCreateHost(env, "SsaForecasting", options); + var view = new SsaForecastingEstimator(h, options).Fit(options.Data).Transform(options.Data); + return new CommonOutputs.TransformOutput() + { + Model = new TransformModelImpl(h, view, options.Data), + OutputData = view + }; + } } } diff --git a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv index f18a9e9e2b..e7f45f417d 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv +++ b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv @@ -38,6 +38,7 @@ TimeSeriesProcessingEntryPoints.PercentileThresholdTransform Detects the values TimeSeriesProcessingEntryPoints.PValueTransform This P-Value transform calculates the p-value of the current input in the sequence with regard to the values in the sliding window. Microsoft.ML.Transforms.TimeSeries.TimeSeriesProcessingEntryPoints PValueTransform Microsoft.ML.Transforms.TimeSeries.PValueTransform+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput TimeSeriesProcessingEntryPoints.SlidingWindowTransform Returns the last values for a time series [y(t-d-l+1), y(t-d-l+2), ..., y(t-l-1), y(t-l)] where d is the size of the window, l the lag and y is a Float. Microsoft.ML.Transforms.TimeSeries.TimeSeriesProcessingEntryPoints SlidingWindowTransform Microsoft.ML.Transforms.TimeSeries.SlidingWindowTransformBase`1+Arguments[System.Single] Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput TimeSeriesProcessingEntryPoints.SsaChangePointDetector This transform detects the change-points in a seasonal time-series using Singular Spectrum Analysis (SSA). Microsoft.ML.Transforms.TimeSeries.TimeSeriesProcessingEntryPoints SsaChangePointDetector Microsoft.ML.Transforms.TimeSeries.SsaChangePointDetector+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput +TimeSeriesProcessingEntryPoints.SsaForecasting This transform forecasts using Singular Spectrum Analysis (SSA). Microsoft.ML.Transforms.TimeSeries.TimeSeriesProcessingEntryPoints SsaForecasting Microsoft.ML.Transforms.TimeSeries.SsaForecasting+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput TimeSeriesProcessingEntryPoints.SsaSpikeDetector This transform detects the spikes in a seasonal time-series using Singular Spectrum Analysis (SSA). Microsoft.ML.Transforms.TimeSeries.TimeSeriesProcessingEntryPoints SsaSpikeDetector Microsoft.ML.Transforms.TimeSeries.SsaSpikeDetector+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Trainers.AveragedPerceptronBinaryClassifier Averaged Perceptron Binary Classifier. Microsoft.ML.Trainers.AveragedPerceptronTrainer TrainBinary Microsoft.ML.Trainers.AveragedPerceptronTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput Trainers.EnsembleBinaryClassifier Train binary ensemble. Microsoft.ML.Trainers.Ensemble.Ensemble CreateBinaryEnsemble Microsoft.ML.Trainers.Ensemble.EnsembleTrainer+Arguments Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index 99b0d3a8ab..04fd0a6ea5 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -4007,6 +4007,235 @@ "ITransformOutput" ] }, + { + "Name": "TimeSeriesProcessingEntryPoints.SsaForecasting", + "Desc": "This transform forecasts using Singular Spectrum Analysis (SSA).", + "FriendlyName": "SSA Forecasting", + "ShortName": "ssafcst", + "Inputs": [ + { + "Name": "Source", + "Type": "String", + "Desc": "The name of the source column.", + "Aliases": [ + "src" + ], + "Required": true, + "SortOrder": 1.0, + "IsNullable": false + }, + { + "Name": "Data", + "Type": "DataView", + "Desc": "Input dataset", + "Required": true, + "SortOrder": 1.0, + "IsNullable": false + }, + { + "Name": "Name", + "Type": "String", + "Desc": "The name of the new column.", + "Required": true, + "SortOrder": 2.0, + "IsNullable": false + }, + { + "Name": "WindowSize", + "Type": "Int", + "Desc": "The length of the window on the series for building the trajectory matrix (parameter L).", + "Required": true, + "SortOrder": 2.0, + "IsNullable": false, + "Default": 0 + }, + { + "Name": "SeriesLength", + "Type": "Int", + "Desc": "The length of series that is kept in buffer for modeling (parameter N).", + "Required": true, + "SortOrder": 2.0, + "IsNullable": false, + "Default": 0 + }, + { + "Name": "TrainSize", + "Type": "Int", + "Desc": "The length of series from the begining used for training.", + "Required": true, + "SortOrder": 2.0, + "IsNullable": false, + "Default": 0 + }, + { + "Name": "Horizon", + "Type": "Int", + "Desc": "The number of values to forecast.", + "Required": true, + "SortOrder": 2.0, + "IsNullable": false, + "Default": 0 + }, + { + "Name": "ConfidenceLevel", + "Type": "Float", + "Desc": "The confidence level in [0, 1) for forecasting.", + "Required": false, + "SortOrder": 2.0, + "IsNullable": false, + "Default": 0.95 + }, + { + "Name": "ForcastingConfidentLowerBoundColumnName", + "Type": "String", + "Desc": "The name of the confidence interval lower bound column.", + "Aliases": [ + "cnfminname" + ], + "Required": false, + "SortOrder": 3.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "ForcastingConfidentUpperBoundColumnName", + "Type": "String", + "Desc": "The name of the confidence interval upper bound column.", + "Aliases": [ + "cnfmaxnname" + ], + "Required": false, + "SortOrder": 3.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "RankSelectionMethod", + "Type": { + "Kind": "Enum", + "Values": [ + "Fixed", + "Exact", + "Fast" + ] + }, + "Desc": "The rank selection method.", + "Required": false, + "SortOrder": 3.0, + "IsNullable": false, + "Default": "Exact" + }, + { + "Name": "Rank", + "Type": "Int", + "Desc": "The desired rank of the subspace used for SSA projection (parameter r). This parameter should be in the range in [1, windowSize]. If set to null, the rank is automatically determined based on prediction error minimization.", + "Required": false, + "SortOrder": 3.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "MaxRank", + "Type": "Int", + "Desc": "The maximum rank considered during the rank selection process. If not provided (i.e. set to null), it is set to windowSize - 1.", + "Required": false, + "SortOrder": 3.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "ShouldStablize", + "Type": "Bool", + "Desc": "The flag determining whether the model should be stabilized.", + "Required": false, + "SortOrder": 3.0, + "IsNullable": false, + "Default": true + }, + { + "Name": "ShouldMaintainInfo", + "Type": "Bool", + "Desc": "The flag determining whether the meta information for the model needs to be maintained.", + "Required": false, + "SortOrder": 3.0, + "IsNullable": false, + "Default": false + }, + { + "Name": "MaxGrowth", + "Type": { + "Kind": "Struct", + "Fields": [ + { + "Name": "TimeSpan", + "Type": "Int", + "Desc": "The confidence level in [0, 1) for forecasting.", + "Required": false, + "SortOrder": 2.0, + "IsNullable": false, + "Default": 0 + }, + { + "Name": "Growth", + "Type": "Float", + "Desc": "The confidence level in [0, 1) for forecasting.", + "Required": false, + "SortOrder": 2.0, + "IsNullable": false, + "Default": 0.0 + } + ] + }, + "Desc": "The maximum growth on the exponential trend.", + "Required": false, + "SortOrder": 3.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "DiscountFactor", + "Type": "Float", + "Desc": "The discount factor in [0,1] used for online updates.", + "Aliases": [ + "disc" + ], + "Required": false, + "SortOrder": 5.0, + "IsNullable": false, + "Default": 1.0 + }, + { + "Name": "IsAdaptive", + "Type": "Bool", + "Desc": "The flag determing whether the model is adaptive", + "Aliases": [ + "adp" + ], + "Required": false, + "SortOrder": 6.0, + "IsNullable": false, + "Default": false + } + ], + "Outputs": [ + { + "Name": "OutputData", + "Type": "DataView", + "Desc": "Transformed dataset" + }, + { + "Name": "Model", + "Type": "TransformModel", + "Desc": "Transform model" + } + ], + "InputKind": [ + "ITransformInput" + ], + "OutputKind": [ + "ITransformOutput" + ] + }, { "Name": "TimeSeriesProcessingEntryPoints.SsaSpikeDetector", "Desc": "This transform detects the spikes in a seasonal time-series using Singular Spectrum Analysis (SSA).", diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 4a0559b032..b79378555c 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -329,8 +329,8 @@ public void SsaForecast() ConfidenceLevel = 0.95f, Source = "Value", Name = "Forecast", - ForecastingConfidenceIntervalMinOutputColumnName = "MinCnf", - ForecastingConfidenceIntervalMaxOutputColumnName = "MaxCnf", + ForcastingConfidentLowerBoundColumnName = "MinCnf", + ForcastingConfidentUpperBoundColumnName = "MaxCnf", WindowSize = 10, SeriesLength = 11, TrainSize = 22, From a431b7b7f8a77d065abdc6d0a1da3945c39b6fc0 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Fri, 28 Jun 2019 13:19:38 -0700 Subject: [PATCH 15/21] Make GrowthRatio accessible in nimbus ML. --- .../AdaptiveSingularSpectrumSequenceModeler.cs | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs b/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs index c5dd203064..fed9058850 100644 --- a/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs +++ b/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs @@ -36,19 +36,10 @@ public enum RankSelectionMethod public struct GrowthRatio { [Argument(ArgumentType.AtMostOnce, HelpText = "The confidence level in [0, 1) for forecasting.", SortOrder = 2)] - public readonly int TimeSpan; + public int TimeSpan; [Argument(ArgumentType.AtMostOnce, HelpText = "The confidence level in [0, 1) for forecasting.", SortOrder = 2)] - public readonly Double Growth; - - public GrowthRatio(int timeSpan = 1, double growth = Double.PositiveInfinity) - { - Contracts.CheckParam(timeSpan > 0, nameof(TimeSpan), "The time span must be strictly positive."); - Contracts.CheckParam(growth >= 0, nameof(Growth), "The growth must be non-negative."); - - Growth = growth; - TimeSpan = timeSpan; - } + public Double Growth; public Double Ratio { get { return Math.Pow(Growth, 1d / TimeSpan); } } } @@ -241,6 +232,8 @@ public AdaptiveSingularSpectrumSequenceModelerInternal(IHostEnvironment env, int _host.CheckParam(seriesLength > windowSize, nameof(seriesLength), "The series length should be greater than the window size."); _host.Check(trainSize > 2 * windowSize, "The input series length for training should be greater than twice the window size."); _host.CheckParam(0 <= discountFactor && discountFactor <= 1, nameof(discountFactor), "The discount factor should be in [0,1]."); + _host.CheckParam(maxGrowth == null || maxGrowth.Value.TimeSpan > 0, nameof(GrowthRatio.TimeSpan), "The time span must be strictly positive."); + _host.CheckParam(maxGrowth == null || maxGrowth.Value.Growth >= 0, nameof(GrowthRatio.Growth), "The growth must be non-negative."); if (maxRank != null) { From 0a4aeba12bb2302e9ea338a81e3de0a846414cdc Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Fri, 28 Jun 2019 13:23:29 -0700 Subject: [PATCH 16/21] update comments. --- .../AdaptiveSingularSpectrumSequenceModeler.cs | 4 ++-- test/BaselineOutput/Common/EntryPoints/core_manifest.json | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs b/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs index fed9058850..3d793c7100 100644 --- a/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs +++ b/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs @@ -35,10 +35,10 @@ public enum RankSelectionMethod /// public struct GrowthRatio { - [Argument(ArgumentType.AtMostOnce, HelpText = "The confidence level in [0, 1) for forecasting.", SortOrder = 2)] + [Argument(ArgumentType.AtMostOnce, HelpText = "Time span of growth ratio. Must be strictly positive.", SortOrder = 1)] public int TimeSpan; - [Argument(ArgumentType.AtMostOnce, HelpText = "The confidence level in [0, 1) for forecasting.", SortOrder = 2)] + [Argument(ArgumentType.AtMostOnce, HelpText = "Growth. Must be non-negative.", SortOrder = 2)] public Double Growth; public Double Ratio { get { return Math.Pow(Growth, 1d / TimeSpan); } } diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index 04fd0a6ea5..2413ab25d6 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -4169,16 +4169,16 @@ { "Name": "TimeSpan", "Type": "Int", - "Desc": "The confidence level in [0, 1) for forecasting.", + "Desc": "Time span of growth ratio. Must be strictly positive.", "Required": false, - "SortOrder": 2.0, + "SortOrder": 1.0, "IsNullable": false, "Default": 0 }, { "Name": "Growth", "Type": "Float", - "Desc": "The confidence level in [0, 1) for forecasting.", + "Desc": "Growth. Must be non-negative.", "Required": false, "SortOrder": 2.0, "IsNullable": false, From 5904a5314c299e498949d3a106f10a4c995bac1f Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Fri, 28 Jun 2019 14:10:53 -0700 Subject: [PATCH 17/21] Add samples. --- .../TimeSeries/DetectAnomalyBySrCnn.cs | 2 +- .../TimeSeries/DetectChangePointBySsa.cs | 4 +- .../DetectChangePointBySsaStream.cs | 4 +- .../TimeSeries/DetectIidChangePoint.cs | 4 +- .../Transforms/TimeSeries/DetectIidSpike.cs | 2 +- .../Transforms/TimeSeries/DetectSpikeBySsa.cs | 4 +- .../Transforms/TimeSeries/Forecasting.cs | 46 ++++++++++++------- docs/samples/Microsoft.ML.Samples/Program.cs | 6 ++- ...AdaptiveSingularSpectrumSequenceModeler.cs | 2 +- .../PredictionFunction.cs | 4 +- src/Microsoft.ML.TimeSeries/SSaForecasting.cs | 4 +- .../TimeSeriesDirectApi.cs | 12 ++--- 12 files changed, 56 insertions(+), 38 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs index 9b17e86244..494c14c2eb 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs @@ -40,7 +40,7 @@ public static void Example() ITransformer model = ml.Transforms.DetectAnomalyBySrCnn(outputColumnName, inputColumnName, 16, 5, 5, 3, 8, 0.35).Fit(dataView); // Create a time series prediction engine from the model. - var engine = model.CreateTimeSeriesPredictionFunction(ml); + var engine = model.CreateAnomalyDetectionEngine(ml); Console.WriteLine($"{outputColumnName} column obtained post-transformation."); Console.WriteLine("Data\tAlert\tScore\tMag"); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs index 8404ca2f63..cbf539a135 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs @@ -57,7 +57,7 @@ public static void Example() ITransformer model = ml.Transforms.DetectChangePointBySsa(outputColumnName, inputColumnName, confidence, changeHistoryLength, TrainingSize, SeasonalitySize + 1).Fit(dataView); // Create a prediction engine from the model for feeding new data. - var engine = model.CreateTimeSeriesPredictionFunction(ml); + var engine = model.CreateAnomalyDetectionEngine(ml); // Start streaming new data points with no change point to the prediction engine. Console.WriteLine($"Output from ChangePoint predictions on new data:"); @@ -99,7 +99,7 @@ public static void Example() model = ml.Model.Load(file, out DataViewSchema schema); // We must create a new prediction engine from the persisted model. - engine = model.CreateTimeSeriesPredictionFunction(ml); + engine = model.CreateAnomalyDetectionEngine(ml); // Run predictions on the loaded model. for (int i = 0; i < 5; i++) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaStream.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaStream.cs index 0cd9b0fa77..1c782df823 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaStream.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaStream.cs @@ -57,7 +57,7 @@ public static void Example() ITransformer model = ml.Transforms.DetectChangePointBySsa(outputColumnName, inputColumnName, confidence, changeHistoryLength, TrainingSize, SeasonalitySize + 1).Fit(dataView); // Create a prediction engine from the model for feeding new data. - var engine = model.CreateTimeSeriesPredictionFunction(ml); + var engine = model.CreateAnomalyDetectionEngine(ml); // Start streaming new data points with no change point to the prediction engine. Console.WriteLine($"Output from ChangePoint predictions on new data:"); @@ -103,7 +103,7 @@ public static void Example() model = ml.Model.Load(stream, out DataViewSchema schema); // We must create a new prediction engine from the persisted model. - engine = model.CreateTimeSeriesPredictionFunction(ml); + engine = model.CreateAnomalyDetectionEngine(ml); // Run predictions on the loaded model. for (int i = 0; i < 5; i++) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs index 2890c4ec57..9c392e343c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs @@ -56,7 +56,7 @@ public static void Example() ITransformer model = ml.Transforms.DetectIidChangePoint(outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView); // Create a time series prediction engine from the model. - var engine = model.CreateTimeSeriesPredictionFunction(ml); + var engine = model.CreateAnomalyDetectionEngine(ml); Console.WriteLine($"{outputColumnName} column obtained post-transformation."); Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value"); @@ -97,7 +97,7 @@ public static void Example() model = ml.Model.Load(file, out DataViewSchema schema); // Create a time series prediction engine from the checkpointed model. - engine = model.CreateTimeSeriesPredictionFunction(ml); + engine = model.CreateAnomalyDetectionEngine(ml); for (int index = 0; index < 8; index++) { // Anomaly change point detection. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs index 439006a7ec..fab14a1fe6 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs @@ -48,7 +48,7 @@ public static void Example() ITransformer model = ml.Transforms.DetectIidSpike(outputColumnName, inputColumnName, 95, Size).Fit(dataView); // Create a time series prediction engine from the model. - var engine = model.CreateTimeSeriesPredictionFunction(ml); + var engine = model.CreateAnomalyDetectionEngine(ml); Console.WriteLine($"{outputColumnName} column obtained post-transformation."); Console.WriteLine("Data\tAlert\tScore\tP-Value"); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs index 38094bb672..4963209a0b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs @@ -54,7 +54,7 @@ public static void Example() ITransformer model = ml.Transforms.DetectSpikeBySsa(outputColumnName, inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(dataView); // Create a prediction engine from the model for feeding new data. - var engine = model.CreateTimeSeriesPredictionFunction(ml); + var engine = model.CreateAnomalyDetectionEngine(ml); // Start streaming new data points with no change point to the prediction engine. Console.WriteLine($"Output from spike predictions on new data:"); @@ -94,7 +94,7 @@ public static void Example() model = ml.Model.Load(file, out DataViewSchema schema); // We must create a new prediction engine from the persisted model. - engine = model.CreateTimeSeriesPredictionFunction(ml); + engine = model.CreateAnomalyDetectionEngine(ml); // Run predictions on the loaded model. for (int i = 0; i < 5; i++) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs index 87bd685513..1fc6c7e0e4 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs @@ -1,8 +1,8 @@ -/*using System; +using System; using System.Collections.Generic; +using System.IO; using Microsoft.ML; using Microsoft.ML.Transforms.TimeSeries; -using Microsoft.ML.TimeSeries;*/ namespace Samples.Dynamic { @@ -12,12 +12,11 @@ public static class Forecasting // does forecasting. public static void Example() { - /*// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, // as well as the source of randomness. var ml = new MLContext(); - // Generate sample series data with a recurring pattern - const int SeasonalitySize = 5; + // Generate sample series data with a recurring pattern. var data = new List() { new TimeSeriesData(0), @@ -46,41 +45,56 @@ public static void Example() var inputColumnName = nameof(TimeSeriesData.Value); // Instantiate the forecasting model. - var model = ml.Forecasting.AdaptiveSingularSpectrumSequenceModeler(inputColumnName, data.Count, SeasonalitySize + 1, SeasonalitySize, - 1, AdaptiveSingularSpectrumSequenceForecasting.RankSelectionMethod.Exact, null, SeasonalitySize / 2, false, false); + var model = ml.Forecasting.ForecastBySsa(inputColumnName, inputColumnName, 5, 11, data.Count, 5); // Train. - model.Train(dataView); + var transformer = model.Fit(dataView); // Forecast next five values. - var forecast = model.Forecast(5); + var forecastEngine = transformer.CreateForecastingEngine(ml); + var forecast = ml.Data.CreateEnumerable(forecastEngine.Forecast(5), false); Console.WriteLine($"Forecasted values:"); Console.WriteLine("[{0}]", string.Join(", ", forecast)); // Forecasted values: // [2.452744, 2.589339, 2.729183, 2.873005, 3.028931] // Update with new observations. - dataView = ml.Data.LoadFromEnumerable(new List() { new TimeSeriesData(0), new TimeSeriesData(0), new TimeSeriesData(0), new TimeSeriesData(0) }); - model.Update(dataView); + forecastEngine.Update(new TimeSeriesData(0)); + forecastEngine.Update(new TimeSeriesData(0)); + forecastEngine.Update(new TimeSeriesData(0)); + forecastEngine.Update(new TimeSeriesData(0)); // Checkpoint. - ml.Model.SaveForecastingModel(model, "model.zip"); + forecastEngine.CheckPoint(ml, "model.zip"); // Load the checkpointed model from disk. - var modelCopy = ml.Model.LoadForecastingModel("model.zip"); + // Load the model. + ITransformer modelCopy; + using (var file = File.OpenRead("model.zip")) + modelCopy = ml.Model.Load(file, out DataViewSchema schema); + + // We must create a new prediction engine from the persisted model. + var forecastEngineCopy = modelCopy.CreateForecastingEngine(ml); // Forecast with the checkpointed model loaded from disk. - forecast = modelCopy.Forecast(5); + forecast = ml.Data.CreateEnumerable(forecastEngineCopy.Forecast(5), false); Console.WriteLine("[{0}]", string.Join(", ", forecast)); // [0.8681176, 0.8185108, 0.8069275, 0.84405, 0.9455081] // Forecast with the original model(that was checkpointed to disk). - forecast = model.Forecast(5); + forecast = ml.Data.CreateEnumerable(forecastEngine.Forecast(5), false); Console.WriteLine("[{0}]", string.Join(", ", forecast)); - // [0.8681176, 0.8185108, 0.8069275, 0.84405, 0.9455081]*/ + // [0.8681176, 0.8185108, 0.8069275, 0.84405, 0.9455081] } + class ForecastResult + { +#pragma warning disable CS0649 + public float Forecast; +#pragma warning restore CS0649 + } + class TimeSeriesData { public float Value; diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs index eb49938927..60e9ce5884 100644 --- a/docs/samples/Microsoft.ML.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.Samples/Program.cs @@ -1,5 +1,6 @@ using System; using System.Reflection; +using Samples.Dynamic; namespace Microsoft.ML.Samples { @@ -9,7 +10,7 @@ public static class Program internal static void RunAll() { - int samples = 0; + /*int samples = 0; foreach (var type in Assembly.GetExecutingAssembly().GetTypes()) { var sample = type.GetMethod("Example", BindingFlags.Public | BindingFlags.Static | BindingFlags.FlattenHierarchy); @@ -22,7 +23,8 @@ internal static void RunAll() } } - Console.WriteLine("Number of samples that ran without any exception: " + samples); + Console.WriteLine("Number of samples that ran without any exception: " + samples);*/ + Forecasting.Example(); } } } diff --git a/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs b/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs index 3d793c7100..1986eac4bb 100644 --- a/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs +++ b/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs @@ -230,7 +230,7 @@ public AdaptiveSingularSpectrumSequenceModelerInternal(IHostEnvironment env, int _host = env.Register(LoaderSignature); _host.CheckParam(windowSize >= 2, nameof(windowSize), "The window size should be at least 2."); // ...because otherwise we have nothing to autoregress on _host.CheckParam(seriesLength > windowSize, nameof(seriesLength), "The series length should be greater than the window size."); - _host.Check(trainSize > 2 * windowSize, "The input series length for training should be greater than twice the window size."); + _host.Check(trainSize > 2 * windowSize, "The input size for training should be greater than twice the window size."); _host.CheckParam(0 <= discountFactor && discountFactor <= 1, nameof(discountFactor), "The discount factor should be in [0,1]."); _host.CheckParam(maxGrowth == null || maxGrowth.Value.TimeSpan > 0, nameof(GrowthRatio.TimeSpan), "The time span must be strictly positive."); _host.CheckParam(maxGrowth == null || maxGrowth.Value.Growth >= 0, nameof(GrowthRatio.Growth), "The growth must be non-negative."); diff --git a/src/Microsoft.ML.TimeSeries/PredictionFunction.cs b/src/Microsoft.ML.TimeSeries/PredictionFunction.cs index 4b3c206dbd..e2bda60a00 100644 --- a/src/Microsoft.ML.TimeSeries/PredictionFunction.cs +++ b/src/Microsoft.ML.TimeSeries/PredictionFunction.cs @@ -385,7 +385,7 @@ public static class PredictionFunctionExtensions /// ]]> /// /// - public static TimeSeriesAnomalyDetection CreateTimeSeriesPredictionFunction(this ITransformer transformer, IHostEnvironment env, + public static TimeSeriesAnomalyDetection CreateAnomalyDetectionEngine(this ITransformer transformer, IHostEnvironment env, bool ignoreMissingColumns = false, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) where TSrc : class where TDst : class, new() @@ -416,7 +416,7 @@ public static TimeSeriesAnomalyDetection CreateTimeSeriesPredictionF /// ]]> /// /// - public static TimeSeriesForecasting CreateTimeSeriesForecastingEngine(this ITransformer transformer, IHostEnvironment env, + public static TimeSeriesForecasting CreateForecastingEngine(this ITransformer transformer, IHostEnvironment env, bool ignoreMissingColumns = false, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) where TSrc : class { diff --git a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs index 8fdeb5b2c8..6e3b1ce61f 100644 --- a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs +++ b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs @@ -285,7 +285,9 @@ internal SsaForecastingEstimator(IHostEnvironment env, MaxGrowth = maxGrowth, ConfidenceLevel = confidenceLevel, ForcastingConfidentLowerBoundColumnName = forcastingConfidentLowerBoundColumnName, - ForcastingConfidentUpperBoundColumnName = forcastingConfidentUpperBoundColumnName + ForcastingConfidentUpperBoundColumnName = forcastingConfidentUpperBoundColumnName, + SeriesLength = seriesLength, + TrainSize = trainSize }) { } diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index b79378555c..c6b5a06388 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -204,7 +204,7 @@ public void ChangePointDetectionWithSeasonalityPredictionEngineNoColumn() var model = pipeline.Fit(dataView); //Create prediction function. - var engine = model.CreateTimeSeriesPredictionFunction(ml); + var engine = model.CreateAnomalyDetectionEngine(ml); //Checkpoint with no inputs passed at prediction. var modelPath = "temp.zip"; @@ -217,7 +217,7 @@ public void ChangePointDetectionWithSeasonalityPredictionEngineNoColumn() model2 = ml.Model.Load(file, out var schema); //Raw score after state gets updated with two inputs. - var engine2 = model2.CreateTimeSeriesPredictionFunction(ml); + var engine2 = model2.CreateAnomalyDetectionEngine(ml); var prediction2 = engine2.Predict(new Data(1)); //Raw score after first input. Assert.Equal(1.1661833524703979, prediction2.Change[1], precision: 5); // Raw score @@ -238,7 +238,7 @@ public void ChangePointDetectionWithSeasonalityPredictionEngineNoColumn() //Load the model with state updated with just one input, then pass in the second input //and raw score should match the raw score obtained by passing the two input in the first model. - var engine3 = model3.CreateTimeSeriesPredictionFunction(ml); + var engine3 = model3.CreateAnomalyDetectionEngine(ml); var prediction3 = engine3.Predict(new Data(1)); Assert.Equal(0.12216401100158691, prediction2.Change[1], precision: 5); // Raw score } @@ -281,7 +281,7 @@ public void ChangePointDetectionWithSeasonalityPredictionEngine() var model = pipeline.Fit(dataView); //Model 1: Prediction #1. - var engine = model.CreateTimeSeriesPredictionFunction(ml); + var engine = model.CreateAnomalyDetectionEngine(ml); var prediction = engine.Predict(new Data(1)); Assert.Equal(0, prediction.Change[0], precision: 7); // Alert Assert.Equal(1.1661833524703979, prediction.Change[1], precision: 5); // Raw score @@ -305,7 +305,7 @@ public void ChangePointDetectionWithSeasonalityPredictionEngine() model2 = ml.Model.Load(file, out var schema); //Predict and expect the same result after checkpointing(Prediction #2). - engine = model2.CreateTimeSeriesPredictionFunction(ml); + engine = model2.CreateAnomalyDetectionEngine(ml); prediction = engine.Predict(new Data(1)); Assert.Equal(0, prediction.Change[0], precision: 7); // Alert Assert.Equal(0.12216401100158691, prediction.Change[1], precision: 5); // Raw score @@ -401,7 +401,7 @@ public void SsaForecastPredictionEngine() var model = new SsaForecastingEstimator(ml, args).Fit(dataView); //Model 1: Prediction #1. - var engine = model.CreateTimeSeriesForecastingEngine(ml); + var engine = model.CreateForecastingEngine(ml); engine.Update(new Data(1)); engine.Update(new Data(2)); var forecast = engine.Forecast(horizon: 5); From 3927a5c2f3b8d25d87a4b8a97b18ddd2c2f5f49f Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Mon, 1 Jul 2019 02:18:37 -0700 Subject: [PATCH 18/21] PR feedback. --- .../TimeSeries/DetectAnomalyBySrCnn.cs | 2 +- .../TimeSeries/DetectChangePointBySsa.cs | 4 +- .../DetectChangePointBySsaStream.cs | 4 +- .../TimeSeries/DetectIidChangePoint.cs | 4 +- .../Transforms/TimeSeries/DetectIidSpike.cs | 2 +- .../Transforms/TimeSeries/DetectSpikeBySsa.cs | 4 +- .../Transforms/TimeSeries/Forecasting.cs | 6 +- .../ExtensionsCatalog.cs | 7 +- .../PredictionFunction.cs | 207 +++++++----------- src/Microsoft.ML.TimeSeries/SSaForecasting.cs | 16 +- ...SequentialAnomalyDetectionTransformBase.cs | 15 +- .../SequentialForecastingTransformBase.cs | 65 +++++- .../SequentialTransformerBase.cs | 33 ++- .../SrCnnTransformBase.cs | 15 +- .../SsaForecastingBase.cs | 76 +++---- .../TimeSeriesDirectApi.cs | 97 ++++++-- 16 files changed, 310 insertions(+), 247 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs index 494c14c2eb..9fd6b849a8 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs @@ -40,7 +40,7 @@ public static void Example() ITransformer model = ml.Transforms.DetectAnomalyBySrCnn(outputColumnName, inputColumnName, 16, 5, 5, 3, 8, 0.35).Fit(dataView); // Create a time series prediction engine from the model. - var engine = model.CreateAnomalyDetectionEngine(ml); + var engine = model.CreateTimeSeriesEngine(ml); Console.WriteLine($"{outputColumnName} column obtained post-transformation."); Console.WriteLine("Data\tAlert\tScore\tMag"); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs index cbf539a135..0bc494790f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs @@ -57,7 +57,7 @@ public static void Example() ITransformer model = ml.Transforms.DetectChangePointBySsa(outputColumnName, inputColumnName, confidence, changeHistoryLength, TrainingSize, SeasonalitySize + 1).Fit(dataView); // Create a prediction engine from the model for feeding new data. - var engine = model.CreateAnomalyDetectionEngine(ml); + var engine = model.CreateTimeSeriesEngine(ml); // Start streaming new data points with no change point to the prediction engine. Console.WriteLine($"Output from ChangePoint predictions on new data:"); @@ -99,7 +99,7 @@ public static void Example() model = ml.Model.Load(file, out DataViewSchema schema); // We must create a new prediction engine from the persisted model. - engine = model.CreateAnomalyDetectionEngine(ml); + engine = model.CreateTimeSeriesEngine(ml); // Run predictions on the loaded model. for (int i = 0; i < 5; i++) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaStream.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaStream.cs index 1c782df823..d9a97ee5ef 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaStream.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaStream.cs @@ -57,7 +57,7 @@ public static void Example() ITransformer model = ml.Transforms.DetectChangePointBySsa(outputColumnName, inputColumnName, confidence, changeHistoryLength, TrainingSize, SeasonalitySize + 1).Fit(dataView); // Create a prediction engine from the model for feeding new data. - var engine = model.CreateAnomalyDetectionEngine(ml); + var engine = model.CreateTimeSeriesEngine(ml); // Start streaming new data points with no change point to the prediction engine. Console.WriteLine($"Output from ChangePoint predictions on new data:"); @@ -103,7 +103,7 @@ public static void Example() model = ml.Model.Load(stream, out DataViewSchema schema); // We must create a new prediction engine from the persisted model. - engine = model.CreateAnomalyDetectionEngine(ml); + engine = model.CreateTimeSeriesEngine(ml); // Run predictions on the loaded model. for (int i = 0; i < 5; i++) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs index 9c392e343c..466080c49d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs @@ -56,7 +56,7 @@ public static void Example() ITransformer model = ml.Transforms.DetectIidChangePoint(outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView); // Create a time series prediction engine from the model. - var engine = model.CreateAnomalyDetectionEngine(ml); + var engine = model.CreateTimeSeriesEngine(ml); Console.WriteLine($"{outputColumnName} column obtained post-transformation."); Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value"); @@ -97,7 +97,7 @@ public static void Example() model = ml.Model.Load(file, out DataViewSchema schema); // Create a time series prediction engine from the checkpointed model. - engine = model.CreateAnomalyDetectionEngine(ml); + engine = model.CreateTimeSeriesEngine(ml); for (int index = 0; index < 8; index++) { // Anomaly change point detection. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs index fab14a1fe6..1395399571 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs @@ -48,7 +48,7 @@ public static void Example() ITransformer model = ml.Transforms.DetectIidSpike(outputColumnName, inputColumnName, 95, Size).Fit(dataView); // Create a time series prediction engine from the model. - var engine = model.CreateAnomalyDetectionEngine(ml); + var engine = model.CreateTimeSeriesEngine(ml); Console.WriteLine($"{outputColumnName} column obtained post-transformation."); Console.WriteLine("Data\tAlert\tScore\tP-Value"); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs index 4963209a0b..39198e5f8d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs @@ -54,7 +54,7 @@ public static void Example() ITransformer model = ml.Transforms.DetectSpikeBySsa(outputColumnName, inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(dataView); // Create a prediction engine from the model for feeding new data. - var engine = model.CreateAnomalyDetectionEngine(ml); + var engine = model.CreateTimeSeriesEngine(ml); // Start streaming new data points with no change point to the prediction engine. Console.WriteLine($"Output from spike predictions on new data:"); @@ -94,7 +94,7 @@ public static void Example() model = ml.Model.Load(file, out DataViewSchema schema); // We must create a new prediction engine from the persisted model. - engine = model.CreateAnomalyDetectionEngine(ml); + engine = model.CreateTimeSeriesEngine(ml); // Run predictions on the loaded model. for (int i = 0; i < 5; i++) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs index 1fc6c7e0e4..e351f600ae 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs @@ -42,7 +42,7 @@ public static void Example() var dataView = ml.Data.LoadFromEnumerable(data); // Setup arguments. - var inputColumnName = nameof(TimeSeriesData.Value); + /*var inputColumnName = nameof(TimeSeriesData.Value); // Instantiate the forecasting model. var model = ml.Forecasting.ForecastBySsa(inputColumnName, inputColumnName, 5, 11, data.Count, 5); @@ -74,7 +74,7 @@ public static void Example() modelCopy = ml.Model.Load(file, out DataViewSchema schema); // We must create a new prediction engine from the persisted model. - var forecastEngineCopy = modelCopy.CreateForecastingEngine(ml); + /*var forecastEngineCopy = modelCopy.CreateForecastingEngine(ml); // Forecast with the checkpointed model loaded from disk. forecast = ml.Data.CreateEnumerable(forecastEngineCopy.Forecast(5), false); @@ -84,7 +84,7 @@ public static void Example() // Forecast with the original model(that was checkpointed to disk). forecast = ml.Data.CreateEnumerable(forecastEngine.Forecast(5), false); Console.WriteLine("[{0}]", string.Join(", ", forecast)); - // [0.8681176, 0.8185108, 0.8069275, 0.84405, 0.9455081] + // [0.8681176, 0.8185108, 0.8069275, 0.84405, 0.9455081]*/ } diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index 26fc49828e..3694592c2a 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -169,7 +169,8 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog /// The maximum growth on the exponential trend. /// The name of the confidence interval lower bound column. If not specified then confidence intervals will not be calculated. /// The name of the confidence interval upper bound column. If not specified then confidence intervals will not be calculated. - /// + /// The confidence level for forecasting. + /// Set this to true if horizon will change after training. /// /// /// + string forcastingConfidentUpperBoundColumnName = null, float confidenceLevel = 0.95f, bool variableHorizon = false) => new SsaForecastingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, windowSize, seriesLength, trainSize, horizon, isAdaptive, discountFactor, rankSelectionMethod, rank, maxRank, shouldStablize, shouldMaintainInfo, maxGrowth, forcastingConfidentLowerBoundColumnName, - forcastingConfidentUpperBoundColumnName, confidenceLevel); + forcastingConfidentUpperBoundColumnName, confidenceLevel, variableHorizon); } } diff --git a/src/Microsoft.ML.TimeSeries/PredictionFunction.cs b/src/Microsoft.ML.TimeSeries/PredictionFunction.cs index e2bda60a00..c3800423d6 100644 --- a/src/Microsoft.ML.TimeSeries/PredictionFunction.cs +++ b/src/Microsoft.ML.TimeSeries/PredictionFunction.cs @@ -15,15 +15,6 @@ internal interface IStatefulRowToRowMapper : IRowToRowMapper { } - /// - /// Used to expose standalone that allows - /// modifying transformer parameters such as horizon, confidence intervals. - /// - internal interface IForecastTransformer : IStatefulTransformer - { - IDataView Forecast(int horizon, bool includeConfidenceInterval = false, float confidenceLevel = 0.95f); - } - internal interface IStatefulTransformer : ITransformer { /// @@ -43,14 +34,22 @@ internal interface IStatefulTransformer : ITransformer internal abstract class StatefulRow : DataViewRow { - public abstract Action GetPinger(); + public abstract Action GetPinger(); } internal interface IStatefulRowMapper : IRowMapper { void CloneState(); - Action CreatePinger(DataViewRow input, Func activeOutput, out Action disposer); + Action CreatePinger(DataViewRow input, Func activeOutput, out Action disposer); + } + + internal class PingerArgument + { + public long RowPosition; + public float? ConfidenceLevel; + public int? Horizon; + public bool DontConsumeSource; } /// @@ -61,13 +60,13 @@ internal interface IStatefulRowMapper : IRowMapper /// /// The user-defined type that holds the example. /// The user-defined type that holds the prediction. - public abstract class TimeSeriesPredictionEngine : PredictionEngineBase + public sealed class TimeSeriesPredictionEngine : PredictionEngineBase where TSrc : class where TDst : class, new() { - protected Action Pinger; - protected long RowPosition; - protected ITransformer InputTransformer { get; set; } + private Action _pinger; + private long _rowPosition; + private ITransformer InputTransformer { get; set; } /// /// Checkpoints to disk with the updated @@ -198,11 +197,11 @@ internal DataViewRow GetStatefulRows(DataViewRow input, IRowToRowMapper mapper, return result; } - internal Action CreatePinger(List rows) + internal Action CreatePinger(List rows) { if (rows.Count == 0) return position => { }; - Action pinger = null; + Action pinger = null; foreach (var row in rows) pinger += row.GetPinger(); return pinger; @@ -214,7 +213,7 @@ private protected override void PredictionEngineCore(IHostEnvironment env, DataV List rows = new List(); DataViewRow outputRowLocal = outputRowLocal = GetStatefulRows(inputRow, mapper, mapper.OutputSchema, rows); var cursorable = TypedCursorable.Create(env, new EmptyDataView(env, mapper.OutputSchema), ignoreMissingColumns, outputSchemaDefinition); - Pinger = CreatePinger(rows); + _pinger = CreatePinger(rows); disposer = outputRowLocal.Dispose; outputRow = cursorable.GetRow(outputRowLocal); } @@ -270,103 +269,83 @@ private protected override Func TransformerChec /// The example to run on. /// The object to store the prediction in. If it's null, a new one will be created, otherwise the old one /// is reused. - public override void Predict(TSrc example, ref TDst prediction) + /// + /// + public void Predict(TSrc example, ref TDst prediction, int? horizon = null, float? confidenceLevel = null) { - Contracts.CheckValue(example, nameof(example)); - ExtractValues(example); - if (prediction == null) - prediction = new TDst(); + if (example != null && prediction != null) + { + //Update models and make a prediction after updating. + Contracts.CheckValue(example, nameof(example)); + ExtractValues(example); - // Update state. - Pinger(RowPosition); + // Update state. + _pinger(new PingerArgument() + { + RowPosition = _rowPosition, + ConfidenceLevel = confidenceLevel, + Horizon = horizon + }); - // Predict. - FillValues(prediction); + // Predict. + FillValues(prediction); - RowPosition++; - } - } + _rowPosition++; + } + else if (prediction != null) + { + //Forecast. - /// - /// A class that runs the previously trained model (and the preceding transform pipeline) on the - /// in-memory data, one example at a time. - /// This can also be used with trained pipelines that do not end with a predictor: in this case, the - /// 'prediction' will be just the outcome of all the transformations. - /// Creates an anomaly detection engine for a time series pipeline - /// It updates the state of time series model with observations seen at prediction phase and allows checkpointing the model. - /// - /// The user-defined type that holds the example. - /// The user-defined type that holds the prediction. - public sealed class TimeSeriesAnomalyDetection : TimeSeriesPredictionEngine - where TSrc : class - where TDst : class, new() - { - public TimeSeriesAnomalyDetection(IHostEnvironment env, ITransformer transformer, bool ignoreMissingColumns, - SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) : - base(env, transformer, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition) - { - } - } + // Signal all time series models to not fetch src values in getters. + _pinger(new PingerArgument() + { + RowPosition = _rowPosition, + DontConsumeSource = true, + ConfidenceLevel = confidenceLevel, + Horizon = horizon + }); + + // Predict. The expectation is user has asked for columns that are + // forecasting columns and hence will not trigger a getter that needs an input. + FillValues(prediction); + } + else if (example != null) + { + //Update models. - /// - /// Creates forecasting engine for a time series pipeline that allows updating the state of time series forecasting model - /// with new observations and checkpointing the model. It also allows forecasting values without passing any input values and - /// dynamically configuring paramaters such as horizon and confidence interval. - /// - /// The user-defined type that holds the example. - public sealed class TimeSeriesForecasting : TimeSeriesPredictionEngine - where TSrc : class - { - public TimeSeriesForecasting(IHostEnvironment env, ITransformer transformer, bool ignoreMissingColumns, - SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) : - base(env, transformer, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition) - { - } + //Extract value that needs to propagated to all the models. + Contracts.CheckValue(example, nameof(example)); + ExtractValues(example); - private protected override void PredictionEngineCore(IHostEnvironment env, DataViewConstructionUtils.InputRow inputRow, - IRowToRowMapper mapper, bool ignoreMissingColumns, SchemaDefinition outputSchemaDefinition, out Action disposer, out IRowReadableAs outputRow) - { - List rows = new List(); - DataViewRow outputRowLocal = outputRowLocal = GetStatefulRows(inputRow, mapper, mapper.OutputSchema, rows); - Pinger = CreatePinger(rows); - disposer = outputRowLocal.Dispose; - outputRow = null; - } + // Update state. + _pinger(new PingerArgument() + { + RowPosition = _rowPosition, + ConfidenceLevel = confidenceLevel, + Horizon = horizon + }); - /// - /// Update the time series model with one observation. - /// - /// The observation to update the forecasting model with. - public void Update(TSrc example) - { - Contracts.CheckValue(example, nameof(example)); - ExtractValues(example); - // Update state. - Pinger(RowPosition); - RowPosition++; + _rowPosition++; + } } - /// - /// Standalone forecasting method. Allows dynamic configuration of forecasting parameters such as horizon - /// and confidence level. - /// - /// The number of values to forecast. - /// Whether to include confidence intervals for each forecasted value. - /// The confidence level for forecasting. - /// - public IDataView Forecast(int horizon, bool includeConfidenceInterval = false, float confidenceLevel = 0.95f) => - ((IForecastTransformer)InputTransformer).Forecast(horizon, includeConfidenceInterval, confidenceLevel); - /// /// This method should not be used. /// - public override void Predict(TSrc example, ref object prediction) => throw new NotSupportedException(); + public override void Predict(TSrc example, ref TDst prediction) => Predict(example, ref prediction); + + public TDst Predict(TSrc example, int? horizon = null, float? confidenceLevel = null) + { + TDst dst = new TDst(); + Predict(example, ref dst, horizon, confidenceLevel); + return dst; + } } public static class PredictionFunctionExtensions { /// - /// creates an anomaly detection engine for a time series pipeline + /// creates a prediction engine for a time series pipeline. /// It updates the state of time series model with observations seen at prediction phase and allows checkpointing the model. /// /// Class describing input schema to the model. @@ -385,7 +364,7 @@ public static class PredictionFunctionExtensions /// ]]> /// /// - public static TimeSeriesAnomalyDetection CreateAnomalyDetectionEngine(this ITransformer transformer, IHostEnvironment env, + public static TimeSeriesPredictionEngine CreateTimeSeriesEngine(this ITransformer transformer, IHostEnvironment env, bool ignoreMissingColumns = false, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) where TSrc : class where TDst : class, new() @@ -394,37 +373,7 @@ public static TimeSeriesAnomalyDetection CreateAnomalyDetectionEngin env.CheckValue(transformer, nameof(transformer)); env.CheckValueOrNull(inputSchemaDefinition); env.CheckValueOrNull(outputSchemaDefinition); - return new TimeSeriesAnomalyDetection(env, transformer, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition); - } - - /// - /// creates a forecasting engine for a time series pipeline - /// It allows updating the state of time series forecasting model with new observations and allows checkpointing the model. - /// - /// Class describing input schema to the model. - /// The time series pipeline in the form of a . - /// Usually - /// To ignore missing columns. Default is false. - /// Input schema definition. Default is null. - /// Output schema definition. Default is null. - ///

Example code can be found by searching for TimeSeriesPredictionFunction in ML.NET.

- /// - /// - /// - /// - /// - public static TimeSeriesForecasting CreateForecastingEngine(this ITransformer transformer, IHostEnvironment env, - bool ignoreMissingColumns = false, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) - where TSrc : class - { - Contracts.CheckValue(env, nameof(env)); - env.CheckValue(transformer, nameof(transformer)); - env.CheckValueOrNull(inputSchemaDefinition); - env.CheckValueOrNull(outputSchemaDefinition); - return new TimeSeriesForecasting(env, transformer, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition); + return new TimeSeriesPredictionEngine(env, transformer, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition); } } } diff --git a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs index 6e3b1ce61f..1726ff78fd 100644 --- a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs +++ b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs @@ -27,7 +27,7 @@ namespace Microsoft.ML.Transforms.TimeSeries /// /// resulting from fitting a . /// - public sealed class SsaForecasting : SsaForecastingBaseWrapper, IStatefulTransformer, IForecastTransformer + public sealed class SsaForecasting : SsaForecastingBaseWrapper, IStatefulTransformer { internal const string Summary = "This transform forecasts using Singular Spectrum Analysis (SSA)."; internal const string LoaderSignature = "SsaForecasting"; @@ -87,6 +87,9 @@ internal sealed class Options : TransformInputBase [Argument(ArgumentType.AtMostOnce, HelpText = "The confidence level in [0, 1) for forecasting.", SortOrder = 2)] public float ConfidenceLevel = 0.95f; + + [Argument(ArgumentType.AtMostOnce, HelpText = "Set this to true horizon will change at prediction time.", SortOrder = 2)] + public bool VariableHorizon; } private sealed class BaseArguments : SsaForecastingOptions @@ -108,6 +111,7 @@ public BaseArguments(Options options) TrainSize = options.TrainSize; Horizon = options.Horizon; ConfidenceLevel = options.ConfidenceLevel; + VariableHorizon = options.VariableHorizon; } } @@ -251,7 +255,8 @@ public sealed class SsaForecastingEstimator : IEstimator /// The maximum growth on the exponential trend. /// The name of the confidence interval lower bound column. If not specified then confidence intervals will not be calculated. /// The name of the confidence interval upper bound column. If not specified then confidence intervals will not be calculated. - /// + /// The confidence level for forecasting. + /// Set this to true if horizon will change after training. internal SsaForecastingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName, @@ -269,7 +274,8 @@ internal SsaForecastingEstimator(IHostEnvironment env, GrowthRatio? maxGrowth = null, string forcastingConfidentLowerBoundColumnName = null, string forcastingConfidentUpperBoundColumnName = null, - float confidenceLevel = 0.95f) + float confidenceLevel = 0.95f, + bool variableHorizon = false) : this(env, new SsaForecasting.Options { Source = inputColumnName ?? outputColumnName, @@ -287,7 +293,9 @@ internal SsaForecastingEstimator(IHostEnvironment env, ForcastingConfidentLowerBoundColumnName = forcastingConfidentLowerBoundColumnName, ForcastingConfidentUpperBoundColumnName = forcastingConfidentUpperBoundColumnName, SeriesLength = seriesLength, - TrainSize = trainSize + TrainSize = trainSize, + VariableHorizon = variableHorizon, + Horizon = horizon }) { } diff --git a/src/Microsoft.ML.TimeSeries/SequentialAnomalyDetectionTransformBase.cs b/src/Microsoft.ML.TimeSeries/SequentialAnomalyDetectionTransformBase.cs index 72cfdf27e5..5048a38dac 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialAnomalyDetectionTransformBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialAnomalyDetectionTransformBase.cs @@ -377,25 +377,28 @@ private Delegate MakeGetter(DataViewRow input, AnomalyDetectionStateBase state) return valueGetter; } - public Action CreatePinger(DataViewRow input, Func activeOutput, out Action disposer) + public Action CreatePinger(DataViewRow input, Func activeOutput, out Action disposer) { disposer = null; - Action pinger = null; + Action pinger = null; if (activeOutput(0)) pinger = MakePinger(input, State); return pinger; } - private Action MakePinger(DataViewRow input, AnomalyDetectionStateBase state) + private Action MakePinger(DataViewRow input, AnomalyDetectionStateBase state) { _host.AssertValue(input); var srcGetter = input.GetGetter(input.Schema[_inputColumnIndex]); - Action pinger = (long rowPosition) => + Action pinger = (PingerArgument args) => { + if (args.DontConsumeSource) + return; + TInput src = default; srcGetter(ref src); - state.UpdateState(ref src, rowPosition, _parent.WindowSize > 0); + state.UpdateState(ref src, args.RowPosition, _parent.WindowSize > 0); }; return pinger; } @@ -516,7 +519,7 @@ private protected override void SetNaOutput(ref VBuffer dst) dst = editor.Commit(); } - private protected sealed override void TransformCore(ref TInput input, FixedSizeQueue windowedBuffer, long iteration, ref VBuffer dst) + public sealed override void TransformCore(ref TInput input, FixedSizeQueue windowedBuffer, long iteration, ref VBuffer dst) { var outputLength = Parent.OutputLength; Host.Assert(outputLength >= 2); diff --git a/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs b/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs index 873aef0a2a..2ca5f5c135 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs @@ -101,6 +101,7 @@ internal sealed class Mapper : IStatefulRowMapper private readonly DataViewSchema _parentSchema; private readonly int _inputColumnIndex; private ForecastingStateBase State { get; set; } + private bool _dontFetchSrcValue; public Mapper(IHostEnvironment env, SequentialForecastingTransformBase parent, DataViewSchema inputSchema) { @@ -119,6 +120,7 @@ public Mapper(IHostEnvironment env, SequentialForecastingTransformBase GetDependencies(Func activeOutput) public Delegate[] CreateGetters(DataViewRow input, Func activeOutput, out Action disposer) { disposer = null; - var getters = new Delegate[3]; + var getters = string.IsNullOrEmpty(_parent.ForecastingConfidenceIntervalMaxOutputColumnName) ? new Delegate[1] : new Delegate[3]; + if (activeOutput(0)) - getters[0] = MakeGetter(input, State); + { + ValueGetter> valueGetter = (ref VBuffer dst) => + { + State.Forecast(ref dst); + }; + + getters[0] = valueGetter; + } + + if (activeOutput(1)) + { + ValueGetter> valueGetter = (ref VBuffer dst) => + { + State.ConfidenceIntervalLowerBound(ref dst); + }; + + getters[1] = valueGetter; + } + + if (activeOutput(2)) + { + ValueGetter> valueGetter = (ref VBuffer dst) => + { + State.ConfidenceIntervalUpperBound(ref dst); + }; + + getters[2] = valueGetter; + } - getters[1] = getters[2] = getters[0]; return getters; } @@ -174,31 +203,49 @@ private Delegate MakeGetter(DataViewRow input, ForecastingStateBase state) ValueGetter> valueGetter = (ref VBuffer dst) => { TInput src = default; + if (_dontFetchSrcValue) + { + state.TransformCore(ref src, null, 0, ref dst); + return; + } + srcGetter(ref src); processData(ref src, ref dst); + }; return valueGetter; } - public Action CreatePinger(DataViewRow input, Func activeOutput, out Action disposer) + public Action CreatePinger(DataViewRow input, Func activeOutput, out Action disposer) { disposer = null; - Action pinger = null; + Action pinger = null; if (activeOutput(0)) pinger = MakePinger(input, State); return pinger; } - private Action MakePinger(DataViewRow input, ForecastingStateBase state) + private Action MakePinger(DataViewRow input, ForecastingStateBase state) { _host.AssertValue(input); var srcGetter = input.GetGetter(input.Schema[_inputColumnIndex]); - Action pinger = (long rowPosition) => + Action pinger = (PingerArgument args) => { + state.LocalConfidenceLevel = args.ConfidenceLevel; + state.LocalHorizon = args.Horizon; + + // This means don't call srcGetter in getters. + if (args.DontConsumeSource) + { + _dontFetchSrcValue = true; + return; + } + + _dontFetchSrcValue = false; TInput src = default; srcGetter(ref src); - state.UpdateState(ref src, rowPosition, _parent.WindowSize > 0); + state.UpdateState(ref src, args.RowPosition, _parent.WindowSize > 0); }; return pinger; } @@ -224,6 +271,8 @@ internal abstract class ForecastingStateBase : SequentialTransformerBase Parent; + internal int? LocalHorizon; + internal float? LocalConfidenceLevel; private protected ForecastingStateBase() { } diff --git a/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs b/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs index 8ae346eb78..6796d80c83 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs @@ -251,7 +251,22 @@ private protected virtual void SetNaOutput(ref TOutput dst) { } /// A reference to the dst object. /// A reference to the windowed buffer. /// A long number that indicates the number of times TransformCore has been called so far (starting value = 0). - private protected virtual void TransformCore(ref TInput input, FixedSizeQueue windowedBuffer, long iteration, ref TOutput dst) + public virtual void TransformCore(ref TInput input, FixedSizeQueue windowedBuffer, long iteration, ref TOutput dst) + { + + } + + public virtual void Forecast(ref TOutput dst) + { + + } + + public virtual void ConfidenceIntervalLowerBound(ref TOutput dst) + { + + } + + public virtual void ConfidenceIntervalUpperBound(ref TOutput dst) { } @@ -606,7 +621,7 @@ private sealed class RowImpl : StatefulRow private readonly DataViewSchema _schema; private readonly DataViewRow _input; private readonly Delegate[] _getters; - private readonly Action _pinger; + private readonly Action _pinger; private readonly Action _disposer; private bool _disposed; private readonly ColumnBindings _bindings; @@ -617,7 +632,7 @@ private sealed class RowImpl : StatefulRow public override long Batch => _input.Batch; - public RowImpl(ColumnBindings bindings, DataViewRow input, Delegate[] getters, Action pinger, Action disposer) + public RowImpl(ColumnBindings bindings, DataViewRow input, Delegate[] getters, Action pinger, Action disposer) { Contracts.CheckValue(bindings, nameof(bindings)); Contracts.CheckValue(input, nameof(input)); @@ -656,8 +671,8 @@ public override ValueGetter GetGetter(DataViewSchema.Column column) return fn; } - public override Action GetPinger() => - _pinger as Action ?? throw Contracts.Except("Invalid TValue in GetPinger: '{0}'", typeof(long)); + public override Action GetPinger() => + _pinger as Action ?? throw Contracts.Except("Invalid TValue in GetPinger: '{0}'", typeof(PingerArgument)); /// /// Returns whether the given column is active in this row. @@ -930,7 +945,7 @@ private sealed class StatefulRowImpl : StatefulRow { private readonly DataViewRow _input; private readonly Delegate[] _getters; - private readonly Action _pinger; + private readonly Action _pinger; private readonly Action _disposer; private readonly TimeSeriesRowToRowMapperTransform _parent; @@ -942,7 +957,7 @@ private sealed class StatefulRowImpl : StatefulRow public override DataViewSchema Schema { get; } public StatefulRowImpl(DataViewRow input, TimeSeriesRowToRowMapperTransform parent, - DataViewSchema schema, Delegate[] getters, Action pinger, Action disposer) + DataViewSchema schema, Delegate[] getters, Action pinger, Action disposer) { _input = input; _parent = parent; @@ -979,8 +994,8 @@ public override ValueGetter GetGetter(DataViewSchema.Column colu return fn; } - public override Action GetPinger() => - _pinger as Action ?? throw Contracts.Except("Invalid TValue in GetPinger: '{0}'", typeof(long)); + public override Action GetPinger() => + _pinger as Action ?? throw Contracts.Except("Invalid TValue in GetPinger: '{0}'", typeof(PingerArgument)); public override ValueGetter GetIdGetter() => _input.GetIdGetter(); diff --git a/src/Microsoft.ML.TimeSeries/SrCnnTransformBase.cs b/src/Microsoft.ML.TimeSeries/SrCnnTransformBase.cs index 9f64fa2167..c76973bef2 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnTransformBase.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnTransformBase.cs @@ -220,25 +220,28 @@ private Delegate MakeGetter(DataViewRow input, SrCnnStateBase state) return valueGetter; } - public Action CreatePinger(DataViewRow input, Func activeOutput, out Action disposer) + public Action CreatePinger(DataViewRow input, Func activeOutput, out Action disposer) { disposer = null; - Action pinger = null; + Action pinger = null; if (activeOutput(0)) pinger = MakePinger(input, State); return pinger; } - private Action MakePinger(DataViewRow input, SrCnnStateBase state) + private Action MakePinger(DataViewRow input, SrCnnStateBase state) { _host.AssertValue(input); var srcGetter = input.GetGetter(input.Schema[_inputColumnIndex]); - Action pinger = (long rowPosition) => + Action pinger = (PingerArgument args) => { + if (args.DontConsumeSource) + return; + TInput src = default; srcGetter(ref src); - state.UpdateState(ref src, rowPosition, _parent.WindowSize > 0); + state.UpdateState(ref src, args.RowPosition, _parent.WindowSize > 0); }; return pinger; } @@ -289,7 +292,7 @@ private protected override void SetNaOutput(ref VBuffer dst) dst = editor.Commit(); } - private protected sealed override void TransformCore(ref TInput input, FixedSizeQueue windowedBuffer, long iteration, ref VBuffer dst) + public sealed override void TransformCore(ref TInput input, FixedSizeQueue windowedBuffer, long iteration, ref VBuffer dst) { var outputLength = Parent.OutputLength; diff --git a/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs b/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs index feb4bd1114..86a48781c7 100644 --- a/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs +++ b/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs @@ -16,7 +16,7 @@ namespace Microsoft.ML.Transforms.TimeSeries /// The wrapper to that implements the general anomaly detection transform based on Singular Spectrum modeling of the time-series. /// For the details of the Singular Spectrum Analysis (SSA), refer to http://arxiv.org/pdf/1206.6910.pdf. /// - public class SsaForecastingBaseWrapper : IForecastTransformer, ICanSaveModel + public class SsaForecastingBaseWrapper : IStatefulTransformer, ICanSaveModel { /// /// Whether a call to should succeed, on an @@ -77,45 +77,6 @@ public IRowToRowMapper GetStatefulRowToRowMapper(DataViewSchema inputSchema) /// internal IDataTransform MakeDataTransform(IDataView input) => InternalTransform.MakeDataTransform(input); - public IDataView Forecast(int horizon, bool includeConfidenceInterval = false, float confidenceLevel = 0.95f) - { - var model = ((AdaptiveSingularSpectrumSequenceModelerInternal)InternalTransform.Model); - - if (includeConfidenceInterval) - { - model.ForecastWithConfidenceIntervals(horizon, out float[] forecast, - out float[] confidenceIntervalLowerBounds, out float[] confidenceIntervalUpperBounds, confidenceLevel); - - return DataViewConstructionUtils.CreateFromEnumerable(InternalTransform.Host, GetForecastWithConfidenceIntervals(forecast, - confidenceIntervalLowerBounds, confidenceIntervalUpperBounds)); - } - else - return DataViewConstructionUtils.CreateFromEnumerable(InternalTransform.Host, GetForecast(model.Forecast(horizon))); - } - - private IEnumerable GetForecast(float[] forecast) - { - foreach (var value in forecast) - yield return new SsaForecast() { Forecast = value }; - } - - private IEnumerable GetForecastWithConfidenceIntervals(float[] forecast, float[] min, float[] max) - { - for (int index = 0; index < forecast.Length; index += 1) - yield return new SsaForecastWithConfidenceInterval() { Forecast = forecast[index], ConfidenceIntervalLowerBound = min[index], ConfidenceIntervalUpperBound = max[index] }; - } - - internal class SsaForecast - { - public float Forecast; - } - - internal class SsaForecastWithConfidenceInterval : SsaForecast - { - public float ConfidenceIntervalLowerBound; - public float ConfidenceIntervalUpperBound; - } - /// /// Options for SSA Anomaly Detection. /// @@ -138,6 +99,7 @@ internal abstract class SsaForecastingOptions : ForecastingArgumentsBase public GrowthRatio? MaxGrowth; public int Horizon; public float ConfidenceLevel; + public bool VariableHorizon; } internal SsaForecastingBase InternalTransform; @@ -166,7 +128,7 @@ internal sealed class SsaForecastingBase : SequentialForecastingTransformBase windowedBuffer, long iteration, ref VBuffer dst) + public override void TransformCore(ref float input, FixedSizeQueue windowedBuffer, long iteration, ref VBuffer dst) { // Forecasting is being done without prediction engine. Update the model // with the observation. if (PreviousPosition == -1) Consume(input); - dst = new VBuffer(_parentForecaster.Horizon, + dst = new VBuffer(LocalHorizon ?? _parentForecaster.Horizon, ((AdaptiveSingularSpectrumSequenceModelerInternal)_model).Forecast(_parentForecaster.Horizon)); } @@ -313,14 +275,38 @@ private protected override void TransformCore(ref float input, FixedSizeQueue(_parentForecaster.Horizon, forecast); dst2 = new VBuffer(_parentForecaster.Horizon, min); dst3 = new VBuffer(_parentForecaster.Horizon, max); } + public override void Forecast( ref VBuffer dst) + { + int horizon = LocalHorizon ?? _parentForecaster.Horizon; + dst = new VBuffer(horizon, ((AdaptiveSingularSpectrumSequenceModelerInternal)_model).Forecast(horizon)); + } + + public override void ConfidenceIntervalLowerBound(ref VBuffer dst) + { + int horizon = LocalHorizon ?? _parentForecaster.Horizon; + ((AdaptiveSingularSpectrumSequenceModelerInternal)_model).ForecastWithConfidenceIntervals(horizon, + out float[] forecast, out float[] min, out float[] max, LocalConfidenceLevel ?? _parentForecaster.ConfidenceLevel); + + dst = new VBuffer(horizon, min); + } + + public override void ConfidenceIntervalUpperBound(ref VBuffer dst) + { + int horizon = LocalHorizon ?? _parentForecaster.Horizon; + ((AdaptiveSingularSpectrumSequenceModelerInternal)_model).ForecastWithConfidenceIntervals(horizon, + out float[] forecast, out float[] min, out float[] max, LocalConfidenceLevel ?? _parentForecaster.ConfidenceLevel); + + dst = new VBuffer(horizon, max); + } + public override void Consume(Single input) => _model.Consume(ref input, _parentForecaster.IsAdaptive); } } diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index c6b5a06388..4f61724ff8 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -60,6 +60,15 @@ class ForecastResult #pragma warning restore CS0649 } + class ForecastResultArray + { +#pragma warning disable CS0649 + public float[] Forecast; + public float[] ConfidenceLowerBound; + public float[] ConfidenceUpperBound; +#pragma warning restore CS0649 + } + private sealed class TimeSeriesData { public float Value; @@ -204,7 +213,7 @@ public void ChangePointDetectionWithSeasonalityPredictionEngineNoColumn() var model = pipeline.Fit(dataView); //Create prediction function. - var engine = model.CreateAnomalyDetectionEngine(ml); + var engine = model.CreateTimeSeriesEngine(ml); //Checkpoint with no inputs passed at prediction. var modelPath = "temp.zip"; @@ -217,7 +226,7 @@ public void ChangePointDetectionWithSeasonalityPredictionEngineNoColumn() model2 = ml.Model.Load(file, out var schema); //Raw score after state gets updated with two inputs. - var engine2 = model2.CreateAnomalyDetectionEngine(ml); + var engine2 = model2.CreateTimeSeriesEngine(ml); var prediction2 = engine2.Predict(new Data(1)); //Raw score after first input. Assert.Equal(1.1661833524703979, prediction2.Change[1], precision: 5); // Raw score @@ -238,7 +247,7 @@ public void ChangePointDetectionWithSeasonalityPredictionEngineNoColumn() //Load the model with state updated with just one input, then pass in the second input //and raw score should match the raw score obtained by passing the two input in the first model. - var engine3 = model3.CreateAnomalyDetectionEngine(ml); + var engine3 = model3.CreateTimeSeriesEngine(ml); var prediction3 = engine3.Predict(new Data(1)); Assert.Equal(0.12216401100158691, prediction2.Change[1], precision: 5); // Raw score } @@ -279,9 +288,9 @@ public void ChangePointDetectionWithSeasonalityPredictionEngine() // Train. var model = pipeline.Fit(dataView); - + //Model 1: Prediction #1. - var engine = model.CreateAnomalyDetectionEngine(ml); + var engine = model.CreateTimeSeriesEngine(ml); var prediction = engine.Predict(new Data(1)); Assert.Equal(0, prediction.Change[0], precision: 7); // Alert Assert.Equal(1.1661833524703979, prediction.Change[1], precision: 5); // Raw score @@ -305,7 +314,7 @@ public void ChangePointDetectionWithSeasonalityPredictionEngine() model2 = ml.Model.Load(file, out var schema); //Predict and expect the same result after checkpointing(Prediction #2). - engine = model2.CreateAnomalyDetectionEngine(ml); + engine = model2.CreateTimeSeriesEngine(ml); prediction = engine.Predict(new Data(1)); Assert.Equal(0, prediction.Change[0], precision: 7); // Alert Assert.Equal(0.12216401100158691, prediction.Change[1], precision: 5); // Raw score @@ -383,11 +392,14 @@ public void SsaForecastPredictionEngine() { ConfidenceLevel = 0.95f, Source = "Value", - Name = "Change", + Name = "Forecast", WindowSize = 10, SeriesLength = 11, TrainSize = 22, Horizon = 4, + ForcastingConfidentLowerBoundColumnName = "ConfidenceLowerBound", + ForcastingConfidentUpperBoundColumnName = "ConfidenceUpperBound", + VariableHorizon = true }; for (int j = 0; j < NumberOfSeasonsInTraining; j++) @@ -398,23 +410,60 @@ public void SsaForecastPredictionEngine() data.Add(new Data(i * 100)); // Train - var model = new SsaForecastingEstimator(ml, args).Fit(dataView); - - //Model 1: Prediction #1. - var engine = model.CreateForecastingEngine(ml); - engine.Update(new Data(1)); - engine.Update(new Data(2)); - var forecast = engine.Forecast(horizon: 5); - var prediction = ml.Data.CreateEnumerable(forecast, reuseRowObject: false).GetEnumerator(); - - int index = 0; - List expectedForecast = new List() { 3.9516573f, 6.212672f, 7.732854f, 8.125769f, 7.22453928f }; - while (prediction.MoveNext()) - { - Assert.Equal(prediction.Current.Forecast, expectedForecast[index++]); - } - - Assert.Equal(5, index); + var model = ml.Transforms.Text.FeaturizeText("Text_Featurized", "Text") + .Append(ml.Transforms.Conversion.ConvertType("Value", "Value", DataKind.Single)) + .Append(ml.Forecasting.ForecastBySsa("Forecast", "Value", 10, 11, 22, 4, + forcastingConfidentLowerBoundColumnName: "ConfidenceLowerBound", + forcastingConfidentUpperBoundColumnName: "ConfidenceUpperBound", variableHorizon: true)) + .Append(ml.Transforms.Concatenate("Forecast", "Forecast", "ConfidenceLowerBound", "ConfidenceUpperBound")) + .Fit(dataView); + + //Prediction engine. + var engine = model.CreateTimeSeriesEngine(ml); + ForecastResultArray result = new ForecastResultArray(); + + // Forecast and change the horizon to 5. + engine.Predict(null, ref result, horizon: 5); + // [Forecast, ConfidenceLowerBound, ConfidenceUpperBound] + Assert.Equal(result.Forecast, new float[] { -1.02245092f, 0.08333081f, 2.60737085f, 5.397319f, 7.500832f, -5.188142f, -4.82533741f, + -2.563095f, 0.213172823f, 2.29317045f, 3.14324f, 4.991999f, 7.777837f, 10.5814648f, 12.7084932f }); + + // Update the forecasting model. + engine.Predict(new Data(2)); + + // Update the model and then forecast. + engine.Predict(new Data(2), ref result); + + engine.CheckPoint(ml, "model.zip"); + // [Forecast, ConfidenceLowerBound, ConfidenceUpperBound] + Assert.Equal(result.Forecast, new float[] { 4.310587f, 6.39716768f, 7.73934f, 8.029469f, 0.144895911f, + 1.48849952f, 2.568874f, 2.84532261f, 8.476278f, 11.3058357f, 12.9098063f, 13.2136145f }); + + // Checkpoint the model. + ITransformer modelCopy; + using (var file = File.OpenRead("model.zip")) + modelCopy = ml.Model.Load(file, out DataViewSchema schema); + + // We must create a new prediction engine from the persisted model. + var forecastEngineCopy = modelCopy.CreateTimeSeriesEngine(ml); + ForecastResultArray resultCopy = new ForecastResultArray(); + + // Update both the models. + engine.Predict(new Data(3)); + forecastEngineCopy.Predict(new Data(3)); + + // Forecast values with the original and check-pointed model. + forecastEngineCopy.Predict(null, ref resultCopy, horizon: 5); + engine.Predict(null, ref result, horizon: 5); + // [Forecast, ConfidenceLowerBound, ConfidenceUpperBound] + Assert.Equal(result.Forecast, new float[] { 6.00658846f, 7.506871f, 7.96424866f, 7.17514229f, + 5.02655172f, 1.84089744f, 2.59820318f, 2.79378271f, 1.99099624f, + -0.181109816f, 10.1722794f, 12.41554f, 13.1347151f, 12.3592882f, 10.2342129f}); + + // The forecasted results should be the same because the state of the models + // is the same. + Assert.Equal(result.Forecast, resultCopy.Forecast); + } [Fact] From 14b66b665560fdbba36a36cc63777307b012f0ce Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Mon, 1 Jul 2019 03:09:23 -0700 Subject: [PATCH 19/21] Add TestEstimatorCore, samples and misc PR feedback. --- .../Transforms/TimeSeries/Forecasting.cs | 41 +++++----- .../ForecastingWithConfidenceInterval.cs | 81 +++++++++++-------- docs/samples/Microsoft.ML.Samples/Program.cs | 5 +- ...dictionFunction.cs => PredictionEngine.cs} | 0 src/Microsoft.ML.TimeSeries/SSaForecasting.cs | 60 +++++++------- .../SequentialForecastingTransformBase.cs | 28 ++++--- .../TimeSeriesProcessing.cs | 8 +- .../TimeSeriesDirectApi.cs | 4 +- .../TimeSeriesEstimatorTests.cs | 36 +++++++++ 9 files changed, 156 insertions(+), 107 deletions(-) rename src/Microsoft.ML.TimeSeries/{PredictionFunction.cs => PredictionEngine.cs} (100%) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs index e351f600ae..e2b12dcc44 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs @@ -42,27 +42,30 @@ public static void Example() var dataView = ml.Data.LoadFromEnumerable(data); // Setup arguments. - /*var inputColumnName = nameof(TimeSeriesData.Value); + var inputColumnName = nameof(TimeSeriesData.Value); + var outputColumnName = nameof(ForecastResult.Forecast); // Instantiate the forecasting model. - var model = ml.Forecasting.ForecastBySsa(inputColumnName, inputColumnName, 5, 11, data.Count, 5); + var model = ml.Forecasting.ForecastBySsa(outputColumnName, inputColumnName, 5, 11, data.Count, 5); // Train. var transformer = model.Fit(dataView); // Forecast next five values. - var forecastEngine = transformer.CreateForecastingEngine(ml); - var forecast = ml.Data.CreateEnumerable(forecastEngine.Forecast(5), false); + var forecastEngine = transformer.CreateTimeSeriesEngine(ml); + var forecast = new ForecastResult(); + forecastEngine.Predict(null, ref forecast); + Console.WriteLine($"Forecasted values:"); - Console.WriteLine("[{0}]", string.Join(", ", forecast)); + Console.WriteLine("[{0}]", string.Join(", ", forecast.Forecast)); // Forecasted values: - // [2.452744, 2.589339, 2.729183, 2.873005, 3.028931] + // [1.977226, 1.020494, 1.760543, 3.437509, 4.266461] // Update with new observations. - forecastEngine.Update(new TimeSeriesData(0)); - forecastEngine.Update(new TimeSeriesData(0)); - forecastEngine.Update(new TimeSeriesData(0)); - forecastEngine.Update(new TimeSeriesData(0)); + forecastEngine.Predict(new TimeSeriesData(0)); + forecastEngine.Predict(new TimeSeriesData(0)); + forecastEngine.Predict(new TimeSeriesData(0)); + forecastEngine.Predict(new TimeSeriesData(0)); // Checkpoint. forecastEngine.CheckPoint(ml, "model.zip"); @@ -74,25 +77,23 @@ public static void Example() modelCopy = ml.Model.Load(file, out DataViewSchema schema); // We must create a new prediction engine from the persisted model. - /*var forecastEngineCopy = modelCopy.CreateForecastingEngine(ml); + var forecastEngineCopy = modelCopy.CreateTimeSeriesEngine(ml); // Forecast with the checkpointed model loaded from disk. - forecast = ml.Data.CreateEnumerable(forecastEngineCopy.Forecast(5), false); - Console.WriteLine("[{0}]", string.Join(", ", forecast)); - // [0.8681176, 0.8185108, 0.8069275, 0.84405, 0.9455081] + forecastEngineCopy.Predict(null, ref forecast); + Console.WriteLine("[{0}]", string.Join(", ", forecast.Forecast)); + // [1.791331, 1.255525, 0.3060154, -0.200446, 0.5657795] // Forecast with the original model(that was checkpointed to disk). - forecast = ml.Data.CreateEnumerable(forecastEngine.Forecast(5), false); - Console.WriteLine("[{0}]", string.Join(", ", forecast)); - // [0.8681176, 0.8185108, 0.8069275, 0.84405, 0.9455081]*/ + forecastEngine.Predict(null, ref forecast); + Console.WriteLine("[{0}]", string.Join(", ", forecast.Forecast)); + // [1.791331, 1.255525, 0.3060154, -0.200446, 0.5657795] } class ForecastResult { -#pragma warning disable CS0649 - public float Forecast; -#pragma warning restore CS0649 + public float[] Forecast { get; set; } } class TimeSeriesData diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs index d446cbaf7b..1198ab93aa 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs @@ -1,8 +1,8 @@ -/*using System; +using System; using System.Collections.Generic; using Microsoft.ML; using Microsoft.ML.Transforms.TimeSeries; -using Microsoft.ML.TimeSeries;*/ +using System.IO; namespace Samples.Dynamic { @@ -12,12 +12,11 @@ public static class ForecastingWithConfidenceInternal // does forecasting. public static void Example() { - /*// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, // as well as the source of randomness. var ml = new MLContext(); - // Generate sample series data with a recurring pattern - const int SeasonalitySize = 5; + // Generate sample series data with a recurring pattern. var data = new List() { new TimeSeriesData(0), @@ -44,50 +43,59 @@ public static void Example() // Setup arguments. var inputColumnName = nameof(TimeSeriesData.Value); + var outputColumnName = nameof(ForecastResult.Forecast); - // Instantiate forecasting model. - var model = ml.Forecasting.AdaptiveSingularSpectrumSequenceModeler(inputColumnName, data.Count, SeasonalitySize + 1, SeasonalitySize, - 1, AdaptiveSingularSpectrumSequenceForecasting.RankSelectionMethod.Exact, null, SeasonalitySize / 2, shouldComputeForecastIntervals: true, false); + // Instantiate the forecasting model. + var model = ml.Forecasting.ForecastBySsa(outputColumnName, inputColumnName, 5, 11, data.Count, 5, + confidenceLevel: 0.95f, + forcastingConfidentLowerBoundColumnName: "ConfidenceLowerBound", + forcastingConfidentUpperBoundColumnName: "ConfidenceUpperBound"); // Train. - model.Train(dataView); - - // Forecast next five values with confidence internal. - float[] forecast; - float[] confidenceIntervalLowerBounds; - float[] confidenceIntervalUpperBounds; - model.ForecastWithConfidenceIntervals(5, out forecast, out confidenceIntervalLowerBounds, out confidenceIntervalUpperBounds); - PrintForecastValuesAndIntervals(forecast, confidenceIntervalLowerBounds, confidenceIntervalUpperBounds); + var transformer = model.Fit(dataView); + + // Forecast next five values. + var forecastEngine = transformer.CreateTimeSeriesEngine(ml); + var forecast = new ForecastResult(); + forecastEngine.Predict(null, ref forecast); + + PrintForecastValuesAndIntervals(forecast.Forecast, forecast.ConfidenceLowerBound, forecast.ConfidenceUpperBound); // Forecasted values: - // [2.452744, 2.589339, 2.729183, 2.873005, 3.028931] + // [1.977226, 1.020494, 1.760543, 3.437509, 4.266461] // Confidence intervals: - // [-0.2235315 - 5.12902] [-0.08777174 - 5.266451] [0.05076938 - 5.407597] [0.1925406 - 5.553469] [0.3469928 - 5.71087] + // [0.3451088 - 3.609343] [-0.7967533 - 2.83774] [-0.058467 - 3.579552] [1.61505 - 5.259968] [2.349299 - 6.183623] // Update with new observations. - dataView = ml.Data.LoadFromEnumerable(new List() { new TimeSeriesData(0), new TimeSeriesData(0), new TimeSeriesData(0), new TimeSeriesData(0) }); - model.Update(dataView); + forecastEngine.Predict(new TimeSeriesData(0)); + forecastEngine.Predict(new TimeSeriesData(0)); + forecastEngine.Predict(new TimeSeriesData(0)); + forecastEngine.Predict(new TimeSeriesData(0)); // Checkpoint. - ml.Model.SaveForecastingModel(model, "model.zip"); + forecastEngine.CheckPoint(ml, "model.zip"); // Load the checkpointed model from disk. - var modelCopy = ml.Model.LoadForecastingModel("model.zip"); + // Load the model. + ITransformer modelCopy; + using (var file = File.OpenRead("model.zip")) + modelCopy = ml.Model.Load(file, out DataViewSchema schema); + + // We must create a new prediction engine from the persisted model. + var forecastEngineCopy = modelCopy.CreateTimeSeriesEngine(ml); // Forecast with the checkpointed model loaded from disk. - modelCopy.ForecastWithConfidenceIntervals(5, out forecast, out confidenceIntervalLowerBounds, out confidenceIntervalUpperBounds); - PrintForecastValuesAndIntervals(forecast, confidenceIntervalLowerBounds, confidenceIntervalUpperBounds); - // Forecasted values: - // [0.8681176, 0.8185108, 0.8069275, 0.84405, 0.9455081] + forecastEngineCopy.Predict(null, ref forecast); + PrintForecastValuesAndIntervals(forecast.Forecast, forecast.ConfidenceLowerBound, forecast.ConfidenceUpperBound); + // [1.791331, 1.255525, 0.3060154, -0.200446, 0.5657795] // Confidence intervals: - // [-1.808158 - 3.544394] [-1.8586 - 3.495622] [-1.871486 - 3.485341] [-1.836414 - 3.524514] [-1.736431 - 3.627447] + // [0.1592142 - 3.423448] [-0.5617217 - 3.072772] [-1.512994 - 2.125025] [-2.022905 - 1.622013] [-1.351382 - 2.482941] // Forecast with the original model(that was checkpointed to disk). - model.ForecastWithConfidenceIntervals(5, out forecast, out confidenceIntervalLowerBounds, out confidenceIntervalUpperBounds); - PrintForecastValuesAndIntervals(forecast, confidenceIntervalLowerBounds, confidenceIntervalUpperBounds); - // Forecasted values: - // [0.8681176, 0.8185108, 0.8069275, 0.84405, 0.9455081] + forecastEngine.Predict(null, ref forecast); + PrintForecastValuesAndIntervals(forecast.Forecast, forecast.ConfidenceLowerBound, forecast.ConfidenceUpperBound); + // [1.791331, 1.255525, 0.3060154, -0.200446, 0.5657795] // Confidence intervals: - // [-1.808158 - 3.544394] [-1.8586 - 3.495622] [-1.871486 - 3.485341] [-1.836414 - 3.524514] [-1.736431 - 3.627447] + // [0.1592142 - 3.423448] [-0.5617217 - 3.072772] [-1.512994 - 2.125025] [-2.022905 - 1.622013] [-1.351382 - 2.482941] } static void PrintForecastValuesAndIntervals(float[] forecast, float[] confidenceIntervalLowerBounds, float[] confidenceIntervalUpperBounds) @@ -97,7 +105,14 @@ static void PrintForecastValuesAndIntervals(float[] forecast, float[] confidence Console.WriteLine($"Confidence intervals:"); for (int index = 0; index < forecast.Length; index++) Console.Write($"[{confidenceIntervalLowerBounds[index]} - {confidenceIntervalUpperBounds[index]}] "); - Console.WriteLine();*/ + Console.WriteLine(); + } + + class ForecastResult + { + public float[] Forecast { get; set; } + public float[] ConfidenceLowerBound { get; set; } + public float[] ConfidenceUpperBound { get; set; } } class TimeSeriesData diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs index 60e9ce5884..5b318ae9e3 100644 --- a/docs/samples/Microsoft.ML.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.Samples/Program.cs @@ -10,7 +10,7 @@ public static class Program internal static void RunAll() { - /*int samples = 0; + int samples = 0; foreach (var type in Assembly.GetExecutingAssembly().GetTypes()) { var sample = type.GetMethod("Example", BindingFlags.Public | BindingFlags.Static | BindingFlags.FlattenHierarchy); @@ -23,8 +23,7 @@ internal static void RunAll() } } - Console.WriteLine("Number of samples that ran without any exception: " + samples);*/ - Forecasting.Example(); + Console.WriteLine("Number of samples that ran without any exception: " + samples); } } } diff --git a/src/Microsoft.ML.TimeSeries/PredictionFunction.cs b/src/Microsoft.ML.TimeSeries/PredictionEngine.cs similarity index 100% rename from src/Microsoft.ML.TimeSeries/PredictionFunction.cs rename to src/Microsoft.ML.TimeSeries/PredictionEngine.cs diff --git a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs index 1726ff78fd..a5327e6d9d 100644 --- a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs +++ b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs @@ -10,24 +10,24 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Transforms.TimeSeries; -[assembly: LoadableClass(SsaForecasting.Summary, typeof(IDataTransform), typeof(SsaForecasting), typeof(SsaForecasting.Options), typeof(SignatureDataTransform), - SsaForecasting.UserName, SsaForecasting.LoaderSignature, SsaForecasting.ShortName)] +[assembly: LoadableClass(SsaForecastingTransformer.Summary, typeof(IDataTransform), typeof(SsaForecastingTransformer), typeof(SsaForecastingTransformer.Options), typeof(SignatureDataTransform), + SsaForecastingTransformer.UserName, SsaForecastingTransformer.LoaderSignature, SsaForecastingTransformer.ShortName)] -[assembly: LoadableClass(SsaForecasting.Summary, typeof(IDataTransform), typeof(SsaForecasting), null, typeof(SignatureLoadDataTransform), - SsaForecasting.UserName, SsaForecasting.LoaderSignature)] +[assembly: LoadableClass(SsaForecastingTransformer.Summary, typeof(IDataTransform), typeof(SsaForecastingTransformer), null, typeof(SignatureLoadDataTransform), + SsaForecastingTransformer.UserName, SsaForecastingTransformer.LoaderSignature)] -[assembly: LoadableClass(SsaForecasting.Summary, typeof(SsaForecasting), null, typeof(SignatureLoadModel), - SsaForecasting.UserName, SsaForecasting.LoaderSignature)] +[assembly: LoadableClass(SsaForecastingTransformer.Summary, typeof(SsaForecastingTransformer), null, typeof(SignatureLoadModel), + SsaForecastingTransformer.UserName, SsaForecastingTransformer.LoaderSignature)] -[assembly: LoadableClass(typeof(IRowMapper), typeof(SsaForecasting), null, typeof(SignatureLoadRowMapper), - SsaForecasting.UserName, SsaForecasting.LoaderSignature)] +[assembly: LoadableClass(typeof(IRowMapper), typeof(SsaForecastingTransformer), null, typeof(SignatureLoadRowMapper), + SsaForecastingTransformer.UserName, SsaForecastingTransformer.LoaderSignature)] namespace Microsoft.ML.Transforms.TimeSeries { /// /// resulting from fitting a . /// - public sealed class SsaForecasting : SsaForecastingBaseWrapper, IStatefulTransformer + public sealed class SsaForecastingTransformer : SsaForecastingBaseWrapper, IStatefulTransformer { internal const string Summary = "This transform forecasts using Singular Spectrum Analysis (SSA)."; internal const string LoaderSignature = "SsaForecasting"; @@ -123,10 +123,10 @@ private static VersionInfo GetVersionInfo() verReadableCur: 0x00010001, verWeCanReadBack: 0x00010001, loaderSignature: LoaderSignature, - loaderAssemblyName: typeof(SsaForecasting).Assembly.FullName); + loaderAssemblyName: typeof(SsaForecastingTransformer).Assembly.FullName); } - internal SsaForecasting(IHostEnvironment env, Options options, IDataView input) + internal SsaForecastingTransformer(IHostEnvironment env, Options options, IDataView input) : base(new BaseArguments(options), LoaderSignature, env) { InternalTransform.Model.Train(new RoleMappedData(input, null, InternalTransform.InputColumnName)); @@ -139,10 +139,10 @@ private static IDataTransform Create(IHostEnvironment env, Options options, IDat env.CheckValue(options, nameof(options)); env.CheckValue(input, nameof(input)); - return new SsaForecasting(env, options, input).MakeDataTransform(input); + return new SsaForecastingTransformer(env, options, input).MakeDataTransform(input); } - internal SsaForecasting(IHostEnvironment env, Options options) + internal SsaForecastingTransformer(IHostEnvironment env, Options options) : base(new BaseArguments(options), LoaderSignature, env) { // This constructor is empty. @@ -155,12 +155,12 @@ private static IDataTransform Create(IHostEnvironment env, ModelLoadContext ctx, env.CheckValue(ctx, nameof(ctx)); env.CheckValue(input, nameof(input)); - return new SsaForecasting(env, ctx).MakeDataTransform(input); + return new SsaForecastingTransformer(env, ctx).MakeDataTransform(input); } IStatefulTransformer IStatefulTransformer.Clone() { - var clone = (SsaForecasting)MemberwiseClone(); + var clone = (SsaForecastingTransformer)MemberwiseClone(); clone.InternalTransform.Model = clone.InternalTransform.Model.Clone(); clone.InternalTransform.StateRef = (SsaForecastingBase.State)clone.InternalTransform.StateRef.Clone(); clone.InternalTransform.StateRef.InitState(clone.InternalTransform, InternalTransform.Host); @@ -168,16 +168,16 @@ IStatefulTransformer IStatefulTransformer.Clone() } // Factory method for SignatureLoadModel. - private static SsaForecasting Create(IHostEnvironment env, ModelLoadContext ctx) + private static SsaForecastingTransformer Create(IHostEnvironment env, ModelLoadContext ctx) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(ctx, nameof(ctx)); ctx.CheckAtModel(GetVersionInfo()); - return new SsaForecasting(env, ctx); + return new SsaForecastingTransformer(env, ctx); } - internal SsaForecasting(IHostEnvironment env, ModelLoadContext ctx) + internal SsaForecastingTransformer(IHostEnvironment env, ModelLoadContext ctx) : base(env, ctx, LoaderSignature) { // *** Binary format *** @@ -205,7 +205,7 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat } /// - /// Detect spikes in time series using Singular Spectrum Analysis. + /// Forecasts using Singular Spectrum Analysis. /// /// /// /// /// - public sealed class SsaForecastingEstimator : IEstimator + public sealed class SsaForecastingEstimator : IEstimator { private readonly IHost _host; - private readonly SsaForecasting.Options _options; + private readonly SsaForecastingTransformer.Options _options; /// /// Create a new instance of @@ -276,7 +276,7 @@ internal SsaForecastingEstimator(IHostEnvironment env, string forcastingConfidentUpperBoundColumnName = null, float confidenceLevel = 0.95f, bool variableHorizon = false) - : this(env, new SsaForecasting.Options + : this(env, new SsaForecastingTransformer.Options { Source = inputColumnName ?? outputColumnName, Name = outputColumnName, @@ -300,7 +300,7 @@ internal SsaForecastingEstimator(IHostEnvironment env, { } - internal SsaForecastingEstimator(IHostEnvironment env, SsaForecasting.Options options) + internal SsaForecastingEstimator(IHostEnvironment env, SsaForecastingTransformer.Options options) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(nameof(SsaForecastingEstimator)); @@ -314,10 +314,10 @@ internal SsaForecastingEstimator(IHostEnvironment env, SsaForecasting.Options op /// /// Train and return a transformer. /// - public SsaForecasting Fit(IDataView input) + public SsaForecastingTransformer Fit(IDataView input) { _host.CheckValue(input, nameof(input)); - return new SsaForecasting(_host, _options, input); + return new SsaForecastingTransformer(_host, _options, input); } /// @@ -335,23 +335,19 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema) if (col.ItemType != NumberDataViewType.Single) throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", _options.Source, "Single", col.GetTypeString()); - var metadata = new List() { - new SchemaShape.Column(AnnotationUtils.Kinds.SlotNames, SchemaShape.Column.VectorKind.Vector, TextDataViewType.Instance, false) - }; - var resultDic = inputSchema.ToDictionary(x => x.Name); resultDic[_options.Name] = new SchemaShape.Column( - _options.Name, SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Single, false, new SchemaShape(metadata)); + _options.Name, SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Single, false); if (!string.IsNullOrEmpty(_options.ForcastingConfidentUpperBoundColumnName)) { resultDic[_options.ForcastingConfidentLowerBoundColumnName] = new SchemaShape.Column( _options.ForcastingConfidentLowerBoundColumnName, SchemaShape.Column.VectorKind.Vector, - NumberDataViewType.Single, false, new SchemaShape(metadata)); + NumberDataViewType.Single, false); resultDic[_options.ForcastingConfidentUpperBoundColumnName] = new SchemaShape.Column( _options.ForcastingConfidentUpperBoundColumnName, SchemaShape.Column.VectorKind.Vector, - NumberDataViewType.Single, false, new SchemaShape(metadata)); + NumberDataViewType.Single, false); } return new SchemaShape(resultDic.Values); diff --git a/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs b/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs index 2ca5f5c135..27557df7de 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs @@ -168,26 +168,28 @@ public Delegate[] CreateGetters(DataViewRow input, Func activeOutput, getters[0] = valueGetter; } - if (activeOutput(1)) + if (!string.IsNullOrEmpty(_parent.ForecastingConfidenceIntervalMaxOutputColumnName)) { - ValueGetter> valueGetter = (ref VBuffer dst) => + if (activeOutput(1)) { - State.ConfidenceIntervalLowerBound(ref dst); - }; + ValueGetter> valueGetter = (ref VBuffer dst) => + { + State.ConfidenceIntervalLowerBound(ref dst); + }; - getters[1] = valueGetter; - } + getters[1] = valueGetter; + } - if (activeOutput(2)) - { - ValueGetter> valueGetter = (ref VBuffer dst) => + if (activeOutput(2)) { - State.ConfidenceIntervalUpperBound(ref dst); - }; + ValueGetter> valueGetter = (ref VBuffer dst) => + { + State.ConfidenceIntervalUpperBound(ref dst); + }; - getters[2] = valueGetter; + getters[2] = valueGetter; + } } - return getters; } diff --git a/src/Microsoft.ML.TimeSeries/TimeSeriesProcessing.cs b/src/Microsoft.ML.TimeSeries/TimeSeriesProcessing.cs index 24882236e4..9420aad1f3 100644 --- a/src/Microsoft.ML.TimeSeries/TimeSeriesProcessing.cs +++ b/src/Microsoft.ML.TimeSeries/TimeSeriesProcessing.cs @@ -125,10 +125,10 @@ public static CommonOutputs.TransformOutput SsaSpikeDetector(IHostEnvironment en }; } - [TlcModule.EntryPoint(Desc = TimeSeries.SsaForecasting.Summary, - UserName = TimeSeries.SsaForecasting.UserName, - ShortName = TimeSeries.SsaForecasting.ShortName)] - internal static CommonOutputs.TransformOutput SsaForecasting(IHostEnvironment env, SsaForecasting.Options options) + [TlcModule.EntryPoint(Desc = TimeSeries.SsaForecastingTransformer.Summary, + UserName = TimeSeries.SsaForecastingTransformer.UserName, + ShortName = TimeSeries.SsaForecastingTransformer.ShortName)] + internal static CommonOutputs.TransformOutput SsaForecasting(IHostEnvironment env, SsaForecastingTransformer.Options options) { var h = EntryPointUtils.CheckArgsAndCreateHost(env, "SsaForecasting", options); var view = new SsaForecastingEstimator(h, options).Fit(options.Data).Transform(options.Data); diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 4f61724ff8..d8f4387d75 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -333,7 +333,7 @@ public void SsaForecast() List data = new List(); var dataView = env.Data.LoadFromEnumerable(data); - var args = new SsaForecasting.Options() + var args = new SsaForecastingTransformer.Options() { ConfidenceLevel = 0.95f, Source = "Value", @@ -388,7 +388,7 @@ public void SsaForecastPredictionEngine() var ml = new MLContext(seed: 1); var dataView = ml.Data.LoadFromEnumerable(data); - var args = new SsaForecasting.Options() + var args = new SsaForecastingTransformer.Options() { ConfidenceLevel = 0.95f, Source = "Value", diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesEstimatorTests.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesEstimatorTests.cs index 399b276194..efb25b43e3 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesEstimatorTests.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesEstimatorTests.cs @@ -74,6 +74,42 @@ void TestSsaChangePointEstimator() Done(); } + [Fact] + void TestSsaForecastingEstimator() + { + const int ChangeHistorySize = 10; + const int SeasonalitySize = 10; + const int NumberOfSeasonsInTraining = 5; + + List data = new List(); + + var ml = new MLContext(seed: 1); + var dataView = ml.Data.LoadFromEnumerable(data); + + for (int j = 0; j < NumberOfSeasonsInTraining; j++) + for (int i = 0; i < SeasonalitySize; i++) + data.Add(new Data(i)); + + for (int i = 0; i < ChangeHistorySize; i++) + data.Add(new Data(i * 100)); + + // Train + var pipe = new SsaForecastingEstimator(Env, "Forecast", "Value", 10, 11, 22, 4, + forcastingConfidentLowerBoundColumnName: "ConfidenceLowerBound", + forcastingConfidentUpperBoundColumnName: "ConfidenceUpperBound"); + + var xyData = new List { new TestDataXY() { A = new float[inputSize] } }; + var stringData = new List { new TestDataDifferntType() { data_0 = new string[inputSize] } }; + + var invalidDataWrongNames = ML.Data.LoadFromEnumerable(xyData); + var invalidDataWrongTypes = ML.Data.LoadFromEnumerable(stringData); + + TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongTypes); + TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongNames); + + Done(); + } + [Fact] void TestSsaSpikeEstimator() { From 989fc800ecd9daa4566564bb7ef15fd0c89c03db Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Mon, 1 Jul 2019 04:07:29 -0700 Subject: [PATCH 20/21] update docs. --- .../io-time-series-ssa-forecast.md | 5 +++ .../Transforms/TimeSeries/Forecasting.cs | 7 ++-- .../ForecastingWithConfidenceInterval.cs | 7 ++-- .../ExtensionsCatalog.cs | 4 +-- .../PredictionEngine.cs | 34 ++++++++++++++++--- src/Microsoft.ML.TimeSeries/SSaForecasting.cs | 9 +++-- 6 files changed, 49 insertions(+), 17 deletions(-) create mode 100644 docs/api-reference/io-time-series-ssa-forecast.md diff --git a/docs/api-reference/io-time-series-ssa-forecast.md b/docs/api-reference/io-time-series-ssa-forecast.md new file mode 100644 index 0000000000..ae510add41 --- /dev/null +++ b/docs/api-reference/io-time-series-ssa-forecast.md @@ -0,0 +1,5 @@ +### Input and Output Columns +There is only one input column. +The input column must be where a value indicates a value at a timestamp in the time series. + +It produces either just one vector of forecasted values or three vectors: a vector of forecasted values, a vector of confidence lower bounds and a vector of confidence upper bounds. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs index e2b12dcc44..0be475ff58 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs @@ -53,8 +53,7 @@ public static void Example() // Forecast next five values. var forecastEngine = transformer.CreateTimeSeriesEngine(ml); - var forecast = new ForecastResult(); - forecastEngine.Predict(null, ref forecast); + var forecast = forecastEngine.Predict(); Console.WriteLine($"Forecasted values:"); Console.WriteLine("[{0}]", string.Join(", ", forecast.Forecast)); @@ -80,12 +79,12 @@ public static void Example() var forecastEngineCopy = modelCopy.CreateTimeSeriesEngine(ml); // Forecast with the checkpointed model loaded from disk. - forecastEngineCopy.Predict(null, ref forecast); + forecast = forecastEngineCopy.Predict(); Console.WriteLine("[{0}]", string.Join(", ", forecast.Forecast)); // [1.791331, 1.255525, 0.3060154, -0.200446, 0.5657795] // Forecast with the original model(that was checkpointed to disk). - forecastEngine.Predict(null, ref forecast); + forecast = forecastEngine.Predict(); Console.WriteLine("[{0}]", string.Join(", ", forecast.Forecast)); // [1.791331, 1.255525, 0.3060154, -0.200446, 0.5657795] diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs index 1198ab93aa..571cf9325a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs @@ -56,8 +56,7 @@ public static void Example() // Forecast next five values. var forecastEngine = transformer.CreateTimeSeriesEngine(ml); - var forecast = new ForecastResult(); - forecastEngine.Predict(null, ref forecast); + var forecast = forecastEngine.Predict(); PrintForecastValuesAndIntervals(forecast.Forecast, forecast.ConfidenceLowerBound, forecast.ConfidenceUpperBound); // Forecasted values: @@ -84,14 +83,14 @@ public static void Example() var forecastEngineCopy = modelCopy.CreateTimeSeriesEngine(ml); // Forecast with the checkpointed model loaded from disk. - forecastEngineCopy.Predict(null, ref forecast); + forecast = forecastEngineCopy.Predict(); PrintForecastValuesAndIntervals(forecast.Forecast, forecast.ConfidenceLowerBound, forecast.ConfidenceUpperBound); // [1.791331, 1.255525, 0.3060154, -0.200446, 0.5657795] // Confidence intervals: // [0.1592142 - 3.423448] [-0.5617217 - 3.072772] [-1.512994 - 2.125025] [-2.022905 - 1.622013] [-1.351382 - 2.482941] // Forecast with the original model(that was checkpointed to disk). - forecastEngine.Predict(null, ref forecast); + forecast = forecastEngine.Predict(); PrintForecastValuesAndIntervals(forecast.Forecast, forecast.ConfidenceLowerBound, forecast.ConfidenceUpperBound); // [1.791331, 1.255525, 0.3060154, -0.200446, 0.5657795] // Confidence intervals: diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index 3694592c2a..bfec448467 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -147,7 +147,7 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog => new SrCnnAnomalyEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, windowSize, backAddWindowSize, lookaheadWindowSize, averageingWindowSize, judgementWindowSize, threshold, inputColumnName); /// - /// Singular Spectrum Analysis (SSA) model for modeling univariate time-series forecasting. + /// Singular Spectrum Analysis (SSA) model for univariate time-series forecasting. /// For the details of the model, refer to http://arxiv.org/pdf/1206.6910.pdf. /// /// Catalog. @@ -170,7 +170,7 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog /// The name of the confidence interval lower bound column. If not specified then confidence intervals will not be calculated. /// The name of the confidence interval upper bound column. If not specified then confidence intervals will not be calculated. /// The confidence level for forecasting. - /// Set this to true if horizon will change after training. + /// Set this to true if horizon will change after training(at prediction time). /// /// /// - /// Contructor for creating time series specific prediction engine. It allows update the time series model to be updated with the observations + /// Contructor for creating time series specific prediction engine. It allows the time series model to be updated with the observations /// seen at prediction time via /// public TimeSeriesPredictionEngine(IHostEnvironment env, ITransformer transformer, bool ignoreMissingColumns, @@ -269,8 +269,8 @@ private protected override Func TransformerChec /// The example to run on. /// The object to store the prediction in. If it's null, a new one will be created, otherwise the old one /// is reused. - /// - /// + /// Used to indicate the number of values to forecast. + /// Used in forecasting model for confidence. public void Predict(TSrc example, ref TDst prediction, int? horizon = null, float? confidenceLevel = null) { if (example != null && prediction != null) @@ -330,16 +330,40 @@ public void Predict(TSrc example, ref TDst prediction, int? horizon = null, floa } /// - /// This method should not be used. + /// Performs prediction. In the case of forecasting only task can be left as null. + /// If is not null then it could be used to update forecasting models with new obervation. + /// For anomaly detection the model is always updated with . /// + /// Input to the prediction engine. + /// Forecasting/Prediction from the engine. public override void Predict(TSrc example, ref TDst prediction) => Predict(example, ref prediction); + /// + /// Performs prediction. In the case of forecasting only task can be left as null. + /// If is not null then it could be used to update forecasting models with new obervation. + /// + /// Input to the prediction engine. + /// Number of values to forecast. + /// Confidence level for forecasting. + /// Prediction/Forecasting after the model has been updated with public TDst Predict(TSrc example, int? horizon = null, float? confidenceLevel = null) { TDst dst = new TDst(); Predict(example, ref dst, horizon, confidenceLevel); return dst; } + + /// + /// Forecasting only task. + /// + /// Number of values to forecast. + /// Confidence level for forecasting. + public TDst Predict(int? horizon = null, float? confidenceLevel = null) + { + TDst dst = new TDst(); + Predict(null, ref dst, horizon, confidenceLevel); + return dst; + } } public static class PredictionFunctionExtensions @@ -355,7 +379,7 @@ public static class PredictionFunctionExtensions /// To ignore missing columns. Default is false. /// Input schema definition. Default is null. /// Output schema definition. Default is null. - ///

Example code can be found by searching for TimeSeriesPredictionFunction in ML.NET.

+ ///

Example code can be found by searching for TimeSeriesPredictionEngine in ML.NET.

/// /// /// | /// | Output column data type | Vector of | /// + /// | | | + /// | -- | -- | + /// | Does this estimator need to look at the data to train its parameters? | Yes | + /// | Input column data type | | + /// | Output column data type | Three vectors of | + /// /// [!include[io](~/../docs/samples/docs/api-reference/time-series-props.md)] /// /// [!include[io](~/../docs/samples/docs/api-reference/time-series-ssa.md)] From 0d3027ed69cec6b78ab2a2b15dcdcb059e8ac2e9 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Mon, 1 Jul 2019 04:22:51 -0700 Subject: [PATCH 21/21] update entry points. --- src/Microsoft.ML.TimeSeries/PredictionEngine.cs | 9 +++++---- .../SequentialTransformBase.cs | 14 +++++++------- .../SequentialTransformerBase.cs | 4 ++-- .../Common/EntryPoints/core_ep-list.tsv | 2 +- .../Common/EntryPoints/core_manifest.json | 9 +++++++++ 5 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/PredictionEngine.cs b/src/Microsoft.ML.TimeSeries/PredictionEngine.cs index 8e99e130e7..75bcf88407 100644 --- a/src/Microsoft.ML.TimeSeries/PredictionEngine.cs +++ b/src/Microsoft.ML.TimeSeries/PredictionEngine.cs @@ -264,11 +264,12 @@ private protected override Func TransformerChec } /// - /// Run prediction pipeline on one example. + /// Performs prediction. In the case of forecasting only task can be left as null. + /// If is not null then it could be used to update forecasting models with new obervation. + /// For anomaly detection the model is always updated with . /// - /// The example to run on. - /// The object to store the prediction in. If it's null, a new one will be created, otherwise the old one - /// is reused. + /// Input to the prediction engine. + /// Forecasting/Prediction from the engine. /// Used to indicate the number of values to forecast. /// Used in forecasting model for confidence. public void Predict(TSrc example, ref TDst prediction, int? horizon = null, float? confidenceLevel = null) diff --git a/src/Microsoft.ML.TimeSeries/SequentialTransformBase.cs b/src/Microsoft.ML.TimeSeries/SequentialTransformBase.cs index 43a7005864..931cece4b0 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialTransformBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialTransformBase.cs @@ -36,19 +36,19 @@ public DataBox(T value) /// The type to be boxed, e.g. TInput or TOutput internal sealed class DataBoxForecastingWithConfidenceIntervals { - public T Value1; - public T Value2; - public T Value3; + public T Forecast; + public T ConfidenceIntervalLowerBound; + public T ConfidenceIntervalUpperBound; public DataBoxForecastingWithConfidenceIntervals() { } - public DataBoxForecastingWithConfidenceIntervals(T value1, T value2, T value3) + public DataBoxForecastingWithConfidenceIntervals(T forecast, T confidenceIntervalLowerBound, T confidenceIntervalUpperBound) { - Value1 = value1; - Value2 = value2; - Value3 = value2; + Forecast = forecast; + ConfidenceIntervalLowerBound = confidenceIntervalLowerBound; + ConfidenceIntervalUpperBound = confidenceIntervalUpperBound; } } diff --git a/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs b/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs index 6796d80c83..af6017a6ca 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs @@ -542,7 +542,7 @@ private static void MapFunction(DataBox input, DataBox output, private static void MapFunction(DataBox input, DataBoxForecastingWithConfidenceIntervals output, TState state) { - state.Process(ref input.Value, ref output.Value1, ref output.Value2, ref output.Value3); + state.Process(ref input.Value, ref output.Forecast, ref output.ConfidenceIntervalLowerBound, ref output.ConfidenceIntervalUpperBound); } private static void MapFunctionWithoutBuffer(DataBox input, DataBox output, TState state) @@ -552,7 +552,7 @@ private static void MapFunctionWithoutBuffer(DataBox input, DataBox input, DataBoxForecastingWithConfidenceIntervals output, TState state) { - state.ProcessWithoutBuffer(ref input.Value, ref output.Value1, ref output.Value2, ref output.Value3); + state.ProcessWithoutBuffer(ref input.Value, ref output.Forecast, ref output.ConfidenceIntervalLowerBound, ref output.ConfidenceIntervalUpperBound); } private void InitFunction(TState state) diff --git a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv index e7f45f417d..6288a254ae 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv +++ b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv @@ -38,7 +38,7 @@ TimeSeriesProcessingEntryPoints.PercentileThresholdTransform Detects the values TimeSeriesProcessingEntryPoints.PValueTransform This P-Value transform calculates the p-value of the current input in the sequence with regard to the values in the sliding window. Microsoft.ML.Transforms.TimeSeries.TimeSeriesProcessingEntryPoints PValueTransform Microsoft.ML.Transforms.TimeSeries.PValueTransform+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput TimeSeriesProcessingEntryPoints.SlidingWindowTransform Returns the last values for a time series [y(t-d-l+1), y(t-d-l+2), ..., y(t-l-1), y(t-l)] where d is the size of the window, l the lag and y is a Float. Microsoft.ML.Transforms.TimeSeries.TimeSeriesProcessingEntryPoints SlidingWindowTransform Microsoft.ML.Transforms.TimeSeries.SlidingWindowTransformBase`1+Arguments[System.Single] Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput TimeSeriesProcessingEntryPoints.SsaChangePointDetector This transform detects the change-points in a seasonal time-series using Singular Spectrum Analysis (SSA). Microsoft.ML.Transforms.TimeSeries.TimeSeriesProcessingEntryPoints SsaChangePointDetector Microsoft.ML.Transforms.TimeSeries.SsaChangePointDetector+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput -TimeSeriesProcessingEntryPoints.SsaForecasting This transform forecasts using Singular Spectrum Analysis (SSA). Microsoft.ML.Transforms.TimeSeries.TimeSeriesProcessingEntryPoints SsaForecasting Microsoft.ML.Transforms.TimeSeries.SsaForecasting+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput +TimeSeriesProcessingEntryPoints.SsaForecasting This transform forecasts using Singular Spectrum Analysis (SSA). Microsoft.ML.Transforms.TimeSeries.TimeSeriesProcessingEntryPoints SsaForecasting Microsoft.ML.Transforms.TimeSeries.SsaForecastingTransformer+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput TimeSeriesProcessingEntryPoints.SsaSpikeDetector This transform detects the spikes in a seasonal time-series using Singular Spectrum Analysis (SSA). Microsoft.ML.Transforms.TimeSeries.TimeSeriesProcessingEntryPoints SsaSpikeDetector Microsoft.ML.Transforms.TimeSeries.SsaSpikeDetector+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Trainers.AveragedPerceptronBinaryClassifier Averaged Perceptron Binary Classifier. Microsoft.ML.Trainers.AveragedPerceptronTrainer TrainBinary Microsoft.ML.Trainers.AveragedPerceptronTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput Trainers.EnsembleBinaryClassifier Train binary ensemble. Microsoft.ML.Trainers.Ensemble.Ensemble CreateBinaryEnsemble Microsoft.ML.Trainers.Ensemble.EnsembleTrainer+Arguments Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index 2413ab25d6..57ced3cd61 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -4085,6 +4085,15 @@ "IsNullable": false, "Default": 0.95 }, + { + "Name": "VariableHorizon", + "Type": "Bool", + "Desc": "Set this to true horizon will change at prediction time.", + "Required": false, + "SortOrder": 2.0, + "IsNullable": false, + "Default": false + }, { "Name": "ForcastingConfidentLowerBoundColumnName", "Type": "String",