From 2d5fbf6766e93330edbd18a9f770a2e5aa544aa0 Mon Sep 17 00:00:00 2001
From: Senja Filipi
Date: Thu, 25 Oct 2018 23:53:10 -0700
Subject: [PATCH 01/14] Adding another flavor of LR in the HAL learners package. This will compute the standard errors.

---
 .../LRWithTrainingStatistics.cs               | 346 ++++++++++++++++++
 .../LogisticRegression/LbfgsPredictorBase.cs  |  14 +-
 .../LogisticRegression/LogisticRegression.cs  |  22 +-
 .../Standard/ModelStatistics.cs               |   4 +-
 4 files changed, 366 insertions(+), 20 deletions(-)
 create mode 100644 src/Microsoft.ML.HalLearners/LRWithTrainingStatistics.cs

diff --git a/src/Microsoft.ML.HalLearners/LRWithTrainingStatistics.cs b/src/Microsoft.ML.HalLearners/LRWithTrainingStatistics.cs
new file mode 100644
index 0000000000..a8a9ee0d3a
--- /dev/null
+++ b/src/Microsoft.ML.HalLearners/LRWithTrainingStatistics.cs
@@ -0,0 +1,346 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Data;
+using Microsoft.ML.Runtime.EntryPoints;
+using Microsoft.ML.Runtime.Internal.Internallearn;
+using Microsoft.ML.Runtime.Internal.Utilities;
+using Microsoft.ML.Runtime.Learners;
+using Microsoft.ML.Runtime.Numeric;
+using Microsoft.ML.Runtime.Training;
+using Microsoft.ML.Trainers.HalLearners;
+using System;
+using System.Collections.Generic;
+
+[assembly: LoadableClass(LogisticRegressionWithStats.Summary, typeof(LogisticRegressionWithStats), typeof(LogisticRegression.Arguments),
+    new[] { typeof(SignatureBinaryClassifierTrainer), typeof(SignatureTrainer), typeof(SignatureFeatureScorerTrainer) },
+    LogisticRegressionWithStats.UserNameValue,
+    LogisticRegressionWithStats.LoadNameValue,
+    LogisticRegressionWithStats.ShortName,
+    "logisticregressionwrapper")]
+
+[assembly: LoadableClass(typeof(void), typeof(LogisticRegressionWithStats), null, typeof(SignatureEntryPointModule), LogisticRegressionWithStats.LoadNameValue)]
+
+namespace Microsoft.ML.Trainers.HalLearners
+{
+    using Mkl = OlsLinearRegressionTrainer.Mkl;
+
+    /// <summary>
+    /// A flavor of binary logistic regression that also computes the standard errors of the trained coefficients.
+    /// </summary>
+    public sealed partial class LogisticRegressionWithStats : LogisticRegression
+    {
+        public new const string LoadNameValue = "LogisticRegressionWithStats";
+        internal const string UserNameValue = "Logistic Regression";
+        internal const string ShortName = "lrwstats";
+        internal const string Summary = "Logistic Regression is a method in statistics used to predict the probability of occurrence of an event and can "
+            + "be used as a classification algorithm. The algorithm predicts the probability of occurrence of an event by fitting data to a logistic function.";
+
+        /// <summary>
+        /// Initializes a new instance of <see cref="LogisticRegressionWithStats"/>.
+        /// </summary>
+        /// <param name="env">The environment to use.</param>
+        /// <param name="labelColumn">The name of the label column.</param>
+        /// <param name="featureColumn">The name of the feature column.</param>
+        /// <param name="weightColumn">The name for the example weight column.</param>
+        /// <param name="enforceNoNegativity">Enforce non-negative weights.</param>
+        /// <param name="l1Weight">Weight of L1 regularizer term.</param>
+        /// <param name="l2Weight">Weight of L2 regularizer term.</param>
+        /// <param name="memorySize">Memory size for <see cref="LogisticRegressionWithStats"/>. Lower=faster, less accurate.</param>
+        /// <param name="optimizationTolerance">Threshold for optimizer convergence.</param>
+        /// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param>
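+        /// <example>
+        /// A minimal usage sketch; the environment and column names below are illustrative placeholders, not part of this change:
+        /// <code>
+        /// var trainer = new LogisticRegressionWithStats(env, "Features", "Label");
+        /// </code>
+        /// </example>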
+        public LogisticRegressionWithStats(IHostEnvironment env,
+            string featureColumn,
+            string labelColumn,
+            string weightColumn = null,
+            float l1Weight = Arguments.Defaults.L1Weight,
+            float l2Weight = Arguments.Defaults.L2Weight,
+            float optimizationTolerance = Arguments.Defaults.OptTol,
+            int memorySize = Arguments.Defaults.MemorySize,
+            bool enforceNoNegativity = Arguments.Defaults.EnforceNonNegativity,
+            Action<Arguments> advancedSettings = null)
+            : base(env, featureColumn, labelColumn, weightColumn,
+                  l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity, advancedSettings)
+        {
+        }
+
+        /// <summary>
+        /// Initializes a new instance of <see cref="LogisticRegressionWithStats"/>.
+        /// </summary>
+        internal LogisticRegressionWithStats(IHostEnvironment env, Arguments args)
+            : base(env, args)
+        {
+        }
+
+        protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss, int numParams)
+        {
+            Contracts.AssertValue(ch);
+            Contracts.AssertValue(cursorFactory);
+            Contracts.Assert(NumGoodRows > 0);
+            Contracts.Assert(WeightSum > 0);
+            Contracts.Assert(BiasCount == 1);
+            Contracts.Assert(loss >= 0);
+            Contracts.Assert(numParams >= BiasCount);
+            Contracts.Assert(CurrentWeights.IsDense);
+
+            ch.Info("Model trained with {0} training examples.", NumGoodRows);
+
+            // Compute deviance: start with the loss function.
+            float deviance = (float)(2 * loss * WeightSum);
+
+            if (L2Weight > 0)
+            {
+                // Need to subtract the L2 regularization loss.
+                // The bias term is not regularized.
+                var regLoss = VectorUtils.NormSquared(CurrentWeights.Values, 1, CurrentWeights.Length - 1) * L2Weight;
+                deviance -= regLoss;
+            }
+
+            if (L1Weight > 0)
+            {
+                // Need to subtract the L1 regularization loss.
+                // The bias term is not regularized.
+                Double regLoss = 0;
+                VBufferUtils.ForEachDefined(ref CurrentWeights, (ind, value) => { if (ind >= BiasCount) regLoss += Math.Abs(value); });
+                deviance -= (float)regLoss * L1Weight * 2;
+            }
+
+            ch.Info("Residual Deviance: \t{0} (on {1} degrees of freedom)", deviance, Math.Max(NumGoodRows - numParams, 0));
+
+            // Compute the null deviance, i.e., the deviance of the null hypothesis.
+            // Cap the prior positive rate at 1e-15.
+            Double priorPosRate = PosWeight / WeightSum;
+            Contracts.Assert(0 <= priorPosRate && priorPosRate <= 1);
+            float nullDeviance = (priorPosRate <= 1e-15 || 1 - priorPosRate <= 1e-15) ?
+                0f : (float)(2 * WeightSum * MathUtils.Entropy(priorPosRate, true));
+            ch.Info("Null Deviance: \t{0} (on {1} degrees of freedom)", nullDeviance, NumGoodRows - 1);
+
+            // Compute AIC.
+            ch.Info("AIC: \t{0}", 2 * numParams + deviance);
+
+            // Show the coefficients statistics table.
+            var featureColIdx = cursorFactory.Data.Schema.Feature.Index;
+            var schema = cursorFactory.Data.Data.Schema;
+            var featureLength = CurrentWeights.Length - BiasCount;
+            var namesSpans = VBufferUtils.CreateEmpty<ReadOnlyMemory<char>>(featureLength);
+            if (schema.HasSlotNames(featureColIdx, featureLength))
+                schema.GetMetadata(MetadataUtils.Kinds.SlotNames, featureColIdx, ref namesSpans);
+            Host.Assert(namesSpans.Length == featureLength);
+
+            // Inverse mapping of non-zero weight slots.
+            Dictionary<int, int> weightIndicesInvMap = null;
+
+            // Indices of the bias and non-zero weight slots.
+            int[] weightIndices = null;
+
+            // Whether all weights are non-zero.
+            bool denseWeight = numParams == CurrentWeights.Length;
+
+            // Extract the non-zero indices of the weights.
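+            // Illustrative example (values assumed, not from the source): with
+            // CurrentWeights = (bias, 0, 0.7, 0, 0.2) and numParams = 3, the loop below fills
+            // weightIndices = [0, 2, 4] and weightIndicesInvMap = {0 -> 0, 2 -> 1, 4 -> 2};
+            // slot 0 is always reserved for the bias.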
+            if (!denseWeight)
+            {
+                weightIndices = new int[numParams];
+                weightIndicesInvMap = new Dictionary<int, int>(numParams);
+                weightIndices[0] = 0;
+                weightIndicesInvMap[0] = 0;
+                int j = 1;
+                for (int i = 1; i < CurrentWeights.Length; i++)
+                {
+                    if (CurrentWeights.Values[i] != 0)
+                    {
+                        weightIndices[j] = i;
+                        weightIndicesInvMap[i] = j++;
+                    }
+                }
+
+                Contracts.Assert(j == numParams);
+            }
+
+            // Compute the standard error of coefficients.
+            long hessianDimension = (long)numParams * (numParams + 1) / 2;
+            if (hessianDimension > int.MaxValue)
+            {
+                ch.Warning("The number of parameters is too large. Cannot hold the variance-covariance matrix in memory. " +
+                    "Skipping computation of standard errors and z-statistics of coefficients. Consider choosing a larger L1 regularizer " +
+                    "to reduce the number of parameters.");
+                Stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
+                return;
+            }
+
+            // Building the variance-covariance matrix for parameters.
+            // The layout is a packed row-major lower triangular matrix.
+            // For example, the layout of indices for 4-by-4:
+            // 0
+            // 1 2
+            // 3 4 5
+            // 6 7 8 9
+            var hessian = new Double[hessianDimension];
+
+            // Initialize diagonal elements with L2 regularizers except for the first entry (index 0),
+            // since the bias is not regularized.
+            if (L2Weight > 0)
+            {
+                // i is the array index of the diagonal entry at the iRow-th row and iRow-th column.
+                // iRow is one-based.
+                int i = 0;
+                for (int iRow = 2; iRow <= numParams; iRow++)
+                {
+                    i += iRow;
+                    hessian[i] = L2Weight;
+                }
+
+                Contracts.Assert(i == hessian.Length - 1);
+            }
+
+            // Initialize the remaining entries.
+            var bias = CurrentWeights.Values[0];
+            using (var cursor = cursorFactory.Create())
+            {
+                while (cursor.MoveNext())
+                {
+                    var label = cursor.Label;
+                    var weight = cursor.Weight;
+                    var score = bias + VectorUtils.DotProductWithOffset(ref CurrentWeights, 1, ref cursor.Features);
+                    // Compute the Bernoulli variance n_i * p_i * (1 - p_i) for the i-th training example.
+                    var variance = weight / (2 + 2 * Math.Cosh(score));
+
+                    // Increment the first entry of hessian.
+                    hessian[0] += variance;
+
+                    var values = cursor.Features.Values;
+                    if (cursor.Features.IsDense)
+                    {
+                        int ioff = 1;
+
+                        // Increment the remaining entries of hessian.
+                        for (int i = 1; i < numParams; i++)
+                        {
+                            ch.Assert(ioff == i * (i + 1) / 2);
+                            int wi = weightIndices == null ? i - 1 : weightIndices[i] - 1;
+                            Contracts.Assert(0 <= wi && wi < cursor.Features.Length);
+                            var val = values[wi] * variance;
+                            // Add the implicit first bias term to X'X.
+                            hessian[ioff++] += val;
+                            // Add the remainder of X'X.
+                            for (int j = 0; j < i; j++)
+                            {
+                                int wj = weightIndices == null ? j : weightIndices[j + 1] - 1;
+                                Contracts.Assert(0 <= wj && wj < cursor.Features.Length);
+                                hessian[ioff++] += val * values[wj];
+                            }
+                        }
+                        ch.Assert(ioff == hessian.Length);
+                    }
+                    else
+                    {
+                        var indices = cursor.Features.Indices;
+                        for (int ii = 0; ii < cursor.Features.Count; ++ii)
+                        {
+                            int i = indices[ii];
+                            int wi = i + 1;
+                            if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(i + 1, out wi))
+                                continue;
+
+                            Contracts.Assert(0 < wi && wi <= cursor.Features.Length);
+                            int ioff = wi * (wi + 1) / 2;
+                            var val = values[ii] * variance;
+                            // Add the implicit first bias term to X'X.
+                            hessian[ioff] += val;
+                            // Add the remainder of X'X.
+                            for (int jj = 0; jj <= ii; jj++)
+                            {
+                                int j = indices[jj];
+                                int wj = j + 1;
+                                if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(j + 1, out wj))
+                                    continue;
+
+                                Contracts.Assert(0 < wj && wj <= cursor.Features.Length);
+                                hessian[ioff + wj] += val * values[jj];
+                            }
+                        }
+                    }
+                }
+            }
+
+            // Apply a Cholesky decomposition to find the inverse of the Hessian.
+            Double[] invHessian = null;
+            try
+            {
+                // First, find the Cholesky decomposition LL' of the Hessian.
+                Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numParams, hessian);
+                // Note that hessian is already modified at this point. It is no longer the original Hessian,
+                // but instead represents the Cholesky decomposition L.
+                // Also note that the following routine is supposed to consume the Cholesky decomposition L instead
+                // of the original information matrix.
+                Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numParams, hessian);
+                // At this point, hessian should contain the inverse of the original Hessian matrix.
+                // Swap hessian with invHessian to avoid confusion in the following context.
+                Utils.Swap(ref hessian, ref invHessian);
+                Contracts.Assert(hessian == null);
+            }
+            catch (DllNotFoundException)
+            {
+                throw ch.ExceptNotSupp("The MKL library (Microsoft.ML.MklImports.dll) or one of its dependencies is missing.");
+            }
+
+            float[] stdErrorValues = new float[numParams];
+            stdErrorValues[0] = (float)Math.Sqrt(invHessian[0]);
+
+            for (int i = 1; i < numParams; i++)
+            {
+                // Initialize with the diagonal entries of the inverse Hessian.
+                stdErrorValues[i] = (Single)invHessian[i * (i + 1) / 2 + i];
+            }
+
+            if (L2Weight > 0)
+            {
+                // Iterate through all entries of the inverse Hessian to adjust the variance.
+ // A discussion on ridge regularized LR coefficient covariance matrix can be found here: + // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/ + // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf + int ioffset = 1; + for (int iRow = 1; iRow < numParams; iRow++) + { + for (int iCol = 0; iCol <= iRow; iCol++) + { + var entry = (Single)invHessian[ioffset]; + var adjustment = -L2Weight * entry * entry; + stdErrorValues[iRow] -= adjustment; + if (0 < iCol && iCol < iRow) + stdErrorValues[iCol] -= adjustment; + ioffset++; + } + } + + Contracts.Assert(ioffset == invHessian.Length); + } + + for (int i = 1; i < numParams; i++) + stdErrorValues[i] = (float)Math.Sqrt(stdErrorValues[i]); + + VBuffer stdErrors = new VBuffer(CurrentWeights.Length, numParams, stdErrorValues, weightIndices); + Stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance, ref stdErrors); + } + + [TlcModule.EntryPoint(Name = "Trainers.LogisticRegressionBinaryClassifierWithStats", + Desc = Summary, + UserName = UserNameValue, + ShortName = ShortName, + XmlInclude = new[] { @"", + @""})] + + public static new CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, Arguments input) + { + Contracts.CheckValue(env, nameof(env)); + var host = env.Register("TrainLRBinary"); + host.CheckValue(input, nameof(input)); + EntryPointUtils.CheckInputArgs(host, input); + + return LearnerEntryPointsUtils.Train(host, input, + () => new LogisticRegressionWithStats(host, input), + () => LearnerEntryPointsUtils.FindColumn(host, input.TrainingData.Schema, input.LabelColumn), + () => LearnerEntryPointsUtils.FindColumn(host, input.TrainingData.Schema, input.WeightColumn)); + } + } +} diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs index 5f7a40c5d2..c7656822a7 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs @@ -92,14 +92,14 @@ public abstract class ArgumentsBase : LearnerInputBaseWithWeight [Argument(ArgumentType.AtMostOnce, HelpText = "Enforce non-negative weights", ShortName = "nn", SortOrder = 90)] public bool EnforceNonNegativity = Defaults.EnforceNonNegativity; - internal static class Defaults + public static class Defaults { - internal const float L2Weight = 1; - internal const float L1Weight = 1; - internal const float OptTol = 1e-7f; - internal const int MemorySize = 20; - internal const int MaxIterations = int.MaxValue; - internal const bool EnforceNonNegativity = false; + public const float L2Weight = 1; + public const float L1Weight = 1; + public const float OptTol = 1e-7f; + public const int MemorySize = 20; + public const int MaxIterations = int.MaxValue; + public const bool EnforceNonNegativity = false; } } diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs index 48757b347a..1b827c1e10 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs @@ -30,7 +30,7 @@ namespace Microsoft.ML.Runtime.Learners /// /// - public sealed partial class LogisticRegression : LbfgsTrainerBase, ParameterMixingCalibratedPredictor> + public partial 
class LogisticRegression : LbfgsTrainerBase, ParameterMixingCalibratedPredictor> { public const string LoadNameValue = "LogisticRegression"; internal const string UserNameValue = "Logistic Regression"; @@ -44,8 +44,8 @@ public sealed class Arguments : ArgumentsBase public bool ShowTrainingStats = false; } - private Double _posWeight; - private LinearModelStatistics _stats; + protected Double PosWeight; + protected LinearModelStatistics Stats; /// /// Initializes a new instance of @@ -76,17 +76,17 @@ public LogisticRegression(IHostEnvironment env, Host.CheckNonEmpty(featureColumn, nameof(featureColumn)); Host.CheckNonEmpty(labelColumn, nameof(labelColumn)); - _posWeight = 0; + PosWeight = 0; ShowTrainingStats = Args.ShowTrainingStats; } /// /// Initializes a new instance of /// - internal LogisticRegression(IHostEnvironment env, Arguments args) + public LogisticRegression(IHostEnvironment env, Arguments args) : base(env, args, TrainerUtils.MakeBoolScalarLabel(args.LabelColumn)) { - _posWeight = 0; + PosWeight = 0; ShowTrainingStats = Args.ShowTrainingStats; } @@ -177,7 +177,7 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. // Compute null deviance, i.e., the deviance of null hypothesis. // Cap the prior positive rate at 1e-15. - Double priorPosRate = _posWeight / WeightSum; + Double priorPosRate = PosWeight / WeightSum; Contracts.Assert(0 <= priorPosRate && priorPosRate <= 1); float nullDeviance = (priorPosRate <= 1e-15 || 1 - priorPosRate <= 1e-15) ? 0f : (float)(2 * WeightSum * MathUtils.Entropy(priorPosRate, true)); @@ -231,7 +231,7 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. ch.Warning("The number of parameter is too large. Cannot hold the variance-covariance matrix in memory. " + "Skipping computation of standard errors and z-statistics of coefficients. Consider choosing a larger L1 regularizer" + "to reduce the number of parameters."); - _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance); + Stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance); return; } @@ -330,13 +330,13 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. 
} } - _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance); + Stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance); } protected override void ProcessPriorDistribution(float label, float weight) { if (label > 0) - _posWeight += weight; + PosWeight += weight; } //Override default termination criterion MeanRelativeImprovementCriterion with @@ -373,7 +373,7 @@ protected override ParameterMixingCalibratedPredictor CreatePredictor() CurrentWeights.GetItemOrDefault(0, ref bias); CurrentWeights.CopyTo(ref weights, 1, CurrentWeights.Length - 1); return new ParameterMixingCalibratedPredictor(Host, - new LinearBinaryPredictor(Host, ref weights, bias, _stats), + new LinearBinaryPredictor(Host, ref weights, bias, Stats), new PlattCalibrator(Host, -1, 0)); } diff --git a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs index 8e12b04c3f..1c735d0c66 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs @@ -92,7 +92,7 @@ private static VersionInfo GetVersionInfo() public int ParametersCount { get { return _paramCount; } } - internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance) + public LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance) { Contracts.AssertValue(env); env.Assert(trainingExampleCount > 0); @@ -104,7 +104,7 @@ internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, _nullDeviance = nullDeviance; } - internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance, ref VBuffer coeffStdError) + public LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance, ref VBuffer coeffStdError) : this(env, trainingExampleCount, paramCount, deviance, nullDeviance) { _env.Assert(coeffStdError.Count == _paramCount); From 68dd4a66e63bb66923050151082006de585df35c Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Tue, 30 Oct 2018 16:33:00 -0700 Subject: [PATCH 02/14] Adding ComputeExtendedTrainingStatistics as an extension method on LR., rather than a separate trainer. --- .../LRWithTrainingStatistics.cs | 346 ------------------ .../LogisticRegressionTrainingStats.cs | 88 +++++ .../LogisticRegression/LbfgsPredictorBase.cs | 30 +- .../LogisticRegression/LogisticRegression.cs | 42 ++- .../MulticlassLogisticRegression.cs | 8 +- .../Standard/ModelStatistics.cs | 22 +- .../PoissonRegression/PoissonRegression.cs | 2 +- .../TrainerEstimators/LbfgsTests.cs | 15 + 8 files changed, 170 insertions(+), 383 deletions(-) delete mode 100644 src/Microsoft.ML.HalLearners/LRWithTrainingStatistics.cs create mode 100644 src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs diff --git a/src/Microsoft.ML.HalLearners/LRWithTrainingStatistics.cs b/src/Microsoft.ML.HalLearners/LRWithTrainingStatistics.cs deleted file mode 100644 index a8a9ee0d3a..0000000000 --- a/src/Microsoft.ML.HalLearners/LRWithTrainingStatistics.cs +++ /dev/null @@ -1,346 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. 
- -using Microsoft.ML.Runtime; -using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Runtime.EntryPoints; -using Microsoft.ML.Runtime.Internal.Internallearn; -using Microsoft.ML.Runtime.Internal.Utilities; -using Microsoft.ML.Runtime.Learners; -using Microsoft.ML.Runtime.Numeric; -using Microsoft.ML.Runtime.Training; -using Microsoft.ML.Trainers.HalLearners; -using System; -using System.Collections.Generic; - -[assembly: LoadableClass(LogisticRegressionWithStats.Summary, typeof(LogisticRegressionWithStats), typeof(LogisticRegression.Arguments), - new[] { typeof(SignatureBinaryClassifierTrainer), typeof(SignatureTrainer), typeof(SignatureFeatureScorerTrainer) }, - LogisticRegressionWithStats.UserNameValue, - LogisticRegressionWithStats.LoadNameValue, - LogisticRegressionWithStats.ShortName, - "logisticregressionwrapper")] - -[assembly: LoadableClass(typeof(void), typeof(LogisticRegressionWithStats), null, typeof(SignatureEntryPointModule), LogisticRegressionWithStats.LoadNameValue)] - -namespace Microsoft.ML.Trainers.HalLearners -{ - using Mkl = OlsLinearRegressionTrainer.Mkl; - - /// - /// - public sealed partial class LogisticRegressionWithStats : LogisticRegression - { - public new const string LoadNameValue = "LogisticRegressionWithStats"; - internal const string UserNameValue = "Logistic Regression"; - internal const string ShortName = "lrwstats"; - internal const string Summary = "Logistic Regression is a method in statistics used to predict the probability of occurrence of an event and can " - + "be used as a classification algorithm. The algorithm predicts the probability of occurrence of an event by fitting data to a logistical function."; - - /// - /// Initializes a new instance of - /// - /// The environment to use. - /// The name of the label column. - /// The name of the feature column. - /// The name for the example weight column. - /// Enforce non-negative weights. - /// Weight of L1 regularizer term. - /// Weight of L2 regularizer term. - /// Memory size for . Lower=faster, less accurate. - /// Threshold for optimizer convergence. - /// A delegate to apply all the advanced arguments to the algorithm. - public LogisticRegressionWithStats(IHostEnvironment env, - string featureColumn, - string labelColumn, - string weightColumn = null, - float l1Weight = Arguments.Defaults.L1Weight, - float l2Weight = Arguments.Defaults.L2Weight, - float optimizationTolerance = Arguments.Defaults.OptTol, - int memorySize = Arguments.Defaults.MemorySize, - bool enforceNoNegativity = Arguments.Defaults.EnforceNonNegativity, - Action advancedSettings = null) - : base(env, featureColumn, labelColumn, weightColumn, - l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity, advancedSettings) - { - } - - /// - /// Initializes a new instance of - /// - internal LogisticRegressionWithStats(IHostEnvironment env, Arguments args) - : base(env, args) - { - } - - protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss, int numParams) - { - Contracts.AssertValue(ch); - Contracts.AssertValue(cursorFactory); - Contracts.Assert(NumGoodRows > 0); - Contracts.Assert(WeightSum > 0); - Contracts.Assert(BiasCount == 1); - Contracts.Assert(loss >= 0); - Contracts.Assert(numParams >= BiasCount); - Contracts.Assert(CurrentWeights.IsDense); - - ch.Info("Model trained with {0} training examples.", NumGoodRows); - - // Compute deviance: start with loss function. 
- float deviance = (float)(2 * loss * WeightSum); - - if (L2Weight > 0) - { - // Need to subtract L2 regularization loss. - // The bias term is not regularized. - var regLoss = VectorUtils.NormSquared(CurrentWeights.Values, 1, CurrentWeights.Length - 1) * L2Weight; - deviance -= regLoss; - } - - if (L1Weight > 0) - { - // Need to subtract L1 regularization loss. - // The bias term is not regularized. - Double regLoss = 0; - VBufferUtils.ForEachDefined(ref CurrentWeights, (ind, value) => { if (ind >= BiasCount) regLoss += Math.Abs(value); }); - deviance -= (float)regLoss * L1Weight * 2; - } - - ch.Info("Residual Deviance: \t{0} (on {1} degrees of freedom)", deviance, Math.Max(NumGoodRows - numParams, 0)); - - // Compute null deviance, i.e., the deviance of null hypothesis. - // Cap the prior positive rate at 1e-15. - Double priorPosRate = PosWeight / WeightSum; - Contracts.Assert(0 <= priorPosRate && priorPosRate <= 1); - float nullDeviance = (priorPosRate <= 1e-15 || 1 - priorPosRate <= 1e-15) ? - 0f : (float)(2 * WeightSum * MathUtils.Entropy(priorPosRate, true)); - ch.Info("Null Deviance: \t{0} (on {1} degrees of freedom)", nullDeviance, NumGoodRows - 1); - - // Compute AIC. - ch.Info("AIC: \t{0}", 2 * numParams + deviance); - - // Show the coefficients statistics table. - var featureColIdx = cursorFactory.Data.Schema.Feature.Index; - var schema = cursorFactory.Data.Data.Schema; - var featureLength = CurrentWeights.Length - BiasCount; - var namesSpans = VBufferUtils.CreateEmpty>(featureLength); - if (schema.HasSlotNames(featureColIdx, featureLength)) - schema.GetMetadata(MetadataUtils.Kinds.SlotNames, featureColIdx, ref namesSpans); - Host.Assert(namesSpans.Length == featureLength); - - // Inverse mapping of non-zero weight slots. - Dictionary weightIndicesInvMap = null; - - // Indices of bias and non-zero weight slots. - int[] weightIndices = null; - - // Whether all weights are non-zero. - bool denseWeight = numParams == CurrentWeights.Length; - - // Extract non-zero indices of weight. - if (!denseWeight) - { - weightIndices = new int[numParams]; - weightIndicesInvMap = new Dictionary(numParams); - weightIndices[0] = 0; - weightIndicesInvMap[0] = 0; - int j = 1; - for (int i = 1; i < CurrentWeights.Length; i++) - { - if (CurrentWeights.Values[i] != 0) - { - weightIndices[j] = i; - weightIndicesInvMap[i] = j++; - } - } - - Contracts.Assert(j == numParams); - } - - // Compute the standard error of coefficients. - long hessianDimension = (long)numParams * (numParams + 1) / 2; - if (hessianDimension > int.MaxValue) - { - ch.Warning("The number of parameter is too large. Cannot hold the variance-covariance matrix in memory. " + - "Skipping computation of standard errors and z-statistics of coefficients. Consider choosing a larger L1 regularizer" + - "to reduce the number of parameters."); - Stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance); - return; - } - - // Building the variance-covariance matrix for parameters. - // The layout of this algorithm is a packed row-major lower triangular matrix. - // For example, layout of indices for 4-by-4: - // 0 - // 1 2 - // 3 4 5 - // 6 7 8 9 - var hessian = new Double[hessianDimension]; - - // Initialize diagonal elements with L2 regularizers except for the first entry (index 0) - // Since bias is not regularized. - if (L2Weight > 0) - { - // i is the array index of the diagonal entry at iRow-th row and iRow-th column. - // iRow is one-based. 
- int i = 0; - for (int iRow = 2; iRow <= numParams; iRow++) - { - i += iRow; - hessian[i] = L2Weight; - } - - Contracts.Assert(i == hessian.Length - 1); - } - - // Initialize the remaining entries. - var bias = CurrentWeights.Values[0]; - using (var cursor = cursorFactory.Create()) - { - while (cursor.MoveNext()) - { - var label = cursor.Label; - var weight = cursor.Weight; - var score = bias + VectorUtils.DotProductWithOffset(ref CurrentWeights, 1, ref cursor.Features); - // Compute Bernoulli variance n_i * p_i * (1 - p_i) for the i-th training example. - var variance = weight / (2 + 2 * Math.Cosh(score)); - - // Increment the first entry of hessian. - hessian[0] += variance; - - var values = cursor.Features.Values; - if (cursor.Features.IsDense) - { - int ioff = 1; - - // Increment remaining entries of hessian. - for (int i = 1; i < numParams; i++) - { - ch.Assert(ioff == i * (i + 1) / 2); - int wi = weightIndices == null ? i - 1 : weightIndices[i] - 1; - Contracts.Assert(0 <= wi && wi < cursor.Features.Length); - var val = values[wi] * variance; - // Add the implicit first bias term to X'X - hessian[ioff++] += val; - // Add the remainder of X'X - for (int j = 0; j < i; j++) - { - int wj = weightIndices == null ? j : weightIndices[j + 1] - 1; - Contracts.Assert(0 <= wj && wj < cursor.Features.Length); - hessian[ioff++] += val * values[wj]; - } - } - ch.Assert(ioff == hessian.Length); - } - else - { - var indices = cursor.Features.Indices; - for (int ii = 0; ii < cursor.Features.Count; ++ii) - { - int i = indices[ii]; - int wi = i + 1; - if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(i + 1, out wi)) - continue; - - Contracts.Assert(0 < wi && wi <= cursor.Features.Length); - int ioff = wi * (wi + 1) / 2; - var val = values[ii] * variance; - // Add the implicit first bias term to X'X - hessian[ioff] += val; - // Add the remainder of X'X - for (int jj = 0; jj <= ii; jj++) - { - int j = indices[jj]; - int wj = j + 1; - if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(j + 1, out wj)) - continue; - - Contracts.Assert(0 < wj && wj <= cursor.Features.Length); - hessian[ioff + wj] += val * values[jj]; - } - } - } - } - } - - // Apply Cholesky Decomposition to find the inverse of the Hessian. - Double[] invHessian = null; - try - { - // First, find the Cholesky decomposition LL' of the Hessian. - Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numParams, hessian); - // Note that hessian is already modified at this point. It is no longer the original Hessian, - // but instead represents the Cholesky decomposition L. - // Also note that the following routine is supposed to consume the Cholesky decomposition L instead - // of the original information matrix. - Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numParams, hessian); - // At this point, hessian should contain the inverse of the original Hessian matrix. - // Swap hessian with invHessian to avoid confusion in the following context. - Utils.Swap(ref hessian, ref invHessian); - Contracts.Assert(hessian == null); - } - catch (DllNotFoundException) - { - throw ch.ExceptNotSupp("The MKL library (Microsoft.ML.MklImports.dll) or one of its dependencies is missing."); - } - - float[] stdErrorValues = new float[numParams]; - stdErrorValues[0] = (float)Math.Sqrt(invHessian[0]); - - for (int i = 1; i < numParams; i++) - { - // Initialize with inverse Hessian. 
- stdErrorValues[i] = (Single)invHessian[i * (i + 1) / 2 + i]; - } - - if (L2Weight > 0) - { - // Iterate through all entries of inverse Hessian to make adjustment to variance. - // A discussion on ridge regularized LR coefficient covariance matrix can be found here: - // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/ - // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf - int ioffset = 1; - for (int iRow = 1; iRow < numParams; iRow++) - { - for (int iCol = 0; iCol <= iRow; iCol++) - { - var entry = (Single)invHessian[ioffset]; - var adjustment = -L2Weight * entry * entry; - stdErrorValues[iRow] -= adjustment; - if (0 < iCol && iCol < iRow) - stdErrorValues[iCol] -= adjustment; - ioffset++; - } - } - - Contracts.Assert(ioffset == invHessian.Length); - } - - for (int i = 1; i < numParams; i++) - stdErrorValues[i] = (float)Math.Sqrt(stdErrorValues[i]); - - VBuffer stdErrors = new VBuffer(CurrentWeights.Length, numParams, stdErrorValues, weightIndices); - Stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance, ref stdErrors); - } - - [TlcModule.EntryPoint(Name = "Trainers.LogisticRegressionBinaryClassifierWithStats", - Desc = Summary, - UserName = UserNameValue, - ShortName = ShortName, - XmlInclude = new[] { @"", - @""})] - - public static new CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, Arguments input) - { - Contracts.CheckValue(env, nameof(env)); - var host = env.Register("TrainLRBinary"); - host.CheckValue(input, nameof(input)); - EntryPointUtils.CheckInputArgs(host, input); - - return LearnerEntryPointsUtils.Train(host, input, - () => new LogisticRegressionWithStats(host, input), - () => LearnerEntryPointsUtils.FindColumn(host, input.TrainingData.Schema, input.LabelColumn), - () => LearnerEntryPointsUtils.FindColumn(host, input.TrainingData.Schema, input.WeightColumn)); - } - } -} diff --git a/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs b/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs new file mode 100644 index 0000000000..5f7bf7441f --- /dev/null +++ b/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs @@ -0,0 +1,88 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.Internal.Utilities; +using Microsoft.ML.Trainers.HalLearners; +using System; + +namespace Microsoft.ML.Runtime.Learners +{ + using Mkl = OlsLinearRegressionTrainer.Mkl; + + /// + /// + public static class LogisticRegressionTrainingStats + { + + public static void ComputeExtendedTrainingStatistics(this LogisticRegression trainer, IChannel ch) + { + Contracts.AssertValue(ch); + Contracts.AssertValue(trainer.Stats, $"Training Statistics can get generated after training finishes. Train with setting: ShowTrainigStats set to true."); + Contracts.Assert(trainer.GetL2Weight > 0); + Contracts.Assert(trainer.GetNumGoodRows > 0); + + ch.Info("Model trained with {0} training examples.", trainer.GetNumGoodRows); + + // Apply Cholesky Decomposition to find the inverse of the Hessian. + Double[] invHessian = null; + try + { + // First, find the Cholesky decomposition LL' of the Hessian. + Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, trainer.GetNumSelectedParams, trainer.Stats.Hessian); + // Note that hessian is already modified at this point. 
It is no longer the original Hessian, + // but instead represents the Cholesky decomposition L. + // Also note that the following routine is supposed to consume the Cholesky decomposition L instead + // of the original information matrix. + Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, trainer.GetNumSelectedParams, trainer.Stats.Hessian); + // At this point, hessian should contain the inverse of the original Hessian matrix. + // Swap hessian with invHessian to avoid confusion in the following context. + Utils.Swap(ref trainer.Stats.Hessian, ref invHessian); + Contracts.Assert(trainer.Stats.Hessian == null); + } + catch (DllNotFoundException) + { + throw ch.ExceptNotSupp("The MKL library (MklImports.dll) or one of its dependencies is missing."); + } + + float[] stdErrorValues = new float[trainer.GetNumSelectedParams]; + stdErrorValues[0] = (float)Math.Sqrt(invHessian[0]); + + for (int i = 1; i < trainer.GetNumSelectedParams; i++) + { + // Initialize with inverse Hessian. + stdErrorValues[i] = (Single)invHessian[i * (i + 1) / 2 + i]; + } + + if (trainer.GetL2Weight > 0) + { + // Iterate through all entries of inverse Hessian to make adjustment to variance. + // A discussion on ridge regularized LR coefficient covariance matrix can be found here: + // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/ + // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf + int ioffset = 1; + for (int iRow = 1; iRow < trainer.GetNumSelectedParams; iRow++) + { + for (int iCol = 0; iCol <= iRow; iCol++) + { + var entry = (Single)invHessian[ioffset]; + var adjustment = -trainer.GetL2Weight * entry * entry; + stdErrorValues[iRow] -= adjustment; + if (0 < iCol && iCol < iRow) + stdErrorValues[iCol] -= adjustment; + ioffset++; + } + } + + Contracts.Assert(ioffset == invHessian.Length); + } + + for (int i = 1; i < trainer.GetNumSelectedParams; i++) + stdErrorValues[i] = (float)Math.Sqrt(stdErrorValues[i]); + + VBuffer stdErrors = new VBuffer(trainer.GetWeights.Length, trainer.GetNumSelectedParams, stdErrorValues, trainer.Stats.WeightIndices); + trainer.Stats.SetCoeffStdError(ref stdErrors); + } + } +} diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs index c7656822a7..5bbf80eb51 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs @@ -136,6 +136,7 @@ public static class Defaults private VBuffer[] _features; private float[] _labels; private float[] _weights; + protected int NumParams; // Stores the bounds of the chunk to be used by each thread. The 0th slot is 0. The length // is one more than the number of threads to use. @@ -151,6 +152,23 @@ public static class Defaults private static readonly TrainerInfo _info = new TrainerInfo(caching: true, supportIncrementalTrain: true); public override TrainerInfo Info => _info; + /// + /// Gets the number of useful training rows. + /// + public long GetNumGoodRows => NumGoodRows; + + /// + /// Gets the L2weight + /// + public float GetL2Weight => L2Weight; + + public int GetNumSelectedParams => NumParams; + + /// + /// Gets the training weights. 
+ /// + public VBuffer GetWeights => CurrentWeights; + internal LbfgsTrainerBase(IHostEnvironment env, string featureColumn, SchemaShape.Column labelColumn, string weightColumn, Action advancedSettings, float l1Weight, float l2Weight, @@ -230,7 +248,7 @@ private static TArgs ArgsInit(string featureColumn, SchemaShape.Column labelColu } protected virtual int ClassCount => 1; - protected int BiasCount => ClassCount; + public int BiasCount => ClassCount; protected int WeightCount => ClassCount * NumFeatures; protected virtual Optimizer InitializeOptimizer(IChannel ch, FloatLabelCursor.Factory cursorFactory, out VBuffer init, out ITerminationCriterion terminationCriterion) @@ -525,16 +543,16 @@ protected virtual void TrainCore(IChannel ch, RoleMappedData data) ch.Assert(CurrentWeights.Length == BiasCount + WeightCount); - int numParams = BiasCount; + NumParams = BiasCount; if ((L1Weight > 0 && !Quiet) || ShowTrainingStats) { - VBufferUtils.ForEachDefined(ref CurrentWeights, (index, value) => { if (index >= BiasCount && value != 0) numParams++; }); + VBufferUtils.ForEachDefined(ref CurrentWeights, (index, value) => { if (index >= BiasCount && value != 0) NumParams++; }); if (L1Weight > 0 && !Quiet) - ch.Info("L1 regularization selected {0} of {1} weights.", numParams, BiasCount + WeightCount); + ch.Info("L1 regularization selected {0} of {1} weights.", NumParams, BiasCount + WeightCount); } if (ShowTrainingStats) - ComputeTrainingStatistics(ch, cursorFactory, loss, numParams); + ComputeTrainingStatistics(ch, cursorFactory, loss); } // Ensure that the bias portion of vec is represented in vec. @@ -550,7 +568,7 @@ protected void EnsureBiases(ref VBuffer vec) protected abstract float AccumulateOneGradient(ref VBuffer feat, float label, float weight, ref VBuffer xDense, ref VBuffer grad, ref float[] scratch); - protected abstract void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss, int numParams); + protected abstract void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss); protected abstract void ProcessPriorDistribution(float label, float weight); /// diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs index 1b827c1e10..1394a7f3f5 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs @@ -15,6 +15,7 @@ using Microsoft.ML.Runtime.Learners; using Microsoft.ML.Runtime.Numeric; using Microsoft.ML.Runtime.Training; +//using Microsoft.ML.Trainers.HalLearners; [assembly: LoadableClass(LogisticRegression.Summary, typeof(LogisticRegression), typeof(LogisticRegression.Arguments), new[] { typeof(SignatureBinaryClassifierTrainer), typeof(SignatureTrainer), typeof(SignatureFeatureScorerTrainer) }, @@ -44,8 +45,8 @@ public sealed class Arguments : ArgumentsBase public bool ShowTrainingStats = false; } - protected Double PosWeight; - protected LinearModelStatistics Stats; + private Double _posWeight; + public LinearModelStatistics Stats; /// /// Initializes a new instance of @@ -76,7 +77,7 @@ public LogisticRegression(IHostEnvironment env, Host.CheckNonEmpty(featureColumn, nameof(featureColumn)); Host.CheckNonEmpty(labelColumn, nameof(labelColumn)); - PosWeight = 0; + _posWeight = 0; ShowTrainingStats = Args.ShowTrainingStats; } @@ -86,7 +87,7 @@ public 
LogisticRegression(IHostEnvironment env, public LogisticRegression(IHostEnvironment env, Arguments args) : base(env, args, TrainerUtils.MakeBoolScalarLabel(args.LabelColumn)) { - PosWeight = 0; + _posWeight = 0; ShowTrainingStats = Args.ShowTrainingStats; } @@ -140,7 +141,7 @@ protected override float AccumulateOneGradient(ref VBuffer feat, float la return weight * datumLoss; } - protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss, int numParams) + protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss) { Contracts.AssertValue(ch); Contracts.AssertValue(cursorFactory); @@ -148,7 +149,7 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. Contracts.Assert(WeightSum > 0); Contracts.Assert(BiasCount == 1); Contracts.Assert(loss >= 0); - Contracts.Assert(numParams >= BiasCount); + Contracts.Assert(NumParams >= BiasCount); Contracts.Assert(CurrentWeights.IsDense); ch.Info("Model trained with {0} training examples.", NumGoodRows); @@ -173,18 +174,18 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. deviance -= (float)regLoss * L1Weight * 2; } - ch.Info("Residual Deviance: \t{0} (on {1} degrees of freedom)", deviance, Math.Max(NumGoodRows - numParams, 0)); + ch.Info("Residual Deviance: \t{0} (on {1} degrees of freedom)", deviance, Math.Max(NumGoodRows - NumParams, 0)); // Compute null deviance, i.e., the deviance of null hypothesis. // Cap the prior positive rate at 1e-15. - Double priorPosRate = PosWeight / WeightSum; + Double priorPosRate = _posWeight / WeightSum; Contracts.Assert(0 <= priorPosRate && priorPosRate <= 1); float nullDeviance = (priorPosRate <= 1e-15 || 1 - priorPosRate <= 1e-15) ? 0f : (float)(2 * WeightSum * MathUtils.Entropy(priorPosRate, true)); ch.Info("Null Deviance: \t{0} (on {1} degrees of freedom)", nullDeviance, NumGoodRows - 1); // Compute AIC. - ch.Info("AIC: \t{0}", 2 * numParams + deviance); + ch.Info("AIC: \t{0}", 2 * NumParams + deviance); // Show the coefficients statistics table. var featureColIdx = cursorFactory.Data.Schema.Feature.Index; @@ -202,13 +203,13 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. int[] weightIndices = null; // Whether all weights are non-zero. - bool denseWeight = numParams == CurrentWeights.Length; + bool denseWeight = NumParams == CurrentWeights.Length; // Extract non-zero indices of weight. if (!denseWeight) { - weightIndices = new int[numParams]; - weightIndicesInvMap = new Dictionary(numParams); + weightIndices = new int[NumParams]; + weightIndicesInvMap = new Dictionary(NumParams); weightIndices[0] = 0; weightIndicesInvMap[0] = 0; int j = 1; @@ -221,17 +222,17 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. } } - Contracts.Assert(j == numParams); + Contracts.Assert(j == NumParams); } // Compute the standard error of coefficients. - long hessianDimension = (long)numParams * (numParams + 1) / 2; + long hessianDimension = (long)NumParams * (NumParams + 1) / 2; if (hessianDimension > int.MaxValue) { ch.Warning("The number of parameter is too large. Cannot hold the variance-covariance matrix in memory. " + "Skipping computation of standard errors and z-statistics of coefficients. 
Consider choosing a larger L1 regularizer" + "to reduce the number of parameters."); - Stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance); + Stats = new LinearModelStatistics(Host, NumGoodRows, NumParams, deviance, nullDeviance); return; } @@ -251,7 +252,7 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. // i is the array index of the diagonal entry at iRow-th row and iRow-th column. // iRow is one-based. int i = 0; - for (int iRow = 2; iRow <= numParams; iRow++) + for (int iRow = 2; iRow <= NumParams; iRow++) { i += iRow; hessian[i] = L2Weight; @@ -281,7 +282,7 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. int ioff = 1; // Increment remaining entries of hessian. - for (int i = 1; i < numParams; i++) + for (int i = 1; i < NumParams; i++) { ch.Assert(ioff == i * (i + 1) / 2); int wi = weightIndices == null ? i - 1 : weightIndices[i] - 1; @@ -329,14 +330,15 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. } } } - - Stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance); + Stats = new LinearModelStatistics(Host, NumGoodRows, NumParams, deviance, nullDeviance); + Stats.Hessian = hessian; + Stats.WeightIndices = weightIndices; } protected override void ProcessPriorDistribution(float label, float weight) { if (label > 0) - PosWeight += weight; + _posWeight += weight; } //Override default termination criterion MeanRelativeImprovementCriterion with diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs index 5e2003f3e1..33d70c5653 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs @@ -251,7 +251,7 @@ protected override MulticlassLogisticRegressionPredictor CreatePredictor() return new MulticlassLogisticRegressionPredictor(Host, ref CurrentWeights, _numClasses, NumFeatures, _labelNames, _stats); } - protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss, int numParams) + protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss) { Contracts.AssertValue(ch); Contracts.AssertValue(cursorFactory); @@ -259,7 +259,7 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. Contracts.Assert(WeightSum > 0); Contracts.Assert(BiasCount == _numClasses); Contracts.Assert(loss >= 0); - Contracts.Assert(numParams >= BiasCount); + Contracts.Assert(NumParams >= BiasCount); Contracts.Assert(CurrentWeights.IsDense); ch.Info("Model trained with {0} training examples.", NumGoodRows); @@ -299,10 +299,10 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. ch.Info("Null Deviance: \t{0}", nullDeviance); // Compute AIC. - ch.Info("AIC: \t{0}", 2 * numParams + deviance); + ch.Info("AIC: \t{0}", 2 * NumParams + deviance); // REVIEW: Figure out how to compute the statistics for the coefficients. 
- _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance); + _stats = new LinearModelStatistics(Host, NumGoodRows, NumParams, deviance, nullDeviance); } protected override void ProcessPriorDistribution(float label, float weight) diff --git a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs index 1c735d0c66..e8ab9c5d79 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs @@ -2,17 +2,16 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; -using System.Collections.Generic; -using System.ComponentModel; -using System.IO; -using System.Linq; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.Internal.CpuMath; using Microsoft.ML.Runtime.Internal.Utilities; using Microsoft.ML.Runtime.Learners; using Microsoft.ML.Runtime.Model; +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; // This is for deserialization from a model repository. [assembly: LoadableClass(typeof(LinearModelStatistics), null, typeof(SignatureLoadModel), @@ -82,7 +81,7 @@ private static VersionInfo GetVersionInfo() // It could be null when there are too many non-zero weights so that // the memory is insufficient to hold the Hessian matrix necessary for the computation // of the variance-covariance matrix. - private readonly VBuffer? _coeffStdError; + private VBuffer? _coeffStdError; public long TrainingExampleCount { get { return _trainingExampleCount; } } @@ -92,6 +91,11 @@ private static VersionInfo GetVersionInfo() public int ParametersCount { get { return _paramCount; } } + public Double[] Hessian; + + // Indices of bias and non-zero weight slots. + public int[] WeightIndices; + public LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance) { Contracts.AssertValue(env); @@ -285,6 +289,12 @@ private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stat }; } + public void SetCoeffStdError(ref VBuffer coeffStdError) + { + _env.Assert(coeffStdError.Count == _paramCount); + _coeffStdError = coeffStdError; + } + private IEnumerable GetUnorderedCoefficientStatistics(LinearBinaryPredictor parent, RoleMappedSchema schema) { Contracts.AssertValue(_env); diff --git a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs index 4de37a209e..e714756de9 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs @@ -155,7 +155,7 @@ protected override PoissonRegressionPredictor CreatePredictor() return new PoissonRegressionPredictor(Host, ref weights, bias); } - protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory factory, float loss, int numParams) + protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory factory, float loss) { // No-op by design. 
         }

diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs
index fc3b458145..295c2c2387 100644
--- a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs
+++ b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs
@@ -38,5 +38,20 @@ public void TestEstimatorPoissonRegression()
             TestEstimatorCore(pipe, dataView);
             Done();
         }
+
+        [Fact]
+        public void TestLogisticRegressionStats()
+        {
+            (IEstimator<ITransformer> pipe, IDataView dataView) = GetBinaryClassificationPipeline();
+
+            var trainer = new LogisticRegression(Env, "Features", "Label", advancedSettings: s => { s.ShowTrainingStats = true; });
+            pipe = pipe.Append(trainer);
+            var transformer = pipe.Fit(dataView);
+
+            using (var ch = Env.Start("Calculating STD for LR."))
+                trainer.ComputeExtendedTrainingStatistics(ch);
+
+            Done();
+        }
     }
 }

From e8fede2499358fe911dcb8629a95c533cae33d46 Mon Sep 17 00:00:00 2001
From: Senja Filipi
Date: Wed, 31 Oct 2018 11:06:03 -0700
Subject: [PATCH 03/14] Adding test checks

---
 .../LogisticRegressionTrainingStats.cs        |  2 +-
 .../Standard/ModelStatistics.cs               |  4 ++--
 .../TrainerEstimators/LbfgsTests.cs           | 11 +++++++++++
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs b/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs
index 5f7bf7441f..70554ad53d 100644
--- a/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs
+++ b/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs
@@ -82,7 +82,7 @@ public static void ComputeExtendedTrainingStatistics(this LogisticRegression tra
                 stdErrorValues[i] = (float)Math.Sqrt(stdErrorValues[i]);
 
             VBuffer<float> stdErrors = new VBuffer<float>(trainer.GetWeights.Length, trainer.GetNumSelectedParams, stdErrorValues, trainer.Stats.WeightIndices);
-            trainer.Stats.SetCoeffStdError(ref stdErrors);
+            trainer.Stats.SetCoeffStdError(stdErrors);
         }
     }
 }
diff --git a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs
index e8ab9c5d79..fd3f775e29 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs
@@ -226,7 +226,7 @@ public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias
             stdError = stats._coeffStdError.Value.Values[0];
             Contracts.Assert(stdError == stats._coeffStdError.Value.GetItemOrDefault(0));
             zScore = bias / stdError;
-            pValue = 1 - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2));
+            pValue = 1.0f - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2));
             return true;
         }
 
@@ -289,7 +289,7 @@ private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stat
             };
         }
 
-        public void SetCoeffStdError(ref VBuffer<float> coeffStdError)
+        public void SetCoeffStdError(VBuffer<float> coeffStdError)
         {
             _env.Assert(coeffStdError.Count == _paramCount);
             _coeffStdError = coeffStdError;
         }
diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs
index 295c2c2387..0f290fd0dd 100644
--- a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs
+++ b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs
@@ -48,9 +48,20 @@ public void TestLogisticRegressionStats()
             pipe = pipe.Append(trainer);
             var transformer = pipe.Fit(dataView);
 
+            LinearModelStatistics.TryGetBiasStatistics(trainer.Stats, 2, out float stdError, out float zScore, out float pValue);
+
+            Assert.Equal(0.0f, stdError);
+            Assert.Equal(0.0f,
zScore); + Assert.Equal(0.0f, pValue); + using (var ch = Env.Start("Calcuating STD for LR.")) trainer.ComputeExtendedTrainingStatistics(ch); + LinearModelStatistics.TryGetBiasStatistics(trainer.Stats, 2, out stdError, out zScore, out pValue); + + Assert.True(stdError > 0); + Assert.True(zScore > 0); + Done(); } } From 06f97042eb6bd3dc9f05c17aa06b8c70145713ff Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Wed, 31 Oct 2018 15:11:58 -0700 Subject: [PATCH 04/14] moving the calculations of the extended training statistics in the predictor. --- .../LogisticRegressionTrainingStats.cs | 33 ++++++++------- .../LogisticRegression/LbfgsPredictorBase.cs | 31 +++----------- .../LogisticRegression/LogisticRegression.cs | 41 +++++++++---------- .../MulticlassLogisticRegression.cs | 8 ++-- .../Standard/ModelStatistics.cs | 8 ++-- .../PoissonRegression/PoissonRegression.cs | 2 +- .../TrainerEstimators/LbfgsTests.cs | 15 ++++--- 7 files changed, 59 insertions(+), 79 deletions(-) diff --git a/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs b/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs index 70554ad53d..9c40d864ab 100644 --- a/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs +++ b/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs @@ -16,58 +16,57 @@ namespace Microsoft.ML.Runtime.Learners public static class LogisticRegressionTrainingStats { - public static void ComputeExtendedTrainingStatistics(this LogisticRegression trainer, IChannel ch) + public static void ComputeExtendedTrainingStatistics(this LinearBinaryPredictor model, IChannel ch, float l2Weight = LogisticRegression.Arguments.Defaults.L2Weight) { Contracts.AssertValue(ch); - Contracts.AssertValue(trainer.Stats, $"Training Statistics can get generated after training finishes. Train with setting: ShowTrainigStats set to true."); - Contracts.Assert(trainer.GetL2Weight > 0); - Contracts.Assert(trainer.GetNumGoodRows > 0); + Contracts.AssertValue(model.Statistics, $"Training Statistics can get generated after training finishes. Train with setting: ShowTrainigStats set to true."); + Contracts.Assert(l2Weight > 0); - ch.Info("Model trained with {0} training examples.", trainer.GetNumGoodRows); + int numSelectedParams = model.Statistics.ParametersCount; // Apply Cholesky Decomposition to find the inverse of the Hessian. Double[] invHessian = null; try { // First, find the Cholesky decomposition LL' of the Hessian. - Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, trainer.GetNumSelectedParams, trainer.Stats.Hessian); + Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, model.Statistics.Hessian); // Note that hessian is already modified at this point. It is no longer the original Hessian, // but instead represents the Cholesky decomposition L. // Also note that the following routine is supposed to consume the Cholesky decomposition L instead // of the original information matrix. - Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, trainer.GetNumSelectedParams, trainer.Stats.Hessian); + Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, model.Statistics.Hessian); // At this point, hessian should contain the inverse of the original Hessian matrix. // Swap hessian with invHessian to avoid confusion in the following context. 
- Utils.Swap(ref trainer.Stats.Hessian, ref invHessian); - Contracts.Assert(trainer.Stats.Hessian == null); + Utils.Swap(ref model.Statistics.Hessian, ref invHessian); + Contracts.Assert(model.Statistics.Hessian == null); } catch (DllNotFoundException) { throw ch.ExceptNotSupp("The MKL library (MklImports.dll) or one of its dependencies is missing."); } - float[] stdErrorValues = new float[trainer.GetNumSelectedParams]; + float[] stdErrorValues = new float[numSelectedParams]; stdErrorValues[0] = (float)Math.Sqrt(invHessian[0]); - for (int i = 1; i < trainer.GetNumSelectedParams; i++) + for (int i = 1; i < numSelectedParams; i++) { // Initialize with inverse Hessian. stdErrorValues[i] = (Single)invHessian[i * (i + 1) / 2 + i]; } - if (trainer.GetL2Weight > 0) + if (l2Weight > 0) { // Iterate through all entries of inverse Hessian to make adjustment to variance. // A discussion on ridge regularized LR coefficient covariance matrix can be found here: // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/ // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf int ioffset = 1; - for (int iRow = 1; iRow < trainer.GetNumSelectedParams; iRow++) + for (int iRow = 1; iRow < numSelectedParams; iRow++) { for (int iCol = 0; iCol <= iRow; iCol++) { var entry = (Single)invHessian[ioffset]; - var adjustment = -trainer.GetL2Weight * entry * entry; + var adjustment = -l2Weight * entry * entry; stdErrorValues[iRow] -= adjustment; if (0 < iCol && iCol < iRow) stdErrorValues[iCol] -= adjustment; @@ -78,11 +77,11 @@ public static void ComputeExtendedTrainingStatistics(this LogisticRegression tra Contracts.Assert(ioffset == invHessian.Length); } - for (int i = 1; i < trainer.GetNumSelectedParams; i++) + for (int i = 1; i < numSelectedParams; i++) stdErrorValues[i] = (float)Math.Sqrt(stdErrorValues[i]); - VBuffer stdErrors = new VBuffer(trainer.GetWeights.Length, trainer.GetNumSelectedParams, stdErrorValues, trainer.Stats.WeightIndices); - trainer.Stats.SetCoeffStdError(stdErrors); + VBuffer stdErrors = new VBuffer(model.Weights2.Count, numSelectedParams, stdErrorValues, model.Statistics.WeightIndices); + model.Statistics.SetCoeffStdError(stdErrors); } } } diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs index b93c3a86c0..766cf06152 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs @@ -136,7 +136,6 @@ public static class Defaults private VBuffer[] _features; private float[] _labels; private float[] _weights; - protected int NumParams; // Stores the bounds of the chunk to be used by each thread. The 0th slot is 0. The length // is one more than the number of threads to use. @@ -147,26 +146,6 @@ public static class Defaults private VBuffer[] _localGradients; private float[] _localLosses; - /// - /// Gets the number of useful training rows. - /// - public long GetNumGoodRows => NumGoodRows; - - /// - /// Gets the L2weight - /// - public float GetL2Weight => L2Weight; - - /// - /// Gets the number of parameters selected - /// - public int GetNumSelectedParams => NumParams; - - /// - /// Gets the training weights. - /// - public VBuffer GetWeights => CurrentWeights; - // REVIEW: It's pointless to request caching when we're going to load everything into // memory, that is, when using multiple threads. 
So should caching not be requested? private static readonly TrainerInfo _info = new TrainerInfo(caching: true, supportIncrementalTrain: true); @@ -574,16 +553,16 @@ protected virtual void TrainCore(IChannel ch, RoleMappedData data) ch.Assert(CurrentWeights.Length == BiasCount + WeightCount); - NumParams = BiasCount; + int numParams = BiasCount; if ((L1Weight > 0 && !Quiet) || ShowTrainingStats) { - VBufferUtils.ForEachDefined(ref CurrentWeights, (index, value) => { if (index >= BiasCount && value != 0) NumParams++; }); + VBufferUtils.ForEachDefined(ref CurrentWeights, (index, value) => { if (index >= BiasCount && value != 0) numParams++; }); if (L1Weight > 0 && !Quiet) - ch.Info("L1 regularization selected {0} of {1} weights.", NumParams, BiasCount + WeightCount); + ch.Info("L1 regularization selected {0} of {1} weights.", numParams, BiasCount + WeightCount); } if (ShowTrainingStats) - ComputeTrainingStatistics(ch, cursorFactory, loss); + ComputeTrainingStatistics(ch, cursorFactory, loss, numParams); } // Ensure that the bias portion of vec is represented in vec. @@ -599,7 +578,7 @@ protected void EnsureBiases(ref VBuffer vec) protected abstract float AccumulateOneGradient(ref VBuffer feat, float label, float weight, ref VBuffer xDense, ref VBuffer grad, ref float[] scratch); - protected abstract void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss); + protected abstract void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss, int numParams); protected abstract void ProcessPriorDistribution(float label, float weight); /// diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs index 1394a7f3f5..370810eb3c 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs @@ -15,7 +15,6 @@ using Microsoft.ML.Runtime.Learners; using Microsoft.ML.Runtime.Numeric; using Microsoft.ML.Runtime.Training; -//using Microsoft.ML.Trainers.HalLearners; [assembly: LoadableClass(LogisticRegression.Summary, typeof(LogisticRegression), typeof(LogisticRegression.Arguments), new[] { typeof(SignatureBinaryClassifierTrainer), typeof(SignatureTrainer), typeof(SignatureFeatureScorerTrainer) }, @@ -31,7 +30,7 @@ namespace Microsoft.ML.Runtime.Learners /// /// - public partial class LogisticRegression : LbfgsTrainerBase, ParameterMixingCalibratedPredictor> + public sealed partial class LogisticRegression : LbfgsTrainerBase, ParameterMixingCalibratedPredictor> { public const string LoadNameValue = "LogisticRegression"; internal const string UserNameValue = "Logistic Regression"; @@ -45,8 +44,8 @@ public sealed class Arguments : ArgumentsBase public bool ShowTrainingStats = false; } - private Double _posWeight; - public LinearModelStatistics Stats; + private double _posWeight; + private LinearModelStatistics _stats; /// /// Initializes a new instance of @@ -84,7 +83,7 @@ public LogisticRegression(IHostEnvironment env, /// /// Initializes a new instance of /// - public LogisticRegression(IHostEnvironment env, Arguments args) + internal LogisticRegression(IHostEnvironment env, Arguments args) : base(env, args, TrainerUtils.MakeBoolScalarLabel(args.LabelColumn)) { _posWeight = 0; @@ -141,7 +140,7 @@ protected override float AccumulateOneGradient(ref VBuffer feat, float la return weight * 
datumLoss; } - protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss) + protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss, int numParams) { Contracts.AssertValue(ch); Contracts.AssertValue(cursorFactory); @@ -149,7 +148,7 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. Contracts.Assert(WeightSum > 0); Contracts.Assert(BiasCount == 1); Contracts.Assert(loss >= 0); - Contracts.Assert(NumParams >= BiasCount); + Contracts.Assert(numParams >= BiasCount); Contracts.Assert(CurrentWeights.IsDense); ch.Info("Model trained with {0} training examples.", NumGoodRows); @@ -174,7 +173,7 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. deviance -= (float)regLoss * L1Weight * 2; } - ch.Info("Residual Deviance: \t{0} (on {1} degrees of freedom)", deviance, Math.Max(NumGoodRows - NumParams, 0)); + ch.Info("Residual Deviance: \t{0} (on {1} degrees of freedom)", deviance, Math.Max(NumGoodRows - numParams, 0)); // Compute null deviance, i.e., the deviance of null hypothesis. // Cap the prior positive rate at 1e-15. @@ -185,7 +184,7 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. ch.Info("Null Deviance: \t{0} (on {1} degrees of freedom)", nullDeviance, NumGoodRows - 1); // Compute AIC. - ch.Info("AIC: \t{0}", 2 * NumParams + deviance); + ch.Info("AIC: \t{0}", 2 * numParams + deviance); // Show the coefficients statistics table. var featureColIdx = cursorFactory.Data.Schema.Feature.Index; @@ -203,13 +202,13 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. int[] weightIndices = null; // Whether all weights are non-zero. - bool denseWeight = NumParams == CurrentWeights.Length; + bool denseWeight = numParams == CurrentWeights.Length; // Extract non-zero indices of weight. if (!denseWeight) { - weightIndices = new int[NumParams]; - weightIndicesInvMap = new Dictionary(NumParams); + weightIndices = new int[numParams]; + weightIndicesInvMap = new Dictionary(numParams); weightIndices[0] = 0; weightIndicesInvMap[0] = 0; int j = 1; @@ -222,17 +221,17 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. } } - Contracts.Assert(j == NumParams); + Contracts.Assert(j == numParams); } // Compute the standard error of coefficients. - long hessianDimension = (long)NumParams * (NumParams + 1) / 2; + long hessianDimension = (long)numParams * (numParams + 1) / 2; if (hessianDimension > int.MaxValue) { ch.Warning("The number of parameter is too large. Cannot hold the variance-covariance matrix in memory. " + "Skipping computation of standard errors and z-statistics of coefficients. Consider choosing a larger L1 regularizer" + "to reduce the number of parameters."); - Stats = new LinearModelStatistics(Host, NumGoodRows, NumParams, deviance, nullDeviance); + _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance); return; } @@ -252,7 +251,7 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. // i is the array index of the diagonal entry at iRow-th row and iRow-th column. // iRow is one-based. int i = 0; - for (int iRow = 2; iRow <= NumParams; iRow++) + for (int iRow = 2; iRow <= numParams; iRow++) { i += iRow; hessian[i] = L2Weight; @@ -282,7 +281,7 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. 
int ioff = 1; // Increment remaining entries of hessian. - for (int i = 1; i < NumParams; i++) + for (int i = 1; i < numParams; i++) { ch.Assert(ioff == i * (i + 1) / 2); int wi = weightIndices == null ? i - 1 : weightIndices[i] - 1; @@ -330,9 +329,9 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. } } } - Stats = new LinearModelStatistics(Host, NumGoodRows, NumParams, deviance, nullDeviance); - Stats.Hessian = hessian; - Stats.WeightIndices = weightIndices; + _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance); + _stats.Hessian = hessian; + _stats.WeightIndices = weightIndices; } protected override void ProcessPriorDistribution(float label, float weight) @@ -375,7 +374,7 @@ protected override ParameterMixingCalibratedPredictor CreatePredictor() CurrentWeights.GetItemOrDefault(0, ref bias); CurrentWeights.CopyTo(ref weights, 1, CurrentWeights.Length - 1); return new ParameterMixingCalibratedPredictor(Host, - new LinearBinaryPredictor(Host, ref weights, bias, Stats), + new LinearBinaryPredictor(Host, ref weights, bias, _stats), new PlattCalibrator(Host, -1, 0)); } diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs index 617bd717f0..70e896feef 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs @@ -251,7 +251,7 @@ protected override MulticlassLogisticRegressionPredictor CreatePredictor() return new MulticlassLogisticRegressionPredictor(Host, ref CurrentWeights, _numClasses, NumFeatures, _labelNames, _stats); } - protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss) + protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss, int numParams) { Contracts.AssertValue(ch); Contracts.AssertValue(cursorFactory); @@ -259,7 +259,7 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. Contracts.Assert(WeightSum > 0); Contracts.Assert(BiasCount == _numClasses); Contracts.Assert(loss >= 0); - Contracts.Assert(NumParams >= BiasCount); + Contracts.Assert(numParams >= BiasCount); Contracts.Assert(CurrentWeights.IsDense); ch.Info("Model trained with {0} training examples.", NumGoodRows); @@ -299,10 +299,10 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. ch.Info("Null Deviance: \t{0}", nullDeviance); // Compute AIC. - ch.Info("AIC: \t{0}", 2 * NumParams + deviance); + ch.Info("AIC: \t{0}", 2 * numParams + deviance); // REVIEW: Figure out how to compute the statistics for the coefficients. - _stats = new LinearModelStatistics(Host, NumGoodRows, NumParams, deviance, nullDeviance); + _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance); } protected override void ProcessPriorDistribution(float label, float weight) diff --git a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs index fd3f775e29..b9ccbe47cc 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs @@ -83,13 +83,13 @@ private static VersionInfo GetVersionInfo() // of the variance-covariance matrix. 
private VBuffer? _coeffStdError; - public long TrainingExampleCount { get { return _trainingExampleCount; } } + public long TrainingExampleCount => _trainingExampleCount; - public Single Deviance { get { return _deviance; } } + public Single Deviance => _deviance; - public Single NullDeviance { get { return _nullDeviance; } } + public Single NullDeviance => _nullDeviance; - public int ParametersCount { get { return _paramCount; } } + public int ParametersCount => _paramCount; public Double[] Hessian; diff --git a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs index e714756de9..4de37a209e 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs @@ -155,7 +155,7 @@ protected override PoissonRegressionPredictor CreatePredictor() return new PoissonRegressionPredictor(Host, ref weights, bias); } - protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory factory, float loss) + protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory factory, float loss, int numParams) { // No-op by design. } diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs index 0f290fd0dd..7f8c1e6f59 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs @@ -4,6 +4,7 @@ using Microsoft.ML.Core.Data; using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.Internal.Calibration; using Microsoft.ML.Runtime.Learners; using Microsoft.ML.Trainers; using Xunit; @@ -44,20 +45,22 @@ public void TestLogisticRegressionStats() { (IEstimator pipe, IDataView dataView) = GetBinaryClassificationPipeline(); - var trainer = new LogisticRegression(Env, "Features", "Label", advancedSettings: s=> { s.ShowTrainingStats = true; }); - pipe = pipe.Append(trainer); - var transformer = pipe.Fit(dataView); + pipe = pipe.Append(new LogisticRegression(Env, "Features", "Label", advancedSettings: s => { s.ShowTrainingStats = true; })); + var transformerChain = pipe.Fit(dataView) as TransformerChain>; - LinearModelStatistics.TryGetBiasStatistics(trainer.Stats, 2, out float stdError, out float zScore, out float pValue); + var linearModel = transformerChain.LastTransformer.Model.SubPredictor as LinearBinaryPredictor; + var stats = linearModel.Statistics; + + LinearModelStatistics.TryGetBiasStatistics(stats, 2, out float stdError, out float zScore, out float pValue); Assert.Equal(0.0f, stdError); Assert.Equal(0.0f, zScore); Assert.Equal(0.0f, pValue); using (var ch = Env.Start("Calcuating STD for LR.")) - trainer.ComputeExtendedTrainingStatistics(ch); + linearModel.ComputeExtendedTrainingStatistics(ch); - LinearModelStatistics.TryGetBiasStatistics(trainer.Stats, 2, out stdError, out zScore, out pValue); + LinearModelStatistics.TryGetBiasStatistics(stats, 2, out stdError, out zScore, out pValue); Assert.True(stdError > 0); Assert.True(zScore > 0); From 3831f5d7feeccecf7348b680ef301b33cad73eb8 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Wed, 7 Nov 2018 00:32:36 -0800 Subject: [PATCH 05/14] Creating two separate methods to compute the matrix of standartDeviations, one the old MKl way in the HAL Learners package, and the other making use of Math.Numerics --- build/Dependencies.props | 1 + 
.../LogisticRegressionTrainingStats.cs | 21 ++--
 .../Microsoft.ML.StandardLearners.csproj | 6 +-
 .../Standard/ModelStatistics.cs | 98 ++++++++++++++++++-
 .../TrainerEstimators/LbfgsTests.cs | 32 +++++-
 5 files changed, 142 insertions(+), 16 deletions(-)

diff --git a/build/Dependencies.props b/build/Dependencies.props
index 7a79b3a087..3f053444db 100644
--- a/build/Dependencies.props
+++ b/build/Dependencies.props
@@ -22,6 +22,7 @@
 4.5.0
 4.5.0
 1.10.0
+ 4.6.0

diff --git a/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs b/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs
index 9c40d864ab..adf823c42c 100644
--- a/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs
+++ b/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs
@@ -11,12 +11,17 @@ namespace Microsoft.ML.Runtime.Learners
 {
     using Mkl = OlsLinearRegressionTrainer.Mkl;

-    ///
-    ///
     public static class LogisticRegressionTrainingStats
     {
-
-        public static void ComputeExtendedTrainingStatistics(this LinearBinaryPredictor model, IChannel ch, float l2Weight = LogisticRegression.Arguments.Defaults.L2Weight)
+        /// <summary>
+        /// Computes the standard deviation matrix of each of the non-zero training weights, needed to further calculate the standard deviation,
+        /// p-value and z-Score.
+        /// This function performs the same calculations as the managed LinearModelStatistics.ComputeStd, but faster, because it makes use of Intel's MKL.
+        /// </summary>
+        /// <param name="model">A LinearBinaryPredictor obtained as a result of training with LogisticRegression.</param>
+        /// <param name="ch">The IChannel used for messaging.</param>
+        /// <param name="l2Weight">The L2Weight used for training. (Supply the same value that was used during training.)</param>
+        public static void ComputeStd(LinearBinaryPredictor model, IChannel ch, float l2Weight = LogisticRegression.Arguments.Defaults.L2Weight)
         {
             Contracts.AssertValue(ch);
             Contracts.AssertValue(model.Statistics, $"Training Statistics can get generated after training finishes. Train with setting: ShowTrainigStats set to true.");
             Contracts.Assert(l2Weight > 0);
@@ -58,8 +63,8 @@ public static void ComputeStd(LinearBinaryPredictor model, IChannel ch, float l2
         {
             // Iterate through all entries of inverse Hessian to make adjustment to variance.
// A discussion on ridge regularized LR coefficient covariance matrix can be found here: - // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/ - // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf + // http://www.aloki.hu/pdf/0402_171179.pdf (Equations 11 and 25) + // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf (Section "Significance testing in ridge logistic regression") int ioffset = 1; for (int iRow = 1; iRow < numSelectedParams; iRow++) { @@ -80,7 +85,9 @@ public static void ComputeExtendedTrainingStatistics(this LinearBinaryPredictor for (int i = 1; i < numSelectedParams; i++) stdErrorValues[i] = (float)Math.Sqrt(stdErrorValues[i]); - VBuffer stdErrors = new VBuffer(model.Weights2.Count, numSelectedParams, stdErrorValues, model.Statistics.WeightIndices); + // currentWeights vector size is Weights2 + the bias + var currentWeightsCount = model.Weights2.Count + 1; + VBuffer stdErrors = new VBuffer(currentWeightsCount, numSelectedParams, stdErrorValues, model.Statistics.WeightIndices); model.Statistics.SetCoeffStdError(stdErrors); } } diff --git a/src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj b/src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj index 8bd76945aa..9eb8c8cc58 100644 --- a/src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj +++ b/src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj @@ -1,4 +1,4 @@ - + netstandard2.0 @@ -6,6 +6,10 @@ true + + + + diff --git a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs index 0a128881dc..ff29b6375a 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using MathNet.Numerics.LinearAlgebra; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.Internal.CpuMath; @@ -96,7 +97,7 @@ private static VersionInfo GetVersionInfo() // Indices of bias and non-zero weight slots. 
public int[] WeightIndices; - public LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance) + internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance) { Contracts.AssertValue(env); env.Assert(trainingExampleCount > 0); @@ -115,7 +116,7 @@ internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, _coeffStdError = coeffStdError; } - public LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx) + internal LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx) { Contracts.CheckValue(env, nameof(env)); _env = env; @@ -161,7 +162,7 @@ public LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx) _coeffStdError = new VBuffer(length, _paramCount, stdErrorValues, stdErrorIndices); } - public static LinearModelStatistics Create(IHostEnvironment env, ModelLoadContext ctx) + internal static LinearModelStatistics Create(IHostEnvironment env, ModelLoadContext ctx) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(ctx, nameof(ctx)); @@ -212,6 +213,10 @@ private void SaveCore(ModelSaveContext ctx) ctx.Writer.WriteIntsNoCount(_coeffStdError.Value.Indices, _paramCount); } + /// + /// Computes the standart deviation, Z-Score and p-Value. + /// Should be called after . + /// public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias, out Single stdError, out Single zScore, out Single pValue) { if (!stats._coeffStdError.HasValue) @@ -230,6 +235,93 @@ public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias return true; } + /// + /// Computes the standart deviation matrix of each of the non-zero training weights, needed to calculate further the standart deviation, + /// p-value and z-Score. + /// If you need faster calculations, use the ComputeStd method from the Microsoft.ML.HALLearners package, which makes use of hardware acceleration. + /// + /// A obtained as a result of training with . + /// The used for messaging. + /// The L2Weight used for training. (Supply the same one that got used during training.) + public static void ComputeStd(LinearBinaryPredictor model, IChannel ch, float l2Weight = LogisticRegression.Arguments.Defaults.L2Weight) + { + Contracts.AssertValue(ch); + Contracts.AssertValue(model.Statistics, $"Training Statistics can get generated after training finishes. Train with setting: ShowTrainigStats set to true."); + Contracts.Assert(l2Weight > 0); + + int numSelectedParams = model.Statistics.ParametersCount; + + double[] hessian = model.Statistics.Hessian; + double[,] matrixHessian = new double[numSelectedParams, numSelectedParams]; + + int hessianLength = 0; + int dimention = numSelectedParams - 1; + + for (int row = dimention; row >= 0; row--) + { + for (int col = 0; col <= dimention; col++) + { + if ((row + col) <= dimention) + { + if ((row + col) == dimention) + { + matrixHessian[row, col] = hessian[hessianLength]; + } + else + { + matrixHessian[row, col] = hessian[hessianLength]; + matrixHessian[dimention - col, dimention - row] = hessian[hessianLength]; + } + hessianLength++; + } + else + continue; + } + } + + var h = Matrix.Build.DenseOfArray(matrixHessian); + var invers = h.Inverse(); + + float[] stdErrorValues2 = new float[numSelectedParams]; + stdErrorValues2[0] = (float)Math.Sqrt(invers[0, numSelectedParams - 1]); + + for (int i = 1; i < numSelectedParams; i++) + { + // Initialize with inverse Hessian. 
+ // The diagonal of the inverse Hessian. + stdErrorValues2[i] = (Single)invers[i, numSelectedParams - i - 1]; + } + + if (l2Weight > 0) + { + // Iterate through all entries of inverse Hessian to make adjustment to variance. + // A discussion on ridge regularized LR coefficient covariance matrix can be found here: + // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/ + // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf + int ioffset = 1; + for (int iRow = 1; iRow < numSelectedParams; iRow++) + { + for (int iCol = 0; iCol <= iRow; iCol++) + { + float entry = (float)invers[iRow, numSelectedParams - iCol - 1]; + var adjustment = -l2Weight * entry * entry; + stdErrorValues2[iRow] -= adjustment; + + if (0 < iCol && iCol < iRow) + stdErrorValues2[iCol] -= adjustment; + ioffset++; + } + } + } + + for (int i = 1; i < numSelectedParams; i++) + stdErrorValues2[i] = (float)Math.Sqrt(stdErrorValues2[i]); + + var currentWeightsCount = model.Weights2.Count + 1; // adding one for the bias + VBuffer stdErrors = new VBuffer(currentWeightsCount, numSelectedParams, stdErrorValues2, model.Statistics.WeightIndices); + model.Statistics.SetCoeffStdError(stdErrors); + } + private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stats, in VBuffer weights, in VBuffer> names, ref VBuffer estimate, ref VBuffer stdErr, ref VBuffer zScore, ref VBuffer pValue, out ValueGetter>> getSlotNames) { diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs index 7f8c1e6f59..15949e1c9f 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs @@ -50,7 +50,6 @@ public void TestLogisticRegressionStats() var linearModel = transformerChain.LastTransformer.Model.SubPredictor as LinearBinaryPredictor; var stats = linearModel.Statistics; - LinearModelStatistics.TryGetBiasStatistics(stats, 2, out float stdError, out float zScore, out float pValue); Assert.Equal(0.0f, stdError); @@ -58,14 +57,37 @@ public void TestLogisticRegressionStats() Assert.Equal(0.0f, pValue); using (var ch = Env.Start("Calcuating STD for LR.")) - linearModel.ComputeExtendedTrainingStatistics(ch); + LinearModelStatistics.ComputeStd(linearModel, ch); LinearModelStatistics.TryGetBiasStatistics(stats, 2, out stdError, out zScore, out pValue); - Assert.True(stdError > 0); - Assert.True(zScore > 0); + Assert.True(stdError == 0.250672936f); + Assert.True(zScore == 7.97852373f); + } - Done(); + [Fact] + public void TestLogisticRegressionStats_MKL() + { + (IEstimator pipe, IDataView dataView) = GetBinaryClassificationPipeline(); + + pipe = pipe.Append(new LogisticRegression(Env, "Features", "Label", advancedSettings: s => { s.ShowTrainingStats = true; })); + var transformerChain = pipe.Fit(dataView) as TransformerChain>; + + var linearModel = transformerChain.LastTransformer.Model.SubPredictor as LinearBinaryPredictor; + var stats = linearModel.Statistics; + LinearModelStatistics.TryGetBiasStatistics(stats, 2, out float stdError, out float zScore, out float pValue); + + Assert.Equal(0.0f, stdError); + Assert.Equal(0.0f, zScore); + Assert.Equal(0.0f, pValue); + + using (var ch = Env.Start("Calcuating STD for LR.")) + LogisticRegressionTrainingStats.ComputeStd(linearModel, ch); + + LinearModelStatistics.TryGetBiasStatistics(stats, 2, out stdError, out zScore, out pValue); + + Assert.True(stdError == 0.250672936f); + Assert.True(zScore == 7.97852373f); } } } From 
c638cbd23c88b5f228017b55b6aa60265201b62e Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Wed, 7 Nov 2018 15:48:01 -0800 Subject: [PATCH 06/14] refactoring the std computations in an interface --- .../LogisticRegressionTrainingStats.cs | 31 ++--- .../LogisticRegression/LogisticRegression.cs | 116 +++++++++++++++++- .../Standard/ModelStatistics.cs | 101 +-------------- .../TrainerEstimators/LbfgsTests.cs | 24 +--- .../InstanceInitializerAnalyzer.cs | 2 +- 5 files changed, 136 insertions(+), 138 deletions(-) diff --git a/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs b/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs index adf823c42c..21a48dc746 100644 --- a/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs +++ b/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs @@ -11,39 +11,42 @@ namespace Microsoft.ML.Runtime.Learners { using Mkl = OlsLinearRegressionTrainer.Mkl; - public static class LogisticRegressionTrainingStats + public sealed class ComputeLRTrainingStdThroughHal : IComputeLRTrainingStd { /// /// Computes the standart deviation matrix of each of the non-zero training weights, needed to calculate further the standart deviation, - /// p-value and z-Score. - /// This function performs the same calculations as but it is faster than it, because it makes use of Intel's MKL. + /// p-value and z-Score, making use of Intel's MKL for the matrix operations. /// - /// A obtained as a result of training with . + /// + /// + /// + /// /// The used for messaging. /// The L2Weight used for training. (Supply the same one that got used during training.) - public static void ComputeStd(LinearBinaryPredictor model, IChannel ch, float l2Weight = LogisticRegression.Arguments.Defaults.L2Weight) + public VBuffer ComputeStd(double[] hessian, int[] weightIndices, int numSelectedParams, int currentWeightsCount, IChannel ch, float l2Weight) { Contracts.AssertValue(ch); - Contracts.AssertValue(model.Statistics, $"Training Statistics can get generated after training finishes. Train with setting: ShowTrainigStats set to true."); + Contracts.AssertValue(hessian, $"Training Statistics can get generated after training finishes. Train with setting: ShowTrainigStats set to true."); + Contracts.AssertNonEmpty(weightIndices); + Contracts.Assert(numSelectedParams > 0); + Contracts.Assert(currentWeightsCount > 0); Contracts.Assert(l2Weight > 0); - int numSelectedParams = model.Statistics.ParametersCount; - // Apply Cholesky Decomposition to find the inverse of the Hessian. Double[] invHessian = null; try { // First, find the Cholesky decomposition LL' of the Hessian. - Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, model.Statistics.Hessian); + Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, hessian); // Note that hessian is already modified at this point. It is no longer the original Hessian, // but instead represents the Cholesky decomposition L. // Also note that the following routine is supposed to consume the Cholesky decomposition L instead // of the original information matrix. - Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, model.Statistics.Hessian); + Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, hessian); // At this point, hessian should contain the inverse of the original Hessian matrix. // Swap hessian with invHessian to avoid confusion in the following context. 
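// Illustrative aside (editor's sketch, not part of this patch): one MKL-free
// way to perform the same inversion, using MathNet.Numerics (the dependency
// patch 05 adds), is to unpack the packed lower triangle into a dense
// symmetric matrix and invert it through its Cholesky factorization; names
// below are illustrative, with n standing for numSelectedParams:
//
//     var h = Matrix<double>.Build.Dense(n, n);
//     for (int i = 0, k = 0; i < n; i++)
//         for (int j = 0; j <= i; j++, k++)
//             h[i, j] = h[j, i] = hessian[k];  // mirror the packed lower triangle
//     var inverse = h.Cholesky().Solve(Matrix<double>.Build.DenseIdentity(n));
//
// The diagonal of `inverse` then holds the unadjusted coefficient variances.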
- Utils.Swap(ref model.Statistics.Hessian, ref invHessian); - Contracts.Assert(model.Statistics.Hessian == null); + Utils.Swap(ref hessian, ref invHessian); + Contracts.Assert(hessian == null); } catch (DllNotFoundException) { @@ -86,9 +89,7 @@ public static void ComputeStd(LinearBinaryPredictor model, IChannel ch, float l2 stdErrorValues[i] = (float)Math.Sqrt(stdErrorValues[i]); // currentWeights vector size is Weights2 + the bias - var currentWeightsCount = model.Weights2.Count + 1; - VBuffer stdErrors = new VBuffer(currentWeightsCount, numSelectedParams, stdErrorValues, model.Statistics.WeightIndices); - model.Statistics.SetCoeffStdError(stdErrors); + return new VBuffer(currentWeightsCount, numSelectedParams, stdErrorValues, weightIndices); } } } diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs index a6476fd04b..8318b6ae5f 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs @@ -4,6 +4,7 @@ using System; using System.Collections.Generic; +using MathNet.Numerics.LinearAlgebra; using Microsoft.ML.Core.Data; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.CommandLine; @@ -42,6 +43,8 @@ public sealed class Arguments : ArgumentsBase { [Argument(ArgumentType.AtMostOnce, HelpText = "Show statistics of training examples.", ShortName = "stat", SortOrder = 50)] public bool ShowTrainingStats = false; + + public IComputeLRTrainingStd StdComputer; } private double _posWeight; @@ -78,6 +81,9 @@ public LogisticRegression(IHostEnvironment env, _posWeight = 0; ShowTrainingStats = Args.ShowTrainingStats; + + if (Args.StdComputer == null) + Args.StdComputer = new ComputeLRTrainingStd(); } /// @@ -88,6 +94,9 @@ internal LogisticRegression(IHostEnvironment env, Arguments args) { _posWeight = 0; ShowTrainingStats = Args.ShowTrainingStats; + + if (Args.StdComputer == null) + Args.StdComputer = new ComputeLRTrainingStd(); } public override PredictionKind PredictionKind => PredictionKind.BinaryClassification; @@ -329,9 +338,14 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. } } } - _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance); - _stats.Hessian = hessian; - _stats.WeightIndices = weightIndices; + + if (Args.StdComputer == null) + _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance); + else + { + var std = Args.StdComputer.ComputeStd(hessian, weightIndices, numParams, CurrentWeights.Count, ch, L2Weight); + _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance, std); + } } protected override void ProcessPriorDistribution(float label, float weight) @@ -398,4 +412,100 @@ public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironm () => LearnerEntryPointsUtils.FindColumn(host, input.TrainingData.Schema, input.WeightColumn)); } } + + public interface IComputeLRTrainingStd + { + VBuffer ComputeStd(double[] hessian, int[] weightIndices, int parametersCount, int currentWeightsCount, IChannel ch, float l2Weight); + } + + public sealed class ComputeLRTrainingStd: IComputeLRTrainingStd + { + /// + /// Computes the standart deviation matrix of each of the non-zero training weights, needed to calculate further the standart deviation, + /// p-value and z-Score. 
+ /// If you need faster calculations, use the ComputeStd method from the Microsoft.ML.HALLearners package, which makes use of hardware acceleration. + /// + /// + /// + /// + /// + /// The used for messaging. + /// The L2Weight used for training. (Supply the same one that got used during training.) + public VBuffer ComputeStd(double[] hessian, int[] weightIndices, int numSelectedParams, int currentWeightsCount, IChannel ch, float l2Weight) + { + Contracts.AssertValue(ch); + Contracts.AssertValue(hessian, $"Training Statistics can get generated after training finishes. Train with setting: ShowTrainigStats set to true."); + Contracts.AssertNonEmpty(weightIndices); + Contracts.Assert(numSelectedParams > 0); + Contracts.Assert(currentWeightsCount > 0); + Contracts.Assert(l2Weight > 0); + + double[,] matrixHessian = new double[numSelectedParams, numSelectedParams]; + + int hessianLength = 0; + int dimention = numSelectedParams - 1; + + for (int row = dimention; row >= 0; row--) + { + for (int col = 0; col <= dimention; col++) + { + if ((row + col) <= dimention) + { + if ((row + col) == dimention) + { + matrixHessian[row, col] = hessian[hessianLength]; + } + else + { + matrixHessian[row, col] = hessian[hessianLength]; + matrixHessian[dimention - col, dimention - row] = hessian[hessianLength]; + } + hessianLength++; + } + else + continue; + } + } + + var h = Matrix.Build.DenseOfArray(matrixHessian); + var invers = h.Inverse(); + + float[] stdErrorValues2 = new float[numSelectedParams]; + stdErrorValues2[0] = (float)Math.Sqrt(invers[0, numSelectedParams - 1]); + + for (int i = 1; i < numSelectedParams; i++) + { + // Initialize with inverse Hessian. + // The diagonal of the inverse Hessian. + stdErrorValues2[i] = (Single)invers[i, numSelectedParams - i - 1]; + } + + if (l2Weight > 0) + { + // Iterate through all entries of inverse Hessian to make adjustment to variance. + // A discussion on ridge regularized LR coefficient covariance matrix can be found here: + // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/ + // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf + int ioffset = 1; + for (int iRow = 1; iRow < numSelectedParams; iRow++) + { + for (int iCol = 0; iCol <= iRow; iCol++) + { + float entry = (float)invers[iRow, numSelectedParams - iCol - 1]; + var adjustment = -l2Weight * entry * entry; + stdErrorValues2[iRow] -= adjustment; + + if (0 < iCol && iCol < iRow) + stdErrorValues2[iCol] -= adjustment; + ioffset++; + } + } + } + + for (int i = 1; i < numSelectedParams; i++) + stdErrorValues2[i] = (float)Math.Sqrt(stdErrorValues2[i]); + + return new VBuffer(currentWeightsCount, numSelectedParams, stdErrorValues2, weightIndices); + } + } } diff --git a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs index ff29b6375a..90158e5fdc 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs @@ -82,7 +82,7 @@ private static VersionInfo GetVersionInfo() // It could be null when there are too many non-zero weights so that // the memory is insufficient to hold the Hessian matrix necessary for the computation // of the variance-covariance matrix. - private VBuffer? _coeffStdError; + private readonly VBuffer? 
_coeffStdError; public long TrainingExampleCount => _trainingExampleCount; @@ -92,11 +92,6 @@ private static VersionInfo GetVersionInfo() public int ParametersCount => _paramCount; - public Double[] Hessian; - - // Indices of bias and non-zero weight slots. - public int[] WeightIndices; - internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance) { Contracts.AssertValue(env); @@ -215,7 +210,6 @@ private void SaveCore(ModelSaveContext ctx) /// /// Computes the standart deviation, Z-Score and p-Value. - /// Should be called after . /// public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias, out Single stdError, out Single zScore, out Single pValue) { @@ -235,93 +229,6 @@ public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias return true; } - /// - /// Computes the standart deviation matrix of each of the non-zero training weights, needed to calculate further the standart deviation, - /// p-value and z-Score. - /// If you need faster calculations, use the ComputeStd method from the Microsoft.ML.HALLearners package, which makes use of hardware acceleration. - /// - /// A obtained as a result of training with . - /// The used for messaging. - /// The L2Weight used for training. (Supply the same one that got used during training.) - public static void ComputeStd(LinearBinaryPredictor model, IChannel ch, float l2Weight = LogisticRegression.Arguments.Defaults.L2Weight) - { - Contracts.AssertValue(ch); - Contracts.AssertValue(model.Statistics, $"Training Statistics can get generated after training finishes. Train with setting: ShowTrainigStats set to true."); - Contracts.Assert(l2Weight > 0); - - int numSelectedParams = model.Statistics.ParametersCount; - - double[] hessian = model.Statistics.Hessian; - double[,] matrixHessian = new double[numSelectedParams, numSelectedParams]; - - int hessianLength = 0; - int dimention = numSelectedParams - 1; - - for (int row = dimention; row >= 0; row--) - { - for (int col = 0; col <= dimention; col++) - { - if ((row + col) <= dimention) - { - if ((row + col) == dimention) - { - matrixHessian[row, col] = hessian[hessianLength]; - } - else - { - matrixHessian[row, col] = hessian[hessianLength]; - matrixHessian[dimention - col, dimention - row] = hessian[hessianLength]; - } - hessianLength++; - } - else - continue; - } - } - - var h = Matrix.Build.DenseOfArray(matrixHessian); - var invers = h.Inverse(); - - float[] stdErrorValues2 = new float[numSelectedParams]; - stdErrorValues2[0] = (float)Math.Sqrt(invers[0, numSelectedParams - 1]); - - for (int i = 1; i < numSelectedParams; i++) - { - // Initialize with inverse Hessian. - // The diagonal of the inverse Hessian. - stdErrorValues2[i] = (Single)invers[i, numSelectedParams - i - 1]; - } - - if (l2Weight > 0) - { - // Iterate through all entries of inverse Hessian to make adjustment to variance. 
- // A discussion on ridge regularized LR coefficient covariance matrix can be found here: - // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/ - // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf - int ioffset = 1; - for (int iRow = 1; iRow < numSelectedParams; iRow++) - { - for (int iCol = 0; iCol <= iRow; iCol++) - { - float entry = (float)invers[iRow, numSelectedParams - iCol - 1]; - var adjustment = -l2Weight * entry * entry; - stdErrorValues2[iRow] -= adjustment; - - if (0 < iCol && iCol < iRow) - stdErrorValues2[iCol] -= adjustment; - ioffset++; - } - } - } - - for (int i = 1; i < numSelectedParams; i++) - stdErrorValues2[i] = (float)Math.Sqrt(stdErrorValues2[i]); - - var currentWeightsCount = model.Weights2.Count + 1; // adding one for the bias - VBuffer stdErrors = new VBuffer(currentWeightsCount, numSelectedParams, stdErrorValues2, model.Statistics.WeightIndices); - model.Statistics.SetCoeffStdError(stdErrors); - } - private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stats, in VBuffer weights, in VBuffer> names, ref VBuffer estimate, ref VBuffer stdErr, ref VBuffer zScore, ref VBuffer pValue, out ValueGetter>> getSlotNames) { @@ -381,12 +288,6 @@ private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stat }; } - public void SetCoeffStdError(VBuffer coeffStdError) - { - _env.Assert(coeffStdError.Count == _paramCount); - _coeffStdError = coeffStdError; - } - private IEnumerable GetUnorderedCoefficientStatistics(LinearBinaryPredictor parent, RoleMappedSchema schema) { Contracts.AssertValue(_env); diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs index 15949e1c9f..8587cd6961 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs @@ -52,15 +52,6 @@ public void TestLogisticRegressionStats() var stats = linearModel.Statistics; LinearModelStatistics.TryGetBiasStatistics(stats, 2, out float stdError, out float zScore, out float pValue); - Assert.Equal(0.0f, stdError); - Assert.Equal(0.0f, zScore); - Assert.Equal(0.0f, pValue); - - using (var ch = Env.Start("Calcuating STD for LR.")) - LinearModelStatistics.ComputeStd(linearModel, ch); - - LinearModelStatistics.TryGetBiasStatistics(stats, 2, out stdError, out zScore, out pValue); - Assert.True(stdError == 0.250672936f); Assert.True(zScore == 7.97852373f); } @@ -70,22 +61,17 @@ public void TestLogisticRegressionStats_MKL() { (IEstimator pipe, IDataView dataView) = GetBinaryClassificationPipeline(); - pipe = pipe.Append(new LogisticRegression(Env, "Features", "Label", advancedSettings: s => { s.ShowTrainingStats = true; })); + pipe = pipe.Append(new LogisticRegression(Env, "Features", "Label", advancedSettings: s => { + s.ShowTrainingStats = true; + s.StdComputer = new ComputeLRTrainingStdThroughHal(); + })); + var transformerChain = pipe.Fit(dataView) as TransformerChain>; var linearModel = transformerChain.LastTransformer.Model.SubPredictor as LinearBinaryPredictor; var stats = linearModel.Statistics; LinearModelStatistics.TryGetBiasStatistics(stats, 2, out float stdError, out float zScore, out float pValue); - Assert.Equal(0.0f, stdError); - Assert.Equal(0.0f, zScore); - Assert.Equal(0.0f, pValue); - - using (var ch = Env.Start("Calcuating STD for LR.")) - LogisticRegressionTrainingStats.ComputeStd(linearModel, ch); - - LinearModelStatistics.TryGetBiasStatistics(stats, 2, out stdError, out zScore, out pValue); 
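// Editor's note (not part of this patch): the statistics checked in this test
// follow from the coefficient standard errors via the formulas in
// ModelStatistics.cs:
//
//     zScore = bias / stdError;
//     pValue = 1 - Erf(Math.Abs(zScore) / Math.Sqrt(2));  // two-sided normal p-value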
- Assert.True(stdError == 0.250672936f); Assert.True(zScore == 7.97852373f); } diff --git a/tools-local/Microsoft.ML.InternalCodeAnalyzer/InstanceInitializerAnalyzer.cs b/tools-local/Microsoft.ML.InternalCodeAnalyzer/InstanceInitializerAnalyzer.cs index c7ee67537a..c4bd2fe38c 100644 --- a/tools-local/Microsoft.ML.InternalCodeAnalyzer/InstanceInitializerAnalyzer.cs +++ b/tools-local/Microsoft.ML.InternalCodeAnalyzer/InstanceInitializerAnalyzer.cs @@ -18,7 +18,7 @@ public sealed class InstanceInitializerAnalyzer : DiagnosticAnalyzer internal const string DiagnosticId = "MSML_NoInstanceInitializers"; private const string Title = "No initializers on instance fields or properties"; - private const string Format = "Member {0} has a {1} initialier outside the constructor"; + private const string Format = "Member {0} has a {1} initializer outside the constructor"; private static DiagnosticDescriptor Rule = new DiagnosticDescriptor(DiagnosticId, Title, Format, Category, From dd9524edf1bfdae3998fc51370ecbdaec9a4ca8e Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Thu, 8 Nov 2018 10:34:41 -0800 Subject: [PATCH 07/14] fix visibility and xml comments --- .../LogisticRegressionTrainingStats.cs | 4 ++- .../LogisticRegression/LbfgsPredictorBase.cs | 16 +++++------ .../LogisticRegression/LogisticRegression.cs | 28 +++++++++++++++++++ 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs b/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs index 21a48dc746..071296c112 100644 --- a/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs +++ b/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs @@ -15,7 +15,9 @@ public sealed class ComputeLRTrainingStdThroughHal : IComputeLRTrainingStd { /// /// Computes the standart deviation matrix of each of the non-zero training weights, needed to calculate further the standart deviation, - /// p-value and z-Score, making use of Intel's MKL for the matrix operations. + /// p-value and z-Score. + /// If you need faster calculations, use the ComputeStd method from the Microsoft.ML.HALLearners package, which makes use of hardware acceleration. + /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients. 
/// 
 /// 
 /// 
 /// The used for messaging. 
 /// The L2Weight used for training. (Supply the same one that got used during training.) 
diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs
index 6b3f3fa734..f17e617e29 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs
@@ -92,14 +92,14 @@ public abstract class ArgumentsBase : LearnerInputBaseWithWeight
         [Argument(ArgumentType.AtMostOnce, HelpText = "Enforce non-negative weights", ShortName = "nn", SortOrder = 90)]
         public bool EnforceNonNegativity = Defaults.EnforceNonNegativity;

-        public static class Defaults
+        internal static class Defaults
         {
-            public const float L2Weight = 1;
-            public const float L1Weight = 1;
-            public const float OptTol = 1e-7f;
-            public const int MemorySize = 20;
-            public const int MaxIterations = int.MaxValue;
-            public const bool EnforceNonNegativity = false;
+            internal const float L2Weight = 1;
+            internal const float L1Weight = 1;
+            internal const float OptTol = 1e-7f;
+            internal const int MemorySize = 20;
+            internal const int MaxIterations = int.MaxValue;
+            internal const bool EnforceNonNegativity = false;
         }
     }

@@ -258,7 +258,7 @@ private static TArgs ArgsInit(string featureColumn, SchemaShape.Column labelColu
         }

         protected virtual int ClassCount => 1;
-        public int BiasCount => ClassCount;
+        protected int BiasCount => ClassCount;
         protected int WeightCount => ClassCount * NumFeatures;

         protected virtual Optimizer InitializeOptimizer(IChannel ch, FloatLabelCursor.Factory cursorFactory, out VBuffer init, out ITerminationCriterion terminationCriterion)
diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs
index 8318b6ae5f..bc177dcc63 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs
@@ -41,9 +41,23 @@ public sealed partial class LogisticRegression : LbfgsTrainerBase
+        /// If set to true, training statistics will be generated at the end of training.
+        /// If you have a large number of learned training parameters (more than 500),
+        /// generating the training statistics might take a few seconds.
+        /// More than 1000 weights might take a few minutes. For those cases, consider using the instance of ComputeLRTrainingStdThroughHal
+        /// present in the Microsoft.ML.HalLearners package. That computes the statistics using hardware acceleration.
+ /// public IComputeLRTrainingStd StdComputer; } @@ -413,8 +427,21 @@ public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironm } } + /// + /// Computes the standart deviation matrix of each of the non-zero training weights, needed to calculate further the standart deviation, + /// p-value and z-Score. + /// If you need fast calculations, use the implementation in the Microsoft.ML.HALLearners package, + /// which makes use of hardware acceleration. + /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients. + /// public interface IComputeLRTrainingStd { + /// + /// Computes the standart deviation matrix of each of the non-zero training weights, needed to calculate further the standart deviation, + /// p-value and z-Score. + /// If you need fast calculations, use the ComputeStd method from the Microsoft.ML.HALLearners package, which makes use of hardware acceleration. + /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients. + /// VBuffer ComputeStd(double[] hessian, int[] weightIndices, int parametersCount, int currentWeightsCount, IChannel ch, float l2Weight); } @@ -424,6 +451,7 @@ public sealed class ComputeLRTrainingStd: IComputeLRTrainingStd /// Computes the standart deviation matrix of each of the non-zero training weights, needed to calculate further the standart deviation, /// p-value and z-Score. /// If you need faster calculations, use the ComputeStd method from the Microsoft.ML.HALLearners package, which makes use of hardware acceleration. + /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients. /// /// /// From e540d6343206b781f69a2809675f59571aa4f00c Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Thu, 8 Nov 2018 10:58:58 -0800 Subject: [PATCH 08/14] tolerance around test numbr comparisons --- .../BaseTestBaseline.cs | 51 ++++++++++--------- .../TrainerEstimators/LbfgsTests.cs | 8 +-- 2 files changed, 32 insertions(+), 27 deletions(-) diff --git a/test/Microsoft.ML.TestFramework/BaseTestBaseline.cs b/test/Microsoft.ML.TestFramework/BaseTestBaseline.cs index f4443f6fc9..3fb0867696 100644 --- a/test/Microsoft.ML.TestFramework/BaseTestBaseline.cs +++ b/test/Microsoft.ML.TestFramework/BaseTestBaseline.cs @@ -535,29 +535,7 @@ private bool MatchNumberWithTolerance(MatchCollection firstCollection, MatchColl double f1 = double.Parse(firstCollection[i].ToString()); double f2 = double.Parse(secondCollection[i].ToString()); - // this follows the IEEE recommendations for how to compare floating point numbers - double allowedVariance = Math.Pow(10, -digitsOfPrecision); - double delta = Round(f1, digitsOfPrecision) - Round(f2, digitsOfPrecision); - // limitting to the digits we care about. - delta = Math.Round(delta, digitsOfPrecision); - - bool inRange = delta > -allowedVariance && delta < allowedVariance; - - // for some cases, rounding up is not beneficial - // so checking on whether the difference is significant prior to rounding, and failing only then. - // example, for 5 digits of precision. 
- // F1 = 1.82844949 Rounds to 1.8284
- // F2 = 1.8284502 Rounds to 1.8285
- // would fail the inRange == true check, but would suceed the following, and we doconsider those two numbers
- // (1.82844949 - 1.8284502) = -0.00000071
-
- if (!inRange)
- {
- delta = Math.Round(f1 - f2, digitsOfPrecision);
- inRange = delta >= -allowedVariance && delta <= allowedVariance;
- }
-
- if(!inRange)
+ if (!CompareNumbersWithTolerance(f1, f2, digitsOfPrecision))
 {
 return false;
 }
@@ -566,6 +544,33 @@ private bool MatchNumberWithTolerance(MatchCollection firstCollection, MatchColl
 return true;
 }

+ public bool CompareNumbersWithTolerance(double expected, double actual, int digitsOfPrecision = DigitsOfPrecision)
+ {
+ // This follows the IEEE recommendations for how to compare floating-point numbers.
+ double allowedVariance = Math.Pow(10, -digitsOfPrecision);
+ double delta = Round(expected, digitsOfPrecision) - Round(actual, digitsOfPrecision);
+ // Limiting the delta to the digits we care about.
+ delta = Math.Round(delta, digitsOfPrecision);
+
+ bool inRange = delta > -allowedVariance && delta < allowedVariance;
+
+ // In some cases rounding is not beneficial, so we also check whether the
+ // difference is significant prior to rounding, and fail only then.
+ // Example, for 5 digits of precision:
+ // F1 = 1.82844949 rounds to 1.8284
+ // F2 = 1.8284502 rounds to 1.8285
+ // which would fail the inRange check, but would succeed the following one,
+ // and we do consider those two numbers equal:
+ // (1.82844949 - 1.8284502) = -0.00000071
+
+ if (!inRange)
+ {
+ delta = Math.Round(expected - actual, digitsOfPrecision);
+ inRange = delta >= -allowedVariance && delta <= allowedVariance;
+ }
+
+ return inRange;
+ }
+
 private static double Round(double value, int digitsOfPrecision)
 {
 if ((value == 0) || double.IsInfinity(value) || double.IsNaN(value))
diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs
index 8587cd6961..5ebf27edae 100644
--- a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs
+++ b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs
@@ -52,8 +52,8 @@ public void TestLogisticRegressionStats()
 var stats = linearModel.Statistics;
 LinearModelStatistics.TryGetBiasStatistics(stats, 2, out float stdError, out float zScore, out float pValue);

- Assert.True(stdError == 0.250672936f);
- Assert.True(zScore == 7.97852373f);
+ Assert.True(CompareNumbersWithTolerance(stdError, 0.250672936));
+ Assert.True(CompareNumbersWithTolerance(zScore, 7.97852373));
 }

 [Fact]
 public void TestLogisticRegressionStats_MKL()
 {
 (IEstimator pipe, IDataView dataView) = GetBinaryClassificationPipeline();

 pipe = pipe.Append(new LogisticRegression(Env, "Features", "Label", advancedSettings: s => {
 s.ShowTrainingStats = true;
 s.StdComputer = new ComputeLRTrainingStdThroughHal();
 }));

 var transformerChain = pipe.Fit(dataView) as TransformerChain>;

 var linearModel = transformerChain.LastTransformer.Model.SubPredictor as LinearBinaryPredictor;
 var stats = linearModel.Statistics;
 LinearModelStatistics.TryGetBiasStatistics(stats, 2, out float stdError, out float zScore, out float pValue);

- Assert.True(stdError == 0.250672936f);
- Assert.True(zScore == 7.97852373f);
+ Assert.True(CompareNumbersWithTolerance(stdError, 0.250672936));
+ Assert.True(CompareNumbersWithTolerance(zScore, 7.97852373));
 }
 }
}

From fe29307efc072cec96255e10483919c1c6a5bd3c Mon Sep 17 00:00:00 2001
From: Senja Filipi
Date: Thu, 8 Nov 2018 23:47:20 -0800
Subject: [PATCH 09/14] addressing PR comments

---
 pkg/Microsoft.ML/Microsoft.ML.nupkgproj | 1 +
 .../LogisticRegressionTrainingStats.cs | 8 ++++----
 .../Standard/LogisticRegression/LogisticRegression.cs | 10 +++++-----
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/pkg/Microsoft.ML/Microsoft.ML.nupkgproj b/pkg/Microsoft.ML/Microsoft.ML.nupkgproj
index 75517c587e..f479d0e970 100644
--- a/pkg/Microsoft.ML/Microsoft.ML.nupkgproj
+++ b/pkg/Microsoft.ML/Microsoft.ML.nupkgproj
@@
-8,6 +8,7 @@ + diff --git a/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs b/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs index 071296c112..9aca098a35 100644 --- a/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs +++ b/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs @@ -28,7 +28,7 @@ public sealed class ComputeLRTrainingStdThroughHal : IComputeLRTrainingStd public VBuffer ComputeStd(double[] hessian, int[] weightIndices, int numSelectedParams, int currentWeightsCount, IChannel ch, float l2Weight) { Contracts.AssertValue(ch); - Contracts.AssertValue(hessian, $"Training Statistics can get generated after training finishes. Train with setting: ShowTrainigStats set to true."); + Contracts.AssertValue(hessian, nameof(hessian)); Contracts.AssertNonEmpty(weightIndices); Contracts.Assert(numSelectedParams > 0); Contracts.Assert(currentWeightsCount > 0); @@ -61,7 +61,7 @@ public VBuffer ComputeStd(double[] hessian, int[] weightIndices, int numS for (int i = 1; i < numSelectedParams; i++) { // Initialize with inverse Hessian. - stdErrorValues[i] = (Single)invHessian[i * (i + 1) / 2 + i]; + stdErrorValues[i] = (float)invHessian[i * (i + 1) / 2 + i]; } if (l2Weight > 0) @@ -75,8 +75,8 @@ public VBuffer ComputeStd(double[] hessian, int[] weightIndices, int numS { for (int iCol = 0; iCol <= iRow; iCol++) { - var entry = (Single)invHessian[ioffset]; - var adjustment = -l2Weight * entry * entry; + var entry = (float)invHessian[ioffset]; + var adjustment = l2Weight * entry * entry; stdErrorValues[iRow] -= adjustment; if (0 < iCol && iCol < iRow) stdErrorValues[iCol] -= adjustment; diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs index bc177dcc63..6e29b88dcd 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs @@ -96,7 +96,7 @@ public LogisticRegression(IHostEnvironment env, _posWeight = 0; ShowTrainingStats = Args.ShowTrainingStats; - if (Args.StdComputer == null) + if (ShowTrainingStats && Args.StdComputer == null) Args.StdComputer = new ComputeLRTrainingStd(); } @@ -109,7 +109,7 @@ internal LogisticRegression(IHostEnvironment env, Arguments args) _posWeight = 0; ShowTrainingStats = Args.ShowTrainingStats; - if (Args.StdComputer == null) + if (ShowTrainingStats && Args.StdComputer == null) Args.StdComputer = new ComputeLRTrainingStd(); } @@ -462,7 +462,7 @@ public sealed class ComputeLRTrainingStd: IComputeLRTrainingStd public VBuffer ComputeStd(double[] hessian, int[] weightIndices, int numSelectedParams, int currentWeightsCount, IChannel ch, float l2Weight) { Contracts.AssertValue(ch); - Contracts.AssertValue(hessian, $"Training Statistics can get generated after training finishes. Train with setting: ShowTrainigStats set to true."); + Contracts.AssertValue(hessian, nameof(hessian)); Contracts.AssertNonEmpty(weightIndices); Contracts.Assert(numSelectedParams > 0); Contracts.Assert(currentWeightsCount > 0); @@ -505,7 +505,7 @@ public VBuffer ComputeStd(double[] hessian, int[] weightIndices, int numS { // Initialize with inverse Hessian. // The diagonal of the inverse Hessian. 
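// (The Hessian matrix was assembled with its columns in reverse order — see the matrixHessian construction shown in a later patch — so the diagonal of the true inverse lands at [i, numSelectedParams - i - 1] rather than [i, i].)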
- stdErrorValues2[i] = (Single)invers[i, numSelectedParams - i - 1]; + stdErrorValues2[i] = (float)invers[i, numSelectedParams - i - 1]; } if (l2Weight > 0) @@ -520,7 +520,7 @@ public VBuffer ComputeStd(double[] hessian, int[] weightIndices, int numS for (int iCol = 0; iCol <= iRow; iCol++) { float entry = (float)invers[iRow, numSelectedParams - iCol - 1]; - var adjustment = -l2Weight * entry * entry; + var adjustment = l2Weight * entry * entry; stdErrorValues2[iRow] -= adjustment; if (0 < iCol && iCol < iRow) From 5386a8cc065a33b4b7332c44067e00a60f56bd10 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Fri, 9 Nov 2018 10:23:50 -0800 Subject: [PATCH 10/14] baselines have more information because of the added calculations --- .../EntryPoints/ensemble-model0-stats.txt | 12 ++++++-- .../EntryPoints/ensemble-model2-stats.txt | 12 ++++++-- .../ensemble-summary-key-value-pairs.txt | 20 +++++++++++++ .../Common/EntryPoints/ensemble-summary.txt | 30 +++++++++++++++++++ 4 files changed, 70 insertions(+), 4 deletions(-) diff --git a/test/BaselineOutput/Common/EntryPoints/ensemble-model0-stats.txt b/test/BaselineOutput/Common/EntryPoints/ensemble-model0-stats.txt index 5c5d36e4b6..057ef0ff87 100644 --- a/test/BaselineOutput/Common/EntryPoints/ensemble-model0-stats.txt +++ b/test/BaselineOutput/Common/EntryPoints/ensemble-model0-stats.txt @@ -5,6 +5,14 @@ #@ col={name={Residual Deviance} type=R4 src=1} #@ col={name={Null Deviance} type=R4 src=2} #@ col=AIC:R4:3 +#@ col=BiasEstimate:R4:4 +#@ col=BiasStandardError:R4:5 +#@ col=BiasZScore:R4:6 +#@ col=BiasPValue:R4:7 +#@ col=Estimate:R4:8-16 +#@ col=StandardError:R4:17-25 +#@ col=ZScore:R4:26-34 +#@ col=PValue:R4:35-43 #@ } -Count of training examples Residual Deviance Null Deviance AIC -521 98.29433 669.0935 118.294327 +Count of training examples Residual Deviance Null Deviance AIC BiasEstimate BiasStandardError BiasZScore BiasPValue Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 +521 98.29433 669.0935 118.294327 -5.120674 0.699818552 -7.31714535 0 2.353567 1.78653753 1.9442488 1.38072 1.0831089 2.43588924 1.61141682 1.34575915 -0.7715381 0.4267568 0.42040658 0.41370967 0.482155383 0.456691444 0.451504 0.4605175 0.478413582 0.342069477 5.5150075 4.249547 4.69954872 2.86364126 2.37164259 5.395056 3.4991436 2.81296182 -2.255501 5.96046448E-08 2.14576721E-05 2.62260437E-06 0.00418818 0.0177091956 5.96046448E-08 0.000466823578 0.00490885973 0.0241017938 diff --git a/test/BaselineOutput/Common/EntryPoints/ensemble-model2-stats.txt b/test/BaselineOutput/Common/EntryPoints/ensemble-model2-stats.txt index 152e94f64d..dbb2224574 100644 --- a/test/BaselineOutput/Common/EntryPoints/ensemble-model2-stats.txt +++ b/test/BaselineOutput/Common/EntryPoints/ensemble-model2-stats.txt @@ -5,6 +5,14 @@ #@ col={name={Residual Deviance} type=R4 src=1} #@ col={name={Null Deviance} type=R4 src=2} #@ col=AIC:R4:3 +#@ 
col=BiasEstimate:R4:4 +#@ col=BiasStandardError:R4:5 +#@ col=BiasZScore:R4:6 +#@ col=BiasPValue:R4:7 +#@ col=Estimate:R4:8-16 +#@ col=StandardError:R4:17-25 +#@ col=ZScore:R4:26-34 +#@ col=PValue:R4:35-43 #@ } -Count of training examples Residual Deviance Null Deviance AIC -520 94.1969452 673.3445 114.196945 +Count of training examples Residual Deviance Null Deviance AIC BiasEstimate BiasStandardError BiasZScore BiasPValue Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 +520 94.1969452 673.3445 114.196945 -4.860323 0.712811947 -6.81852055 0 2.143086 1.49418533 1.71121442 1.38318741 0.883200347 3.16845965 1.38684654 1.51904845 -0.8226236 0.430655479 0.4099987 0.4222687 0.4832917 0.457050323 0.457937717 0.445124656 0.4728626 0.338379949 4.976335 3.64436626 4.05243 2.86201358 1.93239188 6.918975 3.11563635 3.21245217 -2.43106484 6.556511E-07 0.0002681017 5.07235527E-05 0.00420969725 0.05331099 0 0.00183564425 0.00131618977 0.0150545239 diff --git a/test/BaselineOutput/Common/EntryPoints/ensemble-summary-key-value-pairs.txt b/test/BaselineOutput/Common/EntryPoints/ensemble-summary-key-value-pairs.txt index beeec64d77..d89d7a7619 100644 --- a/test/BaselineOutput/Common/EntryPoints/ensemble-summary-key-value-pairs.txt +++ b/test/BaselineOutput/Common/EntryPoints/ensemble-summary-key-value-pairs.txt @@ -14,6 +14,16 @@ Count of training examples: 521 Residual Deviance: 98.29433 Null Deviance: 669.0935 AIC: 118.2943 +(Bias): System.Single[] +Features.thickness: System.Single[] +Features.bare_nuclei: System.Single[] +Features.uniform_shape: System.Single[] +Features.uniform_size: System.Single[] +Features.bland_chromatin: System.Single[] +Features.adhesion: System.Single[] +Features.normal_nucleoli: System.Single[] +Features.epit_size: System.Single[] +Cat.1: System.Single[] Partition model 1 summary: Per-feature gain summary for the boosted tree ensemble: Features.uniform_size: 1 @@ -43,6 +53,16 @@ Count of training examples: 520 Residual Deviance: 94.19695 Null Deviance: 673.3445 AIC: 114.1969 +(Bias): System.Single[] +Features.bare_nuclei: System.Single[] +Features.thickness: System.Single[] +Features.uniform_shape: System.Single[] +Features.uniform_size: System.Single[] +Features.normal_nucleoli: System.Single[] +Features.bland_chromatin: System.Single[] +Features.adhesion: System.Single[] +Features.epit_size: System.Single[] +Cat.1: System.Single[] Partition model 3 summary: Per-feature gain summary for the boosted tree ensemble: Features.uniform_size: 1 diff --git a/test/BaselineOutput/Common/EntryPoints/ensemble-summary.txt b/test/BaselineOutput/Common/EntryPoints/ensemble-summary.txt index 50abe9df54..fadb2e27c8 100644 --- a/test/BaselineOutput/Common/EntryPoints/ensemble-summary.txt +++ b/test/BaselineOutput/Common/EntryPoints/ensemble-summary.txt @@ -17,6 +17,21 @@ Count of training examples: 521 Residual Deviance: 98.29433 Null Deviance: 
669.0935 AIC: 118.2943 + +Coefficients statistics: +Coefficient Estimate Std. Error z value Pr(>|z|) +(Bias) -5.120674 0.6998186 -7.317145 0 *** +Features.thickness 2.353567 0.4267568 5.515007 5.960464E-08 *** +Features.bare_nuclei 2.435889 0.451504 5.395056 5.960464E-08 *** +Features.uniform_shape 1.944249 0.4137097 4.699549 2.622604E-06 *** +Features.uniform_size 1.786538 0.4204066 4.249547 2.145767E-05 *** +Features.bland_chromatin 1.611417 0.4605175 3.499144 0.0004668236 *** +Features.adhesion 1.38072 0.4821554 2.863641 0.00418818 ** +Features.normal_nucleoli 1.345759 0.4784136 2.812962 0.00490886 ** +Features.epit_size 1.083109 0.4566914 2.371643 0.0177092 * +Cat.1 -0.7715381 0.3420695 -2.255501 0.02410179 * +--- +Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Partition model 1 summary: Per-feature gain summary for the boosted tree ensemble: @@ -50,6 +65,21 @@ Count of training examples: 520 Residual Deviance: 94.19695 Null Deviance: 673.3445 AIC: 114.1969 + +Coefficients statistics: +Coefficient Estimate Std. Error z value Pr(>|z|) +(Bias) -4.860323 0.7128119 -6.818521 0 *** +Features.bare_nuclei 3.16846 0.4579377 6.918975 0 *** +Features.thickness 2.143086 0.4306555 4.976335 6.556511E-07 *** +Features.uniform_shape 1.711214 0.4222687 4.05243 5.072355E-05 *** +Features.uniform_size 1.494185 0.4099987 3.644366 0.0002681017 *** +Features.normal_nucleoli 1.519048 0.4728626 3.212452 0.00131619 ** +Features.bland_chromatin 1.386847 0.4451247 3.115636 0.001835644 ** +Features.adhesion 1.383187 0.4832917 2.862014 0.004209697 ** +Features.epit_size 0.8832003 0.4570503 1.932392 0.05331099 . +Cat.1 -0.8226236 0.3383799 -2.431065 0.01505452 * +--- +Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Partition model 3 summary: Per-feature gain summary for the boosted tree ensemble: From fb897ed0734135356dd45cdd9a2a916008c7bdee Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Fri, 9 Nov 2018 11:26:08 -0800 Subject: [PATCH 11/14] more baseline updates. --- .../LogisticRegressionTrainingStats.cs | 1 - .../LogisticRegression/LogisticRegression.cs | 3 +-- .../Command/CommandTrainingLrWithStats-summary.txt | 12 ++++++++++++ test/BaselineOutput/Common/EntryPoints/lr-stats.txt | 12 ++++++++++-- 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs b/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs index 9aca098a35..5b834edd72 100644 --- a/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs +++ b/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs @@ -29,7 +29,6 @@ public VBuffer ComputeStd(double[] hessian, int[] weightIndices, int numS { Contracts.AssertValue(ch); Contracts.AssertValue(hessian, nameof(hessian)); - Contracts.AssertNonEmpty(weightIndices); Contracts.Assert(numSelectedParams > 0); Contracts.Assert(currentWeightsCount > 0); Contracts.Assert(l2Weight > 0); diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs index 6e29b88dcd..08317430ce 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs @@ -357,7 +357,7 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. 
_stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance); else { - var std = Args.StdComputer.ComputeStd(hessian, weightIndices, numParams, CurrentWeights.Count, ch, L2Weight); + var std = Args.StdComputer.ComputeStd(hessian, weightIndices, numParams, CurrentWeights.Length, ch, L2Weight); _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance, std); } } @@ -463,7 +463,6 @@ public VBuffer ComputeStd(double[] hessian, int[] weightIndices, int numS { Contracts.AssertValue(ch); Contracts.AssertValue(hessian, nameof(hessian)); - Contracts.AssertNonEmpty(weightIndices); Contracts.Assert(numSelectedParams > 0); Contracts.Assert(currentWeightsCount > 0); Contracts.Assert(l2Weight > 0); diff --git a/test/BaselineOutput/Common/Command/CommandTrainingLrWithStats-summary.txt b/test/BaselineOutput/Common/Command/CommandTrainingLrWithStats-summary.txt index 6d58cb8d2d..4bd1c57233 100644 --- a/test/BaselineOutput/Common/Command/CommandTrainingLrWithStats-summary.txt +++ b/test/BaselineOutput/Common/Command/CommandTrainingLrWithStats-summary.txt @@ -13,3 +13,15 @@ Count of training examples: 32561 Residual Deviance: 26705.74 Null Deviance: 35948.08 AIC: 26719.74 + +Coefficients statistics: +Coefficient Estimate Std. Error z value Pr(>|z|) +(Bias) -8.228298 0.1161297 -70.85435 0 *** +education-num 5.066041 0.1048074 48.33666 0 *** +capital-gain 18.58347 0.4694776 39.5833 0 *** +age 3.86064 0.1061118 36.38277 0 *** +hours-per-week 3.946534 0.1258723 31.35349 0 *** +capital-loss 2.81616 0.13793 20.41732 0 *** +fnlwgt 0.7489593 0.2048056 3.656927 0.0002553463 *** +--- +Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 diff --git a/test/BaselineOutput/Common/EntryPoints/lr-stats.txt b/test/BaselineOutput/Common/EntryPoints/lr-stats.txt index 8e04238c73..c467f102be 100644 --- a/test/BaselineOutput/Common/EntryPoints/lr-stats.txt +++ b/test/BaselineOutput/Common/EntryPoints/lr-stats.txt @@ -5,6 +5,14 @@ #@ col={name={Residual Deviance} type=R4 src=1} #@ col={name={Null Deviance} type=R4 src=2} #@ col=AIC:R4:3 +#@ col=BiasEstimate:R4:4 +#@ col=BiasStandardError:R4:5 +#@ col=BiasZScore:R4:6 +#@ col=BiasPValue:R4:7 +#@ col=Estimate:R4:8-16 +#@ col=StandardError:R4:17-25 +#@ col=ZScore:R4:26-34 +#@ col=PValue:R4:35-43 #@ } -Count of training examples Residual Deviance Null Deviance AIC -683 126.83107 884.350159 146.83107 +Count of training examples Residual Deviance Null Deviance AIC BiasEstimate BiasStandardError BiasZScore BiasPValue thickness uniform_size uniform_shape adhesion epit_size bare_nuclei bland_chromatin normal_nucleoli mitoses thickness uniform_size uniform_shape adhesion epit_size bare_nuclei bland_chromatin normal_nucleoli mitoses thickness uniform_size uniform_shape adhesion epit_size bare_nuclei bland_chromatin normal_nucleoli mitoses thickness uniform_size uniform_shape adhesion epit_size bare_nuclei bland_chromatin normal_nucleoli mitoses +683 126.83107 884.350159 146.83107 -6.186806 0.459383339 -13.4676332 0 2.65800762 1.68089855 1.944068 1.42514718 0.8536965 2.9325006 1.74816787 1.58165014 0.595681 0.455618978 0.429146379 0.431570023 0.479817748 0.470442533 0.4381438 0.469593167 0.4714128 0.467883229 5.83383846 3.916842 4.504641 2.97018433 1.814667 6.69301 3.72272849 3.35512757 1.27314031 0 8.9764595E-05 6.67572E-06 0.002976358 0.06957501 0 0.00019711256 0.0007933974 0.202968419 From 89301b4ec82ec0c2e3337a43e64090ffc97d0c0c Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Fri, 9 Nov 2018 23:41:42 -0800 Subject: 
[PATCH 12/14] post merge test fixes. --- test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs index 3c3b739c9a..30906c8940 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs @@ -45,7 +45,7 @@ public void TestLogisticRegressionStats() { (IEstimator pipe, IDataView dataView) = GetBinaryClassificationPipeline(); - pipe = pipe.Append(new LogisticRegression(Env, "Features", "Label", advancedSettings: s => { s.ShowTrainingStats = true; })); + pipe = pipe.Append(new LogisticRegression(Env, "Label", "Features", advancedSettings: s => { s.ShowTrainingStats = true; })); var transformerChain = pipe.Fit(dataView) as TransformerChain>; var linearModel = transformerChain.LastTransformer.Model.SubPredictor as LinearBinaryPredictor; @@ -61,7 +61,7 @@ public void TestLogisticRegressionStats_MKL() { (IEstimator pipe, IDataView dataView) = GetBinaryClassificationPipeline(); - pipe = pipe.Append(new LogisticRegression(Env, "Features", "Label", advancedSettings: s => { + pipe = pipe.Append(new LogisticRegression(Env, "Label", "Features", advancedSettings: s => { s.ShowTrainingStats = true; s.StdComputer = new ComputeLRTrainingStdThroughHal(); })); From c8d060ac09b6dc1d343a8e0656ae54e62b984bb5 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Mon, 12 Nov 2018 09:50:15 -0800 Subject: [PATCH 13/14] Eric's comments --- build/Dependencies.props | 2 +- ...s.cs => ComputeLRTrainingStdThroughHal.cs} | 10 +--- .../AssemblyInfo.cs | 1 + .../LogisticRegression/LogisticRegression.cs | 59 +++++++++++-------- .../Standard/ModelStatistics.cs | 1 - 5 files changed, 38 insertions(+), 35 deletions(-) rename src/Microsoft.ML.HalLearners/{LogisticRegressionTrainingStats.cs => ComputeLRTrainingStdThroughHal.cs} (89%) diff --git a/build/Dependencies.props b/build/Dependencies.props index 3f053444db..47f34e0e1e 100644 --- a/build/Dependencies.props +++ b/build/Dependencies.props @@ -9,6 +9,7 @@ 4.3.0 4.8.0 4.5.0 + 4.6.0 @@ -22,7 +23,6 @@ 4.5.0 4.5.0 1.10.0 - 4.6.0 diff --git a/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs b/src/Microsoft.ML.HalLearners/ComputeLRTrainingStdThroughHal.cs similarity index 89% rename from src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs rename to src/Microsoft.ML.HalLearners/ComputeLRTrainingStdThroughHal.cs index 5b834edd72..d55526d19d 100644 --- a/src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs +++ b/src/Microsoft.ML.HalLearners/ComputeLRTrainingStdThroughHal.cs @@ -25,7 +25,7 @@ public sealed class ComputeLRTrainingStdThroughHal : IComputeLRTrainingStd /// /// The used for messaging. /// The L2Weight used for training. (Supply the same one that got used during training.) 
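/// As a usage sketch, the trainer invokes this at the end of training; the call below is copied from the LogisticRegression.cs hunk in an earlier patch, and the surrounding variables (hessian, weightIndices, numParams, CurrentWeights, ch, L2Weight) come from that trainer context:
/// var std = Args.StdComputer.ComputeStd(hessian, weightIndices, numParams, CurrentWeights.Length, ch, L2Weight);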
- public VBuffer ComputeStd(double[] hessian, int[] weightIndices, int numSelectedParams, int currentWeightsCount, IChannel ch, float l2Weight) + public override VBuffer ComputeStd(double[] hessian, int[] weightIndices, int numSelectedParams, int currentWeightsCount, IChannel ch, float l2Weight) { Contracts.AssertValue(ch); Contracts.AssertValue(hessian, nameof(hessian)); @@ -74,12 +74,8 @@ public VBuffer ComputeStd(double[] hessian, int[] weightIndices, int numS { for (int iCol = 0; iCol <= iRow; iCol++) { - var entry = (float)invHessian[ioffset]; - var adjustment = l2Weight * entry * entry; - stdErrorValues[iRow] -= adjustment; - if (0 < iCol && iCol < iRow) - stdErrorValues[iCol] -= adjustment; - ioffset++; + var entry = (float)invHessian[ioffset++]; + AdjustVariance(entry, iRow, iCol, l2Weight, stdErrorValues); } } diff --git a/src/Microsoft.ML.StandardLearners/AssemblyInfo.cs b/src/Microsoft.ML.StandardLearners/AssemblyInfo.cs index 415752aa8d..671913b203 100644 --- a/src/Microsoft.ML.StandardLearners/AssemblyInfo.cs +++ b/src/Microsoft.ML.StandardLearners/AssemblyInfo.cs @@ -6,5 +6,6 @@ using Microsoft.ML; [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Legacy" + PublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.HalLearners" + PublicKey.Value)] [assembly: WantsToBeBestFriends] diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs index 08317430ce..cdc1aec49c 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs @@ -428,27 +428,40 @@ public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironm } /// - /// Computes the standart deviation matrix of each of the non-zero training weights, needed to calculate further the standart deviation, + /// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation, /// p-value and z-Score. /// If you need fast calculations, use the implementation in the Microsoft.ML.HALLearners package, /// which makes use of hardware acceleration. /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients. /// - public interface IComputeLRTrainingStd + public abstract class IComputeLRTrainingStd { /// - /// Computes the standart deviation matrix of each of the non-zero training weights, needed to calculate further the standart deviation, + /// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation, /// p-value and z-Score. /// If you need fast calculations, use the ComputeStd method from the Microsoft.ML.HALLearners package, which makes use of hardware acceleration. /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients. /// - VBuffer ComputeStd(double[] hessian, int[] weightIndices, int parametersCount, int currentWeightsCount, IChannel ch, float l2Weight); + public abstract VBuffer ComputeStd(double[] hessian, int[] weightIndices, int parametersCount, int currentWeightsCount, IChannel ch, float l2Weight); + + /// + /// Adjust the variance for regularized cases. 
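+ /// For a ridge weight l2Weight and an inverse-Hessian entry h, each affected variance estimate is reduced by l2Weight * h * h, as the method body below shows.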
+ /// + [BestFriend] + internal void AdjustVariance(float inverseEntry, int iRow, int iCol, float l2Weight, float[] stdErrorValues2) + { + var adjustment = l2Weight * inverseEntry * inverseEntry; + stdErrorValues2[iRow] -= adjustment; + + if (0 < iCol && iCol < iRow) + stdErrorValues2[iCol] -= adjustment; + } } public sealed class ComputeLRTrainingStd: IComputeLRTrainingStd { /// - /// Computes the standart deviation matrix of each of the non-zero training weights, needed to calculate further the standart deviation, + /// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation, /// p-value and z-Score. /// If you need faster calculations, use the ComputeStd method from the Microsoft.ML.HALLearners package, which makes use of hardware acceleration. /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients. @@ -459,7 +472,7 @@ public sealed class ComputeLRTrainingStd: IComputeLRTrainingStd /// /// The used for messaging. /// The L2Weight used for training. (Supply the same one that got used during training.) - public VBuffer ComputeStd(double[] hessian, int[] weightIndices, int numSelectedParams, int currentWeightsCount, IChannel ch, float l2Weight) + public override VBuffer ComputeStd(double[] hessian, int[] weightIndices, int numSelectedParams, int currentWeightsCount, IChannel ch, float l2Weight) { Contracts.AssertValue(ch); Contracts.AssertValue(hessian, nameof(hessian)); @@ -470,22 +483,22 @@ public VBuffer ComputeStd(double[] hessian, int[] weightIndices, int numS double[,] matrixHessian = new double[numSelectedParams, numSelectedParams]; int hessianLength = 0; - int dimention = numSelectedParams - 1; + int dimension = numSelectedParams - 1; - for (int row = dimention; row >= 0; row--) + for (int row = dimension; row >= 0; row--) { - for (int col = 0; col <= dimention; col++) + for (int col = 0; col <= dimension; col++) { - if ((row + col) <= dimention) + if ((row + col) <= dimension) { - if ((row + col) == dimention) + if ((row + col) == dimension) { matrixHessian[row, col] = hessian[hessianLength]; } else { matrixHessian[row, col] = hessian[hessianLength]; - matrixHessian[dimention - col, dimention - row] = hessian[hessianLength]; + matrixHessian[dimension - col, dimension - row] = hessian[hessianLength]; } hessianLength++; } @@ -497,42 +510,36 @@ public VBuffer ComputeStd(double[] hessian, int[] weightIndices, int numS var h = Matrix.Build.DenseOfArray(matrixHessian); var invers = h.Inverse(); - float[] stdErrorValues2 = new float[numSelectedParams]; - stdErrorValues2[0] = (float)Math.Sqrt(invers[0, numSelectedParams - 1]); + float[] stdErrorValues = new float[numSelectedParams]; + stdErrorValues[0] = (float)Math.Sqrt(invers[0, numSelectedParams - 1]); for (int i = 1; i < numSelectedParams; i++) { // Initialize with inverse Hessian. // The diagonal of the inverse Hessian. - stdErrorValues2[i] = (float)invers[i, numSelectedParams - i - 1]; + stdErrorValues[i] = (float)invers[i, numSelectedParams - i - 1]; } if (l2Weight > 0) { // Iterate through all entries of inverse Hessian to make adjustment to variance. 
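// (Each inverse-Hessian entry h read here contributes an l2Weight * h * h reduction through AdjustVariance, defined in the base class above.)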
// A discussion on ridge regularized LR coefficient covariance matrix can be found here: - // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/ - // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf - int ioffset = 1; + // http://www.aloki.hu/pdf/0402_171179.pdf (Equations 11 and 25) + // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf (Section "Significance testing in ridge logistic regression") for (int iRow = 1; iRow < numSelectedParams; iRow++) { for (int iCol = 0; iCol <= iRow; iCol++) { float entry = (float)invers[iRow, numSelectedParams - iCol - 1]; - var adjustment = l2Weight * entry * entry; - stdErrorValues2[iRow] -= adjustment; - - if (0 < iCol && iCol < iRow) - stdErrorValues2[iCol] -= adjustment; - ioffset++; + AdjustVariance(entry, iRow, iCol, l2Weight, stdErrorValues); } } } for (int i = 1; i < numSelectedParams; i++) - stdErrorValues2[i] = (float)Math.Sqrt(stdErrorValues2[i]); + stdErrorValues[i] = (float)Math.Sqrt(stdErrorValues[i]); - return new VBuffer(currentWeightsCount, numSelectedParams, stdErrorValues2, weightIndices); + return new VBuffer(currentWeightsCount, numSelectedParams, stdErrorValues, weightIndices); } } } diff --git a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs index 67c588a93c..1eeb043c01 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs @@ -2,7 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using MathNet.Numerics.LinearAlgebra; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.Internal.CpuMath; From 39ca55e899a27dc8474522f57f1e2acdcf6ac1cb Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Mon, 12 Nov 2018 10:37:34 -0800 Subject: [PATCH 14/14] renaming interface. --- .../ComputeLRTrainingStdThroughHal.cs | 2 +- .../LogisticRegression/LogisticRegression.cs | 23 +++++++++++-------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/Microsoft.ML.HalLearners/ComputeLRTrainingStdThroughHal.cs b/src/Microsoft.ML.HalLearners/ComputeLRTrainingStdThroughHal.cs index d55526d19d..66868c1c9a 100644 --- a/src/Microsoft.ML.HalLearners/ComputeLRTrainingStdThroughHal.cs +++ b/src/Microsoft.ML.HalLearners/ComputeLRTrainingStdThroughHal.cs @@ -11,7 +11,7 @@ namespace Microsoft.ML.Runtime.Learners { using Mkl = OlsLinearRegressionTrainer.Mkl; - public sealed class ComputeLRTrainingStdThroughHal : IComputeLRTrainingStd + public sealed class ComputeLRTrainingStdThroughHal : ComputeLRTrainingStd { /// /// Computes the standart deviation matrix of each of the non-zero training weights, needed to calculate further the standart deviation, diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs index 9c3e0595d5..60c81b0ed1 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs @@ -45,20 +45,20 @@ public sealed class Arguments : ArgumentsBase /// If set to true, training statistics will be generated at the end of training.
/// If you have a large number of learned training parameters (more than 500), /// generating the training statistics might take a few seconds. - /// More than 1000 weights might take a few minutes. For those cases consider using the instance of + /// More than 1000 weights might take a few minutes. For those cases consider using the instance of /// present in the Microsoft.ML.HalLearners package. That computes the statistics using hardware acceleration. /// [Argument(ArgumentType.AtMostOnce, HelpText = "Show statistics of training examples.", ShortName = "stat", SortOrder = 50)] public bool ShowTrainingStats = false; /// - /// The instance of that computes the training statistics at the end of training. + /// The instance of that computes the training statistics at the end of training. /// If you have a large number of learned training parameters (more than 500), /// generating the training statistics might take a few seconds. - /// More than 1000 weights might take a few minutes. For those cases consider using the instance of + /// More than 1000 weights might take a few minutes. For those cases consider using the instance of /// present in the Microsoft.ML.HalLearners package. That computes the statistics using hardware acceleration. /// - public IComputeLRTrainingStd StdComputer; + public ComputeLRTrainingStd StdComputer; } private double _posWeight; @@ -97,7 +97,7 @@ public LogisticRegression(IHostEnvironment env, ShowTrainingStats = Args.ShowTrainingStats; if (ShowTrainingStats && Args.StdComputer == null) - Args.StdComputer = new ComputeLRTrainingStd(); + Args.StdComputer = new ComputeLRTrainingStdImpl(); } /// @@ -110,7 +110,7 @@ internal LogisticRegression(IHostEnvironment env, Arguments args) ShowTrainingStats = Args.ShowTrainingStats; if (ShowTrainingStats && Args.StdComputer == null) - Args.StdComputer = new ComputeLRTrainingStd(); + Args.StdComputer = new ComputeLRTrainingStdImpl(); } public override PredictionKind PredictionKind => PredictionKind.BinaryClassification; @@ -430,11 +430,11 @@ public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironm /// /// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation, /// p-value and z-Score. - /// If you need fast calculations, use the implementation in the Microsoft.ML.HALLearners package, + /// If you need fast calculations, use the implementation in the Microsoft.ML.HALLearners package, /// which makes use of hardware acceleration. /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients. /// - public abstract class IComputeLRTrainingStd + public abstract class ComputeLRTrainingStd { /// /// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation, @@ -458,7 +458,12 @@ internal void AdjustVariance(float inverseEntry, int iRow, int iCol, float l2Wei } } - public sealed class ComputeLRTrainingStd: IComputeLRTrainingStd + /// + /// Extends ComputeLRTrainingStd, implementing ComputeStd making use of Math.Net Numerics. + /// If you need faster calculations (have non-sparse weight vectors of more than 300 features), use the instance of ComputeLRTrainingStd from the Microsoft.ML.HALLearners package, which makes use of hardware acceleration
+ /// + public sealed class ComputeLRTrainingStdImpl : ComputeLRTrainingStd { /// /// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation,