Skip to content

Commit

Permalink
GetSummaryDataView/Row implementation for Pca and Linear Predictors (d…
Browse files Browse the repository at this point in the history
…otnet#185)

* Implement `ICanGetSummaryAsIDataView` on `PcaPredictor` class
* Implement `ICanGetSummaryAsIRow` on `LinearPredictor` class
  • Loading branch information
ganik authored and eerhardt committed Jul 27, 2018
1 parent 6ff387b commit 6eb3551
Show file tree
Hide file tree
Showing 32 changed files with 15,142 additions and 58 deletions.
21 changes: 21 additions & 0 deletions src/Microsoft.ML.PCA/PcaTrainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ public static CommonOutputs.AnomalyDetectionOutput TrainPcaAnomaly(IHostEnvironm
// REVIEW: Move the predictor to a different file and fold EigenUtils.cs into this file.
public sealed class PcaPredictor : PredictorBase<Float>,
IValueMapper,
ICanGetSummaryAsIDataView,
ICanSaveInTextFormat, ICanSaveModel, ICanSaveSummary
{
public const string LoaderSignature = "pcaAnomExec";
Expand Down Expand Up @@ -468,6 +469,26 @@ public void SaveAsText(TextWriter writer, RoleMappedSchema schema)
}
}

/// <summary>
/// Produces a summary data view with two columns, "VectorName" and "VectorData".
/// The first <c>_rank</c> entries are the eigenvectors, labelled "EigenVector0",
/// "EigenVector1", ...; the final entry is the mean vector, labelled "MeanVector".
/// </summary>
/// <param name="schema">Not read by this implementation.</param>
/// <returns>The data view produced by the <see cref="ArrayDataViewBuilder"/>.</returns>
public IDataView GetSummaryDataView(RoleMappedSchema schema)
{
    var builder = new ArrayDataViewBuilder(Host);

    // One slot per eigenvector plus a trailing slot for the mean.
    var entryCount = _rank + 1;
    var vectorData = new VBuffer<Float>[entryCount];
    var vectorNames = new string[entryCount];

    var index = 0;
    while (index < _rank)
    {
        vectorNames[index] = "EigenVector" + index;
        vectorData[index] = _eigenVectors[index];
        index++;
    }

    // The mean vector always occupies the last slot.
    vectorNames[_rank] = "MeanVector";
    vectorData[_rank] = _mean;

    builder.AddColumn("VectorName", vectorNames);
    builder.AddColumn("VectorData", NumberType.R4, vectorData);
    return builder.GetDataView();
}

public ColumnType InputType
{
get { return _inputType; }
Expand Down
76 changes: 28 additions & 48 deletions src/Microsoft.ML.StandardLearners/Standard/LinearPredictor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ public abstract class LinearPredictor : PredictorBase<Float>,
ICanSaveInTextFormat,
ICanSaveInSourceCode,
ICanSaveModel,
ICanGetSummaryAsIRow,
ICanSaveSummary,
IPredictorWithFeatureWeights<Float>,
IWhatTheFeatureValueMapper,
Expand Down Expand Up @@ -343,6 +344,30 @@ public void SaveAsCode(TextWriter writer, RoleMappedSchema schema)

public abstract void SaveSummary(TextWriter writer, RoleMappedSchema schema);

/// <summary>
/// Builds a summary row with two columns: "Bias" (a single R4 value) and
/// "Weights" (an R4 vector of length <c>Weight.Length</c>). The slot names of the
/// feature column in <paramref name="schema"/> are wrapped in a one-column row and
/// passed along with the "Weights" column.
/// </summary>
/// <param name="schema">Schema from which the feature slot names are fetched.</param>
/// <returns>A row containing the bias and weights columns.</returns>
public virtual IRow GetSummaryIRowOrNull(RoleMappedSchema schema)
{
    var featureCount = Weight.Length;

    // Fetch the feature slot names and wrap them in a row of their own so they
    // can be handed to the weights column below.
    var slotNames = default(VBuffer<DvText>);
    MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, featureCount, ref slotNames);
    var slotNamesType = new VectorType(TextType.Instance, featureCount);
    var slotNamesColumn = RowColumnUtils.GetColumn(MetadataUtils.Kinds.SlotNames, slotNamesType, ref slotNames);
    var weightsMeta = RowColumnUtils.GetRow(null, slotNamesColumn);
    var weightsType = new VectorType(NumberType.R4, featureCount);

    // Copy to locals because the values are passed by ref.
    var biasValue = Bias;
    var biasColumn = RowColumnUtils.GetColumn("Bias", NumberType.R4, ref biasValue);
    var weightValues = Weight;
    var weightsColumn = RowColumnUtils.GetColumn("Weights", weightsType, ref weightValues, weightsMeta);

    return RowColumnUtils.GetRow(null, new IColumn[] { biasColumn, weightsColumn });
}

/// <summary>
/// Default implementation: always returns <c>null</c>. The method is virtual,
/// so derived predictors may override it.
/// </summary>
/// <param name="schema">Not read by this implementation.</param>
public virtual IRow GetStatsIRowOrNull(RoleMappedSchema schema) => null;

public abstract void SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICalibrator calibrator = null);

public virtual void GetFeatureWeights(ref VBuffer<Float> weights)
Expand All @@ -366,8 +391,7 @@ public ValueMapper<TSrc, VBuffer<Float>> GetWhatTheFeatureMapper<TSrc, TDstContr

public sealed partial class LinearBinaryPredictor : LinearPredictor,
ICanGetSummaryInKeyValuePairs,
IParameterMixer<Float>,
ICanGetSummaryAsIRow
IParameterMixer<Float>
{
public const string LoaderSignature = "Linear2CExec";
public const string RegistrationName = "LinearBinaryPredictor";
Expand Down Expand Up @@ -503,26 +527,7 @@ public IList<KeyValuePair<string, object>> GetSummaryInKeyValuePairs(RoleMappedS
return results;
}

/// <summary>
/// Builds a summary row with a "Bias" column and a "Weights" column. The feature
/// slot names taken from <paramref name="schema"/> are wrapped in a one-column row
/// and passed along with the "Weights" column.
/// </summary>
/// <param name="schema">Schema from which the feature slot names are fetched.</param>
/// <returns>A row containing the bias and weights columns.</returns>
public IRow GetSummaryIRowOrNull(RoleMappedSchema schema)
{
    var weightCount = Weight.Length;

    // Slot names of the feature column, packaged as a row of their own.
    var featureNames = default(VBuffer<DvText>);
    MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, weightCount, ref featureNames);
    var namesType = new VectorType(TextType.Instance, weightCount);
    var namesColumn = RowColumnUtils.GetColumn(MetadataUtils.Kinds.SlotNames, namesType, ref featureNames);
    var namesRow = RowColumnUtils.GetRow(null, namesColumn);
    var weightsType = new VectorType(NumberType.R4, weightCount);

    // Locals are required because the values are passed by ref.
    var biasValue = Bias;
    var biasColumn = RowColumnUtils.GetColumn("Bias", NumberType.R4, ref biasValue);
    var weightValues = Weight;
    var weightsColumn = RowColumnUtils.GetColumn("Weights", weightsType, ref weightValues, namesRow);

    return RowColumnUtils.GetRow(null, new IColumn[] { biasColumn, weightsColumn });
}

public IRow GetStatsIRowOrNull(RoleMappedSchema schema)
public override IRow GetStatsIRowOrNull(RoleMappedSchema schema)
{
if (_stats == null)
return null;
Expand Down Expand Up @@ -582,8 +587,7 @@ public override void SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICali

public sealed class LinearRegressionPredictor : RegressionPredictor,
IParameterMixer<Float>,
ICanGetSummaryInKeyValuePairs,
ICanGetSummaryAsIRow
ICanGetSummaryInKeyValuePairs
{
public const string LoaderSignature = "LinearRegressionExec";
public const string RegistrationName = "LinearRegressionPredictor";
Expand Down Expand Up @@ -663,30 +667,6 @@ public IList<KeyValuePair<string, object>> GetSummaryInKeyValuePairs(RoleMappedS

return results;
}

/// <summary>
/// Builds a summary row with a "Bias" column and a "Weights" column. The feature
/// slot names taken from <paramref name="schema"/> are wrapped in a one-column row
/// and passed along with the "Weights" column.
/// </summary>
/// <param name="schema">Schema from which the feature slot names are fetched.</param>
/// <returns>A row containing the bias and weights columns.</returns>
public IRow GetSummaryIRowOrNull(RoleMappedSchema schema)
{
    var length = Weight.Length;

    // Slot names of the feature column, packaged as a row of their own.
    var slotNames = default(VBuffer<DvText>);
    MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, length, ref slotNames);
    var slotNamesRow = RowColumnUtils.GetRow(
        null,
        RowColumnUtils.GetColumn(
            MetadataUtils.Kinds.SlotNames,
            new VectorType(TextType.Instance, length),
            ref slotNames));
    var weightsType = new VectorType(NumberType.R4, length);

    // Locals are required because the values are passed by ref.
    var biasValue = Bias;
    var biasColumn = RowColumnUtils.GetColumn("Bias", NumberType.R4, ref biasValue);
    var weightValues = Weight;
    var weightsColumn = RowColumnUtils.GetColumn("Weights", weightsType, ref weightValues, slotNamesRow);

    return RowColumnUtils.GetRow(null, new IColumn[] { biasColumn, weightsColumn });
}

/// <summary>
/// Always returns <c>null</c>.
/// </summary>
/// <param name="schema">Not read by this implementation.</param>
public IRow GetStatsIRowOrNull(RoleMappedSchema schema) => null;
}

public sealed class PoissonRegressionPredictor : RegressionPredictor, IParameterMixer<Float>
Expand Down
8 changes: 8 additions & 0 deletions test/BaselineOutput/Common/EntryPoints/lr-weights.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#@ TextLoader{
#@ header+
#@ sep=tab
#@ col=Bias:R4:0
#@ col=Weights:R4:1-9
#@ }
Bias thickness uniform_size uniform_shape adhesion epit_size bare_nuclei bland_chromatin normal_nucleoli mitoses
-6.186806 2.65800762 1.68089855 1.944068 1.42514718 0.8536965 2.9325006 1.74816787 1.58165014 0.595681
10 changes: 10 additions & 0 deletions test/BaselineOutput/Common/EntryPoints/mc-lr-stats.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#@ TextLoader{
#@ header+
#@ sep=tab
#@ col={name={Count of training examples} type=I8 src=0}
#@ col={name={Residual Deviance} type=R4 src=1}
#@ col={name={Null Deviance} type=R4 src=2}
#@ col=AIC:R4:3
#@ }
Count of training examples Residual Deviance Null Deviance AIC
683 119.098892 884.3502 159.098892
10 changes: 10 additions & 0 deletions test/BaselineOutput/Common/EntryPoints/mc-lr-weights.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#@ TextLoader{
#@ header+
#@ sep=tab
#@ col=Bias:R4:0
#@ col=Weights:R4:1-9
#@ col=ClassNames:TX:10
#@ }
Bias thickness uniform_size uniform_shape adhesion epit_size bare_nuclei bland_chromatin normal_nucleoli mitoses ClassNames
3.36404228 -1.579712 -0.8266232 -1.051891 -0.79305464 -0.386733949 -1.59106934 -1.01550019 -0.8356989 -0.332574666 Class_0
-3.36404562 1.57971311 0.826623559 1.051891 0.7930542 0.386735022 1.59107041 1.015499 0.8356983 0.332574 Class_1
Loading

0 comments on commit 6eb3551

Please sign in to comment.