@@ -20,17 +20,22 @@ public static void Example()
var data = mlContext.Data.LoadFromEnumerable(rawData);

// Construct the pipeline.
var pipeline = mlContext.Transforms.Conversion.ConvertType("SurvivedInt32", "Survived", DataKind.Int32);
var pipeline = mlContext.Transforms.Conversion.ConvertType(
"SurvivedInt32", "Survived", DataKind.Int32);

// Let's train our pipeline, and then apply it to the same data.
var transformer = pipeline.Fit(data);
var transformedData = transformer.Transform(data);

// Display original column 'Survived' (boolean) and converted column 'SurvivedInt32' (Int32)
var convertedData = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, true);
// Display original column 'Survived' (boolean) and converted column
// 'SurvivedInt32' (Int32)
var convertedData = mlContext.Data.CreateEnumerable<TransformedData>(
transformedData, true);

foreach (var item in convertedData)
Review comment (Member): add a blank line between any two lines that were broken down into multiple lines, but no blank line between the multiple breakdowns of a single line.
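For illustration, here is that convention applied to two statements from this same file, using the mlContext and transformedData already defined in the sample (a sketch of the requested style only, not part of the committed diff):

// A single statement broken across lines: no blank line inside it.
var pipeline = mlContext.Transforms.Conversion.ConvertType(
    "SurvivedInt32", "Survived", DataKind.Int32);

// Two separately wrapped statements: one blank line between them.
var convertedData = mlContext.Data.CreateEnumerable<TransformedData>(
    transformedData, true);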

{
Console.WriteLine("A:{0,-10} Aconv:{1}", item.Survived, item.SurvivedInt32);
Console.WriteLine("A:{0,-10} Aconv:{1}", item.Survived,
item.SurvivedInt32);
}

// Output
@@ -4,22 +4,33 @@

namespace Samples.Dynamic
{
// This example illustrates how to convert multiple columns of different types to one type, in this case System.Single.
// This is often a useful data transformation before concatenating the features together and passing them to a particular estimator.
// This example illustrates how to convert multiple columns of different types
// to one type, in this case System.Single.
// This is often a useful data transformation before concatenating the features
// together and passing them to a particular estimator.
public static class ConvertTypeMultiColumn
{
public static void Example()
{
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
// as well as the source of randomness.
// Create a new ML context, for ML.NET operations. It can be used for
// exception tracking and logging, as well as the source of randomness.
var mlContext = new MLContext(seed: 1);

var rawData = new[] {
new InputData() { Feature1 = true, Feature2 = "0.4", Feature3 = DateTime.Now, Feature4 = 0.145},
new InputData() { Feature1 = false, Feature2 = "0.5", Feature3 = DateTime.Today, Feature4 = 3.14},
new InputData() { Feature1 = false, Feature2 = "14", Feature3 = DateTime.Today, Feature4 = 0.2046},
new InputData() { Feature1 = false, Feature2 = "23", Feature3 = DateTime.Now, Feature4 = 0.1206},
new InputData() { Feature1 = true, Feature2 = "8904", Feature3 = DateTime.UtcNow, Feature4 = 8.09},
new InputData() { Feature1 = true, Feature2 = "0.4",
Feature3 = DateTime.Now, Feature4 = 0.145},

new InputData() { Feature1 = false, Feature2 = "0.5",
Feature3 = DateTime.Today, Feature4 = 3.14},

new InputData() { Feature1 = false, Feature2 = "14",
Feature3 = DateTime.Today, Feature4 = 0.2046},

new InputData() { Feature1 = false, Feature2 = "23",
Feature3 = DateTime.Now, Feature4 = 0.1206},

new InputData() { Feature1 = true, Feature2 = "8904",
Feature3 = DateTime.UtcNow, Feature4 = 8.09},
};

// Convert the data to an IDataView.
@@ -37,17 +48,20 @@ public static void Example()

// Let's fit our pipeline to the data.
var transformer = pipeline.Fit(data);
// Transforming the same data. This will add the 4 columns defined in the pipeline, containing the converted
// Transforming the same data. This will add the 4 columns defined in
// the pipeline, containing the converted values of the initial columns.
var transformedData = transformer.Transform(data);

// Shape the transformed data as a strongly typed IEnumerable.
var convertedData = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, true);
var convertedData = mlContext.Data.CreateEnumerable<TransformedData>(
transformedData, true);

// Printing the results.
Console.WriteLine("Converted1\t Converted2\t Converted3\t Converted4");
foreach (var item in convertedData)
Console.WriteLine($"\t{item.Converted1}\t {item.Converted2}\t\t {item.Converted3}\t {item.Converted4}");
Console.WriteLine($"\t{item.Converted1}\t {item.Converted2}\t\t " +
$"{item.Converted3}\t {item.Converted4}");

// Transformed data.
//
@@ -9,8 +9,8 @@ public static class Hash
{
public static void Example()
{
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
// as well as the source of randomness.
// Create a new ML context, for ML.NET operations. It can be used for
// exception tracking and logging, as well as the source of randomness.
var mlContext = new MLContext(seed: 1);

// Get a small dataset as an IEnumerable.
@@ -24,30 +24,40 @@ public static void Example()

var data = mlContext.Data.LoadFromEnumerable(rawData);

// Construct the pipeline that would hash the two columns and store the results in new columns.
// The first transform hashes the string column and the second transform hashes the integer column.
// Construct the pipeline that would hash the two columns and store the
// results in new columns. The first transform hashes the string column
// and the second transform hashes the integer column.
//
// Hashing is not a reversible operation, so there is no way to retrive the original value from the hashed value.
// Sometimes, for debugging, or model explainability, users will need to know what values in the original columns generated
// the values in the hashed columns, since the algorithms will mostly use the hashed values for further computations.
// The Hash method will preserve the mapping from the original values to the hashed values in the Annotations of the
// newly created column (column populated with the hashed values).
//
// Setting the maximumNumberOfInverts parameters to -1 will preserve the full map.
// If that parameter is left to the default 0 value, the mapping is not preserved.
var pipeline = mlContext.Transforms.Conversion.Hash("CategoryHashed", "Category", numberOfBits: 16, maximumNumberOfInverts: -1)
.Append(mlContext.Transforms.Conversion.Hash("AgeHashed", "Age", numberOfBits: 8));
// Hashing is not a reversible operation, so there is no way to retrieve
// the original value from the hashed value. Sometimes, for debugging,
// or model explainability, users will need to know what values in the
// original columns generated the values in the hashed columns, since
// the algorithms will mostly use the hashed values for further
// computations. The Hash method will preserve the mapping from the
// original values to the hashed values in the Annotations of the newly
// created column (column populated with the hashed values).
//
// Setting the maximumNumberOfInverts parameter to -1 will preserve the
// full map. If that parameter is left to the default 0 value, the
// mapping is not preserved.
var pipeline = mlContext.Transforms.Conversion.Hash("CategoryHashed",
"Category", numberOfBits: 16, maximumNumberOfInverts: -1)
.Append(mlContext.Transforms.Conversion.Hash("AgeHashed", "Age",
numberOfBits: 8));

// Let's fit our pipeline, and then apply it to the same data.
var transformer = pipeline.Fit(data);
var transformedData = transformer.Transform(data);

// Convert the post transformation from the IDataView format to an IEnumerable<TransformedData> for easy consumption.
var convertedData = mlContext.Data.CreateEnumerable<TransformedDataPoint>(transformedData, true);
// Convert the transformed data from the IDataView format to an
// IEnumerable<TransformedDataPoint> for easy consumption.
var convertedData = mlContext.Data.CreateEnumerable<
TransformedDataPoint>(transformedData, true);

Console.WriteLine("Category CategoryHashed\t Age\t AgeHashed");
foreach (var item in convertedData)
Console.WriteLine($"{item.Category}\t {item.CategoryHashed}\t\t {item.Age}\t {item.AgeHashed}");
Console.WriteLine($"{item.Category}\t {item.CategoryHashed}\t\t " +
$"{item.Age}\t {item.AgeHashed}");

// Expected data after the transformation.
//
@@ -58,20 +68,24 @@ public static void Example()
// MLB 36206 18 127
// MLS 6013 14 62

// For the Category column, where we set the maximumNumberOfInverts parameter, the names of the original categories,
// and their correspondance with the generated hash values is preserved in the Annotations in the format of indices and values.
// the indices array will have the hashed values, and the corresponding element, position-wise, in the values array will
// contain the original value.
// For the Category column, where we set the maximumNumberOfInverts
// parameter, the names of the original categories and their
// correspondence with the generated hash values are preserved in the
// Annotations in the format of indices and values. The indices array
// will have the hashed values, and the corresponding element,
// position-wise, in the values array will contain the original value.
//
// See below for an example on how to retrieve the mapping.
var slotNames = new VBuffer<ReadOnlyMemory<char>>();
transformedData.Schema["CategoryHashed"].Annotations.GetValue("KeyValues", ref slotNames);
transformedData.Schema["CategoryHashed"].Annotations.GetValue(
"KeyValues", ref slotNames);

var indices = slotNames.GetIndices();
var categoryNames = slotNames.GetValues();

for (int i = 0; i < indices.Length; i++)
Console.WriteLine($"The original value of the {indices[i]} category is {categoryNames[i]}");
Console.WriteLine($"The original value of the {indices[i]} " +
$"category is {categoryNames[i]}");

// Output Data
//
@@ -11,8 +11,8 @@ public class KeyToValueToKey
{
public static void Example()
{
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
// as well as the source of randomness.
// Create a new ML context, for ML.NET operations. It can be used for
// exception tracking and logging, as well as the source of randomness.
var mlContext = new MLContext();

// Get a small dataset as an IEnumerable.
@@ -27,25 +27,40 @@ public static void Example()

// A pipeline to convert the terms of the 'Review' column to keys,
// making use of default settings.
var defaultPipeline = mlContext.Transforms.Text.TokenizeIntoWords("TokenizedText", nameof(DataPoint.Review))
.Append(mlContext.Transforms.Conversion.MapValueToKey(nameof(TransformedData.Keys), "TokenizedText"));
var defaultPipeline = mlContext.Transforms.Text.TokenizeIntoWords(
"TokenizedText", nameof(DataPoint.Review)).Append(mlContext
.Transforms.Conversion.MapValueToKey(nameof(TransformedData.Keys),
"TokenizedText"));

Review comment (Member): please move .Append to a new line.
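For illustration, a sketch of the same defaultPipeline with .Append moved to its own line (the style the comment asks for, not the code as committed above):

var defaultPipeline = mlContext.Transforms.Text.TokenizeIntoWords(
    "TokenizedText", nameof(DataPoint.Review))
    .Append(mlContext.Transforms.Conversion.MapValueToKey(
        nameof(TransformedData.Keys), "TokenizedText"));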

// Another pipeline, that customizes the advanced settings of the ValueToKeyMappingEstimator.
// We can change the maximumNumberOfKeys to limit how many keys will get generated out of the set of words,
// and condition the order in which they get evaluated by changing keyOrdinality from the default ByOccurence (order in which they get encountered)
// to value/alphabetically.
var customizedPipeline = mlContext.Transforms.Text.TokenizeIntoWords("TokenizedText", nameof(DataPoint.Review))
.Append(mlContext.Transforms.Conversion.MapValueToKey(nameof(TransformedData.Keys), "TokenizedText", maximumNumberOfKeys: 10,
keyOrdinality: ValueToKeyMappingEstimator.KeyOrdinality.ByValue));
// Another pipeline, that customizes the advanced settings of the
// ValueToKeyMappingEstimator. We can change the maximumNumberOfKeys to
// limit how many keys will get generated out of the set of words, and
// condition the order in which they get evaluated by changing
// keyOrdinality from the default ByOccurrence (order in which they get
// encountered) to value/alphabetically.
var customizedPipeline = mlContext.Transforms.Text.TokenizeIntoWords(
"TokenizedText", nameof(DataPoint.Review)).Append(mlContext
.Transforms.Conversion.MapValueToKey(nameof(TransformedData.Keys),
"TokenizedText", maximumNumberOfKeys: 10, keyOrdinality:
ValueToKeyMappingEstimator.KeyOrdinality.ByValue));

// The transformed data.
var transformedDataDefault = defaultPipeline.Fit(trainData).Transform(trainData);
var transformedDataCustomized = customizedPipeline.Fit(trainData).Transform(trainData);
var transformedDataDefault = defaultPipeline.Fit(trainData).Transform(
trainData);

var transformedDataCustomized = customizedPipeline.Fit(trainData)
.Transform(trainData);

// Getting the resulting data as an IEnumerable.
// This will contain the newly created columns.
IEnumerable<TransformedData> defaultData = mlContext.Data.CreateEnumerable<TransformedData>(transformedDataDefault, reuseRowObject: false);
IEnumerable<TransformedData> customizedData = mlContext.Data.CreateEnumerable<TransformedData>(transformedDataCustomized, reuseRowObject: false);
IEnumerable<TransformedData> defaultData = mlContext.Data.
CreateEnumerable<TransformedData>(transformedDataDefault,
reuseRowObject: false);

IEnumerable<TransformedData> customizedData = mlContext.Data.
CreateEnumerable<TransformedData>(transformedDataCustomized,
reuseRowObject: false);

Console.WriteLine($"Keys");
foreach (var dataRow in defaultData)
Console.WriteLine($"{string.Join(',', dataRow.Keys)}");
@@ -65,13 +80,17 @@ public static void Example()
// 8,2,9,7,6,4
// 3,10,0,0,0
// 3,10,0,0,0,8
// Retrieve the original values, by appending the KeyToValue etimator to the existing pipelines
// to convert the keys back to the strings.
var pipeline = defaultPipeline.Append(mlContext.Transforms.Conversion.MapKeyToValue(nameof(TransformedData.Keys)));
// Retrieve the original values by appending the KeyToValue estimator to
// the existing pipelines to convert the keys back to the strings.
var pipeline = defaultPipeline.Append(mlContext.Transforms.Conversion
.MapKeyToValue(nameof(TransformedData.Keys)));

transformedDataDefault = pipeline.Fit(trainData).Transform(trainData);

// Preview of the DefaultColumnName column obtained.
var originalColumnBack = transformedDataDefault.GetColumn<VBuffer<ReadOnlyMemory<char>>>(transformedDataDefault.Schema[nameof(TransformedData.Keys)]);
var originalColumnBack = transformedDataDefault.GetColumn<VBuffer<
ReadOnlyMemory<char>>>(transformedDataDefault.Schema[nameof(
TransformedData.Keys)]);

foreach (var row in originalColumnBack)
{
@@ -7,14 +7,16 @@ namespace Samples.Dynamic
{
class MapKeyToBinaryVector
{
/// This example demonstrates the use of MapKeyToVector by mapping keys to floats[] of 0 and 1, representing the number in binary format.
/// Because the ML.NET KeyType maps the missing value to zero, counting starts at 1, so the uint values
/// converted to KeyTypes will appear skewed by one.
/// This example demonstrates the use of MapKeyToBinaryVector by mapping
/// keys to float[] of 0 and 1, representing the number in binary format.
/// Because the ML.NET KeyType maps the missing value to zero, counting
/// starts at 1, so the uint values converted to KeyTypes will appear
/// skewed by one.
/// See https://github.com/dotnet/machinelearning/blob/master/docs/code/IDataViewTypeSystem.md#key-types
public static void Example()
{
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
// as well as the source of randomness.
// Create a new ML context, for ML.NET operations. It can be used for
// exception tracking and logging, as well as the source of randomness.
var mlContext = new MLContext();

// Get a small dataset as an IEnumerable.
@@ -30,18 +32,21 @@ public static void Example()
var data = mlContext.Data.LoadFromEnumerable(rawData);

// Construct the ML.NET pipeline.
var pipeline = mlContext.Transforms.Conversion.MapKeyToBinaryVector("TimeframeVector", "Timeframe");
var pipeline = mlContext.Transforms.Conversion.MapKeyToBinaryVector(
"TimeframeVector", "Timeframe");

// Fits the pipeline to the data.
IDataView transformedData = pipeline.Fit(data).Transform(data);

// Getting the resulting data as an IEnumerable.
// This will contain the newly created columns.
IEnumerable<TransformedData> features = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: false);
IEnumerable<TransformedData> features = mlContext.Data.CreateEnumerable<
TransformedData>(transformedData, reuseRowObject: false);

Console.WriteLine($" Timeframe TimeframeVector");
foreach (var featureRow in features)
Console.WriteLine($"{featureRow.Timeframe}\t\t\t{string.Join(',', featureRow.TimeframeVector)}");
Console.WriteLine($"{featureRow.Timeframe}\t\t\t" +
$"{string.Join(',', featureRow.TimeframeVector)}");

// Timeframe TimeframeVector
// 10 0,1,0,0,1 //binary representation of 9, the original value