-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Added convenience constructor for set of transforms (#380). #405
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
bfac765
20d9194
44e8e8d
d3c627e
5a34a16
1566ed0
e9b1023
05637d0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -237,6 +237,17 @@ private static VersionInfo GetVersionInfo() | |
private const string DropRegistrationName = "DropColumns"; | ||
private const string KeepRegistrationName = "KeepColumns"; | ||
|
||
/// <summary>
/// Convenience constructor for the public-facing API: builds the transform
/// directly from a list of column names instead of an <see cref="Arguments"/> object.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="input">The <see cref="IDataView"/> to transform, i.e. the output of the preceding transform or loader.</param>
/// <param name="columnsToDrop">Names of the columns to be dropped.</param>
public DropColumnsTransform(IHostEnvironment env, IDataView input, params string[] columnsToDrop)
    : this(env, new Arguments { Column = columnsToDrop }, input)
{
}
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In order to keep columns, we construct a `DropColumnsTransform` with `KeepArguments`. The fact that we do so in the underlying C# code is just because previously this little bit of confusion didn't matter, since we weren't advocating that people use this directly. However, since we now are, we ought to adjust accordingly. For that reason, I wonder if we can make the relationship explicit... public static class KeepColumnsTransform
{
public static IDataTransform Create(IHostEnvironment env, IDataView input, params string[] columnsToKeep)
=> new DropColumnsTransform(env, new KeepArguments() { Column = columnsToKeep }, input);
} or something along these lines. Once that ambiguity is resolved, we can change the "drop" creator back to a plain old constructor. #Closed |
||
|
||
/// <summary> | ||
/// Public constructor corresponding to SignatureDataTransform. | ||
/// </summary> | ||
|
@@ -383,4 +394,17 @@ public ValueGetter<TValue> GetGetter<TValue>(int col) | |
} | ||
} | ||
} | ||
|
||
/// <summary>
/// Public-facing entry point for keeping a chosen set of columns. The work is done by
/// <see cref="DropColumnsTransform"/> constructed with <see cref="DropColumnsTransform.KeepArguments"/>.
/// </summary>
public class KeepColumnsTransform
{
    /// <summary>
    /// Helper that creates a column-keeping transform for the public-facing API.
    /// </summary>
    /// <param name="env">The host environment.</param>
    /// <param name="input">The <see cref="IDataView"/> to transform, i.e. the output of the preceding transform or loader.</param>
    /// <param name="columnsToKeep">Names of the columns to be kept. All other columns will be removed.</param>
    /// <returns>
    /// A <see cref="DropColumnsTransform"/> built from <see cref="DropColumnsTransform.KeepArguments"/>,
    /// returned as an <see cref="IDataTransform"/>.
    /// </returns>
    public static IDataTransform Create(IHostEnvironment env, IDataView input, params string[] columnsToKeep)
    {
        var keepArgs = new DropColumnsTransform.KeepArguments { Column = columnsToKeep };
        return new DropColumnsTransform(env, keepArgs, input);
    }
}
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -76,6 +76,21 @@ public BootstrapSampleTransform(IHostEnvironment env, Arguments args, IDataView | |
_poolSize = args.PoolSize; | ||
} | ||
|
||
/// <summary>
/// Convenience constructor for the public-facing API: takes each option as a separate
/// parameter and forwards them packed into an <see cref="Arguments"/> object.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="input">The <see cref="IDataView"/> to transform, i.e. the output of the preceding transform or loader.</param>
/// <param name="complement">Whether this is the out-of-bag sample, that is, all those rows that are not selected by the transform.</param>
/// <param name="seed">The random seed. If unspecified, random state will instead be derived from the environment.</param>
/// <param name="shuffleInput">Whether we should attempt to shuffle the source data. On by default, but can be turned off for efficiency.</param>
/// <param name="poolSize">When shuffling the output, the number of output rows to keep in that pool. Note that shuffling of output is completely distinct from shuffling of input.</param>
public BootstrapSampleTransform(IHostEnvironment env, IDataView input, bool complement = false, uint? seed = null, bool shuffleInput = true, int poolSize = 1000)
    : this(
        env,
        new Arguments
        {
            Complement = complement,
            Seed = seed,
            ShuffleInput = shuffleInput,
            PoolSize = poolSize
        },
        input)
{
}
|
||
private BootstrapSampleTransform(IHost host, ModelLoadContext ctx, IDataView input) | ||
: base(host, input) | ||
{ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -120,6 +120,32 @@ public sealed class Arguments : TransformInputBase | |
|
||
public const string UserName = "Categorical Hash Transform"; | ||
|
||
/// <summary>
/// Helper that creates a <see cref="CategoricalHashTransform"/> for a single column, for the public-facing API.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="input">The <see cref="IDataView"/> to transform, i.e. the output of the preceding transform or loader.</param>
/// <param name="name">Name of the output column.</param>
/// <param name="source">Name of the column to be transformed. If this is null, '<paramref name="name"/>' will be used.</param>
/// <param name="hashBits">Number of bits to hash into. Must be between 1 and 30, inclusive.</param>
/// <param name="invertHash">Limit the number of keys used to generate the slot name to this many. 0 means no invert hashing, -1 means no limit.</param>
/// <param name="outputKind">Output kind: Bag (multi-set vector), Ind (indicator vector), or Key (index).</param>
public static IDataTransform Create(IHostEnvironment env, IDataView input, string name, string source = null, int hashBits = 16, int invertHash = 0, CategoricalTransform.OutputKind outputKind = CategoricalTransform.OutputKind.Bag)
{
    // A missing source means the column is transformed in place under its own name.
    var column = new Column { Source = source ?? name, Name = name };

    var args = new Arguments
    {
        Column = new[] { column },
        HashBits = hashBits,
        InvertHash = invertHash,
        OutputKind = outputKind
    };

    // Hand off to the Arguments-based overload.
    return Create(env, args, input);
}
|
||
public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input) | ||
{ | ||
Contracts.CheckValue(env, nameof(env)); | ||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Blank lines with trailing whitespace on them are evil. #Resolved