- 
                Notifications
    You must be signed in to change notification settings 
- Fork 1.9k
Normalization API helpers #446
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -468,16 +468,11 @@ private static List<IDataTransform> BacktrackPipe(IDataView dataPipe, out IDataV | |
| Contracts.AssertValue(dataPipe); | ||
|  | ||
| var transforms = new List<IDataTransform>(); | ||
| while (true) | ||
| while (dataPipe is IDataTransform xf) | ||
| { | ||
| // REVIEW: a malicious user could construct a loop in the Source chain, that would | ||
| // cause this method to iterate forever (and throw something when the list overflows). There's | ||
| // no way to insulate from ALL malicious behavior. | ||
|  | ||
| var xf = dataPipe as IDataTransform; | ||
| if (xf == null) | ||
| break; | ||
|  | ||
| transforms.Add(xf); | ||
| dataPipe = xf.Source; | ||
| Contracts.AssertValue(dataPipe); | ||
|  | @@ -514,11 +509,8 @@ public static bool AddNormalizerIfNeeded(IHostEnvironment env, IChannel ch, ITra | |
| { | ||
| if (autoNorm != NormalizeOption.Yes) | ||
| { | ||
| var nn = trainer as ITrainerEx; | ||
| DvBool isNormalized = DvBool.False; | ||
| if (nn == null || !nn.NeedNormalization || | ||
| (schema.TryGetMetadata(BoolType.Instance, MetadataUtils.Kinds.IsNormalized, featCol, ref isNormalized) && | ||
| isNormalized.IsTrue)) | ||
| if (trainer.NeedNormalization() != true || schema.IsNormalized(featCol)) | ||
| { | ||
| ch.Info("Not adding a normalizer."); | ||
| return false; | ||
|  | @@ -530,20 +522,13 @@ public static bool AddNormalizerIfNeeded(IHostEnvironment env, IChannel ch, ITra | |
| } | ||
| } | ||
| ch.Info("Automatically adding a MinMax normalization transform, use 'norm=Warn' or 'norm=No' to turn this behavior off."); | ||
| // Quote the feature column name | ||
| string quotedFeatureColumnName = featureColumn; | ||
| StringBuilder sb = new StringBuilder(); | ||
| if (CmdQuoter.QuoteValue(quotedFeatureColumnName, sb)) | ||
| quotedFeatureColumnName = sb.ToString(); | ||
| var component = new SubComponent<IDataTransform, SignatureDataTransform>("MinMax", string.Format("col={{ name={0} source={0} }}", quotedFeatureColumnName)); | ||
| var loader = view as IDataLoader; | ||
| if (loader != null) | ||
| { | ||
| view = CompositeDataLoader.Create(env, loader, | ||
| new KeyValuePair<string, SubComponent<IDataTransform, SignatureDataTransform>>(null, component)); | ||
| } | ||
| IDataView ApplyNormalizer(IHostEnvironment innerEnv, IDataView input) | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If my PR goes in first, you will be able to use convenience constructor here...:) #Closed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know, I even had a comment about that. :) But Ivan made me remove it. #Closed | ||
| => NormalizeTransform.CreateMinMaxNormalizer(innerEnv, input, featureColumn); | ||
|  | ||
| if (view is IDataLoader loader) | ||
| view = CompositeDataLoader.ApplyTransform(env, loader, tag: null, creationArgs: null, ApplyNormalizer); | ||
| else | ||
| view = component.CreateInstance(env, view); | ||
| view = ApplyNormalizer(env, view); | ||
| return true; | ||
| } | ||
| return false; | ||
|  | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(minor) Do we instead want to document what
IsNormalizedmeans? "Returns whether a column<insert what IsNormalized means>". #ResolvedThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure, I can add a bit more information. Though that information, I might prefer to make that part of the
Kindsstatic class, since the documentation of those are the primary source of truth. This is just something we added as a convenience on top of that.In reply to: 199596084 [](ancestors = 199596084)