@@ -111,8 +111,8 @@ public sealed class Options : TransformInputBase
111111 [ Argument ( ArgumentType . AtMostOnce , HelpText = "Whether to keep numbers or remove them." , ShortName = "num" , SortOrder = 8 ) ]
112112 public bool KeepNumbers = TextNormalizingEstimator . Defaults . KeepNumbers ;
113113
114- [ Argument ( ArgumentType . AtMostOnce , HelpText = "Whether to output the transformed text tokens as an additional column ." , ShortName = "tokens,showtext,showTransformedText" , SortOrder = 9 ) ]
115- public bool OutputTokens ;
114+ [ Argument ( ArgumentType . AtMostOnce , HelpText = "Column containing the transformed text tokens." , ShortName = "OutputTokens, tokens,showtext,showTransformedText" , SortOrder = 9 ) ]
115+ public string OutputTokensColumnName ;
116116
117117 [ Argument ( ArgumentType . Multiple , HelpText = "A dictionary of whitelisted terms." , ShortName = "dict" , NullName = "<None>" , SortOrder = 10 , Hide = true ) ]
118118 internal TermLoaderArguments Dictionary ;
@@ -225,7 +225,7 @@ private sealed class TransformApplierParams
225225 public readonly bool KeepDiacritics ;
226226 public readonly bool KeepPunctuations ;
227227 public readonly bool KeepNumbers ;
228- public readonly bool OutputTextTokens ;
228+ public readonly string OutputTextTokensColumnName ;
229229 public readonly TermLoaderArguments Dictionary ;
230230
231231 public StopWordsRemovingEstimator . Language StopwordsLanguage
@@ -252,7 +252,7 @@ internal LpNormNormalizingEstimatorBase.NormFunction LpNorm
252252
253253 // These properties encode the logic needed to determine which transforms to apply.
254254 #region NeededTransforms
255- public bool NeedsWordTokenizationTransform { get { return WordExtractorFactory != null || UsePredefinedStopWordRemover || OutputTextTokens ; } }
255+ public bool NeedsWordTokenizationTransform { get { return WordExtractorFactory != null || UsePredefinedStopWordRemover || ! string . IsNullOrEmpty ( OutputTextTokensColumnName ) ; } }
256256
257257 public bool NeedsNormalizeTransform
258258 {
@@ -303,7 +303,7 @@ public TransformApplierParams(TextFeaturizingEstimator parent)
303303 KeepDiacritics = parent . OptionalSettings . KeepDiacritics ;
304304 KeepPunctuations = parent . OptionalSettings . KeepPunctuations ;
305305 KeepNumbers = parent . OptionalSettings . KeepNumbers ;
306- OutputTextTokens = parent . OptionalSettings . OutputTokens ;
306+ OutputTextTokensColumnName = parent . OptionalSettings . OutputTokensColumnName ;
307307 Dictionary = parent . _dictionary ;
308308 }
309309 }
@@ -316,8 +316,6 @@ public TransformApplierParams(TextFeaturizingEstimator parent)
316316
317317 internal const Language DefaultLanguage = Language . English ;
318318
319- private const string TransformedTextColFormat = "{0}_TransformedText" ;
320-
321319 internal TextFeaturizingEstimator ( IHostEnvironment env , string outputColumnName , string inputColumnName = null )
322320 : this ( env , outputColumnName , new [ ] { inputColumnName ?? outputColumnName } )
323321 {
@@ -434,10 +432,10 @@ public ITransformer Fit(IDataView input)
434432 wordFeatureCol = dstCol ;
435433 }
436434
437- if ( tparams . OutputTextTokens )
435+ if ( ! string . IsNullOrEmpty ( tparams . OutputTextTokensColumnName ) )
438436 {
439437 string [ ] srcCols = wordTokCols ?? textCols ;
440- view = new ColumnConcatenatingTransformer ( h , string . Format ( TransformedTextColFormat , OutputColumn ) , srcCols ) . Transform ( view ) ;
438+ view = new ColumnConcatenatingTransformer ( h , tparams . OutputTextTokensColumnName , srcCols ) . Transform ( view ) ;
441439 }
442440
443441 if ( tparams . CharExtractorFactory != null )
@@ -506,7 +504,7 @@ public ITransformer Fit(IDataView input)
506504 // Otherwise, simply use the slot names, omitting the original source column names
507505 // entirely. For the Concat transform setting the Key == Value of the TaggedColumn
508506 // KVP signals this intent.
509- Contracts . Assert ( charFeatureCol != null || wordFeatureCol != null || tparams . OutputTextTokens ) ;
507+ Contracts . Assert ( charFeatureCol != null || wordFeatureCol != null || ! string . IsNullOrEmpty ( tparams . OutputTextTokensColumnName ) ) ;
510508 if ( charFeatureCol != null )
511509 srcTaggedCols . Add ( new KeyValuePair < string , string > ( charFeatureCol , charFeatureCol ) ) ;
512510 else if ( wordFeatureCol != null )
@@ -555,9 +553,10 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema)
555553
556554 result [ OutputColumn ] = new SchemaShape . Column ( OutputColumn , SchemaShape . Column . VectorKind . Vector , NumberDataViewType . Single , false ,
557555 new SchemaShape ( metadata ) ) ;
558- if ( OptionalSettings . OutputTokens )
556+
557+ if ( ! string . IsNullOrEmpty ( OptionalSettings . OutputTokensColumnName ) )
559558 {
560- string name = string . Format ( TransformedTextColFormat , OutputColumn ) ;
559+ string name = OptionalSettings . OutputTokensColumnName ;
561560 result [ name ] = new SchemaShape . Column ( name , SchemaShape . Column . VectorKind . VariableVector , TextDataViewType . Instance , false ) ;
562561 }
563562
0 commit comments