@@ -163,8 +163,8 @@ public IStopWordsRemoverOptions StopWordsRemoverOptions
163163 [ Argument ( ArgumentType . AtMostOnce , HelpText = "Whether to keep numbers or remove them." , ShortName = "num" , SortOrder = 8 ) ]
164164 public bool KeepNumbers = TextNormalizingEstimator . Defaults . KeepNumbers ;
165165
166- [ Argument ( ArgumentType . AtMostOnce , HelpText = "Whether to output the transformed text tokens as an additional column ." , ShortName = "tokens,showtext,showTransformedText" , SortOrder = 9 ) ]
167- public bool OutputTokens ;
/// <summary>
/// Name of the column that holds the transformed text tokens.
/// When this is null or empty, no tokens column is requested: the schema and fitting code
/// in this file only add the column after a <c>string.IsNullOrEmpty</c> check on this value.
/// </summary>
166+ [ Argument ( ArgumentType . AtMostOnce , HelpText = "Column containing the transformed text tokens." , ShortName = "tokens,showtext,showTransformedText" , SortOrder = 9 ) ]
167+ public string OutputTokensColumnName ;
168168
169169 [ Argument ( ArgumentType . Multiple , HelpText = "A dictionary of whitelisted terms." , ShortName = "dict" , NullName = "<None>" , SortOrder = 10 , Hide = true ) ]
170170 internal TermLoaderArguments Dictionary ;
@@ -278,7 +278,7 @@ private sealed class TransformApplierParams
278278 public readonly bool KeepDiacritics ;
279279 public readonly bool KeepPunctuations ;
280280 public readonly bool KeepNumbers ;
281- public readonly bool OutputTextTokens ;
// Copied from OptionalSettings.OutputTokensColumnName in the constructor; a null or empty value means no tokens column is emitted.
281+ public readonly string OutputTextTokensColumnName ;
282282 public readonly TermLoaderArguments Dictionary ;
283283
284284 public StopWordsRemovingEstimator . Language StopwordsLanguage
@@ -305,7 +305,7 @@ internal LpNormNormalizingEstimatorBase.NormFunction LpNorm
305305
306306 // These properties encode the logic needed to determine which transforms to apply.
307307 #region NeededTransforms
308- public bool NeedsWordTokenizationTransform { get { return WordExtractorFactory != null || NeedsRemoveStopwordsTransform || OutputTextTokens ; } }
308+ public bool NeedsWordTokenizationTransform => WordExtractorFactory != null || NeedsRemoveStopwordsTransform || !string.IsNullOrEmpty(OutputTextTokensColumnName); // true when a word extractor factory is set, stop words are removed, or a tokens column name is given
309309
310310 public bool NeedsRemoveStopwordsTransform => StopWordsRemover != null; // true exactly when a stop words remover is configured
311311
@@ -358,7 +358,7 @@ public TransformApplierParams(TextFeaturizingEstimator parent)
358358 KeepDiacritics = parent . OptionalSettings . KeepDiacritics ;
359359 KeepPunctuations = parent . OptionalSettings . KeepPunctuations ;
360360 KeepNumbers = parent . OptionalSettings . KeepNumbers ;
361- OutputTextTokens = parent . OptionalSettings . OutputTokens ;
361+ OutputTextTokensColumnName = parent . OptionalSettings . OutputTokensColumnName ;
362362 Dictionary = parent . _dictionary ;
363363 }
364364 }
@@ -371,8 +371,6 @@ public TransformApplierParams(TextFeaturizingEstimator parent)
371371
372372 internal const Language DefaultLanguage = Language . English ;
373373
374- private const string TransformedTextColFormat = "{0}_TransformedText" ;
375-
376374 internal TextFeaturizingEstimator ( IHostEnvironment env , string outputColumnName , string inputColumnName = null )
377375 : this ( env , outputColumnName , new [ ] { inputColumnName ?? outputColumnName } )
378376 {
@@ -492,10 +490,10 @@ public ITransformer Fit(IDataView input)
492490 wordFeatureCol = dstCol ;
493491 }
494492
495- if ( tparams . OutputTextTokens )
493+ if ( ! string . IsNullOrEmpty ( tparams . OutputTextTokensColumnName ) )
496494 {
497495 string [ ] srcCols = wordTokCols ?? textCols ;
498- view = new ColumnConcatenatingTransformer ( h , string . Format ( TransformedTextColFormat , OutputColumn ) , srcCols ) . Transform ( view ) ;
496+ view = new ColumnConcatenatingTransformer ( h , tparams . OutputTextTokensColumnName , srcCols ) . Transform ( view ) ;
499497 }
500498
501499 if ( tparams . CharExtractorFactory != null )
@@ -564,7 +562,7 @@ public ITransformer Fit(IDataView input)
564562 // Otherwise, simply use the slot names, omitting the original source column names
565563 // entirely. For the Concat transform setting the Key == Value of the TaggedColumn
566564 // KVP signals this intent.
567- Contracts . Assert ( charFeatureCol != null || wordFeatureCol != null || tparams . OutputTextTokens ) ;
565+ Contracts . Assert ( charFeatureCol != null || wordFeatureCol != null || ! string . IsNullOrEmpty ( tparams . OutputTextTokensColumnName ) ) ;
568566 if ( charFeatureCol != null )
569567 srcTaggedCols . Add ( new KeyValuePair < string , string > ( charFeatureCol , charFeatureCol ) ) ;
570568 else if ( wordFeatureCol != null )
@@ -613,9 +611,10 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema)
613611
614612 result [ OutputColumn ] = new SchemaShape . Column ( OutputColumn , SchemaShape . Column . VectorKind . Vector , NumberDataViewType . Single , false ,
615613 new SchemaShape ( metadata ) ) ;
616- if ( OptionalSettings . OutputTokens )
614+
615+ if ( ! string . IsNullOrEmpty ( OptionalSettings . OutputTokensColumnName ) )
617616 {
618- string name = string . Format ( TransformedTextColFormat , OutputColumn ) ;
617+ string name = OptionalSettings . OutputTokensColumnName ;
619618 result [ name ] = new SchemaShape . Column ( name , SchemaShape . Column . VectorKind . VariableVector , TextDataViewType . Instance , false ) ;
620619 }
621620
0 commit comments