[release/4.0] Fix up docs for MLContext (#7363)

* fix up docs for MLContext * some more fixes * text class and sentence similarity trainers --------- Co-authored-by: Genevieve Warren <24882762+gewarren@users.noreply.github.com>
dotnet · Jan 14, 2025 · 1fc5296 · 1fc5296
1 parent b489340
commit 1fc5296
Show file tree

Hide file tree

Showing 5 changed files with 70 additions and 61 deletions.
diff --git a/src/Microsoft.ML.AutoML/API/ColumnInference.cs b/src/Microsoft.ML.AutoML/API/ColumnInference.cs
@@ -15,7 +15,7 @@ namespace Microsoft.ML.AutoML
     public sealed class ColumnInferenceResults
     {
         /// <summary>
-        /// Inferred <see cref="TextLoader.Options" /> for the dataset.
+        /// Gets the inferred <see cref="TextLoader.Options" /> for the dataset.
         /// </summary>
         /// <remarks>
         /// Can be used to instantiate a new <see cref="TextLoader" /> to load
@@ -25,69 +25,69 @@ public sealed class ColumnInferenceResults
         public TextLoader.Options TextLoaderOptions { get; internal set; }
 
         /// <summary>
-        /// Information about the inferred columns in the dataset.
+        /// Gets information about the inferred columns in the dataset.
         /// </summary>
         /// <remarks>
         /// <para>Contains the inferred purposes of each column. See <see cref="AutoML.ColumnInformation"/> for more details.</para>
-        /// <para>This can be fed to the AutoML API when running an experiment.
-        /// See <typeref cref="ExperimentBase{TMetrics, TExperimentSettings}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />
-        /// for example.</para>
+        /// <para>This value can be fed to the AutoML API when running an experiment.
+        /// See <see cref="ExperimentBase{TMetrics, TExperimentSettings}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />, for example.</para>
         /// </remarks>
         [JsonProperty(DefaultValueHandling = DefaultValueHandling.Include)]
         public ColumnInformation ColumnInformation { get; internal set; }
     }
 
     /// <summary>
-    /// Information about the columns in a dataset.
+    /// Provides information about the columns in a dataset.
     /// </summary>
     /// <remarks>
     /// <para>Contains information about the purpose of each column in the dataset. For instance,
     /// it enumerates the dataset columns that AutoML should treat as categorical,
     /// the columns AutoML should ignore, which column is the label, etc.</para>
     /// <para><see cref="ColumnInformation"/> can be fed to the AutoML API when running an experiment.
-    /// See <typeref cref="ExperimentBase{TMetrics, TExperimentSettings}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />
-    /// for example.</para>
+    /// See <see cref="ExperimentBase{TMetrics, TExperimentSettings}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />, for example.</para>
     /// </remarks>
     public sealed class ColumnInformation
     {
         /// <summary>
-        /// The dataset column to use as the label.
+        /// Gets or sets the dataset column to use as the label.
         /// </summary>
         /// <value>The default value is "Label".</value>
         public string LabelColumnName { get; set; }
 
         /// <summary>
-        /// The dataset column to use as a user ID for computation.
+        /// Gets or sets the dataset column to use as a user ID for computation.
         /// </summary>
         public string UserIdColumnName { get; set; }
 
         /// <summary>
-        /// The dataset column to use as a group ID for computation in a Ranking Task.
+        /// Gets or sets the dataset column to use as a group ID for computation in a Ranking Task.
         /// If a SamplingKeyColumnName is provided, then it should be the same as this column.
         /// </summary>
         public string GroupIdColumnName { get; set; }
 
         /// <summary>
-        /// The dataset column to use as a item ID for computation.
+        /// Gets or sets the dataset column to use as an item ID for computation.
         /// </summary>
         public string ItemIdColumnName { get; set; }
 
         /// <summary>
-        /// The dataset column to use for example weight.
+        /// Gets or sets the dataset column to use for example weight.
         /// </summary>
         public string ExampleWeightColumnName { get; set; }
 
         /// <summary>
-        /// The dataset column to use for grouping rows.
+        /// Gets or sets the dataset column to use for grouping rows.
+        /// </summary>
+        /// <remarks>
         /// If two examples share the same sampling key column name,
         /// they are guaranteed to appear in the same subset (train or test).
         /// This can be used to ensure no label leakage from the train to the test set.
         /// If <see langword="null"/>, no row grouping will be performed.
-        /// </summary>
+        /// </remarks>
         public string SamplingKeyColumnName { get; set; }
 
         /// <summary>
-        /// The dataset columns that are categorical.
+        /// Gets the dataset columns that are categorical.
         /// </summary>
         /// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
         /// <remarks>
@@ -97,28 +97,28 @@ public sealed class ColumnInformation
         public ICollection<string> CategoricalColumnNames { get; private set; }
 
         /// <summary>
-        /// The dataset columns that are numeric.
+        /// Gets the dataset columns that are numeric.
         /// </summary>
         /// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
         [JsonProperty]
         public ICollection<string> NumericColumnNames { get; private set; }
 
         /// <summary>
-        /// The dataset columns that are text.
+        /// Gets the dataset columns that are text.
         /// </summary>
         /// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
         [JsonProperty]
         public ICollection<string> TextColumnNames { get; private set; }
 
         /// <summary>
-        /// The dataset columns that AutoML should ignore.
+        /// Gets the dataset columns that AutoML should ignore.
         /// </summary>
         /// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
         [JsonProperty]
         public ICollection<string> IgnoredColumnNames { get; private set; }
 
         /// <summary>
-        /// The dataset columns that are image paths.
+        /// Gets the dataset columns that are image paths.
         /// </summary>
         /// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
         [JsonProperty]

diff --git a/src/Microsoft.ML.AutoML/ColumnInference/ColumnInformationUtil.cs b/src/Microsoft.ML.AutoML/ColumnInference/ColumnInformationUtil.cs
@@ -122,7 +122,7 @@ public static ColumnInformation BuildColumnInfo(IEnumerable<DatasetColumnInfo> c
         }
 
         /// <summary>
-        /// Get all column names that are in <paramref name="columnInformation"/>.
+        /// Gets all column names that are in <paramref name="columnInformation"/>.
         /// </summary>
         /// <param name="columnInformation">Column information.</param>
         public static IEnumerable<string> GetColumnNames(ColumnInformation columnInformation)

diff --git a/src/Microsoft.ML.Data/MLContext.cs b/src/Microsoft.ML.Data/MLContext.cs
@@ -11,72 +11,78 @@
 namespace Microsoft.ML
 {
     /// <summary>
-    /// The common context for all ML.NET operations. Once instantiated by the user, it provides a way to
+    /// Represents the common context for all ML.NET operations.
+    /// </summary>
+    /// <remarks>
+    /// Once instantiated by the user, this class provides a way to
     /// create components for data preparation, feature engineering, training, prediction, and model evaluation.
     /// It also allows logging, execution control, and the ability to set repeatable random numbers.
-    /// </summary>
+    /// </remarks>
     public sealed class MLContext : IHostEnvironmentInternal
     {
         // REVIEW: consider making LocalEnvironment and MLContext the same class instead of encapsulation.
         private readonly LocalEnvironment _env;
 
         /// <summary>
-        /// Trainers and tasks specific to binary classification problems.
+        /// Gets the trainers and tasks specific to binary classification problems.
         /// </summary>
         public BinaryClassificationCatalog BinaryClassification { get; }
+
         /// <summary>
-        /// Trainers and tasks specific to multiclass classification problems.
+        /// Gets the trainers and tasks specific to multiclass classification problems.
         /// </summary>
         public MulticlassClassificationCatalog MulticlassClassification { get; }
+
         /// <summary>
-        /// Trainers and tasks specific to regression problems.
+        /// Gets the trainers and tasks specific to regression problems.
         /// </summary>
         public RegressionCatalog Regression { get; }
+
         /// <summary>
-        /// Trainers and tasks specific to clustering problems.
+        /// Gets the trainers and tasks specific to clustering problems.
         /// </summary>
         public ClusteringCatalog Clustering { get; }
 
         /// <summary>
-        /// Trainers and tasks specific to ranking problems.
+        /// Gets the trainers and tasks specific to ranking problems.
         /// </summary>
         public RankingCatalog Ranking { get; }
 
         /// <summary>
-        /// Trainers and tasks specific to anomaly detection problems.
+        /// Gets the trainers and tasks specific to anomaly detection problems.
         /// </summary>
         public AnomalyDetectionCatalog AnomalyDetection { get; }
 
         /// <summary>
-        /// Trainers and tasks specific to forecasting problems.
+        /// Gets the trainers and tasks specific to forecasting problems.
         /// </summary>
         public ForecastingCatalog Forecasting { get; }
 
         /// <summary>
-        /// Data processing operations.
+        /// Gets the data processing operations.
         /// </summary>
         public TransformsCatalog Transforms { get; }
 
         /// <summary>
-        /// Operations with trained models.
+        /// Gets the operations with trained models.
         /// </summary>
         public ModelOperationsCatalog Model { get; }
 
         /// <summary>
-        /// Data loading and saving.
+        /// Gets the data loading and saving operations.
         /// </summary>
         public DataOperationsCatalog Data { get; }
 
         // REVIEW: I think it's valuable to have the simplest possible interface for logging interception here,
         // and expand if and when necessary. Exposing classes like ChannelMessage, MessageSensitivity and so on
         // looks premature at this point.
         /// <summary>
-        /// The handler for the log messages.
+        /// Represents the callback method that will handle the log messages.
         /// </summary>
         public event EventHandler<LoggingEventArgs> Log;
 
         /// <summary>
-        /// This is a catalog of components that will be used for model loading.
+        /// Gets the catalog of components that will be used for model loading.
         /// </summary>
         public ComponentCatalog ComponentCatalog => _env.ComponentCatalog;
 
@@ -90,7 +96,8 @@ public string TempFilePath
         }
 
         /// <summary>
-        /// Allow falling back to run on CPU if couldn't run on GPU.
+        /// Gets or sets a value that indicates whether the CPU will
+        /// be used if the task couldn't run on GPU.
         /// </summary>
         public bool FallbackToCpu
         {
@@ -99,7 +106,7 @@ public bool FallbackToCpu
         }
 
         /// <summary>
-        /// GPU device ID to run execution on, <see langword="null" /> to run on CPU.
+        /// Gets or sets the GPU device ID to run execution on, <see langword="null" /> to run on CPU.
         /// </summary>
         public int? GpuDeviceId
         {
@@ -120,17 +127,17 @@ public int? GpuDeviceId
         ///
         /// If a fixed seed is provided by <paramref name="seed"/>, MLContext environment becomes
         /// deterministic, meaning that the results are repeatable and will remain the same across multiple runs.
-        /// For instance in many of ML.NET's API reference example code snippets, a seed is provided.
+        /// For instance, in many of ML.NET's API reference example code snippets, a seed is provided.
         /// That's because we want the users to get the same output as what's included in example comments,
         /// when they run the example on their own machine.
         ///
         /// Generally though, repeatability is not a requirement and that's the default behavior.
-        /// If a seed is not provided by <paramref name="seed"/>, i.e. it's set to <see langword="null"/>,
+        /// If a seed is not provided by <paramref name="seed"/>, that is, it's set to <see langword="null"/>,
         /// MLContext environment becomes non-deterministic and outputs change across multiple runs.
         ///
         /// There are many operations in ML.NET that don't use any randomness, such as
-        /// min-max normalization, concatenating columns, missing value indication, etc.
-        /// The behavior of those operations are deterministic regardless of the seed value.
+        /// min-max normalization, concatenating columns, and missing value indication.
+        /// The behavior of those operations is deterministic regardless of the seed value.
         ///
         /// Also, ML.NET trainers don't use randomness *after* the training is finished.
         /// So, the predictions from a loaded model don't depend on the seed value.

diff --git a/src/Microsoft.ML.TorchSharp/NasBert/SentenceSimilarityTrainer.cs b/src/Microsoft.ML.TorchSharp/NasBert/SentenceSimilarityTrainer.cs
@@ -27,31 +27,32 @@
 namespace Microsoft.ML.TorchSharp.NasBert
 {
     /// <summary>
-    /// The <see cref="IEstimator{TTransformer}"/> for training a Deep Neural Network(DNN) to classify text.
+    /// Represents the <see cref="IEstimator{TTransformer}"/> for training a Deep Neural Network (DNN) to determine sentence similarity.
     /// </summary>
     /// <remarks>
     /// <format type="text/markdown"><![CDATA[
     /// To create this trainer, use [SentenceSimilarity](xref:Microsoft.ML.TorchSharpCatalog.SentenceSimilarity*).
     ///
-    /// ### Input and Output Columns
-    /// The input label column data must be type<xref:System.Single> type and the sentence columns must be of type<xref:Microsoft.ML.Data.TextDataViewType>.
+    /// ### Input and output columns
+    /// The input label column data must be of type <xref:System.Single> and the sentence columns must be of type <xref:Microsoft.ML.Data.TextDataViewType>.
     ///
     /// This trainer outputs the following columns:
     ///
-    /// | Output Column Name | Column Type | Description|
+    /// | Output column name | Column type | Description|
     /// | -- | -- | -- |
-    /// | `Score` | <xref:System.Single> | The degree of similarity between the 2 sentences. |
-    /// ### Trainer Characteristics
-    /// |  |  |
+    /// | `Score` | <xref:System.Single> | The degree of similarity between the two sentences. |
+    ///
+    /// ### Trainer characteristics
+    /// | Characteristic | Value  |
     /// | -- | -- |
-    /// | Machine learning task | Rregression |
+    /// | Machine learning task | Regression |
     /// | Is normalization required? | No |
     /// | Is caching required? | No |
     /// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.TorchSharp and libtorch-cpu or libtorch-cuda-11.3 or any of the OS specific variants. |
     /// | Exportable to ONNX | No |
     ///
-    /// ### Training Algorithm Details
-    /// Trains a Deep Neural Network(DNN) by leveraging an existing pre-trained NAS-BERT roBERTa model for the purpose of determining sentence similarity.
+    /// ### Training algorithm details
+    /// Trains a Deep Neural Network (DNN) by leveraging an existing, pretrained NAS-BERT roBERTa model for the purpose of determining sentence similarity.
     /// ]]>
     /// </format>
     /// </remarks>

diff --git a/src/Microsoft.ML.TorchSharp/NasBert/TextClassificationTrainer.cs b/src/Microsoft.ML.TorchSharp/NasBert/TextClassificationTrainer.cs
@@ -28,32 +28,33 @@
 namespace Microsoft.ML.TorchSharp.NasBert
 {
     /// <summary>
-    /// The <see cref="IEstimator{TTransformer}"/> for training a Deep Neural Network(DNN) to classify text.
+    /// Represents the <see cref="IEstimator{TTransformer}"/> for training a Deep Neural Network (DNN) to classify text.
     /// </summary>
     /// <remarks>
     /// <format type="text/markdown"><![CDATA[
     /// To create this trainer, use [TextClassification](xref:Microsoft.ML.TorchSharpCatalog.TextClassification(Microsoft.ML.MulticlassClassificationCatalog.MulticlassClassificationTrainers,Int32,System.String,System.String,System.String,System.String,Int32,Int32,Int32,Microsoft.ML.TorchSharp.NasBert.BertArchitecture,Microsoft.ML.IDataView)).
     ///
-    /// ### Input and Output Columns
-    /// The input label column data must be [key](xref:Microsoft.ML.Data.KeyDataViewType) type and the sentence columns must be of type<xref:Microsoft.ML.Data.TextDataViewType>.
+    /// ### Input and output columns
+    /// The input label column data must be [key](xref:Microsoft.ML.Data.KeyDataViewType) type and the sentence columns must be of type <xref:Microsoft.ML.Data.TextDataViewType>.
     ///
     /// This trainer outputs the following columns:
     ///
-    /// | Output Column Name | Column Type | Description|
+    /// | Output column name | Column type | Description|
     /// | -- | -- | -- |
-    /// | `PredictedLabel` | [key](xref:Microsoft.ML.Data.KeyDataViewType) type | The predicted label's index. If its value is i, the actual label would be the i-th category in the key-valued input label type. |
-    /// | `Score` | Vector of<xref:System.Single> | The scores of all classes.Higher value means higher probability to fall into the associated class. If the i-th element has the largest value, the predicted label index would be i.Note that i is zero-based index. |
-    /// ### Trainer Characteristics
-    /// |  |  |
+    /// | `PredictedLabel` | [key](xref:Microsoft.ML.Data.KeyDataViewType) type | The predicted label's index. If its value is `i`, the actual label would be the `i`-th category in the key-valued input label type. |
+    /// | `Score` | Vector of <xref:System.Single> | The scores of all classes. A higher value means a higher probability of falling into the associated class. If the `i`-th element has the largest value, the predicted label index would be `i`. Note that `i` is a zero-based index. |
+    ///
+    /// ### Trainer characteristics
+    /// | Characteristic | Value |
     /// | -- | -- |
     /// | Machine learning task | Multiclass classification |
     /// | Is normalization required? | No |
     /// | Is caching required? | No |
     /// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.TorchSharp and libtorch-cpu or libtorch-cuda-11.3 or any of the OS specific variants. |
     /// | Exportable to ONNX | No |
     ///
-    /// ### Training Algorithm Details
-    /// Trains a Deep Neural Network(DNN) by leveraging an existing pre-trained NAS-BERT roBERTa model for the purpose of classifying text.
+    /// ### Training algorithm details
+    /// Trains a Deep Neural Network (DNN) by leveraging an existing, pretrained NAS-BERT roBERTa model for the purpose of classifying text.
     /// ]]>
     /// </format>
     /// </remarks>