diff --git a/src/Nest/Ingest/ProcessorFormatter.cs b/src/Nest/Ingest/ProcessorFormatter.cs index 3e1d27b25da..4e59e29c5cd 100644 --- a/src/Nest/Ingest/ProcessorFormatter.cs +++ b/src/Nest/Ingest/ProcessorFormatter.cs @@ -42,6 +42,7 @@ internal class ProcessorFormatter : IJsonFormatter { "circle", 30 }, { "enrich", 31 }, { "csv", 32 }, + { "inference", 33 }, }; public IProcessor Deserialize(ref JsonReader reader, IJsonFormatterResolver formatterResolver) @@ -161,6 +162,9 @@ public IProcessor Deserialize(ref JsonReader reader, IJsonFormatterResolver form case 32: processor = Deserialize(ref reader, formatterResolver); break; + case 33: + processor = Deserialize(ref reader, formatterResolver); + break; } } else @@ -230,6 +234,9 @@ public void Serialize(ref JsonWriter writer, IProcessor value, IJsonFormatterRes case "gsub": Serialize(ref writer, value, formatterResolver); break; + case "inference": + Serialize(ref writer, value, formatterResolver); + break; case "join": Serialize(ref writer, value, formatterResolver); break; diff --git a/src/Nest/Ingest/Processors/InferenceProcessor.cs b/src/Nest/Ingest/Processors/InferenceProcessor.cs new file mode 100644 index 00000000000..c312db36953 --- /dev/null +++ b/src/Nest/Ingest/Processors/InferenceProcessor.cs @@ -0,0 +1,219 @@ +using System; +using System.Collections.Generic; +using System.Linq.Expressions; +using System.Runtime.Serialization; +using Elasticsearch.Net; +using Elasticsearch.Net.Utf8Json; +using Nest; + +namespace Nest +{ + /// + /// Uses a pre-trained data frame analytics model to infer against the data that is being ingested in the pipeline. + /// + /// Available in Elasticsearch 7.6.0+ with at least basic license. + /// + [InterfaceDataContract] + public interface IInferenceProcessor : IProcessor + { + /// + /// The ID of the model to load and infer against. + /// + [DataMember(Name = "model_id")] + string ModelId { get; set; } + + /// + /// Field added to incoming documents to contain results objects. + /// + [DataMember(Name ="target_field")] + Field TargetField { get; set; } + + /// + /// Maps the document field names to the known field names of the model. + /// + [DataMember(Name = "field_mappings")] + IDictionary FieldMappings { get; set; } + + /// + /// Contains the inference type and its options. + /// + [DataMember(Name = "inference_config")] + IInferenceConfig InferenceConfig { get; set; } + } + + /// + public class InferenceProcessor : ProcessorBase, IInferenceProcessor + { + /// + public string ModelId { get; set; } + + /// + public Field TargetField { get; set; } + + /// + public IDictionary FieldMappings { get; set; } + + /// + public IInferenceConfig InferenceConfig { get; set; } + + protected override string Name => "inference"; + } + + /// + public class InferenceProcessorDescriptor + : ProcessorDescriptorBase, IInferenceProcessor>, IInferenceProcessor + where T : class + { + protected override string Name => "inference"; + + Field IInferenceProcessor.TargetField { get; set; } + string IInferenceProcessor.ModelId { get; set; } + IInferenceConfig IInferenceProcessor.InferenceConfig { get; set; } + IDictionary IInferenceProcessor.FieldMappings { get; set; } + + /// + public InferenceProcessorDescriptor TargetField(Field field) => Assign(field, (a, v) => a.TargetField = v); + + /// + public InferenceProcessorDescriptor TargetField(Expression> objectPath) => + Assign(objectPath, (a, v) => a.TargetField = v); + + /// + public InferenceProcessorDescriptor ModelId(string modelId) => + Assign(modelId, (a, v) => a.ModelId = v); + + /// + public InferenceProcessorDescriptor InferenceConfig(Func, IInferenceConfig> selector) => + Assign(selector, (a, v) => a.InferenceConfig = v.InvokeOrDefault(new InferenceConfigDescriptor())); + + /// + public InferenceProcessorDescriptor FieldMappings(Func, FluentDictionary> selector = null) => + Assign(selector, (a, v) => a.FieldMappings = v.InvokeOrDefault(new FluentDictionary())); + } + + [ReadAs(typeof(InferenceConfig))] + public interface IInferenceConfig + { + + [DataMember(Name = "regression")] + IRegressionInferenceConfig Regression { get; set; } + + [DataMember(Name = "classification")] + IClassificationInferenceConfig Classification { get; set; } + } + + public class InferenceConfig + : IInferenceConfig + { + public IRegressionInferenceConfig Regression { get; set; } + + public IClassificationInferenceConfig Classification { get; set; } + } + + public class InferenceConfigDescriptor : DescriptorBase, IInferenceConfig>, IInferenceConfig + { + IRegressionInferenceConfig IInferenceConfig.Regression { get; set; } + IClassificationInferenceConfig IInferenceConfig.Classification { get; set; } + + public InferenceConfigDescriptor Regression(Func, IRegressionInferenceConfig> selector) => + Assign(selector, (a, v) => a.Regression = v.InvokeOrDefault(new RegressionInferenceConfigDescriptor())); + + public InferenceConfigDescriptor Classification(Func, IClassificationInferenceConfig> selector) => + Assign(selector, (a, v) => a.Classification = v.InvokeOrDefault(new ClassificationInferenceConfigDescriptor())); + } + + [ReadAs(typeof(RegressionInferenceConfig))] + public interface IRegressionInferenceConfig + { + /// + /// Specifies the field to which the inference prediction is written. Defaults to predicted_value. + /// + [DataMember(Name = "results_field")] + Field ResultsField { get; set; } + } + + public class RegressionInferenceConfig : IRegressionInferenceConfig + { + /// + /// Specifies the field to which the inference prediction is written. Defaults to predicted_value. + /// + public Field ResultsField { get; set; } + } + + public class RegressionInferenceConfigDescriptor + : DescriptorBase, IRegressionInferenceConfig>, IRegressionInferenceConfig + { + Field IRegressionInferenceConfig.ResultsField { get; set; } + + /// + public RegressionInferenceConfigDescriptor ResultsField(Field field) => Assign(field, (a, v) => a.ResultsField = v); + + /// + public RegressionInferenceConfigDescriptor ResultsField(Expression> objectPath) => + Assign(objectPath, (a, v) => a.ResultsField = v); + } + + [ReadAs(typeof(ClassificationInferenceConfig))] + public interface IClassificationInferenceConfig + { + /// + /// Specifies the field to which the inference prediction is written. Defaults to predicted_value. + /// + [DataMember(Name = "results_field")] + Field ResultsField { get; set; } + + /// + /// Specifies the number of top class predictions to return. Defaults to 0. + /// + [DataMember(Name = "num_top_classes")] + int? NumTopClasses { get; set; } + + /// + /// Specifies the field to which the top classes are written. Defaults to top_classes. + /// + [DataMember(Name = "top_classes_results_field")] + Field TopClassesResultsField { get; set; } + } + + public class ClassificationInferenceConfig : IClassificationInferenceConfig + { + /// + /// Specifies the field to which the inference prediction is written. Defaults to predicted_value. + /// + public Field ResultsField { get; set; } + + /// + /// Specifies the number of top class predictions to return. Defaults to 0. + /// + public int? NumTopClasses { get; set; } + + /// + /// Specifies the field to which the top classes are written. Defaults to top_classes. + /// + public Field TopClassesResultsField { get; set; } + } + + public class ClassificationInferenceConfigDescriptor : DescriptorBase, IClassificationInferenceConfig>, IClassificationInferenceConfig + { + Field IClassificationInferenceConfig.ResultsField { get; set; } + int? IClassificationInferenceConfig.NumTopClasses { get; set; } + Field IClassificationInferenceConfig.TopClassesResultsField { get; set; } + + /// + public ClassificationInferenceConfigDescriptor ResultsField(Field field) => Assign(field, (a, v) => a.ResultsField = v); + + /// + public ClassificationInferenceConfigDescriptor ResultsField(Expression> objectPath) => + Assign(objectPath, (a, v) => a.ResultsField = v); + + /// + public ClassificationInferenceConfigDescriptor NumTopClasses(int? numTopClasses) => Assign(numTopClasses, (a, v) => a.NumTopClasses = v); + + /// + public ClassificationInferenceConfigDescriptor TopClassesResultsField(Field field) => Assign(field, (a, v) => a.TopClassesResultsField = v); + + /// + public ClassificationInferenceConfigDescriptor TopClassesResultsField(Expression> objectPath) => + Assign(objectPath, (a, v) => a.TopClassesResultsField = v); + } +} diff --git a/src/Nest/Ingest/ProcessorsDescriptor.cs b/src/Nest/Ingest/ProcessorsDescriptor.cs index a2ae0563337..e333ed66ef5 100644 --- a/src/Nest/Ingest/ProcessorsDescriptor.cs +++ b/src/Nest/Ingest/ProcessorsDescriptor.cs @@ -79,6 +79,10 @@ public ProcessorsDescriptor Grok(Func, IGrokProces public ProcessorsDescriptor Gsub(Func, IGsubProcessor> selector) where T : class => Assign(selector, (a, v) => a.AddIfNotNull(v?.Invoke(new GsubProcessorDescriptor()))); + /// + public ProcessorsDescriptor Inference(Func, IInferenceProcessor> selector) where T : class => + Assign(selector, (a, v) => a.AddIfNotNull(v?.Invoke(new InferenceProcessorDescriptor()))); + /// public ProcessorsDescriptor Join(Func, IJoinProcessor> selector) where T : class => Assign(selector, (a, v) => a.AddIfNotNull(v?.Invoke(new JoinProcessorDescriptor()))); diff --git a/tests/Tests/Ingest/ProcessorAssertions.cs b/tests/Tests/Ingest/ProcessorAssertions.cs index 41313573bf6..84ce24ff48b 100644 --- a/tests/Tests/Ingest/ProcessorAssertions.cs +++ b/tests/Tests/Ingest/ProcessorAssertions.cs @@ -27,7 +27,6 @@ public abstract class ProcessorAssertion : IProcessorAssertion public abstract string Key { get; } } - public static class ProcessorAssertions { public static IEnumerable All => @@ -279,6 +278,58 @@ public class Gsub : ProcessorAssertion public override string Key => "gsub"; } + [SkipVersion("<7.6.0", "Introduced in Elasticsearch 7.6.0+")] + public class Inference : ProcessorAssertion + { + public override Func>> Fluent => d => d + .Inference(c => c + .TargetField(p => p.Name) + .ModelId("model_id") + .FieldMappings() + .InferenceConfig(i => i + .Classification(cc => cc + .ResultsField("results") + .NumTopClasses(10) + .TopClassesResultsField("topClasses") + ) + ) + ); + + public override IProcessor Initializer => new InferenceProcessor + { + TargetField = "name", + ModelId = "model_id", + FieldMappings = new Dictionary(), + InferenceConfig = new InferenceConfig + { + Classification = new ClassificationInferenceConfig + { + ResultsField = "results", + NumTopClasses = 10, + TopClassesResultsField = "topClasses" + } + } + }; + + public override object Json => new + { + target_field = "name", + model_id = "model_id", + field_mappings = new {}, + inference_config = new + { + classification = new + { + results_field = "results", + num_top_classes = 10, + top_classes_results_field = "topClasses" + } + } + }; + + public override string Key => "inference"; + } + public class Join : ProcessorAssertion { public override Func>> Fluent =>