Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Import additional indexing settings on external fields #752

Merged
merged 10 commits into from
Mar 29, 2022
24 changes: 24 additions & 0 deletions internal/fields/dependency_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,14 @@ func transformImportedField(fd FieldDefinition) common.MapStr {
"type": fd.Type,
}

if fd.Index != nil {
m["index"] = *fd.Index
}

if fd.DocValues != nil {
m["doc_values"] = *fd.DocValues
}

if len(fd.Fields) > 0 {
var t []common.MapStr
for _, f := range fd.Fields {
Expand All @@ -221,5 +229,21 @@ func transformImportedField(fd FieldDefinition) common.MapStr {
}
m.Put("fields", t)
}

if len(fd.MultiFields) > 0 {
var t []common.MapStr
for _, f := range fd.MultiFields {
i := transformImportedMultiField(f)
jsoriano marked this conversation as resolved.
Show resolved Hide resolved
t = append(t, i)
}
m.Put("multi_fields", t)
}
return m
}

func transformImportedMultiField(fd MultiFieldDefinition) common.MapStr {
return common.MapStr{
jsoriano marked this conversation as resolved.
Show resolved Hide resolved
"name": fd.Name,
"type": fd.Type,
}
}
84 changes: 84 additions & 0 deletions internal/fields/dependency_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,71 @@ func TestDependencyManagerInjectExternalFields(t *testing.T) {
changed: true,
valid: true,
},
{
title: "multi fields",
defs: []common.MapStr{
{
"name": "process.command_line",
"external": "test",
},
},
result: []common.MapStr{
{
"name": "process.command_line",
"type": "wildcard",
"description": "Full command line that started the process.",
"multi_fields": []common.MapStr{
{
"name": "text",
"type": "match_only_text",
},
},
},
},
changed: true,
valid: true,
},
{
title: "not indexed external",
defs: []common.MapStr{
{
"name": "event.original",
"external": "test",
},
},
result: []common.MapStr{
{
"name": "event.original",
"type": "text",
"description": "Original event.",
"index": false,
"doc_values": false,
},
},
changed: true,
valid: true,
},
{
title: "override not indexed external",
defs: []common.MapStr{
{
"name": "event.original",
"index": true,
"external": "test",
},
},
result: []common.MapStr{
{
"name": "event.original",
"type": "text",
"description": "Original event.",
"index": true,
"doc_values": false,
},
},
changed: true,
valid: true,
},
{
title: "unknown field",
defs: []common.MapStr{
Expand All @@ -128,6 +193,7 @@ func TestDependencyManagerInjectExternalFields(t *testing.T) {
},
}

indexFalse := false
schema := map[string][]FieldDefinition{"test": []FieldDefinition{
{
Name: "container.id",
Expand All @@ -144,6 +210,24 @@ func TestDependencyManagerInjectExternalFields(t *testing.T) {
Description: "Data stream dataset.",
Type: "constant_keyword",
},
{
Name: "process.command_line",
Description: "Full command line that started the process.",
Type: "wildcard",
MultiFields: []MultiFieldDefinition{
{
Name: "text",
Type: "match_only_text",
},
},
},
{
Name: "event.original",
Description: "Original event.",
Type: "text",
Index: &indexFalse,
DocValues: &indexFalse,
},
}}
dm := &DependencyManager{schema: schema}

Expand Down
102 changes: 78 additions & 24 deletions internal/fields/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,18 @@ package fields

// FieldDefinition describes a single field with its properties.
type FieldDefinition struct {
Name string `yaml:"name"`
Description string `yaml:"description"`
Type string `yaml:"type"`
Value string `yaml:"value"` // The value to associate with a constant_keyword field.
Pattern string `yaml:"pattern"`
Unit string `yaml:"unit"`
MetricType string `yaml:"metric_type"`
External string `yaml:"external"`
Fields []FieldDefinition `yaml:"fields"`
Name string `yaml:"name"`
Description string `yaml:"description"`
Type string `yaml:"type"`
Value string `yaml:"value"` // The value to associate with a constant_keyword field.
Pattern string `yaml:"pattern"`
Unit string `yaml:"unit"`
MetricType string `yaml:"metric_type"`
External string `yaml:"external"`
Index *bool `yaml:"index"`
DocValues *bool `yaml:"doc_values"`
Fields []FieldDefinition `yaml:"fields,omitempty"`
MultiFields []MultiFieldDefinition `yaml:"multi_fields,omitempty"`
}

func (orig *FieldDefinition) Update(fd FieldDefinition) {
Expand Down Expand Up @@ -42,26 +45,77 @@ func (orig *FieldDefinition) Update(fd FieldDefinition) {
if fd.External != "" {
orig.External = fd.External
}
if fd.Index != nil {
orig.Index = fd.Index
}
if fd.DocValues != nil {
orig.DocValues = fd.DocValues
}

if len(fd.Fields) > 0 {
// When a subfield the same name exists, update it. When not, append it.
updatedFields := make([]FieldDefinition, len(orig.Fields))
copy(updatedFields, orig.Fields)
for _, newField := range fd.Fields {
found := false
for i, origField := range orig.Fields {
if origField.Name != newField.Name {
continue
}
orig.updateFields(fd.Fields)
}

if len(fd.MultiFields) > 0 {
jsoriano marked this conversation as resolved.
Show resolved Hide resolved
orig.updateMultiFields(fd.MultiFields)
}
}

found = true
updatedFields[i].Update(newField)
break
func (orig *FieldDefinition) updateFields(fields []FieldDefinition) {
// When a subfield the same name exists, update it. When not, append it.
updatedFields := make([]FieldDefinition, len(orig.Fields))
copy(updatedFields, orig.Fields)
for _, newField := range fields {
found := false
for i, origField := range orig.Fields {
if origField.Name != newField.Name {
continue
}
if !found {
updatedFields = append(updatedFields, newField)

found = true
updatedFields[i].Update(newField)
break
}
if !found {
updatedFields = append(updatedFields, newField)
}
}
orig.Fields = updatedFields
}

// updateMultiFields merges fields into orig.MultiFields. A new multi field
// whose name matches an existing one updates it; any other is appended. The
// merge is done on a copy of the slice, which then replaces orig.MultiFields.
func (orig *FieldDefinition) updateMultiFields(fields []MultiFieldDefinition) {
	// When a multi field with the same name exists, update it. When not, append it.
	updatedFields := make([]MultiFieldDefinition, len(orig.MultiFields))
	copy(updatedFields, orig.MultiFields)
	for _, newField := range fields {
		found := false
		// Match against the original multi fields only, so indexes stay
		// valid for updatedFields even after appends.
		for i, origField := range orig.MultiFields {
			if origField.Name != newField.Name {
				continue
			}

			found = true
			updatedFields[i].Update(newField)
			break
		}
		if !found {
			updatedFields = append(updatedFields, newField)
		}
	}
	orig.MultiFields = updatedFields
}

// MultiFieldDefinition describes a multi field with its properties.
type MultiFieldDefinition struct {
	Name string `yaml:"name"`
	Type string `yaml:"type"`
}

// Update copies the non-empty Name and Type of fd onto orig. Empty values in
// fd leave the corresponding property of orig untouched.
func (orig *MultiFieldDefinition) Update(fd MultiFieldDefinition) {
	if name := fd.Name; name != "" {
		orig.Name = name
	}
	if typ := fd.Type; typ != "" {
		orig.Type = typ
	}
}
22 changes: 18 additions & 4 deletions internal/fields/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -339,12 +339,26 @@ func compareKeys(key string, def FieldDefinition, searchedKey string) bool {
return true
}

// Workaround for potential geo_point, as "lon" and "lat" fields are not present in field definitions.
// Unfortunately we have to assume that imported field could be a geo_point (nasty workaround).
// Only a dot can be accepted now.
if searchedKey[j] != '.' {
return false
}
j++

if len(searchedKey) > j {
extraPart := searchedKey[j:]

// Check if this is a multi field.
for _, multiField := range def.MultiFields {
if extraPart == multiField.Name {
return true
}
}

// Workaround for potential geo_point, as "lon" and "lat" fields are not present in field definitions.
// Unfortunately we have to assume that imported field could be a geo_point (nasty workaround).
if def.Type == "geo_point" || def.External != "" {
extraPart := searchedKey[j:]
if extraPart == ".lon" || extraPart == ".lat" {
if extraPart == "lon" || extraPart == "lat" {
return true
}
}
Expand Down
12 changes: 12 additions & 0 deletions internal/fields/validate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,18 @@ func TestCompareKeys(t *testing.T) {
searchedKey: "example.geo.foo",
expected: false,
},
{
key: "example.command_line",
def: FieldDefinition{
MultiFields: []MultiFieldDefinition{
{
Name: "text",
},
},
},
searchedKey: "example.command_line.text",
expected: true,
},
}

for _, c := range cases {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
name: event.created
- external: ecs
name: event.kind
- external: ecs
name: event.original
- external: ecs
name: event.outcome
- external: ecs
Expand All @@ -30,6 +32,8 @@
name: log.level
- external: ecs
name: message
- external: ecs
name: process.command_line
- external: ecs
name: process.pid
- external: ecs
Expand Down
2 changes: 2 additions & 0 deletions test/packages/parallel/apache/docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ Access logs collects the Apache access logs.
| event.dataset | Event dataset | constant_keyword |
| event.kind | This is one of four ECS Categorization Fields, and indicates the highest level in the ECS category hierarchy. `event.kind` gives high-level information about what type of information the event contains, without being specific to the contents of the event. For example, values of this field distinguish alert events from metric events. The value of this field can be used to inform how these kinds of events should be handled. They may warrant different retention, different access control, it may also help understand whether the data coming in at a regular interval or not. | keyword |
| event.module | Event module | constant_keyword |
| event.original | Raw text message of entire event. Used to demonstrate log integrity or where the full log message (before splitting it up in multiple parts) may be required, e.g. for reindex. This field is not indexed and doc_values are disabled. It cannot be searched, but it can be retrieved from `_source`. If users wish to override this and index this field, please see `Field data types` in the `Elasticsearch Reference`. | keyword |
| event.outcome | This is one of four ECS Categorization Fields, and indicates the lowest level in the ECS category hierarchy. `event.outcome` simply denotes whether the event represents a success or a failure from the perspective of the entity that produced the event. Note that when a single transaction is described in multiple events, each event may populate different values of `event.outcome`, according to their perspective. Also note that in the case of a compound event (a single event that contains multiple logical events), this field should be populated with the value that best captures the overall success or failure from the perspective of the event producer. Further note that not all events will have an associated outcome. For example, this field is generally not populated for metric events, events with `event.type:info`, or any events for which an outcome does not make logical sense. | keyword |
| file.path | Full path to the file, including the file name. It should include the drive letter, when appropriate. | keyword |
| host.architecture | Operating system architecture. | keyword |
Expand Down Expand Up @@ -73,6 +74,7 @@ Access logs collects the Apache access logs.
| log.level | Original log level of the log event. If the source of the event provides a log level or textual severity, this is the one that goes in `log.level`. If your source doesn't specify one, you may put your event transport's severity here (e.g. Syslog severity). Some examples are `warn`, `err`, `i`, `informational`. | keyword |
| log.offset | Log offset | long |
| message | For log events the message field contains the log message, optimized for viewing in a log viewer. For structured logs without an original message field, other fields can be concatenated to form a human-readable summary of the event. If multiple messages exist, they can be combined into one message. | match_only_text |
| process.command_line | Full command line that started the process, including the absolute path to the executable, and all arguments. Some arguments may be filtered to protect sensitive information. | wildcard |
| process.pid | Process id. | long |
| process.thread.id | Thread ID. | long |
| source.address | Some event source addresses are defined ambiguously. The event will sometimes list an IP, a domain or a unix socket. You should always store the raw address in the `.address` field. Then it should be duplicated to `.ip` or `.domain`, depending on which one it is. | keyword |
Expand Down