Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LOG-6236: Align syslog output implementation with spec RFC3164 and RFC5424 #2830

Merged
merged 1 commit into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions internal/generator/vector/output/syslog/rfc3164_with_defaults.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
[transforms.example_parse_encoding]
type = "remap"
inputs = ["application"]
source = '''
. = merge(., parse_json!(string!(.message))) ?? .
if .log_type == "infrastructure" && .log_source == "node" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vparfonov There might be logs with .log_type == "infrastructure" that do not have .log_source == "node". As far as I can see, the logs from namespace_name=openshift-kube-apiserver and container_name=kube-apiserver do not have .log_source == "node". This line may need a conditional statement for container logs within the openshift-* namespaces.

Copy link
Contributor Author

@vparfonov vparfonov Nov 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kattz-kawa Good point, it can be .log_source == "container", but then it will not be a journal log; it will be something closer to application logs.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the only input to this transform truly is "application" logs, then "application" is only ever container logs; it is never sourced from journald. This means we should alter the transforms accordingly.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this logic is correct, but it could be simplified slightly:

  • if (node) - these are journald logs and we can map the syslog fields exactly from the logs.
    Note: 'if (node)' is equivalent to 'if (infra && node)' because all node logs are infra logs.
    The difference in format is really node vs. container, not application vs. infra.
  • if (container) - container logs are formatted the same regardless of type (application or infra)
  • if (audit) - audit logs formatted differently from either of the previous two cases.

._internal.syslog.tag = to_string!(.systemd.u.SYSLOG_IDENTIFIER || "")
._internal.syslog.proc_id = to_string!(.systemd.t.PID || "")
}
if .log_source == "container" {
._internal.syslog.tag = join!([.kubernetes.namespace_name, .kubernetes.pod_name, .kubernetes.container_name], "")
._internal.syslog.severity = .level
._internal.syslog.facility = "user"
#Remove non-alphanumeric characters
._internal.syslog.tag = replace(._internal.syslog.tag, r'[^a-zA-Z0-9]', "")
#Truncate the sanitized tag to 32 characters
._internal.syslog.tag = truncate(._internal.syslog.tag, 32)
}
if .log_type == "audit" {
._internal.syslog.tag = .log_source
._internal.syslog.severity = "informational"
._internal.syslog.facility = "security"
}
.facility = to_string!(._internal.syslog.facility || "user")
.severity = to_string!(._internal.syslog.severity || "informational")
.proc_id = to_string!(._internal.syslog.proc_id || "-")
.tag = to_string!(._internal.syslog.tag || "")
if exists(.proc_id) && .proc_id != "-" && .proc_id != "" {
.tag = .tag + "[" + .proc_id + "]"
}
'''

[sinks.example]
type = "socket"
inputs = ["example_parse_encoding"]
address = "logserver:514"
mode = "udp"

[sinks.example.encoding]
codec = "syslog"
except_fields = ["_internal"]
rfc = "rfc3164"
add_log_source = false
facility = "$$.message.facility"
severity = "$$.message.severity"
proc_id = "$$.message.proc_id"
tag = "$$.message.tag"
107 changes: 96 additions & 11 deletions internal/generator/vector/output/syslog/syslog.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ type SyslogEncodingRemap struct {
Inputs string
EncodingFields EncodingTemplateField
PayloadKey string
RFC string
}

func (ser SyslogEncodingRemap) Name() string {
Expand All @@ -74,12 +75,61 @@ inputs = {{.Inputs}}
source = '''
. = merge(., parse_json!(string!(.message))) ?? .

{{if eq .RFC "RFC3164" -}}
if .log_type == "infrastructure" && .log_source == "node" {
._internal.syslog.tag = to_string!(.systemd.u.SYSLOG_IDENTIFIER || "")
._internal.syslog.proc_id = to_string!(.systemd.t.PID || "")
}
if .log_source == "container" {
._internal.syslog.tag = join!([.kubernetes.namespace_name, .kubernetes.pod_name, .kubernetes.container_name], "")
._internal.syslog.severity = .level
._internal.syslog.facility = "user"
#Remove non-alphanumeric characters
._internal.syslog.tag = replace(._internal.syslog.tag, r'[^a-zA-Z0-9]', "")
#Truncate the sanitized tag to 32 characters
._internal.syslog.tag = truncate(._internal.syslog.tag, 32)

}
if .log_type == "audit" {
._internal.syslog.tag = .log_source
._internal.syslog.severity = "informational"
._internal.syslog.facility = "security"
}
{{end}}

{{if eq .RFC "RFC5424" -}}
._internal.syslog.msg_id = .log_source

if .log_type == "infrastructure" && .log_source == "node" {
._internal.syslog.app_name = to_string!(.systemd.u.SYSLOG_IDENTIFIER||"-")
._internal.syslog.proc_id = to_string!(.systemd.t.PID||"-")
}
if .log_source == "container" {
._internal.syslog.app_name = join!([.kubernetes.namespace_name, .kubernetes.pod_name, .kubernetes.container_name], "_")
._internal.syslog.proc_id = to_string!(.kubernetes.pod_id||"-")
._internal.syslog.severity = .level
._internal.syslog.facility = "user"
}
if .log_type == "audit" {
._internal.syslog.app_name = .log_source
._internal.syslog.proc_id = to_string!(.auditID || "-")
._internal.syslog.severity = "informational"
._internal.syslog.facility = "security"
}
{{end}}

{{if .EncodingFields.FieldVRLList -}}
{{range $templatePair := .EncodingFields.FieldVRLList -}}
.{{$templatePair.Field}} = {{$templatePair.VRLString}}
{{end -}}
{{end}}

{{if eq .RFC "RFC3164" -}}
if exists(.proc_id) && .proc_id != "-" && .proc_id != "" {
.tag = .tag + "[" + .proc_id + "]"
}
{{end}}

{{if .PayloadKey -}}
if is_null({{.PayloadKey}}) {
.payload_key = .
Expand Down Expand Up @@ -111,8 +161,12 @@ func (se SyslogEncoding) Template() string {
codec = "syslog"
except_fields = ["_internal"]
rfc = "{{.RFC}}"
{{ if .Facility }}
facility = "{{.Facility}}"
{{ end }}
{{ if .Severity }}
severity = "{{.Severity}}"
{{ end }}
{{ .AddLogSource }}
{{ .PayloadKey }}
{{end}}`
Expand Down Expand Up @@ -170,23 +224,53 @@ func getEncodingTemplatesAndFields(s *obs.Syslog) EncodingTemplateField {
templateFields := EncodingTemplateField{
FieldVRLList: []FieldVRLStringPair{},
}
if s.AppName != "" {

if s.Facility == "" {
templateFields.FieldVRLList = append(templateFields.FieldVRLList, FieldVRLStringPair{
Field: "app_name",
VRLString: commontemplate.TransformUserTemplateToVRL(s.AppName),
Field: "facility",
VRLString: commontemplate.TransformUserTemplateToVRL(`{._internal.syslog.facility || "user"}`),
})
}
if s.MsgId != "" {

if s.Severity == "" {
templateFields.FieldVRLList = append(templateFields.FieldVRLList, FieldVRLStringPair{
Field: "msg_id",
VRLString: commontemplate.TransformUserTemplateToVRL(s.MsgId),
Field: "severity",
VRLString: commontemplate.TransformUserTemplateToVRL(`{._internal.syslog.severity || "informational"}`),
})
}

if s.ProcId != "" {
if s.ProcId == "" {
s.ProcId = `{._internal.syslog.proc_id || "-"}`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My previous comment regarding "default" applies here, and it is the reason I really want to hash out in the document what the various scenarios look like. IMO the fallback should not be "-"; it should be empty.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, for RFC3164 it should be empty, but for RFC5424 it should be "-". I will update.

}
templateFields.FieldVRLList = append(templateFields.FieldVRLList, FieldVRLStringPair{
Field: "proc_id",
VRLString: commontemplate.TransformUserTemplateToVRL(s.ProcId),
})

if s.RFC == obs.SyslogRFC3164 {
if s.AppName == "" {
s.AppName = `{._internal.syslog.tag || ""}`
}
templateFields.FieldVRLList = append(templateFields.FieldVRLList, FieldVRLStringPair{
Field: "proc_id",
VRLString: commontemplate.TransformUserTemplateToVRL(s.ProcId),
Field: "tag",
VRLString: commontemplate.TransformUserTemplateToVRL(s.AppName),
})

} else {
if s.AppName == "" {
s.AppName = `{._internal.syslog.app_name || "-"}`
}
templateFields.FieldVRLList = append(templateFields.FieldVRLList, FieldVRLStringPair{
Field: "app_name",
VRLString: commontemplate.TransformUserTemplateToVRL(s.AppName),
})

if s.MsgId == "" {
s.MsgId = `{._internal.syslog.msg_id || "-"}`
}
templateFields.FieldVRLList = append(templateFields.FieldVRLList, FieldVRLStringPair{
Field: "msg_id",
VRLString: commontemplate.TransformUserTemplateToVRL(s.MsgId),
})
}

Expand Down Expand Up @@ -223,12 +307,13 @@ func parseEncoding(id string, inputs []string, templatePairs EncodingTemplateFie
Inputs: vectorhelpers.MakeInputs(inputs...),
EncodingFields: templatePairs,
PayloadKey: PayloadKey(o.PayloadKey),
RFC: string(o.RFC),
}
}

func Facility(s *obs.Syslog) string {
if s == nil || s.Facility == "" {
return "user"
return ""
}
if IsKeyExpr(s.Facility) {
return fmt.Sprintf("$%s", s.Facility)
Expand All @@ -238,7 +323,7 @@ func Facility(s *obs.Syslog) string {

func Severity(s *obs.Syslog) string {
if s == nil || s.Severity == "" {
return "informational"
return ""
}
if IsKeyExpr(s.Severity) {
return fmt.Sprintf("$%s", s.Severity)
Expand Down
10 changes: 9 additions & 1 deletion internal/generator/vector/output/syslog/syslog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package syslog_test

import (
"fmt"

. "github.com/onsi/ginkgo"
. "github.com/onsi/ginkgo/extensions/table"
. "github.com/onsi/gomega"
Expand Down Expand Up @@ -42,6 +41,7 @@ var _ = Describe("vector syslog clf output", func() {
},
},
}

initOutput = func() obs.OutputSpec {
return obs.OutputSpec{
Type: obs.OutputTypeSyslog,
Expand Down Expand Up @@ -100,6 +100,13 @@ var _ = Describe("vector syslog clf output", func() {
}
}, false),

Entry("should configure with defaults RFC3164", "rfc3164_with_defaults.toml", func(spec *obs.OutputSpec) {
spec.Syslog = &obs.Syslog{
URL: "udp://logserver:514",
RFC: obs.SyslogRFC3164,
}
}, false),

Entry("should configure TLS with log record field references", "tls_with_field_references.toml", func(spec *obs.OutputSpec) {
spec.TLS = tlsSpec
spec.Syslog = &obs.Syslog{
Expand All @@ -113,6 +120,7 @@ var _ = Describe("vector syslog clf output", func() {
PayloadKey: `{.payload_key}`,
}
}, false),

Entry("should set buffer tuning parameters", "tcp_with_tuning.toml", func(spec *obs.OutputSpec) {
spec.Syslog.URL = "tcp://logserver:514"
spec.Syslog.Tuning = &obs.SyslogTuningSpec{
Expand Down
32 changes: 29 additions & 3 deletions internal/generator/vector/output/syslog/tcp_with_defaults.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,29 @@ type = "remap"
inputs = ["application"]
source = '''
. = merge(., parse_json!(string!(.message))) ?? .
._internal.syslog.msg_id = .log_source

if .log_type == "infrastructure" && .log_source == "node" {
._internal.syslog.app_name = to_string!(.systemd.u.SYSLOG_IDENTIFIER||"-")
._internal.syslog.proc_id = to_string!(.systemd.t.PID||"-")
}
if .log_source == "container" {
._internal.syslog.app_name = join!([.kubernetes.namespace_name, .kubernetes.pod_name, .kubernetes.container_name], "_")
._internal.syslog.proc_id = to_string!(.kubernetes.pod_id||"-")
._internal.syslog.severity = .level
._internal.syslog.facility = "user"
}
if .log_type == "audit" {
._internal.syslog.app_name = .log_source
._internal.syslog.proc_id = to_string!(.auditID || "-")
._internal.syslog.severity = "informational"
._internal.syslog.facility = "security"
}
.facility = to_string!(._internal.syslog.facility || "user")
.severity = to_string!(._internal.syslog.severity || "informational")
.proc_id = to_string!(._internal.syslog.proc_id || "-")
.app_name = to_string!(._internal.syslog.app_name || "-")
.msg_id = to_string!(._internal.syslog.msg_id || "-")
'''

[sinks.example]
Expand All @@ -15,6 +38,9 @@ mode = "tcp"
codec = "syslog"
except_fields = ["_internal"]
rfc = "rfc5424"
facility = "user"
severity = "informational"
add_log_source = false
add_log_source = false
facility = "$$.message.facility"
severity = "$$.message.severity"
proc_id = "$$.message.proc_id"
app_name = "$$.message.app_name"
msg_id = "$$.message.msg_id"
32 changes: 30 additions & 2 deletions internal/generator/vector/output/syslog/tcp_with_tuning.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,31 @@ type = "remap"
inputs = ["application"]
source = '''
. = merge(., parse_json!(string!(.message))) ?? .

._internal.syslog.msg_id = .log_source

if .log_type == "infrastructure" && .log_source == "node" {
._internal.syslog.app_name = to_string!(.systemd.u.SYSLOG_IDENTIFIER||"-")
._internal.syslog.proc_id = to_string!(.systemd.t.PID||"-")
}
if .log_source == "container" {
._internal.syslog.app_name = join!([.kubernetes.namespace_name, .kubernetes.pod_name, .kubernetes.container_name], "_")
._internal.syslog.proc_id = to_string!(.kubernetes.pod_id||"-")
._internal.syslog.severity = .level
._internal.syslog.facility = "user"
}
if .log_type == "audit" {
._internal.syslog.app_name = .log_source
._internal.syslog.proc_id = to_string!(.auditID || "-")
._internal.syslog.severity = "informational"
._internal.syslog.facility = "security"
}
.facility = to_string!(._internal.syslog.facility || "user")
.severity = to_string!(._internal.syslog.severity || "informational")
.proc_id = to_string!(._internal.syslog.proc_id || "-")
.app_name = to_string!(._internal.syslog.app_name || "-")
.msg_id = to_string!(._internal.syslog.msg_id || "-")

'''

[sinks.example]
Expand All @@ -15,9 +40,12 @@ mode = "tcp"
codec = "syslog"
except_fields = ["_internal"]
rfc = "rfc5424"
facility = "user"
severity = "informational"
add_log_source = false
facility = "$$.message.facility"
severity = "$$.message.severity"
proc_id = "$$.message.proc_id"
app_name = "$$.message.app_name"
msg_id = "$$.message.msg_id"

[sinks.example.buffer]
type = "disk"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,27 @@ inputs = ["application"]
source = '''
. = merge(., parse_json!(string!(.message))) ?? .

._internal.syslog.msg_id = .log_source

if .log_type == "infrastructure" && .log_source == "node" {
._internal.syslog.app_name = to_string!(.systemd.u.SYSLOG_IDENTIFIER||"-")
._internal.syslog.proc_id = to_string!(.systemd.t.PID||"-")
}
if .log_source == "container" {
._internal.syslog.app_name = join!([.kubernetes.namespace_name, .kubernetes.pod_name, .kubernetes.container_name], "_")
._internal.syslog.proc_id = to_string!(.kubernetes.pod_id||"-")
._internal.syslog.severity = .level
._internal.syslog.facility = "user"
}
if .log_type == "audit" {
._internal.syslog.app_name = .log_source
._internal.syslog.proc_id = to_string!(.auditID || "-")
._internal.syslog.severity = "informational"
._internal.syslog.facility = "security"
}
.proc_id = to_string!(.proc_id||"none")
.app_name = to_string!(.app_name||"none")
.msg_id = to_string!(.msg_id||"none")
.proc_id = to_string!(.proc_id||"none")

if is_null(.payload_key) {
.payload_key = .
Expand All @@ -29,9 +47,9 @@ facility = "$$.message.facility"
severity = "$$.message.severity"
add_log_source = false
payload_key = "payload_key"
proc_id = "$$.message.proc_id"
app_name = "$$.message.app_name"
msg_id = "$$.message.msg_id"
proc_id = "$$.message.proc_id"

[sinks.example.tls]
enabled = true
Expand Down
Loading