Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Logging (JSON) formatting; span export #1364

Merged
merged 3 commits into from
Mar 15, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions internal/log/format.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package log

import (
"bytes"
"context"
"encoding/json"
"fmt"
"net"
"reflect"
"time"

"github.com/containerd/containerd/log"
"github.com/sirupsen/logrus"
)

const TimeFormat = log.RFC3339NanoFixed

func FormatTime(t time.Time) string {
return t.Format(TimeFormat)
}

// DurationFormat formats a [time.Duration] log entry.
//
// A nil value signals an error with the formatting.
type DurationFormat func(time.Duration) interface{}

func DurationFormatString(d time.Duration) interface{} { return d.String() }
func DurationFormatSeconds(d time.Duration) interface{} { return d.Seconds() }
func DurationFormatMilliseconds(d time.Duration) interface{} { return d.Milliseconds() }

// FormatIO formats net.Conn and other types that have an `Addr()` or `Name()`.
//
// See FormatEnabled for more information.
func FormatIO(ctx context.Context, v interface{}) string {
m := make(map[string]string)
m["type"] = reflect.TypeOf(v).String()

switch t := v.(type) {
case net.Conn:
m["localAddress"] = formatAddr(t.LocalAddr())
m["remoteAddress"] = formatAddr(t.RemoteAddr())
case interface{ Addr() net.Addr }:
m["address"] = formatAddr(t.Addr())
default:
return Format(ctx, t)
}

return Format(ctx, m)
}

func formatAddr(a net.Addr) string {
return a.Network() + "://" + a.String()
}

// Format formats an object into a JSON string, without any indendtation or
// HTML escapes.
// Context is used to output a log waring if the conversion fails.
//
// This is intended primarily for `trace.StringAttribute()`
func Format(ctx context.Context, v interface{}) string {
b, err := encode(v)
if err != nil {
G(ctx).WithError(err).Warning("could not format value")
return ""
}

return string(b)
}

func encode(v interface{}) ([]byte, error) {
return encodeBuffer(&bytes.Buffer{}, v)
}

func encodeBuffer(buf *bytes.Buffer, v interface{}) ([]byte, error) {
enc := json.NewEncoder(buf)
enc.SetEscapeHTML(false)
enc.SetIndent("", "")

if err := enc.Encode(v); err != nil {
err = fmt.Errorf("could not marshall %T to JSON for logging: %w", v, err)
return nil, err
}

// encoder.Encode appends a newline to the end
return bytes.TrimSpace(buf.Bytes()), nil
}

// GetCallerName checks if the entry appears caller caller information and returns the function name.
//
// This is intended to be used with "github.com/Microsoft/go-winio/pkg/etwlogrus".WithGetName.
func GetCallerName(e *logrus.Entry) string {
helsaawy marked this conversation as resolved.
Show resolved Hide resolved
if e.Caller == nil {
return ""
}
if e.Caller.Func != nil {
return e.Caller.Func.Name()
}
return e.Caller.Function
}
141 changes: 134 additions & 7 deletions internal/log/hook.go
Original file line number Diff line number Diff line change
@@ -1,35 +1,162 @@
package log

import (
"bytes"
"reflect"
"time"

"github.com/Microsoft/hcsshim/internal/logfields"
"github.com/containerd/containerd/log"
"github.com/sirupsen/logrus"
"go.opencensus.io/trace"
)

// Hook serves to intercept and format `logrus.Entry`s before they are passed
// to the ETW hook.
const nullString = "null"

// Hook intercepts and formats a [logrus.Entry] before it logged.
//
// The containerd shim discards the (formatted) logrus output, and outputs only via ETW.
// The Linux GCS outputs logrus entries over stdout, which is consumed by the shim and
// then re-output via the ETW hook.
type Hook struct{}
// The shim either outputs the logs through an ETW hook, discarding the (formatted) output
// or logs output to a pipe for logging binaries to consume.
// The Linux GCS outputs logrus entries over stdout, which is then consumed and re-output
// by the shim.
type Hook struct {
// EncodeAsJSON formats structs, maps, arrays, slices, and [bytes.Buffer] as JSON.
// Variables of [bytes.Buffer] will be converted to []byte.
//
// Default is true.
EncodeAsJSON bool

// FormatTime specifies the format for [time.Time] variables.
// An empty string disabled formatting.
//
// Default is [github.com/containerd/containerd/log.RFC3339NanoFixed].
TimeFormat string

// Duration format converts a [time.Duration] fields to an appropriate encoding.
//
// Default is [DurationFormatSeconds].
DurationFormat DurationFormat

// AddSpanContext adds [logfields.TraceID] and [logfields.SpanID] fields to
// the entry from the span context stored in [logrus.Entry.Context], if it exists.
AddSpanContext bool
helsaawy marked this conversation as resolved.
Show resolved Hide resolved

// Whether to encode errors or keep them as is.
EncodeError bool
}

var _ logrus.Hook = &Hook{}

func NewHook() *Hook {
return &Hook{}
return &Hook{
EncodeAsJSON: true,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this would be a good place to consider splitting PRs in general: Adding new functionality vs turning on that functionality by default.

It's a lot easier to get in changes to add new JSON-encoding support, and keep it off by default, and then make a single, small, obvious change to turn it on by default, rather than mixing that in with the mechanism implementation.

In this case, I'm not too worried about the time or duration formatting, but especially turning on JSON encoding by default seems riskier to me.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can split that part out in a new PR

I discussed with Howard a while back and am re-discussing with him now, but we make no guarantees about log format stability, and we change logs and their fields all the time.
Plus, currently relying on fmt.Sprintf("%v",...) is also not stable, since go makes no guarantees about what the verbose format for structs will be, and any field or struct update will impact that as well

TimeFormat: log.RFC3339NanoFixed,
DurationFormat: DurationFormatSeconds,
helsaawy marked this conversation as resolved.
Show resolved Hide resolved
AddSpanContext: true,
}
}

func (h *Hook) Levels() []logrus.Level {
return logrus.AllLevels
}

func (h *Hook) Fire(e *logrus.Entry) (err error) {
// JSON encode, if necessary, then add span information
h.encode(e)
h.addSpanContext(e)

return nil
}

// encode loops through all the fields in the [logrus.Entry] and encodes them according to
// the settings in [Hook].
// If [Hook.TimeFormat] is non-empty, it will be passed to [time.Time.Format] for
// fields of type [time.Time].
// If [Hook.EncodeAsJSON] is true, then fields that are numeric, boolean, strings, or
// errors will be encoded via a [json.NewEncoder] in encode().
//
// If [Hook.TimeFormat] is empty and [Hook.EncodeAsJSON] is false, then this is a no-op.
func (h *Hook) encode(e *logrus.Entry) {
helsaawy marked this conversation as resolved.
Show resolved Hide resolved
d := e.Data

formatTime := h.TimeFormat != ""
if !(h.EncodeAsJSON || formatTime) {
helsaawy marked this conversation as resolved.
Show resolved Hide resolved
return
}

// todo: replace these with constraints.Integer, constraints.Float, etc in go1.18
for k, v := range d {
kevpar marked this conversation as resolved.
Show resolved Hide resolved
if !h.EncodeError {
if _, ok := v.(error); k == logrus.ErrorKey || ok {
continue
}
}

if t, ok := v.(time.Time); formatTime && ok {
d[k] = t.Format(h.TimeFormat)
continue
}

if !h.EncodeAsJSON {
continue
}

switch vv := v.(type) {
// built in types
case bool, string, error, uintptr,
helsaawy marked this conversation as resolved.
Show resolved Hide resolved
int8, int16, int32, int64, int,
uint8, uint32, uint64, uint,
float32, float64:
continue

case time.Duration:
if h.DurationFormat != nil {
if i := h.DurationFormat(vv); i != nil {
d[k] = i
}
}
continue

// Rather than setting d[k] = vv.String(), JSON encode []byte value, since it
// may be a binary payload and not representable as a string.
// `case bytes.Buffer,*bytes.Buffer:` resolves `vv` to `interface{}`,
// so cannot use `vv.Bytes`.
// Could move to below the `reflect.Indirect()` call below, but
// that would require additional typematching and dereferencing.
// Easier to keep these duplicate branches here.
case bytes.Buffer:
v = vv.Bytes()
case *bytes.Buffer:
v = vv.Bytes()
}

// dereference any pointers
rv := reflect.Indirect(reflect.ValueOf(v))
// check if `v` is a null pointer
if !rv.IsValid() {
d[k] = nullString
continue
}

switch rv.Kind() {
case reflect.Map, reflect.Struct, reflect.Array, reflect.Slice:
default:
continue
}

b, err := encode(v)
if err != nil {
// Errors are written to stderr (ie, to `panic.log`) and stops the remaining
// hooks (ie, exporting to ETW) from firing. So add encoding errors to
// the entry data to be written out, but keep on processing.
d[k+"-"+logrus.ErrorKey] = err.Error()
}

// if `err != nil`, then `b == nil` and this will be the empty string
d[k] = string(b)
kevpar marked this conversation as resolved.
Show resolved Hide resolved
}
}

func (h *Hook) addSpanContext(e *logrus.Entry) {
ctx := e.Context
if ctx == nil {
Expand Down
22 changes: 6 additions & 16 deletions internal/log/scrub.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"bytes"
"encoding/json"
"errors"
"strings"
"sync/atomic"

hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
Expand Down Expand Up @@ -56,11 +55,11 @@ func ScrubProcessParameters(s string) (string, error) {
}
pp.Environment = map[string]string{_scrubbedReplacement: _scrubbedReplacement}

buf := bytes.NewBuffer(b[:0])
if err := encode(buf, pp); err != nil {
b, err := encodeBuffer(bytes.NewBuffer(b[:0]), pp)
if err != nil {
return "", err
}
return strings.TrimSpace(buf.String()), nil
return string(b), nil
}

// ScrubBridgeCreate scrubs requests sent over the bridge of type
Expand Down Expand Up @@ -150,21 +149,12 @@ func scrubBytes(b []byte, scrub scrubberFunc) ([]byte, error) {
return nil, err
}

buf := &bytes.Buffer{}
if err := encode(buf, m); err != nil {
b, err := encode(m)
if err != nil {
return nil, err
}

return bytes.TrimSpace(buf.Bytes()), nil
}

func encode(buf *bytes.Buffer, v interface{}) error {
enc := json.NewEncoder(buf)
enc.SetEscapeHTML(false)
if err := enc.Encode(v); err != nil {
return err
}
return nil
return b, nil
}

func isRequestBase(m genMap) bool {
Expand Down
69 changes: 69 additions & 0 deletions internal/oc/errors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package oc

import (
"errors"
"io"
"net"
"os"

"github.com/containerd/containerd/errdefs"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)

// todo: break import cycle with "internal/hcs" and errors errors defined there
// todo: add errors defined in "internal/guest/gcserror" (Hresult does not implement error)

func toStatusCode(err error) codes.Code {
// checks if err implements GRPCStatus() *"google.golang.org/grpc/status".Status,
// wraps an error defined in "github.com/containerd/containerd/errdefs", or is a
// context timeout or cancelled error
if s, ok := status.FromError(errdefs.ToGRPC(err)); ok {
return s.Code()
}

switch {
kevpar marked this conversation as resolved.
Show resolved Hide resolved
// case IsAny(err):
// return codes.Cancelled
case isAny(err, os.ErrInvalid):
return codes.InvalidArgument
case isAny(err, os.ErrDeadlineExceeded):
kevpar marked this conversation as resolved.
Show resolved Hide resolved
return codes.DeadlineExceeded
case isAny(err, os.ErrNotExist):
return codes.NotFound
case isAny(err, os.ErrExist):
return codes.AlreadyExists
case isAny(err, os.ErrPermission):
return codes.PermissionDenied
// case IsAny(err):
// return codes.ResourceExhausted
case isAny(err, os.ErrClosed, net.ErrClosed, io.ErrClosedPipe, io.ErrShortBuffer):
return codes.FailedPrecondition
// case IsAny(err):
// return codes.Aborted
// case IsAny(err):
// return codes.OutOfRange
// case IsAny(err):
// return codes.Unimplemented
case isAny(err, io.ErrNoProgress):
return codes.Internal
// case IsAny(err):
// return codes.Unavailable
case isAny(err, io.ErrShortWrite, io.ErrUnexpectedEOF):
return codes.DataLoss
// case IsAny(err):
// return codes.Unauthenticated
default:
return codes.Unknown
}
}

// isAny returns true if errors.Is is true for any of the provided errors, errs.
func isAny(err error, errs ...error) bool {
for _, e := range errs {
if errors.Is(err, e) {
return true
}
}
return false
}
Loading