From e1ee00d16ed621594a92ce0456eb048362962ff0 Mon Sep 17 00:00:00 2001 From: Jose Carlos Venegas Munoz Date: Fri, 24 Apr 2020 20:35:38 +0000 Subject: [PATCH] clh: Improve hypervisor logging Use systemd-cat to collect hypervisor output. The `systemd-cat` program will open a journal fd and call `cat(1)` to redirect all the output to the fd. This requires an extra binary to read from hypervisor stdout (that has combined stdin, stderr and serial terminal). But because it is cat the overhead is minimal and only is started on Kata debug mode. Depends-on: github.com/kata-containers/packaging#1047 Fixes: #2439 Signed-off-by: Jose Carlos Venegas Munoz --- virtcontainers/clh.go | 104 +++++++++++++++++++--------------- virtcontainers/utils/utils.go | 21 +++++++ 2 files changed, 79 insertions(+), 46 deletions(-) diff --git a/virtcontainers/clh.go b/virtcontainers/clh.go index 315ad8bfe0..65ae04cb6a 100644 --- a/virtcontainers/clh.go +++ b/virtcontainers/clh.go @@ -6,10 +6,11 @@ package virtcontainers import ( - "bytes" "context" "encoding/json" "fmt" + "io" + "io/ioutil" "net" "net/http" "os" @@ -59,9 +60,7 @@ const ( clhStopSandboxTimeout = 3 clhSocket = "clh.sock" clhAPISocket = "clh-api.sock" - clhLogFile = "clh.log" virtioFsSocket = "virtiofsd.sock" - clhSerial = "serial-tty.log" supportedMajorVersion = 0 supportedMinorVersion = 5 defaultClhPath = "/usr/local/bin/cloud-hypervisor" @@ -120,7 +119,6 @@ type cloudHypervisor struct { APIClient clhClient version CloudHypervisorVersion vmconfig chclient.VmConfig - cmdOutput bytes.Buffer virtiofsd Virtiofsd store persistapi.PersistDriver } @@ -286,13 +284,8 @@ func (clh *cloudHypervisor) createSandbox(ctx context.Context, id string, networ // set the serial console to the cloud hypervisor if clh.config.Debug { - serialPath, err := clh.serialPath(clh.id) - if err != nil { - return err - } clh.vmconfig.Serial = chclient.ConsoleConfig{ - Mode: cctFILE, - File: serialPath, + Mode: cctTTY, } } else { @@ -368,17 +361,12 @@ func (clh *cloudHypervisor) startSandbox(timeout int) error { var strErr string strErr, pid, err := clh.LaunchClh() if err != nil { - return fmt.Errorf("failed to launch cloud-hypervisor: %s, error messages from log: %s", err, strErr) - } - clh.state.PID = pid - - if err := clh.waitVMM(clhTimeout); err != nil { - clh.Logger().WithField("error", err).WithField("output", clh.cmdOutput.String()).Warn("cloud-hypervisor init failed") if shutdownErr := clh.virtiofsd.Stop(); shutdownErr != nil { clh.Logger().WithField("error", shutdownErr).Warn("error shutting down Virtiofsd") } - return err + return fmt.Errorf("failed to launch cloud-hypervisor: %q, hypervisor output:\n%s", err, strErr) } + clh.state.PID = pid if err := clh.bootVM(ctx); err != nil { return err @@ -771,18 +759,10 @@ func (clh *cloudHypervisor) vsockSocketPath(id string) (string, error) { return utils.BuildSocketPath(clh.store.RunVMStoragePath(), id, clhSocket) } -func (clh *cloudHypervisor) serialPath(id string) (string, error) { - return utils.BuildSocketPath(clh.store.RunVMStoragePath(), id, clhSerial) -} - func (clh *cloudHypervisor) apiSocketPath(id string) (string, error) { return utils.BuildSocketPath(clh.store.RunVMStoragePath(), id, clhAPISocket) } -func (clh *cloudHypervisor) logFilePath(id string) (string, error) { - return utils.BuildSocketPath(clh.store.RunVMStoragePath(), id, clhLogFile) -} - func (clh *cloudHypervisor) waitVMM(timeout uint) error { clhRunning, err := clh.isClhRunning(timeout) @@ -873,8 +853,6 @@ func (clh *cloudHypervisor) getAvailableVersion() error { func (clh *cloudHypervisor) LaunchClh() (string, int, error) { - errStr := "" - clhPath, err := clh.clhPath() if err != nil { return "", -1, err @@ -882,36 +860,71 @@ func (clh *cloudHypervisor) LaunchClh() (string, int, error) { args := []string{cscAPIsocket, clh.state.apiSocket} if clh.config.Debug { + // Cloud hypervisor log levels + // 'v' occurrences increase the level + //0 => Error + //1 => Warn + //2 => Info + //3 => Debug + //4+ => Trace + // Use Info, the CI runs with debug enabled + // a high level of logging increases the boot time + // and in a nested environment this could increase + // the chances to fail because agent is not + // ready on time. + args = append(args, "-vv") + } - logfile, err := clh.logFilePath(clh.id) + clh.Logger().WithField("path", clhPath).Info() + clh.Logger().WithField("args", strings.Join(args, " ")).Info() + + cmdHypervisor := exec.Command(clhPath, args...) + var hypervisorOutput io.ReadCloser + if clh.config.Debug { + cmdHypervisor.Env = os.Environ() + cmdHypervisor.Env = append(cmdHypervisor.Env, "RUST_BACKTRACE=full") + // Get StdoutPipe only for debug, without debug golang will redirect to /dev/null + hypervisorOutput, err = cmdHypervisor.StdoutPipe() if err != nil { return "", -1, err } - args = append(args, cscLogFile) - args = append(args, logfile) } - clh.Logger().WithField("path", clhPath).Info() - clh.Logger().WithField("args", strings.Join(args, " ")).Info() + cmdHypervisor.Stderr = cmdHypervisor.Stdout - cmd := exec.Command(clhPath, args...) - cmd.Stdout = &clh.cmdOutput - cmd.Stderr = &clh.cmdOutput + err = utils.StartCmd(cmdHypervisor) + if err != nil { + return "", -1, err + } - if clh.config.Debug { - cmd.Env = os.Environ() - cmd.Env = append(cmd.Env, "RUST_BACKTRACE=full") + if err := clh.waitVMM(clhTimeout); err != nil { + clh.Logger().WithField("error", err).Warn("cloud-hypervisor init failed") + var output string + + if hypervisorOutput != nil { + b, errRead := ioutil.ReadAll(hypervisorOutput) + if errRead != nil { + output = "failed to read hypervisor output to get error information" + } else { + output = string(b) + } + } else { + output = "Please enable hypervisor logging to get stdout information" + } + + return output, -1, err } - if err := utils.StartCmd(cmd); err != nil { - fmt.Println("Error starting cloudHypervisor", err) - if cmd.Process != nil { - cmd.Process.Kill() + if clh.config.Debug { + cmdLogger := utils.NewProgramLogger("kata-hypervisor") + clh.Logger().Debugf("Starting process logger(%s) for hypervisor", cmdLogger) + if err := cmdLogger.StartLogger(hypervisorOutput); err != nil { + // Not critical to run a container, but output wont be logged + clh.Logger().Warnf("Failed start process logger(%s) %s", cmdLogger, err) } - return errStr, 0, err } - return errStr, cmd.Process.Pid, nil + return "", cmdHypervisor.Process.Pid, nil } //########################################################################### @@ -922,13 +935,12 @@ func (clh *cloudHypervisor) LaunchClh() (string, int, error) { const ( cctOFF string = "Off" - cctFILE string = "File" cctNULL string = "Null" + cctTTY string = "Tty" ) const ( cscAPIsocket string = "--api-socket" - cscLogFile string = "--log-file" ) //**************************************** diff --git a/virtcontainers/utils/utils.go b/virtcontainers/utils/utils.go index 85c554896a..da3abf762a 100644 --- a/virtcontainers/utils/utils.go +++ b/virtcontainers/utils/utils.go @@ -9,6 +9,7 @@ import ( "crypto/rand" "errors" "fmt" + "io" "os" "os/exec" "path/filepath" @@ -275,3 +276,23 @@ const ( MiB = KiB << 10 GiB = MiB << 10 ) + +// Binary to use to log program output +const LoggerBinaryName = "systemd-cat" + +type ProgramLogger struct { + cmd *exec.Cmd +} + +func NewProgramLogger(loggerLabel string) ProgramLogger { + return ProgramLogger{cmd: exec.Command(LoggerBinaryName, "-t", loggerLabel)} +} + +func (p *ProgramLogger) StartLogger(output io.ReadCloser) error { + p.cmd.Stdin = output + return StartCmd(p.cmd) +} + +func (p ProgramLogger) String() string { + return p.cmd.Path +}