diff --git a/mantle/cmd/kola/testiso.go b/mantle/cmd/kola/testiso.go index e5c991fca8..f8f1a1377d 100644 --- a/mantle/cmd/kola/testiso.go +++ b/mantle/cmd/kola/testiso.go @@ -479,6 +479,16 @@ func filterTests(tests []string, patterns []string) ([]string, error) { return r, nil } +// possible wrapper for runTestIso to pass m platform.Machine +func runWrapper(cmd *cobra.Command, args []string, m platform.Machine) error { + arguments := append(args, m.ConsoleOutput()) + // print args + fmt.Println(arguments) + // or + _ = arguments + return nil +} + func runTestIso(cmd *cobra.Command, args []string) (err error) { if kola.CosaBuild == nil { return fmt.Errorf("Must provide --build") @@ -605,17 +615,17 @@ func runTestIso(cmd *cobra.Command, args []string) (err error) { switch components[0] { case "pxe-offline-install", "pxe-online-install": - duration, err = testPXE(ctx, inst, filepath.Join(outputDir, test)) + duration, err = testPXE(ctx, inst, filepath.Join(outputDir, test), m) case "iso-as-disk": - duration, err = testAsDisk(ctx, filepath.Join(outputDir, test)) + duration, err = testAsDisk(ctx, filepath.Join(outputDir, test), m) case "iso-live-login": - duration, err = testLiveLogin(ctx, filepath.Join(outputDir, test)) + duration, err = testLiveLogin(ctx, filepath.Join(outputDir, test), m) case "iso-fips": - duration, err = testLiveFIPS(ctx, filepath.Join(outputDir, test)) + duration, err = testLiveFIPS(ctx, filepath.Join(outputDir, test), m) case "iso-install", "iso-offline-install", "iso-offline-install-fromram": - duration, err = testLiveIso(ctx, inst, filepath.Join(outputDir, test), false) + duration, err = testLiveIso(ctx, inst, filepath.Join(outputDir, test), false, m) case "miniso-install": - duration, err = testLiveIso(ctx, inst, filepath.Join(outputDir, test), true) + duration, err = testLiveIso(ctx, inst, filepath.Join(outputDir, test), true, m) case "iso-offline-install-iscsi": var butane_config string switch components[1] { @@ -628,7 +638,7 @@ func runTestIso(cmd *cobra.Command, args []string) (err error) { default: plog.Fatalf("Unknown test name:%s", test) } - duration, err = testLiveInstalliscsi(ctx, inst, filepath.Join(outputDir, test), butane_config) + duration, err = testLiveInstalliscsi(ctx, inst, filepath.Join(outputDir, test), butane_config, m) default: plog.Fatalf("Unknown test name:%s", test) } @@ -654,9 +664,11 @@ func runTestIso(cmd *cobra.Command, args []string) (err error) { return nil } -func awaitCompletion(ctx context.Context, inst *platform.QemuInstance, outdir string, qchan *os.File, booterrchan chan error, expected []string) (time.Duration, error) { +func awaitCompletion(ctx context.Context, inst *platform.QemuInstance, outdir string, qchan *os.File, booterrchan chan error, expected []string, m ...platform.Machine) (time.Duration, error) { start := time.Now() errchan := make(chan error) + // if m is passed in + consoleFilePath := m[0].ConsoleOutput() go func() { timeout := (time.Duration(installTimeoutMins*(100+kola.Options.ExtendTimeoutPercent)) * time.Minute) / 100 time.Sleep(timeout) @@ -679,55 +691,81 @@ func awaitCompletion(ctx context.Context, inst *platform.QemuInstance, outdir st errchan <- err } }() - } - go func() { - err := inst.Wait() - // only one Wait() gets process data, so also manually check for signal - plog.Debugf("qemu exited err=%v", err) - if err == nil && inst.Signaled() { - err = errors.New("process killed") - } - if err != nil { - errchan <- errors.Wrapf(err, "QEMU unexpectedly exited while awaiting completion") - } - time.Sleep(1 * time.Minute) - errchan <- fmt.Errorf("QEMU exited; timed out waiting for completion") - }() - go func() { - r := bufio.NewReader(qchan) - for _, exp := range expected { - l, err := r.ReadString('\n') + + go func() { + file, err := os.Open(consoleFilePath) if err != nil { - if err == io.EOF { - // this may be from QEMU getting killed or exiting; wait a bit - // to give a chance for .Wait() above to feed the channel with a - // better error - time.Sleep(1 * time.Second) - errchan <- fmt.Errorf("Got EOF from completion channel, %s expected", exp) - } else { - errchan <- errors.Wrapf(err, "reading from completion channel") - } + fmt.Printf("Error opening %s file: \n", consoleFilePath) return } - line := strings.TrimSpace(l) - if line != exp { - errchan <- fmt.Errorf("Unexpected string from completion channel: %s expected: %s", line, exp) - return + fmt.Println(consoleFilePath) + defer file.Close() + errBuf, err := inst.CheckConsoleForBadness(ctx, m) + if err == nil && errBuf != "" { + plog.Info("Badness identified") + if err := os.WriteFile(consoleFilePath, []byte(errBuf), 0644); err != nil { + plog.Errorf("Failed to write journal: %v", err) + } + err = platform.ErrInitramfsEmergency } - plog.Debugf("Matched expected message %s", exp) - } - plog.Debugf("Matched all expected messages") - // OK! - errchan <- nil - }() - go func() { - //check for error when switching boot order - if booterrchan != nil { - if err := <-booterrchan; err != nil { - errchan <- err + + if err != nil { + errchan <- errors.Wrapf(err, "CheckConsoleForBadness detected an error") } - } - }() + }() + + go func() { + err := inst.Wait() + // only one Wait() gets process data, so also manually check for signal + plog.Debugf("qemu exited err=%v", err) + if err == nil && inst.Signaled() { + err = errors.New("process killed") + } + if err != nil { + errchan <- errors.Wrapf(err, "QEMU unexpectedly exited while awaiting completion") + } + time.Sleep(1 * time.Minute) + errchan <- fmt.Errorf("QEMU exited; timed out waiting for completion") + }() + + go func() { + r := bufio.NewReader(qchan) + for _, exp := range expected { + l, err := r.ReadString('\n') + if err != nil { + if err == io.EOF { + // this may be from QEMU getting killed or exiting; wait a bit + // to give a chance for .Wait() above to feed the channel with a + // better error + time.Sleep(1 * time.Second) + errchan <- fmt.Errorf("Got EOF from completion channel, %s expected", exp) + } else { + errchan <- errors.Wrapf(err, "reading from completion channel") + } + return + } + line := strings.TrimSpace(l) + if line != exp { + errchan <- fmt.Errorf("Unexpected string from completion channel: %s expected: %s", line, exp) + return + } + plog.Debugf("Matched expected message %s", exp) + } + plog.Debugf("Matched all expected messages") + // OK! + errchan <- nil + }() + + go func() { + //check for error when switching boot order + if booterrchan != nil { + if err := <-booterrchan; err != nil { + errchan <- err + } + } + }() + + } err := <-errchan return time.Since(start), err } @@ -745,7 +783,7 @@ func printResult(test string, duration time.Duration, err error) bool { return false } -func testPXE(ctx context.Context, inst platform.Install, outdir string) (time.Duration, error) { +func testPXE(ctx context.Context, inst platform.Install, outdir string, m platform.Machine) (time.Duration, error) { if addNmKeyfile { return 0, errors.New("--add-nm-keyfile not yet supported for PXE") } @@ -798,10 +836,10 @@ func testPXE(ctx context.Context, inst platform.Install, outdir string) (time.Du } }() - return awaitCompletion(ctx, mach.QemuInst, outdir, completionChannel, mach.BootStartedErrorChannel, []string{liveOKSignal, signalCompleteString}) + return awaitCompletion(ctx, mach.QemuInst, outdir, completionChannel, mach.BootStartedErrorChannel, []string{liveOKSignal, signalCompleteString}, m) } -func testLiveIso(ctx context.Context, inst platform.Install, outdir string, minimal bool) (time.Duration, error) { +func testLiveIso(ctx context.Context, inst platform.Install, outdir string, minimal bool, m platform.Machine) (time.Duration, error) { tmpd, err := os.MkdirTemp("", "kola-testiso") if err != nil { return 0, err @@ -874,11 +912,11 @@ func testLiveIso(ctx context.Context, inst platform.Install, outdir string, mini } }() - return awaitCompletion(ctx, mach.QemuInst, outdir, completionChannel, mach.BootStartedErrorChannel, []string{liveOKSignal, signalCompleteString}) + return awaitCompletion(ctx, mach.QemuInst, outdir, completionChannel, mach.BootStartedErrorChannel, []string{liveOKSignal, signalCompleteString}, m) } // testLiveFIPS verifies that adding fips=1 to the ISO results in a FIPS mode system -func testLiveFIPS(ctx context.Context, outdir string) (time.Duration, error) { +func testLiveFIPS(ctx context.Context, outdir string, m platform.Machine) (time.Duration, error) { tmpd, err := os.MkdirTemp("", "kola-testiso") if err != nil { return 0, err @@ -934,10 +972,10 @@ RequiredBy=fips-signal-ok.service } defer mach.Destroy() - return awaitCompletion(ctx, mach, outdir, completionChannel, nil, []string{liveOKSignal}) + return awaitCompletion(ctx, mach, outdir, completionChannel, nil, []string{liveOKSignal}, m) } -func testLiveLogin(ctx context.Context, outdir string) (time.Duration, error) { +func testLiveLogin(ctx context.Context, outdir string, m platform.Machine) (time.Duration, error) { builddir := kola.CosaBuild.Dir isopath := filepath.Join(builddir, kola.CosaBuild.Meta.BuildArtifacts.LiveIso.Path) builder, err := newBaseQemuBuilder(outdir) @@ -964,10 +1002,10 @@ func testLiveLogin(ctx context.Context, outdir string) (time.Duration, error) { } defer mach.Destroy() - return awaitCompletion(ctx, mach, outdir, completionChannel, nil, []string{"coreos-liveiso-success"}) + return awaitCompletion(ctx, mach, outdir, completionChannel, nil, []string{"coreos-liveiso-success"}, m) } -func testAsDisk(ctx context.Context, outdir string) (time.Duration, error) { +func testAsDisk(ctx context.Context, outdir string, m platform.Machine) (time.Duration, error) { builddir := kola.CosaBuild.Dir isopath := filepath.Join(builddir, kola.CosaBuild.Meta.BuildArtifacts.LiveIso.Path) builder, config, err := newQemuBuilder(outdir) @@ -995,7 +1033,7 @@ func testAsDisk(ctx context.Context, outdir string) (time.Duration, error) { } defer mach.Destroy() - return awaitCompletion(ctx, mach, outdir, completionChannel, nil, []string{liveOKSignal}) + return awaitCompletion(ctx, mach, outdir, completionChannel, nil, []string{liveOKSignal}, m) } // iscsi_butane_setup.yaml contains the full butane config but here is an overview of the setup @@ -1022,7 +1060,7 @@ func testAsDisk(ctx context.Context, outdir string) (time.Duration, error) { // 6 - /var/nested-ign.json contains an ignition config: // - when the system is booted, write a success string to /dev/virtio-ports/testisocompletion // - as this serial device is mapped to the host serial device, the test concludes -func testLiveInstalliscsi(ctx context.Context, inst platform.Install, outdir string, butane string) (time.Duration, error) { +func testLiveInstalliscsi(ctx context.Context, inst platform.Install, outdir string, butane string, m platform.Machine) (time.Duration, error) { builddir := kola.CosaBuild.Dir isopath := filepath.Join(builddir, kola.CosaBuild.Meta.BuildArtifacts.LiveIso.Path) @@ -1097,5 +1135,5 @@ func testLiveInstalliscsi(ctx context.Context, inst platform.Install, outdir str } defer mach.Destroy() - return awaitCompletion(ctx, mach, outdir, completionChannel, nil, []string{"iscsi-boot-ok"}) + return awaitCompletion(ctx, mach, outdir, completionChannel, nil, []string{"iscsi-boot-ok"}, m) } diff --git a/mantle/kola/harness.go b/mantle/kola/harness.go index 6ecd84b81d..114a7548cb 100644 --- a/mantle/kola/harness.go +++ b/mantle/kola/harness.go @@ -1917,39 +1917,51 @@ func ScpKolet(machines []platform.Machine) error { return fmt.Errorf("Unable to locate kolet binary for %s", mArch) } -// CheckConsole checks some console output for badness and returns short -// descriptions of any bad lines it finds along with a boolean -// indicating if the configuration has the bad lines marked as -// warnOnly or not (for things we don't want to error for). If t is -// specified, its flags are respected and tags possibly updated for -// rerun success. -func CheckConsole(output []byte, t *register.Test) (bool, []string) { - var badlines []string - warnOnly, allowRerunSuccess := true, true +// CheckConsoleText checks console output for badness +// input : the console content +// output true if badlines were found and the bad lines. +func CheckConsoleText(input []byte) (bool, []string) { + warnOnly := true + badlines := []string{} + for _, check := range consoleChecks { - if check.skipFlag != nil && t != nil && t.HasFlag(*check.skipFlag) { + if check.skipFlag != nil { continue } - match := check.match.FindSubmatch(output) + match := check.match.FindSubmatch(input) if match != nil { badline := check.desc if len(match) > 1 { // include first subexpression - badline += fmt.Sprintf(" (%s)", match[1]) + badline += fmt.Sprintf(" (%v)", match[1]) } badlines = append(badlines, badline) if !check.warnOnly { warnOnly = false } - if !check.allowRerunSuccess { - allowRerunSuccess = false - } } } + return warnOnly, badlines +} + +// CheckConsole checks some console output for badness and returns short +// descriptions of any bad lines it finds along with a boolean +// indicating if the configuration has the bad lines marked as +// warnOnly or not (for things we don't want to error for). If t is +// specified, its flags are respected and tags possibly updated for +// rerun success. +func CheckConsole(output []byte, t *register.Test) (bool, []string) { + var badlines []string + var badness bool + allowRerunSuccess := true + + //here pass output to checkconsoleText + badness, badlines = CheckConsoleText(output) + if len(badlines) > 0 && allowRerunSuccess && t != nil { markTestForRerunSuccess(t, "CheckConsole:") } - return warnOnly, badlines + return badness, badlines } func SetupOutputDir(outputDir, platform string) (string, error) { diff --git a/mantle/platform/qemu.go b/mantle/platform/qemu.go index 6fe76da9fc..d8c4108260 100644 --- a/mantle/platform/qemu.go +++ b/mantle/platform/qemu.go @@ -43,6 +43,7 @@ import ( "syscall" "time" + "github.com/coreos/coreos-assembler/mantle/platform" "github.com/coreos/coreos-assembler/mantle/platform/conf" "github.com/coreos/coreos-assembler/mantle/util" coreosarch "github.com/coreos/stream-metadata-go/arch" @@ -216,6 +217,52 @@ func (inst *QemuInstance) Wait() error { // be a newline-delimited stream of JSON strings, as returned // by `journalctl -o json`. func (inst *QemuInstance) WaitIgnitionError(ctx context.Context) (string, error) { + b := bufio.NewReaderSize(inst.journalPipe, 64768) // Why such large buffer? + var r strings.Builder + iscorrupted := false + _, err := b.Peek(1) + if err != nil { + // It's normal to get EOF if we didn't catch an error and qemu + // is shutting down. We also need to handle when the Destroy() + // function closes the journal FD on us. + if e, ok := err.(*os.PathError); ok && e.Err == os.ErrClosed { + return "", nil + } else if err == io.EOF { + return "", nil + } + return "", errors.Wrapf(err, "Reading from journal") + } + for { + if err := ctx.Err(); err != nil { + return "", err + } + + line, prefix, err := b.ReadLine() + if err != nil { + return r.String(), errors.Wrapf(err, "Reading from journal channel") + } + if prefix { + iscorrupted = true + } + if len(line) == 0 || string(line) == "{}" { + break + } + r.Write(line) + r.Write([]byte("\n")) + } + if iscorrupted { + return r.String(), fmt.Errorf("journal was truncated due to overly long line") + } + return r.String(), nil +} + +// Copy of WaitIgnitionError -> CheckConsoleForBadness +// CheckConsoleForBadness will only return if the instance +// failed inside the initramfs. The resulting string will +// be a newline-delimited stream of JSON strings, as returned +// by `journalctl -o json`. + +func (inst *QemuInstance) CheckConsoleForBadness(ctx context.Context, m platform.Machine) (string, error) { b := bufio.NewReaderSize(inst.journalPipe, 64768) var r strings.Builder iscorrupted := false @@ -252,12 +299,17 @@ func (inst *QemuInstance) WaitIgnitionError(ctx context.Context) (string, error) if iscorrupted { return r.String(), fmt.Errorf("journal was truncated due to overly long line") } + // Check the machine state + err = platform.CheckMachine(ctx, m) + if err != nil { + return "", errors.Wrapf(err, "Machine is in bad state") + } return r.String(), nil } // WaitAll wraps the process exit as well as WaitIgnitionError, // returning an error if either fail. -func (inst *QemuInstance) WaitAll(ctx context.Context) error { +func (inst *QemuInstance) WaitAll(ctx context.Context, m platform.Machine) error { c := make(chan error) waitCtx, cancel := context.WithCancel(ctx) defer cancel() @@ -277,6 +329,20 @@ func (inst *QemuInstance) WaitAll(ctx context.Context) error { } }() + // Early stop due to failure in initramfs. + go func() { + buf, err := inst.CheckConsoleForBadness(waitCtx, m) + if err != nil { + c <- err + return + } + + // TODO: parse buf and try to nicely render something. + if buf != "" { + c <- ErrInitramfsEmergency + return + } + }() // Machine terminated. go func() { select {