From 0cf89ca0e226f88fcdaf30ac3d11483b4d7796c1 Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Thu, 23 Jan 2020 16:43:38 -0500 Subject: [PATCH] kola: add `run-upgrade` command This adds a new `run-upgrade` command focused on running upgrade tests. It also adds a single test in that testsuite: `fcos.upgrade.basic`. To run this test, one can do: ``` kola run-upgrade -v \ --cosa-build /path/to/meta.json \ --qemu-image /path/to/starting-image.qcow2 ``` You can tell kola to automatically detect the parent image to start from: ``` kola run-upgrade -v \ --cosa-build /path/to/meta.json \ --find-parent-image ``` For FCOS, this will fetch the metadata for the latest release for the target stream. On AWS, it will use the AMI from there as the starting image. On the QEMU platform, it will download the QEMU image locally (with signature verification). The code is extensible to add support for RHCOS and other target platforms. Why make it a separate command from `run`? Multiple reasons: 1. As shown above, it's about multiple artifacts, not just the system under test. By contrast, `run` is largely about using a single artifact input. For example, on AWS, `--aws-ami` points to the *starting* image, and `--cosa-build` points to the target upgrade. 2. It's more expensive than other tests. To make it truly cross-platform and self-contained, it works by pushing the OSTree content to the node and serving it from there to itself. Therefore, it's not a test that developers would necessarily be interested in running locally very often (though it's definitely adapted for local tests too when needed). 3. Unlike `run`, it has some special semantics like `--find-parent-image` to make it easier to use. Now, this is only part of the FCOS upgrade testing story. Here's roughly how I see this all fit together: 1. The FCOS pipeline runs `kola run-upgrade -p qemu` and possibly `kola run-upgrade -p aws` after the basic `kola run` tests have passed. 2. 
Once the build is clean and pushed out to S3, its content will be imported into the annex/compose repo. 3. Once there, we can do more realistic tests by targeting the annex repo and a dedicated Cincinnati. For example, we can have canary nodes following those updates that started from various previous releases to catch any state-dependent issues. Another more explicit approach is a test that starts those nodes at select releases and gates new releases on that test. Essentially, the main advantage of this test is that we can do some upgrade testing *before* pushing out any bits at all to S3. The major category of bugs this is intended to catch is state-dependent ones (i.e. anything that *isn't* captured by the OSTree commit). However, it does also exercise many of the major parts of the update system (zincati, rpm-ostree, ostree, libcurl). Though it's clearly not a replacement for more realistic e2e tests downstream. --- cmd/kola/kola.go | 196 +++++++++++++++++++++++++++- cmd/kolet/kolet.go | 1 + kola/README.md | 2 +- kola/harness.go | 4 + kola/register/register.go | 8 ++ kola/registry/registry.go | 1 + kola/tests/upgrade/basic.go | 248 ++++++++++++++++++++++++++++++++++++ 7 files changed, 456 insertions(+), 4 deletions(-) create mode 100644 kola/tests/upgrade/basic.go diff --git a/cmd/kola/kola.go b/cmd/kola/kola.go index f6b6bee22..f7297ae76 100644 --- a/cmd/kola/kola.go +++ b/cmd/kola/kola.go @@ -17,19 +17,26 @@ package main import ( "encoding/json" "fmt" + "io/ioutil" "log" "net/http" "os" "path/filepath" "sort" + "strings" "text/tabwriter" "github.com/coreos/pkg/capnslog" + "github.com/pkg/errors" "github.com/spf13/cobra" "github.com/coreos/mantle/cli" + "github.com/coreos/mantle/cosa" + "github.com/coreos/mantle/fcos" "github.com/coreos/mantle/kola" "github.com/coreos/mantle/kola/register" + "github.com/coreos/mantle/sdk" + "github.com/coreos/mantle/util" // register OS test suite _ "github.com/coreos/mantle/kola/registry" @@ -57,6 +64,16 @@ will be ignored. 
PreRun: preRun, } + cmdRunUpgrade = &cobra.Command{ + Use: "run-upgrade [glob pattern...]", + Short: "Run upgrade kola tests", + Long: `Run all upgrade kola tests (default) or related groups.`, + RunE: runRunUpgrade, + PreRunE: preRunUpgrade, + PostRun: postRunUpgrade, + SilenceUsage: true, + } + cmdList = &cobra.Command{ Use: "list", Short: "List kola test names", @@ -73,18 +90,25 @@ This can be useful for e.g. serving locally built OSTree repos to qemu. Run: runHttpServer, } - listJSON bool - httpPort int + listJSON bool + httpPort int + findParentImage bool + qemuImageDir string + qemuImageDirIsTemp bool ) func init() { root.AddCommand(cmdRun) - root.AddCommand(cmdList) + root.AddCommand(cmdList) cmdList.Flags().BoolVar(&listJSON, "json", false, "format output in JSON") root.AddCommand(cmdHttpServer) cmdHttpServer.Flags().IntVarP(&httpPort, "port", "P", 8000, "Listen on provided port") + + root.AddCommand(cmdRunUpgrade) + cmdRunUpgrade.Flags().BoolVar(&findParentImage, "find-parent-image", false, "automatically find parent image if not provided -- note on qemu, this will download the image") + cmdRunUpgrade.Flags().StringVar(&qemuImageDir, "qemu-image-dir", "", "directory in which to cache QEMU images if --find-parent-image is enabled") } func main() { @@ -358,3 +382,169 @@ func runHttpServer(cmd *cobra.Command, args []string) { fmt.Fprintf(os.Stdout, "Serving HTTP on port: %d\n", httpPort) log.Fatal(http.ListenAndServe(fmt.Sprintf(":%d", httpPort), nil)) } + +func preRunUpgrade(cmd *cobra.Command, args []string) error { + // unlike `kola run`, we *require* the --cosa-build arg -- XXX: figure out + // how to get this working using cobra's MarkFlagRequired() + if kola.Options.CosaBuild == "" { + return errors.New("Error: missing required argument --cosa-build") + } + + err := syncOptions() + if err != nil { + return err + } + + if findParentImage { + err = syncFindParentImageOptions() + if err != nil { + return err + } + } + + return nil +} + +func postRunUpgrade(cmd 
*cobra.Command, args []string) { + if qemuImageDir != "" && qemuImageDirIsTemp { + os.RemoveAll(qemuImageDir) + } +} + +// syncFindParentImageOptions handles --find-parent-image automagic. +func syncFindParentImageOptions() error { + var err error + + var parentBaseUrl string + switch kola.Options.Distribution { + case "fcos": + parentBaseUrl, err = getParentFcosBuildBase() + if err != nil { + return err + } + default: + return fmt.Errorf("--find-parent-image not yet supported for distro %s", kola.Options.Distribution) + } + + var parentCosaBuild *cosa.Build + parentCosaBuild, err = cosa.FetchAndParseBuild(parentBaseUrl + "meta.json") + if err != nil { + return err + } + + // Here we handle the --find-parent-image --> platform-specific options + // based on its cosa build metadata + switch kolaPlatform { + case "qemu-unpriv": + if qemuImageDir == "" { + if qemuImageDir, err = ioutil.TempDir("", "kola-run-upgrade"); err != nil { + return err + } + qemuImageDirIsTemp = true + } + qcowUrl := parentBaseUrl + parentCosaBuild.Images.QEMU.Path + qcowLocal := filepath.Join(qemuImageDir, parentCosaBuild.Images.QEMU.Path) + decompressedQcowLocal, err := downloadImageAndDecompress(qcowUrl, qcowLocal) + if err != nil { + return err + } + kola.QEMUOptions.DiskImage = decompressedQcowLocal + case "aws": + kola.AWSOptions.AMI, err = parentCosaBuild.FindAMI(kola.AWSOptions.Region) + if err != nil { + return err + } + default: + return fmt.Errorf("--find-parent-image not yet supported for platform %s", kolaPlatform) + } + + return nil +} + +// Note this is a no-op if the decompressed dest already exists. 
+func downloadImageAndDecompress(url, compressedDest string) (string, error) { + var decompressedDest string + if strings.HasSuffix(compressedDest, ".xz") { + // if the decompressed file is already present locally, assume it's + // good and verified already + decompressedDest = strings.TrimSuffix(compressedDest, ".xz") + if exists, err := util.PathExists(decompressedDest); err != nil { + return "", err + } else if exists { + return decompressedDest, nil + } else { + if err := sdk.DownloadCompressedSignedFile(decompressedDest, url, nil, "", util.XzDecompressStream); err != nil { + return "", err + } + return decompressedDest, nil + } + } + + if err := sdk.DownloadSignedFile(compressedDest, url, nil, ""); err != nil { + return "", err + } + + return compressedDest, nil +} + +func getParentFcosBuildBase() (string, error) { + // For FCOS, we can be clever and automagically fetch the metadata for the + // parent release, which should be the latest release on that stream. + + // We're taking liberal shortcuts here... 
the cleaner way to do this is + // parse commitmeta.json for `fedora-coreos.stream`, then fetch the stream + // metadata for that stream, then fetch the release metadata + + if kola.CosaBuild.Ref == "" { + return "", errors.New("no ref in build metadata") + } + + stream := filepath.Base(kola.CosaBuild.Ref) + + var parentVersion string + if kola.CosaBuild.FedoraCoreOSParentVersion != "" { + parentVersion = kola.CosaBuild.FedoraCoreOSParentVersion + } else { + // ok, we're probably operating on a local dev build since the pipeline + // always injects the parent; just instead fetch the release index + // for that stream and get the last build id from there + index, err := fcos.FetchAndParseCanonicalReleaseIndex(stream) + if err != nil { + return "", err + } + + n := len(index.Releases) + if n == 0 { + return "", fmt.Errorf("no parent version in build metadata, and no build on stream %s", stream) + } + + parentVersion = index.Releases[n-1].Version + } + + // XXX: multi-arch + // XXX: centralize URL and parameterize + return fmt.Sprintf("https://builds.coreos.fedoraproject.org/prod/streams/%s/builds/%s/x86_64/", stream, parentVersion), nil +} + +func runRunUpgrade(cmd *cobra.Command, args []string) error { + outputDir, err := kola.SetupOutputDir(outputDir, kolaPlatform) + if err != nil { + return err + } + + var patterns []string + if len(args) == 0 { + patterns = []string{"*"} // run all tests by default + } else { + patterns = args + } + + runErr := kola.RunUpgradeTests(patterns, kolaPlatform, outputDir, !kola.Options.NoTestExitError) + + // needs to be after RunTests() because harness empties the directory + if err := writeProps(); err != nil { + return err + } + + return runErr +} diff --git a/cmd/kolet/kolet.go b/cmd/kolet/kolet.go index 861f991c1..393fb5001 100644 --- a/cmd/kolet/kolet.go +++ b/cmd/kolet/kolet.go @@ -82,6 +82,7 @@ func registerTestMap(m map[string]*register.Test) { func main() { registerTestMap(register.Tests) + 
registerTestMap(register.UpgradeTests) root.AddCommand(cmdRun) cli.Execute(root) diff --git a/kola/README.md b/kola/README.md index e0a8546bc..1c2ceba01 100644 --- a/kola/README.md +++ b/kola/README.md @@ -190,4 +190,4 @@ import ( _ "github.com/coreos/mantle/kola/tests/systemd" _ "github.com/coreos/mantle/kola/tests/update" ) -``` \ No newline at end of file +``` diff --git a/kola/harness.go b/kola/harness.go index a300129ca..bff364329 100644 --- a/kola/harness.go +++ b/kola/harness.go @@ -431,6 +431,10 @@ func RunTests(patterns []string, pltfrm, outputDir string, propagateTestErrors b return runProvidedTests(register.Tests, patterns, pltfrm, outputDir, propagateTestErrors) } +func RunUpgradeTests(patterns []string, pltfrm, outputDir string, propagateTestErrors bool) error { + return runProvidedTests(register.UpgradeTests, patterns, pltfrm, outputDir, propagateTestErrors) +} + // getClusterSemVer returns the CoreOS semantic version via starting a // machine and checking func getClusterSemver(flight platform.Flight, outputDir string) (*semver.Version, error) { diff --git a/kola/register/register.go b/kola/register/register.go index 990de2683..142fa0a56 100644 --- a/kola/register/register.go +++ b/kola/register/register.go @@ -82,6 +82,10 @@ type Test struct { // to tests. var Tests = map[string]*Test{} +// Registered tests that run as part of `kola run-upgrade` live here. Mapping of +// names to tests. +var UpgradeTests = map[string]*Test{} + // Register is usually called via init() functions and is how kola test // harnesses knows which tests it can choose from. 
Panics if existing name is // registered @@ -102,6 +106,10 @@ func RegisterTest(t *Test) { Register(Tests, t) } +func RegisterUpgradeTest(t *Test) { + Register(UpgradeTests, t) +} + func (t *Test) HasFlag(flag Flag) bool { for _, f := range t.Flags { if f == flag { diff --git a/kola/registry/registry.go b/kola/registry/registry.go index c9d459b23..fbf6d11f0 100644 --- a/kola/registry/registry.go +++ b/kola/registry/registry.go @@ -16,4 +16,5 @@ import ( _ "github.com/coreos/mantle/kola/tests/rhcos" _ "github.com/coreos/mantle/kola/tests/rpmostree" _ "github.com/coreos/mantle/kola/tests/systemd" + _ "github.com/coreos/mantle/kola/tests/upgrade" ) diff --git a/kola/tests/upgrade/basic.go b/kola/tests/upgrade/basic.go new file mode 100644 index 000000000..9d0442d13 --- /dev/null +++ b/kola/tests/upgrade/basic.go @@ -0,0 +1,248 @@ +// Copyright 2020 Red Hat, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package upgrade + +import ( + "bytes" + "encoding/json" + "net/http" + "path/filepath" + "strconv" + "strings" + "time" + + "github.com/coreos/pkg/capnslog" + + "github.com/coreos/mantle/kola" + "github.com/coreos/mantle/kola/cluster" + "github.com/coreos/mantle/kola/register" + "github.com/coreos/mantle/kola/tests/util" + "github.com/coreos/mantle/platform" + "github.com/coreos/mantle/platform/conf" +) + +const ostreeRepo = "/srv/ostree" + +var plog = capnslog.NewPackageLogger("github.com/coreos/mantle", "kola/tests/upgrade") + +func init() { + register.RegisterUpgradeTest(&register.Test{ + Run: upgradeFromPrevious, + ClusterSize: 1, + // if renaming this, also rename the command in kolet-httpd.service below + Name: "fcos.upgrade.basic", + FailFast: true, + NativeFuncs: map[string]register.NativeFuncWrap{ + "httpd": register.CreateNativeFuncWrap(httpd), + }, + Distros: []string{"fcos"}, + // This Ignition does a few things: + // 1. bumps Zincati verbosity + // 2. auto-runs httpd once kolet is scp'ed + // 3. changes the zincati config to point to localhost:8080 so we'll be + // able to feed the update graph we want + // 4. changes the OSTree remote to localhost:8080 + // We could use file:/// to simplify things though using a URL at least + // exercises the ostree/libcurl stack. + // We use strings.Replace here because fmt.Sprintf would try to + // interpret the percent signs and there's too many of them to be worth + // escaping. 
+ UserDataV3: conf.Ignition(strings.Replace(`{ + "ignition": { "version": "3.0.0" }, + "systemd": { + "units": [ + { + "name": "zincati.service", + "dropins": [{ + "name": "verbose.conf", + "contents": "[Service]\nEnvironment=ZINCATI_VERBOSITY=\"-vvvv\"" + }] + }, + { + "name": "kolet-httpd.path", + "enabled": true, + "contents": "[Path]\nPathExists=/var/home/core/kolet\n[Install]\nWantedBy=multi-user.target" + }, + { + "name": "kolet-httpd.service", + "contents": "[Service]\nExecStart=/var/home/core/kolet run fcos.upgrade.basic httpd -v\n[Install]\nWantedBy=multi-user.target" + } + ] + }, + "storage": { + "files": [ + { + "path": "/etc/zincati/config.d/99-updates.toml", + "contents": { "source": "data:,updates.enabled%20%3D%20true%0Acincinnati.base_url%3D%20%22http%3A%2F%2Flocalhost%3A8080%22%0A" }, + "mode": 420 + }, + { + "path": "/etc/ostree/remotes.d/fedora.conf", + "contents": { "source": "data:,%5Bremote%20%22fedora%22%5D%0Aurl%3Dhttp%3A%2F%2Flocalhost%3A8080%0Agpg-verify%3Dfalse%0A" }, + "overwrite": true, + "mode": 420 + } + ], + "directories": [ + { + "path": "OSTREE_REPO", + "mode": 493, + "user": { + "name": "core" + } + } + ] + } +}`, "OSTREE_REPO", ostreeRepo, -1)), + }) +} + +// upgradeFromPrevious verifies that the previous build is capable of upgrading +// to the current build and to another build +func upgradeFromPrevious(c cluster.TestCluster) { + m := c.Machines()[0] + graph := new(Graph) + + c.Run("setup", func(c cluster.TestCluster) { + // this is the only heavy-weight part, though remember this test is + // optimized for qemu testing locally where this won't leave localhost at + // all. 
cloud testing should mostly be a pipeline thing, where the infra + // connection should be much faster + ostreeTarPath := filepath.Join(filepath.Dir(kola.Options.CosaBuild), kola.CosaBuild.Images.OSTree.Path) + if err := c.DropFile(ostreeTarPath); err != nil { + c.Fatal(err) + } + + c.MustSSHf(m, "tar -xf %s -C %s", kola.CosaBuild.Images.OSTree.Path, ostreeRepo) + + graph.seedFromMachine(c, m) + graph.addUpdate(c, m, kola.CosaBuild.OSTreeVersion, kola.CosaBuild.OSTreeCommit) + }) + + c.Run("upgrade-from-previous", func(c cluster.TestCluster) { + waitForUpgradeToVersion(c, m, kola.CosaBuild.OSTreeVersion) + }) + + // Now, synthesize an update and serve that -- this is similar to + // `rpmostree.upgrade-rollback`, but the major difference here is that the + // starting disk is the previous release (and also, we're doing this via + // Zincati & HTTP). Essentially, this sanity-checks that old starting state + // + new content set can update. + c.Run("upgrade-from-current", func(c cluster.TestCluster) { + newVersion := kola.CosaBuild.OSTreeVersion + ".kola" + newCommit := c.MustSSHf(m, "ostree commit --repo %s -b %s --tree ref=%s --add-metadata-string version=%s", + ostreeRepo, kola.CosaBuild.Ref, kola.CosaBuild.OSTreeCommit, newVersion) + + graph.addUpdate(c, m, newVersion, string(newCommit)) + + waitForUpgradeToVersion(c, m, newVersion) + }) +} + +// Should dedupe this with fedora-coreos-cincinnati -- we just handle the +// bare minimum here. One question here is: why not use Cincinnati itself for +// this? We could do this, though it'd somewhat muddle the focus of these tests +// and make setup more complex. 
+type Graph struct { + Nodes []Node `json:"nodes"` + Edges [][2]int `json:"edges,omitempty"` +} + +type Node struct { + Version string `json:"version"` + Metadata map[string]string `json:"metadata"` + Payload string `json:"payload"` +} + +func (g *Graph) seedFromMachine(c cluster.TestCluster, m platform.Machine) { + d, err := util.GetBootedDeployment(c, m) + if err != nil { + c.Fatal(err) + } + + g.Nodes = []Node{ + { + Version: d.Version, + Payload: d.Checksum, + Metadata: map[string]string{ + "org.fedoraproject.coreos.releases.age_index": "0", + "org.fedoraproject.coreos.scheme": "checksum", + }, + }, + } + + g.sync(c, m) +} + +func (g *Graph) addUpdate(c cluster.TestCluster, m platform.Machine, version, payload string) { + i := len(g.Nodes) + + g.Nodes = append(g.Nodes, Node{ + Version: version, + Payload: payload, + Metadata: map[string]string{ + "org.fedoraproject.coreos.releases.age_index": strconv.Itoa(i), + "org.fedoraproject.coreos.scheme": "checksum", + }, + }) + + g.Edges = append(g.Edges, [2]int{i - 1, i}) + + g.sync(c, m) +} + +func (g *Graph) sync(c cluster.TestCluster, m platform.Machine) { + b, err := json.Marshal(g) + if err != nil { + c.Fatalf("failed to marshal graph: %v", err) + } + + if err := platform.InstallFile(bytes.NewReader(b), m, "graph.json"); err != nil { + c.Fatalf("failed to update graph.json: %v", err) + } +} + +func waitForUpgradeToVersion(c cluster.TestCluster, m platform.Machine, version string) { + oldBootId, err := platform.GetMachineBootId(m) + if err != nil { + c.Fatal(err) + } + + // XXX: patch zincati to have faster refresh rate + // https://github.com/coreos/zincati/issues/203 + c.MustSSH(m, "sudo systemctl restart zincati.service") + + if err := m.WaitForReboot(120*time.Second, oldBootId); err != nil { + c.Fatalf("failed waiting for machine reboot: %v", err) + } + + d, err := util.GetBootedDeployment(c, m) + if err != nil { + c.Fatal(err) + } + + if d.Version != version { + c.Fatalf("expected reboot into version %s, but got 
version %s", version, d.Version) + } +} + +func httpd() error { + http.Handle("/", http.FileServer(http.Dir(ostreeRepo))) + http.HandleFunc("/v1/graph", func(w http.ResponseWriter, r *http.Request) { + http.ServeFile(w, r, "/var/home/core/graph.json") + }) + plog.Info("Starting server") + return http.ListenAndServe("localhost:8080", nil) +}