From 516c7c23d50b4b53d5c977e19049797e57126c6d Mon Sep 17 00:00:00 2001 From: Chi Zhang Date: Thu, 13 Aug 2020 14:26:36 -0700 Subject: [PATCH] Refactor kubetest2 gke deployer into different source files --- kubetest2-gke/deployer/build.go | 36 ++ kubetest2-gke/deployer/commandutils.go | 16 + kubetest2-gke/deployer/common.go | 25 ++ kubetest2-gke/deployer/deployer.go | 512 +------------------------ kubetest2-gke/deployer/down.go | 107 ++++++ kubetest2-gke/deployer/dumplogs.go | 91 +++++ kubetest2-gke/deployer/firewall.go | 148 +++++++ kubetest2-gke/deployer/up.go | 193 ++++++++++ 8 files changed, 626 insertions(+), 502 deletions(-) create mode 100644 kubetest2-gke/deployer/build.go create mode 100644 kubetest2-gke/deployer/down.go create mode 100644 kubetest2-gke/deployer/dumplogs.go create mode 100644 kubetest2-gke/deployer/firewall.go create mode 100644 kubetest2-gke/deployer/up.go diff --git a/kubetest2-gke/deployer/build.go b/kubetest2-gke/deployer/build.go new file mode 100644 index 00000000..e03a2081 --- /dev/null +++ b/kubetest2-gke/deployer/build.go @@ -0,0 +1,36 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package deployer + +import ( + "fmt" + + "sigs.k8s.io/kubetest2/pkg/build" +) + +func (d *deployer) Build() error { + if err := build.Build(); err != nil { + return err + } + + if d.stageLocation != "" { + if err := build.Stage(d.stageLocation); err != nil { + return fmt.Errorf("error staging build: %v", err) + } + } + return nil +} diff --git a/kubetest2-gke/deployer/commandutils.go b/kubetest2-gke/deployer/commandutils.go index a9fc0574..356fa9c0 100644 --- a/kubetest2-gke/deployer/commandutils.go +++ b/kubetest2-gke/deployer/commandutils.go @@ -105,3 +105,19 @@ func home(parts ...string) string { p := append([]string{os.Getenv("HOME")}, parts...) return filepath.Join(p...) } + +func (d *deployer) getClusterCredentials(project, cluster string) error { + // Get gcloud to create the file. + loc, err := d.location() + if err != nil { + return err + } + + if err := runWithOutput(exec.Command("gcloud", + d.containerArgs("clusters", "get-credentials", cluster, "--project="+project, loc)...), + ); err != nil { + return fmt.Errorf("error executing get-credentials: %v", err) + } + + return nil +} diff --git a/kubetest2-gke/deployer/common.go b/kubetest2-gke/deployer/common.go index cc8f827c..9c6d5ed7 100644 --- a/kubetest2-gke/deployer/common.go +++ b/kubetest2-gke/deployer/common.go @@ -18,6 +18,7 @@ package deployer import ( "fmt" + realexec "os/exec" "strconv" "strings" "time" @@ -25,6 +26,7 @@ import ( "k8s.io/klog" "sigs.k8s.io/kubetest2/pkg/boskos" + "sigs.k8s.io/kubetest2/pkg/exec" ) const ( @@ -110,3 +112,26 @@ func buildProjectClustersLayout(projects, clusters []string, projectClustersLayo } return nil } + +func (d *deployer) containerArgs(args ...string) []string { + return append(append([]string{}, "container"), args...) 
+} + +func runWithNoOutput(cmd exec.Cmd) error { + exec.NoOutput(cmd) + return cmd.Run() +} + +func runWithOutput(cmd exec.Cmd) error { + exec.InheritOutput(cmd) + return cmd.Run() +} + +// execError returns a string format of err including stderr if the +// err is an ExitError, useful for errors from e.g. exec.Cmd.Output(). +func execError(err error) string { + if ee, ok := err.(*realexec.ExitError); ok { + return fmt.Sprintf("%v (output: %q)", err, string(ee.Stderr)) + } + return err.Error() +} diff --git a/kubetest2-gke/deployer/deployer.go b/kubetest2-gke/deployer/deployer.go index 83a54d0c..60b126a8 100644 --- a/kubetest2-gke/deployer/deployer.go +++ b/kubetest2-gke/deployer/deployer.go @@ -18,27 +18,16 @@ limitations under the License. package deployer import ( - "context" "flag" "fmt" - "io/ioutil" - "os" - realexec "os/exec" // Only for ExitError; Use kubetest2/pkg/exec to actually exec stuff "path/filepath" "regexp" - "sort" - "strconv" - "strings" "sync" "github.com/spf13/pflag" - "golang.org/x/sync/errgroup" "k8s.io/klog" "sigs.k8s.io/boskos/client" - "sigs.k8s.io/kubetest2/pkg/build" - "sigs.k8s.io/kubetest2/pkg/exec" - "sigs.k8s.io/kubetest2/pkg/metadata" "sigs.k8s.io/kubetest2/pkg/types" ) @@ -129,6 +118,16 @@ type deployer struct { boskosHeartbeatClose chan struct{} } +// assert that New implements types.NewDeployer +var _ types.NewDeployer = New + +// assert that deployer implements types.Deployer +var _ types.Deployer = &deployer{} + +func (d *deployer) Provider() string { + return Name +} + // New implements deployer.New for gke func New(opts types.Options) (types.Deployer, *pflag.FlagSet) { // create a deployer object and set fields that are not flag controlled @@ -193,13 +192,6 @@ func (d *deployer) location() (string, error) { return "--region=" + d.region, nil } -func (d *deployer) createCommand() []string { - return strings.Fields(d.createCommandFlag) -} - -// assert that New implements types.NewDeployer -var _ types.NewDeployer = New - func 
bindFlags(d *deployer) *pflag.FlagSet { flags := pflag.NewFlagSet(Name, pflag.ContinueOnError) @@ -227,487 +219,3 @@ func bindFlags(d *deployer) *pflag.FlagSet { return flags } - -// assert that deployer implements types.Deployer -var _ types.Deployer = &deployer{} - -func (d *deployer) Provider() string { - return "gke" -} - -func (d *deployer) Build() error { - if err := build.Build(); err != nil { - return err - } - - if d.stageLocation != "" { - if err := build.Stage(d.stageLocation); err != nil { - return fmt.Errorf("error staging build: %v", err) - } - } - return nil -} - -// Deployer implementation methods below -func (d *deployer) Up() error { - if err := d.init(); err != nil { - return err - } - - // Only run prepare once for the first GCP project. - if err := d.prepareGcpIfNeeded(d.projects[0]); err != nil { - return err - } - if err := d.createNetwork(); err != nil { - return err - } - if err := d.setupNetwork(); err != nil { - return err - } - - klog.V(1).Infof("Environment: %v", os.Environ()) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - eg, ctx := errgroup.WithContext(ctx) - loc, err := d.location() - if err != nil { - return err - } - for i := range d.projects { - project := d.projects[i] - subNetworkArgs := subNetworkArgs(d.projects, d.region, d.network, i) - for j := range d.projectClustersLayout[project] { - cluster := d.projectClustersLayout[project][j] - eg.Go(func() error { - // Create the cluster - args := make([]string, len(d.createCommand())) - copy(args, d.createCommand()) - args = append(args, - "--project="+project, - loc, - "--machine-type="+d.machineType, - "--image-type="+image, - "--num-nodes="+strconv.Itoa(d.nodes), - "--network="+transformNetworkName(d.projects, d.network), - ) - if d.workloadIdentityEnabled { - args = append(args, fmt.Sprintf("--workload-pool=%s.svc.id.goog", project)) - } - args = append(args, subNetworkArgs...) 
- args = append(args, cluster) - klog.V(1).Infof("Gcloud command: gcloud %+v\n", args) - if err := runWithOutput(exec.CommandContext(ctx, "gcloud", args...)); err != nil { - // Cancel the context to kill other cluster creation processes if any error happens. - cancel() - return fmt.Errorf("error creating cluster: %v", err) - } - return nil - }) - } - } - - if err := eg.Wait(); err != nil { - return fmt.Errorf("error creating clusters: %v", err) - } - - if err := d.testSetup(); err != nil { - return fmt.Errorf("error running setup for the tests: %v", err) - } - - return nil -} - -func (d *deployer) IsUp() (up bool, err error) { - if err := d.prepareGcpIfNeeded(d.projects[0]); err != nil { - return false, err - } - - for _, project := range d.projects { - for _, cluster := range d.projectClustersLayout[project] { - if err := d.getClusterCredentials(project, cluster); err != nil { - return false, err - } - - // naively assume that if the api server reports nodes, the cluster is up - lines, err := exec.CombinedOutputLines( - exec.RawCommand("kubectl get nodes -o=name"), - ) - if err != nil { - return false, metadata.NewJUnitError(err, strings.Join(lines, "\n")) - } - if len(lines) == 0 { - return false, fmt.Errorf("project had no nodes active: %s", project) - } - } - } - - return true, nil -} - -// DumpClusterLogs for GKE generates a small script that wraps -// log-dump.sh with the appropriate shell-fu to get the cluster -// dumped. -// -// TODO(RonWeber): This whole path is really gross, but this seemed -// the least gross hack to get this done. -// -// TODO(RonWeber): Make this work with multizonal and regional clusters. 
-func (d *deployer) DumpClusterLogs() error { - // gkeLogDumpTemplate is a template of a shell script where - // - %[1]s is the project - // - %[2]s is the zone - // - %[3]s is a filter composed of the instance groups - // - %[4]s is the log-dump.sh command line - const gkeLogDumpTemplate = ` -function log_dump_custom_get_instances() { - if [[ $1 == "master" ]]; then - return 0 - fi - - gcloud compute instances list '--project=%[1]s' '--filter=%[4]s' '--format=get(name)' -} -export -f log_dump_custom_get_instances -# Set below vars that log-dump.sh expects in order to use scp with gcloud. -export PROJECT=%[1]s -export ZONE='%[2]s' -export KUBERNETES_PROVIDER=gke -export KUBE_NODE_OS_DISTRIBUTION='%[3]s' -%[5]s -` - for _, project := range d.projects { - // Prevent an obvious injection. - if strings.Contains(d.localLogsDir, "'") || strings.Contains(d.gcsLogsDir, "'") { - return fmt.Errorf("%q or %q contain single quotes - nice try", d.localLogsDir, d.gcsLogsDir) - } - - // Generate a slice of filters to be OR'd together below - var filters []string - for _, cluster := range d.projectClustersLayout[project] { - if err := d.getInstanceGroups(); err != nil { - return err - } - for _, ig := range d.instanceGroups[project][cluster] { - filters = append(filters, fmt.Sprintf("(metadata.created-by:*%s)", ig.path)) - } - } - - // Generate the log-dump.sh command-line - dumpCmd := fmt.Sprintf("./cluster/log-dump/log-dump.sh '%s'", d.localLogsDir) - if d.gcsLogsDir != "" { - dumpCmd += " " + d.gcsLogsDir - } - - if err := runWithOutput(exec.Command("bash", "-c", fmt.Sprintf(gkeLogDumpTemplate, - project, - d.zone, - os.Getenv("NODE_OS_DISTRIBUTION"), - strings.Join(filters, " OR "), - dumpCmd))); err != nil { - return err - } - } - - return nil -} - -func (d *deployer) testSetup() error { - if d.testPrepared { - // Ensure setup is a singleton. - return nil - } - - // Only run prepare once for the first GCP project. 
- if err := d.prepareGcpIfNeeded(d.projects[0]); err != nil { - return err - } - if _, err := d.Kubeconfig(); err != nil { - return err - } - - for _, project := range d.projects { - for _, cluster := range d.projectClustersLayout[project] { - if err := d.getInstanceGroups(); err != nil { - return err - } - - if err := d.ensureFirewall(project, cluster, d.network); err != nil { - return err - } - } - } - d.testPrepared = true - return nil -} - -// Kubeconfig returns a path to a kubeconfig file for the cluster in -// a temp directory, creating one if one does not exist. -// It also sets the KUBECONFIG environment variable appropriately. -func (d *deployer) Kubeconfig() (string, error) { - if d.kubecfgPath != "" { - return d.kubecfgPath, nil - } - - tmpdir, err := ioutil.TempDir("", "kubetest2-gke") - if err != nil { - return "", err - } - - kubecfgFiles := make([]string, 0) - for _, project := range d.projects { - for _, cluster := range d.projectClustersLayout[project] { - filename := filepath.Join(tmpdir, fmt.Sprintf("kubecfg-%s-%s", project, cluster)) - if err := os.Setenv("KUBECONFIG", filename); err != nil { - return "", err - } - if err := d.getClusterCredentials(project, cluster); err != nil { - return "", err - } - kubecfgFiles = append(kubecfgFiles, filename) - } - } - - d.kubecfgPath = strings.Join(kubecfgFiles, string(os.PathListSeparator)) - return d.kubecfgPath, nil -} - -func (d *deployer) ensureFirewall(project, cluster, network string) error { - if network == "default" { - return nil - } - firewall, err := d.getClusterFirewall(project, cluster) - if err != nil { - return fmt.Errorf("error getting unique firewall: %v", err) - } - if runWithNoOutput(exec.Command("gcloud", "compute", "firewall-rules", "describe", firewall, - "--project="+project, - "--format=value(name)")) == nil { - // Assume that if this unique firewall exists, it's good to go. 
- return nil - } - klog.V(1).Infof("Couldn't describe firewall '%s', assuming it doesn't exist and creating it", firewall) - - tagOut, err := exec.Output(exec.Command("gcloud", "compute", "instances", "list", - "--project="+project, - "--filter=metadata.created-by:*"+d.instanceGroups[project][cluster][0].path, - "--limit=1", - "--format=get(tags.items)")) - if err != nil { - return fmt.Errorf("instances list failed: %s", execError(err)) - } - tag := strings.TrimSpace(string(tagOut)) - if tag == "" { - return fmt.Errorf("instances list returned no instances (or instance has no tags)") - } - - if err := runWithOutput(exec.Command("gcloud", "compute", "firewall-rules", "create", firewall, - "--project="+project, - "--network="+network, - "--allow="+e2eAllow, - "--target-tags="+tag)); err != nil { - return fmt.Errorf("error creating e2e firewall: %v", err) - } - return nil -} - -func (d *deployer) getInstanceGroups() error { - if d.instanceGroups != nil { - return nil - } - - // Initialize project instance groups structure - d.instanceGroups = map[string]map[string][]*ig{} - - location, err := d.location() - if err != nil { - return err - } - - for _, project := range d.projects { - d.instanceGroups[project] = map[string][]*ig{} - - for _, cluster := range d.projectClustersLayout[project] { - igs, err := exec.Output(exec.Command("gcloud", d.containerArgs("clusters", "describe", cluster, - "--format=value(instanceGroupUrls)", - "--project="+project, - location)...)) - if err != nil { - return fmt.Errorf("instance group URL fetch failed: %s", execError(err)) - } - igURLs := strings.Split(strings.TrimSpace(string(igs)), ";") - if len(igURLs) == 0 { - return fmt.Errorf("no instance group URLs returned by gcloud, output %q", string(igs)) - } - sort.Strings(igURLs) - - // Inialize cluster instance groups - d.instanceGroups[project][cluster] = make([]*ig, 0) - - for _, igURL := range igURLs { - m := poolRe.FindStringSubmatch(igURL) - if len(m) == 0 { - return 
fmt.Errorf("instanceGroupUrl %q did not match regex %v", igURL, poolRe) - } - d.instanceGroups[project][cluster] = append(d.instanceGroups[project][cluster], &ig{path: m[0], zone: m[1], name: m[2], uniq: m[3]}) - } - } - } - - return nil -} - -func (d *deployer) getClusterFirewall(project, cluster string) (string, error) { - if err := d.getInstanceGroups(); err != nil { - return "", err - } - // We want to ensure that there's an e2e-ports-* firewall rule - // that maps to the cluster nodes, but the target tag for the - // nodes can be slow to get. Use the hash from the lexically first - // node pool instead. - return "e2e-ports-" + d.instanceGroups[project][cluster][0].uniq, nil -} - -// This function ensures that all firewall-rules are deleted from specific network. -// We also want to keep in logs that there were some resources leaking. -func (d *deployer) cleanupNetworkFirewalls(project, network string) (int, error) { - fws, err := exec.Output(exec.Command("gcloud", "compute", "firewall-rules", "list", - "--format=value(name)", - "--project="+project, - "--filter=network:"+network)) - if err != nil { - return 0, fmt.Errorf("firewall rules list failed: %s", execError(err)) - } - if len(fws) > 0 { - fwList := strings.Split(strings.TrimSpace(string(fws)), "\n") - klog.V(1).Infof("Network %s has %v undeleted firewall rules %v", network, len(fwList), fwList) - commandArgs := []string{"compute", "firewall-rules", "delete", "-q"} - commandArgs = append(commandArgs, fwList...) 
- commandArgs = append(commandArgs, "--project="+project) - errFirewall := runWithOutput(exec.Command("gcloud", commandArgs...)) - if errFirewall != nil { - return 0, fmt.Errorf("error deleting firewall: %v", errFirewall) - } - return len(fwList), nil - } - return 0, nil -} - -func (d *deployer) Down() error { - if err := d.init(); err != nil { - return err - } - - if err := d.prepareGcpIfNeeded(d.projects[0]); err != nil { - return err - } - - var wg sync.WaitGroup - for i := range d.projects { - project := d.projects[i] - for j := range d.projectClustersLayout[project] { - cluster := d.clusters[j] - firewall, err := d.getClusterFirewall(project, cluster) - if err != nil { - // This is expected if the cluster doesn't exist. - continue - } - d.instanceGroups = nil - - loc, err := d.location() - if err != nil { - return err - } - - wg.Add(1) - go func() { - defer wg.Done() - // We best-effort try all of these and report errors as appropriate. - errCluster := runWithOutput(exec.Command( - "gcloud", d.containerArgs("clusters", "delete", "-q", cluster, - "--project="+project, - loc)...)) - - // don't delete default network - if d.network == "default" { - if errCluster != nil { - klog.V(1).Infof("Error deleting cluster using default network, allow the error for now %s", errCluster) - } - return - } - - var errFirewall error - if runWithNoOutput(exec.Command("gcloud", "compute", "firewall-rules", "describe", firewall, - "--project="+project, - "--format=value(name)")) == nil { - klog.V(1).Infof("Found rules for firewall '%s', deleting them", firewall) - errFirewall = exec.Command("gcloud", "compute", "firewall-rules", "delete", "-q", firewall, - "--project="+project).Run() - } else { - klog.V(1).Infof("Found no rules for firewall '%s', assuming resources are clean", firewall) - } - numLeakedFWRules, errCleanFirewalls := d.cleanupNetworkFirewalls(project, d.network) - - if errCluster != nil { - klog.Errorf("error deleting cluster: %v", errCluster) - } - if errFirewall != 
nil { - klog.Errorf("error deleting firewall: %v", errFirewall) - } - if errCleanFirewalls != nil { - klog.Errorf("error cleaning-up firewalls: %v", errCleanFirewalls) - } - if numLeakedFWRules > 0 { - klog.Errorf("leaked firewall rules") - } - }() - } - } - wg.Wait() - - if err := d.teardownNetwork(); err != nil { - return err - } - if err := d.deleteNetwork(); err != nil { - return err - } - - return nil -} - -func (d *deployer) getClusterCredentials(project, cluster string) error { - // Get gcloud to create the file. - loc, err := d.location() - if err != nil { - return err - } - - if err := runWithOutput(exec.Command("gcloud", - d.containerArgs("clusters", "get-credentials", cluster, "--project="+project, loc)...), - ); err != nil { - return fmt.Errorf("error executing get-credentials: %v", err) - } - - return nil -} - -func (d *deployer) containerArgs(args ...string) []string { - return append(append([]string{}, "container"), args...) -} - -func runWithNoOutput(cmd exec.Cmd) error { - exec.NoOutput(cmd) - return cmd.Run() -} - -func runWithOutput(cmd exec.Cmd) error { - exec.InheritOutput(cmd) - return cmd.Run() -} - -// execError returns a string format of err including stderr if the -// err is an ExitError, useful for errors from e.g. exec.Cmd.Output(). -func execError(err error) string { - if ee, ok := err.(*realexec.ExitError); ok { - return fmt.Sprintf("%v (output: %q)", err, string(ee.Stderr)) - } - return err.Error() -} diff --git a/kubetest2-gke/deployer/down.go b/kubetest2-gke/deployer/down.go new file mode 100644 index 00000000..e0afc10c --- /dev/null +++ b/kubetest2-gke/deployer/down.go @@ -0,0 +1,107 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package deployer + +import ( + "sync" + + "k8s.io/klog" + + "sigs.k8s.io/kubetest2/pkg/exec" +) + +func (d *deployer) Down() error { + if err := d.init(); err != nil { + return err + } + + if err := d.prepareGcpIfNeeded(d.projects[0]); err != nil { + return err + } + + var wg sync.WaitGroup + for i := range d.projects { + project := d.projects[i] + for j := range d.projectClustersLayout[project] { + cluster := d.clusters[j] + firewall, err := d.getClusterFirewall(project, cluster) + if err != nil { + // This is expected if the cluster doesn't exist. + continue + } + d.instanceGroups = nil + + loc, err := d.location() + if err != nil { + return err + } + + wg.Add(1) + go func() { + defer wg.Done() + // We best-effort try all of these and report errors as appropriate. 
+ errCluster := runWithOutput(exec.Command( + "gcloud", d.containerArgs("clusters", "delete", "-q", cluster, + "--project="+project, + loc)...)) + + // don't delete default network + if d.network == "default" { + if errCluster != nil { + klog.V(1).Infof("Error deleting cluster using default network, allow the error for now %s", errCluster) + } + return + } + + var errFirewall error + if runWithNoOutput(exec.Command("gcloud", "compute", "firewall-rules", "describe", firewall, + "--project="+project, + "--format=value(name)")) == nil { + klog.V(1).Infof("Found rules for firewall '%s', deleting them", firewall) + errFirewall = exec.Command("gcloud", "compute", "firewall-rules", "delete", "-q", firewall, + "--project="+project).Run() + } else { + klog.V(1).Infof("Found no rules for firewall '%s', assuming resources are clean", firewall) + } + numLeakedFWRules, errCleanFirewalls := d.cleanupNetworkFirewalls(project, d.network) + + if errCluster != nil { + klog.Errorf("error deleting cluster: %v", errCluster) + } + if errFirewall != nil { + klog.Errorf("error deleting firewall: %v", errFirewall) + } + if errCleanFirewalls != nil { + klog.Errorf("error cleaning-up firewalls: %v", errCleanFirewalls) + } + if numLeakedFWRules > 0 { + klog.Errorf("leaked firewall rules") + } + }() + } + } + wg.Wait() + + if err := d.teardownNetwork(); err != nil { + return err + } + if err := d.deleteNetwork(); err != nil { + return err + } + + return nil +} diff --git a/kubetest2-gke/deployer/dumplogs.go b/kubetest2-gke/deployer/dumplogs.go new file mode 100644 index 00000000..5b1424a1 --- /dev/null +++ b/kubetest2-gke/deployer/dumplogs.go @@ -0,0 +1,91 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package deployer + +import ( + "fmt" + "os" + "strings" + + "sigs.k8s.io/kubetest2/pkg/exec" +) + +// DumpClusterLogs for GKE generates a small script that wraps +// log-dump.sh with the appropriate shell-fu to get the cluster +// dumped. +// +// TODO(RonWeber): This whole path is really gross, but this seemed +// the least gross hack to get this done. +// +// TODO(RonWeber): Make this work with multizonal and regional clusters. +func (d *deployer) DumpClusterLogs() error { + // gkeLogDumpTemplate is a template of a shell script where + // - %[1]s is the project + // - %[2]s is the zone + // - %[3]s is a filter composed of the instance groups + // - %[4]s is the log-dump.sh command line + const gkeLogDumpTemplate = ` +function log_dump_custom_get_instances() { + if [[ $1 == "master" ]]; then + return 0 + fi + + gcloud compute instances list '--project=%[1]s' '--filter=%[4]s' '--format=get(name)' +} +export -f log_dump_custom_get_instances +# Set below vars that log-dump.sh expects in order to use scp with gcloud. +export PROJECT=%[1]s +export ZONE='%[2]s' +export KUBERNETES_PROVIDER=gke +export KUBE_NODE_OS_DISTRIBUTION='%[3]s' +%[5]s +` + for _, project := range d.projects { + // Prevent an obvious injection. 
+ if strings.Contains(d.localLogsDir, "'") || strings.Contains(d.gcsLogsDir, "'") { + return fmt.Errorf("%q or %q contain single quotes - nice try", d.localLogsDir, d.gcsLogsDir) + } + + // Generate a slice of filters to be OR'd together below + var filters []string + for _, cluster := range d.projectClustersLayout[project] { + if err := d.getInstanceGroups(); err != nil { + return err + } + for _, ig := range d.instanceGroups[project][cluster] { + filters = append(filters, fmt.Sprintf("(metadata.created-by:*%s)", ig.path)) + } + } + + // Generate the log-dump.sh command-line + dumpCmd := fmt.Sprintf("./cluster/log-dump/log-dump.sh '%s'", d.localLogsDir) + if d.gcsLogsDir != "" { + dumpCmd += " " + d.gcsLogsDir + } + + if err := runWithOutput(exec.Command("bash", "-c", fmt.Sprintf(gkeLogDumpTemplate, + project, + d.zone, + os.Getenv("NODE_OS_DISTRIBUTION"), + strings.Join(filters, " OR "), + dumpCmd))); err != nil { + return err + } + } + + return nil +} diff --git a/kubetest2-gke/deployer/firewall.go b/kubetest2-gke/deployer/firewall.go new file mode 100644 index 00000000..4449cbf1 --- /dev/null +++ b/kubetest2-gke/deployer/firewall.go @@ -0,0 +1,148 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package deployer + +import ( + "fmt" + "sort" + "strings" + + "k8s.io/klog" + + "sigs.k8s.io/kubetest2/pkg/exec" +) + +func (d *deployer) ensureFirewall(project, cluster, network string) error { + if network == "default" { + return nil + } + firewall, err := d.getClusterFirewall(project, cluster) + if err != nil { + return fmt.Errorf("error getting unique firewall: %v", err) + } + if runWithNoOutput(exec.Command("gcloud", "compute", "firewall-rules", "describe", firewall, + "--project="+project, + "--format=value(name)")) == nil { + // Assume that if this unique firewall exists, it's good to go. + return nil + } + klog.V(1).Infof("Couldn't describe firewall '%s', assuming it doesn't exist and creating it", firewall) + + tagOut, err := exec.Output(exec.Command("gcloud", "compute", "instances", "list", + "--project="+project, + "--filter=metadata.created-by:*"+d.instanceGroups[project][cluster][0].path, + "--limit=1", + "--format=get(tags.items)")) + if err != nil { + return fmt.Errorf("instances list failed: %s", execError(err)) + } + tag := strings.TrimSpace(string(tagOut)) + if tag == "" { + return fmt.Errorf("instances list returned no instances (or instance has no tags)") + } + + if err := runWithOutput(exec.Command("gcloud", "compute", "firewall-rules", "create", firewall, + "--project="+project, + "--network="+network, + "--allow="+e2eAllow, + "--target-tags="+tag)); err != nil { + return fmt.Errorf("error creating e2e firewall: %v", err) + } + return nil +} + +func (d *deployer) getClusterFirewall(project, cluster string) (string, error) { + if err := d.getInstanceGroups(); err != nil { + return "", err + } + // We want to ensure that there's an e2e-ports-* firewall rule + // that maps to the cluster nodes, but the target tag for the + // nodes can be slow to get. Use the hash from the lexically first + // node pool instead. 
+ return "e2e-ports-" + d.instanceGroups[project][cluster][0].uniq, nil +} + +// This function ensures that all firewall-rules are deleted from specific network. +// We also want to keep in logs that there were some resources leaking. +func (d *deployer) cleanupNetworkFirewalls(project, network string) (int, error) { + fws, err := exec.Output(exec.Command("gcloud", "compute", "firewall-rules", "list", + "--format=value(name)", + "--project="+project, + "--filter=network:"+network)) + if err != nil { + return 0, fmt.Errorf("firewall rules list failed: %s", execError(err)) + } + if len(fws) > 0 { + fwList := strings.Split(strings.TrimSpace(string(fws)), "\n") + klog.V(1).Infof("Network %s has %v undeleted firewall rules %v", network, len(fwList), fwList) + commandArgs := []string{"compute", "firewall-rules", "delete", "-q"} + commandArgs = append(commandArgs, fwList...) + commandArgs = append(commandArgs, "--project="+project) + errFirewall := runWithOutput(exec.Command("gcloud", commandArgs...)) + if errFirewall != nil { + return 0, fmt.Errorf("error deleting firewall: %v", errFirewall) + } + return len(fwList), nil + } + return 0, nil +} + +func (d *deployer) getInstanceGroups() error { + if d.instanceGroups != nil { + return nil + } + + // Initialize project instance groups structure + d.instanceGroups = map[string]map[string][]*ig{} + + location, err := d.location() + if err != nil { + return err + } + + for _, project := range d.projects { + d.instanceGroups[project] = map[string][]*ig{} + + for _, cluster := range d.projectClustersLayout[project] { + igs, err := exec.Output(exec.Command("gcloud", d.containerArgs("clusters", "describe", cluster, + "--format=value(instanceGroupUrls)", + "--project="+project, + location)...)) + if err != nil { + return fmt.Errorf("instance group URL fetch failed: %s", execError(err)) + } + igURLs := strings.Split(strings.TrimSpace(string(igs)), ";") + if len(igURLs) == 0 { + return fmt.Errorf("no instance group URLs returned by 
gcloud, output %q", string(igs)) + } + sort.Strings(igURLs) + + // Inialize cluster instance groups + d.instanceGroups[project][cluster] = make([]*ig, 0) + + for _, igURL := range igURLs { + m := poolRe.FindStringSubmatch(igURL) + if len(m) == 0 { + return fmt.Errorf("instanceGroupUrl %q did not match regex %v", igURL, poolRe) + } + d.instanceGroups[project][cluster] = append(d.instanceGroups[project][cluster], &ig{path: m[0], zone: m[1], name: m[2], uniq: m[3]}) + } + } + } + + return nil +} diff --git a/kubetest2-gke/deployer/up.go b/kubetest2-gke/deployer/up.go new file mode 100644 index 00000000..9d7af145 --- /dev/null +++ b/kubetest2-gke/deployer/up.go @@ -0,0 +1,193 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package deployer + +import ( + "context" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + + "golang.org/x/sync/errgroup" + "k8s.io/klog" + + "sigs.k8s.io/kubetest2/pkg/exec" + "sigs.k8s.io/kubetest2/pkg/metadata" +) + +// Deployer implementation methods below +func (d *deployer) Up() error { + if err := d.init(); err != nil { + return err + } + + // Only run prepare once for the first GCP project. 
+	if err := d.prepareGcpIfNeeded(d.projects[0]); err != nil {
+		return err
+	}
+	if err := d.createNetwork(); err != nil {
+		return err
+	}
+	if err := d.setupNetwork(); err != nil {
+		return err
+	}
+
+	klog.V(1).Infof("Environment: %v", os.Environ())
+	// Create all clusters concurrently; a shared cancellable context lets the
+	// first failure abort the remaining gcloud invocations.
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	eg, ctx := errgroup.WithContext(ctx)
+	loc, err := d.location()
+	if err != nil {
+		return err
+	}
+	for i := range d.projects {
+		// Shadow loop values so each goroutine closure captures its own copy.
+		project := d.projects[i]
+		subNetworkArgs := subNetworkArgs(d.projects, d.region, d.network, i)
+		for j := range d.projectClustersLayout[project] {
+			cluster := d.projectClustersLayout[project][j]
+			eg.Go(func() error {
+				// Create the cluster. Fetch the base command once and copy it
+				// so concurrent goroutines never append to a shared slice.
+				createArgs := d.createCommand()
+				args := make([]string, len(createArgs))
+				copy(args, createArgs)
+				args = append(args,
+					"--project="+project,
+					loc,
+					"--machine-type="+d.machineType,
+					"--image-type="+image,
+					"--num-nodes="+strconv.Itoa(d.nodes),
+					"--network="+transformNetworkName(d.projects, d.network),
+				)
+				if d.workloadIdentityEnabled {
+					args = append(args, fmt.Sprintf("--workload-pool=%s.svc.id.goog", project))
+				}
+				args = append(args, subNetworkArgs...)
+				args = append(args, cluster)
+				klog.V(1).Infof("Gcloud command: gcloud %+v\n", args)
+				if err := runWithOutput(exec.CommandContext(ctx, "gcloud", args...)); err != nil {
+					// Cancel the context to kill other cluster creation processes if any error happens.
+					cancel()
+					return fmt.Errorf("error creating cluster: %v", err)
+				}
+				return nil
+			})
+		}
+	}
+
+	if err := eg.Wait(); err != nil {
+		return fmt.Errorf("error creating clusters: %v", err)
+	}
+
+	if err := d.testSetup(); err != nil {
+		return fmt.Errorf("error running setup for the tests: %v", err)
+	}
+
+	return nil
+}
+
+// createCommand returns the base cluster-creation command, split into one
+// field per argument.
+func (d *deployer) createCommand() []string {
+	return strings.Fields(d.createCommandFlag)
+}
+
+// IsUp reports whether every cluster in every project is reachable and has
+// at least one node registered with its API server.
+func (d *deployer) IsUp() (up bool, err error) {
+	if err := d.prepareGcpIfNeeded(d.projects[0]); err != nil {
+		return false, err
+	}
+
+	for _, project := range d.projects {
+		for _, cluster := range d.projectClustersLayout[project] {
+			if err := d.getClusterCredentials(project, cluster); err != nil {
+				return false, err
+			}
+
+			// naively assume that if the api server reports nodes, the cluster is up
+			lines, err := exec.CombinedOutputLines(
+				exec.RawCommand("kubectl get nodes -o=name"),
+			)
+			if err != nil {
+				return false, metadata.NewJUnitError(err, strings.Join(lines, "\n"))
+			}
+			if len(lines) == 0 {
+				return false, fmt.Errorf("project had no nodes active: %s", project)
+			}
+		}
+	}
+
+	return true, nil
+}
+
+// testSetup prepares everything the tests need (GCP prepare, kubeconfig,
+// instance-group cache, firewall rules). It runs at most once per deployer.
+func (d *deployer) testSetup() error {
+	if d.testPrepared {
+		// Ensure setup is a singleton.
+		return nil
+	}
+
+	// Only run prepare once for the first GCP project.
+	if err := d.prepareGcpIfNeeded(d.projects[0]); err != nil {
+		return err
+	}
+	if _, err := d.Kubeconfig(); err != nil {
+		return err
+	}
+
+	// getInstanceGroups is memoized and cluster-independent, so run it once
+	// up front instead of once per cluster inside the loop below.
+	if err := d.getInstanceGroups(); err != nil {
+		return err
+	}
+
+	for _, project := range d.projects {
+		for _, cluster := range d.projectClustersLayout[project] {
+			if err := d.ensureFirewall(project, cluster, d.network); err != nil {
+				return err
+			}
+		}
+	}
+	d.testPrepared = true
+	return nil
+}
+
+// Kubeconfig returns a path to a kubeconfig file for the cluster in
+// a temp directory, creating one if one does not exist.
+// While building it, the KUBECONFIG environment variable is pointed at each
+// per-cluster file in turn so that gcloud writes the credentials there.
+func (d *deployer) Kubeconfig() (string, error) {
+	// Memoized: reuse the path list built by a previous call.
+	if d.kubecfgPath != "" {
+		return d.kubecfgPath, nil
+	}
+
+	tmpdir, err := ioutil.TempDir("", "kubetest2-gke")
+	if err != nil {
+		return "", err
+	}
+
+	kubecfgFiles := make([]string, 0)
+	for _, project := range d.projects {
+		for _, cluster := range d.projectClustersLayout[project] {
+			// One kubeconfig file per (project, cluster) pair.
+			filename := filepath.Join(tmpdir, fmt.Sprintf("kubecfg-%s-%s", project, cluster))
+			// Point KUBECONFIG at this file so that the gcloud
+			// get-credentials call below writes the credentials into it.
+			if err := os.Setenv("KUBECONFIG", filename); err != nil {
+				return "", err
+			}
+			if err := d.getClusterCredentials(project, cluster); err != nil {
+				return "", err
+			}
+			kubecfgFiles = append(kubecfgFiles, filename)
+		}
+	}
+
+	// Join all per-cluster files into a single KUBECONFIG-style path list.
+	// NOTE(review): the KUBECONFIG env var itself is left pointing at only
+	// the last file written above — confirm callers use the returned path.
+	d.kubecfgPath = strings.Join(kubecfgFiles, string(os.PathListSeparator))
+	return d.kubecfgPath, nil
+}