Skip to content

Commit

Permalink
Some fixes to kubetest2 GKE deployer
Browse files Browse the repository at this point in the history
  • Loading branch information
chizhg committed Aug 15, 2020
1 parent bc1255f commit 8214b29
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 71 deletions.
53 changes: 11 additions & 42 deletions kubetest2-gke/deployer/down.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,7 @@ func (d *deployer) Down() error {
for i := range d.projects {
project := d.projects[i]
for j := range d.projectClustersLayout[project] {
cluster := d.clusters[j]
firewall, err := d.getClusterFirewall(project, cluster)
if err != nil {
// This is expected if the cluster doesn't exist.
continue
}
d.instanceGroups = nil

cluster := d.projectClustersLayout[project][j]
loc, err := d.location()
if err != nil {
return err
Expand All @@ -54,48 +47,24 @@ func (d *deployer) Down() error {
go func() {
defer wg.Done()
// We best-effort try all of these and report errors as appropriate.
errCluster := runWithOutput(exec.Command(
if err := runWithOutput(exec.Command(
"gcloud", d.containerArgs("clusters", "delete", "-q", cluster,
"--project="+project,
loc)...))

// don't delete default network
if d.network == "default" {
if errCluster != nil {
klog.V(1).Infof("Error deleting cluster using default network, allow the error for now %s", errCluster)
}
return
}

var errFirewall error
if runWithNoOutput(exec.Command("gcloud", "compute", "firewall-rules", "describe", firewall,
"--project="+project,
"--format=value(name)")) == nil {
klog.V(1).Infof("Found rules for firewall '%s', deleting them", firewall)
errFirewall = exec.Command("gcloud", "compute", "firewall-rules", "delete", "-q", firewall,
"--project="+project).Run()
} else {
klog.V(1).Infof("Found no rules for firewall '%s', assuming resources are clean", firewall)
}
numLeakedFWRules, errCleanFirewalls := d.cleanupNetworkFirewalls(project, d.network)

if errCluster != nil {
klog.Errorf("error deleting cluster: %v", errCluster)
}
if errFirewall != nil {
klog.Errorf("error deleting firewall: %v", errFirewall)
}
if errCleanFirewalls != nil {
klog.Errorf("error cleaning-up firewalls: %v", errCleanFirewalls)
}
if numLeakedFWRules > 0 {
klog.Errorf("leaked firewall rules")
loc)...)); err != nil {
klog.Errorf("Error deleting cluster: %v", err)
}
}()
}
}
wg.Wait()

numDeletedFWRules, errCleanFirewalls := d.cleanupNetworkFirewalls(d.projects[0], d.network)
if errCleanFirewalls != nil {
klog.Errorf("Error cleaning-up firewall rules: %v", errCleanFirewalls)
} else {
klog.V(1).Infof("Deleted %d network firewall rules", numDeletedFWRules)
}

if err := d.teardownNetwork(); err != nil {
return err
}
Expand Down
44 changes: 25 additions & 19 deletions kubetest2-gke/deployer/firewall.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,31 +20,30 @@ import (
"fmt"
"sort"
"strings"
"time"

"k8s.io/klog"

"sigs.k8s.io/kubetest2/pkg/exec"
)

func (d *deployer) ensureFirewall(project, cluster, network string) error {
func (d *deployer) ensureFirewall(hostProject, curtProject, cluster, network string) error {
klog.V(1).Infof("Ensuring firewall rules for cluster %s in %s", cluster, curtProject)
if network == "default" {
return nil
}
firewall, err := d.getClusterFirewall(project, cluster)
if err != nil {
return fmt.Errorf("error getting unique firewall: %v", err)
}
firewall := d.getClusterFirewall(curtProject, cluster)
if runWithNoOutput(exec.Command("gcloud", "compute", "firewall-rules", "describe", firewall,
"--project="+project,
"--project="+hostProject,
"--format=value(name)")) == nil {
// Assume that if this unique firewall exists, it's good to go.
return nil
}
klog.V(1).Infof("Couldn't describe firewall '%s', assuming it doesn't exist and creating it", firewall)

tagOut, err := exec.Output(exec.Command("gcloud", "compute", "instances", "list",
"--project="+project,
"--filter=metadata.created-by:*"+d.instanceGroups[project][cluster][0].path,
"--project="+curtProject,
"--filter=metadata.created-by:*"+d.instanceGroups[curtProject][cluster][0].path,
"--limit=1",
"--format=get(tags.items)"))
if err != nil {
Expand All @@ -56,7 +55,7 @@ func (d *deployer) ensureFirewall(project, cluster, network string) error {
}

if err := runWithOutput(exec.Command("gcloud", "compute", "firewall-rules", "create", firewall,
"--project="+project,
"--project="+hostProject,
"--network="+network,
"--allow="+e2eAllow,
"--target-tags="+tag)); err != nil {
Expand All @@ -65,23 +64,26 @@ func (d *deployer) ensureFirewall(project, cluster, network string) error {
return nil
}

func (d *deployer) getClusterFirewall(project, cluster string) (string, error) {
if err := d.getInstanceGroups(); err != nil {
return "", err
}
func (d *deployer) getClusterFirewall(project, cluster string) string {
// We want to ensure that there's an e2e-ports-* firewall rule
// that maps to the cluster nodes, but the target tag for the
// nodes can be slow to get. Use the hash from the lexically first
// node pool instead.
return "e2e-ports-" + d.instanceGroups[project][cluster][0].uniq, nil
return "e2e-ports-" + d.instanceGroups[project][cluster][0].uniq
}

// This function ensures that all firewall-rules are deleted from specific network.
// We also want to keep in logs that there were some resources leaking.
func (d *deployer) cleanupNetworkFirewalls(project, network string) (int, error) {
func (d *deployer) cleanupNetworkFirewalls(hostProject, network string) (int, error) {
// Do not delete firewall rules for the default network.
if network == "default" {
return 0, nil
}

klog.V(1).Infof("Cleaning up network firewall rules for network %s in %s", network, hostProject)
fws, err := exec.Output(exec.Command("gcloud", "compute", "firewall-rules", "list",
"--format=value(name)",
"--project="+project,
"--project="+hostProject,
"--filter=network:"+network))
if err != nil {
return 0, fmt.Errorf("firewall rules list failed: %s", execError(err))
Expand All @@ -91,17 +93,21 @@ func (d *deployer) cleanupNetworkFirewalls(project, network string) (int, error)
klog.V(1).Infof("Network %s has %v undeleted firewall rules %v", network, len(fwList), fwList)
commandArgs := []string{"compute", "firewall-rules", "delete", "-q"}
commandArgs = append(commandArgs, fwList...)
commandArgs = append(commandArgs, "--project="+project)
commandArgs = append(commandArgs, "--project="+hostProject)
errFirewall := runWithOutput(exec.Command("gcloud", commandArgs...))
if errFirewall != nil {
return 0, fmt.Errorf("error deleting firewall: %v", errFirewall)
}
return len(fwList), nil
// It looks sometimes gcloud exits before the firewall rules are actually deleted,
// so sleep 10 seconds to wait for the firewall rules being deleted completely.
// TODO(chizhg): change to a more reliable way to check if they are deleted or not.
time.Sleep(10 * time.Second)
}
return 0, nil
return len(fws), nil
}

func (d *deployer) getInstanceGroups() error {
// If instanceGroups has already been populated, return directly.
if d.instanceGroups != nil {
return nil
}
Expand Down
16 changes: 11 additions & 5 deletions kubetest2-gke/deployer/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ func (d *deployer) createNetwork() error {
"--network="+d.network,
"--range="+parts[0],
"--secondary-range",
fmt.Sprintf("%s-service=%s,%s-pods=%s", subnetName, parts[1], subnetName, parts[2]),
fmt.Sprintf("%s-services=%s,%s-pods=%s", subnetName, parts[1], subnetName, parts[2]),
)); err != nil {
return err
}
Expand All @@ -118,6 +118,11 @@ func (d *deployer) createNetwork() error {
}

func (d *deployer) deleteNetwork() error {
// Do not delete the default network.
if d.network == "default" {
return nil
}

// Delete the subnetworks if it's a multi-project profile.
// Reference: https://cloud.google.com/kubernetes-engine/docs/how-to/cluster-shared-vpc#deleting_the_shared_network
if len(d.projects) >= 1 {
Expand All @@ -129,14 +134,15 @@ func (d *deployer) deleteNetwork() error {
subnetName,
"--project="+hostProject,
"--region="+d.region,
"--quiet",
)); err != nil {
return err
}
}
}

if err := runWithOutput(exec.Command("gcloud", "compute", "networks", "delete", "-q", d.network,
"--project="+d.projects[0])); err != nil {
"--project="+d.projects[0], "--quiet")); err != nil {
return err
}

Expand Down Expand Up @@ -216,7 +222,7 @@ func enableSharedVPCAndGrantRoles(projects []string, region, network string) err

// Grant the required IAM roles to service accounts that belong to the service projects.
for i := 1; i < len(projects); i++ {
serviceProject := projects[1]
serviceProject := projects[i]
subnetName := network + "-" + serviceProject
// Get the subnet etag.
subnetETag, err := exec.Output(exec.Command("gcloud", "compute", "networks", "subnets",
Expand Down Expand Up @@ -260,7 +266,7 @@ func grantHostServiceAgentUserRole(projects []string) error {

hostProject := projects[0]
for i := 1; i < len(projects); i++ {
serviceProject := projects[1]
serviceProject := projects[i]
serviceProjectNum, err := getProjectNumber(serviceProject)
if err != nil {
return err
Expand Down Expand Up @@ -327,7 +333,7 @@ func removeHostServiceAgentUserRole(projects []string) error {

hostProject := projects[0]
for i := 1; i < len(projects); i++ {
serviceProject := projects[1]
serviceProject := projects[i]
serviceProjectNum, err := getProjectNumber(serviceProject)
if err != nil {
return err
Expand Down
10 changes: 5 additions & 5 deletions kubetest2-gke/deployer/up.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,14 +145,14 @@ func (d *deployer) testSetup() error {
if _, err := d.Kubeconfig(); err != nil {
return err
}
if err := d.getInstanceGroups(); err != nil {
return err
}

hostProject := d.projects[0]
for _, project := range d.projects {
for _, cluster := range d.projectClustersLayout[project] {
if err := d.getInstanceGroups(); err != nil {
return err
}

if err := d.ensureFirewall(project, cluster, d.network); err != nil {
if err := d.ensureFirewall(hostProject, project, cluster, d.network); err != nil {
return err
}
}
Expand Down

0 comments on commit 8214b29

Please sign in to comment.