diff --git a/infra/gcp/clusters/README.md b/infra/gcp/clusters/README.md index 9deba0ae4dc..29a87307bfa 100644 --- a/infra/gcp/clusters/README.md +++ b/infra/gcp/clusters/README.md @@ -1,18 +1,35 @@ # clusters -This directory contains Terraform cluster configurations for the various GCP +This directory contains Terraform modules and configurations for the various +GCP projects and Kubernetes clusters that the Kubernetes project maintains. projects that the Kubernetes project maintains. -Each directory represents a GCP project. Each sub-directory of those represents -a GKE cluster configuration. We may template these into modules at some point, -but for now they are designed to be straight forward and verbose. +## Layout + +``` +. +├── modules +│   └── +└── projects + └── + └── +``` + +Each directory in `modules` represents a Terraform module intended for reuse +inside of this repo. Not every configuration is able to use these modules yet +due to differences in google provider version. + +Each directory in `projects` represents a GCP project. Each subdirectory of +those represents a GKE cluster configuration. + +## Prerequisites -Prerequisites: - Be a member of the k8s-infra-cluster-admins@kubernetes.io group.
- Have Terraform installed (https://releases.hashicorp.com/terraform/${TERRAFORM_VERSION}/terraform_${TERRAFORM_VERSION}_linux_amd64.zip) -Instructions: +## Instructions + - Ensure you are logged into your GCP account with `gcloud auth application-default login` - From within a cluster directory: - `terraform init` will initialize your local state diff --git a/infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-cluster/README.md b/infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-cluster/README.md deleted file mode 100644 index ee5733c9e36..00000000000 --- a/infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-cluster/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# `k8s-infra-gke-cluster` terraform module - -This terraform module defines a GKE Cluster for k8s-infra use -- GCP Service Account for nodes -- BigQuery dataset for usage metering -- GKE cluster with some useful defaults -- No nodes are provided, they are expected to come from the `k8s-infra-gke-nodepool` module - -Because this is a "test" cluster: -- the BigQuery dataset will be deleted on `terraform destroy` -- the GKE cluster will be deleted on `terraform destroy` - -NOTE: due to [hashicorp/terraform#22544] this cannot be templated to handle -both test and prod clusters - -[hashicorp/terraform#22544]: https://github.com/hashicorp/terraform/issues/22544 diff --git a/infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-nodepool/README.md b/infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-nodepool/README.md deleted file mode 100644 index d80c89c47fc..00000000000 --- a/infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-nodepool/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# `k8s-infra-gke-nodepool` terraform module - -This terraform module defines a GKE Nodepool for k8s-infra use: -- It is expected the cluster has been provisioned using the `k8s-infra-gke-cluster` module -- Workload identity is enabled by default for this 
nodepool diff --git a/infra/gcp/clusters/kubernetes-public/prow-build-test/main.tf b/infra/gcp/clusters/kubernetes-public/prow-build-test/main.tf deleted file mode 100644 index b50b11430af..00000000000 --- a/infra/gcp/clusters/kubernetes-public/prow-build-test/main.tf +++ /dev/null @@ -1,107 +0,0 @@ -/** - * Copyright 2020 The Kubernetes Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* -This file defines: -- GCP Service Account for prow-build-test -- GCP Service Account for boskos-jenkins -- GKE cluster configuration for prow-build-test -- GKE nodepool configuration for prow-build-test -*/ - -locals { - project_name = "kubernetes-public" - cluster_name = "prow-build-test" // The name of the cluster defined in this file - cluster_ksa_name = "prow-build" // MUST match the name of the KSA intended to use the prow_build_cluster_sa serviceaccount - cluster_location = "us-central1" // The GCP location (region or zone) where the cluster should be created - bigquery_location = "US" // The bigquery specific location where the dataset should be created - pod_namespace = "test-pods" // MUST match whatever prow is configured to use when it schedules to this cluster - boskos_janitor_gsa_name = "boskos-janitor-test" // The name of the GCP SA used by boskos-janitor - boskos_janitor_ksa_name = "boskos-janitor" // MUST match the name of the KSA intended to use the boskos_janitor_sa serviceaccount -} - -// This configures the source project where we should 
install the cluster -data "google_project" "project" { - project_id = local.project_name -} - -// Create GCP SA for pods -resource "google_service_account" "prow_build_cluster_sa" { - project = data.google_project.project.name - account_id = local.cluster_name - display_name = "Used by pods in '${local.cluster_name}' GKE cluster" -} -// Allow pods using the build cluster KSA to use the GCP SA via workload identity -data "google_iam_policy" "prow_build_cluster_sa_workload_identity" { - binding { - role = "roles/iam.workloadIdentityUser" - - members = [ - "serviceAccount:${data.google_project.project.name}.svc.id.goog[${local.pod_namespace}/${local.cluster_ksa_name}]", - ] - } -} -// Authoritative iam-policy: replaces any existing policy attached to this service_account -resource "google_service_account_iam_policy" "prow_build_cluster_sa_iam" { - service_account_id = google_service_account.prow_build_cluster_sa.name - policy_data = data.google_iam_policy.prow_build_cluster_sa_workload_identity.policy_data -} - -// Create GCP SA for boskos-janitor -resource "google_service_account" "boskos_janitor_sa" { - project = data.google_project.project.name - account_id = local.boskos_janitor_gsa_name - display_name = "Used by boskos-janitor in '${local.cluster_name}' GKE cluster" -} -// Allow pods using the build cluster KSA to use the GCP SA via workload identity -data "google_iam_policy" "boskos_janitor_sa_workload_identity" { - binding { - role = "roles/iam.workloadIdentityUser" - - members = [ - "serviceAccount:${data.google_project.project.name}.svc.id.goog[${local.pod_namespace}/${local.boskos_janitor_ksa_name}]", - ] - } -} -// Authoritative iam-policy: replaces any existing policy attached to this service account -resource "google_service_account_iam_policy" "boskos_janitor_sa_iam" { - service_account_id = google_service_account.boskos_janitor_sa.name - policy_data = data.google_iam_policy.boskos_janitor_sa_workload_identity.policy_data -} - -module 
"prow_build_test_cluster" { - source = "./k8s-infra-gke-cluster" - project_name = data.google_project.project.name - cluster_name = local.cluster_name - cluster_location = local.cluster_location - bigquery_location = local.bigquery_location -} - -module "prow_build_test_nodepool" { - source = "./k8s-infra-gke-nodepool" - project_name = data.google_project.project.name - cluster_name = module.prow_build_test_cluster.cluster.name - location = module.prow_build_test_cluster.cluster.location - name = "pool1" - min_count = 1 - max_count = 3 - // k8s-prow-builds uses n1-highmem-8 - machine_type = "n1-highmem-2" - // k8s-prow-builds uses 250 - disk_size_gb = 100 - disk_type = "pd-ssd" - service_account = module.prow_build_test_cluster.cluster_node_sa.email -} diff --git a/infra/gcp/clusters/modules/gke-cluster/README.md b/infra/gcp/clusters/modules/gke-cluster/README.md new file mode 100644 index 00000000000..49ef518be86 --- /dev/null +++ b/infra/gcp/clusters/modules/gke-cluster/README.md @@ -0,0 +1,16 @@ +# `gke-cluster` terraform module + +This terraform module defines a GKE cluster following wg-k8s-infra conventions: +- GCP Service Account for nodes +- BigQuery dataset for usage metering +- GKE cluster with some useful defaults +- No nodes are provided, they are expected to come from nodepools created via the [`gke-nodepool`] module + +It is assumed the GCP project for this cluster has been created via the [`gke-project`] module + +If this is a "prod" cluster: +- the BigQuery dataset will NOT be deleted on `terraform destroy` +- the GKE cluster will NOT be deleted on `terraform destroy` + +[`gke-project`]: /infra/gcp/clusters/modules/gke-project +[`gke-nodepool`]: /infra/gcp/clusters/modules/gke-nodepool diff --git a/infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-cluster/main.tf b/infra/gcp/clusters/modules/gke-cluster/main.tf similarity index 51% rename from infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-cluster/main.tf rename 
to infra/gcp/clusters/modules/gke-cluster/main.tf index e8425d415fb..e2a0e71f62e 100644 --- a/infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-cluster/main.tf +++ b/infra/gcp/clusters/modules/gke-cluster/main.tf @@ -39,7 +39,14 @@ resource "google_project_iam_member" "cluster_node_sa_monitoring_metricwriter" { } // BigQuery dataset for usage data -resource "google_bigquery_dataset" "usage_metering" { +// +// Uses a workaround from https://github.com/hashicorp/terraform/issues/22544#issuecomment-582974372 +// to set delete_contents_on_destroy to false if is_prod_cluster +// +// IMPORTANT: The prod_ and test_ forms of this resource MUST be kept in sync. +// Any changes in one MUST be reflected in the other. +resource "google_bigquery_dataset" "prod_usage_metering" { + count = var.is_prod_cluster == "true" ? 1 : 0 dataset_id = replace("usage_metering_${var.cluster_name}", "-", "_") project = var.project_name description = "GKE Usage Metering for cluster '${var.cluster_name}'" @@ -54,21 +61,144 @@ resource "google_bigquery_dataset" "usage_metering" { user_by_email = google_service_account.cluster_node_sa.email } + // NOTE: unique to prod_usage_metering // This restricts deletion of this dataset if there is data in it - // IMPORTANT: Should be true on test clusters + delete_contents_on_destroy = false +} +resource "google_bigquery_dataset" "test_usage_metering" { + count = var.is_prod_cluster == "true" ? 0 : 1 + dataset_id = replace("usage_metering_${var.cluster_name}", "-", "_") + project = var.project_name + description = "GKE Usage Metering for cluster '${var.cluster_name}'" + location = var.bigquery_location + + access { + role = "OWNER" + special_group = "projectOwners" + } + access { + role = "WRITER" + user_by_email = google_service_account.cluster_node_sa.email + } + + // NOTE: unique to test_usage_metering delete_contents_on_destroy = true } -// Create GKE cluster, but with no node pools. 
Node pools can be provisioned below -resource "google_container_cluster" "cluster" { +// Create GKE cluster, but with no node pools. Node pools are provisioned via another module. +// +// Uses a workaround from https://github.com/hashicorp/terraform/issues/22544#issuecomment-582974372 +// to set lifecycle.prevent_destroy to true if is_prod_cluster +// +// IMPORTANT: The prod_ and test_ forms of this resource MUST be kept in sync. +// Any changes in one MUST be reflected in the other. +resource "google_container_cluster" "prod_cluster" { + count = var.is_prod_cluster == "true" ? 1 : 0 + name = var.cluster_name location = var.cluster_location provider = google-beta project = var.project_name + // NOTE: unique to prod_cluster // GKE clusters are critical objects and should not be destroyed - // IMPORTANT: should be false on test clusters + lifecycle { + prevent_destroy = true + } + + // Network config + network = "default" + + // Start with a single node, because we're going to delete the default pool + initial_node_count = 1 + + // Removes the default node pool, so we can custom create them as separate + // objects + remove_default_node_pool = true + + // Disable local and certificate auth + master_auth { + username = "" + password = "" + + client_certificate_config { + issue_client_certificate = false + } + } + + // Enable google-groups for RBAC + authenticator_groups_config { + security_group = "gke-security-groups@kubernetes.io" + } + + // Enable workload identity for GCP IAM + workload_identity_config { + identity_namespace = "${var.project_name}.svc.id.goog" + } + + // Enable Stackdriver Kubernetes Monitoring + logging_service = "logging.googleapis.com/kubernetes" + monitoring_service = "monitoring.googleapis.com/kubernetes" + + // Set maintenance time + maintenance_policy { + daily_maintenance_window { + start_time = "11:00" // (in UTC), 03:00 PST + } + } + + // Restrict master to Google IP space; use Cloud Shell to access + master_authorized_networks_config {
+ } + + // Enable GKE Usage Metering + resource_usage_export_config { + enable_network_egress_metering = true + bigquery_destination { + dataset_id = google_bigquery_dataset.prod_usage_metering[0].dataset_id + } + } + + // Enable GKE Network Policy + network_policy { + enabled = true + provider = "CALICO" + } + + // Configure cluster addons + addons_config { + horizontal_pod_autoscaling { + disabled = false + } + http_load_balancing { + disabled = false + } + network_policy_config { + disabled = false + } + } + + // Enable PodSecurityPolicy enforcement + pod_security_policy_config { + enabled = false // TODO: we should turn this on + } + + // Enable VPA + vertical_pod_autoscaling { + enabled = true + } +} +resource "google_container_cluster" "test_cluster" { + count = var.is_prod_cluster == "true" ? 0 : 1 + + name = var.cluster_name + location = var.cluster_location + + provider = google-beta + project = var.project_name + + // NOTE: unique to test_cluster lifecycle { prevent_destroy = false } @@ -122,7 +252,7 @@ resource "google_container_cluster" "cluster" { resource_usage_export_config { enable_network_egress_metering = true bigquery_destination { - dataset_id = google_bigquery_dataset.usage_metering.dataset_id + dataset_id = google_bigquery_dataset.test_usage_metering[0].dataset_id } } diff --git a/infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-cluster/outputs.tf b/infra/gcp/clusters/modules/gke-cluster/outputs.tf similarity index 74% rename from infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-cluster/outputs.tf rename to infra/gcp/clusters/modules/gke-cluster/outputs.tf index abbeb0f9015..2cd4da2b103 100644 --- a/infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-cluster/outputs.tf +++ b/infra/gcp/clusters/modules/gke-cluster/outputs.tf @@ -16,7 +16,12 @@ output "cluster" { description = "The cluster" - value = google_container_cluster.cluster + // Workaround from 
https://github.com/hashicorp/terraform/issues/22544#issuecomment-582974372 + // This should be either test_cluster or prod_cluster + value = coalescelist( + google_container_cluster.test_cluster.*, + google_container_cluster.prod_cluster.* + )[0] } output "cluster_node_sa" { diff --git a/infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-cluster/variables.tf b/infra/gcp/clusters/modules/gke-cluster/variables.tf similarity index 86% rename from infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-cluster/variables.tf rename to infra/gcp/clusters/modules/gke-cluster/variables.tf index 869f2641412..c042763fa28 100644 --- a/infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-cluster/variables.tf +++ b/infra/gcp/clusters/modules/gke-cluster/variables.tf @@ -33,3 +33,9 @@ variable "bigquery_location" { description = "The bigquery specific location where the dataset should be created" type = string } + +variable "is_prod_cluster" { + description = "If this is not a prod cluster it's safe to delete resources on destroy" + type = string + default = "false" +} diff --git a/infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-nodepool/versions.tf b/infra/gcp/clusters/modules/gke-cluster/versions.tf similarity index 95% rename from infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-nodepool/versions.tf rename to infra/gcp/clusters/modules/gke-cluster/versions.tf index 07414a670aa..034ff6fbe49 100644 --- a/infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-nodepool/versions.tf +++ b/infra/gcp/clusters/modules/gke-cluster/versions.tf @@ -15,7 +15,7 @@ */ terraform { - required_version = ">= 0.12.8" + required_version = "~> 0.12.20" required_providers { google = "~> 3.19.0" google-beta = "~> 3.19.0" diff --git a/infra/gcp/clusters/modules/gke-nodepool/README.md b/infra/gcp/clusters/modules/gke-nodepool/README.md new file mode 100644 index 00000000000..8a4a1fa7ee6 --- /dev/null +++ 
b/infra/gcp/clusters/modules/gke-nodepool/README.md @@ -0,0 +1,11 @@ +# `gke-nodepool` terraform module + +This terraform module defines a GKE nodepool following wg-k8s-infra conventions, including: +- Workload Identity is enabled by default for this nodepool +- Legacy metadata endpoints are disabled +- Auto-repair and auto-upgrade are enabled + +It is assumed that the associated GKE cluster has been provisioned using the [`gke-cluster`] module + +[`gke-cluster`]: /infra/gcp/clusters/modules/gke-cluster +[`gke-project`]: /infra/gcp/clusters/modules/gke-project diff --git a/infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-nodepool/main.tf b/infra/gcp/clusters/modules/gke-nodepool/main.tf similarity index 98% rename from infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-nodepool/main.tf rename to infra/gcp/clusters/modules/gke-nodepool/main.tf index c64919fcfd3..4e1466c9ca8 100644 --- a/infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-nodepool/main.tf +++ b/infra/gcp/clusters/modules/gke-nodepool/main.tf @@ -40,6 +40,7 @@ resource "google_container_node_pool" "node_pool" { // Set machine type, and enable all oauth scopes tied to the service account node_config { + image_type = var.image_type machine_type = var.machine_type disk_size_gb = var.disk_size_gb disk_type = var.disk_type diff --git a/infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-nodepool/variables.tf b/infra/gcp/clusters/modules/gke-nodepool/variables.tf similarity index 93% rename from infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-nodepool/variables.tf rename to infra/gcp/clusters/modules/gke-nodepool/variables.tf index 6783e486301..7f16d67971c 100644 --- a/infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-nodepool/variables.tf +++ b/infra/gcp/clusters/modules/gke-nodepool/variables.tf @@ -48,6 +48,12 @@ variable "machine_type" { type = string } +variable "image_type" { + description = "The 
image_type of this node_pool" + type = string + default = "COS" +} + variable "disk_size_gb" { description = "The disk_size_gb of this node_pool" type = string diff --git a/infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-cluster/versions.tf b/infra/gcp/clusters/modules/gke-nodepool/versions.tf similarity index 95% rename from infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-cluster/versions.tf rename to infra/gcp/clusters/modules/gke-nodepool/versions.tf index 07414a670aa..034ff6fbe49 100644 --- a/infra/gcp/clusters/kubernetes-public/prow-build-test/k8s-infra-gke-cluster/versions.tf +++ b/infra/gcp/clusters/modules/gke-nodepool/versions.tf @@ -15,7 +15,7 @@ */ terraform { - required_version = ">= 0.12.8" + required_version = "~> 0.12.20" required_providers { google = "~> 3.19.0" google-beta = "~> 3.19.0" diff --git a/infra/gcp/clusters/modules/gke-project/README.md b/infra/gcp/clusters/modules/gke-project/README.md new file mode 100644 index 00000000000..61bc1975e9f --- /dev/null +++ b/infra/gcp/clusters/modules/gke-project/README.md @@ -0,0 +1,13 @@ +# `gke-project` terraform module + +This terraform module defines a GCP project following wg-k8s-infra conventions +that is intended to host a GKE cluster created by the [`gke-cluster`] module: +- Project is associated with CNCF org +- Project is linked to CNCF billing account +- Services necessary to support [`gke-cluster`] are enabled +- Some default IAM bindings are added: + - k8s-infra-cluster-admins@ gets `roles/compute.viewer`, `roles/container.admin`, `roles/ServiceAccountLister` + - gke-security-groups@ gets `roles/container.clusterViewer` + +[`gke-cluster`]: /infra/gcp/clusters/modules/gke-cluster +[`gke-nodepool`]: /infra/gcp/clusters/modules/gke-nodepool diff --git a/infra/gcp/clusters/modules/gke-project/main.tf b/infra/gcp/clusters/modules/gke-project/main.tf new file mode 100644 index 00000000000..e21cd48b1fc --- /dev/null +++ 
b/infra/gcp/clusters/modules/gke-project/main.tf @@ -0,0 +1,102 @@ +/** + * Copyright 2020 The Kubernetes Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Create the project that hosts the cluster: var.project_id is its ID, var.project_name its display name +resource "google_project" "project" { + name = var.project_name + project_id = var.project_id + org_id = "758905017065" // kubernetes.io + billing_account = "018801-93540E-22A20E" +} + +// Services we need +resource "google_project_service" "compute" { + project = google_project.project.project_id + service = "compute.googleapis.com" + disable_dependent_services = true +} +resource "google_project_service" "logging" { + project = google_project.project.project_id + service = "logging.googleapis.com" + disable_dependent_services = true +} +resource "google_project_service" "monitoring" { + project = google_project.project.project_id + service = "monitoring.googleapis.com" + disable_dependent_services = true +} +resource "google_project_service" "bigquery" { + project = google_project.project.project_id + service = "bigquery.googleapis.com" + disable_dependent_services = true +} +resource "google_project_service" "container" { + project = google_project.project.project_id + service = "container.googleapis.com" + disable_dependent_services = true +} +resource "google_project_service" "storage_component" { + project = google_project.project.project_id + service = "storage-component.googleapis.com" + disable_dependent_services = true
+} +resource "google_project_service" "oslogin" { + project = google_project.project.project_id + service = "oslogin.googleapis.com" + disable_dependent_services = true +} +resource "google_project_service" "cloudbuild" { + project = google_project.project.project_id + service = "cloudbuild.googleapis.com" + disable_dependent_services = true +} +resource "google_project_service" "stackdriver" { + project = google_project.project.project_id + service = "stackdriver.googleapis.com" + disable_dependent_services = true +} + + +// "Empower cluster admins" is what ensure-main-project.sh says +resource "google_project_iam_member" "cluster_admins_as_compute_viewer" { + project = google_project.project.project_id + role = "roles/compute.viewer" + member = "group:${var.cluster_admins_group}" +} +resource "google_project_iam_member" "cluster_admins_as_container_admin" { + project = google_project.project.project_id + role = "roles/container.admin" + member = "group:${var.cluster_admins_group}" +} +resource "google_project_iam_custom_role" "service_account_lister" { + project = google_project.project.project_id + role_id = "ServiceAccountLister" + title = "Service Account Lister" + description = "Can list ServiceAccounts." 
+ permissions = ["iam.serviceAccounts.list"] +} +resource "google_project_iam_member" "cluster_admins_as_service_account_lister" { + project = google_project.project.project_id + role = "projects/${google_project.project.project_id}/roles/${google_project_iam_custom_role.service_account_lister.role_id}" + member = "group:${var.cluster_admins_group}" +} + +// "Empowering cluster users" is what ensure-main-project.sh says +resource "google_project_iam_member" "cluster_users_as_container_cluster_viewer" { + project = google_project.project.project_id + role = "roles/container.clusterViewer" + member = "group:${var.cluster_users_group}" +} diff --git a/infra/gcp/clusters/modules/gke-project/outputs.tf b/infra/gcp/clusters/modules/gke-project/outputs.tf new file mode 100644 index 00000000000..b9c2f09e580 --- /dev/null +++ b/infra/gcp/clusters/modules/gke-project/outputs.tf @@ -0,0 +1,20 @@ +/** + * Copyright 2020 The Kubernetes Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +output "project_id" { + description = "The project_id of the project that was created" + value = google_project.project.project_id +} diff --git a/infra/gcp/clusters/modules/gke-project/variables.tf b/infra/gcp/clusters/modules/gke-project/variables.tf new file mode 100644 index 00000000000..19936791439 --- /dev/null +++ b/infra/gcp/clusters/modules/gke-project/variables.tf @@ -0,0 +1,37 @@ +/** + * Copyright 2020 The Kubernetes Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +variable "project_id" { + description = "The id of the project, eg: my-awesome-project" + type = string +} + +variable "project_name" { + description = "The display name of the project, eg: My Awesome Project" + type = string +} + +variable "cluster_admins_group" { + description = "The group to treat as cluster admins" + type = string + default = "k8s-infra-cluster-admins@kubernetes.io" +} + +variable "cluster_users_group" { + description = "The group to treat as cluster users" + type = string + default = "gke-security-groups@kubernetes.io" +} diff --git a/infra/gcp/clusters/modules/gke-project/versions.tf b/infra/gcp/clusters/modules/gke-project/versions.tf new file mode 100644 index 00000000000..034ff6fbe49 --- /dev/null +++ b/infra/gcp/clusters/modules/gke-project/versions.tf @@ -0,0 +1,23 @@ +/** + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +terraform { + required_version = "~> 0.12.20" + required_providers { + google = "~> 3.19.0" + google-beta = "~> 3.19.0" + } +} diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build-trusted/prow-build-trusted/00-provider.tf b/infra/gcp/clusters/projects/k8s-infra-prow-build-trusted/prow-build-trusted/00-provider.tf new file mode 100644 index 00000000000..0e674c4f774 --- /dev/null +++ b/infra/gcp/clusters/projects/k8s-infra-prow-build-trusted/prow-build-trusted/00-provider.tf @@ -0,0 +1,20 @@ +/* +This file defines: +- Required Terraform version +- Required provider versions +- Storage backend details +*/ + +terraform { + required_version = "~> 0.12.20" + + backend "gcs" { + bucket = "k8s-infra-clusters-terraform" + prefix = "k8s-infra-prow-build-trusted/prow-build-trusted" // $project_name/$cluster_name + } + + required_providers { + google = "~> 3.19.0" + google-beta = "~> 3.19.0" + } +} diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build-trusted/prow-build-trusted/README.md b/infra/gcp/clusters/projects/k8s-infra-prow-build-trusted/prow-build-trusted/README.md new file mode 100644 index 00000000000..d33bde52fc0 --- /dev/null +++ b/infra/gcp/clusters/projects/k8s-infra-prow-build-trusted/prow-build-trusted/README.md @@ -0,0 +1,92 @@ +# k8s-infra-prow-build-trusted/prow-build-trusted + +These terraform resources define a GCP project containing a GKE cluster +intended to serve as a "trusted build cluster" for prow.k8s.io. This is +intended for jobs that need access to more sensitive secrets, such as +github tokens, or service accounts authorized to push to staging buckets. +Also included are some service accounts defined for use by pods within the cluster.
+ +## Initial setup + +### Provisioning + +There was some manual work in bringing this up fully: +- expect `terraform apply` to fail initially while trying to create bindings + for `roles/iam.workloadIdentityUser`, as the identity namespace won't exist + until the GKE cluster is created; re-run to succeed +- run `ensure_release_projects.sh` and `ensure-staging-storage.sh` to make + sure the `gcb-builder` account will be able to run jobs for the + projects referenced within +- deploy resources and secrets to the cluster +``` +# from within a cloud-shell +# e.g. gcloud alpha cloud-shell ssh --project=k8s-infra-prow-build-trusted + +# get credentials for the cluster +gcloud container clusters get-credentials \ + prow-build-trusted --project=k8s-infra-prow-build-trusted --region=us-central1 + +# get k8s.io on here, for this example we'll assume everything's pushed to git +git clone https://github.com/kubernetes/k8s.io + +# deploy the resources; note boskos-resources.yaml isn't a configmap +cd k8s.io/infra/gcp/clusters/projects/k8s-infra-prow-build-trusted/prow-build-trusted +kubectl apply -f ./resources + +# create the service-account secret +gcloud iam service-accounts keys create \ + --project=k8s-infra-prow-build-trusted \ + --iam-account=prow-build-trusted@k8s-infra-prow-build-trusted.iam.gserviceaccount.com \ + tmp.json +kubectl create secret generic -n test-pods service-account \ + --from-file=service-account.json=tmp.json +rm tmp.json +``` + +### Connecting to prow.k8s.io + +There was some manual work to hook this up to prow.k8s.io: +- generate a kubeconfig with credentials that prow.k8s.io will use to access + the build cluster, and hand it off to prow.k8s.io on-call +``` +# from within a cloud-shell +# e.g.
gcloud alpha cloud-shell ssh --project=k8s-infra-prow-build-trusted + +# get credentials for the cluster +gcloud container clusters get-credentials \ + prow-build-trusted --project=k8s-infra-prow-build-trusted --region=us-central1 + +# generate a kubeconfig to hand off to prow.k8s.io on-call +# the "name" is what prowjobs will specify in their cluster: field +# to target this cluster +git clone https://github.com/kubernetes/test-infra +cd test-infra/gencred && go build . +./gencred \ + --context gke_k8s-infra-prow-build-trusted_us-central1_prow-build-trusted \ + --name k8s-infra-prow-build-trusted \ + --serviceaccount \ + --output k8s-infra-prow-build-trusted.kubeconfig.yaml +``` +- ask prow.k8s.io on-call to give the build cluster's service account the + following IAM privileges +``` +# write build logs/artifacts to kubernetes-jenkins +gsutil iam ch \ + serviceAccount:prow-build-trusted@k8s-infra-prow-build-trusted.iam.gserviceaccount.com:objectAdmin \ + gs://kubernetes-jenkins +# stage builds for use by other jobs +gsutil iam ch \ + serviceAccount:prow-build-trusted@k8s-infra-prow-build-trusted.iam.gserviceaccount.com:objectAdmin \ + gs://kubernetes-release-pull +# TODO: this isn't working, the bucket is in google-containers project which has +# a ban on non-google.com accounts being added to iam +gsutil iam ch \ + serviceAccount:prow-build-trusted@k8s-infra-prow-build-trusted.iam.gserviceaccount.com:objectAdmin \ + gs://kubernetes-release-dev +``` + +## TODO + +- figure out whether this build cluster needs write access to gs://kubernetes-release-dev +- deploy ghproxy to this cluster (does this need its own nodepool/instance?)
+- try out a dry-run peribolos job on this cluster diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build-trusted/prow-build-trusted/main.tf b/infra/gcp/clusters/projects/k8s-infra-prow-build-trusted/prow-build-trusted/main.tf new file mode 100644 index 00000000000..f0f717af944 --- /dev/null +++ b/infra/gcp/clusters/projects/k8s-infra-prow-build-trusted/prow-build-trusted/main.tf @@ -0,0 +1,106 @@ +/** + * Copyright 2020 The Kubernetes Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* +This file defines: +- Google Project k8s-infra-prow-build-trusted to host the cluster +- GCP Service Account for prow-build-trusted +- GKE cluster configuration for prow-build-trusted +- GKE nodepool configuration for prow-build-trusted +*/ + +locals { + project_id = "k8s-infra-prow-build-trusted" + cluster_name = "prow-build-trusted" // The name of the cluster defined in this file + cluster_location = "us-central1" // The GCP location (region or zone) where the cluster should be created + bigquery_location = "US" // The bigquery specific location where the dataset should be created + pod_namespace = "test-pods" // MUST match whatever prow is configured to use when it schedules to this cluster + cluster_sa_name = "prow-build-trusted" // Name of the GSA and KSA that pods use by default + gcb_builder_sa_name = "gcb-builder" // Name of the GSA and KSA that pods use to be allowed to run GCB builds and push to GCS buckets +} + +module "project" { + source = "../../../modules/gke-project" + project_id = local.project_id + project_name = local.project_id +} + +// Create GCP SA for pods +resource "google_service_account" "prow_build_cluster_sa" { + project = local.project_id + account_id = local.cluster_sa_name + display_name = "Used by pods in '${local.cluster_name}' GKE cluster" +} +// Allow pods using the build cluster KSA to use the GCP SA via workload identity +data "google_iam_policy" "prow_build_cluster_sa_workload_identity" { + binding { + role = "roles/iam.workloadIdentityUser" + + members = [ + "serviceAccount:${local.project_id}.svc.id.goog[${local.pod_namespace}/${local.cluster_sa_name}]", + ] + } +} +// Authoritative iam-policy: replaces any existing policy attached to this service_account +resource "google_service_account_iam_policy" "prow_build_cluster_sa_iam" { + service_account_id = google_service_account.prow_build_cluster_sa.name + policy_data = data.google_iam_policy.prow_build_cluster_sa_workload_identity.policy_data +} + +// Create GCP SA 
for jobs that use GCB and push results to GCS +resource "google_service_account" "gcb_builder_sa" { + project = local.project_id + account_id = local.gcb_builder_sa_name + display_name = local.gcb_builder_sa_name +} +// Allow pods using the build cluster KSA to use the GCP SA via workload identity +data "google_iam_policy" "gcb_builder_sa_workload_identity" { + binding { + role = "roles/iam.workloadIdentityUser" + + members = [ + "serviceAccount:${local.project_id}.svc.id.goog[${local.pod_namespace}/${local.gcb_builder_sa_name}]", + ] + } +} +// Authoritative iam-policy: replaces any existing policy attached to this service_account +resource "google_service_account_iam_policy" "gcb_builder_sa_iam" { + service_account_id = google_service_account.gcb_builder_sa.name + policy_data = data.google_iam_policy.gcb_builder_sa_workload_identity.policy_data +} + +module "prow_build_cluster" { + source = "../../../modules/gke-cluster" + project_name = local.project_id + cluster_name = local.cluster_name + cluster_location = local.cluster_location + bigquery_location = local.bigquery_location + is_prod_cluster = "true" +} + +module "prow_build_nodepool" { + source = "../../../modules/gke-nodepool" + project_name = local.project_id + cluster_name = module.prow_build_cluster.cluster.name + location = module.prow_build_cluster.cluster.location + name = "trusted-pool1" + min_count = 1 + max_count = 3 + machine_type = "n1-standard-8" + disk_size_gb = 200 + disk_type = "pd-standard" + service_account = module.prow_build_cluster.cluster_node_sa.email +} diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build-trusted/prow-build-trusted/resources/build-serviceaccounts.yaml b/infra/gcp/clusters/projects/k8s-infra-prow-build-trusted/prow-build-trusted/resources/build-serviceaccounts.yaml new file mode 100644 index 00000000000..aef249aaa84 --- /dev/null +++ b/infra/gcp/clusters/projects/k8s-infra-prow-build-trusted/prow-build-trusted/resources/build-serviceaccounts.yaml @@ -0,0 +1,24 
@@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + annotations: + iam.gke.io/gcp-service-account: prow-build-trusted@k8s-infra-prow-build-trusted.iam.gserviceaccount.com + name: prow-build-trusted + namespace: test-pods +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + annotations: + iam.gke.io/gcp-service-account: k8s-infra-gcp-auditor@kubernetes-public.iam.gserviceaccount.com + name: k8s-infra-gcp-auditor + namespace: test-pods +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + annotations: + iam.gke.io/gcp-service-account: gcb-builder@k8s-infra-prow-build-trusted.iam.gserviceaccount.com + name: gcb-builder + namespace: test-pods diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build-trusted/prow-build-trusted/resources/test-pods-namespace.yaml b/infra/gcp/clusters/projects/k8s-infra-prow-build-trusted/prow-build-trusted/resources/test-pods-namespace.yaml new file mode 100644 index 00000000000..eff8a8612f0 --- /dev/null +++ b/infra/gcp/clusters/projects/k8s-infra-prow-build-trusted/prow-build-trusted/resources/test-pods-namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: test-pods diff --git a/infra/gcp/clusters/kubernetes-public/prow-build-test/00-provider.tf b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/00-provider.tf similarity index 71% rename from infra/gcp/clusters/kubernetes-public/prow-build-test/00-provider.tf rename to infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/00-provider.tf index e791b3edd78..eae6df82c2d 100644 --- a/infra/gcp/clusters/kubernetes-public/prow-build-test/00-provider.tf +++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/00-provider.tf @@ -6,11 +6,11 @@ This file defines: */ terraform { - required_version = ">= 0.12.8" + required_version = "~> 0.12.20" backend "gcs" { bucket = "k8s-infra-clusters-terraform" - prefix = "kubernetes-public/prow-build-test" // $project_name/$cluster_name + prefix = "k8s-infra-prow-build/prow-build" // 
$project_name/$cluster_name } required_providers { diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/README.md b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/README.md new file mode 100644 index 00000000000..fbc5320ba08 --- /dev/null +++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/README.md @@ -0,0 +1,105 @@ +# k8s-infra-prow-build/prow-build + +These terraform resources define a GCP project containing a GKE cluster +intended to serve as a "build cluster" for prow.k8s.io. There are also +some service accounts defined for use by pods within the cluster. + +## Initial Setup + +### Provisioning + +There was some manual work in bringing this up fully: +- expect `terraform apply` to fail initially while trying to create bindings + for `roles/iam.workloadIdentityUser`, as the identity namespace won't exist + until the GKE cluster is created; re-run to succeed +- run `ensure-e2e-projects.sh` to ensure e2e projects have been provisioned, + and an external ip has been created for boskos-metrics + - edit `resources/boskos-resources.yaml` to include the projects + - edit `resources/boskos.yaml` to have `boskos-metrics` use the external ip +- deploy resources to the cluster +``` +# from within a cloud-shell +# e.g. 
gcloud alpha cloud-shell ssh --project=k8s-infra-prow-build + +# get credentials for the cluster +gcloud container clusters get-credentials \ + prow-build --project=k8s-infra-prow-build --region=us-central1 + +# get k8s.io on here, for this example we'll assume everything's pushed to git +git clone https://github.com/kubernetes/k8s.io + +# deploy the resources; note boskos-resources.yaml isn't a configmap +cd k8s.io/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build +kubectl apply -f ./resources +kubectl create configmap -n test-pods resources \ + --from-file=config=./resources/boskos-resources.yaml \ + --dry-run -o=yaml | kubectl replace -f - + +# create the service-account secret +gcloud iam service-accounts keys create \ + --project=k8s-infra-prow-build \ + --iam-account=prow-build@k8s-infra-prow-build.iam.gserviceaccount.com \ + tmp.json +kubectl create secret generic -n test-pods service-account \ + --from-file=service-account.json=tmp.json +rm tmp.json + +# create the ssh-key-secret +# TODO: these files were manually created and the pubkey hardcoded into +# ensure-e2e-projects.sh above; consider rewriting this guide to +# describe generating the key, and then store it into cloud secrets +# to get it here +kubectl create secret generic -n test-pods ssh-key-secret \ + --from-file=ssh-private=prow-build-test.ssh-key \ + --from-file=ssh-public=prow-build-test.ssh-key.pub +rm prow-build-test.ssh-key* +``` + +### Connecting to prow.k8s.io + +There was some manual work to hook this up to prow.k8s.io: +- generate a kubeconfig with credentials that prow.k8s.io will use to access + the build cluster, and hand it off to prow.k8s.io on-call +``` +# from within a cloud-shell +# e.g. 
gcloud alpha cloud-shell ssh --project=k8s-infra-prow-build + +# get credentials for the cluster +gcloud container clusters get-credentials \ + prow-build --project=k8s-infra-prow-build --region=us-central1 + +# generate a kubeconfig to hand off to prow.k8s.io on-call +# the "name" is what prowjobs will specify in their cluster: field +# to target this cluster +git clone https://github.com/kubernetes/test-infra +cd test-infra/gencred && go build . +./gencred \ + --context gke_k8s-infra-prow-build_us-central1_prow-build \ + --name k8s-infra-prow-build \ + --serviceaccount \ + --output k8s-infra-prow-build.kubeconfig.yaml +``` +- ask prow.k8s.io on-call to give the build cluster's service account the + following IAM privileges +``` +# write build logs/artifacts to kubernetes-jenkins +gsutil iam ch \ + serviceAccount:prow-build@k8s-infra-prow-build.iam.gserviceaccount.com:objectAdmin \ + gs://kubernetes-jenkins +# stage builds for use by other jobs +gsutil iam ch \ + serviceAccount:prow-build@k8s-infra-prow-build.iam.gserviceaccount.com:objectAdmin \ + gs://kubernetes-release-pull +# TODO: this isn't working, the bucket is in google-containers project which has +# a ban on non-google.com accounts being added to iam +gsutil iam ch \ + serviceAccount:prow-build@k8s-infra-prow-build.iam.gserviceaccount.com:objectAdmin \ + gs://kubernetes-release-dev +``` + +## TODO + +- some jobs can't be migrated until we use a bucket other than gs://kubernetes-release-dev +- create a nodepool for greenhouse and deploy to this cluster +- set up a postsubmit to deploy boskos-resources.yaml +- decide the story for deploying/upgrading boskos diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/main.tf b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/main.tf new file mode 100644 index 00000000000..45a07c1a8b2 --- /dev/null +++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/main.tf @@ -0,0 +1,112 @@ +/** + * Copyright 2020 The Kubernetes Authors + 
* + * 
Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +This file defines: +- GCP Project k8s-infra-prow-build to hold a prow build cluster +- GCP Service Account for k8s-infra-prow-build pods (bound via workload identity to a KSA of the same name) +- GCP Service Account for boskos-janitor (bound via workload identity to a KSA of the same name) +- GKE cluster configuration for prow-build +- GKE nodepool configuration for prow-build +*/ + +locals { + project_id = "k8s-infra-prow-build" + cluster_name = "prow-build" // The name of the cluster defined in this file + cluster_location = "us-central1" // The GCP location (region or zone) where the cluster should be created + bigquery_location = "US" // The bigquery specific location where the dataset should be created + pod_namespace = "test-pods" // MUST match whatever prow is configured to use when it schedules to this cluster + cluster_sa_name = "prow-build" // Name of the GSA and KSA that pods use by default + boskos_janitor_sa_name = "boskos-janitor" // Name of the GSA and KSA used by boskos-janitor +} + +module "project" { + source = "../../../modules/gke-project" + project_id = local.project_id + project_name = local.project_id +} + +// Create GCP SA for pods +resource "google_service_account" "prow_build_cluster_sa" { + project = local.project_id + account_id = local.cluster_sa_name + display_name = "Used by pods in '${local.cluster_name}' GKE cluster" +} +// Allow pods using the build cluster KSA to use the 
GCP SA via workload identity +data "google_iam_policy" "prow_build_cluster_sa_workload_identity" { + binding { + role = "roles/iam.workloadIdentityUser" + + members = [ + "serviceAccount:${local.project_id}.svc.id.goog[${local.pod_namespace}/${local.cluster_sa_name}]", + ] + } +} +// Authoritative iam-policy: replaces any existing policy attached to this service_account +resource "google_service_account_iam_policy" "prow_build_cluster_sa_iam" { + service_account_id = google_service_account.prow_build_cluster_sa.name + policy_data = data.google_iam_policy.prow_build_cluster_sa_workload_identity.policy_data +} + +// Create GCP SA for boskos-janitor +resource "google_service_account" "boskos_janitor_sa" { + project = local.project_id + account_id = local.boskos_janitor_sa_name + display_name = "Used by ${local.boskos_janitor_sa_name} in '${local.cluster_name}' GKE cluster" +} +// Allow pods using the build cluster KSA to use the GCP SA via workload identity +data "google_iam_policy" "boskos_janitor_sa_workload_identity" { + binding { + role = "roles/iam.workloadIdentityUser" + + members = [ + "serviceAccount:${local.project_id}.svc.id.goog[${local.pod_namespace}/${local.boskos_janitor_sa_name}]", + ] + } +} +// Authoritative iam-policy: replaces any existing policy attached to this service account +resource "google_service_account_iam_policy" "boskos_janitor_sa_iam" { + service_account_id = google_service_account.boskos_janitor_sa.name + policy_data = data.google_iam_policy.boskos_janitor_sa_workload_identity.policy_data +} + +module "prow_build_cluster" { + source = "../../../modules/gke-cluster" + project_name = local.project_id + cluster_name = local.cluster_name + cluster_location = local.cluster_location + bigquery_location = local.bigquery_location + is_prod_cluster = "true" +} + +module "prow_build_nodepool" { + source = "../../../modules/gke-nodepool" + project_name = local.project_id + cluster_name = module.prow_build_cluster.cluster.name + location = 
module.prow_build_cluster.cluster.location + name = "pool1" + min_count = 2 + max_count = 6 + # kind-ipv6 jobs need an ipv6 stack; COS doesn't provide one, so we need to + # use an UBUNTU image instead. Why the CONTAINERD variant? I don't know, but + # it's what k8s-prow-builds/prow (prow.k8s.io's existing google.com build + # cluster) is using today, so we're just going to follow that + image_type = "UBUNTU_CONTAINERD" + machine_type = "n1-highmem-8" + disk_size_gb = 250 + disk_type = "pd-ssd" + service_account = module.prow_build_cluster.cluster_node_sa.email +} diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/boskos-janitor.yaml b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/boskos-janitor.yaml new file mode 100644 index 00000000000..a1304d66180 --- /dev/null +++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/boskos-janitor.yaml @@ -0,0 +1,36 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: boskos-janitor + labels: + app: boskos-janitor + namespace: test-pods +spec: + replicas: 2 # 2 distributed janitor instances + selector: + matchLabels: + app: boskos-janitor + template: + metadata: + labels: + app: boskos-janitor + spec: + terminationGracePeriodSeconds: 300 + serviceAccountName: boskos-janitor + containers: + - name: boskos-janitor + image: gcr.io/k8s-prow/boskos/janitor:v20200422-8c8546d74 + args: + - --boskos-url=http://boskos.test-pods.svc.cluster.local. 
+ - --resource-type=k8s-infra-gce-project + - --pool-size=20 + - -- + - --hours=0 +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + annotations: + iam.gke.io/gcp-service-account: boskos-janitor@k8s-infra-prow-build.iam.gserviceaccount.com + name: boskos-janitor + namespace: test-pods diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/boskos-reaper-deployment.yaml b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/boskos-reaper-deployment.yaml new file mode 100644 index 00000000000..22df6378304 --- /dev/null +++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/boskos-reaper-deployment.yaml @@ -0,0 +1,24 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: boskos-reaper + labels: + app: boskos-reaper + namespace: test-pods +spec: + selector: + matchLabels: + app: boskos-reaper + replicas: 1 # one canonical source of resources + template: + metadata: + labels: + app: boskos-reaper + spec: + terminationGracePeriodSeconds: 30 + containers: + - name: boskos-reaper + image: gcr.io/k8s-prow/boskos/reaper:v20200422-8c8546d74 + args: + - --boskos-url=http://boskos.test-pods.svc.cluster.local. 
+ - --resource-type=k8s-infra-gce-project diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/boskos-resources.yaml b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/boskos-resources.yaml new file mode 100644 index 00000000000..0371d81b294 --- /dev/null +++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/boskos-resources.yaml @@ -0,0 +1,44 @@ +resources: +- names: + - k8s-infra-e2e-boskos-001 + - k8s-infra-e2e-boskos-002 + - k8s-infra-e2e-boskos-003 + - k8s-infra-e2e-boskos-004 + - k8s-infra-e2e-boskos-005 + - k8s-infra-e2e-boskos-006 + - k8s-infra-e2e-boskos-007 + - k8s-infra-e2e-boskos-008 + - k8s-infra-e2e-boskos-009 + - k8s-infra-e2e-boskos-010 + - k8s-infra-e2e-boskos-011 + - k8s-infra-e2e-boskos-012 + - k8s-infra-e2e-boskos-013 + - k8s-infra-e2e-boskos-014 + - k8s-infra-e2e-boskos-015 + - k8s-infra-e2e-boskos-016 + - k8s-infra-e2e-boskos-017 + - k8s-infra-e2e-boskos-018 + - k8s-infra-e2e-boskos-019 + - k8s-infra-e2e-boskos-020 + - k8s-infra-e2e-boskos-021 + - k8s-infra-e2e-boskos-022 + - k8s-infra-e2e-boskos-023 + - k8s-infra-e2e-boskos-024 + - k8s-infra-e2e-boskos-025 + - k8s-infra-e2e-boskos-026 + - k8s-infra-e2e-boskos-027 + - k8s-infra-e2e-boskos-028 + - k8s-infra-e2e-boskos-029 + - k8s-infra-e2e-boskos-030 + - k8s-infra-e2e-boskos-031 + - k8s-infra-e2e-boskos-032 + - k8s-infra-e2e-boskos-033 + - k8s-infra-e2e-boskos-034 + - k8s-infra-e2e-boskos-035 + - k8s-infra-e2e-boskos-036 + - k8s-infra-e2e-boskos-037 + - k8s-infra-e2e-boskos-038 + - k8s-infra-e2e-boskos-039 + - k8s-infra-e2e-boskos-040 + state: dirty + type: k8s-infra-gce-project diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/boskos.yaml b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/boskos.yaml new file mode 100644 index 00000000000..f9f20285f31 --- /dev/null +++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/boskos.yaml @@ -0,0 +1,172 @@ 
+# Copyright 2017 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + name: dynamicresourcelifecycles.boskos.k8s.io +spec: + group: boskos.k8s.io + names: + kind: DRLCObject + listKind: DRLCObjectList + plural: dynamicresourcelifecycles + singular: dynamicresourcelifecycle + scope: Namespaced + version: v1 + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: Type + type: string + description: The dynamic resource type. + JSONPath: .spec.config.type + - name: Min-Count + type: integer + description: The minimum count requested. + JSONPath: .spec.min-count + - name: Max-Count + type: integer + description: The maximum count requested. + JSONPath: .spec.max-count +--- +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + name: resources.boskos.k8s.io +spec: + group: boskos.k8s.io + names: + kind: ResourceObject + listKind: ResourceObjectList + plural: resources + singular: resource + scope: Namespaced + version: v1 + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: Type + type: string + description: The resource type. + JSONPath: .spec.type + - name: State + type: string + description: The current state of the resource. + JSONPath: .status.state + - name: Owner + type: string + description: The current owner of the resource. 
+ JSONPath: .status.owner + - name: Last-Updated + type: date + JSONPath: .status.lastUpdate +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: boskos +rules: +- apiGroups: ["boskos.k8s.io"] + verbs: ["*"] + resources: ["*"] +--- +kind: ServiceAccount +apiVersion: v1 +metadata: + name: boskos + namespace: test-pods +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: boskos +subjects: +- kind: ServiceAccount + name: boskos + namespace: test-pods +roleRef: + kind: ClusterRole + name: boskos + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: boskos + namespace: test-pods +spec: + replicas: 1 # one canonical source of resources + selector: + matchLabels: + app: boskos + template: + metadata: + labels: + app: boskos + namespace: test-pods + spec: + serviceAccountName: boskos + terminationGracePeriodSeconds: 30 + containers: + - name: boskos + image: gcr.io/k8s-prow/boskos/boskos:v20200422-8c8546d74 + args: + - --config=/etc/config/config + - --namespace=test-pods + ports: + - containerPort: 8080 + protocol: TCP + volumeMounts: + - name: boskos-config + mountPath: /etc/config + readOnly: true + volumes: + - name: boskos-config + configMap: + name: resources +--- +apiVersion: v1 +kind: Service +metadata: + name: boskos + namespace: test-pods +spec: + selector: + app: boskos + ports: + - name: default + protocol: TCP + port: 80 + targetPort: 8080 +--- +apiVersion: v1 +kind: Service +metadata: + name: boskos-metrics + namespace: test-pods +spec: + selector: + app: boskos + ports: + - name: metrics + port: 9090 + protocol: TCP + targetPort: 9090 + loadBalancerIP: 35.225.208.117 # k8s-infra-prow-build/boskos-metrics + type: LoadBalancer diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/build-serviceaccounts.yaml b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/build-serviceaccounts.yaml new file 
mode 100644 index 00000000000..417b0febba2 --- /dev/null +++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/build-serviceaccounts.yaml @@ -0,0 +1,8 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + annotations: + iam.gke.io/gcp-service-account: prow-build@k8s-infra-prow-build.iam.gserviceaccount.com + name: prow-build + namespace: test-pods diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/create-loop-devs_daemonset.yaml b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/create-loop-devs_daemonset.yaml new file mode 100644 index 00000000000..318536abca5 --- /dev/null +++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/create-loop-devs_daemonset.yaml @@ -0,0 +1,54 @@ +# A daemonset to create /dev/loopX device nodes before they are +# needed. +# +# Normally, new nodes are created dynamically by the kernel. But nodes +# in a KIND cluster are started with a copy of /dev from the host and +# loop devices created later on do not show up in that static /dev +# (https://github.com/kubernetes-sigs/kind/issues/1248). Creating +# "enough" (100 in this daemonset) in advance avoids running out of +# loop devices. +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: create-loop-devs + namespace: kube-system + labels: + app: create-loop-devs +spec: + selector: + matchLabels: + name: create-loop-devs + template: + metadata: + labels: + name: create-loop-devs + spec: + tolerations: + - operator: Exists + effect: NoSchedule + containers: + - name: loopdev + command: + - sh + - -c + - | + while true; do + for i in $(seq 0 100); do + if ! 
[ -e /dev/loop$i ]; then + mknod /dev/loop$i b 7 $i + fi + done + sleep 100000000 + done + image: alpine:3.6 + imagePullPolicy: IfNotPresent + resources: {} + securityContext: + privileged: true + volumeMounts: + - name: dev + mountPath: /dev + volumes: + - name: dev + hostPath: + path: /dev diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/test-pods-namespace.yaml b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/test-pods-namespace.yaml new file mode 100644 index 00000000000..eff8a8612f0 --- /dev/null +++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/test-pods-namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: test-pods diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/tune-sysctls_daemonset.yaml b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/tune-sysctls_daemonset.yaml new file mode 100644 index 00000000000..63fee22a7e2 --- /dev/null +++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/tune-sysctls_daemonset.yaml @@ -0,0 +1,47 @@ +# a simple daemonset to tune sysctls +# intended to be used in a prow build cluster +# see https://github.com/kubernetes/test-infra/pull/13515 for more info +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: tune-sysctls + namespace: kube-system + labels: + app: tune-sysctls +spec: + selector: + matchLabels: + name: tune-sysctls + template: + metadata: + labels: + name: tune-sysctls + spec: + hostNetwork: true + hostPID: true + hostIPC: true + tolerations: + - operator: Exists + effect: NoSchedule + containers: + - name: setsysctls + command: + - sh + - -c + - | + while true; do + sysctl -w fs.inotify.max_user_watches=524288 + sleep 10 + done + image: alpine:3.6 + imagePullPolicy: IfNotPresent + resources: {} + securityContext: + privileged: true + volumeMounts: + - name: sys + mountPath: /sys + volumes: + - name: sys + hostPath: + path: 
/sys diff --git a/infra/gcp/clusters/kubernetes-public/aaa/00-inputs.tf b/infra/gcp/clusters/projects/kubernetes-public/aaa/00-inputs.tf similarity index 100% rename from infra/gcp/clusters/kubernetes-public/aaa/00-inputs.tf rename to infra/gcp/clusters/projects/kubernetes-public/aaa/00-inputs.tf diff --git a/infra/gcp/clusters/kubernetes-public/aaa/10-cluster-configuration.tf b/infra/gcp/clusters/projects/kubernetes-public/aaa/10-cluster-configuration.tf similarity index 100% rename from infra/gcp/clusters/kubernetes-public/aaa/10-cluster-configuration.tf rename to infra/gcp/clusters/projects/kubernetes-public/aaa/10-cluster-configuration.tf diff --git a/infra/gcp/clusters/kubernetes-public/aaa/11-pool1-configuration.tf b/infra/gcp/clusters/projects/kubernetes-public/aaa/11-pool1-configuration.tf similarity index 100% rename from infra/gcp/clusters/kubernetes-public/aaa/11-pool1-configuration.tf rename to infra/gcp/clusters/projects/kubernetes-public/aaa/11-pool1-configuration.tf diff --git a/infra/gcp/clusters/kubernetes-public/aaa/11-pool2-configuration.tf b/infra/gcp/clusters/projects/kubernetes-public/aaa/11-pool2-configuration.tf similarity index 100% rename from infra/gcp/clusters/kubernetes-public/aaa/11-pool2-configuration.tf rename to infra/gcp/clusters/projects/kubernetes-public/aaa/11-pool2-configuration.tf diff --git a/infra/gcp/ensure-e2e-projects.sh b/infra/gcp/ensure-e2e-projects.sh index a60fc373bc6..e54397ea0f0 100755 --- a/infra/gcp/ensure-e2e-projects.sh +++ b/infra/gcp/ensure-e2e-projects.sh @@ -34,19 +34,15 @@ function usage() { ## setup service accounts and ips for the prow build cluster -# TODO: replace prow-build-test with actual service account -PROW_BUILD_SVCACCT=$(svc_acct_email "kubernetes-public" "prow-build-test") - -# TODO: replace boskos-janitor-test with actual service account -BOSKOS_JANITOR_SVCACCT=$(svc_acct_email "kubernetes-public" "boskos-janitor-test") +PROW_BUILD_SVCACCT=$(svc_acct_email "k8s-infra-prow-build" "prow-build") 
+BOSKOS_JANITOR_SVCACCT=$(svc_acct_email "k8s-infra-prow-build" "boskos-janitor") color 6 "Ensuring boskos-janitor is empowered" ( color 6 "Ensuring external ip address exists for boskos-metrics service in prow build cluster" # this is so monitoring.prow.k8s.io is able to scrape metrics from boskos -# TODO: replace this with a global address used by an ingress ensure_regional_address \ - "kubernetes-public" \ + "k8s-infra-prow-build" \ "us-central1" \ "boskos-metrics" \ "to allow monitoring.k8s.prow.io to scrape boskos metrics" @@ -54,18 +50,17 @@ ensure_regional_address \ ## setup projects to be used by e2e tests for standing up clusters -# TODO: replace spiffxp- projects with actual projects E2E_PROJECTS=( - # for manual use during node-e2e job migration, eg: --gcp-project=spiffxp-node-e2e-project - spiffxp-node-e2e-project - # for manual use during job migration, eg: --gcp-project=spiffxp-gce-project - spiffxp-gce-project - # managed by boskos, part of the gce-project pool, eg: --gcp-project-type=gce-project - spiffxp-boskos-project-01 - spiffxp-boskos-project-02 - spiffxp-boskos-project-03 + # for manual use during job migration, eg: --gcp-project=k8s-infra-e2e-gce-project + k8s-infra-e2e-gce-project + # for manual use during node-e2e job migration, eg: --gcp-project=k8s-infra-e2e-node-e2e-project + k8s-infra-e2e-node-e2e-project ) +for i in $(seq 1 40); do + E2E_PROJECTS+=($(printf "k8s-infra-e2e-boskos-%03i" $i)) +done + if [ $# = 0 ]; then # default to all e2e projects set -- "${E2E_PROJECTS[@]}" @@ -80,6 +75,7 @@ for prj; do color 6 "Enabling APIs necessary for kubernetes e2e jobs to use e2e project: ${prj}" enable_api "${prj}" compute.googleapis.com enable_api "${prj}" logging.googleapis.com + enable_api "${prj}" monitoring.googleapis.com enable_api "${prj}" storage-component.googleapis.com color 6 "Empower prow-build service account to edit e2e project: ${prj}" diff --git a/infra/gcp/ensure-main-project.sh b/infra/gcp/ensure-main-project.sh index 
b040e7589fa..b27ac3d5611 100755 --- a/infra/gcp/ensure-main-project.sh +++ b/infra/gcp/ensure-main-project.sh @@ -127,7 +127,12 @@ ensure_service_account \ "k8s-infra-gcp-auditor" \ "Grants readonly access to org resources" -color 6 "Empowering k8s-infra-gcp-auditor serviceaccount to be used on build cluster" +color 6 "Empowering k8s-infra-gcp-auditor serviceaccount to be used on trusted build cluster" +empower_ksa_to_svcacct \ + "k8s-infra-prow-build-trusted.svc.id.goog[test-pods/k8s-infra-gcp-auditor]" \ + "${PROJECT}" \ + $(svc_acct_email "${PROJECT}" "k8s-infra-gcp-auditor") +# TODO(spiffxp): delete this binding empower_ksa_to_svcacct \ "kubernetes-public.svc.id.goog[test-pods/k8s-infra-gcp-auditor]" \ "${PROJECT}" \ diff --git a/infra/gcp/lib.sh b/infra/gcp/lib.sh index 55be50c4fb8..5f27e37beed 100755 --- a/infra/gcp/lib.sh +++ b/infra/gcp/lib.sh @@ -39,8 +39,11 @@ AUDITOR_INVOKER_SVCACCT="k8s-infra-gcr-auditor-invoker" # This is the Cloud Run service name of the auditor. AUDITOR_SERVICE_NAME="cip-auditor" +# TODO: decommission this once we've flipped to prow-build-trusted # The service account email for Prow (not in this org for now). PROW_SVCACCT="deployer@k8s-prow.iam.gserviceaccount.com" +# The service account email used by prow-build-trusted to trigger GCB and push to GCS +GCB_BUILDER_SVCACCT="gcb-builder@k8s-infra-prow-build-trusted.iam.gserviceaccount.com" # The GCP org stuff needed to turn it all on. GCP_ORG="758905017065" # kubernetes.io @@ -221,11 +224,18 @@ function empower_prow() { local project="$1" local bucket="$2" + # commands are copy-pasted so that one set can turn into deletes + # when we're ready to decommission PROW_SVCACCT + # Allow prow to trigger builds. 
gcloud \ projects add-iam-policy-binding "${project}" \ --member "serviceAccount:${PROW_SVCACCT}" \ --role roles/cloudbuild.builds.builder + gcloud \ + projects add-iam-policy-binding "${project}" \ + --member "serviceAccount:${GCB_BUILDER_SVCACCT}" \ + --role roles/cloudbuild.builds.builder # Allow prow to push source and access build logs. gsutil iam ch \ @@ -234,6 +244,12 @@ function empower_prow() { gsutil iam ch \ "serviceAccount:${PROW_SVCACCT}:objectViewer" \ "${bucket}" + gsutil iam ch \ + "serviceAccount:${GCB_BUILDER_SVCACCT}:objectCreator" \ + "${bucket}" + gsutil iam ch \ + "serviceAccount:${GCB_BUILDER_SVCACCT}:objectViewer" \ + "${bucket}" } # Grant full privileges to GCR admins