From 08fb61e7cf3bccc1863b2d289380f71cf7a0e665 Mon Sep 17 00:00:00 2001 From: Casey Callendrello Date: Fri, 6 Jul 2018 16:03:56 +0200 Subject: [PATCH] Add alpha support for local libvirt (cl only) This modifies the bare-metal platform slightly for direct libvirt provisioning. It uses terraform-provider-libvirt, and can bring up a test cluster in under 4 minutes. --- docs/cl/libvirt.md | 258 ++++++++++++++++++ docs/index.md | 3 +- .../container-linux/kubernetes/bootkube.tf | 15 + .../kubernetes/cl/controller.yaml.tmpl | 170 ++++++++++++ .../kubernetes/cl/worker.yaml.tmpl | 104 +++++++ .../container-linux/kubernetes/controllers.tf | 37 +++ libvirt/container-linux/kubernetes/libvirt.tf | 32 +++ libvirt/container-linux/kubernetes/outputs.tf | 3 + .../container-linux/kubernetes/profiles.tf | 41 +++ libvirt/container-linux/kubernetes/require.tf | 21 ++ libvirt/container-linux/kubernetes/ssh.tf | 131 +++++++++ .../container-linux/kubernetes/variables.tf | 124 +++++++++ libvirt/container-linux/kubernetes/workers.tf | 37 +++ 13 files changed, 975 insertions(+), 1 deletion(-) create mode 100644 docs/cl/libvirt.md create mode 100644 libvirt/container-linux/kubernetes/bootkube.tf create mode 100644 libvirt/container-linux/kubernetes/cl/controller.yaml.tmpl create mode 100644 libvirt/container-linux/kubernetes/cl/worker.yaml.tmpl create mode 100644 libvirt/container-linux/kubernetes/controllers.tf create mode 100644 libvirt/container-linux/kubernetes/libvirt.tf create mode 100644 libvirt/container-linux/kubernetes/outputs.tf create mode 100644 libvirt/container-linux/kubernetes/profiles.tf create mode 100644 libvirt/container-linux/kubernetes/require.tf create mode 100644 libvirt/container-linux/kubernetes/ssh.tf create mode 100644 libvirt/container-linux/kubernetes/variables.tf create mode 100644 libvirt/container-linux/kubernetes/workers.tf diff --git a/docs/cl/libvirt.md b/docs/cl/libvirt.md new file mode 100644 index 000000000..7a43021dd --- /dev/null +++ b/docs/cl/libvirt.md @@ -0,0 +1,258 @@ +# Libvirt + +In this tutorial, boot and provision a Kubernetes v1.11.0 using a local libvirt instance. + +The libvirt architecture is similar to the bare-metal one, except that it is optimized for running local clusters. Like bare-metal, load balancing between controllers is up to the end-user. For more details, see the [DNS](#dns) section below. + +We'll download a base image, then use Typhoon to directly provision the virtual machines via Terraform. + +Controllers are provisioned to run an `etcd-member` peer and a `kubelet` service. Workers run just a `kubelet` service. A one-time [bootkube](https://github.com/kubernetes-incubator/bootkube) bootstrap schedules the `apiserver`, `scheduler`, `controller-manager`, and `coredns` on controllers and schedules `kube-proxy` and `calico` (or `flannel`) on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. + +## Requirements + +* At least 2 GiB free ram and 20 GiB free disk +* NetworkManager in dnsmasq mode +* libvirt / virsh +* Terraform v0.11.x and + - [terraform-provider-libvirt ](https://github.com/dmacvicar/terraform-provider-libvirt) + - [terraform-provider-ct ](https://github.com/coreos/terraform-provider-ct) + +## Libvirt + +Libvirt is a suite of tools that manages virtual machines, storage, and networking. It does not run virtual machines directly, rather it relies on a lower-level virtualization engine such as qemu or bhyve. 
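+
+Once libvirt is installed (covered next), a quick check can confirm that the daemon is running and that `virsh` can reach the `qemu:///system` URI used throughout this tutorial. This is only a suggested sanity check; exact package and service names may vary by distribution.
+
+```
+# Optional sanity check (assumes systemd and the qemu:///system URI)
+sudo systemctl enable --now libvirtd
+virsh --connect qemu:///system version
+```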
+ +You will need the `virsh` and `qemu-img` binaries installed on your system: + +``` +sudo dnf install libvirt-client qemu-img +``` + +## Container Linux Base image + +You will need to manually download a Container Linux base image. Qemu supports a copy-on-write format, so this will allow for deduplication. + +``` +wget https://stable.release.core-os.net/amd64-usr/current/coreos_production_qemu_image.img.bz2 +bunzip2 coreos_production_qemu_image.img.bz2 +qemu-img resize coreos_production_qemu_image.img +8G +``` + +Make a note of the absolute path to this image, you'll need it later. + + +## DNS + +Libvirt will start dnsmasq server for each cluster, and will create a DNS record for every node. By default, these names are only resolvable within the cluster. However, it is easier if the names are also resolvable on the local machine. In order to do this, you will need to put your host's NetworkManager in to `dnsmasq` mode, which will route all DNS queries through a local dnsmasq instance. Then we can instruct this dnsmasq to delegate queries for a specific domain to the libvirt resolver. + +This step is optional, but recommended. + +You will need to know the `machine_domain` along with `node_ip_pool`, as described [below](#cluster). + +For example, if all your nodes have a common domain of `hestia.k8s` and the default `node_ip_pool` of `192.168.120.0/24`, then your DNS server will be at `192.168.120.1` + +1. Edit `/etc/NetworkManager/NetworkManager.conf` and set `dns=dnsmasq` in section `[main]` +2. Tell dnsmasq to use your cluster. The syntax is `server=//`. For this example: +``` +echo server=/hestia.k8s/192.168.120.1 | sudo tee /etc/NetworkManager/dnsmasq.d/typhoon.conf +``` +3. `systemctl restart NetworkManager` + + +### APIServer name + +As a cluster administrator, you are responsible for providing load balancing over the controllers. Specifically, `k8s_domain_name` must be resolvable inside the cluster for installation to succeed. However, this can be overkill for short-lived clusters. + +If variable `libvirt_create_k8s_domain_name` is `1`, then an extra record will be created for the libvirt dnsmasq with the name of `k8s_domain_name` and the IP address of the first controller node. This will enable bootstrapping, but is not suitable for production use. + + +## Terraform Setup + +Install [Terraform](https://www.terraform.io/downloads.html) v0.11.x on your system. + +```sh +$ terraform version +Terraform v0.11.1 +``` + +Install [terraform-provider-libvirt ](https://github.com/dmacvicar/terraform-provider-libvirt). + +```sh + go get github.com/dmacvicar/terraform-provider-libvirt +sudo cp $GOPATH/bin/terraform-provider-libvirt /usr/local/bin/ +``` + + +Install [terraform-provider-ct ](https://github.com/coreos/terraform-provider-ct). +```sh + go get github.com/coreos/terraform-provider-ct +sudo cp $GOPATH/bin/terraform-provider-ct /usr/local/bin/ +``` + +Add plugins to `.terraformrc`: +``` +providers { + ct = "/usr/local/bin/terraform-provider-ct" + libvirt = "/usr/local/bin/terraform-provider-libvirt" +} +``` + +Read [concepts](../architecture/concepts.md) to learn about Terraform, modules, and organizing resources. Change to your infrastructure repository (e.g. `infra`). + +``` +cd infra/clusters +``` + +## Cluster + +Define a Kubernetes cluster using the module `libvirt/container-linux/kubernetes`. 
+ +```tf +module "libvirt-hestia" { + source = "git::https://github.com/poseidon/typhoon//libvirt/container-linux/kubernetes?ref=v1.11.0" + + cluster_name = "hestia" + base_image_path = "/home/user/coreos.img" + machine_domain = "hestia.k8s" + + controller_names = ["node1", "node2"] + + worker_names = [ "node5", "node6" ] + + ssh_authorized_key = "ssh-rsa AAAA..." + + asset_dir = "/home/user/.secrets/clusters/hestia" +} +``` + +Reference the [variables docs](#variables) or the [variables.tf](https://github.com/poseidon/typhoon/blob/master/libvirt/container-linux/kubernetes/variables.tf) source. + +## ssh-agent + +Initial bootstrapping requires `bootkube.service` be started on one controller node. Terraform uses `ssh-agent` to automate this step. Add your SSH private key to `ssh-agent`. + +```sh +ssh-add ~/.ssh/id_rsa +ssh-add -L +``` + +## Apply + +Initialize the config directory if this is the first use with Terraform. + +```sh +terraform init +``` + +Plan the resources to be created. + +```sh +$ terraform plan +Plan: 55 to add, 0 to change, 0 to destroy. +``` + +Apply the changes. Terraform will generate bootkube assets to `asset_dir` and create Matchbox profiles (e.g. controller, worker) and matching rules via the Matchbox API. + +```sh +$ terraform apply +... +``` + +Apply will create the libvirt resources, then create the machines, then copy some initial configuration via SSH. + +### Bootstrap + +Wait for the `bootkube-start` step to finish bootstrapping the Kubernetes control plane. This may take 5-15 minutes depending on your network. + +``` +module.libvirt-cluster-hestia.null_resource.bootkube-start: Creation complete (ID: 5441741360626669024) + +Apply complete! Resources: 55 added, 0 changed, 0 destroyed. +``` + +To watch the bootstrap process in detail, SSH to the first controller and journal the logs. + +``` +$ ssh core@node1.hestia.k8s +$ journalctl -f -u bootkube +bootkube[5]: Pod Status: pod-checkpointer Running +bootkube[5]: Pod Status: kube-apiserver Running +bootkube[5]: Pod Status: kube-scheduler Running +bootkube[5]: Pod Status: kube-controller-manager Running +bootkube[5]: All self-hosted control plane components successfully started +bootkube[5]: Tearing down temporary bootstrap control plane... +``` + +## Verify + +[Install kubectl](https://coreos.com/kubernetes/docs/latest/configure-kubectl.html) on your system. Use the generated `kubeconfig` credentials to access the Kubernetes cluster and list nodes. + +Your `k8s_domain_name` must be resolvable on the local host. You might need to either use [dnsmasq](#dns) mode or hard-code an entry in `/etc/hosts` + +``` +$ export KUBECONFIG=/home/user/.secrets/clusters/hestia/auth/kubeconfig +$ kubectl get nodes +NAME STATUS AGE VERSION +node1.example.com Ready 11m v1.11.0 +node2.example.com Ready 11m v1.11.0 +node3.example.com Ready 11m v1.11.0 +``` + +List the pods. 
+ +``` +$ kubectl get pods --all-namespaces +NAMESPACE NAME READY STATUS RESTARTS AGE +kube-system calico-node-6qp7f 2/2 Running 1 11m +kube-system calico-node-gnjrm 2/2 Running 0 11m +kube-system calico-node-llbgt 2/2 Running 0 11m +kube-system coredns-1187388186-mx9rt 1/1 Running 0 11m +kube-system kube-apiserver-7336w 1/1 Running 0 11m +kube-system kube-controller-manager-3271970485-b9chx 1/1 Running 0 11m +kube-system kube-controller-manager-3271970485-v30js 1/1 Running 1 11m +kube-system kube-proxy-50sd4 1/1 Running 0 11m +kube-system kube-proxy-bczhp 1/1 Running 0 11m +kube-system kube-proxy-mp2fw 1/1 Running 0 11m +kube-system kube-scheduler-3895335239-fd3l7 1/1 Running 1 11m +kube-system kube-scheduler-3895335239-hfjv0 1/1 Running 0 11m +kube-system pod-checkpointer-wf65d 1/1 Running 0 11m +kube-system pod-checkpointer-wf65d-node1.example.com 1/1 Running 0 11m +``` + +## Going Further + +Learn about [maintenance](../topics/maintenance.md) and [addons](../addons/overview.md). + +!!! note + On Container Linux clusters, install the `CLUO` addon to coordinate reboots and drains when nodes auto-update. Otherwise, updates may not be applied until the next reboot. + +## Variables + +Check the [variables.tf](https://github.com/poseidon/typhoon/blob/master/libvirt/container-linux/kubernetes/variables.tf) source. + +### Required + +| Name | Description | Example | +|:-----|:------------|:--------| +| cluster_name | Unique cluster name | hestia | +| base_image_path | Path to an uncompressed Container Linux qcow2 image | "/home/user/downloads/..." | +| machine_domain | Domain name for all machines | hestia.k8s | +| ssh_authorized_key | SSH public key for user 'core' | "ssh-rsa AAAAB3Nz..." | +| asset_dir | Path to a directory where generated assets should be placed (contains secrets) | "/home/user/.secrets/clusters/mercury" | +| k8s_domain_name | Domain name that resolves to one or more controllers | console.hestia.k8s | +| controller_names | Ordered list of controller hostnames | ["node1"] | +| worker_names | Ordered list of worker hostnames | ["node2", "node3"] | + +### Optional + +| Name | Description | Default | Example | +|:-----|:------------|:--------|:--------| +| controller_memory | Ram in MiB to allocate to each controller | 2048 | +| worker_memory | Ram in MiB to allocate to each worker | 2048 | +| networking | Choice of networking provider | "calico" | "calico" or "flannel" | +| network_mtu | CNI interface MTU (calico-only) | 1480 | - | +| network_ip_autodetection_method | Method to detect host IPv4 address (calico-only) | first-found | can-reach=10.0.0.1 | +| node_ip_pool | The IP range for machines | "192.168.120.0/24" | "10.1.0.0/24" | +| pod_cidr | CIDR IPv4 range to assign to Kubernetes pods | "10.2.0.0/16" | "10.22.0.0/16" | +| service_cidr | CIDR IPv4 range to assign to Kubernetes services | "10.3.0.0/16" | "10.3.0.0/24" | +| dns_server | A resolving DNS server to use for the nodes | "8.8.8.8" | "4.2.2.2" | +| libvirt_create_k8s_domain_name | Whether or not libvirt should answer for k8s_domain_name | 1 | 0 | diff --git a/docs/index.md b/docs/index.md index 1fd0861da..fd0ae89bc 100644 --- a/docs/index.md +++ b/docs/index.md @@ -31,13 +31,14 @@ Typhoon provides a Terraform Module for each supported operating system and plat | Digital Ocean | Fedora Atomic | [digital-ocean/fedora-atomic/kubernetes](atomic/digital-ocean.md) | alpha | | Google Cloud | Container Linux | [google-cloud/container-linux/kubernetes](cl/google-cloud.md) | stable | | Google Cloud | Fedora Atomic | 
[google-cloud/container-linux/kubernetes](atomic/google-cloud.md) | alpha | +| Libvirt | Container Linux | [libvirt/container-linux/kubernetes](cl/libvirt.md) | alpha | The AWS and bare-metal `container-linux` modules allow picking Red Hat Container Linux (formerly CoreOS Container Linux) or Kinvolk's Flatcar Linux friendly fork. ## Documentation * Architecture [concepts](architecture/concepts.md) and [operating-systems](architecture/operating-systems.md) -* Tutorials for [AWS](cl/aws.md), [Bare-Metal](cl/bare-metal.md), [Digital Ocean](cl/digital-ocean.md), and [Google-Cloud](cl/google-cloud.md) +* Tutorials for [AWS](cl/aws.md), [Bare-Metal](cl/bare-metal.md), [Libvirt](cl/libvirt.md), [Digital Ocean](cl/digital-ocean.md), and [Google-Cloud](cl/google-cloud.md) ## Example diff --git a/libvirt/container-linux/kubernetes/bootkube.tf b/libvirt/container-linux/kubernetes/bootkube.tf new file mode 100644 index 000000000..cbfb426b8 --- /dev/null +++ b/libvirt/container-linux/kubernetes/bootkube.tf @@ -0,0 +1,15 @@ +# Self-hosted Kubernetes assets (kubeconfig, manifests) +module "bootkube" { + source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=81ba300e712e116c9ea9470ccdce7859fecc76b6" + + cluster_name = "${var.cluster_name}" + api_servers = ["${var.k8s_domain_name}"] + etcd_servers = ["${formatlist("%s.%s", var.controller_names, var.machine_domain)}"] + asset_dir = "${var.asset_dir}" + networking = "${var.networking}" + network_mtu = "${var.network_mtu}" + network_ip_autodetection_method = "${var.network_ip_autodetection_method}" + pod_cidr = "${var.pod_cidr}" + service_cidr = "${var.service_cidr}" + cluster_domain_suffix = "${var.cluster_domain_suffix}" +} diff --git a/libvirt/container-linux/kubernetes/cl/controller.yaml.tmpl b/libvirt/container-linux/kubernetes/cl/controller.yaml.tmpl new file mode 100644 index 000000000..6e1a2ae7c --- /dev/null +++ b/libvirt/container-linux/kubernetes/cl/controller.yaml.tmpl @@ -0,0 +1,170 @@ +--- +systemd: + units: + - name: etcd-member.service + enable: true + dropins: + - name: 40-etcd-cluster.conf + contents: | + [Service] + Environment="ETCD_IMAGE_TAG=v3.3.8" + Environment="ETCD_NAME=${etcd_name}" + Environment="ETCD_ADVERTISE_CLIENT_URLS=https://${domain_name}:2379" + Environment="ETCD_INITIAL_ADVERTISE_PEER_URLS=https://${domain_name}:2380" + Environment="ETCD_LISTEN_CLIENT_URLS=https://0.0.0.0:2379" + Environment="ETCD_LISTEN_PEER_URLS=https://0.0.0.0:2380" + Environment="ETCD_LISTEN_METRICS_URLS=http://0.0.0.0:2381" + Environment="ETCD_INITIAL_CLUSTER=${etcd_initial_cluster}" + Environment="ETCD_STRICT_RECONFIG_CHECK=true" + Environment="ETCD_SSL_DIR=/etc/ssl/etcd" + Environment="ETCD_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/server-ca.crt" + Environment="ETCD_CERT_FILE=/etc/ssl/certs/etcd/server.crt" + Environment="ETCD_KEY_FILE=/etc/ssl/certs/etcd/server.key" + Environment="ETCD_CLIENT_CERT_AUTH=true" + Environment="ETCD_PEER_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/peer-ca.crt" + Environment="ETCD_PEER_CERT_FILE=/etc/ssl/certs/etcd/peer.crt" + Environment="ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key" + Environment="ETCD_PEER_CLIENT_CERT_AUTH=true" + - name: docker.service + enable: true + - name: locksmithd.service + mask: true + - name: kubelet.path + enable: true + contents: | + [Unit] + Description=Watch for kubeconfig + [Path] + PathExists=/etc/kubernetes/kubeconfig + [Install] + WantedBy=multi-user.target + - name: wait-for-dns.service + enable: true + contents: | + [Unit] + Description=Wait for DNS entries + 
Wants=systemd-resolved.service + Before=kubelet.service + [Service] + Type=oneshot + RemainAfterExit=true + ExecStart=/bin/sh -c 'while ! /usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done' + [Install] + RequiredBy=kubelet.service + RequiredBy=etcd-member.service + - name: kubelet.service + contents: | + [Unit] + Description=Kubelet via Hyperkube + Wants=rpc-statd.service + [Service] + EnvironmentFile=/etc/kubernetes/kubelet.env + Environment="RKT_RUN_ARGS=--uuid-file-save=/var/cache/kubelet-pod.uuid \ + --volume=resolv,kind=host,source=/etc/resolv.conf \ + --mount volume=resolv,target=/etc/resolv.conf \ + --volume var-lib-cni,kind=host,source=/var/lib/cni \ + --mount volume=var-lib-cni,target=/var/lib/cni \ + --volume var-lib-calico,kind=host,source=/var/lib/calico \ + --mount volume=var-lib-calico,target=/var/lib/calico \ + --volume opt-cni-bin,kind=host,source=/opt/cni/bin \ + --mount volume=opt-cni-bin,target=/opt/cni/bin \ + --volume var-log,kind=host,source=/var/log \ + --mount volume=var-log,target=/var/log \ + --insecure-options=image" + ExecStartPre=/bin/mkdir -p /opt/cni/bin + ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests + ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d + ExecStartPre=/bin/mkdir -p /etc/kubernetes/checkpoint-secrets + ExecStartPre=/bin/mkdir -p /etc/kubernetes/inactive-manifests + ExecStartPre=/bin/mkdir -p /var/lib/cni + ExecStartPre=/bin/mkdir -p /var/lib/calico + ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins + ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt" + ExecStartPre=-/usr/bin/rkt rm --uuid-file=/var/cache/kubelet-pod.uuid + ExecStart=/usr/lib/coreos/kubelet-wrapper \ + --anonymous-auth=false \ + --authentication-token-webhook \ + --authorization-mode=Webhook \ + --client-ca-file=/etc/kubernetes/ca.crt \ + --cluster_dns=${k8s_dns_service_ip} \ + --cluster_domain=${cluster_domain_suffix} \ + --cni-conf-dir=/etc/kubernetes/cni/net.d \ + --exit-on-lock-contention \ + --hostname-override=${domain_name} \ + --kubeconfig=/etc/kubernetes/kubeconfig \ + --lock-file=/var/run/lock/kubelet.lock \ + --network-plugin=cni \ + --node-labels=node-role.kubernetes.io/master \ + --node-labels=node-role.kubernetes.io/controller="true" \ + --pod-manifest-path=/etc/kubernetes/manifests \ + --register-with-taints=node-role.kubernetes.io/master=:NoSchedule \ + --volume-plugin-dir=/var/lib/kubelet/volumeplugins + ExecStop=-/usr/bin/rkt stop --uuid-file=/var/cache/kubelet-pod.uuid + Restart=always + RestartSec=10 + [Install] + WantedBy=multi-user.target + - name: bootkube.service + contents: | + [Unit] + Description=Bootstrap a Kubernetes control plane with a temp api-server + ConditionPathExists=!/opt/bootkube/init_bootkube.done + [Service] + Type=oneshot + RemainAfterExit=true + WorkingDirectory=/opt/bootkube + ExecStart=/opt/bootkube/bootkube-start + ExecStartPost=/bin/touch /opt/bootkube/init_bootkube.done +storage: + files: + - path: /etc/kubernetes/kubelet.env + filesystem: root + mode: 0644 + contents: + inline: | + KUBELET_IMAGE_URL=docker://k8s.gcr.io/hyperkube + KUBELET_IMAGE_TAG=v1.11.0 + - path: /etc/hostname + filesystem: root + mode: 0644 + contents: + inline: + ${domain_name} + - path: /etc/sysctl.d/max-user-watches.conf + filesystem: root + contents: + inline: | + fs.inotify.max_user_watches=16184 + - path: /opt/bootkube/bootkube-start + filesystem: root + mode: 0544 + user: + id: 500 + group: + id: 500 + contents: 
+ inline: | + #!/bin/bash + # Wrapper for bootkube start + set -e + # Move experimental manifests + [ -n "$(ls /opt/bootkube/assets/manifests-*/* 2>/dev/null)" ] && mv /opt/bootkube/assets/manifests-*/* /opt/bootkube/assets/manifests && rm -rf /opt/bootkube/assets/manifests-* + BOOTKUBE_ACI="$${BOOTKUBE_ACI:-quay.io/coreos/bootkube}" + BOOTKUBE_VERSION="$${BOOTKUBE_VERSION:-v0.12.0}" + BOOTKUBE_ASSETS="$${BOOTKUBE_ASSETS:-/opt/bootkube/assets}" + exec /usr/bin/rkt run \ + --trust-keys-from-https \ + --volume assets,kind=host,source=$BOOTKUBE_ASSETS \ + --mount volume=assets,target=/assets \ + --volume bootstrap,kind=host,source=/etc/kubernetes \ + --mount volume=bootstrap,target=/etc/kubernetes \ + $$RKT_OPTS \ + $${BOOTKUBE_ACI}:$${BOOTKUBE_VERSION} \ + --net=host \ + --dns=host \ + --exec=/bootkube -- start --asset-dir=/assets "$@" +passwd: + users: + - name: core + ssh_authorized_keys: + - ${ssh_authorized_key} diff --git a/libvirt/container-linux/kubernetes/cl/worker.yaml.tmpl b/libvirt/container-linux/kubernetes/cl/worker.yaml.tmpl new file mode 100644 index 000000000..18e1e6a60 --- /dev/null +++ b/libvirt/container-linux/kubernetes/cl/worker.yaml.tmpl @@ -0,0 +1,104 @@ +--- +systemd: + units: + - name: docker.service + enable: true + - name: locksmithd.service + mask: true + - name: kubelet.path + enable: true + contents: | + [Unit] + Description=Watch for kubeconfig + [Path] + PathExists=/etc/kubernetes/kubeconfig + [Install] + WantedBy=multi-user.target + - name: wait-for-dns.service + enable: true + contents: | + [Unit] + Description=Wait for DNS entries + Wants=systemd-resolved.service + Before=kubelet.service + [Service] + Type=oneshot + RemainAfterExit=true + ExecStart=/bin/sh -c 'while ! /usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done' + [Install] + RequiredBy=kubelet.service + - name: kubelet.service + contents: | + [Unit] + Description=Kubelet via Hyperkube + Wants=rpc-statd.service + [Service] + EnvironmentFile=/etc/kubernetes/kubelet.env + Environment="RKT_RUN_ARGS=--uuid-file-save=/var/cache/kubelet-pod.uuid \ + --volume=resolv,kind=host,source=/etc/resolv.conf \ + --mount volume=resolv,target=/etc/resolv.conf \ + --volume var-lib-cni,kind=host,source=/var/lib/cni \ + --mount volume=var-lib-cni,target=/var/lib/cni \ + --volume var-lib-calico,kind=host,source=/var/lib/calico \ + --mount volume=var-lib-calico,target=/var/lib/calico \ + --volume opt-cni-bin,kind=host,source=/opt/cni/bin \ + --mount volume=opt-cni-bin,target=/opt/cni/bin \ + --volume var-log,kind=host,source=/var/log \ + --mount volume=var-log,target=/var/log \ + --insecure-options=image" + ExecStartPre=/bin/mkdir -p /opt/cni/bin + ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests + ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d + ExecStartPre=/bin/mkdir -p /var/lib/cni + ExecStartPre=/bin/mkdir -p /var/lib/calico + ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins + ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt" + ExecStartPre=-/usr/bin/rkt rm --uuid-file=/var/cache/kubelet-pod.uuid + ExecStart=/usr/lib/coreos/kubelet-wrapper \ + --anonymous-auth=false \ + --authentication-token-webhook \ + --authorization-mode=Webhook \ + --client-ca-file=/etc/kubernetes/ca.crt \ + --cluster_dns=${k8s_dns_service_ip} \ + --cluster_domain=${cluster_domain_suffix} \ + --cni-conf-dir=/etc/kubernetes/cni/net.d \ + --exit-on-lock-contention \ + --hostname-override=${domain_name} \ + 
--kubeconfig=/etc/kubernetes/kubeconfig \ + --lock-file=/var/run/lock/kubelet.lock \ + --network-plugin=cni \ + --node-labels=node-role.kubernetes.io/node \ + --pod-manifest-path=/etc/kubernetes/manifests \ + --volume-plugin-dir=/var/lib/kubelet/volumeplugins + ExecStop=-/usr/bin/rkt stop --uuid-file=/var/cache/kubelet-pod.uuid + Restart=always + RestartSec=5 + [Install] + WantedBy=multi-user.target + +storage: + files: + - path: /etc/kubernetes/kubelet.env + filesystem: root + mode: 0644 + contents: + inline: | + KUBELET_IMAGE_URL=docker://k8s.gcr.io/hyperkube + KUBELET_IMAGE_TAG=v1.11.0 + - path: /etc/hostname + filesystem: root + mode: 0644 + contents: + inline: + ${domain_name} + - path: /etc/sysctl.d/max-user-watches.conf + filesystem: root + contents: + inline: | + fs.inotify.max_user_watches=16184 +passwd: + users: + - name: core + ssh_authorized_keys: + - ${ssh_authorized_key} + diff --git a/libvirt/container-linux/kubernetes/controllers.tf b/libvirt/container-linux/kubernetes/controllers.tf new file mode 100644 index 000000000..4fa86a689 --- /dev/null +++ b/libvirt/container-linux/kubernetes/controllers.tf @@ -0,0 +1,37 @@ +locals { + controller_count = "${length(var.controller_names)}" +} + +resource "libvirt_volume" "controller-root" { + count = "${local.controller_count}" + + name = "${var.cluster_name}-${element(var.controller_names, count.index)}-root" + base_volume_id = "${libvirt_volume.base.id}" +} + +resource "libvirt_ignition" "controller" { + count = "${local.controller_count}" + + name = "${var.cluster_name}-${element(var.controller_names, count.index)}-ign" + content = "${element(data.ct_config.controllers.*.rendered, count.index)}" +} + +resource "libvirt_domain" "controller" { + count = "${local.controller_count}" + + name = "${var.cluster_name}-${element(var.controller_names, count.index)}" + memory = "${var.controller_memory}" + + coreos_ignition = "${element(libvirt_ignition.controller.*.id, count.index)}" + + disk { + volume_id = "${element(libvirt_volume.controller-root.*.id, count.index)}" + } + + network_interface { + network_id = "${libvirt_network.net.id}" + hostname = "${element(var.controller_names, count.index)}" + # Give controllers stable IPs + addresses = ["${cidrhost(var.node_ip_pool, 10 + count.index)}"] + } +} diff --git a/libvirt/container-linux/kubernetes/libvirt.tf b/libvirt/container-linux/kubernetes/libvirt.tf new file mode 100644 index 000000000..c1fc32662 --- /dev/null +++ b/libvirt/container-linux/kubernetes/libvirt.tf @@ -0,0 +1,32 @@ +// Some basic libvirt building blocks: +// - the network +// - the base QCOW volume + +resource "libvirt_network" "net" { + name = "${var.cluster_name}" + + mode = "nat" + domain = "${var.machine_domain}" + addresses = ["${var.node_ip_pool}"] + + dns_forwarder { + address = "${var.dns_server}" + } +} + +resource "libvirt_volume" "base" { + name = "${var.cluster_name}-base" + source = "${var.base_image_path}" +} + +# Set up the cluster domain name +# we have to use the +resource "null_resource" "k8s_domain_name" { + count = "${var.libvirt_create_k8s_domain_name}" + + provisioner "local-exec" { + command = "virsh -c qemu:///system net-update ${var.cluster_name} add dns-host \"${var.k8s_domain_name}\" --live --config" + } + + depends_on = ["libvirt_network.net"] +} diff --git a/libvirt/container-linux/kubernetes/outputs.tf b/libvirt/container-linux/kubernetes/outputs.tf new file mode 100644 index 000000000..41bd79f2a --- /dev/null +++ b/libvirt/container-linux/kubernetes/outputs.tf @@ -0,0 +1,3 @@ +output 
"kubeconfig" { + value = "${module.bootkube.kubeconfig}" +} diff --git a/libvirt/container-linux/kubernetes/profiles.tf b/libvirt/container-linux/kubernetes/profiles.tf new file mode 100644 index 000000000..23e454598 --- /dev/null +++ b/libvirt/container-linux/kubernetes/profiles.tf @@ -0,0 +1,41 @@ +data "template_file" "controller-configs" { + count = "${length(var.controller_names)}" + + template = "${file("${path.module}/cl/controller.yaml.tmpl")}" + + vars { + domain_name = "${element(var.controller_names, count.index)}.${var.machine_domain}" + etcd_name = "${element(var.controller_names, count.index)}" + etcd_initial_cluster = "${join(",", formatlist("%s=https://%s.%s:2380", var.controller_names, var.controller_names, var.machine_domain))}" + k8s_dns_service_ip = "${module.bootkube.kube_dns_service_ip}" + cluster_domain_suffix = "${var.cluster_domain_suffix}" + ssh_authorized_key = "${var.ssh_authorized_key}" + } +} + +data "ct_config" "controllers" { + count = "${length(var.controller_names)}" + + content = "${element(data.template_file.controller-configs.*.rendered, count.index)}" +} + +// Kubernetes Worker profiles +data "template_file" "worker-configs" { + count = "${length(var.worker_names)}" + + template = "${file("${path.module}/cl/worker.yaml.tmpl")}" + + vars { + domain_name = "${element(var.worker_names, count.index)}.${var.machine_domain}" + k8s_dns_service_ip = "${module.bootkube.kube_dns_service_ip}" + cluster_domain_suffix = "${var.cluster_domain_suffix}" + ssh_authorized_key = "${var.ssh_authorized_key}" + } +} + +// invoke ct to generate ignition configs +data "ct_config" "workers" { + count = "${length(var.worker_names)}" + + content = "${element(data.template_file.worker-configs.*.rendered, count.index)}" +} diff --git a/libvirt/container-linux/kubernetes/require.tf b/libvirt/container-linux/kubernetes/require.tf new file mode 100644 index 000000000..a6435bec6 --- /dev/null +++ b/libvirt/container-linux/kubernetes/require.tf @@ -0,0 +1,21 @@ +# Terraform version and plugin versions + +terraform { + required_version = ">= 0.11.0" +} + +provider "local" { + version = "~> 1.0" +} + +provider "null" { + version = "~> 1.0" +} + +provider "template" { + version = "~> 1.0" +} + +provider "tls" { + version = "~> 1.0" +} diff --git a/libvirt/container-linux/kubernetes/ssh.tf b/libvirt/container-linux/kubernetes/ssh.tf new file mode 100644 index 000000000..ac08bf84f --- /dev/null +++ b/libvirt/container-linux/kubernetes/ssh.tf @@ -0,0 +1,131 @@ +# Secure copy etcd TLS assets and kubeconfig to controllers. 
Activates kubelet.service +resource "null_resource" "copy-controller-secrets" { + count = "${length(var.controller_names)}" + + depends_on = [ + "libvirt_domain.controller", + ] + + connection { + type = "ssh" + host = "${element(libvirt_domain.controller.*.network_interface.0.addresses.0, count.index)}" + user = "core" + timeout = "60m" + } + + provisioner "file" { + content = "${module.bootkube.kubeconfig}" + destination = "$HOME/kubeconfig" + } + + provisioner "file" { + content = "${module.bootkube.etcd_ca_cert}" + destination = "$HOME/etcd-client-ca.crt" + } + + provisioner "file" { + content = "${module.bootkube.etcd_client_cert}" + destination = "$HOME/etcd-client.crt" + } + + provisioner "file" { + content = "${module.bootkube.etcd_client_key}" + destination = "$HOME/etcd-client.key" + } + + provisioner "file" { + content = "${module.bootkube.etcd_server_cert}" + destination = "$HOME/etcd-server.crt" + } + + provisioner "file" { + content = "${module.bootkube.etcd_server_key}" + destination = "$HOME/etcd-server.key" + } + + provisioner "file" { + content = "${module.bootkube.etcd_peer_cert}" + destination = "$HOME/etcd-peer.crt" + } + + provisioner "file" { + content = "${module.bootkube.etcd_peer_key}" + destination = "$HOME/etcd-peer.key" + } + + provisioner "remote-exec" { + inline = [ + "sudo mkdir -p /etc/ssl/etcd/etcd", + "sudo mv etcd-client* /etc/ssl/etcd/", + "sudo cp /etc/ssl/etcd/etcd-client-ca.crt /etc/ssl/etcd/etcd/server-ca.crt", + "sudo mv etcd-server.crt /etc/ssl/etcd/etcd/server.crt", + "sudo mv etcd-server.key /etc/ssl/etcd/etcd/server.key", + "sudo cp /etc/ssl/etcd/etcd-client-ca.crt /etc/ssl/etcd/etcd/peer-ca.crt", + "sudo mv etcd-peer.crt /etc/ssl/etcd/etcd/peer.crt", + "sudo mv etcd-peer.key /etc/ssl/etcd/etcd/peer.key", + "sudo chown -R etcd:etcd /etc/ssl/etcd", + "sudo chmod -R 500 /etc/ssl/etcd", + "sudo mv $HOME/kubeconfig /etc/kubernetes/kubeconfig", + ] + } +} + +# Secure copy kubeconfig to all workers. Activates kubelet.service +resource "null_resource" "copy-worker-secrets" { + count = "${length(var.worker_names)}" + + # Without depends_on, remote-exec could start and wait for machines before + # matchbox groups are written, causing a deadlock. + depends_on = [ + "libvirt_domain.worker", + ] + + connection { + type = "ssh" + host = "${element(libvirt_domain.worker.*.network_interface.0.addresses.0, count.index)}" + user = "core" + timeout = "60m" + } + + provisioner "file" { + content = "${module.bootkube.kubeconfig}" + destination = "$HOME/kubeconfig" + } + + provisioner "remote-exec" { + inline = [ + "sudo mv $HOME/kubeconfig /etc/kubernetes/kubeconfig", + ] + } +} + +# Secure copy bootkube assets to ONE controller and start bootkube to perform +# one-time self-hosted cluster bootstrapping. +resource "null_resource" "bootkube-start" { + # Without depends_on, this remote-exec may start before the kubeconfig copy. + # Terraform only does one task at a time, so it would try to bootstrap + # while no Kubelets are running. 
+ depends_on = [ + "null_resource.copy-controller-secrets", + "null_resource.copy-worker-secrets", + ] + + connection { + type = "ssh" + host = "${libvirt_domain.controller.0.network_interface.0.addresses.0}" + user = "core" + timeout = "15m" + } + + provisioner "file" { + source = "${var.asset_dir}" + destination = "$HOME/assets" + } + + provisioner "remote-exec" { + inline = [ + "sudo mv $HOME/assets /opt/bootkube", + "sudo systemctl start bootkube", + ] + } +} diff --git a/libvirt/container-linux/kubernetes/variables.tf b/libvirt/container-linux/kubernetes/variables.tf new file mode 100644 index 000000000..c589c9700 --- /dev/null +++ b/libvirt/container-linux/kubernetes/variables.tf @@ -0,0 +1,124 @@ +# cluster-level configuration + +variable "cluster_name" { + type = "string" + description = "Unique cluster name" +} + +variable "base_image_path" { + type = "string" + description = "Path to a downloaded and uncompressed container linux derivative image" +} + +variable "k8s_domain_name" { + description = "Controller DNS name which resolves to a controller instance. Workers and kubeconfig's will communicate with this endpoint (e.g. cluster.example.com)" + type = "string" +} + +variable "libvirt_create_k8s_domain_name" { + description = "Whether or not libvirt should create a record for k8s_domain_name. Set this to false if you already have a load balancing solution created." + type = "string" + default = "1" +} + +# machines + +variable "ssh_authorized_key" { + type = "string" + description = "SSH public key for user 'core'" +} + +variable "asset_dir" { + description = "Path to a directory where generated assets should be placed (contains secrets)" + type = "string" +} + +# machines + +variable "machine_domain" { + description = "the domain to use for all machine names" + type = "string" +} + +# controllers +variable "controller_names" { + description = "list of controller hostnames (not fqdn)" + type = "list" +} + +variable "controller_memory" { + description = "ram to allocate in MiB for each controller" + type = "string" + default = "2048" +} + +# workers +variable "worker_names" { + description = "list of worker hostnames (not fqdn)" + type = "list" +} + +variable "worker_memory" { + description = "ram to allocate in MiB for each worker" + type = "string" + default = "2048" +} + + +# Optional cluster networking configuration + +variable "networking" { + description = "Choice of networking provider (flannel or calico)" + type = "string" + default = "calico" +} + +variable "network_mtu" { + description = "CNI interface MTU (applies to calico only)" + type = "string" + default = "1480" +} + +variable "network_ip_autodetection_method" { + description = "Method to autodetect the host IPv4 address (applies to calico only)" + type = "string" + default = "first-found" +} + +variable "pod_cidr" { + description = "CIDR IPv4 range to assign Kubernetes pods" + type = "string" + default = "10.2.0.0/16" +} + +variable "service_cidr" { + description = <