From 83c99bd58ff7a0d42b28d6d44c458f1dfe5f9d21 Mon Sep 17 00:00:00 2001
From: Otto Yiu
Date: Tue, 11 Jul 2017 15:52:40 -0700
Subject: [PATCH] Release notes for Calico Pod CIDR changes made in #2768

Also document the migration procedure necessary for existing calico clusters
---
 docs/releases/1.7-NOTES.md                   |   9 ++
 docs/releases/1.7-calico-cidr-migration.md   |  68 ++++++++++++
 docs/releases/1.7-calico-cidr-migration.yaml | 108 +++++++++++++++++++
 3 files changed, 185 insertions(+)
 create mode 100644 docs/releases/1.7-NOTES.md
 create mode 100644 docs/releases/1.7-calico-cidr-migration.md
 create mode 100644 docs/releases/1.7-calico-cidr-migration.yaml

diff --git a/docs/releases/1.7-NOTES.md b/docs/releases/1.7-NOTES.md
new file mode 100644
index 0000000000000..03a4018e43263
--- /dev/null
+++ b/docs/releases/1.7-NOTES.md
@@ -0,0 +1,9 @@
_This is a WIP document describing changes to the upcoming kops 1.7 release_

# Significant changes

* Calico is now configured with the correct pod CIDR: #2768

# Known Issues

* Existing Calico users on clusters that were created prior to kops 1.7 are susceptible to IP conflicts between Pods and Services due to an overlap of the two IP ranges. Migrating to a new Pod CIDR is recommended; this is a manual procedure because of the risk of downtime during the operation. For the migration procedure, please refer to [this document](1.7-calico-cidr-migration.md).

diff --git a/docs/releases/1.7-calico-cidr-migration.md b/docs/releases/1.7-calico-cidr-migration.md
new file mode 100644
index 0000000000000..64ae924fc88e3
--- /dev/null
+++ b/docs/releases/1.7-calico-cidr-migration.md
@@ -0,0 +1,68 @@
# Calico Pod CIDR Migration Procedure
Prior to kops 1.7, calico was misconfigured to use the `.NonMasqueradeCIDR`
field as the CIDR range for Pod IPs. As a result, an IP conflict may occur
when a Service is allocated an IP that has already been assigned to a Pod,
or vice versa. To prevent this from occurring, manual steps are necessary
before upgrading your cluster with kops 1.7 or later.

## Background
The `.NonMasqueradeCIDR` field in the clusterSpec captures the IP range of
the entire cluster.

Within this IP range, smaller IP ranges are then carved out for:
* Service IPs - as defined by `.serviceClusterIPRange`
* Pod IPs - as defined by `.kubeControllerManager.clusterCIDR`

It was found in Issue #1171 that weave and calico were misconfigured to
use the wider IP range rather than the range dedicated to Pods only. This was
fixed in PRs #2717 and #2768 for the two CNIs by switching over to the
`.kubeControllerManager.clusterCIDR` field instead.

For weave, changing the `--ipalloc-range` flag effectively creates a new
network. Pods in the existing network will not necessarily be able to talk
to those in the new network, so all nodes need to be restarted to guarantee
that all Pods come up with IPs in the new network. See [here](
https://github.com/weaveworks/weave/issues/2874) for more details.

Just like weave, the configuration change alone is not enough to mitigate the
problem on existing clusters running calico. A new network (an IP Pool, in
Calico terms) needs to be created first, and all nodes need to be restarted
so that Pods are allocated IP addresses from the new pool.

## Prerequisites

* A Kubernetes cluster with calico as the CNI, created prior to kops 1.7
* A scheduled maintenance window - this procedure *WILL* result in cluster degradation
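
Before scheduling the maintenance window, it can help to confirm that the
cluster is actually affected, i.e. that the dedicated pod CIDR differs from the
wider cluster CIDR that the existing Calico IP pool was created with. A minimal
check, using the same `kops` and `jq` commands as the procedure below (the
`YOUR_CLUSTER_NAME` value is a placeholder):

```bash
# Print the cluster-wide CIDR next to the dedicated pod CIDR. On an affected
# pre-1.7 calico cluster, the existing Calico IP pool was created with the
# wider nonMasqueradeCIDR instead of the clusterCIDR shown here.
export NAME="YOUR_CLUSTER_NAME"
kops get cluster $NAME -o json --full | \
  jq '{nonMasqueradeCIDR: .spec.nonMasqueradeCIDR, podCIDR: .spec.kubeControllerManager.clusterCIDR}'
```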

## Procedure
**WARNING** - This procedure will cause disruption to Pods running on the cluster.
New Pods may not be able to resolve DNS through kube-dns or reach other Services
through their service IPs during the rolling restart.
Attempt this migration procedure on a staging cluster before doing it in production.

---
Calico only uses `CALICO_IPV4POOL_CIDR` to create a default IPv4 pool if a
pool does not already exist:
https://github.com/projectcalico/calicoctl/blob/v1.3.0/calico_node/startup/startup.go#L463

Therefore, we need to run two one-off Jobs that execute calicoctl to migrate to
the new CIDR - one to create the new IPv4 pool that we want, and one to delete
the existing IP pool that we no longer want. These Jobs are to be applied after
a `kops update cluster --yes` with kops 1.7 or later,
and before a `kops rolling-update cluster`.

1. Using kops >= 1.7, update your cluster using `kops update cluster [--yes]`.
2. From the same directory as this document, set your cluster name in the `NAME` variable and run:
```bash
export NAME="YOUR_CLUSTER_NAME"
export MIGRATION_FILE="1.7-calico-cidr-migration.yaml"
export NON_MASQUERADE_CIDR="`kops get cluster $NAME -o json --full | jq .spec.nonMasqueradeCIDR --raw-output`"
export POD_CIDR="`kops get cluster $NAME -o json --full | jq .spec.kubeControllerManager.clusterCIDR --raw-output`"
sed -i -e "s@{{NON_MASQUERADE_CIDR}}@${NON_MASQUERADE_CIDR}@g" ${MIGRATION_FILE}
sed -i -e "s@{{POD_CIDR}}@${POD_CIDR}@g" ${MIGRATION_FILE}
kubectl apply -f ${MIGRATION_FILE}
```
3. Run `kops rolling-update cluster [--yes]` to initiate a rolling restart of the cluster. You may need to force the rolling update if
kops does not detect any changes that require one.

That's it - you should see new Pods being allocated IPs from the new IP range!

diff --git a/docs/releases/1.7-calico-cidr-migration.yaml b/docs/releases/1.7-calico-cidr-migration.yaml
new file mode 100644
index 0000000000000..0625685bfd9d4
--- /dev/null
+++ b/docs/releases/1.7-calico-cidr-migration.yaml
@@ -0,0 +1,108 @@
# This ConfigMap is used in the creation of a new Calico IP Pool.
kind: ConfigMap
apiVersion: v1
metadata:
  name: calico-config-ippool
  namespace: kube-system
data:
  # The default IP Pool to be created for the cluster.
  # Pod IP addresses will be assigned from this pool.
  ippool.yaml: |
    apiVersion: v1
    kind: ipPool
    metadata:
      cidr: {{POD_CIDR}}
    spec:
      ipip:
        enabled: true
        mode: cross-subnet
      nat-outgoing: true
---
## This manifest deploys a Job which adds a new ippool to calico
apiVersion: batch/v1
kind: Job
metadata:
  name: configure-calico-ippool
  namespace: kube-system
  labels:
    k8s-app: calico
    role.kubernetes.io/networking: "1"
spec:
  template:
    metadata:
      name: configure-calico-ippool
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ''
    spec:
      hostNetwork: true
      serviceAccountName: calico
      tolerations:
        - key: node-role.kubernetes.io/master
          effect: NoSchedule
        - key: CriticalAddonsOnly
          operator: Exists
      restartPolicy: OnFailure
      containers:
        - name: configure-calico
          image: calico/ctl:v1.2.1
          args:
            - apply
            - -f
            - /etc/config/calico/ippool.yaml
          volumeMounts:
            - name: config-volume
              mountPath: /etc/config
          env:
            # The location of the etcd cluster.
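            # calicoctl reads the etcd endpoints from the existing calico-config
            # ConfigMap deployed by the kops calico addon, so these one-off Jobs
            # talk to the same etcd datastore as the running calico-node pods.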
            - name: ETCD_ENDPOINTS
              valueFrom:
                configMapKeyRef:
                  name: calico-config
                  key: etcd_endpoints
      volumes:
        - name: config-volume
          configMap:
            name: calico-config-ippool
            items:
              - key: ippool.yaml
                path: calico/ippool.yaml

---
## This manifest deploys a Job which deletes the old ippool from calico
apiVersion: batch/v1
kind: Job
metadata:
  name: configure-calico-ippool-remove
  namespace: kube-system
  labels:
    k8s-app: calico
    role.kubernetes.io/networking: "1"
spec:
  template:
    metadata:
      name: configure-calico-ippool-remove
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ''
    spec:
      hostNetwork: true
      serviceAccountName: calico
      tolerations:
        - key: node-role.kubernetes.io/master
          effect: NoSchedule
        - key: CriticalAddonsOnly
          operator: Exists
      restartPolicy: OnFailure
      containers:
        - name: configure-calico
          image: calico/ctl:v1.2.1
          args:
            - delete
            - ipPool
            - {{NON_MASQUERADE_CIDR}}
          env:
            # The location of the etcd cluster.
            - name: ETCD_ENDPOINTS
              valueFrom:
                configMapKeyRef:
                  name: calico-config
                  key: etcd_endpoints
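
# After applying this file, one way to confirm that both Jobs completed
# successfully before starting the rolling restart:
#   kubectl --namespace kube-system get jobs configure-calico-ippool configure-calico-ippool-remove
#   kubectl --namespace kube-system logs job/configure-calico-ippool
#   kubectl --namespace kube-system logs job/configure-calico-ippool-remove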