Skip to content

[registry-facade] Support zero-downtime updates #2601

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions .werft/build.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ async function build(context, version) {
const dynamicCPULimits = "dynamic-cpu-limits" in buildConfig;
const withInstaller = "with-installer" in buildConfig || masterBuild;
const noPreview = "no-preview" in buildConfig || publishRelease;
const registryFacadeHandover = "registry-facade-handover" in buildConfig;
werft.log("job config", JSON.stringify({
buildConfig,
version,
Expand All @@ -67,6 +68,7 @@ async function build(context, version) {
workspaceFeatureFlags,
dynamicCPULimits,
noPreview,
registryFacadeHandover,
}));

/**
Expand Down Expand Up @@ -111,15 +113,15 @@ async function build(context, version) {
werft.phase("deploy", "not deploying");
console.log("no-preview or publish-release is set");
} else {
await deployToDev(version, previewWithHttps, workspaceFeatureFlags, dynamicCPULimits);
await deployToDev(version, previewWithHttps, workspaceFeatureFlags, dynamicCPULimits, registryFacadeHandover);
}
}


/**
* Deploy dev
*/
async function deployToDev(version, previewWithHttps, workspaceFeatureFlags, dynamicCPULimits) {
async function deployToDev(version, previewWithHttps, workspaceFeatureFlags, dynamicCPULimits, registryFacadeHandover) {
werft.phase("deploy", "deploying to dev");
const destname = version.split(".")[0];
const namespace = `staging-${destname}`;
Expand Down Expand Up @@ -194,7 +196,7 @@ async function deployToDev(version, previewWithHttps, workspaceFeatureFlags, dyn
exec(`/usr/local/bin/helm3 delete jaeger-${destname} || echo jaeger-${destname} was not installed yet`, {slice: 'predeploy cleanup'});

let objs = [];
["ws-scheduler", "node-daemon", "cluster", "workspace", "jaeger", "jaeger-agent", "ws-sync", "ws-manager-node", "ws-daemon"].forEach(comp =>
["ws-scheduler", "node-daemon", "cluster", "workspace", "jaeger", "jaeger-agent", "ws-sync", "ws-manager-node", "ws-daemon", "registry-facade"].forEach(comp =>
["ClusterRole", "ClusterRoleBinding", "PodSecurityPolicy"].forEach(kind =>
shell
.exec(`kubectl get ${kind} -l component=${comp} --no-headers -o=custom-columns=:metadata.name | grep ${namespace}-ns`)
Expand Down Expand Up @@ -239,6 +241,11 @@ async function deployToDev(version, previewWithHttps, workspaceFeatureFlags, dyn
if (dynamicCPULimits) {
flags+=` -f ../.werft/values.variant.cpuLimits.yaml`;
}
if (registryFacadeHandover) {
flags+=` --set components.registryFacade.handover.enabled=true`;
flags+=` --set components.registryFacade.handover.socket=/var/lib/gitpod/registry-facade-${namespace}`;
}

// const pathToVersions = `${shell.pwd().toString()}/versions.yaml`;
// if (fs.existsSync(pathToVersions)) {
// flags+=` -f ${pathToVersions}`;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ spec:
- 'secret'
- 'emptyDir'
- 'persistentVolumeClaim'
hostNetwork: false
- 'hostPath'
hostNetwork: true
hostIPC: false
hostPID: false
hostPorts:
Expand Down
20 changes: 20 additions & 0 deletions chart/templates/registry-facade-clusterrole.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright (c) 2020 Gitpod GmbH. All rights reserved.
# Licensed under the MIT License. See License-MIT.txt in the project root for license information.

{{ if .Values.installPodSecurityPolicies -}}
# ClusterRole granting permission to "use" the registry-facade PodSecurityPolicy.
# Only rendered when installPodSecurityPolicies is truthy.
kind: ClusterRole
# NOTE(review): rbac.authorization.k8s.io/v1beta1 has been superseded by rbac.authorization.k8s.io/v1 -
# confirm which Kubernetes versions this chart still has to support.
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
# The name is prefixed with the release namespace and matches the policy listed under resourceNames below.
name: {{ .Release.Namespace }}-ns-registry-facade
labels:
app: {{ template "gitpod.fullname" . }}
component: cluster
kind: clusterrole
stage: {{ .Values.installation.stage }}
rules:
# Grants only the "use" verb, and only on the single policy named in resourceNames.
- apiGroups: ["policy"]
resources: ["podsecuritypolicies"]
verbs: ["use"]
resourceNames:
- {{ .Release.Namespace }}-ns-registry-facade
{{- end -}}
10 changes: 9 additions & 1 deletion chart/templates/registry-facade-configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@ data:
{
{{ if .Values.components.workspace.pullSecret.secretName -}}"dockerAuth": "/mnt/pull-secret.json",{{- end }}
"registry": {
{{- if $comp.handover.enabled }}
"port": {{ $comp.ports.registry.servicePort }},
{{- else }}
"port": {{ $comp.ports.registry.containerPort }},
{{- end }}
{{- if (or .Values.certificatesSecret.secretName $comp.certificatesSecret.secretName) }}
{{- if (or .Values.certificatesSecret.certManager $comp.certificatesSecret.certManager) }}
"tls": {
Expand Down Expand Up @@ -45,7 +49,11 @@ data:
"ref": "{{ template "gitpod.comp.imageFull" (dict "root" . "gp" $.Values "comp" .Values.components.workspace.dockerUp) }}",
"type": "image"
}
]
],
"handover": {
"enabled": {{ $comp.handover.enabled }},
"sockets": "/mnt/handover"
}
},
"pprofAddr": ":6060",
"prometheusAddr": ":9500"
Expand Down
37 changes: 35 additions & 2 deletions chart/templates/registry-facade-daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,24 @@ spec:
spec:
{{ include "gitpod.workspaceAffinity" $this | indent 6 }}
serviceAccountName: registry-facade
{{- if $comp.handover.enabled }}
initContainers:
- name: handover-ownership
image: {{ template "gitpod.comp.imageFull" $this }}
command:
- "/bin/sh"
- "-c"
- "chown -R 1000:1000 /mnt/handover"
volumeMounts:
- name: handover
mountPath: "/mnt/handover"
securityContext:
privileged: false
runAsUser: 0
{{- end }}
{{- if $comp.handover.enabled }}
hostNetwork: true
{{- end }}
containers:
- name: registry-facade
image: {{ template "gitpod.comp.imageFull" $this }}
Expand All @@ -46,21 +64,30 @@ spec:
{{ include "gitpod.container.resources" $this | indent 8 }}
ports:
- name: registry
{{- if $comp.handover.enabled }}
# if hostNetwork == true then containerPort == hostPort
containerPort: {{ $comp.ports.registry.servicePort }}
{{- else }}
containerPort: {{ $comp.ports.registry.containerPort }}
hostPort: {{ $comp.ports.registry.servicePort }}
- name: metrics
containerPort: 9500
{{- end }}
securityContext:
privileged: false
runAsUser: 1000
{{ include "gitpod.container.defaultEnv" $this | indent 8 }}
{{ include "gitpod.container.tracingEnv" $this | indent 8 }}
volumeMounts:
- name: cache
mountPath: "/mnt/cache"
- name: config
mountPath: "/mnt/config"
readOnly: true
- name: cache
mountPath: "/mnt/cache"
{{- if $comp.handover.enabled }}
- name: handover
mountPath: "/mnt/handover"
{{- end }}
{{- if .Values.components.workspace.pullSecret.secretName }}
- name: pull-secret
mountPath: /mnt/pull-secret.json
Expand All @@ -76,6 +103,12 @@ spec:
- name: config
configMap:
name: {{ template "gitpod.comp.configMap" $this }}
{{- if $comp.handover.enabled }}
- name: handover
hostPath:
path: {{ $comp.handover.socket | quote }}
type: DirectoryOrCreate
{{- end }}
{{- if .Values.components.workspace.pullSecret.secretName }}
- name: pull-secret
secret:
Expand Down
65 changes: 65 additions & 0 deletions chart/templates/registry-facade-podsecuritypolicy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Copyright (c) 2020 Gitpod GmbH. All rights reserved.
# Licensed under the MIT License. See License-MIT.txt in the project root for license information.

{{ if .Values.installPodSecurityPolicies -}}
# PodSecurityPolicy for registry-facade. Only rendered when installPodSecurityPolicies is truthy.
# Taken from the examples here:
# Examples: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#example-policies
# File: https://raw.githubusercontent.com/kubernetes/website/master/content/en/examples/policy/restricted-psp.yaml
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
name: {{ .Release.Namespace }}-ns-registry-facade
labels:
app: {{ template "gitpod.fullname" . }}
component: cluster
kind: podsecuritypolicy
stage: {{ .Values.installation.stage }}
annotations:
# Both the allowed and the default seccomp/AppArmor profile are set to 'runtime/default'.
seccomp.security.alpha.kubernetes.io/allowedProfileNames: 'runtime/default'
apparmor.security.beta.kubernetes.io/allowedProfileNames: 'runtime/default'
seccomp.security.alpha.kubernetes.io/defaultProfileName: 'runtime/default'
apparmor.security.beta.kubernetes.io/defaultProfileName: 'runtime/default'
spec:
#####
# The nginx master process (currently?) runs as root, thus we have to turn some safe things off
# NOTE(review): the nginx remark looks carried over from another component's policy. For
# registry-facade, the handover-ownership init container in the daemonset template runs as
# UID 0 when handover is enabled - presumably that is the reason the settings below stay
# commented out; confirm.
#####
### TODO root proxy
# privileged: false
# # Required to prevent escalations to root.
# allowPrivilegeEscalation: false
# # This is redundant with non-root + disallow privilege escalation,
# # but we can provide it for defense in depth.
# requiredDropCapabilities:
# - ALL
### TODO root proxy
# Allow core volume types.
# 'hostPath' is included; the daemonset template mounts the handover directory as a hostPath volume when handover is enabled.
volumes:
- 'configMap'
- 'secret'
- 'emptyDir'
- 'hostPath'
# Host networking is permitted; the daemonset template sets hostNetwork: true when handover is enabled.
hostNetwork: true
hostIPC: false
hostPID: false
# NOTE(review): presumably the registry port used in handover mode has to fall inside this range - verify against the chart values.
hostPorts:
- min: 30000
max: 33000
runAsUser:
rule: 'RunAsAny'
seLinux:
# This policy assumes the nodes are using AppArmor rather than SELinux.
rule: 'RunAsAny'
supplementalGroups:
rule: 'MustRunAs'
ranges:
# Forbid adding the root group.
- min: 1
max: 65535
fsGroup:
rule: 'MustRunAs'
ranges:
# Forbid adding the root group.
- min: 1
max: 65535
readOnlyRootFilesystem: false
{{- end -}}
2 changes: 1 addition & 1 deletion chart/templates/registry-facade-rolebinding.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ subjects:
name: registry-facade
roleRef:
kind: ClusterRole
name: {{ .Release.Namespace }}-ns-psp:restricted-root-user
name: {{ .Release.Namespace }}-ns-registry-facade
apiGroup: rbac.authorization.k8s.io
3 changes: 3 additions & 0 deletions chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,9 @@ components:
servicePort: 3000
svcLabels:
feature: registry
handover:
enabled: false
socket: /var/lib/gitpod/registry-facade
serviceType: "ClusterIP"

server:
Expand Down
65 changes: 65 additions & 0 deletions components/registry-facade/cmd/handover.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Copyright (c) 2020 TypeFox GmbH. All rights reserved.
// Licensed under the GNU Affero General Public License (AGPL).
// See License-AGPL.txt in the project root for license information.

package cmd

import (
"context"
"os"
"os/signal"
"syscall"
"time"

"github.com/gitpod-io/gitpod/common-go/log"
"github.com/gitpod-io/gitpod/registry-facade/pkg/registry"
"github.com/spf13/cobra"
)

// debugHandover is the "handover" sub-command. It tries to receive the listener
// socket from a registry-facade through the given socket directory and, if it
// got one, offers that listener up again until either the offer channel yields
// a value or the process receives SIGINT/SIGTERM.
var debugHandover = &cobra.Command{
	Use:   "handover <socket-dir>",
	Short: "Attempts to get the listener socket from a registry-facade - and offers it back up for someone else",
	Args:  cobra.ExactArgs(1),
	RunE: func(cmd *cobra.Command, args []string) error {
		socketDir := args[0]

		// ReceiveHandover is given a context that expires after five seconds.
		recvCtx, cancelRecv := context.WithTimeout(context.Background(), 5*time.Second)
		defer cancelRecv()

		listener, err := registry.ReceiveHandover(recvCtx, socketDir)
		if err != nil {
			return err
		}
		if listener == nil {
			// No error, but nothing was handed over either: nothing to offer.
			log.Warn("received no listener")
			return nil
		}
		log.Info("handover successfull - holding listener for someone else")

		// The offer stays open until this function returns and cancelOffer runs.
		offerCtx, cancelOffer := context.WithCancel(context.Background())
		defer cancelOffer()

		offer, err := registry.OfferHandover(offerCtx, socketDir, listener, nil)
		if err != nil {
			return err
		}
		log.Info("waiting for someone else to handover to - stop with Ctrl+C")

		signals := make(chan os.Signal, 1)
		signal.Notify(signals, os.Interrupt, syscall.SIGTERM)

		select {
		case <-signals:
			// Stop on SIGINT/SIGTERM.
		case started := <-offer:
			// NOTE(review): a true value presumably means a handover has begun and the
			// second receive waits for it to finish - confirm against OfferHandover.
			if started {
				<-offer
			}
		}

		return nil
	},
}

// init registers the handover debug command as a sub-command of rootCmd.
func init() {
rootCmd.AddCommand(debugHandover)
}
12 changes: 10 additions & 2 deletions components/registry-facade/cmd/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,16 @@ var runCmd = &cobra.Command{
return docker.NewResolver(resolverOpts)
}

registryDoneChan := make(chan struct{})
reg, err := registry.NewRegistry(cfg.Registry, resolverProvider, prometheus.WrapRegistererWithPrefix("registry_", gpreg))
if err != nil {
log.WithError(err).Fatal("cannot create registry")
}
go reg.MustServe()
go func() {
defer close(registryDoneChan)
reg.MustServe()
}()

if cfg.PProfAddr != "" {
go pprof.Serve(cfg.PProfAddr)
}
Expand All @@ -105,7 +110,10 @@ var runCmd = &cobra.Command{
log.Info("🏪 registry facade is up and running")
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
<-sigChan
select {
case <-sigChan:
case <-registryDoneChan:
}
},
}

Expand Down
2 changes: 2 additions & 0 deletions components/registry-facade/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ require (
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.1.0
github.com/spf13/cobra v0.0.5
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e
golang.org/x/sys v0.0.0-20201112073958-5cba982894dd
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1
google.golang.org/grpc v1.34.0
gotest.tools/v3 v3.0.3 // indirect
Expand Down
Loading