Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow DNS resolution of the runner pod for all k8s setup #886

Merged
merged 6 commits into from
Sep 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,10 @@ make run
Set the name of the container image to be created from the source code. This will be used when building, pushing and referring to the image on YAML files:

```sh
export IMG=registry-path/tf-controller:latest
export MANAGER_IMG=registry-path/tf-controller
```

Build the container image, tagging it as `$IMG`:
Build the container image, tagging it as `$MANAGER_IMG:latest`:

```sh
make docker-build
Expand Down
11 changes: 8 additions & 3 deletions api/v1alpha2/terraform_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package v1alpha2
import (
"bytes"
"fmt"
"net"
"strings"
"time"
"unicode/utf8"
Expand Down Expand Up @@ -880,9 +881,13 @@ func (in *Terraform) FromBytes(b []byte, scheme *runtime.Scheme) error {
), b, in)
}

func (in *Terraform) GetRunnerHostname(ip string, clusterDomain string) string {
prefix := strings.ReplaceAll(ip, ".", "-")
return fmt.Sprintf("%s.%s.pod.%s", prefix, in.Namespace, clusterDomain)
// GetRunnerHostname returns the in-cluster DNS name used to reach the runner
// pod that belongs to this Terraform object.
//
// If target parses as an IP address, the IP-based pod record is returned:
//
//	<ip-with-dashes>.<namespace>.pod.<clusterDomain>
//
// Both the IPv4 separator "." and the IPv6 separator ":" are replaced by "-",
// so the first label never contains characters that are illegal in a DNS
// label (e.g. 10.1.2.3 -> 10-1-2-3, fd00::1 -> fd00--1).
//
// Any other target is treated as a pod hostname and is resolved through the
// "tf-runner" subdomain:
//
//	<target>.tf-runner.<namespace>.svc.<clusterDomain>
func (in *Terraform) GetRunnerHostname(target string, clusterDomain string) string {
	// Not an IP: address the pod by hostname under the tf-runner subdomain.
	if net.ParseIP(target) == nil {
		return fmt.Sprintf("%s.tf-runner.%s.svc.%s", target, in.Namespace, clusterDomain)
	}

	// IP-based pod record: dots (IPv4) and colons (IPv6) both become dashes.
	prefix := strings.NewReplacer(".", "-", ":", "-").Replace(target)
	return fmt.Sprintf("%s.%s.pod.%s", prefix, in.Namespace, clusterDomain)
}

func (in *TerraformSpec) GetAlwaysCleanupRunnerPod() bool {
Expand Down
1 change: 1 addition & 0 deletions charts/tf-controller/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ __Note__: If you need to use the `imagePullSecrets` it would be best to set `ser
| serviceAccount.create | bool | `true` | If `true`, create a new service account |
| serviceAccount.name | string | tf-controller | Service account to be used |
| tolerations | list | `[]` | Tolerations properties for the TF-Controller deployment |
| usePodSubdomainResolution | bool | `false` | Argument for `--use-pod-subdomain-resolution` (Controller). UsePodSubdomainResolution allows pod hostname/subdomain DNS resolution for the runner pod instead of IP-based DNS resolution. |
| volumeMounts | list | `[]` | Volume mounts properties for the TF-Controller deployment |
| volumes | list | `[]` | Volumes properties for the TF-Controller deployment |

Expand Down
1 change: 1 addition & 0 deletions charts/tf-controller/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ Create a unique list of runner allowed namespaces
*/}}
{{- define "tf-controller.runner.allowedNamespaces" -}}
{{- /* Start from the configured runner.serviceAccount.allowedNamespaces and add the release namespace. */ -}}
{{- $allowedNamespaces := append .Values.runner.serviceAccount.allowedNamespaces .Release.Namespace -}}
{{- /* "flux-system" is always appended, whether or not it was configured. */ -}}
{{- $allowedNamespaces := append $allowedNamespaces "flux-system" -}}
{{- /* Remove duplicates, then emit the list as a JSON array (callers decode it with fromJsonArray). */ -}}
{{- $allowedNamespaces = $allowedNamespaces | uniq -}}
{{ toJson $allowedNamespaces }}
{{- end }}
Expand Down
1 change: 1 addition & 0 deletions charts/tf-controller/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ spec:
- --kube-api-burst={{ .Values.kubeAPIBurst }}
- --allow-break-the-glass={{ .Values.allowBreakTheGlass }}
- --cluster-domain={{ .Values.clusterDomain }}
- --use-pod-subdomain-resolution={{ .Values.usePodSubdomainResolution }}
command:
- /sbin/tini
- --
Expand Down
18 changes: 18 additions & 0 deletions charts/tf-controller/templates/runner-discovery-service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{{- /*
Runner discovery Service.

Rendered only when .Values.usePodSubdomainResolution is true. One headless
Service (clusterIP: None) named "tf-runner" is emitted for every namespace
returned by the "tf-controller.runner.allowedNamespaces" helper. The Service
selects pods labelled app.kubernetes.io/name=tf-runner and
app.kubernetes.io/created-by=tf-controller and exposes the port named "grpc"
(30000).
*/ -}}
{{- if .Values.usePodSubdomainResolution -}}
{{- range include "tf-controller.runner.allowedNamespaces" . | fromJsonArray }}
---
apiVersion: v1
kind: Service
metadata:
  name: tf-runner
  namespace: {{ . }}
spec:
  clusterIP: None
  ports:
    - name: grpc
      port: 30000
  selector:
    app.kubernetes.io/created-by: tf-controller
    app.kubernetes.io/name: tf-runner
{{- end }}
{{- end }}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the yaml file needs a new line I believe.

3 changes: 3 additions & 0 deletions charts/tf-controller/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ allowBreakTheGlass: false
# -- Argument for `--cluster-domain` (Controller).
# ClusterDomain indicates the cluster domain, defaults to cluster.local.
clusterDomain: cluster.local
# -- Argument for `--use-pod-subdomain-resolution` (Controller).
# UsePodSubdomainResolution allows pod hostname/subdomain DNS resolution for the runner pod instead of IP-based DNS resolution.
usePodSubdomainResolution: false
awsPackage:
install: true
tag: v4.38.0-v1alpha11
Expand Down
66 changes: 35 additions & 31 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,25 +74,26 @@ func init() {

func main() {
var (
metricsAddr string
eventsAddr string
healthAddr string
concurrent int
requeueDependency time.Duration
clientOptions client.Options
logOptions logger.Options
leaderElectionOptions leaderelection.Options
watchAllNamespaces bool
httpRetry int
caValidityDuration time.Duration
certValidityDuration time.Duration
rotationCheckFrequency time.Duration
runnerGRPCPort int
runnerCreationTimeout time.Duration
runnerGRPCMaxMessageSize int
allowBreakTheGlass bool
clusterDomain string
aclOptions acl.Options
metricsAddr string
eventsAddr string
healthAddr string
concurrent int
requeueDependency time.Duration
clientOptions client.Options
logOptions logger.Options
leaderElectionOptions leaderelection.Options
watchAllNamespaces bool
httpRetry int
caValidityDuration time.Duration
certValidityDuration time.Duration
rotationCheckFrequency time.Duration
runnerGRPCPort int
runnerCreationTimeout time.Duration
runnerGRPCMaxMessageSize int
allowBreakTheGlass bool
clusterDomain string
aclOptions acl.Options
usePodSubdomainResolution bool
)

flag.StringVar(&metricsAddr, "metrics-addr", ":8080", "The address the metric endpoint binds to.")
Expand All @@ -114,6 +115,7 @@ func main() {
flag.IntVar(&runnerGRPCMaxMessageSize, "runner-grpc-max-message-size", 4, "The maximum message size for gRPC connections in MiB.")
flag.BoolVar(&allowBreakTheGlass, "allow-break-the-glass", false, "Allow break the glass mode.")
flag.StringVar(&clusterDomain, "cluster-domain", "cluster.local", "The cluster domain used by the cluster.")
flag.BoolVar(&usePodSubdomainResolution, "use-pod-subdomain-resolution", false, "Allow to use pod hostname/subdomain DNS resolution instead of IP based")

clientOptions.BindFlags(flag.CommandLine)
logOptions.BindFlags(flag.CommandLine)
Expand Down Expand Up @@ -173,6 +175,7 @@ func main() {
TriggerCARotation: make(chan mtls.Trigger),
TriggerNamespaceTLSGeneration: make(chan mtls.Trigger),
ClusterDomain: clusterDomain,
UsePodSubdomainResolution: usePodSubdomainResolution,
}

const localHost = "localhost"
Expand All @@ -188,18 +191,19 @@ func main() {
}

reconciler := &controllers.TerraformReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
EventRecorder: eventRecorder,
Metrics: metricsH,
StatusPoller: polling.NewStatusPoller(mgr.GetClient(), mgr.GetRESTMapper(), polling.Options{}),
CertRotator: rotator,
RunnerGRPCPort: runnerGRPCPort,
RunnerCreationTimeout: runnerCreationTimeout,
RunnerGRPCMaxMessageSize: runnerGRPCMaxMessageSize,
AllowBreakTheGlass: allowBreakTheGlass,
ClusterDomain: clusterDomain,
NoCrossNamespaceRefs: aclOptions.NoCrossNamespaceRefs,
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
EventRecorder: eventRecorder,
Metrics: metricsH,
StatusPoller: polling.NewStatusPoller(mgr.GetClient(), mgr.GetRESTMapper(), polling.Options{}),
CertRotator: rotator,
RunnerGRPCPort: runnerGRPCPort,
RunnerCreationTimeout: runnerCreationTimeout,
RunnerGRPCMaxMessageSize: runnerGRPCMaxMessageSize,
AllowBreakTheGlass: allowBreakTheGlass,
ClusterDomain: clusterDomain,
NoCrossNamespaceRefs: aclOptions.NoCrossNamespaceRefs,
UsePodSubdomainResolution: usePodSubdomainResolution,
}

if err = reconciler.SetupWithManager(mgr, concurrent, httpRetry); err != nil {
Expand Down
18 changes: 10 additions & 8 deletions controllers/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,21 +176,23 @@ func TestMain(m *testing.M) {
TriggerCARotation: make(chan mtls.Trigger),
TriggerNamespaceTLSGeneration: make(chan mtls.Trigger),
ClusterDomain: "cluster.local",
UsePodSubdomainResolution: false,
}

if err := mtls.AddRotator(ctx, k8sManager, rotator); err != nil {
panic(err)
}

reconciler = &TerraformReconciler{
Client: k8sManager.GetClient(),
Scheme: k8sManager.GetScheme(),
EventRecorder: k8sManager.GetEventRecorderFor("tf-controller"),
StatusPoller: polling.NewStatusPoller(k8sManager.GetClient(), k8sManager.GetRESTMapper(), polling.Options{}),
CertRotator: rotator,
RunnerGRPCPort: 30000,
RunnerCreationTimeout: 120 * time.Second,
RunnerGRPCMaxMessageSize: 4,
Client: k8sManager.GetClient(),
Scheme: k8sManager.GetScheme(),
EventRecorder: k8sManager.GetEventRecorderFor("tf-controller"),
StatusPoller: polling.NewStatusPoller(k8sManager.GetClient(), k8sManager.GetRESTMapper(), polling.Options{}),
CertRotator: rotator,
RunnerGRPCPort: 30000,
RunnerCreationTimeout: 120 * time.Second,
RunnerGRPCMaxMessageSize: 4,
UsePodSubdomainResolution: false,
}

// We use 1 concurrent and 10s httpRetry in the test
Expand Down
19 changes: 10 additions & 9 deletions controllers/tf_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,16 @@ type TerraformReconciler struct {
statusManager string
requeueDependency time.Duration

StatusPoller *polling.StatusPoller
Scheme *runtime.Scheme
CertRotator *mtls.CertRotator
RunnerGRPCPort int
RunnerCreationTimeout time.Duration
RunnerGRPCMaxMessageSize int
AllowBreakTheGlass bool
ClusterDomain string
NoCrossNamespaceRefs bool
StatusPoller *polling.StatusPoller
Scheme *runtime.Scheme
CertRotator *mtls.CertRotator
RunnerGRPCPort int
RunnerCreationTimeout time.Duration
RunnerGRPCMaxMessageSize int
AllowBreakTheGlass bool
ClusterDomain string
NoCrossNamespaceRefs bool
UsePodSubdomainResolution bool
}

//+kubebuilder:rbac:groups=infra.contrib.fluxcd.io,resources=terraforms,verbs=get;list;watch;create;update;patch;delete
Expand Down
17 changes: 14 additions & 3 deletions controllers/tf_controller_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,12 @@ func (r *TerraformReconciler) LookupOrCreateRunner(ctx context.Context, terrafor
traceLog.Error(err, "Hit an error")
return nil, nil, err
}
traceLog.Info("Get pod hostname", "pod-ip", podIP)
hostname = terraform.GetRunnerHostname(podIP, r.ClusterDomain)
traceLog.Info("Get pod coordinates", "pod-ip", podIP, "pod-hostname", terraform.Name)
if r.UsePodSubdomainResolution {
hostname = terraform.GetRunnerHostname(terraform.Name, r.ClusterDomain)
} else {
hostname = terraform.GetRunnerHostname(podIP, r.ClusterDomain)
}
}

traceLog.Info("Pod hostname set", "hostname", hostname)
Expand Down Expand Up @@ -254,7 +258,7 @@ func (r *TerraformReconciler) runnerPodSpec(terraform infrav1.Terraform, tlsSecr
resources = *terraform.Spec.RunnerPodTemplate.Spec.Resources
}

return v1.PodSpec{
podSpec := v1.PodSpec{
TerminationGracePeriodSeconds: gracefulTermPeriod,
InitContainers: terraform.Spec.RunnerPodTemplate.Spec.InitContainers,
Containers: []v1.Container{
Expand Down Expand Up @@ -288,6 +292,13 @@ func (r *TerraformReconciler) runnerPodSpec(terraform infrav1.Terraform, tlsSecr
HostAliases: terraform.Spec.RunnerPodTemplate.Spec.HostAliases,
PriorityClassName: terraform.Spec.RunnerPodTemplate.Spec.PriorityClassName,
}

if r.UsePodSubdomainResolution {
podSpec.Hostname = terraform.Name
podSpec.Subdomain = "tf-runner"
}

return podSpec
}

func (r *TerraformReconciler) reconcileRunnerPod(ctx context.Context, terraform infrav1.Terraform, tlsSecret *v1.Secret, revision string) (string, error) {
Expand Down
3 changes: 2 additions & 1 deletion docs/use_tf_controller/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@
- [Use TF-controller with **GitOps dependency management**](with_GitOps_dependency_management.md)
- [Use TF-controller with **the ready-to-use AWS package**](with_the_ready_to_use_AWS_package.md)
- [Use TF-controller with **plan-only mode**](with_plan_only_mode.md)
- [Use TF-controller with **external webhooks**](with_external_webhooks.md)
- [Use TF-controller with **external webhooks**](with_external_webhooks.md)
- [Use TF-controller with Terraform Runners **exposed via hostname/subdomain**](with_tf_runner_exposed_using_hostname_subdomain.md)
Loading