diff --git a/.gitignore b/.gitignore index 839769b..fad1263 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,5 @@ coverage-report.html *.swo *~ .vscode +# macOS +.DS_Store diff --git a/api/v1beta/lightrunjavaagent_types.go b/api/v1beta/lightrunjavaagent_types.go index 9bf9b11..bef29df 100644 --- a/api/v1beta/lightrunjavaagent_types.go +++ b/api/v1beta/lightrunjavaagent_types.go @@ -23,6 +23,17 @@ import ( // Important: Run "make" to regenerate code after modifying this file // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. +// WorkloadType defines the type of workload that can be patched +// +kubebuilder:validation:Enum=Deployment;StatefulSet +type WorkloadType string + +const ( + // WorkloadTypeDeployment represents a Kubernetes Deployment + WorkloadTypeDeployment WorkloadType = "Deployment" + // WorkloadTypeStatefulSet represents a Kubernetes StatefulSet + WorkloadTypeStatefulSet WorkloadType = "StatefulSet" +) + type InitContainer struct { // Name of the volume that will be added to pod SharedVolumeName string `json:"sharedVolumeName"` @@ -38,8 +49,17 @@ type LightrunJavaAgentSpec struct { ContainerSelector []string `json:"containerSelector"` InitContainer InitContainer `json:"initContainer"` - //Name of the Deployment that will be patched - DeploymentName string `json:"deploymentName"` + // Name of the Deployment that will be patched. Deprecated, use WorkloadName and WorkloadType instead + // +optional + DeploymentName string `json:"deploymentName,omitempty"` + + // Name of the Workload that will be patched. workload can be either Deployment or StatefulSet e.g. 
my-deployment, my-statefulset + // +optional + WorkloadName string `json:"workloadName,omitempty"` + + // Type of the workload that will be patched supported values are Deployment, StatefulSet + // +optional + WorkloadType WorkloadType `json:"workloadType,omitempty"` //Name of the Secret in the same namespace contains lightrun key and conmpany id SecretName string `json:"secretName"` @@ -76,14 +96,16 @@ type LightrunJavaAgentSpec struct { type LightrunJavaAgentStatus struct { LastScheduleTime *metav1.Time `json:"lastScheduleTime,omitempty"` Conditions []metav1.Condition `json:"conditions,omitempty"` + WorkloadStatus string `json:"workloadStatus,omitempty"` DeploymentStatus string `json:"deploymentStatus,omitempty"` } //+kubebuilder:object:root=true //+kubebuilder:subresource:status //+kubebuilder:resource:shortName=lrja -//+kubebuilder:printcolumn:priority=0,name=Deployment,type=string,JSONPath=".spec.deploymentName",description="Deployment name",format="" -//+kubebuilder:printcolumn:priority=0,name="Status",type=string,JSONPath=".status.deploymentStatus",description="Status of Deployment Reconciliation",format="" +//+kubebuilder:printcolumn:priority=0,name=Workload,type=string,JSONPath=".spec.workloadName",description="Workload name",format="" +//+kubebuilder:printcolumn:priority=0,name=Type,type=string,JSONPath=".spec.workloadType",description="Workload type",format="" +//+kubebuilder:printcolumn:priority=0,name="Status",type=string,JSONPath=".status.workloadStatus",description="Status of Workload Reconciliation",format="" //+kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" // LightrunJavaAgent is the Schema for the lightrunjavaagents API diff --git a/charts/lightrun-operator/crds/lightrunjavaagent_crd.yaml b/charts/lightrun-operator/crds/lightrunjavaagent_crd.yaml index a834798..d748749 100644 --- a/charts/lightrun-operator/crds/lightrunjavaagent_crd.yaml +++ b/charts/lightrun-operator/crds/lightrunjavaagent_crd.yaml @@ 
-16,12 +16,16 @@ spec: scope: Namespaced versions: - additionalPrinterColumns: - - description: Deployment name - jsonPath: .spec.deploymentName - name: Deployment + - description: Workload name + jsonPath: .spec.workloadName + name: Workload type: string - - description: Status of Deployment Reconciliation - jsonPath: .status.deploymentStatus + - description: Workload type + jsonPath: .spec.workloadType + name: Type + type: string + - description: Status of Workload Reconciliation + jsonPath: .status.workloadStatus name: Status type: string - jsonPath: .metadata.creationTimestamp @@ -85,7 +89,8 @@ spec: type: string type: array deploymentName: - description: Name of the Deployment that will be patched + description: Name of the Deployment that will be patched. Deprecated, + use WorkloadName and WorkloadType instead type: string initContainer: properties: @@ -114,11 +119,21 @@ spec: Lightrun server hostname that will be used for downloading an agent Key and company id in the secret has to be taken from this server as well type: string + workloadName: + description: Name of the Workload that will be patched. workload can + be either Deployment or StatefulSet e.g. 
my-deployment, my-statefulset + type: string + workloadType: + description: Type of the workload that will be patched supported values + are Deployment, StatefulSet + enum: + - Deployment + - StatefulSet + type: string required: - agentEnvVarName - agentTags - containerSelector - - deploymentName - initContainer - secretName - serverHostname @@ -200,6 +215,8 @@ spec: lastScheduleTime: format: date-time type: string + workloadStatus: + type: string type: object type: object served: true diff --git a/charts/lightrun-operator/generated/rbac_manager_rules.yaml b/charts/lightrun-operator/generated/rbac_manager_rules.yaml index fa6d110..70dc372 100644 --- a/charts/lightrun-operator/generated/rbac_manager_rules.yaml +++ b/charts/lightrun-operator/generated/rbac_manager_rules.yaml @@ -33,6 +33,15 @@ - list - patch - watch +- apiGroups: + - apps + resources: + - statefulsets + verbs: + - get + - list + - patch + - watch - apiGroups: - "" resources: diff --git a/config/crd/bases/agents.lightrun.com_lightrunjavaagents.yaml b/config/crd/bases/agents.lightrun.com_lightrunjavaagents.yaml index d39c7b3..9ca3b13 100644 --- a/config/crd/bases/agents.lightrun.com_lightrunjavaagents.yaml +++ b/config/crd/bases/agents.lightrun.com_lightrunjavaagents.yaml @@ -17,12 +17,16 @@ spec: scope: Namespaced versions: - additionalPrinterColumns: - - description: Deployment name - jsonPath: .spec.deploymentName - name: Deployment + - description: Workload name + jsonPath: .spec.workloadName + name: Workload type: string - - description: Status of Deployment Reconciliation - jsonPath: .status.deploymentStatus + - description: Workload type + jsonPath: .spec.workloadType + name: Type + type: string + - description: Status of Workload Reconciliation + jsonPath: .status.workloadStatus name: Status type: string - jsonPath: .metadata.creationTimestamp @@ -86,7 +90,8 @@ spec: type: string type: array deploymentName: - description: Name of the Deployment that will be patched + description: Name of the 
Deployment that will be patched. Deprecated, + use WorkloadName and WorkloadType instead type: string initContainer: properties: @@ -115,11 +120,21 @@ spec: Lightrun server hostname that will be used for downloading an agent Key and company id in the secret has to be taken from this server as well type: string + workloadName: + description: Name of the Workload that will be patched. workload can + be either Deployment or StatefulSet e.g. my-deployment, my-statefulset + type: string + workloadType: + description: Type of the workload that will be patched supported values + are Deployment, StatefulSet + enum: + - Deployment + - StatefulSet + type: string required: - agentEnvVarName - agentTags - containerSelector - - deploymentName - initContainer - secretName - serverHostname @@ -201,6 +216,8 @@ spec: lastScheduleTime: format: date-time type: string + workloadStatus: + type: string type: object type: object served: true diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 3c66206..4e8efaa 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -39,6 +39,15 @@ rules: - list - patch - watch +- apiGroups: + - apps + resources: + - statefulsets + verbs: + - get + - list + - patch + - watch - apiGroups: - "" resources: diff --git a/config/samples/agents_v1beta_lightrunjavaagent.yaml b/config/samples/agents_v1beta_lightrunjavaagent.yaml index abff67c..a0e3bb4 100644 --- a/config/samples/agents_v1beta_lightrunjavaagent.yaml +++ b/config/samples/agents_v1beta_lightrunjavaagent.yaml @@ -7,7 +7,8 @@ spec: image: "lightruncom/k8s-operator-init-java-agent-linux:1.7.0-init.0" sharedVolumeName: lightrun-agent-init sharedVolumeMountPath: "/lightrun" - deploymentName: app + workloadName: app + workloadType: Deployment secretName: lightrun-secrets serverHostname: #for saas it will be app.lightrun.com agentEnvVarName: JAVA_TOOL_OPTIONS diff --git a/config/samples/operator.yaml b/config/samples/operator.yaml index 0ec60d1..b8c65a9 100644 --- 
a/config/samples/operator.yaml +++ b/config/samples/operator.yaml @@ -28,12 +28,16 @@ spec: scope: Namespaced versions: - additionalPrinterColumns: - - description: Deployment name - jsonPath: .spec.deploymentName - name: Deployment + - description: Workload name + jsonPath: .spec.workloadName + name: Workload type: string - - description: Status of Deployment Reconciliation - jsonPath: .status.deploymentStatus + - description: Workload type + jsonPath: .spec.workloadType + name: Type + type: string + - description: Status of Workload Reconciliation + jsonPath: .status.workloadStatus name: Status type: string - jsonPath: .metadata.creationTimestamp @@ -97,7 +101,8 @@ spec: type: string type: array deploymentName: - description: Name of the Deployment that will be patched + description: Name of the Deployment that will be patched. Deprecated, + use WorkloadName and WorkloadType instead type: string initContainer: properties: @@ -126,11 +131,21 @@ spec: Lightrun server hostname that will be used for downloading an agent Key and company id in the secret has to be taken from this server as well type: string + workloadName: + description: Name of the Workload that will be patched. workload can + be either Deployment or StatefulSet e.g. 
my-deployment, my-statefulset + type: string + workloadType: + description: Type of the workload that will be patched supported values + are Deployment, StatefulSet + enum: + - Deployment + - StatefulSet + type: string required: - agentEnvVarName - agentTags - containerSelector - - deploymentName - initContainer - secretName - serverHostname @@ -212,6 +227,8 @@ spec: lastScheduleTime: format: date-time type: string + workloadStatus: + type: string type: object type: object served: true @@ -315,6 +332,15 @@ rules: - list - patch - watch +- apiGroups: + - apps + resources: + - statefulsets + verbs: + - get + - list + - patch + - watch - apiGroups: - "" resources: diff --git a/docs/README.md b/docs/README.md index 54d703f..7b1d29e 100644 --- a/docs/README.md +++ b/docs/README.md @@ -9,7 +9,7 @@ [![Build Status](https://github.com/lightrun-platform/lightrun-k8s-operator/actions/workflows/release.yaml/badge.svg)](https://github.com/lightrun-platform/lightrun-k8s-operator/actions/workflows/release.yaml/) [![Tests](https://github.com/lightrun-platform/lightrun-k8s-operator/actions/workflows/e2e.yaml/badge.svg)](https://github.com/lightrun-platform/lightrun-k8s-operator/actions/workflows/e2e.yaml) -The ***Lightrun Kubernetes(K8s) Operator*** makes it easy to insert Lightrun agents into your K8s workloads without changing your docker or manifest files. The ***Lightrun K8s Operator*** project was initially scaffolded using [operator-sdk](https://sdk.operatorframework.io/) and [kubebuilder book](https://book.kubebuilder.io/), and aims to follow the Kubernetes [Operator pattern](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/). +The **Lightrun Kubernetes Operator** simplifies inserting Lightrun agents into your K8s workloads without modifying your Docker images or Kubernetes manifests. Built using [operator-sdk](https://sdk.operatorframework.io/) and following the Kubernetes [Operator pattern](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/). 
Table of contents ================= @@ -26,13 +26,13 @@ Table of contents ## Description -In theory for adding a Lightrun agent to an application running on Kubernetes, you must: -1. Install the agent into the Kubernetes pod. -2. Notify the running application to start using the installed agent. +The Lightrun K8s Operator automates the two steps needed to add a Lightrun agent to your Kubernetes applications: +1. Install the agent into the Kubernetes pod +2. Configure the running application to use the installed agent -The ***Lightrun K8s operator*** does those steps for you. [details](https://github.com/lightrun-platform/lightrun-k8s-operator/blob/main/docs/how.md) +For more details on how this works, see our [technical documentation](https://github.com/lightrun-platform/lightrun-k8s-operator/blob/main/docs/how.md). -> Important - [Read this before deploying to production](https://github.com/lightrun-platform/lightrun-k8s-operator/blob/main/docs/before_prod.md). +> **Important:** [Read this before deploying to production](https://github.com/lightrun-platform/lightrun-k8s-operator/blob/main/docs/before_prod.md). ### Requirements - Kubernetes >= 1.19 @@ -41,155 +41,197 @@ The ***Lightrun K8s operator*** does those steps for you. [details](https://gith To set up the Lightrun K8s operator: -1. Create namespace for the operator and test deployment -```sh -kubectl create namespace lightrun-operator -kubectl create namespace lightrun-agent-test -``` -_`lightrun-operator` namespace is hardcoded in the example `operator.yaml` due to Role and RoleBinding objects_ -_If you want to deploy operator to a different namespace - you can use helm chart_ - -2. Deploy operator to the operator namesapce -```sh -kubectl apply -f https://raw.githubusercontent.com/lightrun-platform/lightrun-k8s-operator/main/examples/operator.yaml -n lightrun-operator -``` - -3. 
Create simple deployment for test -> _App source code [PrimeMain.java](../examples/app/PrimeMain.java)_ -```sh -kubectl apply -f https://raw.githubusercontent.com/lightrun-platform/lightrun-k8s-operator/main/examples/deployment.yaml -n lightrun-agent-test -``` - -4. Download Lightrun agent config -```sh -curl https://raw.githubusercontent.com/lightrun-platform/lightrun-k8s-operator/main/examples/lightrunjavaagent.yaml > agent.yaml -``` - -5. Update the following config parameters in the `agent.yaml` file. - - serverHostname - for SaaS it is `app.lightrun.com`, for on-prem use your own hostname +1. Create namespaces for the operator and test deployment: + ```sh + kubectl create namespace lightrun-operator + kubectl create namespace lightrun-agent-test + ``` + > Note: The `lightrun-operator` namespace is hardcoded in the example `operator.yaml` due to Role and RoleBinding objects. + > To deploy the operator to a different namespace, use the Helm chart installation method. + +2. Deploy the operator: + ```sh + kubectl apply -f https://raw.githubusercontent.com/lightrun-platform/lightrun-k8s-operator/main/examples/operator.yaml -n lightrun-operator + ``` + +3. Create a test deployment: + ```sh + kubectl apply -f https://raw.githubusercontent.com/lightrun-platform/lightrun-k8s-operator/main/examples/deployment.yaml -n lightrun-agent-test + ``` + > The test application source code is available at [PrimeMain.java](../examples/app/PrimeMain.java) + +4. Download Lightrun agent config and lightrun-secret + ```sh + curl https://raw.githubusercontent.com/lightrun-platform/lightrun-k8s-operator/main/examples/lightrunjavaagent.yaml > agent.yaml + + curl https://raw.githubusercontent.com/lightrun-platform/lightrun-k8s-operator/main/examples/lightrun-secret.yaml > lightrun-secret.yaml + ``` + +5. 
Configure your Lightrun credentials: + + **In `agent.yaml`:** + ```yaml + # Update the serverHostname field + serverHostname: "app.lightrun.com" # For SaaS deployment + # For on-prem deployment, use your own hostname + ``` + + **In `lightrun-secret.yaml`:** + ```yaml + # Update the following fields: + lightrun_key: "your-lightrun-key" # Find this on the Lightrun setup agent page + pinned_cert_hash: "your-cert-hash" # Obtain from https://<serverHostname>/api/getPinnedServerCert + # Note: Authentication required + ``` + + ![Lightrun Setup Page](setup.png) + +6. Create the agent custom resource: + ```sh + kubectl apply -f lightrun-secret.yaml -n lightrun-agent-test + + kubectl apply -f agent.yaml -n lightrun-agent-test + ``` + +7. Verify the installation: + - Check the Lightrun server's agents page to confirm your new agent is registered + + ![Agents List](agents.png) - - lightrun_key - You can find this value on the set up page, 2nd step - ![](setup.png) +## Example with Helm Chart - - pinned_cert_hash - you can fetch it from **https://``/api/getPinnedServerCert** - > have to be authenticated +A [Helm chart](../charts/lightrun-operator/) is available in the repository branch `helm-repo`. -6. Create agent custom resource -```sh -kubectl apply -f agent.yaml -n lightrun-agent-test -``` +### Installation Steps -7. Go to the Lightrun server and check if you see new agent registered in the list of the agents -![](agents.png) +1. Add the Lightrun repository to your Helm repositories: + ```sh + helm repo add lightrun-k8s-operator https://lightrun-platform.github.io/lightrun-k8s-operator + ``` -## Example with Helm Chart +2.
Install the chart: + - Using default [values](../charts/lightrun-operator/values.yaml): + ```sh + helm install lightrun-k8s-operator/lightrun-k8s-operator -n lightrun-operator --create-namespace + ``` -[Helm chart](../charts/lightrun-operator/) is available in repository branch `helm-repo` -- Add the repo to your Helm repository list -```sh -helm repo add lightrun-k8s-operator https://lightrun-platform.github.io/lightrun-k8s-operator -``` + - Using custom values file: + ```sh + helm install lightrun-k8s-operator/lightrun-k8s-operator -f -n lightrun-operator --create-namespace + ``` -- Install the Helm chart: -> _Using default [values](../charts/lightrun-operator/values.yaml)_ - -```sh -helm install lightrun-k8s-operator/lightrun-k8s-operator -n lightrun-operator --create-namespace -``` + > **Note:** `helm upgrade --install` or `helm install --dry-run` may not work properly due to limitations with how Helm handles CRDs. + > For more information, see the [Helm documentation](https://helm.sh/docs/chart_best_practices/custom_resource_definitions/). - > _Using custom values file_ - -```sh -helm install lightrun-k8s-operator/lightrun-k8s-operator -f -n lightrun-operator --create-namespace -``` -> `helm upgrade --install` or `helm install --dry-run` may not work properly due to limitations of how Helm work with CRDs. -You can find more info [here](https://helm.sh/docs/chart_best_practices/custom_resource_definitions/) +3. Uninstall the chart: + ```sh + helm delete lightrun-k8s-operator + ``` + > **Note:** CRDs will not be deleted due to Helm limitations. See [Helm's documentation on CRD limitations](https://helm.sh/docs/topics/charts/#limitations-on-crds) for more details. +### Version Compatibility -- Uninstall the Helm chart. -```sh -helm delete lightrun-k8s-operator -``` -> `CRDs` will not be deleted due to Helm CRDs limitations. You can learn more about the limitations [here](https://helm.sh/docs/topics/charts/#limitations-on-crds). 
- -### Chart version vs controller version -For the sake of simplicity, we are keeping the convention of the same version for both the controller image and the Helm chart. This helps to ensure that controller actions are aligned with CRDs preventing failed resource validation errors. +For simplicity, we maintain the same version for both the controller image and the Helm chart. This ensures alignment between controller actions and CRDs, preventing resource validation errors. ## Limitations -- Operator can only patch environment variable that configured as a key/value pair - ``` +### Environment Variables +- The operator can only patch environment variables configured as key/value pairs: + ```yaml env: - name: JAVA_TOOL_OPTIONS - value: "some initital value" + value: "some initial value" ``` - if value mapped from the configMap or secret using `valueFrom`, operator will fail to update the deployment with the following error: + If the value is mapped from a configMap or secret using `valueFrom`, the operator will fail with: ``` 'Deployment.apps "" is invalid: spec.template.spec.containers[0].env[31].valueFrom: Invalid value: "": may not be specified when `value` is not empty' ``` -- If an application has [JDWR](https://en.wikipedia.org/wiki/Java_Debug_Wire_Protocol) enabled, it will cause a conflict with the Lightrun agent installed by the Lightrun K8s operator. -- You must install the correct init container for your application’s container platform. For example, _lightruncom/k8s-operator-init-java-agent-`linux`:1.7.0-init.0_. - #### Supported Platforms - - Linux - - Alpine - > Available init containers: +### Compatibility +- Applications with [JDWR](https://en.wikipedia.org/wiki/Java_Debug_Wire_Protocol) enabled will conflict with the Lightrun agent. 
+ +### Platform Support +- **Container Platforms:** + - Linux (x86_64, arm64) + - Alpine (x86_64, arm64) + + > **Available Init Containers:** > - [Java agent for linux x86_64](https://hub.docker.com/r/lightruncom/k8s-operator-init-java-agent-linux/tags) - > - [Java agent for linux arm64 ](https://hub.docker.com/r/lightruncom/k8s-operator-init-java-agent-linux-arm64) + > - [Java agent for linux arm64](https://hub.docker.com/r/lightruncom/k8s-operator-init-java-agent-linux-arm64) > - [Java agent for alpine x86_64](https://hub.docker.com/r/lightruncom/k8s-operator-init-java-agent-alpine/tags) - > - [Java agent for alpine arm64 ](https://hub.docker.com/r/lightruncom/k8s-operator-init-java-agent-alpine-arm64) -- K8s type of resources - - Deployment -- Application's language - - Java + > - [Java agent for alpine arm64](https://hub.docker.com/r/lightruncom/k8s-operator-init-java-agent-alpine-arm64) + +- **Kubernetes Resources:** + - Deployment + - StatefulSet + +- **Programming Languages:** + - Java ## Contributing Guide -If you have any idea for an improvement or find a bug do not hesitate in opening an issue, just simply fork and create a pull-request. -Please open an issue first for any big changes. +We welcome contributions to the Lightrun K8s Operator! If you have ideas for improvements or find bugs, please: -> `make post-commit-hook` - Run this command to add post commit hook. It will regenerate rules and CRD from the code after every commit, so you'll not forget to do it. - You'll need to commit those changes as well. +1. Open an issue first for any significant changes +2. Fork the repository +3. Create a pull request with your changes -### Test It Out Locally -You’ll need a Kubernetes cluster to run against. You can use [KIND](https://sigs.k8s.io/kind) or [K3S](https://k3s.io/) to get a local cluster for testing, or run against a remote cluster. 
-**Note:** When using `make` commands, your controller will automatically use the current context in your kubeconfig file (i.e. whatever cluster `kubectl cluster-info` shows). +### Development Guidelines -1. Clone repo ```sh -git clone git@github.com:lightrun-platform/lightrun-k8s-operator.git -cd lightrun-k8s-operator +make post-commit-hook ``` -2. Install the CRDs into the cluster: +Run this command to add a post-commit hook that automatically regenerates rules and CRDs after every commit, ensuring your code and resources stay in sync. -```sh -make install -``` +### Test It Out Locally -3. Run your controller (this will run in the foreground): -```sh -make run -``` +You'll need a Kubernetes cluster to run against. You can use [KIND](https://sigs.k8s.io/kind) or [K3S](https://k3s.io/) for local testing, or run against a remote cluster. -4. Open another terminal tab and deploy simple app to your cluster -```sh -kubectl apply -f ./examples/deployment.yaml -kubectl get deployments sample-deployment -``` +> **Note:** When using `make` commands, the controller will automatically use the current context in your kubeconfig file (i.e., whatever cluster `kubectl cluster-info` shows). -5. Update `lightrun_key`, `pinned_cert_hash` and `serverHostname` in the [CR example file](../examples/lightrunjavaagent.yaml) +#### Development Setup +1. Clone the repository: + ```sh + git clone git@github.com:lightrun-platform/lightrun-k8s-operator.git + cd lightrun-k8s-operator + ``` -6. Create LightrunJavaAgent custom resource -```sh -kubectl apply -f ./examples/lightrunjavaagent.yaml -``` +2. Install the CRDs into your cluster: + ```sh + make install + ``` + +3. Run the controller (this will run in the foreground): + ```sh + make run + ``` + +4. In a new terminal, deploy a sample application: + ```sh + kubectl apply -f ./examples/deployment.yaml + kubectl get deployments sample-deployment + ``` + +5. 
Prepare the Lightrun agent configuration: + - Edit the [CR example file](../examples/lightrunjavaagent.yaml) to update: + - `lightrun_key` + - Edit the [secret example file](../examples/lightrun-secret.yaml) to update: + - `pinned_cert_hash` + - `serverHostname` + +6. Apply the custom resource: + ```sh + kubectl apply -f ./examples/lightrunjavaagent.yaml + + kubectl apply -f ./examples/lightrun-secret.yaml + ``` + +After applying the custom resource, you should see the controller logs indicating it has detected the new resource. -At this point you will see in the controller logs that it recognized new resource and started to work. -If you run the following command, you will see that changes done by the controller (init container, volume, patched ENV var). +To verify the changes made by the controller (init container, volume, environment variables), run: ```sh kubectl describe deployments sample-deployment ``` diff --git a/docs/before_prod.md b/docs/before_prod.md index 1411fd8..94a9714 100644 --- a/docs/before_prod.md +++ b/docs/before_prod.md @@ -1,14 +1,14 @@ ### Important to know before deploying to production - `LightrunJavaAgent` Customer resource hardly dependent on the secret with `lightrun_key` and `pinned_cert_hash` values. It has do be deployed in the same namespace as the secret. - - `LightrunJavaAgent` CR has to be installed in the same namespace as deployment - - You need to create `LightrunJavaAgent` CR per deployment that you want to patch - - When `creating or deleting CR`, deployment will trigger `recreation of all the pods`, as Pod Template Spec will be changed - - If, for some reason, your cluster will not be able to `download init container` images from https://hub.docker.com/, your deployment will stuck in this state until it won't be resolved. 
This is the limitation of the init containers + - `LightrunJavaAgent` CR has to be installed in the same namespace as the target resource (Deployment or StatefulSet) + - You need to create `LightrunJavaAgent` CR per resource (Deployment or StatefulSet) that you want to patch + - When `creating or deleting CR`, the target resource will trigger `recreation of all the pods`, as Pod Template Spec will be changed + - If, for some reason, your cluster will not be able to `download init container` images from https://hub.docker.com/, your target resource will be stuck in this state until it is resolved. This is the limitation of the init containers - If you will change `secret` values, `agentConfig` or `agentTags`, operator will update Config Map with that data and trigger recreation of the pods to apply new config of the agent - Always check `release notes` before upgrading the operator. If CRD fields was changed you'll need to act accordingly during the upgrade - You can't have `duplicate ENV` variable in the container spec. - If you are using `gitops` tools, you'll have to tell them to ignore ENV var of the patched container. Otherwise it will try to default it as per your deployment/statefulset yaml. Other things that are changed by operator are handled with help of `managedFields`.
You can read about it [here](https://kubernetes.io/docs/reference/using-api/server-side-apply/) Example for [Argo CD](https://argo-cd.readthedocs.io/en/stable/user-guide/diffing/) ```yaml ignoreDifferences: @@ -17,4 +17,14 @@ name: jqPathExpressions: - '.spec.template.spec.containers[] | select(.name == "").env[] | select(.name == "JAVA_TOOL_OPTIONS")' + ``` + + For StatefulSets, use: + ```yaml + ignoreDifferences: + - group: apps + kind: StatefulSet + name: + jqPathExpressions: + - '.spec.template.spec.containers[] | select(.name == "").env[] | select(.name == "JAVA_TOOL_OPTIONS")' ``` \ No newline at end of file diff --git a/docs/custom_resource.md b/docs/custom_resource.md index 206de41..872b54f 100644 --- a/docs/custom_resource.md +++ b/docs/custom_resource.md @@ -16,8 +16,13 @@ spec: # Mount path where volume will be parked. Various distributions may have it's limitations. # For example you can't mount volumes to any path except `/tmp` when using AWS Fargate sharedVolumeMountPath: "/lightrun" - # Name of the deployment that you are going to patch. + # Name of the workload that you are going to patch. # Has to be in the same namespace + workloadName: app + # Type of the workload that you are going to patch. + # Has to be one of `Deployment` or `StatefulSet` + workloadType: Deployment + # deprecated, use workloadName and workloadType instead deploymentName: app # Name of the secret where agent will take `lightrun_key` and `pinned_cert_hash` from # Has to be in the same namespace @@ -55,4 +60,4 @@ stringData: pinned_cert_hash: kind: Secret type: Opaque -``` \ No newline at end of file +``` diff --git a/docs/how.md b/docs/how.md index c19d330..ff08368 100644 --- a/docs/how.md +++ b/docs/how.md @@ -7,14 +7,14 @@ (subject to how it's been installed). Every event related to these CRs triggers the reconcile loop of the controller. 
You can find logic of this loop [here](reconcile_loop.excalidraw.png) - When triggered, the controller performs several actions: - - Check if it has access to deployment + - Check if it has access to the target resource (Deployment or StatefulSet) - Fetch data from the CR secret - Create config map with agent config from CR data - - Patch the deployment: + - Patch the target resource (Deployment or StatefulSet): - insert init container - add volume - map that volume to the specified container - add/update specified ENV variable in order to let Java know where agent files are found (the mapped volume) - - After deployment is patched, k8s will `recreate all the pods` in the deployment. New Pods will be initialized with the Lightrun agent - - If user deletes the `LightrunJavaAgent` CR, the Controller will roll back all the changes to deployment. This will trigger `recreation of all pods` again + - After the target resource is patched, k8s will `recreate all the pods` in the Deployment or StatefulSet. New Pods will be initialized with the Lightrun agent + - If user deletes the `LightrunJavaAgent` CR, the Controller will roll back all the changes to the target resource. 
This will trigger `recreation of all pods` again - [High level diagram](resource_relations.excalidraw.png) of resources created/edited by the operator \ No newline at end of file diff --git a/docs/reconcile_loop.excalidraw.png b/docs/reconcile_loop.excalidraw.png index b3b557c..18011b1 100644 Binary files a/docs/reconcile_loop.excalidraw.png and b/docs/reconcile_loop.excalidraw.png differ diff --git a/docs/resource_relations.excalidraw.png b/docs/resource_relations.excalidraw.png index f6c6f2a..12758ac 100644 Binary files a/docs/resource_relations.excalidraw.png and b/docs/resource_relations.excalidraw.png differ diff --git a/examples/lightrun-secret.yaml b/examples/lightrun-secret.yaml new file mode 100644 index 0000000..99ba76e --- /dev/null +++ b/examples/lightrun-secret.yaml @@ -0,0 +1,11 @@ +--- +apiVersion: v1 +metadata: + name: lightrun-secrets +stringData: + # Lightrun key you can take from the server UI at the "setup agent" step + lightrun_key: + # Server certificate hash. It is ensuring that agent is connected to the right Lightrun server + pinned_cert_hash: +kind: Secret +type: Opaque diff --git a/examples/lightrunjavaagent.yaml b/examples/lightrunjavaagent.yaml index 75927f9..b1dea07 100644 --- a/examples/lightrunjavaagent.yaml +++ b/examples/lightrunjavaagent.yaml @@ -1,7 +1,7 @@ apiVersion: agents.lightrun.com/v1beta kind: LightrunJavaAgent metadata: - name: example-cr + name: example-deployment-cr spec: ############################################################################################### # Fields that you need to change if you want to try operator with your own deployment @@ -59,15 +59,3 @@ spec: - latest # Agent name. If not provided, pod name will be used #agentName: "operator-test-agent" - ---- -apiVersion: v1 -metadata: - name: lightrun-secrets -stringData: - # Lightrun key you can take from the server UI at the "setup agent" step - lightrun_key: - # Server certificate hash. 
It is ensuring that agent is connected to the right Lightrun server - pinned_cert_hash: -kind: Secret -type: Opaque diff --git a/examples/operator.yaml b/examples/operator.yaml index adec20d..000dfc3 100644 --- a/examples/operator.yaml +++ b/examples/operator.yaml @@ -18,12 +18,16 @@ spec: scope: Namespaced versions: - additionalPrinterColumns: - - description: Deployment name - jsonPath: .spec.deploymentName - name: Deployment + - description: Workload name + jsonPath: .spec.workloadName + name: Workload type: string - - description: Status of Deployment Reconciliation - jsonPath: .status.deploymentStatus + - description: Workload type + jsonPath: .spec.workloadType + name: Type + type: string + - description: Status of Workload Reconciliation + jsonPath: .status.workloadStatus name: Status type: string - jsonPath: .metadata.creationTimestamp @@ -87,7 +91,8 @@ spec: type: string type: array deploymentName: - description: Name of the Deployment that will be patched + description: Name of the Deployment that will be patched. Deprecated, + use WorkloadName and WorkloadType instead type: string initContainer: properties: @@ -116,11 +121,21 @@ spec: Lightrun server hostname that will be used for downloading an agent Key and company id in the secret has to be taken from this server as well type: string + workloadName: + description: Name of the Workload that will be patched. workload can + be either Deployment or StatefulSet e.g. 
my-deployment, my-statefulset + type: string + workloadType: + description: Type of the workload that will be patched supported values + are Deployment, StatefulSet + enum: + - Deployment + - StatefulSet + type: string required: - agentEnvVarName - agentTags - containerSelector - - deploymentName - initContainer - secretName - serverHostname @@ -202,6 +217,8 @@ spec: lastScheduleTime: format: date-time type: string + workloadStatus: + type: string type: object type: object served: true @@ -267,6 +284,15 @@ rules: - list - patch - watch + - apiGroups: + - apps + resources: + - statefulsets + verbs: + - get + - list + - patch + - watch - apiGroups: - "" resources: diff --git a/internal/controller/helpers.go b/internal/controller/helpers.go index c3645e2..07afaa6 100644 --- a/internal/controller/helpers.go +++ b/internal/controller/helpers.go @@ -25,24 +25,57 @@ const ( func (r *LightrunJavaAgentReconciler) mapDeploymentToAgent(ctx context.Context, obj client.Object) []reconcile.Request { deployment := obj.(*appsv1.Deployment) + // TODO: remove this once we deprecate deploymentNameIndexField + var agents agentv1beta.LightrunJavaAgentList + if err := r.List(ctx, &agents, + client.InNamespace(deployment.Namespace), + client.MatchingFields{ + deploymentNameIndexField: deployment.Name, // old agents + }, + ); err != nil { + r.Log.Error(err, "failed to list by deploymentNameIndexField") + } + // New indexer for workloadNameIndexField + var newAgents agentv1beta.LightrunJavaAgentList + if err := r.List(ctx, &newAgents, + client.InNamespace(deployment.Namespace), + client.MatchingFields{ + workloadNameIndexField: deployment.Name, // new agents + }, + ); err != nil { + r.Log.Error(err, "failed to list by workloadNameIndexField") + } - var lightrunJavaAgentList agentv1beta.LightrunJavaAgentList + // Combine results + agents.Items = append(agents.Items, newAgents.Items...) 
- if err := r.List(ctx, &lightrunJavaAgentList, - client.InNamespace(deployment.Namespace), - client.MatchingFields{deploymentNameIndexField: deployment.Name}, + requests := make([]reconcile.Request, len(agents.Items)) + for i, a := range agents.Items { + requests[i] = reconcile.Request{NamespacedName: client.ObjectKeyFromObject(&a)} + } + return requests +} + +func (r *LightrunJavaAgentReconciler) mapStatefulSetToAgent(ctx context.Context, obj client.Object) []reconcile.Request { + statefulSet := obj.(*appsv1.StatefulSet) + + var agents agentv1beta.LightrunJavaAgentList + + if err := r.List(ctx, &agents, + client.InNamespace(statefulSet.Namespace), + client.MatchingFields{workloadNameIndexField: statefulSet.Name}, ); err != nil { r.Log.Error(err, "could not list LightrunJavaAgentList. "+ - "change to deployment will not be reconciled.", - deployment.Name, deployment.Namespace) + "change to statefulset will not be reconciled.", + statefulSet.Name, statefulSet.Namespace) return nil } - requests := make([]reconcile.Request, len(lightrunJavaAgentList.Items)) + requests := make([]reconcile.Request, len(agents.Items)) - for i, lightrunJavaAgent := range lightrunJavaAgentList.Items { + for i, agent := range agents.Items { requests[i] = reconcile.Request{ - NamespacedName: client.ObjectKeyFromObject(&lightrunJavaAgent), + NamespacedName: client.ObjectKeyFromObject(&agent), } } return requests @@ -96,6 +129,7 @@ func (r *LightrunJavaAgentReconciler) successStatus(ctx context.Context, instanc Status: metav1.ConditionTrue, } SetStatusCondition(&instance.Status.Conditions, condition) + instance.Status.WorkloadStatus = r.findLastConditionType(&instance.Status.Conditions) instance.Status.DeploymentStatus = r.findLastConditionType(&instance.Status.Conditions) err := r.Status().Update(ctx, instance) if err != nil { @@ -122,6 +156,7 @@ func (r *LightrunJavaAgentReconciler) errorStatus(ctx context.Context, instance Status: metav1.ConditionTrue, } 
SetStatusCondition(&instance.Status.Conditions, condition) + instance.Status.WorkloadStatus = r.findLastConditionType(&instance.Status.Conditions) instance.Status.DeploymentStatus = r.findLastConditionType(&instance.Status.Conditions) err := r.Status().Update(ctx, instance) if err != nil { diff --git a/internal/controller/lightrunjavaagent_controller.go b/internal/controller/lightrunjavaagent_controller.go index e0c0b0b..90ae8d1 100644 --- a/internal/controller/lightrunjavaagent_controller.go +++ b/internal/controller/lightrunjavaagent_controller.go @@ -19,6 +19,7 @@ package controller import ( "context" "errors" + "fmt" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -37,6 +38,7 @@ import ( const ( deploymentNameIndexField = "spec.deployment" + workloadNameIndexField = "spec.workloadName" secretNameIndexField = "spec.secret" finalizerName = "agent.finalizers.lightrun.com" ) @@ -56,32 +58,88 @@ type LightrunJavaAgentReconciler struct { //+kubebuilder:rbac:groups=agents.lightrun.com,resources=lightrunjavaagents/finalizers,verbs=update //+kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;watch;list;patch +//+kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=get;watch;list;patch //+kubebuilder:rbac:groups=core,resources=secrets,verbs=get;watch;list func (r *LightrunJavaAgentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { log := r.Log.WithValues("lightrunJavaAgent", req.NamespacedName) - fieldManager := "lightrun-conrtoller" lightrunJavaAgent := &agentv1beta.LightrunJavaAgent{} if err = r.Get(ctx, req.NamespacedName, lightrunJavaAgent); err != nil { return ctrl.Result{}, client.IgnoreNotFound(err) } + // Determine which workload type to reconcile + workloadType, err := r.determineWorkloadType(lightrunJavaAgent) + if err != nil { + log.Error(err, "failed to determine workload type") + return 
r.errorStatus(ctx, lightrunJavaAgent, err) + } + switch workloadType { + case agentv1beta.WorkloadTypeDeployment: + return r.reconcileDeployment(ctx, lightrunJavaAgent, req.Namespace) + case agentv1beta.WorkloadTypeStatefulSet: + return r.reconcileStatefulSet(ctx, lightrunJavaAgent, req.Namespace) + default: + return r.errorStatus(ctx, lightrunJavaAgent, fmt.Errorf("unsupported workload type: %s", workloadType)) + } +} + +func (r *LightrunJavaAgentReconciler) determineWorkloadType(lightrunJavaAgent *agentv1beta.LightrunJavaAgent) (agentv1beta.WorkloadType, error) { + // Get the spec from the LightrunJavaAgent resource + spec := lightrunJavaAgent.Spec + + // Check if legacy deploymentName field is configured + var isDeploymentConfigured bool = spec.DeploymentName != "" + // Check if new workload configuration fields are set + var isWorkloadConfigured bool = spec.WorkloadName != "" && spec.WorkloadType != "" + + // Error if both legacy and new configuration methods are used + if isDeploymentConfigured && isWorkloadConfigured { + return "", errors.New("invalid configuration: use either deploymentName (legacy) OR workloadName with workloadType, not both") + } + + // Error if neither configuration method is used + if !isDeploymentConfigured && !isWorkloadConfigured { + return "", errors.New("invalid configuration: must set either DeploymentName (legacy) or WorkloadName with WorkloadType") + } + if isDeploymentConfigured { + r.Log.Info("Using deprecated field deploymentName, consider migrating to workloadName and workloadType") + return agentv1beta.WorkloadTypeDeployment, nil + } + return spec.WorkloadType, nil +} + +// reconcileDeployment handles the reconciliation logic for Deployment workloads +func (r *LightrunJavaAgentReconciler) reconcileDeployment(ctx context.Context, lightrunJavaAgent *agentv1beta.LightrunJavaAgent, namespace string) (ctrl.Result, error) { + // Get the workload name - use DeploymentName for backward compatibility + // or WorkloadName for newer CR 
versions + deploymentName := lightrunJavaAgent.Spec.WorkloadName + if deploymentName == "" && lightrunJavaAgent.Spec.DeploymentName != "" { + // Fall back to legacy field if WorkloadName isn't set + deploymentName = lightrunJavaAgent.Spec.DeploymentName + } + if deploymentName == "" { + return r.errorStatus(ctx, lightrunJavaAgent, errors.New("unable to reconcile deployment: missing workloadName or deploymentName(legacy and deprecated)")) + } + log := r.Log.WithValues("lightrunJavaAgent", lightrunJavaAgent.Name, "deployment", deploymentName) + fieldManager := "lightrun-conrtoller" + deplNamespacedObj := client.ObjectKey{ - Name: lightrunJavaAgent.Spec.DeploymentName, - Namespace: req.Namespace, + Name: deploymentName, + Namespace: namespace, } originalDeployment := &appsv1.Deployment{} err = r.Get(ctx, deplNamespacedObj, originalDeployment) if err != nil { // Deployment not found if client.IgnoreNotFound(err) == nil { - log.Info("Deployment not found. Verify name/namespace", "Deployment", lightrunJavaAgent.Spec.DeploymentName) + log.Info("Deployment not found. Verify name/namespace", "Deployment", deploymentName) // remove our finalizer from the list and update it. 
err = r.removeFinalizer(ctx, lightrunJavaAgent, finalizerName) if err != nil { return r.errorStatus(ctx, lightrunJavaAgent, err) } - return r.errorStatus(ctx, lightrunJavaAgent, errors.New("deployment not found")) + return r.errorStatus(ctx, lightrunJavaAgent, errors.New("deployment not found: "+deploymentName)) } else { log.Error(err, "unable to fetch deployment") return r.errorStatus(ctx, lightrunJavaAgent, err) @@ -90,7 +148,7 @@ func (r *LightrunJavaAgentReconciler) Reconcile(ctx context.Context, req ctrl.Re if oldLrjaName, ok := originalDeployment.Annotations[annotationAgentName]; ok && oldLrjaName != lightrunJavaAgent.Name { log.Error(err, "Deployment already patched by LightrunJavaAgent", "Existing LightrunJavaAgent", oldLrjaName) - return r.errorStatus(ctx, lightrunJavaAgent, errors.New("deployment already patched")) + return r.errorStatus(ctx, lightrunJavaAgent, errors.New("deployment already patched: "+deploymentName)) } deploymentApplyConfig, err := appsv1ac.ExtractDeployment(originalDeployment, fieldManager) @@ -106,7 +164,7 @@ func (r *LightrunJavaAgentReconciler) Reconcile(ctx context.Context, req ctrl.Re log.V(2).Info("Searching for secret", "Name", lightrunJavaAgent.Spec.SecretName) secretNamespacedObj := client.ObjectKey{ Name: lightrunJavaAgent.Spec.SecretName, - Namespace: req.Namespace, + Namespace: namespace, } secret = &corev1.Secret{} err = r.Get(ctx, secretNamespacedObj, secret) @@ -119,7 +177,7 @@ func (r *LightrunJavaAgentReconciler) Reconcile(ctx context.Context, req ctrl.Re // Ensure that finalizer is in place if !containsString(lightrunJavaAgent.ObjectMeta.Finalizers, finalizerName) { - log.Info("Start working on deployment", "Deployment", lightrunJavaAgent.Spec.DeploymentName) + log.Info("Start working on deployment", "Deployment", deploymentName) log.Info("Adding finalizer") err = r.addFinalizer(ctx, lightrunJavaAgent, finalizerName) if err != nil { @@ -177,7 +235,7 @@ func (r *LightrunJavaAgentReconciler) Reconcile(ctx 
context.Context, req ctrl.Re return r.errorStatus(ctx, lightrunJavaAgent, err) } - log.Info("Deployment returned to original state", "Deployment", lightrunJavaAgent.Spec.DeploymentName) + log.Info("Deployment returned to original state", "Deployment", deploymentName) return r.successStatus(ctx, lightrunJavaAgent, reconcileTypeProgressing) } else { // Nothing to do here @@ -220,10 +278,10 @@ func (r *LightrunJavaAgentReconciler) Reconcile(ctx context.Context, req ctrl.Re } return r.errorStatus(ctx, lightrunJavaAgent, err) } - cmDataHash := hash(cm.Data["config"] + cm.Data["metadata"]) + cmDataHash := configMapDataHash(cm.Data) // Server side apply - log.V(2).Info("Patching deployment, SSA", "Deployment", lightrunJavaAgent.Spec.DeploymentName, "LightunrJavaAgent", lightrunJavaAgent.Name) + log.V(2).Info("Patching deployment, SSA", "Deployment", deploymentName, "LightunrJavaAgent", lightrunJavaAgent.Name) err = r.patchDeployment(lightrunJavaAgent, secret, originalDeployment, deploymentApplyConfig, cmDataHash) if err != nil { log.Error(err, "unable to patch deployment") @@ -285,32 +343,291 @@ func (r *LightrunJavaAgentReconciler) Reconcile(ctx context.Context, req ctrl.Re } // Update status to Healthy - log.V(1).Info("Reconciling finished successfully", "Deployment", lightrunJavaAgent.Spec.DeploymentName, "LightunrJavaAgent", lightrunJavaAgent.Name) + log.V(1).Info("Reconciling finished successfully", "Deployment", deploymentName, "LightunrJavaAgent", lightrunJavaAgent.Name) return r.successStatus(ctx, lightrunJavaAgent, reconcileTypeReady) } -// SetupWithManager sets up the controller with the Manager. 
+// reconcileStatefulSet handles the reconciliation logic for StatefulSet workloads +func (r *LightrunJavaAgentReconciler) reconcileStatefulSet(ctx context.Context, lightrunJavaAgent *agentv1beta.LightrunJavaAgent, namespace string) (ctrl.Result, error) { + log := r.Log.WithValues("lightrunJavaAgent", lightrunJavaAgent.Name, "statefulSet", lightrunJavaAgent.Spec.WorkloadName) + fieldManager := "lightrun-controller" + statefulSetName := lightrunJavaAgent.Spec.WorkloadName + if statefulSetName == "" { + return r.errorStatus(ctx, lightrunJavaAgent, errors.New("unable to reconcile statefulset: missing workloadName field")) + } + stsNamespacedObj := client.ObjectKey{ + Name: lightrunJavaAgent.Spec.WorkloadName, + Namespace: namespace, + } + originalStatefulSet := &appsv1.StatefulSet{} + err = r.Get(ctx, stsNamespacedObj, originalStatefulSet) + if err != nil { + // StatefulSet not found + if client.IgnoreNotFound(err) == nil { + log.Info("StatefulSet not found. Verify name/namespace", "StatefulSet", lightrunJavaAgent.Spec.WorkloadName) + // remove our finalizer from the list and update it. 
+ err = r.removeFinalizer(ctx, lightrunJavaAgent, finalizerName) + if err != nil { + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + return r.errorStatus(ctx, lightrunJavaAgent, errors.New("statefulset not found: "+statefulSetName)) + } else { + log.Error(err, "unable to fetch statefulset") + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + } + + // Check if this LightrunJavaAgent is being deleted + if !lightrunJavaAgent.ObjectMeta.DeletionTimestamp.IsZero() { + // The object is being deleted + if containsString(lightrunJavaAgent.ObjectMeta.Finalizers, finalizerName) { + // our finalizer is present, so lets handle any cleanup operations + + // Restore original StatefulSet (unpatch) + // Volume and init container + log.Info("Unpatching StatefulSet", "StatefulSet", lightrunJavaAgent.Spec.WorkloadName) + + originalStatefulSet = &appsv1.StatefulSet{} + err = r.Get(ctx, stsNamespacedObj, originalStatefulSet) + if err != nil { + if client.IgnoreNotFound(err) == nil { + log.Info("StatefulSet not found", "StatefulSet", lightrunJavaAgent.Spec.WorkloadName) + // remove our finalizer from the list and update it. 
+ log.Info("Removing finalizer") + err = r.removeFinalizer(ctx, lightrunJavaAgent, finalizerName) + if err != nil { + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + // Successfully removed finalizer and nothing to restore + return r.successStatus(ctx, lightrunJavaAgent, reconcileTypeReady) + } + log.Error(err, "unable to unpatch statefulset", "StatefulSet", lightrunJavaAgent.Spec.WorkloadName) + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + + // Revert environment variable modifications + clientSidePatch := client.MergeFrom(originalStatefulSet.DeepCopy()) + for i, container := range originalStatefulSet.Spec.Template.Spec.Containers { + for _, targetContainer := range lightrunJavaAgent.Spec.ContainerSelector { + if targetContainer == container.Name { + r.unpatchJavaToolEnv(originalStatefulSet.Annotations, &originalStatefulSet.Spec.Template.Spec.Containers[i]) + } + } + } + delete(originalStatefulSet.Annotations, annotationPatchedEnvName) + delete(originalStatefulSet.Annotations, annotationPatchedEnvValue) + delete(originalStatefulSet.Annotations, annotationAgentName) + err = r.Patch(ctx, originalStatefulSet, clientSidePatch) + if err != nil { + log.Error(err, "failed to unpatch statefulset environment variables") + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + + // Remove Volumes and init container + emptyApplyConfig := appsv1ac.StatefulSet(stsNamespacedObj.Name, stsNamespacedObj.Namespace) + obj, err := runtime.DefaultUnstructuredConverter.ToUnstructured(emptyApplyConfig) + if err != nil { + log.Error(err, "failed to convert StatefulSet to unstructured") + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + patch := &unstructured.Unstructured{ + Object: obj, + } + err = r.Patch(ctx, patch, client.Apply, &client.PatchOptions{ + FieldManager: fieldManager, + Force: pointer.Bool(true), + }) + if err != nil { + log.Error(err, "failed to unpatch statefulset") + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + + // remove our 
finalizer from the list and update it. + log.Info("Removing finalizer") + err = r.removeFinalizer(ctx, lightrunJavaAgent, finalizerName) + if err != nil { + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + + log.Info("StatefulSet returned to original state", "StatefulSet", lightrunJavaAgent.Spec.WorkloadName) + return r.successStatus(ctx, lightrunJavaAgent, reconcileTypeProgressing) + } + // Nothing to do here + return r.successStatus(ctx, lightrunJavaAgent, reconcileTypeProgressing) + } + + // Check if already patched by another LightrunJavaAgent + if oldLrjaName, ok := originalStatefulSet.Annotations[annotationAgentName]; ok && oldLrjaName != lightrunJavaAgent.Name { + log.Error(err, "StatefulSet already patched by LightrunJavaAgent", "Existing LightrunJavaAgent", oldLrjaName) + return r.errorStatus(ctx, lightrunJavaAgent, errors.New("statefulset :"+statefulSetName+" already patched")) + } + + // Add finalizer if not already present + if !containsString(lightrunJavaAgent.ObjectMeta.Finalizers, finalizerName) { + log.V(2).Info("Adding finalizer") + err = r.addFinalizer(ctx, lightrunJavaAgent, finalizerName) + if err != nil { + log.Error(err, "unable to add finalizer") + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + } + + // Get the secret + secretObj := client.ObjectKey{ + Name: lightrunJavaAgent.Spec.SecretName, + Namespace: namespace, + } + secret = &corev1.Secret{} + err = r.Get(ctx, secretObj, secret) + if err != nil { + log.Error(err, "unable to fetch Secret", "Secret", lightrunJavaAgent.Spec.SecretName) + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + + // Verify that env var won't exceed 1024 chars + agentArg, err := agentEnvVarArgument(lightrunJavaAgent.Spec.InitContainer.SharedVolumeMountPath, lightrunJavaAgent.Spec.AgentCliFlags) + if err != nil { + log.Error(err, "agentEnvVarArgument exceeds 1024 chars") + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + + // Create config map + log.V(2).Info("Reconciling config map with 
agent configuration") + configMap, err := r.createAgentConfig(lightrunJavaAgent) + if err != nil { + log.Error(err, "unable to create configMap") + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + applyOpts := []client.PatchOption{client.ForceOwnership, client.FieldOwner("lightrun-controller")} + + err = r.Patch(ctx, &configMap, client.Apply, applyOpts...) + if err != nil { + log.Error(err, "unable to apply configMap") + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + + // Calculate ConfigMap data hash + cmDataHash := configMapDataHash(configMap.Data) + + // Extract StatefulSet for applying changes + statefulSetApplyConfig, err := appsv1ac.ExtractStatefulSet(originalStatefulSet, fieldManager) + if err != nil { + log.Error(err, "failed to extract StatefulSet") + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + + // Server side apply for StatefulSet changes + log.V(2).Info("Patching StatefulSet", "StatefulSet", lightrunJavaAgent.Spec.WorkloadName, "LightunrJavaAgent", lightrunJavaAgent.Name) + err = r.patchStatefulSet(lightrunJavaAgent, secret, originalStatefulSet, statefulSetApplyConfig, cmDataHash) + if err != nil { + log.Error(err, "failed to patch statefulset") + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + + obj, err := runtime.DefaultUnstructuredConverter.ToUnstructured(statefulSetApplyConfig) + if err != nil { + log.Error(err, "failed to convert StatefulSet to unstructured") + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + patch := &unstructured.Unstructured{ + Object: obj, + } + err = r.Patch(ctx, patch, client.Apply, &client.PatchOptions{ + FieldManager: fieldManager, + Force: pointer.Bool(true), + }) + if err != nil { + log.Error(err, "failed to patch statefulset") + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + + // Client side patch (we can't rollback JAVA_TOOL_OPTIONS env with server side apply) + log.V(2).Info("Patching Java Env", "StatefulSet", lightrunJavaAgent.Spec.WorkloadName, "LightunrJavaAgent", 
lightrunJavaAgent.Name) + originalStatefulSet = &appsv1.StatefulSet{} + err = r.Get(ctx, stsNamespacedObj, originalStatefulSet) + if err != nil { + if client.IgnoreNotFound(err) == nil { + log.Info("StatefulSet not found", "StatefulSet", lightrunJavaAgent.Spec.WorkloadName) + err = r.removeFinalizer(ctx, lightrunJavaAgent, finalizerName) + if err != nil { + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + return r.errorStatus(ctx, lightrunJavaAgent, errors.New("statefulset not found")) + } + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + clientSidePatch := client.MergeFrom(originalStatefulSet.DeepCopy()) + for i, container := range originalStatefulSet.Spec.Template.Spec.Containers { + for _, targetContainer := range lightrunJavaAgent.Spec.ContainerSelector { + if targetContainer == container.Name { + err = r.patchJavaToolEnv(originalStatefulSet.Annotations, &originalStatefulSet.Spec.Template.Spec.Containers[i], lightrunJavaAgent.Spec.AgentEnvVarName, agentArg) + if err != nil { + log.Error(err, "failed to patch "+lightrunJavaAgent.Spec.AgentEnvVarName) + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + } + } + } + originalStatefulSet.Annotations[annotationPatchedEnvName] = lightrunJavaAgent.Spec.AgentEnvVarName + originalStatefulSet.Annotations[annotationPatchedEnvValue] = agentArg + err = r.Patch(ctx, originalStatefulSet, clientSidePatch) + if err != nil { + log.Error(err, "failed to patch "+lightrunJavaAgent.Spec.AgentEnvVarName) + return r.errorStatus(ctx, lightrunJavaAgent, err) + } + + // Update status to Healthy + log.V(1).Info("Reconciling finished successfully", "StatefulSet", lightrunJavaAgent.Spec.WorkloadName, "LightunrJavaAgent", lightrunJavaAgent.Name) + return r.successStatus(ctx, lightrunJavaAgent, reconcileTypeReady) +} + +// SetupWithManager configures the controller with the Manager and sets up watches and indexers. 
+// It creates several field indexers to enable efficient lookups of LightrunJavaAgent CRs based on: +// - DeploymentName (legacy field) +// - WorkloadName (newer field that replaces DeploymentName) +// - SecretName +// +// It also sets up watches for Deployments, StatefulSets, and Secrets so the controller can +// react to changes in these resources that are referenced by LightrunJavaAgent CRs. func (r *LightrunJavaAgentReconciler) SetupWithManager(mgr ctrl.Manager) error { - // Add spec.container_selector.deployment field to cache for future filtering + // Index field for deployments - allows looking up LightrunJavaAgents by deploymentName + // This is used for legacy support where DeploymentName was used instead of WorkloadName + // TODO: remove this once we deprecate deploymentNameIndexField err = mgr.GetFieldIndexer().IndexField( context.Background(), &agentv1beta.LightrunJavaAgent{}, deploymentNameIndexField, func(object client.Object) []string { - lightrunJavaAgent := object.(*agentv1beta.LightrunJavaAgent) - - if lightrunJavaAgent.Spec.DeploymentName == "" { + agent := object.(*agentv1beta.LightrunJavaAgent) + if agent.Spec.DeploymentName == "" { return nil } - - return []string{lightrunJavaAgent.Spec.DeploymentName} + r.Log.Info("Indexing DeploymentName", "DeploymentName", agent.Spec.DeploymentName) + return []string{agent.Spec.DeploymentName} }) + if err != nil { + return err + } + // Index field for workloads by name - allows looking up LightrunJavaAgents by WorkloadName + err = mgr.GetFieldIndexer().IndexField( + context.Background(), + &agentv1beta.LightrunJavaAgent{}, + workloadNameIndexField, + func(object client.Object) []string { + agent := object.(*agentv1beta.LightrunJavaAgent) + if agent.Spec.WorkloadName == "" { + return nil + } + r.Log.Info("Indexing WorkloadName", "WorkloadName", agent.Spec.WorkloadName) + return []string{agent.Spec.WorkloadName} + }) if err != nil { return err } - // Add spec.container_selector.secret field to cache for 
future filtering + // Index field for secrets - allows looking up LightrunJavaAgents by SecretName + // This enables the controller to find LightrunJavaAgents affected by Secret changes err = mgr.GetFieldIndexer().IndexField( context.Background(), &agentv1beta.LightrunJavaAgent{}, @@ -329,13 +646,22 @@ func (r *LightrunJavaAgentReconciler) SetupWithManager(mgr ctrl.Manager) error { return err } + // Configure the controller builder: + // - For: register LightrunJavaAgent as the primary resource this controller reconciles + // - Watches: set up event handlers to watch for changes in related resources: + // * Deployments: reconcile LightrunJavaAgents when their target Deployment changes + // * StatefulSets: reconcile LightrunJavaAgents when their target StatefulSet changes + // * Secrets: reconcile LightrunJavaAgents when their referenced Secret changes return ctrl.NewControllerManagedBy(mgr). For(&agentv1beta.LightrunJavaAgent{}). - Owns(&corev1.ConfigMap{}). Watches( &appsv1.Deployment{}, handler.EnqueueRequestsFromMapFunc(r.mapDeploymentToAgent), ). + Watches( + &appsv1.StatefulSet{}, + handler.EnqueueRequestsFromMapFunc(r.mapStatefulSetToAgent), + ). Watches( &corev1.Secret{}, handler.EnqueueRequestsFromMapFunc(r.mapSecretToAgent), diff --git a/internal/controller/lightrunjavaagent_controller_test.go b/internal/controller/lightrunjavaagent_controller_test.go index 46aaa83..2cdeb03 100644 --- a/internal/controller/lightrunjavaagent_controller_test.go +++ b/internal/controller/lightrunjavaagent_controller_test.go @@ -13,28 +13,31 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" ) var _ = Describe("LightrunJavaAgent controller", func() { // Define utility constants for object names and testing timeouts/durations and intervals. 
const ( - lragent1Name = "lragent" - deployment = "app-deployment" - secret = "agent-secret" - server = "example.lightrun.com" - agentName = "coolio-agent" - timeout = time.Second * 10 - duration = time.Second * 10 - interval = time.Millisecond * 250 - wrongNamespace = "wrong-namespace" - initContainerImage = "lightruncom/lightrun-init-agent:latest" - agentPlatform = "linux" - initVolumeName = "lightrun-agent-init" - javaEnv = "JAVA_TOOL_OPTIONS" - defaultAgentPath = "-agentpath:/lightrun/agent/lightrun_agent.so" - agentCliFlags = "--lightrun_extra_class_path=" - javaEnvNonEmptyValue = "-Djava.net.preferIPv4Stack=true" + lragent1Name = "lragent" + deployment = "app-deployment" + statefulset = "app-statefulset" + secretName = "agent-secret" + server = "example.lightrun.com" + agentName = "coolio-agent" + timeout = time.Second * 10 + duration = time.Second * 10 + interval = time.Millisecond * 250 + wrongNamespace = "wrong-namespace" + initContainerImage = "lightruncom/lightrun-init-agent:latest" + agentPlatform = "linux" + initVolumeName = "lightrun-agent-init" + javaEnv = "JAVA_TOOL_OPTIONS" + defaultAgentPath = "-agentpath:/lightrun/agent/lightrun_agent.so" + agentCliFlags = "--lightrun_extra_class_path=" + javaEnvNonEmptyValue = "-Djava.net.preferIPv4Stack=true" + reconcileTypeNotProgressing = "ReconcileFailed" ) var containerSelector = []string{"app", "app2"} var agentConfig map[string]string = map[string]string{ @@ -109,6 +112,24 @@ var _ = Describe("LightrunJavaAgent controller", func() { Namespace: testNamespace, } + var patchedSts appsv1.StatefulSet + stsRequest := types.NamespacedName{ + Name: statefulset, + Namespace: testNamespace, + } + + var lrAgentSts agentsv1beta.LightrunJavaAgent + lrAgentStsRequest := types.NamespacedName{ + Name: "lragent-sts", + Namespace: testNamespace, + } + + var lrAgentBothResource agentsv1beta.LightrunJavaAgent + lrAgentBothRequest := types.NamespacedName{ + Name: "lragent-both", + Namespace: testNamespace, + } + ctx := 
context.Background() Context("When setting up the test environment", func() { It("Should create a test Namespace", func() { @@ -129,7 +150,7 @@ var _ = Describe("LightrunJavaAgent controller", func() { } Expect(k8sClient.Create(ctx, &ns)).Should(Succeed()) }) - It("Should create LightrunJavaAgent custom resource", func() { + It("Should create LightrunJavaAgent custom resources", func() { By("Creating a first LightrunJavaAgent resource") lrAgent := agentsv1beta.LightrunJavaAgent{ ObjectMeta: metav1.ObjectMeta{ @@ -138,7 +159,7 @@ var _ = Describe("LightrunJavaAgent controller", func() { }, Spec: agentsv1beta.LightrunJavaAgentSpec{ DeploymentName: deployment, - SecretName: secret, + SecretName: secretName, ServerHostname: server, AgentName: agentName, AgentTags: agentTags, @@ -163,7 +184,7 @@ var _ = Describe("LightrunJavaAgent controller", func() { }, Spec: agentsv1beta.LightrunJavaAgentSpec{ DeploymentName: deployment + "-2", - SecretName: secret, + SecretName: secretName, ServerHostname: server, AgentName: agentName, AgentTags: agentTags, @@ -182,13 +203,65 @@ var _ = Describe("LightrunJavaAgent controller", func() { By("Creating a secret") secret := corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ - Name: secret, + Name: secretName, Namespace: testNamespace, }, StringData: secretData, } Expect(k8sClient.Create(ctx, &secret)).Should(Succeed()) + By("Creating a StatefulSet-targeting LightrunJavaAgent resource") + lrAgentSts := agentsv1beta.LightrunJavaAgent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "lragent-sts", + Namespace: testNamespace, + }, + Spec: agentsv1beta.LightrunJavaAgentSpec{ + WorkloadName: statefulset, + WorkloadType: agentsv1beta.WorkloadTypeStatefulSet, + SecretName: secretName, + ServerHostname: server, + AgentName: agentName, + AgentTags: agentTags, + AgentConfig: agentConfig, + AgentCliFlags: agentCliFlags, + AgentEnvVarName: javaEnv, + ContainerSelector: containerSelector, + InitContainer: agentsv1beta.InitContainer{ + Image: initContainerImage, + 
SharedVolumeName: initVolumeName, + SharedVolumeMountPath: "/lightrun", + }, + }, + } + Expect(k8sClient.Create(ctx, &lrAgentSts)).Should(Succeed()) + + By("Creating a LightrunJavaAgent resource with both Deployment and StatefulSet specified (for validation test)") + lrAgentBothResource = agentsv1beta.LightrunJavaAgent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "lragent-both", + Namespace: testNamespace, + }, + Spec: agentsv1beta.LightrunJavaAgentSpec{ + DeploymentName: deployment, + WorkloadName: statefulset, + WorkloadType: agentsv1beta.WorkloadTypeStatefulSet, + SecretName: secretName, + ServerHostname: server, + AgentName: agentName, + AgentTags: agentTags, + AgentConfig: agentConfig, + AgentCliFlags: agentCliFlags, + AgentEnvVarName: javaEnv, + ContainerSelector: containerSelector, + InitContainer: agentsv1beta.InitContainer{ + Image: initContainerImage, + SharedVolumeName: initVolumeName, + SharedVolumeMountPath: "/lightrun", + }, + }, + } + Expect(k8sClient.Create(ctx, &lrAgentBothResource)).Should(Succeed()) }) }) @@ -358,7 +431,8 @@ var _ = Describe("LightrunJavaAgent controller", func() { }) It("Should not change hash of the configmap in the deployment metadata", func() { Eventually(func() bool { - return patchedDepl.Spec.Template.Annotations[annotationConfigMapHash] == fmt.Sprint(hash(cm.Data["config"]+cm.Data["metadata"])) + expectedHash := configMapDataHash(cm.Data) + return patchedDepl.Spec.Template.Annotations[annotationConfigMapHash] == fmt.Sprint(expectedHash) }).Should(BeTrue()) }) @@ -643,7 +717,7 @@ var _ = Describe("LightrunJavaAgent controller", func() { if err := k8sClient.Get(ctx, lrAgentRequest2, &lrAgent2); err != nil { return false } - return lrAgent2.Status.DeploymentStatus == "Ready" + return lrAgent2.Status.WorkloadStatus == "Ready" }).Should(BeTrue()) }) @@ -656,7 +730,7 @@ var _ = Describe("LightrunJavaAgent controller", func() { }, Spec: agentsv1beta.LightrunJavaAgentSpec{ DeploymentName: deployment + "-2", - SecretName: secret, + 
SecretName: secretName, ServerHostname: server, AgentName: agentName, AgentTags: agentTags, @@ -678,7 +752,7 @@ var _ = Describe("LightrunJavaAgent controller", func() { if err := k8sClient.Get(ctx, lrAgentRequest3, &lrAgent3); err != nil { return false } - return lrAgent3.Status.DeploymentStatus == "ReconcileFailed" + return lrAgent3.Status.WorkloadStatus == "ReconcileFailed" }).Should(BeTrue()) }) It("Should not add finalizer to the duplicate CR", func() { @@ -753,7 +827,7 @@ var _ = Describe("LightrunJavaAgent controller", func() { }, Spec: agentsv1beta.LightrunJavaAgentSpec{ DeploymentName: deployment + "-3", - SecretName: secret, + SecretName: secretName, ServerHostname: server, AgentName: agentName, AgentTags: agentTags, @@ -774,7 +848,7 @@ var _ = Describe("LightrunJavaAgent controller", func() { if err := k8sClient.Get(ctx, lrAgentRequest4, &lrAgent4); err != nil { return false } - return lrAgent4.Status.DeploymentStatus == "" && lrAgent4.Status.Conditions == nil + return lrAgent4.Status.WorkloadStatus == "" && lrAgent4.Status.Conditions == nil }).Should(BeTrue()) }) It("Should not patch the deployment", func() { @@ -842,7 +916,7 @@ var _ = Describe("LightrunJavaAgent controller", func() { }, Spec: agentsv1beta.LightrunJavaAgentSpec{ DeploymentName: deployment + "-4", - SecretName: secret, + SecretName: secretName, ServerHostname: server, AgentName: agentName, AgentTags: agentTags, @@ -994,4 +1068,225 @@ var _ = Describe("LightrunJavaAgent controller", func() { }) }) }) + + It("Should create StatefulSet", func() { + By("Creating StatefulSet") + ctx := context.Background() + + sts := appsv1.StatefulSet{ + TypeMeta: metav1.TypeMeta{APIVersion: appsv1.SchemeGroupVersion.String(), Kind: "StatefulSet"}, + ObjectMeta: metav1.ObjectMeta{ + Name: statefulset, + Namespace: testNamespace, + }, + Spec: appsv1.StatefulSetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "stateful-app"}, + }, + ServiceName: "stateful-app-service", + 
Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": "stateful-app"}, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "app", + Image: "busybox", + }, + { + Name: "app2", + Image: "busybox", + Env: []corev1.EnvVar{ + { + Name: javaEnv, + Value: javaEnvNonEmptyValue, + }, + }, + }, + { + Name: "no-patch", + Image: "busybox", + }, + }, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, &sts)).Should(Succeed()) + }) + + Context("When validating workload type specification", func() { + It("Should detect when both Deployment and StatefulSet are specified", func() { + var lrAgentResult agentsv1beta.LightrunJavaAgent + Eventually(func() bool { + err := k8sClient.Get(ctx, lrAgentBothRequest, &lrAgentResult) + if err != nil { + return false + } + + for _, condition := range lrAgentResult.Status.Conditions { + if condition.Type == reconcileTypeNotProgressing && condition.Status == metav1.ConditionTrue && + condition.Reason == "reconcileFailed" && strings.Contains(condition.Message, "invalid configuration: use either deploymentName (legacy) OR workloadName with workloadType, not both") { + return true + } + } + return false + }, timeout, interval).Should(BeTrue()) + + // Also verify the workload status is set correctly + Expect(lrAgentResult.Status.WorkloadStatus).To(Equal(reconcileTypeNotProgressing)) + }) + }) + + Context("When patching StatefulSet matched by CRD", func() { + It("Should add init Container to StatefulSet", func() { + Eventually(func() bool { + if err := k8sClient.Get(ctx, stsRequest, &patchedSts); err != nil { + return false + } + if len(patchedSts.Spec.Template.Spec.InitContainers) != 0 { + return true + } + return false + }, timeout, interval).Should(BeTrue()) + }) + + It("Should add volumes to StatefulSet", func() { + Eventually(func() bool { + if err := k8sClient.Get(ctx, stsRequest, &patchedSts); err != nil { + return false + } + if len(patchedSts.Spec.Template.Spec.Volumes) == 2 
{ + return patchedSts.Spec.Template.Spec.Volumes[0].Name == initVolumeName + } + return false + }, timeout, interval).Should(BeTrue()) + }) + + It("Should patch StatefulSet containers", func() { + Eventually(func() bool { + if err := k8sClient.Get(ctx, stsRequest, &patchedSts); err != nil { + return false + } + for _, c := range patchedSts.Spec.Template.Spec.Containers { + if c.Name == "app" { + for _, v := range c.VolumeMounts { + if v.Name == initVolumeName { + return true + } + } + } + } + return false + }, timeout, interval).Should(BeTrue()) + }) + + It("Should patch StatefulSet environment variables", func() { + Eventually(func() bool { + if err := k8sClient.Get(ctx, stsRequest, &patchedSts); err != nil { + return false + } + + for _, c := range patchedSts.Spec.Template.Spec.Containers { + if c.Name == "app" { + for _, e := range c.Env { + if e.Name == javaEnv && strings.Contains(e.Value, defaultAgentPath) { + return true + } + } + } + } + return false + }, timeout, interval).Should(BeTrue()) + }) + + It("Should include agent cli flags in StatefulSet", func() { + Eventually(func() bool { + if err := k8sClient.Get(ctx, stsRequest, &patchedSts); err != nil { + return false + } + + for _, c := range patchedSts.Spec.Template.Spec.Containers { + if c.Name == "app" { + for _, e := range c.Env { + if e.Name == javaEnv && strings.Contains(e.Value, agentCliFlags) { + return true + } + } + } + } + return false + }, timeout, interval).Should(BeTrue()) + }) + + It("Should add environment variables to a container that already has them in StatefulSet", func() { + Eventually(func() bool { + if err := k8sClient.Get(ctx, stsRequest, &patchedSts); err != nil { + return false + } + + for _, c := range patchedSts.Spec.Template.Spec.Containers { + if c.Name == "app2" { + for _, e := range c.Env { + if e.Name == javaEnv && strings.Contains(e.Value, defaultAgentPath) && strings.Contains(e.Value, javaEnvNonEmptyValue) { + return true + } + } + } + } + return false + }, timeout, 
interval).Should(BeTrue()) + }) + }) + + Context("When deleting LightrunJavaAgent for StatefulSet", func() { + It("Should remove the finalizer from StatefulSet-targeting LightrunJavaAgent", func() { + err := k8sClient.Get(ctx, lrAgentStsRequest, &lrAgentSts) + Expect(err).ToNot(HaveOccurred()) + + err = k8sClient.Delete(ctx, &lrAgentSts) + Expect(err).ToNot(HaveOccurred()) + + // Verify the finalizer gets removed + Eventually(func() bool { + err := k8sClient.Get(ctx, lrAgentStsRequest, &lrAgentSts) + if err != nil { + return client.IgnoreNotFound(err) == nil + } + return len(lrAgentSts.Finalizers) == 0 + }, timeout, interval).Should(BeTrue()) + }) + + It("Should restore StatefulSet to original state", func() { + Eventually(func() bool { + if err := k8sClient.Get(ctx, stsRequest, &patchedSts); err != nil { + return false + } + + // Check that the initContainer is removed + hasInitContainer := len(patchedSts.Spec.Template.Spec.InitContainers) > 0 + + // Check agent environment variables are removed + hasAgentEnv := false + for _, c := range patchedSts.Spec.Template.Spec.Containers { + if c.Name == "app" { + for _, e := range c.Env { + if e.Name == javaEnv && strings.Contains(e.Value, defaultAgentPath) { + hasAgentEnv = true + break + } + } + } + } + + // Check lightrun annotation is removed + hasAnnotation := false + _, hasAnnotation = patchedSts.Annotations[annotationAgentName] + + // All should be false for a restored statefulset + return !hasInitContainer && !hasAgentEnv && !hasAnnotation + }, timeout, interval).Should(BeTrue()) + }) + }) }) diff --git a/internal/controller/patch_funcs.go b/internal/controller/patch_funcs.go index ccf8684..68365a7 100644 --- a/internal/controller/patch_funcs.go +++ b/internal/controller/patch_funcs.go @@ -220,3 +220,140 @@ func (r *LightrunJavaAgentReconciler) unpatchJavaToolEnv(deplAnnotations map[str } } } + +// patchStatefulSet applies changes to a StatefulSet to inject the Lightrun agent +func (r *LightrunJavaAgentReconciler) 
patchStatefulSet(lightrunJavaAgent *agentv1beta.LightrunJavaAgent, secret *corev1.Secret, origStatefulSet *appsv1.StatefulSet, statefulSetApplyConfig *appsv1ac.StatefulSetApplyConfiguration, cmDataHash uint64) error { + // init spec.template.spec + statefulSetApplyConfig.WithSpec( + appsv1ac.StatefulSetSpec().WithTemplate( + corev1ac.PodTemplateSpec().WithSpec( + corev1ac.PodSpec(), + ).WithAnnotations(map[string]string{ + annotationConfigMapHash: fmt.Sprint(cmDataHash), + }, + ), + ), + ).WithAnnotations(map[string]string{ + annotationAgentName: lightrunJavaAgent.Name, + }) + + // Add volumes to the StatefulSet + r.addVolumeToStatefulSet(statefulSetApplyConfig, lightrunJavaAgent) + + // Add init container to the StatefulSet + r.addInitContainerToStatefulSet(statefulSetApplyConfig, lightrunJavaAgent, secret) + + // Patch app containers in the StatefulSet + err = r.patchStatefulSetAppContainers(lightrunJavaAgent, origStatefulSet, statefulSetApplyConfig) + if err != nil { + return err + } + + return nil +} + +func (r *LightrunJavaAgentReconciler) addVolumeToStatefulSet(statefulSetApplyConfig *appsv1ac.StatefulSetApplyConfiguration, lightrunJavaAgent *agentv1beta.LightrunJavaAgent) { + statefulSetApplyConfig.Spec.Template.Spec. + WithVolumes( + corev1ac.Volume(). + WithName(lightrunJavaAgent.Spec.InitContainer.SharedVolumeName). + WithEmptyDir( + corev1ac.EmptyDirVolumeSource(), + ), + ).WithVolumes( + corev1ac.Volume(). + WithName(cmVolumeName). + WithConfigMap( + corev1ac.ConfigMapVolumeSource(). + WithName(cmNamePrefix+lightrunJavaAgent.Name). 
+ WithItems( + corev1ac.KeyToPath().WithKey("config").WithPath("agent.config"), + corev1ac.KeyToPath().WithKey("metadata").WithPath("agent.metadata.json"), + ), + ), + ) +} + +func (r *LightrunJavaAgentReconciler) addInitContainerToStatefulSet(statefulSetApplyConfig *appsv1ac.StatefulSetApplyConfiguration, lightrunJavaAgent *agentv1beta.LightrunJavaAgent, secret *corev1.Secret) { + statefulSetApplyConfig.Spec.Template.Spec.WithInitContainers( + corev1ac.Container(). + WithName(initContainerName). + WithImage(lightrunJavaAgent.Spec.InitContainer.Image). + WithVolumeMounts( + corev1ac.VolumeMount().WithName(lightrunJavaAgent.Spec.InitContainer.SharedVolumeName).WithMountPath("/tmp/"), + corev1ac.VolumeMount().WithName(cmVolumeName).WithMountPath("/tmp/cm/"), + ).WithEnv( + corev1ac.EnvVar().WithName("LIGHTRUN_KEY").WithValueFrom( + corev1ac.EnvVarSource().WithSecretKeyRef( + corev1ac.SecretKeySelector().WithName(secret.Name).WithKey("lightrun_key"), + ), + ), + corev1ac.EnvVar().WithName("PINNED_CERT").WithValueFrom( + corev1ac.EnvVarSource().WithSecretKeyRef( + corev1ac.SecretKeySelector().WithName(secret.Name).WithKey("pinned_cert_hash"), + ), + ), + corev1ac.EnvVar().WithName("LIGHTRUN_SERVER").WithValue(lightrunJavaAgent.Spec.ServerHostname), + ). + WithResources( + corev1ac.ResourceRequirements(). + WithLimits( + corev1.ResourceList{ + corev1.ResourceCPU: *resource.NewMilliQuantity(int64(50), resource.BinarySI), + corev1.ResourceMemory: *resource.NewScaledQuantity(int64(64), resource.Scale(6)), // 64M + }, + ).WithRequests( + corev1.ResourceList{ + corev1.ResourceCPU: *resource.NewMilliQuantity(int64(50), resource.BinarySI), + corev1.ResourceMemory: *resource.NewScaledQuantity(int64(64), resource.Scale(6)), + }, + ), + ). + WithSecurityContext( + corev1ac.SecurityContext(). + WithCapabilities( + corev1ac.Capabilities().WithDrop(corev1.Capability("ALL")), + ). + WithAllowPrivilegeEscalation(false). + WithRunAsNonRoot(true). 
+ WithSeccompProfile( + corev1ac.SeccompProfile(). + WithType(corev1.SeccompProfileTypeRuntimeDefault), + ), + ), + ) +} + +func (r *LightrunJavaAgentReconciler) patchStatefulSetAppContainers(lightrunJavaAgent *agentv1beta.LightrunJavaAgent, origStatefulSet *appsv1.StatefulSet, statefulSetApplyConfig *appsv1ac.StatefulSetApplyConfiguration) error { + var found bool = false + for _, container := range origStatefulSet.Spec.Template.Spec.Containers { + for _, targetContainer := range lightrunJavaAgent.Spec.ContainerSelector { + if targetContainer == container.Name { + found = true + statefulSetApplyConfig.Spec.Template.Spec.WithContainers( + corev1ac.Container(). + WithName(container.Name). + WithImage(container.Image). + WithVolumeMounts( + corev1ac.VolumeMount().WithMountPath(lightrunJavaAgent.Spec.InitContainer.SharedVolumeMountPath).WithName(lightrunJavaAgent.Spec.InitContainer.SharedVolumeName), + ), + ) + } + } + } + if !found { + err = errors.New("unable to find matching container to patch") + return err + } + return nil +} + +// configMapDataHash calculates a hash of the ConfigMap data to detect changes +func configMapDataHash(cmData map[string]string) uint64 { + // Combine all data values into a single string for hashing + var hashString string + for _, v := range cmData { + hashString += v + } + return hash(hashString) +}