KEDA task count query should ignore k8s queue

CeleryKubernetesExecutor lets us use both the Celery and Kubernetes executors. KEDA lets us scale the Celery workers down to zero when no Celery tasks are running. However, the KEDA query counts every running or queued task instance regardless of its queue, so if no Celery tasks are running and we run a Kubernetes task, KEDA will launch a worker even though there are still no Celery tasks. We can prevent this by excluding the Kubernetes queue from the KEDA query.
apache · Aug 5, 2021 · ecfa75c · ecfa75c
1 parent 4dae4ec
commit ecfa75c
Show file tree

Hide file tree

Showing 3 changed files with 39 additions and 3 deletions.
diff --git a/chart/templates/workers/worker-kedaautoscaler.yaml b/chart/templates/workers/worker-kedaautoscaler.yaml
@@ -49,5 +49,7 @@ spec:
         query: >-
           SELECT ceil(COUNT(*)::decimal / {{ .Values.config.celery.worker_concurrency }})
           FROM task_instance
-          WHERE state='running' OR state='queued'
+          WHERE (state='running' OR state='queued')
+{{ $k8s_queue := default (printf "kubernetes") .Values.config.celery_kubernetes_executor.kubernetes_queue -}}
+{{ eq .Values.executor "CeleryKubernetesExecutor" | ternary (printf "AND queue != '%s'" $k8s_queue) (print "") | indent 14 }}
 {{- end }}
diff --git a/chart/tests/test_keda.py b/chart/tests/test_keda.py
@@ -61,7 +61,7 @@ def test_keda_enabled(self, executor, is_created):
     )
     def test_keda_concurrency(self, executor, concurrency):
         """
-        ScaledObject should only be created when set to enabled and executor is Celery or CeleryKubernetes
+        Verify keda sql query uses configured concurrency
         """
         docs = render_chart(
             values={
@@ -73,8 +73,40 @@ def test_keda_concurrency(self, executor, concurrency):
         )
         expected_query = (
             f"SELECT ceil(COUNT(*)::decimal / {concurrency}) "
-            "FROM task_instance WHERE state='running' OR state='queued'"
+            "FROM task_instance WHERE (state='running' OR state='queued')"
+        )
+        assert jmespath.search("spec.triggers[0].metadata.query", docs[0]) == expected_query
+
+    @parameterized.expand(
+        [
+            ("CeleryExecutor", None, False),
+            ("CeleryExecutor", 'my_queue', False),
+            ("CeleryKubernetesExecutor", None, True),
+            ("CeleryKubernetesExecutor", 'my_queue', True),
+        ]
+    )
+    def test_keda_query_kubernetes_queue(self, executor, queue, should_filter):
+        """
+        Verify keda sql query ignores kubernetes queue when CKE is used.
+        Sometimes a user might want to use a different queue name for k8s executor tasks,
+        and we also verify here that we use the configured queue name in that case.
+        """
+        values = {
+            "workers": {"keda": {"enabled": True}, "persistence": {"enabled": False}},
+            "executor": executor,
+        }
+        if queue:
+            values.update({'config': {'celery_kubernetes_executor': {'kubernetes_queue': queue}}})
+        docs = render_chart(
+            values=values,
+            show_only=["templates/workers/worker-kedaautoscaler.yaml"],
+        )
+        expected_query = (
+            "SELECT ceil(COUNT(*)::decimal / 16) "
+            "FROM task_instance WHERE (state='running' OR state='queued')"
         )
+        if should_filter:
+            expected_query += f" AND queue != '{queue or 'kubernetes'}'"
         assert jmespath.search("spec.triggers[0].metadata.query", docs[0]) == expected_query
 
     @parameterized.expand(

diff --git a/chart/values.yaml b/chart/values.yaml
@@ -1111,6 +1111,8 @@ config:
     reinit_frequency: '{{ .Values.kerberos.reinitFrequency }}'
     principal: '{{ .Values.kerberos.principal }}'
     ccache: '{{ .Values.kerberos.ccacheMountPath }}/{{ .Values.kerberos.ccacheFileName }}'
+  celery_kubernetes_executor:
+    kubernetes_queue: 'kubernetes'
   kubernetes:
     namespace: '{{ .Release.Namespace }}'
     airflow_configmap: '{{ include "airflow_config" . }}'