From 28b6b3a309aa9479a7b5e6ced3c411b956c5fce8 Mon Sep 17 00:00:00 2001 From: adamschmidt Date: Fri, 3 Mar 2023 11:16:11 +1100 Subject: [PATCH] feat: adds k8s config options Signed-off-by: adamschmidt --- .../batch-materialization/bytewax.md | 25 ++++++++++++++++++- .../bytewax/bytewax_materialization_engine.py | 18 ++++++++++++- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/docs/reference/batch-materialization/bytewax.md b/docs/reference/batch-materialization/bytewax.md index bd98a4dc6e..2e28937f50 100644 --- a/docs/reference/batch-materialization/bytewax.md +++ b/docs/reference/batch-materialization/bytewax.md @@ -23,6 +23,8 @@ To configure secrets, first create them using `kubectl`: kubectl create secret generic -n bytewax aws-credentials --from-literal=aws-access-key-id='' --from-literal=aws-secret-access-key='' ``` +If your Docker registry requires authentication to store/pull containers, you can use this same approach to store your repository access credential and use when running the materialization engine. + Then configure them in the batch_engine section of `feature_store.yaml`: ``` yaml @@ -40,6 +42,8 @@ batch_engine: secretKeyRef: name: aws-credentials key: aws-secret-access-key + image_pull_secrets: + - docker-repository-access-secret ``` #### Configuration @@ -51,9 +55,28 @@ batch_engine: type: bytewax namespace: bytewax image: bytewax/bytewax-feast:latest + image_pull_secrets: + - my_container_secret + service_account_name: my-k8s-service-account + annotations: + # example annotation you might include if running on AWS EKS + iam.amazonaws.com/role: arn:aws:iam:::role/MyBytewaxPlatformRole + resources: + limits: + cpu: 1000m + memory: 2048Mi + requests: + cpu: 500m + memory: 1024Mi ``` -The `namespace` configuration directive specifies which Kubernetes [namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/) jobs, services and configuration maps will be created in. 
+**Notes:** + +* The `namespace` configuration directive specifies which Kubernetes [namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/) jobs, services and configuration maps will be created in. +* The `image_pull_secrets` configuration directive specifies the pre-configured secrets to use when pulling the container image from your registry. +* The `service_account_name` configuration directive specifies which Kubernetes service account the job runs under. +* The `annotations` configuration directive lets you add Kubernetes annotations to the pods created by the job. This is particularly useful, for example, for IAM roles that grant the running pod access to cloud platform resources. +* The `resources` configuration directive sets the standard Kubernetes [resource requests and limits](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/) for the job containers to use when materializing data. #### Building a custom Bytewax Docker image diff --git a/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_engine.py b/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_engine.py index 9a456376bf..cf3d9f214d 100644 --- a/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_engine.py +++ b/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_engine.py @@ -46,6 +46,17 @@ class BytewaxMaterializationEngineConfig(FeastConfigBaseModel): These environment variables can be used to reference Kubernetes secrets. """ + image_pull_secrets: List[str] = [] + """ (optional) The secrets to use when pulling the image to run for the materialization job """ + + resources: dict = {} + """ (optional) The resource requests and limits for the materialization containers """ + + service_account_name: StrictStr = "" + """ (optional) The service account name to use when running the job """ + + annotations: dict = {} + """ (optional) Annotations to apply to the job container. 
Useful for linking the service account to IAM roles, operational metadata, etc """ class BytewaxMaterializationEngine(BatchMaterializationEngine): def __init__( @@ -248,9 +259,14 @@ def _create_job_definition(self, job_id, namespace, pods, env): "parallelism": pods, "completionMode": "Indexed", "template": { + "metadata": { + "annotations": self.batch_engine_config.annotations, + }, "spec": { "restartPolicy": "Never", "subdomain": f"dataflow-{job_id}", + "imagePullSecrets": self.batch_engine_config.image_pull_secrets, + "serviceAccountName": self.batch_engine_config.service_account_name, "initContainers": [ { "env": [ @@ -300,7 +316,7 @@ def _create_job_definition(self, job_id, namespace, pods, env): "protocol": "TCP", } ], - "resources": {}, + "resources": self.batch_engine_config.resources, "securityContext": { "allowPrivilegeEscalation": False, "capabilities": {