From 31fc536103538543cd7e114cf737b2712cfec15c Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Wed, 26 Sep 2018 17:41:21 -0700 Subject: [PATCH 01/18] initial commit --- .../data-populator-deployment.yml | 30 ++ .../kerberos-yml/data-populator-service.yml | 17 ++ .../kerberos-yml/dn1-deployment.yml | 30 ++ .../kerberos-yml/dn1-service.yml | 17 ++ .../kerberos-yml/kerberos-deployment.yml | 30 ++ .../kerberos-yml/kerberos-service.yml | 17 ++ .../kerberos-yml/kerberos-test.yml | 25 ++ .../kerberos-yml/nn-deployment.yml | 35 +++ .../kerberos-yml/nn-hadoop.yml | 12 + .../kerberos-yml/nn-service.yml | 17 ++ .../kerberos-yml/server-keytab.yml | 12 + .../scripts/run-kerberos-test.sh | 24 ++ .../KerberizedHadoopClusterLauncher.scala | 57 ++++ .../KerberosTestPodLauncher.scala | 128 +++++++++ .../integrationtest/KerberosTestSuite.scala | 65 +++++ .../kerberos/KerberosCMWatcherCache.scala | 104 +++++++ .../kerberos/KerberosDeployment.scala | 24 ++ .../kerberos/KerberosDriverWatcherCache.scala | 99 +++++++ .../kerberos/KerberosPVWatcherCache.scala | 184 +++++++++++++ .../kerberos/KerberosPodWatcherCache.scala | 256 ++++++++++++++++++ .../kerberos/KerberosStorage.scala | 23 ++ .../kerberos/KerberosUtils.scala | 154 +++++++++++ .../test-data/hadoop-conf/core-site.xml | 38 +++ .../test-data/hadoop-conf/hdfs-site.xml | 157 +++++++++++ .../test-data/hadoop-conf/krb5.conf | 25 ++ .../test-data/hadoop-conf/yarn-site.xml | 26 ++ .../integration-tests/test-data/input.txt | 1 + .../simple-hadoop-conf/core-site.xml | 24 ++ .../simple-hadoop-conf/hdfs-site.xml | 24 ++ 29 files changed, 1655 insertions(+) create mode 100755 resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml create mode 100755 resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-service.yml create mode 100755 resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml create mode 100755 
resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-service.yml create mode 100755 resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml create mode 100755 resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-service.yml create mode 100755 resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-test.yml create mode 100755 resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml create mode 100755 resource-managers/kubernetes/integration-tests/kerberos-yml/nn-hadoop.yml create mode 100755 resource-managers/kubernetes/integration-tests/kerberos-yml/nn-service.yml create mode 100755 resource-managers/kubernetes/integration-tests/kerberos-yml/server-keytab.yml create mode 100644 resource-managers/kubernetes/integration-tests/scripts/run-kerberos-test.sh create mode 100755 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberizedHadoopClusterLauncher.scala create mode 100755 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestPodLauncher.scala create mode 100755 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala create mode 100755 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosCMWatcherCache.scala create mode 100755 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDeployment.scala create mode 100755 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala create mode 100755 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPVWatcherCache.scala create mode 100755 
resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPodWatcherCache.scala create mode 100755 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala create mode 100755 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala create mode 100755 resource-managers/kubernetes/integration-tests/test-data/hadoop-conf/core-site.xml create mode 100755 resource-managers/kubernetes/integration-tests/test-data/hadoop-conf/hdfs-site.xml create mode 100755 resource-managers/kubernetes/integration-tests/test-data/hadoop-conf/krb5.conf create mode 100755 resource-managers/kubernetes/integration-tests/test-data/hadoop-conf/yarn-site.xml create mode 100755 resource-managers/kubernetes/integration-tests/test-data/input.txt create mode 100755 resource-managers/kubernetes/integration-tests/test-data/simple-hadoop-conf/core-site.xml create mode 100755 resource-managers/kubernetes/integration-tests/test-data/simple-hadoop-conf/hdfs-site.xml diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml new file mode 100755 index 000000000000..eb31a0126d76 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml @@ -0,0 +1,30 @@ +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: data-populator +spec: + replicas: 1 + template: + metadata: + annotations: + pod.beta.kubernetes.io/hostname: data-populator + labels: + name: hdfs-data-populator + kerberosService: data-populator + job: kerberostest + spec: + containers: + - command: + - /populate-data.sh + name: data-populator + image: ifilonenko/hadoop-base:latest + imagePullPolicy: IfNotPresent + runAsNonRoot: false + 
volumeMounts: + - mountPath: /var/keytabs + name: data-populator-keytab + restartPolicy: Always + volumes: + - name: data-populator-keytab + persistentVolumeClaim: + claimName: server-keytab diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-service.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-service.yml new file mode 100755 index 000000000000..2f35d5d70de4 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-service.yml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Service +metadata: + annotations: + service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" + labels: + kerberosService: data-populator + job: kerberostest + name: data-populator +spec: + clusterIP: None + ports: + - protocol: TCP + port: 55555 + targetPort: 0 + selector: + kerberosService: data-populator diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml new file mode 100755 index 000000000000..f524b41ada6b --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml @@ -0,0 +1,30 @@ +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: dn1 +spec: + replicas: 1 + template: + metadata: + annotations: + pod.beta.kubernetes.io/hostname: dn1 + labels: + name: hdfs-dn1 + kerberosService: dn1 + job: kerberostest + spec: + containers: + - command: + - /start-datanode.sh + name: dn1 + image: ifilonenko/hadoop-base:latest + imagePullPolicy: IfNotPresent + runAsNonRoot: false + volumeMounts: + - mountPath: /var/keytabs + name: dn1-keytab + restartPolicy: Always + volumes: + - name: dn1-keytab + persistentVolumeClaim: + claimName: server-keytab diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-service.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-service.yml new file mode 
100755 index 000000000000..6915022b7be3 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-service.yml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Service +metadata: + annotations: + service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" + labels: + kerberosService: dn1 + job: kerberostest + name: dn1 +spec: + clusterIP: None + ports: + - protocol: TCP + port: 55555 + targetPort: 0 + selector: + kerberosService: dn1 diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml new file mode 100755 index 000000000000..9b2e1a394921 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml @@ -0,0 +1,30 @@ +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: kerberos +spec: + replicas: 1 + template: + metadata: + annotations: + pod.beta.kubernetes.io/hostname: kerberos + labels: + name: hdfs-kerberos + kerberosService: kerberos + job: kerberostest + spec: + containers: + - command: + - /start-kdc.sh + name: kerberos + image: ifilonenko/hadoop-base:latest + imagePullPolicy: IfNotPresent + runAsNonRoot: false + volumeMounts: + - mountPath: /var/keytabs + name: kerb-keytab + restartPolicy: Always + volumes: + - name: kerb-keytab + persistentVolumeClaim: + claimName: server-keytab diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-service.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-service.yml new file mode 100755 index 000000000000..da7b994f6e2a --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-service.yml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Service +metadata: + annotations: + service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" + labels: + kerberosService: kerberos + job: kerberostest + name: kerberos +spec: + clusterIP: None + ports: + - 
protocol: TCP + port: 55555 + targetPort: 0 + selector: + kerberosService: kerberos diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-test.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-test.yml new file mode 100755 index 000000000000..4542957640c7 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-test.yml @@ -0,0 +1,25 @@ +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: kerberos-test +spec: + replicas: 1 + template: + metadata: + labels: + name: kerberos-test + spec: + containers: + - command: ["/bin/bash"] + args: ["/opt/spark/run-kerberos-test.sh"] + name: kerberos-test + image: kerberos-test:latest + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /var/keytabs + name: kerberos-test-keytab + restartPolicy: Always + volumes: + - name: kerberos-test-keytab + persistentVolumeClaim: + claimName: server-keytab diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml new file mode 100755 index 000000000000..d2f473aa66c1 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml @@ -0,0 +1,35 @@ +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: nn +spec: + replicas: 1 + template: + metadata: + annotations: + pod.beta.kubernetes.io/hostname: nn + labels: + name: hdfs-nn + kerberosService: nn + job: kerberostest + spec: + containers: + - command: + - /start-namenode.sh + name: nn + ports: + - containerPort: 9000 + image: ifilonenko/hadoop-base:latest + imagePullPolicy: IfNotPresent + runAsNonRoot: false + volumeMounts: + - mountPath: /var/keytabs + name: nn-keytab + restartPolicy: Always + volumes: + - name: nn-keytab + persistentVolumeClaim: + claimName: server-keytab + - name: nn-hadoop + persistentVolumeClaim: + claimName: nn-hadoop diff --git 
a/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-hadoop.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-hadoop.yml new file mode 100755 index 000000000000..18c138e1512f --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-hadoop.yml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: nn-hadoop + labels: + job: kerberostest +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Mi diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-service.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-service.yml new file mode 100755 index 000000000000..649302150aa3 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-service.yml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Service +metadata: + annotations: + service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" + labels: + kerberosService: nn + job: kerberostest + name: nn +spec: + clusterIP: None + ports: + - protocol: TCP + port: 9000 + targetPort: 9000 + selector: + kerberosService: nn diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/server-keytab.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/server-keytab.yml new file mode 100755 index 000000000000..7798c0741366 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/server-keytab.yml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: server-keytab + labels: + job: kerberostest +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Mi diff --git a/resource-managers/kubernetes/integration-tests/scripts/run-kerberos-test.sh b/resource-managers/kubernetes/integration-tests/scripts/run-kerberos-test.sh new file mode 100644 index 000000000000..3e16a13c982c --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/scripts/run-kerberos-test.sh @@ -0,0 +1,24 
@@ +#!/usr/bin/env bash +sed -i -e 's/#//' -e 's/default_ccache_name/# default_ccache_name/' /etc/krb5.conf +export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true -Dsun.security.krb5.debug=true" +export HADOOP_JAAS_DEBUG=true +export HADOOP_ROOT_LOGGER=DEBUG,console +cp ${TMP_KRB_LOC} /etc/krb5.conf +cp ${TMP_CORE_LOC} /opt/spark/hconf/core-site.xml +cp ${TMP_HDFS_LOC} /opt/spark/hconf/hdfs-site.xml +mkdir -p /etc/krb5.conf.d +until /usr/bin/kinit -kt /var/keytabs/hdfs.keytab hdfs/nn.${NAMESPACE}.svc.cluster.local; do sleep 15; done +/opt/spark/bin/spark-submit \ + --deploy-mode cluster \ + --class ${CLASS_NAME} \ + --master k8s://${MASTER_URL} \ + --namespace ${NAMESPACE} \ + --conf spark.executor.instances=1 \ + --conf spark.app.name=spark-hdfs \ + --conf spark.driver.extraClassPath=/opt/spark/hconf/core-site.xml:/opt/spark/hconf/hdfs-site.xml:/opt/spark/hconf/yarn-site.xml:/etc/krb5.conf \ + --conf spark.kubernetes.container.image=spark:testing \ + --conf spark.kerberos.keytab=/var/keytabs/hdfs.keytab \ + --conf spark.kerberos.principal=hdfs/nn.${NAMESPACE}.svc.cluster.local@CLUSTER.LOCAL \ + --conf spark.kubernetes.driver.label.spark-app-locator=${APP_LOCATOR_LABEL} \ + ${SUBMIT_RESOURCE} \ + hdfs://nn.${NAMESPACE}.svc.cluster.local:9000/user/ifilonenko/wordcount.txt diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberizedHadoopClusterLauncher.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberizedHadoopClusterLauncher.scala new file mode 100755 index 000000000000..37567f5ab832 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberizedHadoopClusterLauncher.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.k8s.integrationtest + +import io.fabric8.kubernetes.client.KubernetesClient + +import org.apache.spark.deploy.k8s.integrationtest.kerberos._ +import org.apache.spark.internal.Logging + + /** + * This class is responsible for launching a psuedo-distributed, single noded, + * kerberized, Hadoop cluster to test secure HDFS interaction. Because each node: + * kdc, data node, and name node rely on Persistent Volumes and Config Maps to be set, + * and a particular order in pod-launching, this class leverages Watchers and thread locks + * to ensure that order is always preserved and the cluster is the same for every run. + */ +private[spark] class KerberizedHadoopClusterLauncher( + kubernetesClient: KubernetesClient, + namespace: String) extends Logging { + private val LABELS = Map("job" -> "kerberostest") + + def launchKerberizedCluster(): Unit = { + // These Utils allow for each step in this launch process to re-use + // common functionality for setting up hadoop nodes. 
+ val kerberosUtils = new KerberosUtils(kubernetesClient, namespace) + // Launches persistent volumes and its claims for sharing keytabs across pods + val pvWatcherCache = new KerberosPVWatcherCache(kerberosUtils, LABELS) + pvWatcherCache.start() + pvWatcherCache.stop() + // Launches config map for the files in HADOOP_CONF_DIR + val cmWatcherCache = new KerberosCMWatcherCache(kerberosUtils) + cmWatcherCache.start() + cmWatcherCache.stop() + // Launches the Hadoop cluster pods: KDC --> NN --> DN1 --> Data-Populator + val podWatcherCache = new KerberosPodWatcherCache(kerberosUtils, LABELS) + podWatcherCache.start() + val dpNode = podWatcherCache.stop() + while (!podWatcherCache.hasInLogs(dpNode, "")) { + logInfo("Waiting for data-populator to be formatted") + Thread.sleep(500) + } + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestPodLauncher.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestPodLauncher.scala new file mode 100755 index 000000000000..942cc4064ce7 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestPodLauncher.scala @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.k8s.integrationtest + +import java.io.{File, FileInputStream} +import java.lang.Boolean + +import scala.collection.JavaConverters._ + +import io.fabric8.kubernetes.api.builder.Predicate +import io.fabric8.kubernetes.api.model.{ContainerBuilder, KeyToPathBuilder} +import io.fabric8.kubernetes.api.model.extensions.{Deployment, DeploymentBuilder} +import io.fabric8.kubernetes.client.KubernetesClient + + /** + * This class is responsible for launching a pod that runs spark-submit to simulate + * the necessary global environmental variables and files expected for a Kerberos task. + * In this test we specify HADOOP_CONF_DIR and ensure that for any arbitrary namespace + * the krb5.conf, core-site.xml, and hdfs-site.xml are resolved accordingly. 
+ */ +private[spark] class KerberosTestPodLauncher( + kubernetesClient: KubernetesClient, + namespace: String) { + private val kerberosFiles = Seq("krb5.conf", "core-site.xml", "hdfs-site.xml") + private val KRB_VOLUME = "krb5-conf" + private val KRB_FILE_DIR = "/tmp" + private val KRB_CONFIG_MAP_NAME = "krb-config-map" + private val HADOOP_CONF_DIR_PATH = "/opt/spark/hconf" + private val keyPaths = kerberosFiles.map(file => + new KeyToPathBuilder() + .withKey(file) + .withPath(file) + .build()).toList + def startKerberosTest( + resource: String, + className: String, + appLabel: String, + yamlLocation: String): Unit = { + kubernetesClient.load(new FileInputStream(new File(yamlLocation))) + .get().get(0) match { + case deployment: Deployment => + val deploymentWithEnv: Deployment = new DeploymentBuilder(deployment) + .editSpec() + .editTemplate() + .editSpec() + .addNewVolume() + .withName(KRB_VOLUME) + .withNewConfigMap() + .withName(KRB_CONFIG_MAP_NAME) + .withItems(keyPaths.asJava) + .endConfigMap() + .endVolume() + .editMatchingContainer(new ContainerNameEqualityPredicate( + deployment.getMetadata.getName)) + .addNewEnv() + .withName("NAMESPACE") + .withValue(namespace) + .endEnv() + .addNewEnv() + .withName("MASTER_URL") + .withValue(kubernetesClient.getMasterUrl.toString) + .endEnv() + .addNewEnv() + .withName("SUBMIT_RESOURCE") + .withValue(resource) + .endEnv() + .addNewEnv() + .withName("CLASS_NAME") + .withValue(className) + .endEnv() + .addNewEnv() + .withName("HADOOP_CONF_DIR") + .withValue(HADOOP_CONF_DIR_PATH) + .endEnv() + .addNewEnv() + .withName("APP_LOCATOR_LABEL") + .withValue(appLabel) + .endEnv() + .addNewEnv() + .withName("SPARK_PRINT_LAUNCH_COMMAND") + .withValue("true") + .endEnv() + .addNewEnv() + .withName("TMP_KRB_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles.head}") + .endEnv() + .addNewEnv() + .withName("TMP_CORE_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles(1)}") + .endEnv() + .addNewEnv() + .withName("TMP_HDFS_LOC") + 
.withValue(s"$KRB_FILE_DIR/${kerberosFiles(2)}") + .endEnv() + .addNewVolumeMount() + .withName(KRB_VOLUME) + .withMountPath(KRB_FILE_DIR) + .endVolumeMount() + .endContainer() + .endSpec() + .endTemplate() + .endSpec() + .build() + kubernetesClient.extensions().deployments() + .inNamespace(namespace).create(deploymentWithEnv)} + } +} + +private[spark] class ContainerNameEqualityPredicate(containerName: String) + extends Predicate[ContainerBuilder] { + override def apply(item: ContainerBuilder): Boolean = { + item.getName == containerName + } +} \ No newline at end of file diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala new file mode 100755 index 000000000000..607e8ed234f6 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.k8s.integrationtest + +import org.scalatest.concurrent.Eventually + +import org.apache.spark.deploy.k8s.integrationtest.KerberosTestSuite._ +import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.{k8sTestTag, INTERVAL, TIMEOUT} +import org.apache.spark.deploy.k8s.integrationtest.kerberos._ + +private[spark] trait KerberosTestSuite { k8sSuite: KubernetesSuite => + + test("Secure HDFS test with HDFS keytab", k8sTestTag) { + kerberizedHadoopClusterLauncher.launchKerberizedCluster() + kerberosTestLauncher.startKerberosTest( + containerLocalSparkDistroExamplesJar, + HDFS_TEST_CLASS, + appLocator, + KERB_YAML_LOCATION) + val kubernetesClient = kubernetesTestComponents.kubernetesClient + val driverWatcherCache = new KerberosDriverWatcherCache( + kubernetesClient, + Map("spark-app-locator" -> appLocator)) + driverWatcherCache.start() + driverWatcherCache.stop() + val expectedLogOnCompletion = Seq( + "Returned length(s) of: 1", + "File contents: [This is an awesome word count file]") + val driverPod = kubernetesClient + .pods() + .inNamespace(kubernetesTestComponents.namespace) + .withLabel("spark-app-locator", appLocator) + .list() + .getItems + .get(0) + Eventually.eventually(TIMEOUT, INTERVAL) { + expectedLogOnCompletion.foreach { e => + assert(kubernetesClient + .pods() + .withName(driverPod.getMetadata.getName) + .getLog + .contains(e), "The application did not complete.") + } + } + } +} + +private[spark] object KerberosTestSuite { + val HDFS_TEST_CLASS = "org.apache.spark.examples.HdfsTest" + val KERB_YAML_LOCATION = "kerberos-yml/kerberos-test.yml" +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosCMWatcherCache.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosCMWatcherCache.scala new file mode 100755 index 000000000000..953ccbc9a377 --- /dev/null 
+++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosCMWatcherCache.scala @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.k8s.integrationtest.kerberos + +import java.util.concurrent.locks.{Condition, Lock, ReentrantLock} + +import scala.collection.JavaConverters._ + +import io.fabric8.kubernetes.api.model.ConfigMap +import io.fabric8.kubernetes.client.{KubernetesClientException, Watch, Watcher} +import io.fabric8.kubernetes.client.Watcher.Action + +import org.apache.spark.internal.Logging + + /** + * This class is responsible for ensuring that no logic progresses in the cluster launcher + * until a configmap with the HADOOP_CONF_DIR specifications has been created. 
+ */ +private[spark] class KerberosCMWatcherCache(kerberosUtils: KerberosUtils) extends Logging { + private val kubernetesClient = kerberosUtils.getClient + private val namespace = kerberosUtils.getNamespace + private val requiredFiles = Seq("core-site.xml", "hdfs-site.xml", "krb5.conf") + private var watcher: Watch = _ + private var cmCache = scala.collection.mutable.Map[String, Map[String, String]]() + private var lock: Lock = new ReentrantLock() + private var cmCreated: Condition = lock.newCondition() + private val configMap = kerberosUtils.getConfigMap + private val configMapName = configMap.getMetadata.getName + private val blockingThread = new Thread(new Runnable { + override def run(): Unit = { + logInfo("Beginning of ConfigMap lock") + lock.lock() + try { + while (!created()) cmCreated.await() + } finally { + logInfo("Ending the ConfigMap lock") + lock.unlock() + stop() + } + }}) + + private val watcherThread = new Thread(new Runnable { + override def run(): Unit = { + logInfo("Beginning the watch of the Kerberos Config Map") + watcher = kubernetesClient + .configMaps() + .withName(configMapName) + .watch(new Watcher[ConfigMap] { + override def onClose(cause: KubernetesClientException): Unit = + logInfo("Ending the watch of Kerberos Config Map") + override def eventReceived(action: Watcher.Action, resource: ConfigMap): Unit = { + val name = resource.getMetadata.getName + action match { + case Action.DELETED | Action.ERROR => + logInfo(s"$name either deleted or error") + cmCache.remove(name) + case Action.ADDED | Action.MODIFIED => + val data = resource.getData.asScala.toMap + logInfo(s"$name includes ${data.keys.mkString(",")}") + cmCache(name) = data + if (created()) { + lock.lock() + try { + cmCreated.signalAll() + } finally { + lock.unlock() + } + } + }}} + ) + logInfo("Launching the Config Map") + kerberosUtils.getClient.configMaps().inNamespace(namespace).createOrReplace(configMap) + }}) + + def start(): Unit = { + blockingThread.start() + 
watcherThread.start() + blockingThread.join() + watcherThread.join()} + + def stop(): Unit = { + watcher.close() + } + + def created(): Boolean = { + cmCache.get(configMapName).exists{ data => + requiredFiles.forall(data.keys.toSeq.contains)} + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDeployment.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDeployment.scala new file mode 100755 index 000000000000..0f2719575bf5 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDeployment.scala @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.k8s.integrationtest.kerberos + +import io.fabric8.kubernetes.api.model.Service +import io.fabric8.kubernetes.api.model.extensions.Deployment + +private[spark] case class KerberosDeployment( + podDeployment: Deployment, + service: Service) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala new file mode 100755 index 000000000000..cf87572e9b4d --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy.k8s.integrationtest.kerberos + +import java.util.concurrent.locks.{Condition, Lock, ReentrantLock} + +import scala.collection.JavaConverters._ + +import io.fabric8.kubernetes.api.model.Pod +import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watch, Watcher} +import io.fabric8.kubernetes.client.Watcher.Action + +import org.apache.spark.internal.Logging + + /** + * This class is responsible for ensuring that the driver-pod launched by the KerberosTestPod + * is running before trying to grab its logs for the sake of monitoring success of completion. + */ +private[spark] class KerberosDriverWatcherCache( + kubernetesClient: KubernetesClient, + labels: Map[String, String]) extends Logging { + private var podWatcher: Watch = _ + private var podCache = + scala.collection.mutable.Map[String, String]() + private var lock: Lock = new ReentrantLock() + private var driverRunning: Condition = lock.newCondition() + private var driverIsUp: Boolean = false + private val blockingThread = new Thread(new Runnable { + override def run(): Unit = { + logInfo("Beginning of Driver lock") + lock.lock() + try { + while (!driverIsUp) driverRunning.await() + } finally { + logInfo("Ending the Driver lock") + lock.unlock() + stop() + } + } + }) + + private val podWatcherThread = new Thread(new Runnable { + override def run(): Unit = { + logInfo("Beginning the watch of Driver pod") + podWatcher = kubernetesClient + .pods() + .withLabels(labels.asJava) + .watch(new Watcher[Pod] { + override def onClose(cause: KubernetesClientException): Unit = + logInfo("Ending the watch of Driver pod") + override def eventReceived(action: Watcher.Action, resource: Pod): Unit = { + val name = resource.getMetadata.getName + action match { + case Action.DELETED | Action.ERROR => + logInfo(s"$name either deleted or error") + podCache.remove(name) + case Action.ADDED | Action.MODIFIED => + val phase = resource.getStatus.getPhase + logInfo(s"$name 
is as $phase") + podCache(name) = phase + if (maybeDriverDone(name)) { + lock.lock() + try { + driverIsUp = true + driverRunning.signalAll() + } finally { + lock.unlock() + } + }}}}) + }}) + + def start(): Unit = { + blockingThread.start() + podWatcherThread.start() + blockingThread.join() + podWatcherThread.join() + } + + def stop(): Unit = { + podWatcher.close() + } + + private def maybeDriverDone(name: String): Boolean = podCache.get(name).contains("Running") +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPVWatcherCache.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPVWatcherCache.scala new file mode 100755 index 000000000000..27f8d250b709 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPVWatcherCache.scala @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy.k8s.integrationtest.kerberos + +import java.util.concurrent.locks.{Condition, Lock, ReentrantLock} + +import scala.collection.JavaConverters._ + +import io.fabric8.kubernetes.api.model.{PersistentVolume, PersistentVolumeClaim} +import io.fabric8.kubernetes.client.{KubernetesClientException, Watch, Watcher} +import io.fabric8.kubernetes.client.Watcher.Action + +import org.apache.spark.internal.Logging + + /** + * This class is responsible for ensuring that the persistent volume claims are bounded + * to the correct persistent volume and that they are both created before launching the + * pods which expect to use them. + */ +private[spark] class KerberosPVWatcherCache( + kerberosUtils: KerberosUtils, + labels: Map[String, String]) extends Logging { + private val kubernetesClient = kerberosUtils.getClient + private val namespace = kerberosUtils.getNamespace + private var pvWatcher: Watch = _ + private var pvcWatcher: Watch = _ + private var pvCache = + scala.collection.mutable.Map[String, String]() + private var pvcCache = + scala.collection.mutable.Map[String, String]() + private var lock: Lock = new ReentrantLock() + private var nnBounded: Condition = lock.newCondition() + private var ktBounded: Condition = lock.newCondition() + private var nnIsUp: Boolean = false + private var ktIsUp: Boolean = false + private var nnSpawned: Boolean = false + private var ktSpawned: Boolean = false + private val blockingThread = new Thread(new Runnable { + override def run(): Unit = { + logInfo("Beginning of Persistent Storage Lock") + lock.lock() + try { + while (!nnIsUp) nnBounded.await() + while (!ktIsUp) ktBounded.await() + } finally { + logInfo("Ending the Persistent Storage lock") + lock.unlock() + stop() + } + } + }) + private val pvWatcherThread = new Thread(new Runnable { + override def run(): Unit = { + logInfo("Beginning the watch of Persistent Volumes") + pvWatcher = kubernetesClient + .persistentVolumes() + 
.withLabels(labels.asJava) + .watch(new Watcher[PersistentVolume] { + override def onClose(cause: KubernetesClientException): Unit = + logInfo("Ending the watch of Persistent Volumes", cause) + override def eventReceived(action: Watcher.Action, resource: PersistentVolume): Unit = { + val name = resource.getMetadata.getName + action match { + case Action.DELETED | Action.ERROR => + logInfo(s"$name either deleted or error") + pvCache.remove(name) + case Action.ADDED | Action.MODIFIED => + val phase = resource.getStatus.getPhase + logInfo(s"$name is at stage: $phase") + pvCache(name) = phase + if (maybeDeploymentAndServiceDone(name)) { + val modifyAndSignal: Runnable = new MSThread(name) + new Thread(modifyAndSignal).start() + }}}}) + }}) + private val pvcWatcherThread = new Thread(new Runnable { + override def run(): Unit = { + logInfo("Beginning the watch of Persistent Volume Claims") + pvcWatcher = kubernetesClient + .persistentVolumeClaims() + .withLabels(labels.asJava) + .watch(new Watcher[PersistentVolumeClaim] { + override def onClose(cause: KubernetesClientException): Unit = + logInfo("Ending the watch of Persistent Volume Claims") + override def eventReceived( + action: Watcher.Action, + resource: PersistentVolumeClaim): Unit = { + val name = resource.getMetadata.getName + action match { + case Action.DELETED | Action.ERROR => + logInfo(s"$name either deleted or error") + pvcCache.remove(name) + case Action.ADDED | Action.MODIFIED => + val volumeName = resource.getSpec.getVolumeName + logInfo(s"$name claims itself to $volumeName") + pvcCache(name) = volumeName + if (maybeDeploymentAndServiceDone(name)) { + val modifyAndSignal: Runnable = new MSThread(name) + new Thread(modifyAndSignal).start() + }}}}) + logInfo("Launching the Persistent Storage") + if (!nnSpawned) { + logInfo("Launching the NN Hadoop PV+PVC") + nnSpawned = true + deploy(kerberosUtils.getNNStorage) + } + }}) + + def start(): Unit = { + blockingThread.start() + pvWatcherThread.start() + 
pvcWatcherThread.start() + blockingThread.join() + pvWatcherThread.join() + pvcWatcherThread.join() + } + def stop(): Unit = { + pvWatcher.close() + pvcWatcher.close() + } + + private def maybeDeploymentAndServiceDone(name: String): Boolean = { + val finished = pvCache.get(name).contains("Available") && + pvcCache.get(name).contains(name) + if (!finished) { + logInfo(s"$name is not available") + if (name == "nn-hadoop") nnIsUp = false + else if (name == "server-keytab") ktIsUp = false + } + finished + } + + private def deploy(kbs: KerberosStorage) : Unit = { + kubernetesClient + .persistentVolumeClaims().inNamespace(namespace).create(kbs.persistentVolumeClaim) + kubernetesClient + .persistentVolumes().create(kbs.persistentVolume) + } + + private class MSThread(name: String) extends Runnable { + override def run(): Unit = { + logInfo(s"$name PV and PVC are bounded") + lock.lock() + if (name == "nn-hadoop") { + nnIsUp = true + logInfo(s"nn-hadoop is bounded") + try { + nnBounded.signalAll() + } finally { + lock.unlock() + } + if (!ktSpawned) { + logInfo("Launching the KT Hadoop PV+PVC") + ktSpawned = true + deploy(kerberosUtils.getKTStorage) + } + } + else if (name == "server-keytab") { + while (!nnIsUp) ktBounded.await() + ktIsUp = true + logInfo(s"server-keytab is bounded") + try { + ktBounded.signalAll() + } finally { + lock.unlock() + } + }} + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPodWatcherCache.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPodWatcherCache.scala new file mode 100755 index 000000000000..11f28587be3e --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPodWatcherCache.scala @@ -0,0 +1,256 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.k8s.integrationtest.kerberos + +import java.util.concurrent.locks.{Condition, Lock, ReentrantLock} + +import scala.collection.JavaConverters._ + +import io.fabric8.kubernetes.api.model.{Pod, Service} +import io.fabric8.kubernetes.client.{KubernetesClientException, Watch, Watcher} +import io.fabric8.kubernetes.client.Watcher.Action + +import org.apache.spark.internal.Logging + + /** + * This class is used to ensure that the Hadoop cluster that is launched is executed + * in this order: KDC --> NN --> DN --> Data-Populator and that each one of these nodes + * is running before launching the Kerberos test. 
+ */ +private[spark] class KerberosPodWatcherCache( + kerberosUtils: KerberosUtils, + labels: Map[String, String]) extends Logging { + private val kubernetesClient = kerberosUtils.getClient + private val namespace = kerberosUtils.getNamespace + private var podWatcher: Watch = _ + private var serviceWatcher: Watch = _ + private var podCache = + scala.collection.mutable.Map[String, String]() + private var serviceCache = + scala.collection.mutable.Map[String, String]() + private var lock: Lock = new ReentrantLock() + private var kdcRunning: Condition = lock.newCondition() + private var nnRunning: Condition = lock.newCondition() + private var dnRunning: Condition = lock.newCondition() + private var dpRunning: Condition = lock.newCondition() + private var kdcIsUp: Boolean = false + private var nnIsUp: Boolean = false + private var dnIsUp: Boolean = false + private var dpIsUp: Boolean = false + private var kdcSpawned: Boolean = false + private var nnSpawned: Boolean = false + private var dnSpawned: Boolean = false + private var dpSpawned: Boolean = false + private var dnName: String = _ + private var dpName: String = _ + + private val blockingThread = new Thread(new Runnable { + override def run(): Unit = { + logInfo("Beginning of Cluster lock") + lock.lock() + try { + while (!kdcIsUp) kdcRunning.await() + while (!nnIsUp) nnRunning.await() + while (!dnIsUp) dnRunning.await() + while (!dpIsUp) dpRunning.await() + } finally { + logInfo("Ending the Cluster lock") + lock.unlock() + stop() + } + } + }) + + private val podWatcherThread = new Thread(new Runnable { + override def run(): Unit = { + logInfo("Beginning the watch of Pods") + podWatcher = kubernetesClient + .pods() + .withLabels(labels.asJava) + .watch(new Watcher[Pod] { + override def onClose(cause: KubernetesClientException): Unit = + logInfo("Ending the watch of Pods") + override def eventReceived(action: Watcher.Action, resource: Pod): Unit = { + val name = resource.getMetadata.getName + val keyName = 
podNameParse(name) + action match { + case Action.DELETED | Action.ERROR => + logInfo(s"$name either deleted or error") + podCache.remove(keyName) + case Action.ADDED | Action.MODIFIED => + val phase = resource.getStatus.getPhase + logInfo(s"$name is as $phase") + if (name.startsWith("dn1")) { dnName = name } + if (name.startsWith("data-populator")) { dpName = name } + podCache(keyName) = phase + if (maybeDeploymentAndServiceDone(keyName)) { + val modifyAndSignal: Runnable = new MSThread(keyName) + new Thread(modifyAndSignal).start() + }}}}) + }}) + + private val serviceWatcherThread = new Thread(new Runnable { + override def run(): Unit = { + logInfo("Beginning the watch of Services") + serviceWatcher = kubernetesClient + .services() + .withLabels(labels.asJava) + .watch(new Watcher[Service] { + override def onClose(cause: KubernetesClientException): Unit = + logInfo("Ending the watch of Services") + override def eventReceived(action: Watcher.Action, resource: Service): Unit = { + val name = resource.getMetadata.getName + action match { + case Action.DELETED | Action.ERROR => + logInfo(s"$name either deleted or error") + serviceCache.remove(name) + case Action.ADDED | Action.MODIFIED => + val bound = resource.getSpec.getSelector.get("kerberosService") + logInfo(s"$name is bounded to $bound") + serviceCache(name) = bound + if (maybeDeploymentAndServiceDone(name)) { + val modifyAndSignal: Runnable = new MSThread(name) + new Thread(modifyAndSignal).start() + }}}}) + logInfo("Launching the Cluster") + if (!kdcSpawned) { + logInfo("Launching the KDC Node") + kdcSpawned = true + deploy(kerberosUtils.getKDC) + } + }}) + + def start(): Unit = { + blockingThread.start() + podWatcherThread.start() + serviceWatcherThread.start() + blockingThread.join() + podWatcherThread.join() + serviceWatcherThread.join() + } + + def stop(): String = { + podWatcher.close() + serviceWatcher.close() + dpName + } + + private def maybeDeploymentAndServiceDone(name: String): Boolean = { + val 
finished = podCache.get(name).contains("Running") && + serviceCache.get(name).contains(name) + if (!finished) { + logInfo(s"$name is not up with a service") + if (name == "kerberos") kdcIsUp = false + else if (name == "nn") nnIsUp = false + else if (name == "dn1") dnIsUp = false + else if (name == "data-populator") dpIsUp = false + } + finished + } + + private def deploy(kdc: KerberosDeployment) : Unit = { + kubernetesClient + .extensions().deployments().inNamespace(namespace).create(kdc.podDeployment) + kubernetesClient + .services().inNamespace(namespace).create(kdc.service) + } + + private class MSThread(name: String) extends Runnable { + override def run(): Unit = { + logInfo(s"$name Node and Service is up") + lock.lock() + if (name == "kerberos") { + kdcIsUp = true + logInfo(s"kdc has signaled") + try { + kdcRunning.signalAll() + } finally { + lock.unlock() + } + if (!nnSpawned) { + logInfo("Launching the NN Node") + nnSpawned = true + deploy(kerberosUtils.getNN) + } + } + else if (name == "nn") { + while (!kdcIsUp) kdcRunning.await() + nnIsUp = true + logInfo(s"nn has signaled") + try { + nnRunning.signalAll() + } finally { + lock.unlock() + } + if (!dnSpawned) { + logInfo("Launching the DN Node") + dnSpawned = true + deploy(kerberosUtils.getDN) + } + } + else if (name == "dn1") { + while (!kdcIsUp) kdcRunning.await() + while (!nnIsUp) nnRunning.await() + dnIsUp = true + logInfo(s"dn1 has signaled") + try { + dnRunning.signalAll() + } finally { + lock.unlock() + } + if (!dpSpawned) { + logInfo("Launching the DP Node") + dpSpawned = true + deploy(kerberosUtils.getDP) + } + } + else if (name == "data-populator") { + while (!kdcIsUp) kdcRunning.await() + while (!nnIsUp) nnRunning.await() + while (!dnIsUp) dnRunning.await() + while (!hasInLogs(dnName, "Got finalize command for block pool")) { + logInfo("Waiting on DN to be formatted") + Thread.sleep(500) + } + dpIsUp = true + logInfo(s"data-populator has signaled") + try { + dpRunning.signalAll() + } finally { + 
lock.unlock() + } + } + } + } + + private def podNameParse(name: String) : String = { + name match { + case _ if name.startsWith("kerberos") => "kerberos" + case _ if name.startsWith("nn") => "nn" + case _ if name.startsWith("dn1") => "dn1" + case _ if name.startsWith("data-populator") => "data-populator" + } + } + + def hasInLogs(name: String, expectation: String): Boolean = { + kubernetesClient + .pods() + .withName(name) + .getLog().contains(expectation) + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala new file mode 100755 index 000000000000..15d0ffdeeaa2 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.k8s.integrationtest.kerberos + +import io.fabric8.kubernetes.api.model.{PersistentVolume, PersistentVolumeClaim} + +private[spark] case class KerberosStorage( + persistentVolumeClaim: PersistentVolumeClaim, + persistentVolume: PersistentVolume) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala new file mode 100755 index 000000000000..c9c6254fd241 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.k8s.integrationtest.kerberos + +import java.io.{File, FileInputStream} + +import scala.collection.JavaConverters._ + +import io.fabric8.kubernetes.api.model._ +import io.fabric8.kubernetes.api.model.extensions.{Deployment, DeploymentBuilder} +import io.fabric8.kubernetes.client.KubernetesClient +import org.apache.commons.io.FileUtils.readFileToString + +import org.apache.spark.deploy.k8s.integrationtest.ContainerNameEqualityPredicate + + /** + * This class is responsible for handling all Utils and Constants necessary for testing + */ +private[spark] class KerberosUtils( + kubernetesClient: KubernetesClient, + namespace: String) { + def getClient: KubernetesClient = kubernetesClient + def getNamespace: String = namespace + def yamlLocation(loc: String): String = s"kerberos-yml/$loc.yml" + def loadFromYaml(resource: String): FileInputStream = + new FileInputStream(new File(yamlLocation(resource))) + private val regex = "REPLACE_ME".r + private val regexDP = "# default_ccache_name = MEMORY".r + private val defaultCacheDP = "default_ccache_name = KRBCONF" + private def locationResolver(loc: String) = s"test-data/hadoop-conf/$loc" + private val kerberosFiles = Seq("krb5.conf", "core-site.xml", "hdfs-site.xml") + private val kerberosConfTupList = + kerberosFiles.map { file => + (file, regex.replaceAllIn(readFileToString(new File(locationResolver(file))), namespace))} ++ + Seq(("krb5-dp.conf", regexDP.replaceAllIn(regex.replaceAllIn(readFileToString( + new File(locationResolver("krb5.conf"))), namespace), defaultCacheDP))) + private val KRB_VOLUME = "krb5-conf" + private val KRB_FILE_DIR = "/tmp" + private val KRB_CONFIG_MAP_NAME = "krb-config-map" + private val PV_LABELS = Map("job" -> "kerberostest") + private val keyPaths: Seq[KeyToPath] = (kerberosFiles ++ Seq("krb5-dp.conf")) + .map(file => + new KeyToPathBuilder() + .withKey(file) + .withPath(file) + .build()).toList + private def createPVTemplate(name: String, pathType: String) 
: PersistentVolume = + new PersistentVolumeBuilder() + .withNewMetadata() + .withName(name) + .withLabels(Map( + "type" -> "local", + "job" -> "kerberostest").asJava) + .endMetadata() + .withNewSpec() + .withCapacity(Map("storage" -> new Quantity("1Gi")).asJava) + .withAccessModes("ReadWriteOnce") + .withHostPath( + new HostPathVolumeSource(s"/tmp/$namespace/$pathType")) + .endSpec() + .build() + private val pvNN = "nn-hadoop" + private val pvKT = "server-keytab" + private val persistentVolumeMap: Map[String, PersistentVolume] = Map( + pvNN -> createPVTemplate(pvNN, "nn"), + pvKT -> createPVTemplate(pvKT, "keytab")) + private def buildKerberosPV(pvType: String) = { + KerberosStorage( + kubernetesClient.load(loadFromYaml(pvType)) + .get().get(0).asInstanceOf[PersistentVolumeClaim], + persistentVolumeMap(pvType)) + } + def getNNStorage: KerberosStorage = buildKerberosPV(pvNN) + def getKTStorage: KerberosStorage = buildKerberosPV(pvKT) + def getLabels: Map[String, String] = PV_LABELS + def getKeyPaths: Seq[KeyToPath] = keyPaths + def getConfigMap: ConfigMap = new ConfigMapBuilder() + .withNewMetadata() + .withName(KRB_CONFIG_MAP_NAME) + .endMetadata() + .addToData(kerberosConfTupList.toMap.asJava) + .build() + private val kdcNode = Seq("kerberos-deployment", "kerberos-service") + private val nnNode = Seq("nn-deployment", "nn-service") + private val dnNode = Seq("dn1-deployment", "dn1-service") + private val dataPopulator = Seq("data-populator-deployment", "data-populator-service") + private def buildKerberosDeployment(seqPair: Seq[String]) = { + val deployment = + kubernetesClient.load(loadFromYaml(seqPair.head)).get().get(0).asInstanceOf[Deployment] + KerberosDeployment( + new DeploymentBuilder(deployment) + .editSpec() + .editTemplate() + .editSpec() + .addNewVolume() + .withName(KRB_VOLUME) + .withNewConfigMap() + .withName(KRB_CONFIG_MAP_NAME) + .withItems(keyPaths.asJava) + .endConfigMap() + .endVolume() + .editMatchingContainer(new 
ContainerNameEqualityPredicate( + deployment.getMetadata.getName)) + .addNewEnv() + .withName("NAMESPACE") + .withValue(namespace) + .endEnv() + .addNewEnv() + .withName("TMP_KRB_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles.head}") + .endEnv() + .addNewEnv() + .withName("TMP_KRB_DP_LOC") + .withValue(s"$KRB_FILE_DIR/krb5-dp.conf") + .endEnv() + .addNewEnv() + .withName("TMP_CORE_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles(1)}") + .endEnv() + .addNewEnv() + .withName("TMP_HDFS_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles(2)}") + .endEnv() + .addNewVolumeMount() + .withName(KRB_VOLUME) + .withMountPath(KRB_FILE_DIR) + .endVolumeMount() + .endContainer() + .endSpec() + .endTemplate() + .endSpec() + .build(), + kubernetesClient.load(loadFromYaml(seqPair(1))).get().get(0).asInstanceOf[Service] + ) + } + def getKDC: KerberosDeployment = buildKerberosDeployment(kdcNode) + def getNN: KerberosDeployment = buildKerberosDeployment(nnNode) + def getDN: KerberosDeployment = buildKerberosDeployment(dnNode) + def getDP: KerberosDeployment = buildKerberosDeployment(dataPopulator) +} diff --git a/resource-managers/kubernetes/integration-tests/test-data/hadoop-conf/core-site.xml b/resource-managers/kubernetes/integration-tests/test-data/hadoop-conf/core-site.xml new file mode 100755 index 000000000000..9a6ae2c50526 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/test-data/hadoop-conf/core-site.xml @@ -0,0 +1,38 @@ + + + + + + + + + hadoop.security.authentication + kerberos + + + + hadoop.security.authorization + true + + + + fs.defaultFS + hdfs://nn.REPLACE_ME.svc.cluster.local:9000 + + + hadoop.rpc.protection + authentication + + diff --git a/resource-managers/kubernetes/integration-tests/test-data/hadoop-conf/hdfs-site.xml b/resource-managers/kubernetes/integration-tests/test-data/hadoop-conf/hdfs-site.xml new file mode 100755 index 000000000000..66dc969c46b6 --- /dev/null +++ 
b/resource-managers/kubernetes/integration-tests/test-data/hadoop-conf/hdfs-site.xml @@ -0,0 +1,157 @@ + + + + + + + + + + dfs.replication + 1 + + + + + dfs.permissions + true + + + dfs.block.access.token.enable + true + + + + + dfs.namenode.keytab.file + /var/keytabs/hdfs.keytab + + + dfs.namenode.kerberos.principal + hdfs/nn.REPLACE_ME.svc.cluster.local@CLUSTER.LOCAL + + + dfs.namenode.kerberos.internal.spnego.principal + HTTP/nn.REPLACE_ME.svc.cluster.local@CLUSTER.LOCAL + + + dfs.namenode.rpc-address + nn.REPLACE_ME.svc.cluster.local:9000 + + + + + + dfs.namenode.delegation.token.max-lifetime + 3600000 + + + dfs.namenode.delegation.token.renew-interval + 3600000 + + + + + + + dfs.data.transfer.protection + integrity + + + dfs.datanode.address + 0.0.0.0:10019 + + + + dfs.datanode.http.address + 0.0.0.0:10022 + + + + dfs.http.policy + HTTPS_ONLY + + + + + dfs.namenode.keytab.file + /var/keytabs/hdfs.keytab + + + dfs.namenode.kerberos.principal + hdfs/nn.REPLACE_ME.svc.cluster.local@CLUSTER.LOCAL + + + dfs.namenode.kerberos.internal.spnego.principal + HTTP/nn.REPLACE_ME.svc.cluster.local@CLUSTER.LOCAL + + + + + dfs.namenode.datanode.registration.ip-hostname-check + false + + + dfs.datanode.data.dir.perm + 700 + + + dfs.namenode.name.dir + file:///hadoop/etc/data + + + dfs.datanode.name.dir + file:///hadoop/etc/data + + + dfs.data.dir + file:///hadoop/etc/data + + + dfs.datanode.keytab.file + /var/keytabs/hdfs.keytab + + + dfs.datanode.kerberos.principal + hdfs/dn1.REPLACE_ME.svc.cluster.local@CLUSTER.LOCAL + + + dfs.encrypt.data.transfer + true + + + dfs.encrypt.data.transfer.cipher.suites + AES/CTR/NoPadding + + + dfs.encrypt.data.transfer.cipher.key.bitlength + 256 + + + + + dfs.webhdfs.enabled + true + + + dfs.web.authentication.kerberos.principal + HTTP/dn1.REPLACE_ME.svc.cluster.local@CLUSTER.LOCAL + + + dfs.web.authentication.kerberos.keytab + /var/keytabs/hdfs.keytab + + + diff --git 
a/resource-managers/kubernetes/integration-tests/test-data/hadoop-conf/krb5.conf b/resource-managers/kubernetes/integration-tests/test-data/hadoop-conf/krb5.conf new file mode 100755 index 000000000000..144f77d8995d --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/test-data/hadoop-conf/krb5.conf @@ -0,0 +1,25 @@ +includedir /etc/krb5.conf.d/ + +[logging] +default = FILE:/var/log/krb5libs.log +kdc = FILE:/var/log/krb5kdc.log +admin_server = FILE:/var/log/kadmind.log + +[libdefaults] +dns_lookup_realm = false +ticket_lifetime = 24h +renew_lifetime = 7d +forwardable = true +rdns = false +default_realm = CLUSTER.LOCAL +# default_ccache_name = MEMORY + +[realms] +CLUSTER.LOCAL = { + kdc = kerberos.REPLACE_ME.svc.cluster.local + admin_server = kerberos.REPLACE_ME.svc.cluster.local +} + +[domain_realm] +.cluster.local = CLUSTER.LOCAL +cluster.local = CLUSTER.LOCAL diff --git a/resource-managers/kubernetes/integration-tests/test-data/hadoop-conf/yarn-site.xml b/resource-managers/kubernetes/integration-tests/test-data/hadoop-conf/yarn-site.xml new file mode 100755 index 000000000000..b8ff146d98a3 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/test-data/hadoop-conf/yarn-site.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + yarn.resourcemanager.principal + yarn/_HOST@CLUSTER.LOCAL + + diff --git a/resource-managers/kubernetes/integration-tests/test-data/input.txt b/resource-managers/kubernetes/integration-tests/test-data/input.txt new file mode 100755 index 000000000000..dfe437bdebeb --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/test-data/input.txt @@ -0,0 +1 @@ +Contents diff --git a/resource-managers/kubernetes/integration-tests/test-data/simple-hadoop-conf/core-site.xml b/resource-managers/kubernetes/integration-tests/test-data/simple-hadoop-conf/core-site.xml new file mode 100755 index 000000000000..08a512929a2a --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/test-data/simple-hadoop-conf/core-site.xml 
@@ -0,0 +1,24 @@ + + + + + + + + + fs.defaultFS + hdfs://nn.REPLACE_ME.svc.cluster.local:9000 + + diff --git a/resource-managers/kubernetes/integration-tests/test-data/simple-hadoop-conf/hdfs-site.xml b/resource-managers/kubernetes/integration-tests/test-data/simple-hadoop-conf/hdfs-site.xml new file mode 100755 index 000000000000..76fc9c68fa37 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/test-data/simple-hadoop-conf/hdfs-site.xml @@ -0,0 +1,24 @@ + + + + + + + + + dfs.replication + 1 + + From 9bfa86a947b4ff764762fe27b356480a6e957baa Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Fri, 28 Sep 2018 18:55:31 -0700 Subject: [PATCH 02/18] initial work on secure-hdfs integration testing --- bin/docker-image-tool.sh | 16 +- .../src/main/dockerfiles/spark/Dockerfile | 2 +- .../dockerfiles/spark/kerberos/Dockerfile | 24 ++ .../docker/test/hadoop/conf/core-site.xml | 38 +++ .../docker/test/hadoop/conf/hdfs-site.xml | 157 ++++++++++ .../docker/test/hadoop/conf/krb5.conf | 25 ++ .../docker/test/hadoop/conf/yarn-site.xml | 26 ++ .../test}/scripts/run-kerberos-test.sh | 2 +- .../data-populator-deployment.yml | 3 +- .../kerberos-yml/dn1-deployment.yml | 3 +- .../kerberos-yml/kerberos-deployment.yml | 3 +- .../kerberos-yml/kerberos-test.yml | 1 - .../kerberos-yml/nn-deployment.yml | 3 +- .../KerberosTestPodLauncher.scala | 128 -------- .../integrationtest/KerberosTestSuite.scala | 24 +- .../k8s/integrationtest/KubernetesSuite.scala | 16 +- .../minikube/MinikubeTestBackend.scala | 22 +- .../ContainerNameEqualityPredicate.scala | 29 ++ .../KerberizedHadoopClusterLauncher.scala | 48 +-- .../kerberos/KerberosCMWatcherCache.scala | 118 +++----- .../kerberos/KerberosDeployment.scala | 1 + .../kerberos/KerberosDriverWatcherCache.scala | 105 +++---- .../kerberos/KerberosPVWatcherCache.scala | 207 ++++--------- .../kerberos/KerberosPodWatcherCache.scala | 284 +++++------------- .../kerberos/KerberosStorage.scala | 1 + .../kerberos/KerberosUtils.scala | 268 
++++++++++++----- 26 files changed, 808 insertions(+), 746 deletions(-) create mode 100644 resource-managers/kubernetes/docker/test/dockerfiles/spark/kerberos/Dockerfile create mode 100755 resource-managers/kubernetes/docker/test/hadoop/conf/core-site.xml create mode 100755 resource-managers/kubernetes/docker/test/hadoop/conf/hdfs-site.xml create mode 100755 resource-managers/kubernetes/docker/test/hadoop/conf/krb5.conf create mode 100755 resource-managers/kubernetes/docker/test/hadoop/conf/yarn-site.xml rename resource-managers/kubernetes/{integration-tests => docker/test}/scripts/run-kerberos-test.sh (95%) delete mode 100755 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestPodLauncher.scala create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/ContainerNameEqualityPredicate.scala rename resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/{ => kerberos}/KerberizedHadoopClusterLauncher.scala (52%) diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh index d6371051ef7f..fe9ed53ac993 100755 --- a/bin/docker-image-tool.sh +++ b/bin/docker-image-tool.sh @@ -47,7 +47,7 @@ function build { if [ ! -f "$SPARK_HOME/RELEASE" ]; then # Set image build arguments accordingly if this is a source repo and not a distribution archive. - IMG_PATH=resource-managers/kubernetes/docker/src/main/dockerfiles + IMG_PATH=resource-managers/kubernetes/docker/src BUILD_ARGS=( ${BUILD_PARAMS} --build-arg @@ -57,7 +57,7 @@ function build { ) else # Not passed as an argument to docker, but used to validate the Spark directory. 
- IMG_PATH="kubernetes/dockerfiles" + IMG_PATH="kubernetes/src" BUILD_ARGS=(${BUILD_PARAMS}) fi @@ -69,9 +69,10 @@ function build { --build-arg base_img=$(image_ref spark) ) - local BASEDOCKERFILE=${BASEDOCKERFILE:-"$IMG_PATH/spark/Dockerfile"} - local PYDOCKERFILE=${PYDOCKERFILE:-"$IMG_PATH/spark/bindings/python/Dockerfile"} - local RDOCKERFILE=${RDOCKERFILE:-"$IMG_PATH/spark/bindings/R/Dockerfile"} + local BASEDOCKERFILE=${BASEDOCKERFILE:-"$IMG_PATH/main/dockerfiles/spark/Dockerfile"} + local PYDOCKERFILE=${PYDOCKERFILE:-"$IMG_PATH/main/dockerfiles/spark/bindings/python/Dockerfile"} + local RDOCKERFILE=${RDOCKERFILE:-"$IMG_PATH/main/dockerfiles/spark/bindings/R/Dockerfile"} + local KDOCKERFILE=${KDOCKERFILE:-"$IMG_PATH/test/dockerfiles/spark/kerberos/Dockerfile"} docker build $NOCACHEARG "${BUILD_ARGS[@]}" \ -t $(image_ref spark) \ @@ -84,12 +85,17 @@ function build { docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ -t $(image_ref spark-r) \ -f "$RDOCKERFILE" . + + docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ + -t $(image_ref spark-kerberos) \ + -f "$KDOCKERFILE" . } function push { docker push "$(image_ref spark)" docker push "$(image_ref spark-py)" docker push "$(image_ref spark-r)" + docker push "$(image_ref spark-kerberos)" } function usage { diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile index 7ae57bf6e42d..41984d1665fd 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile @@ -18,7 +18,7 @@ FROM openjdk:8-alpine ARG spark_jars=jars -ARG img_path=kubernetes/dockerfiles +ARG img_path=kubernetes/src/main/dockerfiles # Before building the docker image, first build and make a Spark distribution following # the instructions in http://spark.apache.org/docs/latest/building-spark.html. 
diff --git a/resource-managers/kubernetes/docker/test/dockerfiles/spark/kerberos/Dockerfile b/resource-managers/kubernetes/docker/test/dockerfiles/spark/kerberos/Dockerfile new file mode 100644 index 000000000000..9f01f50828f6 --- /dev/null +++ b/resource-managers/kubernetes/docker/test/dockerfiles/spark/kerberos/Dockerfile @@ -0,0 +1,24 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +ARG base_img +FROM $base_img + +ARG k_img_path=kubernetes/src/test + +COPY ${k_img_path}/scripts/test-env.sh /opt/spark/ +COPY ${k_img_path}/hadoop/conf /opt/spark/hconf diff --git a/resource-managers/kubernetes/docker/test/hadoop/conf/core-site.xml b/resource-managers/kubernetes/docker/test/hadoop/conf/core-site.xml new file mode 100755 index 000000000000..9a6ae2c50526 --- /dev/null +++ b/resource-managers/kubernetes/docker/test/hadoop/conf/core-site.xml @@ -0,0 +1,38 @@ + + + + + + + + + hadoop.security.authentication + kerberos + + + + hadoop.security.authorization + true + + + + fs.defaultFS + hdfs://nn.REPLACE_ME.svc.cluster.local:9000 + + + hadoop.rpc.protection + authentication + + diff --git a/resource-managers/kubernetes/docker/test/hadoop/conf/hdfs-site.xml b/resource-managers/kubernetes/docker/test/hadoop/conf/hdfs-site.xml new file mode 100755 index 000000000000..66dc969c46b6 --- /dev/null +++ b/resource-managers/kubernetes/docker/test/hadoop/conf/hdfs-site.xml @@ -0,0 +1,157 @@ + + + + + + + + + + dfs.replication + 1 + + + + + dfs.permissions + true + + + dfs.block.access.token.enable + true + + + + + dfs.namenode.keytab.file + /var/keytabs/hdfs.keytab + + + dfs.namenode.kerberos.principal + hdfs/nn.REPLACE_ME.svc.cluster.local@CLUSTER.LOCAL + + + dfs.namenode.kerberos.internal.spnego.principal + HTTP/nn.REPLACE_ME.svc.cluster.local@CLUSTER.LOCAL + + + dfs.namenode.rpc-address + nn.REPLACE_ME.svc.cluster.local:9000 + + + + + + dfs.namenode.delegation.token.max-lifetime + 3600000 + + + dfs.namenode.delegation.token.renew-interval + 3600000 + + + + + + + dfs.data.transfer.protection + integrity + + + dfs.datanode.address + 0.0.0.0:10019 + + + + dfs.datanode.http.address + 0.0.0.0:10022 + + + + dfs.http.policy + HTTPS_ONLY + + + + + dfs.namenode.keytab.file + /var/keytabs/hdfs.keytab + + + dfs.namenode.kerberos.principal + hdfs/nn.REPLACE_ME.svc.cluster.local@CLUSTER.LOCAL + + + dfs.namenode.kerberos.internal.spnego.principal + 
HTTP/nn.REPLACE_ME.svc.cluster.local@CLUSTER.LOCAL + + + + + dfs.namenode.datanode.registration.ip-hostname-check + false + + + dfs.datanode.data.dir.perm + 700 + + + dfs.namenode.name.dir + file:///hadoop/etc/data + + + dfs.datanode.name.dir + file:///hadoop/etc/data + + + dfs.data.dir + file:///hadoop/etc/data + + + dfs.datanode.keytab.file + /var/keytabs/hdfs.keytab + + + dfs.datanode.kerberos.principal + hdfs/dn1.REPLACE_ME.svc.cluster.local@CLUSTER.LOCAL + + + dfs.encrypt.data.transfer + true + + + dfs.encrypt.data.transfer.cipher.suites + AES/CTR/NoPadding + + + dfs.encrypt.data.transfer.cipher.key.bitlength + 256 + + + + + dfs.webhdfs.enabled + true + + + dfs.web.authentication.kerberos.principal + HTTP/dn1.REPLACE_ME.svc.cluster.local@CLUSTER.LOCAL + + + dfs.web.authentication.kerberos.keytab + /var/keytabs/hdfs.keytab + + + diff --git a/resource-managers/kubernetes/docker/test/hadoop/conf/krb5.conf b/resource-managers/kubernetes/docker/test/hadoop/conf/krb5.conf new file mode 100755 index 000000000000..144f77d8995d --- /dev/null +++ b/resource-managers/kubernetes/docker/test/hadoop/conf/krb5.conf @@ -0,0 +1,25 @@ +includedir /etc/krb5.conf.d/ + +[logging] +default = FILE:/var/log/krb5libs.log +kdc = FILE:/var/log/krb5kdc.log +admin_server = FILE:/var/log/kadmind.log + +[libdefaults] +dns_lookup_realm = false +ticket_lifetime = 24h +renew_lifetime = 7d +forwardable = true +rdns = false +default_realm = CLUSTER.LOCAL +# default_ccache_name = MEMORY + +[realms] +CLUSTER.LOCAL = { + kdc = kerberos.REPLACE_ME.svc.cluster.local + admin_server = kerberos.REPLACE_ME.svc.cluster.local +} + +[domain_realm] +.cluster.local = CLUSTER.LOCAL +cluster.local = CLUSTER.LOCAL diff --git a/resource-managers/kubernetes/docker/test/hadoop/conf/yarn-site.xml b/resource-managers/kubernetes/docker/test/hadoop/conf/yarn-site.xml new file mode 100755 index 000000000000..b8ff146d98a3 --- /dev/null +++ b/resource-managers/kubernetes/docker/test/hadoop/conf/yarn-site.xml @@ -0,0 +1,26 
@@ + + + + + + + + + + + yarn.resourcemanager.principal + yarn/_HOST@CLUSTER.LOCAL + + diff --git a/resource-managers/kubernetes/integration-tests/scripts/run-kerberos-test.sh b/resource-managers/kubernetes/docker/test/scripts/run-kerberos-test.sh similarity index 95% rename from resource-managers/kubernetes/integration-tests/scripts/run-kerberos-test.sh rename to resource-managers/kubernetes/docker/test/scripts/run-kerberos-test.sh index 3e16a13c982c..6003a1ecc5a5 100644 --- a/resource-managers/kubernetes/integration-tests/scripts/run-kerberos-test.sh +++ b/resource-managers/kubernetes/docker/test/scripts/run-kerberos-test.sh @@ -16,7 +16,7 @@ until /usr/bin/kinit -kt /var/keytabs/hdfs.keytab hdfs/nn.${NAMESPACE}.svc.clust --conf spark.executor.instances=1 \ --conf spark.app.name=spark-hdfs \ --conf spark.driver.extraClassPath=/opt/spark/hconf/core-site.xml:/opt/spark/hconf/hdfs-site.xml:/opt/spark/hconf/yarn-site.xml:/etc/krb5.conf \ - --conf spark.kubernetes.container.image=spark:testing \ + --conf spark.kubernetes.container.image=spark:latest \ --conf spark.kerberos.keytab=/var/keytabs/hdfs.keytab \ --conf spark.kerberos.principal=hdfs/nn.${NAMESPACE}.svc.cluster.local@CLUSTER.LOCAL \ --conf spark.kubernetes.driver.label.spark-app-locator=${APP_LOCATOR_LABEL} \ diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml index eb31a0126d76..0b7ba93bfd22 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml @@ -17,9 +17,8 @@ spec: - command: - /populate-data.sh name: data-populator - image: ifilonenko/hadoop-base:latest + image: hadoop-base:latest imagePullPolicy: IfNotPresent - runAsNonRoot: false volumeMounts: - mountPath: /var/keytabs name: data-populator-keytab diff --git 
a/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml index f524b41ada6b..9ef80b5dfc06 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml @@ -17,9 +17,8 @@ spec: - command: - /start-datanode.sh name: dn1 - image: ifilonenko/hadoop-base:latest + image: hadoop-base:latest imagePullPolicy: IfNotPresent - runAsNonRoot: false volumeMounts: - mountPath: /var/keytabs name: dn1-keytab diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml index 9b2e1a394921..491ad3ad0968 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml @@ -17,9 +17,8 @@ spec: - command: - /start-kdc.sh name: kerberos - image: ifilonenko/hadoop-base:latest + image: hadoop-base:latest imagePullPolicy: IfNotPresent - runAsNonRoot: false volumeMounts: - mountPath: /var/keytabs name: kerb-keytab diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-test.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-test.yml index 4542957640c7..9c3cc067b580 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-test.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-test.yml @@ -13,7 +13,6 @@ spec: - command: ["/bin/bash"] args: ["/opt/spark/run-kerberos-test.sh"] name: kerberos-test - image: kerberos-test:latest imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /var/keytabs diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml 
b/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml index d2f473aa66c1..feee748eedbc 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml @@ -19,9 +19,8 @@ spec: name: nn ports: - containerPort: 9000 - image: ifilonenko/hadoop-base:latest + image: hadoop-base:latest imagePullPolicy: IfNotPresent - runAsNonRoot: false volumeMounts: - mountPath: /var/keytabs name: nn-keytab diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestPodLauncher.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestPodLauncher.scala deleted file mode 100755 index 942cc4064ce7..000000000000 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestPodLauncher.scala +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.k8s.integrationtest - -import java.io.{File, FileInputStream} -import java.lang.Boolean - -import scala.collection.JavaConverters._ - -import io.fabric8.kubernetes.api.builder.Predicate -import io.fabric8.kubernetes.api.model.{ContainerBuilder, KeyToPathBuilder} -import io.fabric8.kubernetes.api.model.extensions.{Deployment, DeploymentBuilder} -import io.fabric8.kubernetes.client.KubernetesClient - - /** - * This class is responsible for launching a pod that runs spark-submit to simulate - * the necessary global environmental variables and files expected for a Kerberos task. - * In this test we specify HADOOP_CONF_DIR and ensure that for any arbitrary namespace - * the krb5.conf, core-site.xml, and hdfs-site.xml are resolved accordingly. - */ -private[spark] class KerberosTestPodLauncher( - kubernetesClient: KubernetesClient, - namespace: String) { - private val kerberosFiles = Seq("krb5.conf", "core-site.xml", "hdfs-site.xml") - private val KRB_VOLUME = "krb5-conf" - private val KRB_FILE_DIR = "/tmp" - private val KRB_CONFIG_MAP_NAME = "krb-config-map" - private val HADOOP_CONF_DIR_PATH = "/opt/spark/hconf" - private val keyPaths = kerberosFiles.map(file => - new KeyToPathBuilder() - .withKey(file) - .withPath(file) - .build()).toList - def startKerberosTest( - resource: String, - className: String, - appLabel: String, - yamlLocation: String): Unit = { - kubernetesClient.load(new FileInputStream(new File(yamlLocation))) - .get().get(0) match { - case deployment: Deployment => - val deploymentWithEnv: Deployment = new DeploymentBuilder(deployment) - .editSpec() - .editTemplate() - .editSpec() - .addNewVolume() - .withName(KRB_VOLUME) - .withNewConfigMap() - .withName(KRB_CONFIG_MAP_NAME) - .withItems(keyPaths.asJava) - .endConfigMap() - .endVolume() - .editMatchingContainer(new ContainerNameEqualityPredicate( - deployment.getMetadata.getName)) - .addNewEnv() - .withName("NAMESPACE") - .withValue(namespace) - .endEnv() - 
.addNewEnv() - .withName("MASTER_URL") - .withValue(kubernetesClient.getMasterUrl.toString) - .endEnv() - .addNewEnv() - .withName("SUBMIT_RESOURCE") - .withValue(resource) - .endEnv() - .addNewEnv() - .withName("CLASS_NAME") - .withValue(className) - .endEnv() - .addNewEnv() - .withName("HADOOP_CONF_DIR") - .withValue(HADOOP_CONF_DIR_PATH) - .endEnv() - .addNewEnv() - .withName("APP_LOCATOR_LABEL") - .withValue(appLabel) - .endEnv() - .addNewEnv() - .withName("SPARK_PRINT_LAUNCH_COMMAND") - .withValue("true") - .endEnv() - .addNewEnv() - .withName("TMP_KRB_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles.head}") - .endEnv() - .addNewEnv() - .withName("TMP_CORE_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles(1)}") - .endEnv() - .addNewEnv() - .withName("TMP_HDFS_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles(2)}") - .endEnv() - .addNewVolumeMount() - .withName(KRB_VOLUME) - .withMountPath(KRB_FILE_DIR) - .endVolumeMount() - .endContainer() - .endSpec() - .endTemplate() - .endSpec() - .build() - kubernetesClient.extensions().deployments() - .inNamespace(namespace).create(deploymentWithEnv)} - } -} - -private[spark] class ContainerNameEqualityPredicate(containerName: String) - extends Predicate[ContainerBuilder] { - override def apply(item: ContainerBuilder): Boolean = { - item.getName == containerName - } -} \ No newline at end of file diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala index 607e8ed234f6..6d11152db3a7 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala @@ -25,21 +25,23 @@ import 
org.apache.spark.deploy.k8s.integrationtest.kerberos._ private[spark] trait KerberosTestSuite { k8sSuite: KubernetesSuite => test("Secure HDFS test with HDFS keytab", k8sTestTag) { - kerberizedHadoopClusterLauncher.launchKerberizedCluster() - kerberosTestLauncher.startKerberosTest( - containerLocalSparkDistroExamplesJar, - HDFS_TEST_CLASS, - appLocator, - KERB_YAML_LOCATION) val kubernetesClient = kubernetesTestComponents.kubernetesClient + // Launches single-noded psuedo-distributed kerberized hadoop cluster + kerberizedHadoopClusterLauncher.launchKerberizedCluster(kerberosUtils) + + // Launches Kerberos test val driverWatcherCache = new KerberosDriverWatcherCache( - kubernetesClient, + kerberosUtils, Map("spark-app-locator" -> appLocator)) - driverWatcherCache.start() - driverWatcherCache.stop() + driverWatcherCache.deploy(kerberosUtils.getKerberosTest( + containerLocalSparkDistroExamplesJar, + HDFS_TEST_CLASS, + appLocator, + KERB_YAML_LOCATION)) + driverWatcherCache.stopWatch() val expectedLogOnCompletion = Seq( - "Returned length(s) of: 1", - "File contents: [This is an awesome word count file]") + "Returned length(s) of: [1, 1, 1]", + "Other stuff") val driverPod = kubernetesClient .pods() .inNamespace(kubernetesTestComponents.namespace) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index c99a907f98d0..c5d5e44c0c7e 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -34,11 +34,11 @@ import scala.collection.JavaConverters._ import org.apache.spark.SparkFunSuite import 
org.apache.spark.deploy.k8s.integrationtest.TestConfig._ import org.apache.spark.deploy.k8s.integrationtest.backend.{IntegrationTestBackend, IntegrationTestBackendFactory} +import org.apache.spark.deploy.k8s.integrationtest.kerberos.{KerberizedHadoopClusterLauncher, KerberosUtils} import org.apache.spark.internal.Logging private[spark] class KubernetesSuite extends SparkFunSuite - with BeforeAndAfterAll with BeforeAndAfter with BasicTestsSuite with SecretsTestsSuite - with PythonTestsSuite with ClientModeTestsSuite + with BeforeAndAfterAll with BeforeAndAfter with KerberosTestSuite with Logging with Eventually with Matchers { import KubernetesSuite._ @@ -46,6 +46,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite private var sparkHomeDir: Path = _ private var pyImage: String = _ private var rImage: String = _ + private var kImage: String = _ protected var image: String = _ protected var testBackend: IntegrationTestBackend = _ @@ -54,6 +55,9 @@ private[spark] class KubernetesSuite extends SparkFunSuite protected var sparkAppConf: SparkAppConf = _ protected var containerLocalSparkDistroExamplesJar: String = _ protected var appLocator: String = _ + // Kerberos related testing + protected var kerberizedHadoopClusterLauncher: KerberizedHadoopClusterLauncher = _ + protected var kerberosUtils: KerberosUtils = _ // Default memory limit is 1024M + 384M (minimum overhead constant) private val baseMemory = s"${1024 + 384}Mi" @@ -87,6 +91,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite image = s"$imageRepo/spark:$imageTag" pyImage = s"$imageRepo/spark-py:$imageTag" rImage = s"$imageRepo/spark-r:$imageTag" + kImage = s"$imageRepo/spark-kerberos:$imageTag" val sparkDistroExamplesJarFile: File = sparkHomeDir.resolve(Paths.get("examples", "jars")) .toFile @@ -96,6 +101,13 @@ private[spark] class KubernetesSuite extends SparkFunSuite testBackend = IntegrationTestBackendFactory.getTestBackend testBackend.initialize() kubernetesTestComponents = new 
KubernetesTestComponents(testBackend.getKubernetesClient) + kerberizedHadoopClusterLauncher = new KerberizedHadoopClusterLauncher( + kubernetesTestComponents.kubernetesClient.inNamespace(kubernetesTestComponents.namespace), + kubernetesTestComponents.namespace) + kerberosUtils = new KerberosUtils( + kImage, + kubernetesTestComponents.kubernetesClient, + kubernetesTestComponents.namespace) } override def afterAll(): Unit = { diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala index cb9324179d70..a437df63f5c1 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala @@ -17,10 +17,15 @@ package org.apache.spark.deploy.k8s.integrationtest.backend.minikube import io.fabric8.kubernetes.client.DefaultKubernetesClient +import org.scalatest.Matchers +import org.scalatest.concurrent.Eventually +import scala.collection.JavaConverters._ +import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.{INTERVAL, TIMEOUT} import org.apache.spark.deploy.k8s.integrationtest.backend.IntegrationTestBackend -private[spark] object MinikubeTestBackend extends IntegrationTestBackend { +private[spark] object MinikubeTestBackend + extends IntegrationTestBackend with Eventually with Matchers { private var defaultClient: DefaultKubernetesClient = _ @@ -33,10 +38,25 @@ private[spark] object MinikubeTestBackend extends IntegrationTestBackend { } override def cleanUp(): Unit = { + deleteKubernetesPVs() super.cleanUp() } override def getKubernetesClient: DefaultKubernetesClient = { 
defaultClient } + + private def deleteKubernetesPVs(): Unit = { + // Temporary hack until client library for fabric8 is updated to get around + // the NPE that comes about when I do .list().getItems().asScala + try { + val pvList = defaultClient.persistentVolumes().list().getItems.asScala + if (pvList.nonEmpty) { + defaultClient.persistentVolumes().delete() + Eventually.eventually(TIMEOUT, INTERVAL) { pvList.isEmpty should be (true) } + } + } catch { + case ex: java.lang.NullPointerException => + } + } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/ContainerNameEqualityPredicate.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/ContainerNameEqualityPredicate.scala new file mode 100644 index 000000000000..aaed74a6533f --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/ContainerNameEqualityPredicate.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.k8s.integrationtest.kerberos + +import java.lang.Boolean + +import io.fabric8.kubernetes.api.builder.Predicate +import io.fabric8.kubernetes.api.model.ContainerBuilder + +private[spark] class ContainerNameEqualityPredicate(containerName: String) + extends Predicate[ContainerBuilder] { + override def apply(item: ContainerBuilder): Boolean = { + item.getName == containerName + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberizedHadoopClusterLauncher.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberizedHadoopClusterLauncher.scala similarity index 52% rename from resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberizedHadoopClusterLauncher.scala rename to resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberizedHadoopClusterLauncher.scala index 37567f5ab832..a38c56cf446d 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberizedHadoopClusterLauncher.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberizedHadoopClusterLauncher.scala @@ -14,11 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.k8s.integrationtest +package org.apache.spark.deploy.k8s.integrationtest.kerberos +import io.fabric8.kubernetes.api.builder.Predicate +import io.fabric8.kubernetes.api.model.ContainerBuilder import io.fabric8.kubernetes.client.KubernetesClient -import org.apache.spark.deploy.k8s.integrationtest.kerberos._ import org.apache.spark.internal.Logging /** @@ -31,27 +32,28 @@ import org.apache.spark.internal.Logging private[spark] class KerberizedHadoopClusterLauncher( kubernetesClient: KubernetesClient, namespace: String) extends Logging { - private val LABELS = Map("job" -> "kerberostest") + private val LABELS = Map("job" -> "kerberostest") - def launchKerberizedCluster(): Unit = { - // These Utils allow for each step in this launch process to re-use - // common functionality for setting up hadoop nodes. - val kerberosUtils = new KerberosUtils(kubernetesClient, namespace) - // Launches persistent volumes and its claims for sharing keytabs across pods - val pvWatcherCache = new KerberosPVWatcherCache(kerberosUtils, LABELS) - pvWatcherCache.start() - pvWatcherCache.stop() - // Launches config map for the files in HADOOP_CONF_DIR - val cmWatcherCache = new KerberosCMWatcherCache(kerberosUtils) - cmWatcherCache.start() - cmWatcherCache.stop() - // Launches the Hadoop cluster pods: KDC --> NN --> DN1 --> Data-Populator - val podWatcherCache = new KerberosPodWatcherCache(kerberosUtils, LABELS) - podWatcherCache.start() - val dpNode = podWatcherCache.stop() - while (!podWatcherCache.hasInLogs(dpNode, "")) { - logInfo("Waiting for data-populator to be formatted") - Thread.sleep(500) - } + def launchKerberizedCluster(kerberosUtils: KerberosUtils): Unit = { + // These Utils allow for each step in this launch process to re-use + // common functionality for setting up hadoop nodes. 
+ // Launches persistent volumes and its claims for sharing keytabs across pods + val pvWatcherCache = new KerberosPVWatcherCache(kerberosUtils, LABELS) + pvWatcherCache.deploy(kerberosUtils.getNNStorage) + pvWatcherCache.deploy(kerberosUtils.getKTStorage) + pvWatcherCache.stopWatch() + + // Launches config map for the files in HADOOP_CONF_DIR + val cmWatcherCache = new KerberosCMWatcherCache(kerberosUtils) + cmWatcherCache.deploy(kerberosUtils.getConfigMap) + cmWatcherCache.stopWatch() + + // Launches the Hadoop cluster pods: KDC --> NN --> DN1 --> Data-Populator + val podWatcherCache = new KerberosPodWatcherCache(kerberosUtils, LABELS) + podWatcherCache.deploy(kerberosUtils.getKDC) + podWatcherCache.deploy(kerberosUtils.getNN) + podWatcherCache.deploy(kerberosUtils.getDN) + podWatcherCache.deploy(kerberosUtils.getDP) + podWatcherCache.stopWatch() } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosCMWatcherCache.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosCMWatcherCache.scala index 953ccbc9a377..d98c06cc8056 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosCMWatcherCache.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosCMWatcherCache.scala @@ -17,88 +17,62 @@ package org.apache.spark.deploy.k8s.integrationtest.kerberos -import java.util.concurrent.locks.{Condition, Lock, ReentrantLock} - -import scala.collection.JavaConverters._ - import io.fabric8.kubernetes.api.model.ConfigMap import io.fabric8.kubernetes.client.{KubernetesClientException, Watch, Watcher} import io.fabric8.kubernetes.client.Watcher.Action +import org.scalatest.Matchers +import org.scalatest.concurrent.Eventually +import scala.collection.JavaConverters._ 
+import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.{INTERVAL, TIMEOUT} import org.apache.spark.internal.Logging /** * This class is responsible for ensuring that no logic progresses in the cluster launcher * until a configmap with the HADOOP_CONF_DIR specifications has been created. */ -private[spark] class KerberosCMWatcherCache(kerberosUtils: KerberosUtils) extends Logging { - private val kubernetesClient = kerberosUtils.getClient - private val namespace = kerberosUtils.getNamespace - private val requiredFiles = Seq("core-site.xml", "hdfs-site.xml", "krb5.conf") - private var watcher: Watch = _ - private var cmCache = scala.collection.mutable.Map[String, Map[String, String]]() - private var lock: Lock = new ReentrantLock() - private var cmCreated: Condition = lock.newCondition() - private val configMap = kerberosUtils.getConfigMap - private val configMapName = configMap.getMetadata.getName - private val blockingThread = new Thread(new Runnable { - override def run(): Unit = { - logInfo("Beginning of ConfigMap lock") - lock.lock() - try { - while (!created()) cmCreated.await() - } finally { - logInfo("Ending the ConfigMap lock") - lock.unlock() - stop() - } - }}) - - private val watcherThread = new Thread(new Runnable { - override def run(): Unit = { - logInfo("Beginning the watch of the Kerberos Config Map") - watcher = kubernetesClient - .configMaps() - .withName(configMapName) - .watch(new Watcher[ConfigMap] { - override def onClose(cause: KubernetesClientException): Unit = - logInfo("Ending the watch of Kerberos Config Map") - override def eventReceived(action: Watcher.Action, resource: ConfigMap): Unit = { - val name = resource.getMetadata.getName - action match { - case Action.DELETED | Action.ERROR => - logInfo(s"$name either deleted or error") - cmCache.remove(name) - case Action.ADDED | Action.MODIFIED => - val data = resource.getData.asScala.toMap - logInfo(s"$name includes ${data.keys.mkString(",")}") - cmCache(name) = data - if 
(created()) { - lock.lock() - try { - cmCreated.signalAll() - } finally { - lock.unlock() - } - } - }}} - ) - logInfo("Launching the Config Map") - kerberosUtils.getClient.configMaps().inNamespace(namespace).createOrReplace(configMap) - }}) - - def start(): Unit = { - blockingThread.start() - watcherThread.start() - blockingThread.join() - watcherThread.join()} +private[spark] class KerberosCMWatcherCache(kerberosUtils: KerberosUtils) + extends Logging with Eventually with Matchers { + private val kubernetesClient = kerberosUtils.getClient + private val namespace = kerberosUtils.getNamespace + private val requiredFiles = Seq("core-site.xml", "hdfs-site.xml", "krb5.conf") + private val cmCache = scala.collection.mutable.Map[String, Map[String, String]]() + private val configMap = kerberosUtils.getConfigMap + private val configMapName = configMap.getMetadata.getName + // Watching ConfigMaps + logInfo("Beginning the watch of the Kerberos Config Map") + private val watcher: Watch = kubernetesClient + .configMaps() + .withName(configMapName) + .watch(new Watcher[ConfigMap] { + override def onClose(cause: KubernetesClientException): Unit = + logInfo("Ending the watch of Kerberos Config Map") + override def eventReceived(action: Watcher.Action, resource: ConfigMap): Unit = { + val name = resource.getMetadata.getName + action match { + case Action.DELETED | Action.ERROR => + logInfo(s"$name either deleted or error") + cmCache.remove(name) + case Action.ADDED | Action.MODIFIED => + val data = resource.getData.asScala.toMap + logInfo(s"$name includes ${data.keys.mkString(",")}") + cmCache(name) = data + }}}) + // Check for CM to have proper files + private def check(name: String): Boolean = { + cmCache.get(name).exists{ data => requiredFiles.forall(data.keys.toSeq.contains)} + } - def stop(): Unit = { - watcher.close() - } + def deploy(configMap: ConfigMap): Unit = { + logInfo("Launching the ConfigMap") + 
kerberosUtils.getClient.configMaps().inNamespace(namespace).createOrReplace(configMap) + // Making sure CM has correct files + Eventually.eventually(TIMEOUT, INTERVAL) { + check(configMap.getMetadata.getName) should be (true) } + } - def created(): Boolean = { - cmCache.get(configMapName).exists{ data => - requiredFiles.forall(data.keys.toSeq.contains)} - } + def stopWatch() : Unit = { + // Closing Watcher + watcher.close() + } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDeployment.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDeployment.scala index 0f2719575bf5..3f94cf25d0c4 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDeployment.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDeployment.scala @@ -20,5 +20,6 @@ import io.fabric8.kubernetes.api.model.Service import io.fabric8.kubernetes.api.model.extensions.Deployment private[spark] case class KerberosDeployment( + name: String, podDeployment: Deployment, service: Service) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala index cf87572e9b4d..75cdb8fa4b23 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala @@ -17,14 +17,15 @@ package 
org.apache.spark.deploy.k8s.integrationtest.kerberos -import java.util.concurrent.locks.{Condition, Lock, ReentrantLock} - -import scala.collection.JavaConverters._ - import io.fabric8.kubernetes.api.model.Pod -import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watch, Watcher} +import io.fabric8.kubernetes.api.model.extensions.Deployment +import io.fabric8.kubernetes.client.{KubernetesClientException, Watch, Watcher} import io.fabric8.kubernetes.client.Watcher.Action +import org.scalatest.Matchers +import org.scalatest.concurrent.Eventually +import scala.collection.JavaConverters._ +import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.{INTERVAL, TIMEOUT} import org.apache.spark.internal.Logging /** @@ -32,68 +33,40 @@ import org.apache.spark.internal.Logging * is running before trying to grab its logs for the sake of monitoring success of completition. */ private[spark] class KerberosDriverWatcherCache( - kubernetesClient: KubernetesClient, - labels: Map[String, String]) extends Logging { - private var podWatcher: Watch = _ - private var podCache = + kerberosUtils: KerberosUtils, + labels: Map[String, String]) extends Logging with Eventually with Matchers { + private val kubernetesClient = kerberosUtils.getClient + private val namespace = kerberosUtils.getNamespace + private var driverName: String = "" + private val podCache = scala.collection.mutable.Map[String, String]() - private var lock: Lock = new ReentrantLock() - private var driverRunning: Condition = lock.newCondition() - private var driverIsUp: Boolean = false - private val blockingThread = new Thread(new Runnable { - override def run(): Unit = { - logInfo("Beginning of Driver lock") - lock.lock() - try { - while (!driverIsUp) driverRunning.await() - } finally { - logInfo("Ending the Driver lock") - lock.unlock() - stop() - } - } - }) - - private val podWatcherThread = new Thread(new Runnable { - override def run(): Unit = { - logInfo("Beginning the watch of 
Driver pod") - podWatcher = kubernetesClient - .pods() - .withLabels(labels.asJava) - .watch(new Watcher[Pod] { - override def onClose(cause: KubernetesClientException): Unit = - logInfo("Ending the watch of Driver pod") - override def eventReceived(action: Watcher.Action, resource: Pod): Unit = { - val name = resource.getMetadata.getName - action match { - case Action.DELETED | Action.ERROR => - logInfo(s"$name either deleted or error") - podCache.remove(name) - case Action.ADDED | Action.MODIFIED => - val phase = resource.getStatus.getPhase - logInfo(s"$name is as $phase") - podCache(name) = phase - if (maybeDriverDone(name)) { - lock.lock() - try { - driverIsUp = true - driverRunning.signalAll() - } finally { - lock.unlock() - } - }}}}) - }}) - - def start(): Unit = { - blockingThread.start() - podWatcherThread.start() - blockingThread.join() - podWatcherThread.join() - } + private val watcher: Watch = kubernetesClient + .pods() + .withLabels(labels.asJava) + .watch(new Watcher[Pod] { + override def onClose(cause: KubernetesClientException): Unit = + logInfo("Ending the watch of Driver pod") + override def eventReceived(action: Watcher.Action, resource: Pod): Unit = { + val name = resource.getMetadata.getName + action match { + case Action.DELETED | Action.ERROR => + logInfo(s"$name either deleted or error") + podCache.remove(name) + case Action.ADDED | Action.MODIFIED => + val phase = resource.getStatus.getPhase + logInfo(s"$name is as $phase") + podCache(name) = phase + if (name.contains("driver")) { driverName = name } + }}}) - def stop(): Unit = { - podWatcher.close() - } + private def check(name: String): Boolean = podCache.get(name).contains("Running") - private def maybeDriverDone(name: String): Boolean = podCache.get(name).contains("Running") + def deploy(deployment: Deployment): Unit = { + kubernetesClient.extensions().deployments().inNamespace(namespace).create(deployment) + Eventually.eventually(TIMEOUT, INTERVAL) { check(driverName) should be (true) 
} + } + def stopWatch(): Unit = { + // Closing Watch + watcher.close() + } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPVWatcherCache.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPVWatcherCache.scala index 27f8d250b709..ec8b2fcc54d9 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPVWatcherCache.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPVWatcherCache.scala @@ -17,14 +17,14 @@ package org.apache.spark.deploy.k8s.integrationtest.kerberos -import java.util.concurrent.locks.{Condition, Lock, ReentrantLock} - -import scala.collection.JavaConverters._ - import io.fabric8.kubernetes.api.model.{PersistentVolume, PersistentVolumeClaim} import io.fabric8.kubernetes.client.{KubernetesClientException, Watch, Watcher} import io.fabric8.kubernetes.client.Watcher.Action +import org.scalatest.Matchers +import org.scalatest.concurrent.Eventually +import scala.collection.JavaConverters._ +import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.{INTERVAL, TIMEOUT} import org.apache.spark.internal.Logging /** @@ -34,151 +34,76 @@ import org.apache.spark.internal.Logging */ private[spark] class KerberosPVWatcherCache( kerberosUtils: KerberosUtils, - labels: Map[String, String]) extends Logging { + labels: Map[String, String]) extends Logging with Eventually with Matchers { private val kubernetesClient = kerberosUtils.getClient private val namespace = kerberosUtils.getNamespace - private var pvWatcher: Watch = _ - private var pvcWatcher: Watch = _ - private var pvCache = - scala.collection.mutable.Map[String, String]() - private var pvcCache = - scala.collection.mutable.Map[String, String]() - private var lock: Lock = new 
ReentrantLock() - private var nnBounded: Condition = lock.newCondition() - private var ktBounded: Condition = lock.newCondition() - private var nnIsUp: Boolean = false - private var ktIsUp: Boolean = false - private var nnSpawned: Boolean = false - private var ktSpawned: Boolean = false - private val blockingThread = new Thread(new Runnable { - override def run(): Unit = { - logInfo("Beginning of Persistent Storage Lock") - lock.lock() - try { - while (!nnIsUp) nnBounded.await() - while (!ktIsUp) ktBounded.await() - } finally { - logInfo("Ending the Persistent Storage lock") - lock.unlock() - stop() - } - } - }) - private val pvWatcherThread = new Thread(new Runnable { - override def run(): Unit = { - logInfo("Beginning the watch of Persistent Volumes") - pvWatcher = kubernetesClient - .persistentVolumes() - .withLabels(labels.asJava) - .watch(new Watcher[PersistentVolume] { - override def onClose(cause: KubernetesClientException): Unit = - logInfo("Ending the watch of Persistent Volumes", cause) - override def eventReceived(action: Watcher.Action, resource: PersistentVolume): Unit = { - val name = resource.getMetadata.getName - action match { - case Action.DELETED | Action.ERROR => - logInfo(s"$name either deleted or error") - pvCache.remove(name) - case Action.ADDED | Action.MODIFIED => - val phase = resource.getStatus.getPhase - logInfo(s"$name is at stage: $phase") - pvCache(name) = phase - if (maybeDeploymentAndServiceDone(name)) { - val modifyAndSignal: Runnable = new MSThread(name) - new Thread(modifyAndSignal).start() - }}}}) - }}) - private val pvcWatcherThread = new Thread(new Runnable { - override def run(): Unit = { - logInfo("Beginning the watch of Persistent Volume Claims") - pvcWatcher = kubernetesClient - .persistentVolumeClaims() - .withLabels(labels.asJava) - .watch(new Watcher[PersistentVolumeClaim] { - override def onClose(cause: KubernetesClientException): Unit = - logInfo("Ending the watch of Persistent Volume Claims") - override def 
eventReceived( - action: Watcher.Action, - resource: PersistentVolumeClaim): Unit = { - val name = resource.getMetadata.getName - action match { - case Action.DELETED | Action.ERROR => - logInfo(s"$name either deleted or error") - pvcCache.remove(name) - case Action.ADDED | Action.MODIFIED => - val volumeName = resource.getSpec.getVolumeName - logInfo(s"$name claims itself to $volumeName") - pvcCache(name) = volumeName - if (maybeDeploymentAndServiceDone(name)) { - val modifyAndSignal: Runnable = new MSThread(name) - new Thread(modifyAndSignal).start() - }}}}) - logInfo("Launching the Persistent Storage") - if (!nnSpawned) { - logInfo("Launching the NN Hadoop PV+PVC") - nnSpawned = true - deploy(kerberosUtils.getNNStorage) - } - }}) - def start(): Unit = { - blockingThread.start() - pvWatcherThread.start() - pvcWatcherThread.start() - blockingThread.join() - pvWatcherThread.join() - pvcWatcherThread.join() - } - def stop(): Unit = { - pvWatcher.close() - pvcWatcher.close() - } + // Cache for PVs and PVCs + private val pvCache = scala.collection.mutable.Map[String, String]() + private val pvcCache = scala.collection.mutable.Map[String, String]() + + // Watching PVs + logInfo("Beginning the watch of Persistent Volumes") + private val pvWatcher: Watch = kubernetesClient + .persistentVolumes() + .withLabels(labels.asJava) + .watch(new Watcher[PersistentVolume] { + override def onClose(cause: KubernetesClientException): Unit = + logInfo("Ending the watch of Persistent Volumes", cause) + override def eventReceived(action: Watcher.Action, resource: PersistentVolume): Unit = { + val name = resource.getMetadata.getName + action match { + case Action.DELETED | Action.ERROR => + logInfo(s"$name either deleted or error") + pvCache.remove(name) + case Action.ADDED | Action.MODIFIED => + val phase = resource.getStatus.getPhase + logInfo(s"$name is at stage: $phase") + pvCache(name) = phase }}}) - private def maybeDeploymentAndServiceDone(name: String): Boolean = { - val finished 
= pvCache.get(name).contains("Available") && - pvcCache.get(name).contains(name) - if (!finished) { - logInfo(s"$name is not available") - if (name == "nn-hadoop") nnIsUp = false - else if (name == "server-keytab") ktIsUp = false - } - finished + // Watching PVCs + logInfo("Beginning the watch of Persistent Volume Claims") + private val pvcWatcher: Watch = kubernetesClient + .persistentVolumeClaims() + .withLabels(labels.asJava) + .watch(new Watcher[PersistentVolumeClaim] { + override def onClose(cause: KubernetesClientException): Unit = + logInfo("Ending the watch of Persistent Volume Claims") + override def eventReceived( + action: Watcher.Action, + resource: PersistentVolumeClaim): Unit = { + val name = resource.getMetadata.getName + action match { + case Action.DELETED | Action.ERROR => + logInfo(s"$name either deleted or error") + pvcCache.remove(name) + case Action.ADDED | Action.MODIFIED => + val volumeName = resource.getSpec.getVolumeName + val state = resource.getStatus.getPhase + logInfo(s"$name claims itself to $volumeName and is $state") + pvcCache(name) = s"$volumeName $state"}}}) + + // Check for PVC being bounded to correct PV + private def check(name: String): Boolean = { + pvCache.get(name).contains("Bound") && + pvcCache.get(name).contains(s"$name Bound") } - private def deploy(kbs: KerberosStorage) : Unit = { - kubernetesClient - .persistentVolumeClaims().inNamespace(namespace).create(kbs.persistentVolumeClaim) + def deploy(kbs: KerberosStorage) : Unit = { + logInfo("Launching the Persistent Storage") kubernetesClient .persistentVolumes().create(kbs.persistentVolume) + // Making sure PV is Available for creation of PVC + Eventually.eventually(TIMEOUT, INTERVAL) { + (pvCache(kbs.name) == "Available") should be (true) } + kubernetesClient + .persistentVolumeClaims().inNamespace(namespace).create(kbs.persistentVolumeClaim) + Eventually.eventually(TIMEOUT, INTERVAL) { check(kbs.name) should be (true) } } - private class MSThread(name: String) extends 
Runnable { - override def run(): Unit = { - logInfo(s"$name PV and PVC are bounded") - lock.lock() - if (name == "nn-hadoop") { - nnIsUp = true - logInfo(s"nn-hadoop is bounded") - try { - nnBounded.signalAll() - } finally { - lock.unlock() - } - if (!ktSpawned) { - logInfo("Launching the KT Hadoop PV+PVC") - ktSpawned = true - deploy(kerberosUtils.getKTStorage) - } - } - else if (name == "server-keytab") { - while (!nnIsUp) ktBounded.await() - ktIsUp = true - logInfo(s"server-keytab is bounded") - try { - ktBounded.signalAll() - } finally { - lock.unlock() - } - }} - } + def stopWatch(): Unit = { + // Closing Watchers + pvWatcher.close() + pvcWatcher.close() + } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPodWatcherCache.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPodWatcherCache.scala index 11f28587be3e..09d42f472a05 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPodWatcherCache.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPodWatcherCache.scala @@ -17,14 +17,14 @@ package org.apache.spark.deploy.k8s.integrationtest.kerberos -import java.util.concurrent.locks.{Condition, Lock, ReentrantLock} - -import scala.collection.JavaConverters._ - import io.fabric8.kubernetes.api.model.{Pod, Service} import io.fabric8.kubernetes.client.{KubernetesClientException, Watch, Watcher} import io.fabric8.kubernetes.client.Watcher.Action +import org.scalatest.Matchers +import org.scalatest.concurrent.Eventually +import scala.collection.JavaConverters._ +import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.{INTERVAL, TIMEOUT} import org.apache.spark.internal.Logging /** @@ -34,209 +34,87 @@ import 
org.apache.spark.internal.Logging */ private[spark] class KerberosPodWatcherCache( kerberosUtils: KerberosUtils, - labels: Map[String, String]) extends Logging { - private val kubernetesClient = kerberosUtils.getClient - private val namespace = kerberosUtils.getNamespace - private var podWatcher: Watch = _ - private var serviceWatcher: Watch = _ - private var podCache = - scala.collection.mutable.Map[String, String]() - private var serviceCache = - scala.collection.mutable.Map[String, String]() - private var lock: Lock = new ReentrantLock() - private var kdcRunning: Condition = lock.newCondition() - private var nnRunning: Condition = lock.newCondition() - private var dnRunning: Condition = lock.newCondition() - private var dpRunning: Condition = lock.newCondition() - private var kdcIsUp: Boolean = false - private var nnIsUp: Boolean = false - private var dnIsUp: Boolean = false - private var dpIsUp: Boolean = false - private var kdcSpawned: Boolean = false - private var nnSpawned: Boolean = false - private var dnSpawned: Boolean = false - private var dpSpawned: Boolean = false - private var dnName: String = _ - private var dpName: String = _ - - private val blockingThread = new Thread(new Runnable { - override def run(): Unit = { - logInfo("Beginning of Cluster lock") - lock.lock() - try { - while (!kdcIsUp) kdcRunning.await() - while (!nnIsUp) nnRunning.await() - while (!dnIsUp) dnRunning.await() - while (!dpIsUp) dpRunning.await() - } finally { - logInfo("Ending the Cluster lock") - lock.unlock() - stop() - } - } - }) - - private val podWatcherThread = new Thread(new Runnable { - override def run(): Unit = { - logInfo("Beginning the watch of Pods") - podWatcher = kubernetesClient - .pods() - .withLabels(labels.asJava) - .watch(new Watcher[Pod] { - override def onClose(cause: KubernetesClientException): Unit = - logInfo("Ending the watch of Pods") - override def eventReceived(action: Watcher.Action, resource: Pod): Unit = { - val name = 
resource.getMetadata.getName - val keyName = podNameParse(name) - action match { - case Action.DELETED | Action.ERROR => - logInfo(s"$name either deleted or error") - podCache.remove(keyName) - case Action.ADDED | Action.MODIFIED => - val phase = resource.getStatus.getPhase - logInfo(s"$name is as $phase") - if (name.startsWith("dn1")) { dnName = name } - if (name.startsWith("data-populator")) { dpName = name } - podCache(keyName) = phase - if (maybeDeploymentAndServiceDone(keyName)) { - val modifyAndSignal: Runnable = new MSThread(keyName) - new Thread(modifyAndSignal).start() - }}}}) - }}) - - private val serviceWatcherThread = new Thread(new Runnable { - override def run(): Unit = { - logInfo("Beginning the watch of Services") - serviceWatcher = kubernetesClient - .services() - .withLabels(labels.asJava) - .watch(new Watcher[Service] { - override def onClose(cause: KubernetesClientException): Unit = - logInfo("Ending the watch of Services") - override def eventReceived(action: Watcher.Action, resource: Service): Unit = { - val name = resource.getMetadata.getName - action match { - case Action.DELETED | Action.ERROR => - logInfo(s"$name either deleted or error") - serviceCache.remove(name) - case Action.ADDED | Action.MODIFIED => - val bound = resource.getSpec.getSelector.get("kerberosService") - logInfo(s"$name is bounded to $bound") - serviceCache(name) = bound - if (maybeDeploymentAndServiceDone(name)) { - val modifyAndSignal: Runnable = new MSThread(name) - new Thread(modifyAndSignal).start() - }}}}) - logInfo("Launching the Cluster") - if (!kdcSpawned) { - logInfo("Launching the KDC Node") - kdcSpawned = true - deploy(kerberosUtils.getKDC) - } - }}) - - def start(): Unit = { - blockingThread.start() - podWatcherThread.start() - serviceWatcherThread.start() - blockingThread.join() - podWatcherThread.join() - serviceWatcherThread.join() - } - - def stop(): String = { - podWatcher.close() - serviceWatcher.close() - dpName - } - - private def 
maybeDeploymentAndServiceDone(name: String): Boolean = { - val finished = podCache.get(name).contains("Running") && - serviceCache.get(name).contains(name) - if (!finished) { - logInfo(s"$name is not up with a service") - if (name == "kerberos") kdcIsUp = false - else if (name == "nn") nnIsUp = false - else if (name == "dn1") dnIsUp = false - else if (name == "data-populator") dpIsUp = false + labels: Map[String, String]) extends Logging with Eventually with Matchers { + + private val kubernetesClient = kerberosUtils.getClient + private val namespace = kerberosUtils.getNamespace + private val podCache = scala.collection.mutable.Map[String, String]() + private val serviceCache = scala.collection.mutable.Map[String, String]() + private var kdcName: String = _ + private var nnName: String = _ + private var dnName: String = _ + private var dpName: String = _ + private val podWatcher: Watch = kubernetesClient + .pods() + .withLabels(labels.asJava) + .watch(new Watcher[Pod] { + override def onClose(cause: KubernetesClientException): Unit = + logInfo("Ending the watch of Pods") + override def eventReceived(action: Watcher.Action, resource: Pod): Unit = { + val name = resource.getMetadata.getName + val keyName = podNameParse(name) + action match { + case Action.DELETED | Action.ERROR => + logInfo(s"$name either deleted or error") + podCache.remove(keyName) + case Action.ADDED | Action.MODIFIED => + val phase = resource.getStatus.getPhase + logInfo(s"$name is as $phase") + if (keyName == "kerberos") { kdcName = name } + if (keyName == "nn") { nnName = name } + if (keyName == "dn1") { dnName = name } + if (keyName == "data-populator") { dpName = name } + podCache(keyName) = phase }}}) + + private val serviceWatcher: Watch = kubernetesClient + .services() + .withLabels(labels.asJava) + .watch(new Watcher[Service] { + override def onClose(cause: KubernetesClientException): Unit = + logInfo("Ending the watch of Services") + override def eventReceived(action: Watcher.Action, 
resource: Service): Unit = { + val name = resource.getMetadata.getName + action match { + case Action.DELETED | Action.ERROR => + logInfo(s"$name either deleted or error") + serviceCache.remove(name) + case Action.ADDED | Action.MODIFIED => + val bound = resource.getSpec.getSelector.get("kerberosService") + logInfo(s"$name is bounded to $bound") + serviceCache(name) = bound }}}) + + private def additionalCheck(name: String): Boolean = { + name match { + case "kerberos" => hasInLogs(kdcName, "krb5kdc: starting") + case "nn" => hasInLogs(nnName, "createNameNode") + case "dn1" => hasInLogs(dnName, "Got finalize command for block pool") + case "data-populator" => hasInLogs(dpName, "Entered Krb5Context.initSecContext") + } + } + + private def check(name: String): Boolean = { + podCache.get(name).contains("Running") && + serviceCache.get(name).contains(name) && + additionalCheck(name) } - finished - } - private def deploy(kdc: KerberosDeployment) : Unit = { - kubernetesClient - .extensions().deployments().inNamespace(namespace).create(kdc.podDeployment) - kubernetesClient - .services().inNamespace(namespace).create(kdc.service) + def deploy(kdc: KerberosDeployment) : Unit = { + logInfo("Launching the Deployment") + kubernetesClient + .extensions().deployments().inNamespace(namespace).create(kdc.podDeployment) + // Making sure Pod is running + Eventually.eventually(TIMEOUT, INTERVAL) { + (podCache(kdc.name) == "Running") should be (true) } + kubernetesClient + .services().inNamespace(namespace).create(kdc.service) + Eventually.eventually(TIMEOUT, INTERVAL) { check(kdc.name) should be (true) } } - private class MSThread(name: String) extends Runnable { - override def run(): Unit = { - logInfo(s"$name Node and Service is up") - lock.lock() - if (name == "kerberos") { - kdcIsUp = true - logInfo(s"kdc has signaled") - try { - kdcRunning.signalAll() - } finally { - lock.unlock() - } - if (!nnSpawned) { - logInfo("Launching the NN Node") - nnSpawned = true - 
deploy(kerberosUtils.getNN) - } - } - else if (name == "nn") { - while (!kdcIsUp) kdcRunning.await() - nnIsUp = true - logInfo(s"nn has signaled") - try { - nnRunning.signalAll() - } finally { - lock.unlock() - } - if (!dnSpawned) { - logInfo("Launching the DN Node") - dnSpawned = true - deploy(kerberosUtils.getDN) - } - } - else if (name == "dn1") { - while (!kdcIsUp) kdcRunning.await() - while (!nnIsUp) nnRunning.await() - dnIsUp = true - logInfo(s"dn1 has signaled") - try { - dnRunning.signalAll() - } finally { - lock.unlock() - } - if (!dpSpawned) { - logInfo("Launching the DP Node") - dpSpawned = true - deploy(kerberosUtils.getDP) - } - } - else if (name == "data-populator") { - while (!kdcIsUp) kdcRunning.await() - while (!nnIsUp) nnRunning.await() - while (!dnIsUp) dnRunning.await() - while (!hasInLogs(dnName, "Got finalize command for block pool")) { - logInfo("Waiting on DN to be formatted") - Thread.sleep(500) - } - dpIsUp = true - logInfo(s"data-populator has signaled") - try { - dpRunning.signalAll() - } finally { - lock.unlock() - } - } - } - } + def stopWatch(): Unit = { + // Closing Watchers + podWatcher.close() + serviceWatcher.close() + } private def podNameParse(name: String) : String = { name match { diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala index 15d0ffdeeaa2..a92dc35c71b8 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala @@ -19,5 +19,6 @@ package org.apache.spark.deploy.k8s.integrationtest.kerberos import io.fabric8.kubernetes.api.model.{PersistentVolume, 
PersistentVolumeClaim} private[spark] case class KerberosStorage( + name: String, persistentVolumeClaim: PersistentVolumeClaim, persistentVolume: PersistentVolume) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala index c9c6254fd241..574fdd570697 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala @@ -18,19 +18,17 @@ package org.apache.spark.deploy.k8s.integrationtest.kerberos import java.io.{File, FileInputStream} -import scala.collection.JavaConverters._ - import io.fabric8.kubernetes.api.model._ import io.fabric8.kubernetes.api.model.extensions.{Deployment, DeploymentBuilder} import io.fabric8.kubernetes.client.KubernetesClient import org.apache.commons.io.FileUtils.readFileToString +import scala.collection.JavaConverters._ -import org.apache.spark.deploy.k8s.integrationtest.ContainerNameEqualityPredicate - - /** - * This class is responsible for handling all Utils and Constants necessary for testing + /** + * This class is responsible for handling all Utils and Constants necessary for testing */ private[spark] class KerberosUtils( + kerberosImage: String, kubernetesClient: KubernetesClient, namespace: String) { def getClient: KubernetesClient = kubernetesClient @@ -49,7 +47,7 @@ private[spark] class KerberosUtils( Seq(("krb5-dp.conf", regexDP.replaceAllIn(regex.replaceAllIn(readFileToString( new File(locationResolver("krb5.conf"))), namespace), defaultCacheDP))) private val KRB_VOLUME = "krb5-conf" - private val KRB_FILE_DIR = "/tmp" + private val KRB_FILE_DIR = "/mnt" private val 
KRB_CONFIG_MAP_NAME = "krb-config-map" private val PV_LABELS = Map("job" -> "kerberostest") private val keyPaths: Seq[KeyToPath] = (kerberosFiles ++ Seq("krb5-dp.conf")) @@ -67,88 +65,192 @@ private[spark] class KerberosUtils( "job" -> "kerberostest").asJava) .endMetadata() .withNewSpec() + .withStorageClassName(name) .withCapacity(Map("storage" -> new Quantity("1Gi")).asJava) - .withAccessModes("ReadWriteOnce") + .withAccessModes("ReadWriteMany") .withHostPath( - new HostPathVolumeSource(s"/tmp/$namespace/$pathType")) + new HostPathVolumeSource(s"/mnt/$namespace/$pathType")) .endSpec() .build() - private val pvNN = "nn-hadoop" - private val pvKT = "server-keytab" - private val persistentVolumeMap: Map[String, PersistentVolume] = Map( + private def createPVCTemplate(name: String) : PersistentVolumeClaim = + new PersistentVolumeClaimBuilder() + .withNewMetadata() + .withName(name) + .withLabels(Map( + "job" -> "kerberostest").asJava) + .endMetadata() + .withNewSpec() + .withStorageClassName(name) + .withVolumeName(name) + .withAccessModes("ReadWriteMany") + .withNewResources() + .withRequests(Map("storage" -> new Quantity("1Gi")).asJava) + .endResources() + .endSpec() + .build() + private val pvNN = "nn-hadoop" + private val pvKT = "server-keytab" + private val persistentVolumeMap: Map[String, PersistentVolume] = Map( pvNN -> createPVTemplate(pvNN, "nn"), pvKT -> createPVTemplate(pvKT, "keytab")) - private def buildKerberosPV(pvType: String) = { - KerberosStorage( - kubernetesClient.load(loadFromYaml(pvType)) - .get().get(0).asInstanceOf[PersistentVolumeClaim], - persistentVolumeMap(pvType)) + private def buildKerberosPV(pvType: String) = { + KerberosStorage( + pvType, + createPVCTemplate(pvType), + persistentVolumeMap(pvType)) } + def getNNStorage: KerberosStorage = buildKerberosPV(pvNN) + def getKTStorage: KerberosStorage = buildKerberosPV(pvKT) + def getLabels: Map[String, String] = PV_LABELS + def getKeyPaths: Seq[KeyToPath] = keyPaths + def getConfigMap: 
ConfigMap = new ConfigMapBuilder() + .withNewMetadata() + .withName(KRB_CONFIG_MAP_NAME) + .endMetadata() + .addToData(kerberosConfTupList.toMap.asJava) + .build() + private val kdcNode = Seq("kerberos-deployment", "kerberos-service") + private val nnNode = Seq("nn-deployment", "nn-service") + private val dnNode = Seq("dn1-deployment", "dn1-service") + private val dataPopulator = Seq("data-populator-deployment", "data-populator-service") + private def buildKerberosDeployment(name: String, seqPair: Seq[String]) = { + val deployment = + kubernetesClient.load(loadFromYaml(seqPair.head)).get().get(0).asInstanceOf[Deployment] + KerberosDeployment( + name, + new DeploymentBuilder(deployment) + .editSpec() + .editTemplate() + .editSpec() + .addNewVolume() + .withName(KRB_VOLUME) + .withNewConfigMap() + .withName(KRB_CONFIG_MAP_NAME) + .withItems(keyPaths.asJava) + .endConfigMap() + .endVolume() + .editMatchingContainer(new ContainerNameEqualityPredicate( + deployment.getMetadata.getName)) + .addNewEnv() + .withName("NAMESPACE") + .withValue(namespace) + .endEnv() + .addNewEnv() + .withName("TMP_KRB_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles.head}") + .endEnv() + .addNewEnv() + .withName("TMP_KRB_DP_LOC") + .withValue(s"$KRB_FILE_DIR/krb5-dp.conf") + .endEnv() + .addNewEnv() + .withName("TMP_CORE_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles(1)}") + .endEnv() + .addNewEnv() + .withName("TMP_HDFS_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles(2)}") + .endEnv() + .addNewVolumeMount() + .withName(KRB_VOLUME) + .withMountPath(KRB_FILE_DIR) + .endVolumeMount() + .endContainer() + .endSpec() + .endTemplate() + .endSpec() + .build(), + kubernetesClient.load(loadFromYaml(seqPair(1))).get().get(0).asInstanceOf[Service] ) } - def getNNStorage: KerberosStorage = buildKerberosPV(pvNN) - def getKTStorage: KerberosStorage = buildKerberosPV(pvKT) - def getLabels: Map[String, String] = PV_LABELS - def getKeyPaths: Seq[KeyToPath] = keyPaths - def getConfigMap: ConfigMap = 
new ConfigMapBuilder() - .withNewMetadata() - .withName(KRB_CONFIG_MAP_NAME) - .endMetadata() - .addToData(kerberosConfTupList.toMap.asJava) - .build() - private val kdcNode = Seq("kerberos-deployment", "kerberos-service") - private val nnNode = Seq("nn-deployment", "nn-service") - private val dnNode = Seq("dn1-deployment", "dn1-service") - private val dataPopulator = Seq("data-populator-deployment", "data-populator-service") - private def buildKerberosDeployment(seqPair: Seq[String]) = { - val deployment = - kubernetesClient.load(loadFromYaml(seqPair.head)).get().get(0).asInstanceOf[Deployment] - KerberosDeployment( - new DeploymentBuilder(deployment) - .editSpec() - .editTemplate() - .editSpec() - .addNewVolume() - .withName(KRB_VOLUME) - .withNewConfigMap() - .withName(KRB_CONFIG_MAP_NAME) - .withItems(keyPaths.asJava) - .endConfigMap() + def getKDC: KerberosDeployment = buildKerberosDeployment("kerberos", kdcNode) + def getNN: KerberosDeployment = buildKerberosDeployment("nn", nnNode) + def getDN: KerberosDeployment = buildKerberosDeployment("dn1", dnNode) + def getDP: KerberosDeployment = buildKerberosDeployment("data-populator", dataPopulator) + private val HADOOP_CONF_DIR_PATH = "/opt/spark/hconf" + private val krb5TestkeyPaths = kerberosFiles.map(file => + new KeyToPathBuilder() + .withKey(file) + .withPath(file) + .build()).toList + def getKerberosTest( + resource: String, + className: String, + appLabel: String, + yamlLocation: String): Deployment = { + new DeploymentBuilder() + .editSpec() + .editTemplate() + .editOrNewMetadata() + .addToLabels(Map("name" -> "kerberos-test").asJava) + .endMetadata() + .editSpec() + .addNewVolume() + .withName(KRB_VOLUME) + .withNewConfigMap() + .withName(KRB_CONFIG_MAP_NAME) + .withItems(krb5TestkeyPaths.asJava) + .endConfigMap() + .endVolume() + .addNewVolume() + .withName("kerberos-test-keytab") + .withNewPersistentVolumeClaim("server-keytab", false) .endVolume() - .editMatchingContainer(new 
ContainerNameEqualityPredicate( - deployment.getMetadata.getName)) - .addNewEnv() - .withName("NAMESPACE") - .withValue(namespace) - .endEnv() - .addNewEnv() - .withName("TMP_KRB_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles.head}") - .endEnv() - .addNewEnv() - .withName("TMP_KRB_DP_LOC") - .withValue(s"$KRB_FILE_DIR/krb5-dp.conf") - .endEnv() - .addNewEnv() - .withName("TMP_CORE_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles(1)}") - .endEnv() - .addNewEnv() - .withName("TMP_HDFS_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles(2)}") - .endEnv() - .addNewVolumeMount() - .withName(KRB_VOLUME) - .withMountPath(KRB_FILE_DIR) - .endVolumeMount() - .endContainer() - .endSpec() - .endTemplate() - .endSpec() - .build(), - kubernetesClient.load(loadFromYaml(seqPair(1))).get().get(0).asInstanceOf[Service] - ) - } - def getKDC: KerberosDeployment = buildKerberosDeployment(kdcNode) - def getNN: KerberosDeployment = buildKerberosDeployment(nnNode) - def getDN: KerberosDeployment = buildKerberosDeployment(dnNode) - def getDP: KerberosDeployment = buildKerberosDeployment(dataPopulator) + .addNewContainer() + .addNewEnv() + .withName("NAMESPACE") + .withValue(namespace) + .endEnv() + .addNewEnv() + .withName("MASTER_URL") + .withValue(kubernetesClient.getMasterUrl.toString) + .endEnv() + .addNewEnv() + .withName("SUBMIT_RESOURCE") + .withValue(resource) + .endEnv() + .addNewEnv() + .withName("CLASS_NAME") + .withValue(className) + .endEnv() + .addNewEnv() + .withName("HADOOP_CONF_DIR") + .withValue(HADOOP_CONF_DIR_PATH) + .endEnv() + .addNewEnv() + .withName("APP_LOCATOR_LABEL") + .withValue(appLabel) + .endEnv() + .addNewEnv() + .withName("SPARK_PRINT_LAUNCH_COMMAND") + .withValue("true") + .endEnv() + .addNewEnv() + .withName("TMP_KRB_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles.head}") + .endEnv() + .addNewEnv() + .withName("TMP_CORE_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles(1)}") + .endEnv() + .addNewEnv() + .withName("TMP_HDFS_LOC") + 
.withValue(s"$KRB_FILE_DIR/${kerberosFiles(2)}") + .endEnv() + .addNewVolumeMount() + .withName(KRB_VOLUME) + .withMountPath(KRB_FILE_DIR) + .endVolumeMount() + .addNewVolumeMount() + .withName("kerberos-test-keytab") + .withMountPath("/var/keytabs") + .endVolumeMount() + .withCommand(List("/bin/bash").asJava) + .withArgs(List("/opt/spark/run-kerberos-test.sh").asJava) + .withName("kerberos-test") + .withImage(kerberosImage) + .withImagePullPolicy("IfNotPresent") + .endContainer() + .endSpec() + .endTemplate() + .endSpec() + .build() } } From 77ea92a0c1303f7b4c7dd4a6131e49e691b19b84 Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Fri, 28 Sep 2018 19:06:42 -0700 Subject: [PATCH 03/18] small fix --- .../kerberos/KerberosUtils.scala | 155 +++++++++--------- 1 file changed, 74 insertions(+), 81 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala index 574fdd570697..404eb3a4225c 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala @@ -172,85 +172,78 @@ private[spark] class KerberosUtils( .withPath(file) .build()).toList def getKerberosTest( - resource: String, - className: String, - appLabel: String, - yamlLocation: String): Deployment = { - new DeploymentBuilder() - .editSpec() - .editTemplate() - .editOrNewMetadata() - .addToLabels(Map("name" -> "kerberos-test").asJava) - .endMetadata() - .editSpec() - .addNewVolume() - .withName(KRB_VOLUME) - .withNewConfigMap() - .withName(KRB_CONFIG_MAP_NAME) - .withItems(krb5TestkeyPaths.asJava) - .endConfigMap() - .endVolume() - .addNewVolume() - 
.withName("kerberos-test-keytab") - .withNewPersistentVolumeClaim("server-keytab", false) - .endVolume() - .addNewContainer() - .addNewEnv() - .withName("NAMESPACE") - .withValue(namespace) - .endEnv() - .addNewEnv() - .withName("MASTER_URL") - .withValue(kubernetesClient.getMasterUrl.toString) - .endEnv() - .addNewEnv() - .withName("SUBMIT_RESOURCE") - .withValue(resource) - .endEnv() - .addNewEnv() - .withName("CLASS_NAME") - .withValue(className) - .endEnv() - .addNewEnv() - .withName("HADOOP_CONF_DIR") - .withValue(HADOOP_CONF_DIR_PATH) - .endEnv() - .addNewEnv() - .withName("APP_LOCATOR_LABEL") - .withValue(appLabel) - .endEnv() - .addNewEnv() - .withName("SPARK_PRINT_LAUNCH_COMMAND") - .withValue("true") - .endEnv() - .addNewEnv() - .withName("TMP_KRB_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles.head}") - .endEnv() - .addNewEnv() - .withName("TMP_CORE_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles(1)}") - .endEnv() - .addNewEnv() - .withName("TMP_HDFS_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles(2)}") - .endEnv() - .addNewVolumeMount() - .withName(KRB_VOLUME) - .withMountPath(KRB_FILE_DIR) - .endVolumeMount() - .addNewVolumeMount() - .withName("kerberos-test-keytab") - .withMountPath("/var/keytabs") - .endVolumeMount() - .withCommand(List("/bin/bash").asJava) - .withArgs(List("/opt/spark/run-kerberos-test.sh").asJava) - .withName("kerberos-test") - .withImage(kerberosImage) - .withImagePullPolicy("IfNotPresent") - .endContainer() - .endSpec() - .endTemplate() - .endSpec() - .build() } + resource: String, + className: String, + appLabel: String, + yamlLocation: String): Deployment = { + kubernetesClient.load(new FileInputStream(new File(yamlLocation))) + .get().get(0) match { + case deployment: Deployment => + new DeploymentBuilder(deployment) + .editSpec() + .editTemplate() + .editOrNewMetadata() + .addToLabels(Map("name" -> "kerberos-test").asJava) + .endMetadata() + .editSpec() + .addNewVolume() + .withName(KRB_VOLUME) + .withNewConfigMap() 
+ .withName(KRB_CONFIG_MAP_NAME) + .withItems(krb5TestkeyPaths.asJava) + .endConfigMap() + .endVolume() + .editMatchingContainer(new ContainerNameEqualityPredicate( + deployment.getMetadata.getName)) + .addNewEnv() + .withName("NAMESPACE") + .withValue(namespace) + .endEnv() + .addNewEnv() + .withName("MASTER_URL") + .withValue(kubernetesClient.getMasterUrl.toString) + .endEnv() + .addNewEnv() + .withName("SUBMIT_RESOURCE") + .withValue(resource) + .endEnv() + .addNewEnv() + .withName("CLASS_NAME") + .withValue(className) + .endEnv() + .addNewEnv() + .withName("HADOOP_CONF_DIR") + .withValue(HADOOP_CONF_DIR_PATH) + .endEnv() + .addNewEnv() + .withName("APP_LOCATOR_LABEL") + .withValue(appLabel) + .endEnv() + .addNewEnv() + .withName("SPARK_PRINT_LAUNCH_COMMAND") + .withValue("true") + .endEnv() + .addNewEnv() + .withName("TMP_KRB_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles.head}") + .endEnv() + .addNewEnv() + .withName("TMP_CORE_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles(1)}") + .endEnv() + .addNewEnv() + .withName("TMP_HDFS_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles(2)}") + .endEnv() + .addNewVolumeMount() + .withName(KRB_VOLUME) + .withMountPath(KRB_FILE_DIR) + .endVolumeMount() + .withImage(kerberosImage) + .endContainer() + .endSpec() + .endTemplate() + .endSpec() + .build() + }} } From 761254c3d4bdd1b35e707077acf0a70defc88ea9 Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Fri, 28 Sep 2018 20:02:11 -0700 Subject: [PATCH 04/18] fixed issue of docker building --- bin/docker-image-tool.sh | 10 +++++----- dev/make-distribution.sh | 2 +- .../test/dockerfiles/spark/kerberos/Dockerfile | 0 .../docker/{ => src}/test/hadoop/conf/core-site.xml | 0 .../docker/{ => src}/test/hadoop/conf/hdfs-site.xml | 0 .../docker/{ => src}/test/hadoop/conf/krb5.conf | 0 .../docker/{ => src}/test/hadoop/conf/yarn-site.xml | 0 .../docker/{ => src}/test/scripts/run-kerberos-test.sh | 0 8 files changed, 6 insertions(+), 6 deletions(-) rename 
resource-managers/kubernetes/docker/{ => src}/test/dockerfiles/spark/kerberos/Dockerfile (100%) rename resource-managers/kubernetes/docker/{ => src}/test/hadoop/conf/core-site.xml (100%) rename resource-managers/kubernetes/docker/{ => src}/test/hadoop/conf/hdfs-site.xml (100%) rename resource-managers/kubernetes/docker/{ => src}/test/hadoop/conf/krb5.conf (100%) rename resource-managers/kubernetes/docker/{ => src}/test/hadoop/conf/yarn-site.xml (100%) rename resource-managers/kubernetes/docker/{ => src}/test/scripts/run-kerberos-test.sh (100%) diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh index fe9ed53ac993..75f7860e27e2 100755 --- a/bin/docker-image-tool.sh +++ b/bin/docker-image-tool.sh @@ -71,7 +71,7 @@ function build { ) local BASEDOCKERFILE=${BASEDOCKERFILE:-"$IMG_PATH/main/dockerfiles/spark/Dockerfile"} local PYDOCKERFILE=${PYDOCKERFILE:-"$IMG_PATH/main/dockerfiles/spark/bindings/python/Dockerfile"} - local RDOCKERFILE=${RDOCKERFILE:-"$IMG_PATH/main/dockerfiles/spark/bindings/R/Dockerfile"} +# local RDOCKERFILE=${RDOCKERFILE:-"$IMG_PATH/main/dockerfiles/spark/bindings/R/Dockerfile"} local KDOCKERFILE=${KDOCKERFILE:-"$IMG_PATH/test/dockerfiles/spark/kerberos/Dockerfile"} docker build $NOCACHEARG "${BUILD_ARGS[@]}" \ @@ -82,9 +82,9 @@ function build { -t $(image_ref spark-py) \ -f "$PYDOCKERFILE" . - docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ - -t $(image_ref spark-r) \ - -f "$RDOCKERFILE" . +# docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ +# -t $(image_ref spark-r) \ +# -f "$RDOCKERFILE" . 
docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ -t $(image_ref spark-kerberos) \ @@ -94,7 +94,7 @@ function build { function push { docker push "$(image_ref spark)" docker push "$(image_ref spark-py)" - docker push "$(image_ref spark-r)" +# docker push "$(image_ref spark-r)" docker push "$(image_ref spark-kerberos)" } diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index 668682fbb913..a38ab84daa56 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -191,7 +191,7 @@ fi # Only create and copy the dockerfiles directory if the kubernetes artifacts were built. if [ -d "$SPARK_HOME"/resource-managers/kubernetes/core/target/ ]; then mkdir -p "$DISTDIR/kubernetes/" - cp -a "$SPARK_HOME"/resource-managers/kubernetes/docker/src/main/dockerfiles "$DISTDIR/kubernetes/" + cp -a "$SPARK_HOME"/resource-managers/kubernetes/docker/src "$DISTDIR/kubernetes/" cp -a "$SPARK_HOME"/resource-managers/kubernetes/integration-tests/tests "$DISTDIR/kubernetes/" fi diff --git a/resource-managers/kubernetes/docker/test/dockerfiles/spark/kerberos/Dockerfile b/resource-managers/kubernetes/docker/src/test/dockerfiles/spark/kerberos/Dockerfile similarity index 100% rename from resource-managers/kubernetes/docker/test/dockerfiles/spark/kerberos/Dockerfile rename to resource-managers/kubernetes/docker/src/test/dockerfiles/spark/kerberos/Dockerfile diff --git a/resource-managers/kubernetes/docker/test/hadoop/conf/core-site.xml b/resource-managers/kubernetes/docker/src/test/hadoop/conf/core-site.xml similarity index 100% rename from resource-managers/kubernetes/docker/test/hadoop/conf/core-site.xml rename to resource-managers/kubernetes/docker/src/test/hadoop/conf/core-site.xml diff --git a/resource-managers/kubernetes/docker/test/hadoop/conf/hdfs-site.xml b/resource-managers/kubernetes/docker/src/test/hadoop/conf/hdfs-site.xml similarity index 100% rename from resource-managers/kubernetes/docker/test/hadoop/conf/hdfs-site.xml rename to 
resource-managers/kubernetes/docker/src/test/hadoop/conf/hdfs-site.xml diff --git a/resource-managers/kubernetes/docker/test/hadoop/conf/krb5.conf b/resource-managers/kubernetes/docker/src/test/hadoop/conf/krb5.conf similarity index 100% rename from resource-managers/kubernetes/docker/test/hadoop/conf/krb5.conf rename to resource-managers/kubernetes/docker/src/test/hadoop/conf/krb5.conf diff --git a/resource-managers/kubernetes/docker/test/hadoop/conf/yarn-site.xml b/resource-managers/kubernetes/docker/src/test/hadoop/conf/yarn-site.xml similarity index 100% rename from resource-managers/kubernetes/docker/test/hadoop/conf/yarn-site.xml rename to resource-managers/kubernetes/docker/src/test/hadoop/conf/yarn-site.xml diff --git a/resource-managers/kubernetes/docker/test/scripts/run-kerberos-test.sh b/resource-managers/kubernetes/docker/src/test/scripts/run-kerberos-test.sh similarity index 100% rename from resource-managers/kubernetes/docker/test/scripts/run-kerberos-test.sh rename to resource-managers/kubernetes/docker/src/test/scripts/run-kerberos-test.sh From 6e3966fbc98809a962bd9cbd589266d9b8b95834 Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Fri, 28 Sep 2018 20:04:16 -0700 Subject: [PATCH 05/18] fixes and organizations --- .../kerberos-yml/data-populator-deployment.yml | 2 +- .../kerberos-yml/dn1-deployment.yml | 2 +- .../kerberos-yml/kerberos-deployment.yml | 2 +- .../integration-tests/kerberos-yml/nn-deployment.yml | 2 +- .../integration-tests/kerberos-yml/nn-hadoop.yml | 12 ------------ .../integration-tests/kerberos-yml/server-keytab.yml | 12 ------------ .../k8s/integrationtest/kerberos/KerberosUtils.scala | 2 +- 7 files changed, 5 insertions(+), 29 deletions(-) delete mode 100755 resource-managers/kubernetes/integration-tests/kerberos-yml/nn-hadoop.yml delete mode 100755 resource-managers/kubernetes/integration-tests/kerberos-yml/server-keytab.yml diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml 
b/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml index 0b7ba93bfd22..166520a1e306 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml @@ -17,7 +17,7 @@ spec: - command: - /populate-data.sh name: data-populator - image: hadoop-base:latest + image: ifilonenko/hadoop-base:latest imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /var/keytabs diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml index 9ef80b5dfc06..03ac6b10ccab 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml @@ -17,7 +17,7 @@ spec: - command: - /start-datanode.sh name: dn1 - image: hadoop-base:latest + image: ifilonenko/hadoop-base:latest imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /var/keytabs diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml index 491ad3ad0968..c093d8881e3a 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml @@ -17,7 +17,7 @@ spec: - command: - /start-kdc.sh name: kerberos - image: hadoop-base:latest + image: ifilonenko/hadoop-base:latest imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /var/keytabs diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml index feee748eedbc..89ad452d3daa 100755 --- 
a/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml @@ -19,7 +19,7 @@ spec: name: nn ports: - containerPort: 9000 - image: hadoop-base:latest + image: ifilonenko/hadoop-base:latest imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /var/keytabs diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-hadoop.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-hadoop.yml deleted file mode 100755 index 18c138e1512f..000000000000 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-hadoop.yml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: nn-hadoop - labels: - job: kerberostest -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 100Mi diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/server-keytab.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/server-keytab.yml deleted file mode 100755 index 7798c0741366..000000000000 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/server-keytab.yml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: server-keytab - labels: - job: kerberostest -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 100Mi diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala index 404eb3a4225c..089f391f42bd 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala +++ 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala @@ -69,7 +69,7 @@ private[spark] class KerberosUtils( .withCapacity(Map("storage" -> new Quantity("1Gi")).asJava) .withAccessModes("ReadWriteMany") .withHostPath( - new HostPathVolumeSource(s"/mnt/$namespace/$pathType")) + new HostPathVolumeSource(s"$KRB_FILE_DIR/$namespace/$pathType")) .endSpec() .build() private def createPVCTemplate(name: String) : PersistentVolumeClaim = From 776617dc5328a7a88afde854240d750efd52959f Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Sat, 29 Sep 2018 10:42:42 -0700 Subject: [PATCH 06/18] traits and polymorphosim --- .../kerberos/KerberosCMWatcherCache.scala | 16 +- .../kerberos/KerberosDriverWatcherCache.scala | 12 +- .../kerberos/KerberosPVWatcherCache.scala | 17 +- .../kerberos/KerberosPodWatcherCache.scala | 19 +- .../kerberos/KerberosStorage.scala | 17 +- .../kerberos/KerberosUtils.scala | 165 +++++++++--------- ....scala => WatcherCacheConfiguration.scala} | 13 +- 7 files changed, 139 insertions(+), 120 deletions(-) rename resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/{KerberosDeployment.scala => WatcherCacheConfiguration.scala} (79%) mode change 100755 => 100644 diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosCMWatcherCache.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosCMWatcherCache.scala index d98c06cc8056..f1153d167a21 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosCMWatcherCache.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosCMWatcherCache.scala @@ -32,13 +32,12 @@ import 
org.apache.spark.internal.Logging * until a configmap with the HADOOP_CONF_DIR specifications has been created. */ private[spark] class KerberosCMWatcherCache(kerberosUtils: KerberosUtils) - extends Logging with Eventually with Matchers { + extends WatcherCacheConfiguration with Logging with Eventually with Matchers { private val kubernetesClient = kerberosUtils.getClient private val namespace = kerberosUtils.getNamespace private val requiredFiles = Seq("core-site.xml", "hdfs-site.xml", "krb5.conf") private val cmCache = scala.collection.mutable.Map[String, Map[String, String]]() - private val configMap = kerberosUtils.getConfigMap - private val configMapName = configMap.getMetadata.getName + private val configMapName = kerberosUtils.getConfigMap.resource.getMetadata.getName // Watching ConfigMaps logInfo("Beginning the watch of the Kerberos Config Map") private val watcher: Watch = kubernetesClient @@ -59,19 +58,20 @@ private[spark] class KerberosCMWatcherCache(kerberosUtils: KerberosUtils) cmCache(name) = data }}}) // Check for CM to have proper files - private def check(name: String): Boolean = { + override def check(name: String): Boolean = { cmCache.get(name).exists{ data => requiredFiles.forall(data.keys.toSeq.contains)} } - def deploy(configMap: ConfigMap): Unit = { + override def deploy[T <: ResourceStorage[ConfigMap]](storage: T): Unit = { logInfo("Launching the ConfigMap") - kerberosUtils.getClient.configMaps().inNamespace(namespace).createOrReplace(configMap) + kerberosUtils.getClient.configMaps() + .inNamespace(namespace).createOrReplace(storage.resource) // Making sure CM has correct files Eventually.eventually(TIMEOUT, INTERVAL) { - check(configMap.getMetadata.getName) should be (true) } + check(configMapName) should be (true) } } - def stopWatch() : Unit = { + override def stopWatch() : Unit = { // Closing Watcher watcher.close() } diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala index 75cdb8fa4b23..f55f4807ad8d 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala @@ -34,7 +34,8 @@ import org.apache.spark.internal.Logging */ private[spark] class KerberosDriverWatcherCache( kerberosUtils: KerberosUtils, - labels: Map[String, String]) extends Logging with Eventually with Matchers { + labels: Map[String, String]) + extends WatcherCacheConfiguration with Logging with Eventually with Matchers { private val kubernetesClient = kerberosUtils.getClient private val namespace = kerberosUtils.getNamespace private var driverName: String = "" @@ -59,13 +60,14 @@ private[spark] class KerberosDriverWatcherCache( if (name.contains("driver")) { driverName = name } }}}) - private def check(name: String): Boolean = podCache.get(name).contains("Running") + override def check(name: String): Boolean = podCache.get(name).contains("Running") - def deploy(deployment: Deployment): Unit = { - kubernetesClient.extensions().deployments().inNamespace(namespace).create(deployment) + override def deploy[T <: ResourceStorage[Deployment]](storage: T): Unit = { + kubernetesClient.extensions().deployments() + .inNamespace(namespace).create(storage.resource) Eventually.eventually(TIMEOUT, INTERVAL) { check(driverName) should be (true) } } - def stopWatch(): Unit = { + override def stopWatch(): Unit = { // Closing Watch watcher.close() } diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPVWatcherCache.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPVWatcherCache.scala index ec8b2fcc54d9..910a6cb95a6a 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPVWatcherCache.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPVWatcherCache.scala @@ -34,7 +34,8 @@ import org.apache.spark.internal.Logging */ private[spark] class KerberosPVWatcherCache( kerberosUtils: KerberosUtils, - labels: Map[String, String]) extends Logging with Eventually with Matchers { + labels: Map[String, String]) + extends WatcherCacheConfiguration with Logging with Eventually with Matchers { private val kubernetesClient = kerberosUtils.getClient private val namespace = kerberosUtils.getNamespace @@ -84,24 +85,24 @@ private[spark] class KerberosPVWatcherCache( pvcCache(name) = s"$volumeName $state"}}}) // Check for PVC being bounded to correct PV - private def check(name: String): Boolean = { + override def check(name: String): Boolean = { pvCache.get(name).contains("Bound") && pvcCache.get(name).contains(s"$name Bound") } - def deploy(kbs: KerberosStorage) : Unit = { + override def deploy[T <: PVStorage](pv: T) : Unit = { logInfo("Launching the Persistent Storage") kubernetesClient - .persistentVolumes().create(kbs.persistentVolume) + .persistentVolumes().create(pv.persistentVolume) // Making sure PV is Available for creation of PVC Eventually.eventually(TIMEOUT, INTERVAL) { - (pvCache(kbs.name) == "Available") should be (true) } + (pvCache(pv.name) == "Available") should be (true) } kubernetesClient - .persistentVolumeClaims().inNamespace(namespace).create(kbs.persistentVolumeClaim) - 
Eventually.eventually(TIMEOUT, INTERVAL) { check(kbs.name) should be (true) } + .persistentVolumeClaims().inNamespace(namespace).create(pv.persistentVolumeClaim) + Eventually.eventually(TIMEOUT, INTERVAL) { check(pv.name) should be (true) } } - def stopWatch(): Unit = { + override def stopWatch(): Unit = { // Closing Watchers pvWatcher.close() pvcWatcher.close() diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPodWatcherCache.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPodWatcherCache.scala index 09d42f472a05..25c608ca9aa3 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPodWatcherCache.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPodWatcherCache.scala @@ -33,8 +33,9 @@ import org.apache.spark.internal.Logging * is running before launching the Kerberos test. 
*/ private[spark] class KerberosPodWatcherCache( - kerberosUtils: KerberosUtils, - labels: Map[String, String]) extends Logging with Eventually with Matchers { + kerberosUtils: KerberosUtils, + labels: Map[String, String]) + extends WatcherCacheConfiguration with Logging with Eventually with Matchers { private val kubernetesClient = kerberosUtils.getClient private val namespace = kerberosUtils.getNamespace @@ -92,25 +93,25 @@ private[spark] class KerberosPodWatcherCache( } } - private def check(name: String): Boolean = { + override def check(name: String): Boolean = { podCache.get(name).contains("Running") && serviceCache.get(name).contains(name) && additionalCheck(name) } - def deploy(kdc: KerberosDeployment) : Unit = { + override def deploy[T <: ServiceStorage](srvc: T) : Unit = { logInfo("Launching the Deployment") kubernetesClient - .extensions().deployments().inNamespace(namespace).create(kdc.podDeployment) + .extensions().deployments().inNamespace(namespace).create(srvc.podDeployment) // Making sure Pod is running Eventually.eventually(TIMEOUT, INTERVAL) { - (podCache(kdc.name) == "Running") should be (true) } + (podCache(srvc.name) == "Running") should be (true) } kubernetesClient - .services().inNamespace(namespace).create(kdc.service) - Eventually.eventually(TIMEOUT, INTERVAL) { check(kdc.name) should be (true) } + .services().inNamespace(namespace).create(srvc.service) + Eventually.eventually(TIMEOUT, INTERVAL) { check(srvc.name) should be (true) } } - def stopWatch(): Unit = { + override def stopWatch(): Unit = { // Closing Watchers podWatcher.close() serviceWatcher.close() diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala index a92dc35c71b8..2a2be4078e3c 100755 --- 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala @@ -16,9 +16,20 @@ */ package org.apache.spark.deploy.k8s.integrationtest.kerberos -import io.fabric8.kubernetes.api.model.{PersistentVolume, PersistentVolumeClaim} +import io.fabric8.kubernetes.api.model.{HasMetadata, PersistentVolume, PersistentVolumeClaim, Service} +import io.fabric8.kubernetes.api.model.extensions._ -private[spark] case class KerberosStorage( +private[spark] sealed trait KerberosStorage + +private[spark] case class PVStorage( name: String, persistentVolumeClaim: PersistentVolumeClaim, - persistentVolume: PersistentVolume) + persistentVolume: PersistentVolume) extends KerberosStorage + +private[spark] case class ServiceStorage( + name: String, + podDeployment: Deployment, + service: Service) extends KerberosStorage + +private[spark] case class ResourceStorage[T <: HasMetadata]( + resource: T) extends KerberosStorage diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala index 089f391f42bd..54f136baf424 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala @@ -94,20 +94,22 @@ private[spark] class KerberosUtils( pvNN -> createPVTemplate(pvNN, "nn"), pvKT -> createPVTemplate(pvKT, "keytab")) private def buildKerberosPV(pvType: String) = { - KerberosStorage( + PVStorage( pvType, createPVCTemplate(pvType), 
persistentVolumeMap(pvType)) } - def getNNStorage: KerberosStorage = buildKerberosPV(pvNN) - def getKTStorage: KerberosStorage = buildKerberosPV(pvKT) + def getNNStorage: PVStorage = buildKerberosPV(pvNN) + def getKTStorage: PVStorage = buildKerberosPV(pvKT) def getLabels: Map[String, String] = PV_LABELS def getKeyPaths: Seq[KeyToPath] = keyPaths - def getConfigMap: ConfigMap = new ConfigMapBuilder() - .withNewMetadata() - .withName(KRB_CONFIG_MAP_NAME) - .endMetadata() - .addToData(kerberosConfTupList.toMap.asJava) - .build() + def getConfigMap: ResourceStorage[ConfigMap] = + ResourceStorage( + new ConfigMapBuilder() + .withNewMetadata() + .withName(KRB_CONFIG_MAP_NAME) + .endMetadata() + .addToData(kerberosConfTupList.toMap.asJava) + .build()) private val kdcNode = Seq("kerberos-deployment", "kerberos-service") private val nnNode = Seq("nn-deployment", "nn-service") private val dnNode = Seq("dn1-deployment", "dn1-service") @@ -115,7 +117,7 @@ private[spark] class KerberosUtils( private def buildKerberosDeployment(name: String, seqPair: Seq[String]) = { val deployment = kubernetesClient.load(loadFromYaml(seqPair.head)).get().get(0).asInstanceOf[Deployment] - KerberosDeployment( + ServiceStorage( name, new DeploymentBuilder(deployment) .editSpec() @@ -161,10 +163,10 @@ private[spark] class KerberosUtils( .build(), kubernetesClient.load(loadFromYaml(seqPair(1))).get().get(0).asInstanceOf[Service] ) } - def getKDC: KerberosDeployment = buildKerberosDeployment("kerberos", kdcNode) - def getNN: KerberosDeployment = buildKerberosDeployment("nn", nnNode) - def getDN: KerberosDeployment = buildKerberosDeployment("dn1", dnNode) - def getDP: KerberosDeployment = buildKerberosDeployment("data-populator", dataPopulator) + def getKDC: ServiceStorage = buildKerberosDeployment("kerberos", kdcNode) + def getNN: ServiceStorage = buildKerberosDeployment("nn", nnNode) + def getDN: ServiceStorage = buildKerberosDeployment("dn1", dnNode) + def getDP: ServiceStorage = 
buildKerberosDeployment("data-populator", dataPopulator) private val HADOOP_CONF_DIR_PATH = "/opt/spark/hconf" private val krb5TestkeyPaths = kerberosFiles.map(file => new KeyToPathBuilder() @@ -175,75 +177,76 @@ private[spark] class KerberosUtils( resource: String, className: String, appLabel: String, - yamlLocation: String): Deployment = { + yamlLocation: String): ResourceStorage[Deployment] = { kubernetesClient.load(new FileInputStream(new File(yamlLocation))) .get().get(0) match { case deployment: Deployment => - new DeploymentBuilder(deployment) - .editSpec() - .editTemplate() - .editOrNewMetadata() - .addToLabels(Map("name" -> "kerberos-test").asJava) - .endMetadata() - .editSpec() - .addNewVolume() - .withName(KRB_VOLUME) - .withNewConfigMap() - .withName(KRB_CONFIG_MAP_NAME) - .withItems(krb5TestkeyPaths.asJava) - .endConfigMap() - .endVolume() - .editMatchingContainer(new ContainerNameEqualityPredicate( - deployment.getMetadata.getName)) - .addNewEnv() - .withName("NAMESPACE") - .withValue(namespace) - .endEnv() - .addNewEnv() - .withName("MASTER_URL") - .withValue(kubernetesClient.getMasterUrl.toString) - .endEnv() - .addNewEnv() - .withName("SUBMIT_RESOURCE") - .withValue(resource) - .endEnv() - .addNewEnv() - .withName("CLASS_NAME") - .withValue(className) - .endEnv() - .addNewEnv() - .withName("HADOOP_CONF_DIR") - .withValue(HADOOP_CONF_DIR_PATH) - .endEnv() - .addNewEnv() - .withName("APP_LOCATOR_LABEL") - .withValue(appLabel) - .endEnv() - .addNewEnv() - .withName("SPARK_PRINT_LAUNCH_COMMAND") - .withValue("true") - .endEnv() - .addNewEnv() - .withName("TMP_KRB_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles.head}") - .endEnv() - .addNewEnv() - .withName("TMP_CORE_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles(1)}") - .endEnv() - .addNewEnv() - .withName("TMP_HDFS_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles(2)}") - .endEnv() - .addNewVolumeMount() - .withName(KRB_VOLUME) - .withMountPath(KRB_FILE_DIR) - .endVolumeMount() - 
.withImage(kerberosImage) - .endContainer() - .endSpec() - .endTemplate() - .endSpec() - .build() + ResourceStorage( + new DeploymentBuilder(deployment) + .editSpec() + .editTemplate() + .editOrNewMetadata() + .addToLabels(Map("name" -> "kerberos-test").asJava) + .endMetadata() + .editSpec() + .addNewVolume() + .withName(KRB_VOLUME) + .withNewConfigMap() + .withName(KRB_CONFIG_MAP_NAME) + .withItems(krb5TestkeyPaths.asJava) + .endConfigMap() + .endVolume() + .editMatchingContainer(new ContainerNameEqualityPredicate( + deployment.getMetadata.getName)) + .addNewEnv() + .withName("NAMESPACE") + .withValue(namespace) + .endEnv() + .addNewEnv() + .withName("MASTER_URL") + .withValue(kubernetesClient.getMasterUrl.toString) + .endEnv() + .addNewEnv() + .withName("SUBMIT_RESOURCE") + .withValue(resource) + .endEnv() + .addNewEnv() + .withName("CLASS_NAME") + .withValue(className) + .endEnv() + .addNewEnv() + .withName("HADOOP_CONF_DIR") + .withValue(HADOOP_CONF_DIR_PATH) + .endEnv() + .addNewEnv() + .withName("APP_LOCATOR_LABEL") + .withValue(appLabel) + .endEnv() + .addNewEnv() + .withName("SPARK_PRINT_LAUNCH_COMMAND") + .withValue("true") + .endEnv() + .addNewEnv() + .withName("TMP_KRB_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles.head}") + .endEnv() + .addNewEnv() + .withName("TMP_CORE_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles(1)}") + .endEnv() + .addNewEnv() + .withName("TMP_HDFS_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles(2)}") + .endEnv() + .addNewVolumeMount() + .withName(KRB_VOLUME) + .withMountPath(KRB_FILE_DIR) + .endVolumeMount() + .withImage(kerberosImage) + .endContainer() + .endSpec() + .endTemplate() + .endSpec() + .build()) }} } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDeployment.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/WatcherCacheConfiguration.scala old mode 
100755 new mode 100644 similarity index 79% rename from resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDeployment.scala rename to resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/WatcherCacheConfiguration.scala index 3f94cf25d0c4..68fc4df4906d --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDeployment.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/WatcherCacheConfiguration.scala @@ -16,10 +16,11 @@ */ package org.apache.spark.deploy.k8s.integrationtest.kerberos -import io.fabric8.kubernetes.api.model.Service -import io.fabric8.kubernetes.api.model.extensions.Deployment +private[spark] trait WatcherCacheConfiguration { -private[spark] case class KerberosDeployment( - name: String, - podDeployment: Deployment, - service: Service) + def check(name: String): Boolean + + def deploy[T <: KerberosStorage](storage: T) : Unit + + def stopWatch(): Unit +} From 7f1ccb6451d53f04d46263f7bb7e81211bfb809f Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Sun, 30 Sep 2018 00:39:19 -0700 Subject: [PATCH 07/18] polymorphism fixes and generuc class types --- .../src/test/dockerfiles/spark/kerberos/Dockerfile | 2 +- .../docker/src/test/scripts/run-kerberos-test.sh | 2 +- .../kerberos/KerberosCMWatcherCache.scala | 4 ++-- .../kerberos/KerberosDriverWatcherCache.scala | 5 +++-- .../kerberos/KerberosPVWatcherCache.scala | 4 ++-- .../kerberos/KerberosPodWatcherCache.scala | 4 ++-- .../k8s/integrationtest/kerberos/KerberosStorage.scala | 9 ++++++--- .../k8s/integrationtest/kerberos/KerberosUtils.scala | 8 ++++---- .../kerberos/WatcherCacheConfiguration.scala | 4 ++-- 9 files changed, 23 insertions(+), 19 deletions(-) diff --git 
a/resource-managers/kubernetes/docker/src/test/dockerfiles/spark/kerberos/Dockerfile b/resource-managers/kubernetes/docker/src/test/dockerfiles/spark/kerberos/Dockerfile index 9f01f50828f6..01d69a3c8cb3 100644 --- a/resource-managers/kubernetes/docker/src/test/dockerfiles/spark/kerberos/Dockerfile +++ b/resource-managers/kubernetes/docker/src/test/dockerfiles/spark/kerberos/Dockerfile @@ -20,5 +20,5 @@ FROM $base_img ARG k_img_path=kubernetes/src/test -COPY ${k_img_path}/scripts/test-env.sh /opt/spark/ +COPY ${k_img_path}/scripts/run-kerberos-test.sh /opt/spark/ COPY ${k_img_path}/hadoop/conf /opt/spark/hconf diff --git a/resource-managers/kubernetes/docker/src/test/scripts/run-kerberos-test.sh b/resource-managers/kubernetes/docker/src/test/scripts/run-kerberos-test.sh index 6003a1ecc5a5..647e9c0bc274 100644 --- a/resource-managers/kubernetes/docker/src/test/scripts/run-kerberos-test.sh +++ b/resource-managers/kubernetes/docker/src/test/scripts/run-kerberos-test.sh @@ -12,7 +12,7 @@ until /usr/bin/kinit -kt /var/keytabs/hdfs.keytab hdfs/nn.${NAMESPACE}.svc.clust --deploy-mode cluster \ --class ${CLASS_NAME} \ --master k8s://${MASTER_URL} \ - --namespace ${NAMESPACE} \ + --conf spark.kubernetes.namespace=${NAMESPACE} \ --conf spark.executor.instances=1 \ --conf spark.app.name=spark-hdfs \ --conf spark.driver.extraClassPath=/opt/spark/hconf/core-site.xml:/opt/spark/hconf/hdfs-site.xml:/opt/spark/hconf/yarn-site.xml:/etc/krb5.conf \ diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosCMWatcherCache.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosCMWatcherCache.scala index f1153d167a21..aa99105e1116 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosCMWatcherCache.scala +++ 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosCMWatcherCache.scala @@ -32,7 +32,7 @@ import org.apache.spark.internal.Logging * until a configmap with the HADOOP_CONF_DIR specifications has been created. */ private[spark] class KerberosCMWatcherCache(kerberosUtils: KerberosUtils) - extends WatcherCacheConfiguration with Logging with Eventually with Matchers { + extends WatcherCacheConfiguration[ConfigMapStorage] with Logging with Eventually with Matchers { private val kubernetesClient = kerberosUtils.getClient private val namespace = kerberosUtils.getNamespace private val requiredFiles = Seq("core-site.xml", "hdfs-site.xml", "krb5.conf") @@ -62,7 +62,7 @@ private[spark] class KerberosCMWatcherCache(kerberosUtils: KerberosUtils) cmCache.get(name).exists{ data => requiredFiles.forall(data.keys.toSeq.contains)} } - override def deploy[T <: ResourceStorage[ConfigMap]](storage: T): Unit = { + def deploy(storage: ConfigMapStorage): Unit = { logInfo("Launching the ConfigMap") kerberosUtils.getClient.configMaps() .inNamespace(namespace).createOrReplace(storage.resource) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala index f55f4807ad8d..1bf6e5094972 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala @@ -35,7 +35,7 @@ import org.apache.spark.internal.Logging private[spark] class KerberosDriverWatcherCache( kerberosUtils: KerberosUtils, labels: Map[String, String]) - extends 
WatcherCacheConfiguration with Logging with Eventually with Matchers { + extends WatcherCacheConfiguration[DeploymentStorage] with Logging with Eventually with Matchers { private val kubernetesClient = kerberosUtils.getClient private val namespace = kerberosUtils.getNamespace private var driverName: String = "" @@ -62,11 +62,12 @@ private[spark] class KerberosDriverWatcherCache( override def check(name: String): Boolean = podCache.get(name).contains("Running") - override def deploy[T <: ResourceStorage[Deployment]](storage: T): Unit = { + override def deploy(storage: DeploymentStorage) : Unit = { kubernetesClient.extensions().deployments() .inNamespace(namespace).create(storage.resource) Eventually.eventually(TIMEOUT, INTERVAL) { check(driverName) should be (true) } } + override def stopWatch(): Unit = { // Closing Watch watcher.close() diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPVWatcherCache.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPVWatcherCache.scala index 910a6cb95a6a..6cc4de7c6a74 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPVWatcherCache.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPVWatcherCache.scala @@ -35,7 +35,7 @@ import org.apache.spark.internal.Logging private[spark] class KerberosPVWatcherCache( kerberosUtils: KerberosUtils, labels: Map[String, String]) - extends WatcherCacheConfiguration with Logging with Eventually with Matchers { + extends WatcherCacheConfiguration[PVStorage] with Logging with Eventually with Matchers { private val kubernetesClient = kerberosUtils.getClient private val namespace = kerberosUtils.getNamespace @@ -90,7 +90,7 @@ private[spark] class KerberosPVWatcherCache( 
pvcCache.get(name).contains(s"$name Bound") } - override def deploy[T <: PVStorage](pv: T) : Unit = { + override def deploy(pv: PVStorage) : Unit = { logInfo("Launching the Persistent Storage") kubernetesClient .persistentVolumes().create(pv.persistentVolume) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPodWatcherCache.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPodWatcherCache.scala index 25c608ca9aa3..7e80efce2f19 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPodWatcherCache.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPodWatcherCache.scala @@ -35,7 +35,7 @@ import org.apache.spark.internal.Logging private[spark] class KerberosPodWatcherCache( kerberosUtils: KerberosUtils, labels: Map[String, String]) - extends WatcherCacheConfiguration with Logging with Eventually with Matchers { + extends WatcherCacheConfiguration[ServiceStorage] with Logging with Eventually with Matchers { private val kubernetesClient = kerberosUtils.getClient private val namespace = kerberosUtils.getNamespace @@ -99,7 +99,7 @@ private[spark] class KerberosPodWatcherCache( additionalCheck(name) } - override def deploy[T <: ServiceStorage](srvc: T) : Unit = { + override def deploy(srvc: ServiceStorage) : Unit = { logInfo("Launching the Deployment") kubernetesClient .extensions().deployments().inNamespace(namespace).create(srvc.podDeployment) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala index 
2a2be4078e3c..23b11a2681b7 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.deploy.k8s.integrationtest.kerberos -import io.fabric8.kubernetes.api.model.{HasMetadata, PersistentVolume, PersistentVolumeClaim, Service} +import io.fabric8.kubernetes.api.model._ import io.fabric8.kubernetes.api.model.extensions._ private[spark] sealed trait KerberosStorage @@ -31,5 +31,8 @@ private[spark] case class ServiceStorage( podDeployment: Deployment, service: Service) extends KerberosStorage -private[spark] case class ResourceStorage[T <: HasMetadata]( - resource: T) extends KerberosStorage +private[spark] case class DeploymentStorage( + resource: Deployment) extends KerberosStorage + +private[spark] case class ConfigMapStorage( + resource: ConfigMap) extends KerberosStorage diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala index 54f136baf424..51ffd271b7f4 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala @@ -102,8 +102,8 @@ private[spark] class KerberosUtils( def getKTStorage: PVStorage = buildKerberosPV(pvKT) def getLabels: Map[String, String] = PV_LABELS def getKeyPaths: Seq[KeyToPath] = keyPaths - def getConfigMap: ResourceStorage[ConfigMap] = - ResourceStorage( + def getConfigMap: ConfigMapStorage = + ConfigMapStorage( new 
ConfigMapBuilder() .withNewMetadata() .withName(KRB_CONFIG_MAP_NAME) @@ -177,11 +177,11 @@ private[spark] class KerberosUtils( resource: String, className: String, appLabel: String, - yamlLocation: String): ResourceStorage[Deployment] = { + yamlLocation: String): DeploymentStorage = { kubernetesClient.load(new FileInputStream(new File(yamlLocation))) .get().get(0) match { case deployment: Deployment => - ResourceStorage( + DeploymentStorage( new DeploymentBuilder(deployment) .editSpec() .editTemplate() diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/WatcherCacheConfiguration.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/WatcherCacheConfiguration.scala index 68fc4df4906d..73fe18b98336 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/WatcherCacheConfiguration.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/WatcherCacheConfiguration.scala @@ -16,11 +16,11 @@ */ package org.apache.spark.deploy.k8s.integrationtest.kerberos -private[spark] trait WatcherCacheConfiguration { +private[spark] trait WatcherCacheConfiguration[T <: KerberosStorage] { def check(name: String): Boolean - def deploy[T <: KerberosStorage](storage: T) : Unit + def deploy(storage: T) : Unit def stopWatch(): Unit } From 3ab4358787e5cfb0de289f963122b7f22108fc36 Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Sun, 30 Sep 2018 22:28:03 -0700 Subject: [PATCH 08/18] working test cases (just need clusterrolebindings) --- bin/docker-image-tool.sh | 15 ++++++--- .../src/test/scripts/run-kerberos-test.sh | 5 +-- .../integrationtest/KerberosTestSuite.scala | 8 +++-- .../k8s/integrationtest/KubernetesSuite.scala | 3 ++ .../minikube/MinikubeTestBackend.scala | 5 +-- 
.../KerberizedHadoopClusterLauncher.scala | 8 ++--- .../kerberos/KerberosDriverWatcherCache.scala | 1 - .../kerberos/KerberosStorage.scala | 2 ++ .../kerberos/KerberosUtils.scala | 33 +++++++++++++++++-- .../kerberos/WatcherCacheConfiguration.scala | 16 +++++++++ 10 files changed, 75 insertions(+), 21 deletions(-) diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh index 75f7860e27e2..23ec660865de 100755 --- a/bin/docker-image-tool.sh +++ b/bin/docker-image-tool.sh @@ -71,31 +71,36 @@ function build { ) local BASEDOCKERFILE=${BASEDOCKERFILE:-"$IMG_PATH/main/dockerfiles/spark/Dockerfile"} local PYDOCKERFILE=${PYDOCKERFILE:-"$IMG_PATH/main/dockerfiles/spark/bindings/python/Dockerfile"} -# local RDOCKERFILE=${RDOCKERFILE:-"$IMG_PATH/main/dockerfiles/spark/bindings/R/Dockerfile"} local KDOCKERFILE=${KDOCKERFILE:-"$IMG_PATH/test/dockerfiles/spark/kerberos/Dockerfile"} + local RDOCKERFILE=${RDOCKERFILE:-"$IMG_PATH/main/dockerfiles/spark/bindings/R/Dockerfile"} + # Spark Base docker build $NOCACHEARG "${BUILD_ARGS[@]}" \ -t $(image_ref spark) \ -f "$BASEDOCKERFILE" . + # PySpark docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ -t $(image_ref spark-py) \ -f "$PYDOCKERFILE" . -# docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ -# -t $(image_ref spark-r) \ -# -f "$RDOCKERFILE" . + # The following are optional docker builds for Kerberos Testing + docker pull ifilonenko/hadoop-base:latest docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ -t $(image_ref spark-kerberos) \ -f "$KDOCKERFILE" . + + # SparkR +# docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ +# -t $(image_ref spark-r) \ +# -f "$RDOCKERFILE" . 
} function push { docker push "$(image_ref spark)" docker push "$(image_ref spark-py)" # docker push "$(image_ref spark-r)" - docker push "$(image_ref spark-kerberos)" } function usage { diff --git a/resource-managers/kubernetes/docker/src/test/scripts/run-kerberos-test.sh b/resource-managers/kubernetes/docker/src/test/scripts/run-kerberos-test.sh index 647e9c0bc274..8b61b1aeff16 100644 --- a/resource-managers/kubernetes/docker/src/test/scripts/run-kerberos-test.sh +++ b/resource-managers/kubernetes/docker/src/test/scripts/run-kerberos-test.sh @@ -16,9 +16,10 @@ until /usr/bin/kinit -kt /var/keytabs/hdfs.keytab hdfs/nn.${NAMESPACE}.svc.clust --conf spark.executor.instances=1 \ --conf spark.app.name=spark-hdfs \ --conf spark.driver.extraClassPath=/opt/spark/hconf/core-site.xml:/opt/spark/hconf/hdfs-site.xml:/opt/spark/hconf/yarn-site.xml:/etc/krb5.conf \ - --conf spark.kubernetes.container.image=spark:latest \ + --conf spark.kubernetes.container.image=${BASE_SPARK_IMAGE} \ + --conf spark.kubernetes.kerberos.krb5location=/etc/krb5.conf \ --conf spark.kerberos.keytab=/var/keytabs/hdfs.keytab \ --conf spark.kerberos.principal=hdfs/nn.${NAMESPACE}.svc.cluster.local@CLUSTER.LOCAL \ --conf spark.kubernetes.driver.label.spark-app-locator=${APP_LOCATOR_LABEL} \ ${SUBMIT_RESOURCE} \ - hdfs://nn.${NAMESPACE}.svc.cluster.local:9000/user/ifilonenko/wordcount.txt + hdfs://nn.${NAMESPACE}.svc.cluster.local:9000/user/ifilonenko/people.txt diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala index 6d11152db3a7..e0fd0726e81f 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala +++ 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala @@ -24,8 +24,9 @@ import org.apache.spark.deploy.k8s.integrationtest.kerberos._ private[spark] trait KerberosTestSuite { k8sSuite: KubernetesSuite => - test("Secure HDFS test with HDFS keytab", k8sTestTag) { + test("Secure HDFS test with HDFS keytab (Cluster Mode)", k8sTestTag) { val kubernetesClient = kubernetesTestComponents.kubernetesClient + // Launches single-noded psuedo-distributed kerberized hadoop cluster kerberizedHadoopClusterLauncher.launchKerberizedCluster(kerberosUtils) @@ -39,9 +40,10 @@ private[spark] trait KerberosTestSuite { k8sSuite: KubernetesSuite => appLocator, KERB_YAML_LOCATION)) driverWatcherCache.stopWatch() + val expectedLogOnCompletion = Seq( - "Returned length(s) of: [1, 1, 1]", - "Other stuff") + "File contents: [Michael, 29],[Andy, 30],[Justin, 19]", + "Returned length(s) of: 1,1,1") val driverPod = kubernetesClient .pods() .inNamespace(kubernetesTestComponents.namespace) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index c5d5e44c0c7e..223e32653535 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -102,9 +102,11 @@ private[spark] class KubernetesSuite extends SparkFunSuite testBackend.initialize() kubernetesTestComponents = new KubernetesTestComponents(testBackend.getKubernetesClient) kerberizedHadoopClusterLauncher = new KerberizedHadoopClusterLauncher( + KERBEROS_LABEL, 
kubernetesTestComponents.kubernetesClient.inNamespace(kubernetesTestComponents.namespace), kubernetesTestComponents.namespace) kerberosUtils = new KerberosUtils( + image, kImage, kubernetesTestComponents.kubernetesClient, kubernetesTestComponents.namespace) @@ -365,4 +367,5 @@ private[spark] object KubernetesSuite { val SPARK_DRIVER_MAIN_CLASS: String = "org.apache.spark.examples.DriverSubmissionTest" val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) + val KERBEROS_LABEL = Map("job" -> "kerberostest") } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala index a437df63f5c1..bee68eda1e78 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala @@ -21,7 +21,7 @@ import org.scalatest.Matchers import org.scalatest.concurrent.Eventually import scala.collection.JavaConverters._ -import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.{INTERVAL, TIMEOUT} +import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.{INTERVAL, KERBEROS_LABEL, TIMEOUT} import org.apache.spark.deploy.k8s.integrationtest.backend.IntegrationTestBackend private[spark] object MinikubeTestBackend @@ -53,8 +53,9 @@ private[spark] object MinikubeTestBackend val pvList = defaultClient.persistentVolumes().list().getItems.asScala if (pvList.nonEmpty) { defaultClient.persistentVolumes().delete() - Eventually.eventually(TIMEOUT, INTERVAL) { pvList.isEmpty should be (true) } } + 
Eventually.eventually(TIMEOUT, INTERVAL) { + defaultClient.persistentVolumes().list().getItems.asScala.isEmpty should be (true) } } catch { case ex: java.lang.NullPointerException => } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberizedHadoopClusterLauncher.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberizedHadoopClusterLauncher.scala index a38c56cf446d..81263efdb52a 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberizedHadoopClusterLauncher.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberizedHadoopClusterLauncher.scala @@ -16,8 +16,6 @@ */ package org.apache.spark.deploy.k8s.integrationtest.kerberos -import io.fabric8.kubernetes.api.builder.Predicate -import io.fabric8.kubernetes.api.model.ContainerBuilder import io.fabric8.kubernetes.client.KubernetesClient import org.apache.spark.internal.Logging @@ -30,15 +28,15 @@ import org.apache.spark.internal.Logging * to ensure that order is always preserved and the cluster is the same for every run. */ private[spark] class KerberizedHadoopClusterLauncher( + labels: Map[String, String], kubernetesClient: KubernetesClient, namespace: String) extends Logging { - private val LABELS = Map("job" -> "kerberostest") def launchKerberizedCluster(kerberosUtils: KerberosUtils): Unit = { // These Utils allow for each step in this launch process to re-use // common functionality for setting up hadoop nodes. 
// Launches persistent volumes and its claims for sharing keytabs across pods - val pvWatcherCache = new KerberosPVWatcherCache(kerberosUtils, LABELS) + val pvWatcherCache = new KerberosPVWatcherCache(kerberosUtils, labels) pvWatcherCache.deploy(kerberosUtils.getNNStorage) pvWatcherCache.deploy(kerberosUtils.getKTStorage) pvWatcherCache.stopWatch() @@ -49,7 +47,7 @@ private[spark] class KerberizedHadoopClusterLauncher( cmWatcherCache.stopWatch() // Launches the Hadoop cluster pods: KDC --> NN --> DN1 --> Data-Populator - val podWatcherCache = new KerberosPodWatcherCache(kerberosUtils, LABELS) + val podWatcherCache = new KerberosPodWatcherCache(kerberosUtils, labels) podWatcherCache.deploy(kerberosUtils.getKDC) podWatcherCache.deploy(kerberosUtils.getNN) podWatcherCache.deploy(kerberosUtils.getDN) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala index 1bf6e5094972..cb601d90c123 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala @@ -18,7 +18,6 @@ package org.apache.spark.deploy.k8s.integrationtest.kerberos import io.fabric8.kubernetes.api.model.Pod -import io.fabric8.kubernetes.api.model.extensions.Deployment import io.fabric8.kubernetes.client.{KubernetesClientException, Watch, Watcher} import io.fabric8.kubernetes.client.Watcher.Action import org.scalatest.Matchers diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala index 23b11a2681b7..b52f3ba7fe1c 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala @@ -18,6 +18,7 @@ package org.apache.spark.deploy.k8s.integrationtest.kerberos import io.fabric8.kubernetes.api.model._ import io.fabric8.kubernetes.api.model.extensions._ +import io.fabric8.openshift.api.model.ClusterRoleBinding private[spark] sealed trait KerberosStorage @@ -32,6 +33,7 @@ private[spark] case class ServiceStorage( service: Service) extends KerberosStorage private[spark] case class DeploymentStorage( + clusterRoleBinding: ClusterRoleBinding, resource: Deployment) extends KerberosStorage private[spark] case class ConfigMapStorage( diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala index 51ffd271b7f4..4a664f75e2d3 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala @@ -18,16 +18,21 @@ package org.apache.spark.deploy.k8s.integrationtest.kerberos import java.io.{File, FileInputStream} +import scala.collection.JavaConverters._ + import io.fabric8.kubernetes.api.model._ import io.fabric8.kubernetes.api.model.extensions.{Deployment, DeploymentBuilder} import io.fabric8.kubernetes.client.KubernetesClient +import 
io.fabric8.openshift.api.model.{ClusterRoleBinding, ClusterRoleBindingBuilder} import org.apache.commons.io.FileUtils.readFileToString -import scala.collection.JavaConverters._ + +import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.KERBEROS_LABEL /** * This class is responsible for handling all Utils and Constants necessary for testing */ private[spark] class KerberosUtils( + sparkImage: String, kerberosImage: String, kubernetesClient: KubernetesClient, namespace: String) { @@ -49,7 +54,6 @@ private[spark] class KerberosUtils( private val KRB_VOLUME = "krb5-conf" private val KRB_FILE_DIR = "/mnt" private val KRB_CONFIG_MAP_NAME = "krb-config-map" - private val PV_LABELS = Map("job" -> "kerberostest") private val keyPaths: Seq[KeyToPath] = (kerberosFiles ++ Seq("krb5-dp.conf")) .map(file => new KeyToPathBuilder() @@ -100,7 +104,7 @@ private[spark] class KerberosUtils( persistentVolumeMap(pvType)) } def getNNStorage: PVStorage = buildKerberosPV(pvNN) def getKTStorage: PVStorage = buildKerberosPV(pvKT) - def getLabels: Map[String, String] = PV_LABELS + def getLabels: Map[String, String] = KERBEROS_LABEL def getKeyPaths: Seq[KeyToPath] = keyPaths def getConfigMap: ConfigMapStorage = ConfigMapStorage( @@ -173,6 +177,23 @@ private[spark] class KerberosUtils( .withKey(file) .withPath(file) .build()).toList + def getKerberosRoleBinding: ClusterRoleBinding = + new ClusterRoleBindingBuilder() + .withNewMetadata() + .withName(s"default-admin-$namespace") + .withLabels(getLabels.asJava) + .endMetadata() + .withNewRoleRef() + .withKind("ClusterRole") + .withName("cluster-admin") + .endRoleRef() + .withSubjects( + new ObjectReferenceBuilder() + .withKind("ServiceAccount") + .withName("default") + .withNamespace(namespace) + .build()) + .build() def getKerberosTest( resource: String, className: String, @@ -182,6 +203,7 @@ private[spark] class KerberosUtils( .get().get(0) match { case deployment: Deployment => DeploymentStorage( + getKerberosRoleBinding, new 
DeploymentBuilder(deployment) .editSpec() .editTemplate() @@ -238,6 +260,10 @@ private[spark] class KerberosUtils( .withName("TMP_HDFS_LOC") .withValue(s"$KRB_FILE_DIR/${kerberosFiles(2)}") .endEnv() + .addNewEnv() + .withName("BASE_SPARK_IMAGE") + .withValue(sparkImage) + .endEnv() .addNewVolumeMount() .withName(KRB_VOLUME) .withMountPath(KRB_FILE_DIR) @@ -249,4 +275,5 @@ .endSpec() .build()) }} + } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/WatcherCacheConfiguration.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/WatcherCacheConfiguration.scala index 73fe18b98336..f5ef781bff1a 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/WatcherCacheConfiguration.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/WatcherCacheConfiguration.scala @@ -16,11 +16,27 @@ */ package org.apache.spark.deploy.k8s.integrationtest.kerberos +/** + * A collection of functions that together represent a WatcherCache. The function of these + * WatcherCaches is to watch the KerberosStorage object and ensure they are properly created + * by blocking with a condition. + */ private[spark] trait WatcherCacheConfiguration[T <: KerberosStorage] { + /** + * This function defines the boolean condition which would block the + * completion of the deploy() block + */ def check(name: String): Boolean + /** + * This function deploys the KerberosStorage object by having the KubernetesClient + * create the resulting KerberosStorage object. + */ def deploy(storage: T) : Unit + /** + * This function closes all Watcher threads. 
+ */ def stopWatch(): Unit } From cfe799033139251df44e584ab06b699cb437ed11 Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Tue, 2 Oct 2018 08:40:28 +0100 Subject: [PATCH 09/18] small changes with addition of old tests --- .../spark/deploy/k8s/integrationtest/KubernetesSuite.scala | 4 +++- .../k8s/integrationtest/backend/minikube/Minikube.scala | 1 - .../backend/minikube/MinikubeTestBackend.scala | 6 ++++-- .../k8s/integrationtest/kerberos/KerberosStorage.scala | 2 +- .../deploy/k8s/integrationtest/kerberos/KerberosUtils.scala | 1 + 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 223e32653535..c8748b6dd51d 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -38,7 +38,9 @@ import org.apache.spark.deploy.k8s.integrationtest.kerberos.{KerberizedHadoopClu import org.apache.spark.internal.Logging private[spark] class KubernetesSuite extends SparkFunSuite - with BeforeAndAfterAll with BeforeAndAfter with KerberosTestSuite + with BeforeAndAfterAll with BeforeAndAfter with BasicTestsSuite with SecretsTestsSuite + with PythonTestsSuite with ClientModeTestsSuite + with KerberosTestSuite with Logging with Eventually with Matchers { import KubernetesSuite._ diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala index 6494cbc18f33..af85aae21678 
100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala @@ -16,7 +16,6 @@ */ package org.apache.spark.deploy.k8s.integrationtest.backend.minikube -import java.io.File import java.nio.file.Paths import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala index bee68eda1e78..808368ddffaa 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala @@ -50,12 +50,14 @@ private[spark] object MinikubeTestBackend // Temporary hack until client library for fabric8 is updated to get around // the NPE that comes about when I do .list().getItems().asScala try { - val pvList = defaultClient.persistentVolumes().list().getItems.asScala + val pvList = defaultClient.persistentVolumes().withLabels(KERBEROS_LABEL.asJava) + .list().getItems.asScala if (pvList.nonEmpty) { defaultClient.persistentVolumes().delete() } Eventually.eventually(TIMEOUT, INTERVAL) { - defaultClient.persistentVolumes().list().getItems.asScala.isEmpty should be (true) } + defaultClient.persistentVolumes().withLabels(KERBEROS_LABEL.asJava) + .list().getItems.asScala.isEmpty should be (true) } } catch { case ex: java.lang.NullPointerException => } diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala index b52f3ba7fe1c..2e5708d11af6 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala @@ -17,7 +17,7 @@ package org.apache.spark.deploy.k8s.integrationtest.kerberos import io.fabric8.kubernetes.api.model._ -import io.fabric8.kubernetes.api.model.extensions._ +import io.fabric8.kubernetes.api.model.extensions.Deployment import io.fabric8.openshift.api.model.ClusterRoleBinding private[spark] sealed trait KerberosStorage diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala index 4a664f75e2d3..a88539cf2295 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala @@ -177,6 +177,7 @@ private[spark] class KerberosUtils( .withKey(file) .withPath(file) .build()).toList + // RoleBinding in the case of RBAC problems def getKerberosRoleBinding: ClusterRoleBinding = new ClusterRoleBindingBuilder() .withNewMetadata() From 54316ba4fbc5ec7b46184d01f6404bd26d3c0f5d Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Tue, 2 Oct 2018 08:48:26 +0100 Subject: [PATCH 10/18] bring back sparkr --- bin/docker-image-tool.sh | 8 
++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh index 23ec660865de..9de13b61c5ab 100755 --- a/bin/docker-image-tool.sh +++ b/bin/docker-image-tool.sh @@ -92,15 +92,15 @@ function build { -f "$KDOCKERFILE" . # SparkR -# docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ -# -t $(image_ref spark-r) \ -# -f "$RDOCKERFILE" . + docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ + -t $(image_ref spark-r) \ + -f "$RDOCKERFILE" . } function push { docker push "$(image_ref spark)" docker push "$(image_ref spark-py)" -# docker push "$(image_ref spark-r)" + docker push "$(image_ref spark-r)" } function usage { From 56e2c6e20b427c883e330d79f45ef6f3841cd518 Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Tue, 2 Oct 2018 11:29:04 +0100 Subject: [PATCH 11/18] add necessary apache license to pass RAT tests --- .../docker/src/test/hadoop/conf/krb5.conf | 16 ++++++++++++++++ .../docker/src/test/scripts/run-kerberos-test.sh | 16 ++++++++++++++++ .../kerberos-yml/data-populator-deployment.yml | 16 ++++++++++++++++ .../kerberos-yml/data-populator-service.yml | 16 ++++++++++++++++ .../kerberos-yml/dn1-deployment.yml | 16 ++++++++++++++++ .../kerberos-yml/dn1-service.yml | 16 ++++++++++++++++ .../kerberos-yml/kerberos-deployment.yml | 16 ++++++++++++++++ .../kerberos-yml/kerberos-service.yml | 16 ++++++++++++++++ .../kerberos-yml/kerberos-test.yml | 16 ++++++++++++++++ .../kerberos-yml/nn-deployment.yml | 16 ++++++++++++++++ .../kerberos-yml/nn-service.yml | 16 ++++++++++++++++ 11 files changed, 176 insertions(+) diff --git a/resource-managers/kubernetes/docker/src/test/hadoop/conf/krb5.conf b/resource-managers/kubernetes/docker/src/test/hadoop/conf/krb5.conf index 144f77d8995d..f9d65d5db1b0 100755 --- a/resource-managers/kubernetes/docker/src/test/hadoop/conf/krb5.conf +++ b/resource-managers/kubernetes/docker/src/test/hadoop/conf/krb5.conf @@ -1,3 +1,19 @@ +# +# Licensed to the Apache Software Foundation 
(ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# includedir /etc/krb5.conf.d/ [logging] diff --git a/resource-managers/kubernetes/docker/src/test/scripts/run-kerberos-test.sh b/resource-managers/kubernetes/docker/src/test/scripts/run-kerberos-test.sh index 8b61b1aeff16..c8f81dfba754 100644 --- a/resource-managers/kubernetes/docker/src/test/scripts/run-kerberos-test.sh +++ b/resource-managers/kubernetes/docker/src/test/scripts/run-kerberos-test.sh @@ -1,4 +1,20 @@ #!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# sed -i -e 's/#//' -e 's/default_ccache_name/# default_ccache_name/' /etc/krb5.conf export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true -Dsun.security.krb5.debug=true" export HADOOP_JAAS_DEBUG=true diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml index 166520a1e306..f96aa2661e2e 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml @@ -1,3 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# apiVersion: extensions/v1beta1 kind: Deployment metadata: diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-service.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-service.yml index 2f35d5d70de4..fb319ac13f7e 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-service.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-service.yml @@ -1,3 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# apiVersion: v1 kind: Service metadata: diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml index 03ac6b10ccab..7f7b01d72a1d 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml @@ -1,3 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# apiVersion: extensions/v1beta1 kind: Deployment metadata: diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-service.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-service.yml index 6915022b7be3..963e66da8015 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-service.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-service.yml @@ -1,3 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# apiVersion: v1 kind: Service metadata: diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml index c093d8881e3a..c8a70f03e1cf 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml @@ -1,3 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# apiVersion: extensions/v1beta1 kind: Deployment metadata: diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-service.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-service.yml index da7b994f6e2a..c1ff280b2bda 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-service.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-service.yml @@ -1,3 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# apiVersion: v1 kind: Service metadata: diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-test.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-test.yml index 9c3cc067b580..dcc0eba80d67 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-test.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-test.yml @@ -1,3 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# apiVersion: extensions/v1beta1 kind: Deployment metadata: diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml index 89ad452d3daa..5f7c94dcf87b 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml @@ -1,3 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# apiVersion: extensions/v1beta1 kind: Deployment metadata: diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-service.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-service.yml index 649302150aa3..7d6cfd415a28 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-service.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-service.yml @@ -1,3 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# apiVersion: v1 kind: Service metadata: From 436f652159b257e3f362118cf0f2d73bd77cd328 Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Mon, 15 Oct 2018 17:18:14 -0700 Subject: [PATCH 12/18] style --- .../k8s/integrationtest/KerberosTestSuite.scala | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala index d509464a20a1..e9883efcd83d 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala @@ -34,11 +34,13 @@ private[spark] trait KerberosTestSuite { k8sSuite: KubernetesSuite => val driverWatcherCache = new KerberosDriverWatcherCache( kerberosUtils, Map("spark-app-locator" -> appLocator)) - driverWatcherCache.deploy(kerberosUtils.getKerberosTest( - containerLocalSparkDistroExamplesJar, - HDFS_TEST_CLASS, - appLocator, - KERB_YAML_LOCATION)) + driverWatcherCache.deploy( + kerberosUtils.getKerberosTest( + containerLocalSparkDistroExamplesJar, + HDFS_TEST_CLASS, + appLocator, + KERB_YAML_LOCATION) + ) 
driverWatcherCache.stopWatch() val expectedLogOnCompletion = Seq( From cccf0275cc58b464aba544742d7300ba4939f5a6 Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Mon, 15 Oct 2018 17:33:51 -0700 Subject: [PATCH 13/18] style --- .../deploy/k8s/integrationtest/kerberos/KerberosUtils.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala index 3b69f8927d38..d7689d884c9e 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala @@ -28,9 +28,9 @@ import org.apache.commons.io.FileUtils.readFileToString import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.KERBEROS_LABEL - /** - * This class is responsible for handling all Utils and Constants necessary for testing - */ +/** + * This class is responsible for handling all Utils and Constants necessary for testing + */ private[spark] class KerberosUtils( sparkImage: String, kerberosImage: String, From 5d270f17dccbb2eac6d3c2ab8c12987e3d992086 Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Tue, 16 Oct 2018 18:35:08 -0700 Subject: [PATCH 14/18] resolve initial comments on location of krb image buildling --- bin/docker-image-tool.sh | 8 --- dev/make-distribution.sh | 1 + .../scripts/setup-integration-test-env.sh | 2 + .../scripts/setup-krb-integration-test-env.sh | 71 +++++++++++++++++++ .../kerberos/WatcherCacheConfiguration.scala | 4 +- .../integration-tests/test-data/input.txt | 1 - .../simple-hadoop-conf/core-site.xml | 24 ------- .../simple-hadoop-conf/hdfs-site.xml | 24 ------- 8 files 
changed, 76 insertions(+), 59 deletions(-) create mode 100644 resource-managers/kubernetes/integration-tests/scripts/setup-krb-integration-test-env.sh delete mode 100755 resource-managers/kubernetes/integration-tests/test-data/input.txt delete mode 100755 resource-managers/kubernetes/integration-tests/test-data/simple-hadoop-conf/core-site.xml delete mode 100755 resource-managers/kubernetes/integration-tests/test-data/simple-hadoop-conf/hdfs-site.xml diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh index 941dac1310a4..2833ca56e58d 100755 --- a/bin/docker-image-tool.sh +++ b/bin/docker-image-tool.sh @@ -73,7 +73,6 @@ function build { ) local BASEDOCKERFILE=${BASEDOCKERFILE:-"$IMG_PATH/main/dockerfiles/spark/Dockerfile"} local PYDOCKERFILE=${PYDOCKERFILE:-"$IMG_PATH/main/dockerfiles/spark/bindings/python/Dockerfile"} - local KDOCKERFILE=${KDOCKERFILE:-"$IMG_PATH/test/dockerfiles/spark/kerberos/Dockerfile"} local RDOCKERFILE=${RDOCKERFILE:-"$IMG_PATH/main/dockerfiles/spark/bindings/R/Dockerfile"} # Spark Base @@ -86,13 +85,6 @@ function build { -t $(image_ref spark-py) \ -f "$PYDOCKERFILE" . - # The following are optional docker builds for Kerberos Testing - docker pull ifilonenko/hadoop-base:latest - - docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ - -t $(image_ref spark-kerberos) \ - -f "$KDOCKERFILE" . 
- # SparkR docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ -t $(image_ref spark-r) \ diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index a38ab84daa56..64972ba7fb1e 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -192,6 +192,7 @@ fi if [ -d "$SPARK_HOME"/resource-managers/kubernetes/core/target/ ]; then mkdir -p "$DISTDIR/kubernetes/" cp -a "$SPARK_HOME"/resource-managers/kubernetes/docker/src "$DISTDIR/kubernetes/" + cp -a "$SPARK_HOME"/resource-managers/kubernetes/integration-tests/scripts "$DISTDIR/kubernetes/" cp -a "$SPARK_HOME"/resource-managers/kubernetes/integration-tests/tests "$DISTDIR/kubernetes/" fi diff --git a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh index ccfb8e767c52..6b4a35d5f062 100755 --- a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh +++ b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh @@ -83,6 +83,8 @@ then else # -m option for minikube. $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t $IMAGE_TAG build + chmod +x $UNPACKED_SPARK_TGZ/kubernetes/scripts/setup-krb-integration-test-env.sh + $UNPACKED_SPARK_TGZ/kubernetes/scripts/setup-krb-integration-test-env.sh -r $IMAGE_REPO -t $IMAGE_TAG fi cd - fi diff --git a/resource-managers/kubernetes/integration-tests/scripts/setup-krb-integration-test-env.sh b/resource-managers/kubernetes/integration-tests/scripts/setup-krb-integration-test-env.sh new file mode 100644 index 000000000000..c21f417479c1 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/scripts/setup-krb-integration-test-env.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +function error { + echo "$@" 1>&2 + exit 1 +} + +function image_ref { + local image="$1" + local add_repo="${2:-1}" + if [ -n "$REPO" ]; then + image="$REPO/$image" + fi + if [ -n "$TAG" ]; then + image="$image:$TAG" + fi + echo "$image" +} + +function build { + local BUILD_ARGS + local IMG_PATH="kubernetes/src" + + if [ ! -d "$IMG_PATH" ]; then + error "Cannot find docker image. This script must be run from a runnable distribution of Apache Spark." + fi + + local KRB_BUILD_ARGS=( + --build-arg + base_img=$(image_ref spark) + ) + local KDOCKERFILE=${KDOCKERFILE:-"$IMG_PATH/test/dockerfiles/spark/kerberos/Dockerfile"} + + docker pull ifilonenko/hadoop-base:latest + + docker build $NOCACHEARG "${KRB_BUILD_ARGS[@]}" \ + -t $(image_ref spark-kerberos) \ + -f "$KDOCKERFILE" . 
+} + +REPO= +TAG= +NOCACHEARG= +while getopts r:t:n: option +do + case "${option}" + in + r) REPO=${OPTARG};; + t) TAG=${OPTARG};; + n) NOCACHEARG="--no-cache";; + esac +done + +build diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/WatcherCacheConfiguration.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/WatcherCacheConfiguration.scala index f5ef781bff1a..955c23062656 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/WatcherCacheConfiguration.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/WatcherCacheConfiguration.scala @@ -17,8 +17,8 @@ package org.apache.spark.deploy.k8s.integrationtest.kerberos /** - * A collection of functions that together represent a WatcherCache. The functin of these - * WatcherCaches are to watch the KerberosStorage object and insure they are properly created + * A collection of functions that together represent a WatcherCache. The function of these + * WatcherCaches are to watch the KerberosStorage object and insure it is properly created * by blocking with a condition. 
*/ private[spark] trait WatcherCacheConfiguration[T <: KerberosStorage] { diff --git a/resource-managers/kubernetes/integration-tests/test-data/input.txt b/resource-managers/kubernetes/integration-tests/test-data/input.txt deleted file mode 100755 index dfe437bdebeb..000000000000 --- a/resource-managers/kubernetes/integration-tests/test-data/input.txt +++ /dev/null @@ -1 +0,0 @@ -Contents diff --git a/resource-managers/kubernetes/integration-tests/test-data/simple-hadoop-conf/core-site.xml b/resource-managers/kubernetes/integration-tests/test-data/simple-hadoop-conf/core-site.xml deleted file mode 100755 index 08a512929a2a..000000000000 --- a/resource-managers/kubernetes/integration-tests/test-data/simple-hadoop-conf/core-site.xml +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - - - - fs.defaultFS - hdfs://nn.REPLACE_ME.svc.cluster.local:9000 - - diff --git a/resource-managers/kubernetes/integration-tests/test-data/simple-hadoop-conf/hdfs-site.xml b/resource-managers/kubernetes/integration-tests/test-data/simple-hadoop-conf/hdfs-site.xml deleted file mode 100755 index 76fc9c68fa37..000000000000 --- a/resource-managers/kubernetes/integration-tests/test-data/simple-hadoop-conf/hdfs-site.xml +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - - - - dfs.replication - 1 - - From 4c9b886c1f23bbdd3d8e1ec7df25f03e45892d88 Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Tue, 16 Oct 2018 18:59:13 -0700 Subject: [PATCH 15/18] need to eval into minikube docker-env --- .../integration-tests/scripts/setup-krb-integration-test-env.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/resource-managers/kubernetes/integration-tests/scripts/setup-krb-integration-test-env.sh b/resource-managers/kubernetes/integration-tests/scripts/setup-krb-integration-test-env.sh index c21f417479c1..f7b165cb2373 100644 --- a/resource-managers/kubernetes/integration-tests/scripts/setup-krb-integration-test-env.sh +++ b/resource-managers/kubernetes/integration-tests/scripts/setup-krb-integration-test-env.sh @@ 
-68,4 +68,5 @@ do esac done +eval $(minikube docker-env) build From 66fe40830d9f732b7bd2bb5e1ebf81713d201751 Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Thu, 25 Oct 2018 15:02:38 -0700 Subject: [PATCH 16/18] allow for building of hadoop-base image --- .../docker/src/test/data/people.txt | 3 + .../src/test/dockerfiles/hadoop/Dockerfile | 46 +++ .../docker/src/test/hadoop/conf/core-site.xml | 38 -- .../docker/src/test/hadoop/conf/hdfs-site.xml | 157 -------- .../src/test/hadoop/conf/ssl-server.xml | 44 +++ .../docker/src/test/scripts/populate-data.sh | 42 +++ .../krb5.conf => scripts/start-datanode.sh} | 33 +- .../docker/src/test/scripts/start-kdc.sh | 55 +++ .../docker/src/test/scripts/start-namenode.sh | 33 ++ .../data-populator-deployment.yml | 5 +- .../kerberos-yml/dn1-deployment.yml | 5 +- .../kerberos-yml/kerberos-deployment.yml | 5 +- .../kerberos-yml/nn-deployment.yml | 5 +- .../kubernetes/integration-tests/pom.xml | 30 +- .../scripts/setup-integration-test-env.sh | 23 +- .../scripts/setup-krb-integration-test-env.sh | 16 +- .../integrationtest/KerberosTestSuite.scala | 2 +- .../k8s/integrationtest/KubernetesSuite.scala | 6 +- .../kerberos/KerberosUtils.scala | 337 +++++++++--------- .../test-data/hadoop-conf/yarn-site.xml | 26 -- 20 files changed, 480 insertions(+), 431 deletions(-) create mode 100644 resource-managers/kubernetes/docker/src/test/data/people.txt create mode 100644 resource-managers/kubernetes/docker/src/test/dockerfiles/hadoop/Dockerfile delete mode 100755 resource-managers/kubernetes/docker/src/test/hadoop/conf/core-site.xml delete mode 100755 resource-managers/kubernetes/docker/src/test/hadoop/conf/hdfs-site.xml create mode 100644 resource-managers/kubernetes/docker/src/test/hadoop/conf/ssl-server.xml create mode 100644 resource-managers/kubernetes/docker/src/test/scripts/populate-data.sh rename resource-managers/kubernetes/docker/src/test/{hadoop/conf/krb5.conf => scripts/start-datanode.sh} (59%) mode change 100755 => 100644 create 
mode 100644 resource-managers/kubernetes/docker/src/test/scripts/start-kdc.sh create mode 100644 resource-managers/kubernetes/docker/src/test/scripts/start-namenode.sh delete mode 100755 resource-managers/kubernetes/integration-tests/test-data/hadoop-conf/yarn-site.xml diff --git a/resource-managers/kubernetes/docker/src/test/data/people.txt b/resource-managers/kubernetes/docker/src/test/data/people.txt new file mode 100644 index 000000000000..30f7501874b6 --- /dev/null +++ b/resource-managers/kubernetes/docker/src/test/data/people.txt @@ -0,0 +1,3 @@ +Michael, 29 +Andy, 30 +Justin, 19 \ No newline at end of file diff --git a/resource-managers/kubernetes/docker/src/test/dockerfiles/hadoop/Dockerfile b/resource-managers/kubernetes/docker/src/test/dockerfiles/hadoop/Dockerfile new file mode 100644 index 000000000000..d9b67dadde4c --- /dev/null +++ b/resource-managers/kubernetes/docker/src/test/dockerfiles/hadoop/Dockerfile @@ -0,0 +1,46 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +FROM centos:7 + +ARG hadoop_version +ARG k_img_path=kubernetes/src/test + +RUN yum -y install krb5-server krb5-workstation +RUN yum -y install java-1.8.0-openjdk-headless +RUN yum -y install apache-commons-daemon-jsvc +RUN yum install net-tools -y +RUN yum install telnet telnet-server -y +RUN yum -y install which + +RUN sed -i -e 's/#//' -e 's/default_ccache_name/# default_ccache_name/' /etc/krb5.conf + +RUN useradd -u 1098 hdfs + +ADD hadoop-${hadoop_version}.tar.gz / +RUN ln -s hadoop-${hadoop_version} hadoop +RUN chown -R -L hdfs /hadoop + +COPY ${k_img_path}/hadoop/conf/ssl-server.xml /hadoop/etc/hadoop/ +COPY ${k_img_path}/hadoop/conf/yarn-site.xml /hadoop/etc/hadoop/ + +COPY ${k_img_path}/scripts/start-namenode.sh / +COPY ${k_img_path}/scripts/start-datanode.sh / +COPY ${k_img_path}/scripts/populate-data.sh / +COPY ${k_img_path}/scripts/start-kdc.sh / + +COPY ${k_img_path}/data/people.txt / diff --git a/resource-managers/kubernetes/docker/src/test/hadoop/conf/core-site.xml b/resource-managers/kubernetes/docker/src/test/hadoop/conf/core-site.xml deleted file mode 100755 index 9a6ae2c50526..000000000000 --- a/resource-managers/kubernetes/docker/src/test/hadoop/conf/core-site.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - - - - - - - hadoop.security.authentication - kerberos - - - - hadoop.security.authorization - true - - - - fs.defaultFS - hdfs://nn.REPLACE_ME.svc.cluster.local:9000 - - - hadoop.rpc.protection - authentication - - diff --git a/resource-managers/kubernetes/docker/src/test/hadoop/conf/hdfs-site.xml b/resource-managers/kubernetes/docker/src/test/hadoop/conf/hdfs-site.xml deleted file mode 100755 index 66dc969c46b6..000000000000 --- a/resource-managers/kubernetes/docker/src/test/hadoop/conf/hdfs-site.xml +++ /dev/null @@ -1,157 +0,0 @@ - - - - - - - - - - dfs.replication - 1 - - - - - dfs.permissions - true - - - dfs.block.access.token.enable - true - - - - - dfs.namenode.keytab.file - /var/keytabs/hdfs.keytab - - - dfs.namenode.kerberos.principal 
- hdfs/nn.REPLACE_ME.svc.cluster.local@CLUSTER.LOCAL - - - dfs.namenode.kerberos.internal.spnego.principal - HTTP/nn.REPLACE_ME.svc.cluster.local@CLUSTER.LOCAL - - - dfs.namenode.rpc-address - nn.REPLACE_ME.svc.cluster.local:9000 - - - - - - dfs.namenode.delegation.token.max-lifetime - 3600000 - - - dfs.namenode.delegation.token.renew-interval - 3600000 - - - - - - - dfs.data.transfer.protection - integrity - - - dfs.datanode.address - 0.0.0.0:10019 - - - - dfs.datanode.http.address - 0.0.0.0:10022 - - - - dfs.http.policy - HTTPS_ONLY - - - - - dfs.namenode.keytab.file - /var/keytabs/hdfs.keytab - - - dfs.namenode.kerberos.principal - hdfs/nn.REPLACE_ME.svc.cluster.local@CLUSTER.LOCAL - - - dfs.namenode.kerberos.internal.spnego.principal - HTTP/nn.REPLACE_ME.svc.cluster.local@CLUSTER.LOCAL - - - - - dfs.namenode.datanode.registration.ip-hostname-check - false - - - dfs.datanode.data.dir.perm - 700 - - - dfs.namenode.name.dir - file:///hadoop/etc/data - - - dfs.datanode.name.dir - file:///hadoop/etc/data - - - dfs.data.dir - file:///hadoop/etc/data - - - dfs.datanode.keytab.file - /var/keytabs/hdfs.keytab - - - dfs.datanode.kerberos.principal - hdfs/dn1.REPLACE_ME.svc.cluster.local@CLUSTER.LOCAL - - - dfs.encrypt.data.transfer - true - - - dfs.encrypt.data.transfer.cipher.suites - AES/CTR/NoPadding - - - dfs.encrypt.data.transfer.cipher.key.bitlength - 256 - - - - - dfs.webhdfs.enabled - true - - - dfs.web.authentication.kerberos.principal - HTTP/dn1.REPLACE_ME.svc.cluster.local@CLUSTER.LOCAL - - - dfs.web.authentication.kerberos.keytab - /var/keytabs/hdfs.keytab - - - diff --git a/resource-managers/kubernetes/docker/src/test/hadoop/conf/ssl-server.xml b/resource-managers/kubernetes/docker/src/test/hadoop/conf/ssl-server.xml new file mode 100644 index 000000000000..45cfa1870792 --- /dev/null +++ b/resource-managers/kubernetes/docker/src/test/hadoop/conf/ssl-server.xml @@ -0,0 +1,44 @@ + + + + + + + ssl.server.truststore.location + /var/keytabs/hdfs.jks + + + + 
ssl.server.truststore.password + changeme + + + + ssl.server.keystore.location + /var/keytabs/hdfs.jks + + + + ssl.server.keystore.password + changeme + + + + ssl.server.keystore.keypassword + changeme + + + diff --git a/resource-managers/kubernetes/docker/src/test/scripts/populate-data.sh b/resource-managers/kubernetes/docker/src/test/scripts/populate-data.sh new file mode 100644 index 000000000000..300158969496 --- /dev/null +++ b/resource-managers/kubernetes/docker/src/test/scripts/populate-data.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk +export PATH=/hadoop/bin:$PATH +export HADOOP_CONF_DIR=/hadoop/etc/hadoop +export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true -Dsun.security.krb5.debug=true ${HADOOP_OPTS}" +export KRB5CCNAME=KRBCONF +mkdir -p /hadoop/etc/data +cp ${TMP_KRB_LOC} /etc/krb5.conf +cp ${TMP_CORE_LOC} /hadoop/etc/hadoop/core-site.xml +cp ${TMP_HDFS_LOC} /hadoop/etc/hadoop/hdfs-site.xml + +until kinit -kt /var/keytabs/hdfs.keytab hdfs/nn.${NAMESPACE}.svc.cluster.local; do sleep 2; done + +until (echo > /dev/tcp/nn.${NAMESPACE}.svc.cluster.local/9000) >/dev/null 2>&1; do sleep 2; done + +hdfs dfsadmin -safemode wait + + +hdfs dfs -mkdir -p /user/ifilonenko/ +hdfs dfs -copyFromLocal /people.txt /user/ifilonenko + +hdfs dfs -chmod -R 755 /user/ifilonenko +hdfs dfs -chown -R ifilonenko /user/ifilonenko + + +sleep 60 diff --git a/resource-managers/kubernetes/docker/src/test/hadoop/conf/krb5.conf b/resource-managers/kubernetes/docker/src/test/scripts/start-datanode.sh old mode 100755 new mode 100644 similarity index 59% rename from resource-managers/kubernetes/docker/src/test/hadoop/conf/krb5.conf rename to resource-managers/kubernetes/docker/src/test/scripts/start-datanode.sh index f9d65d5db1b0..d87ea659ae64 --- a/resource-managers/kubernetes/docker/src/test/hadoop/conf/krb5.conf +++ b/resource-managers/kubernetes/docker/src/test/scripts/start-datanode.sh @@ -1,3 +1,4 @@ +#!/usr/bin/env bash # # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with @@ -14,28 +15,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -includedir /etc/krb5.conf.d/ +export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk +export PATH=/hadoop/bin:$PATH +export HADOOP_CONF_DIR=/hadoop/etc/hadoop +mkdir -p /hadoop/etc/data +cp ${TMP_KRB_LOC} /etc/krb5.conf +cp ${TMP_CORE_LOC} /hadoop/etc/hadoop/core-site.xml +cp ${TMP_HDFS_LOC} /hadoop/etc/hadoop/hdfs-site.xml -[logging] -default = FILE:/var/log/krb5libs.log -kdc = FILE:/var/log/krb5kdc.log -admin_server = FILE:/var/log/kadmind.log +until kinit -kt /var/keytabs/hdfs.keytab hdfs/nn.${NAMESPACE}.svc.cluster.local; do sleep 15; done -[libdefaults] -dns_lookup_realm = false -ticket_lifetime = 24h -renew_lifetime = 7d -forwardable = true -rdns = false -default_realm = CLUSTER.LOCAL -# default_ccache_name = MEMORY +echo "KDC is up and ready to go... starting up" -[realms] -CLUSTER.LOCAL = { - kdc = kerberos.REPLACE_ME.svc.cluster.local - admin_server = kerberos.REPLACE_ME.svc.cluster.local -} +kdestroy -[domain_realm] -.cluster.local = CLUSTER.LOCAL -cluster.local = CLUSTER.LOCAL +hdfs datanode diff --git a/resource-managers/kubernetes/docker/src/test/scripts/start-kdc.sh b/resource-managers/kubernetes/docker/src/test/scripts/start-kdc.sh new file mode 100644 index 000000000000..8b5fb8553f38 --- /dev/null +++ b/resource-managers/kubernetes/docker/src/test/scripts/start-kdc.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk +export PATH=/hadoop/bin:$PATH +export HADOOP_CONF_DIR=/hadoop/etc/hadoop +mkdir -p /hadoop/etc/data +cp ${TMP_KRB_LOC} /etc/krb5.conf +cp ${TMP_CORE_LOC} /hadoop/etc/hadoop/core-site.xml +cp ${TMP_HDFS_LOC} /hadoop/etc/hadoop/hdfs-site.xml + +/usr/sbin/kdb5_util -P changeme create -s + + +## password only user +/usr/sbin/kadmin.local -q "addprinc -randkey ifilonenko" +/usr/sbin/kadmin.local -q "ktadd -k /var/keytabs/ifilonenko.keytab ifilonenko" + +/usr/sbin/kadmin.local -q "addprinc -randkey HTTP/server.${NAMESPACE}.svc.cluster.local" +/usr/sbin/kadmin.local -q "ktadd -k /var/keytabs/server.keytab HTTP/server.${NAMESPACE}.svc.cluster.local" + +/usr/sbin/kadmin.local -q "addprinc -randkey hdfs/nn.${NAMESPACE}.svc.cluster.local" +/usr/sbin/kadmin.local -q "addprinc -randkey HTTP/nn.${NAMESPACE}.svc.cluster.local" +/usr/sbin/kadmin.local -q "addprinc -randkey hdfs/dn1.${NAMESPACE}.svc.cluster.local" +/usr/sbin/kadmin.local -q "addprinc -randkey HTTP/dn1.${NAMESPACE}.svc.cluster.local" + +/usr/sbin/kadmin.local -q "ktadd -k /var/keytabs/hdfs.keytab hdfs/nn.${NAMESPACE}.svc.cluster.local" +/usr/sbin/kadmin.local -q "ktadd -k /var/keytabs/hdfs.keytab HTTP/nn.${NAMESPACE}.svc.cluster.local" +/usr/sbin/kadmin.local -q "ktadd -k /var/keytabs/hdfs.keytab hdfs/dn1.${NAMESPACE}.svc.cluster.local" +/usr/sbin/kadmin.local -q "ktadd -k /var/keytabs/hdfs.keytab HTTP/dn1.${NAMESPACE}.svc.cluster.local" + +chown hdfs /var/keytabs/hdfs.keytab + +keytool -genkey -alias nn.${NAMESPACE}.svc.cluster.local -keyalg rsa -keysize 
1024 -dname "CN=nn.${NAMESPACE}.svc.cluster.local" -keypass changeme -keystore /var/keytabs/hdfs.jks -storepass changeme +keytool -genkey -alias dn1.${NAMESPACE}.svc.cluster.local -keyalg rsa -keysize 1024 -dname "CN=dn1.${NAMESPACE}.svc.cluster.local" -keypass changeme -keystore /var/keytabs/hdfs.jks -storepass changeme + +chmod 700 /var/keytabs/hdfs.jks +chown hdfs /var/keytabs/hdfs.jks + + +krb5kdc -n diff --git a/resource-managers/kubernetes/docker/src/test/scripts/start-namenode.sh b/resource-managers/kubernetes/docker/src/test/scripts/start-namenode.sh new file mode 100644 index 000000000000..d23ec8265412 --- /dev/null +++ b/resource-managers/kubernetes/docker/src/test/scripts/start-namenode.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk +export PATH=/hadoop/bin:$PATH +export HADOOP_CONF_DIR=/hadoop/etc/hadoop +mkdir -p /hadoop/etc/data +cp ${TMP_KRB_LOC} /etc/krb5.conf +cp ${TMP_CORE_LOC} /hadoop/etc/hadoop/core-site.xml +cp ${TMP_HDFS_LOC} /hadoop/etc/hadoop/hdfs-site.xml + +until kinit -kt /var/keytabs/hdfs.keytab hdfs/nn.${NAMESPACE}.svc.cluster.local; do sleep 15; done + +echo "KDC is up and ready to go... 
starting up" + +kdestroy + +hdfs namenode -format +hdfs namenode diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml index f96aa2661e2e..57bf9d37c25e 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml @@ -30,10 +30,9 @@ spec: job: kerberostest spec: containers: - - command: - - /populate-data.sh + - command: ["sh"] + args: ["/populate-data.sh"] name: data-populator - image: ifilonenko/hadoop-base:latest imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /var/keytabs diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml index 7f7b01d72a1d..ca1d572a4ec3 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml @@ -30,10 +30,9 @@ spec: job: kerberostest spec: containers: - - command: - - /start-datanode.sh + - command: ["sh"] + args: ["/start-datanode.sh"] name: dn1 - image: ifilonenko/hadoop-base:latest imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /var/keytabs diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml index c8a70f03e1cf..a38fab6c2e32 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml @@ -30,10 +30,9 @@ spec: job: kerberostest spec: containers: - - command: - - /start-kdc.sh + - command: ["sh"] + args: ["/start-kdc.sh"] name: kerberos - image: 
ifilonenko/hadoop-base:latest imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /var/keytabs diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml index 5f7c94dcf87b..e6527ed1d2de 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml @@ -30,12 +30,11 @@ spec: job: kerberostest spec: containers: - - command: - - /start-namenode.sh + - command: ["sh"] + args: ["/start-namenode.sh"] name: nn ports: - containerPort: 9000 - image: ifilonenko/hadoop-base:latest imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /var/keytabs diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 23453c8957b2..168588e04630 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -26,14 +26,16 @@ spark-kubernetes-integration-tests_2.11 - 1.3.0 + 1.4.1 1.4.0 3.0.0 + 2.7.7 3.2.2 1.0 kubernetes-integration-tests ${project.build.directory}/spark-dist-unpacked + ${project.build.directory}/hadoop-dist-loc N/A ${project.build.directory}/imageTag.txt minikube @@ -71,6 +73,25 @@ + + com.googlecode.maven-download-plugin + download-maven-plugin + ${download-maven-plugin.version} + + + install-hadoop-distribution + pre-integration-test + + wget + + + http://apache.mirrors.lucidnetworks.net/hadoop/common/hadoop-${hadoop-common-version}/hadoop-${hadoop-common-version}.tar.gz + ${spark.kubernetes.test.hadoopTgz} + hadoop-${hadoop-common-version}.tar.gz + + + + org.codehaus.mojo exec-maven-plugin @@ -102,12 +123,17 @@ --spark-tgz ${spark.kubernetes.test.sparkTgz} + + --hadoop-tgz + ${spark.kubernetes.test.hadoopTgz}/hadoop-${hadoop-common-version}.tar.gz + + --hadoop-version + ${hadoop-common-version} - 
org.apache.maven.plugins maven-surefire-plugin diff --git a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh index 6b4a35d5f062..31633145ea89 100755 --- a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh +++ b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh @@ -23,6 +23,8 @@ DEPLOY_MODE="minikube" IMAGE_REPO="docker.io/kubespark" IMAGE_TAG="N/A" SPARK_TGZ="N/A" +HADOOP_TGZ="N/A" +HADOOP_VERSION="N/A" # Parse arguments while (( "$#" )); do @@ -51,6 +53,14 @@ while (( "$#" )); do SPARK_TGZ="$2" shift ;; + --hadoop-tgz) + HADOOP_TGZ="$2" + shift + ;; + --hadoop-version) + HADOOP_VERSION="$2" + shift + ;; *) break ;; @@ -63,6 +73,16 @@ then echo "Must specify a Spark tarball to build Docker images against with --spark-tgz." && exit 1; fi +if [[ $HADOOP_TGZ == "N/A" ]]; +then + echo "Must specify a Hadoop tarball to build hadoop Docker images against with --hadoop-tgz." && exit 1; +fi + +if [[ $HADOOP_VERSION == "N/A" ]]; +then + echo "Must specify a Hadoop version with --hadoop-version." && exit 1; +fi + rm -rf $UNPACKED_SPARK_TGZ mkdir -p $UNPACKED_SPARK_TGZ tar -xzvf $SPARK_TGZ --strip-components=1 -C $UNPACKED_SPARK_TGZ; @@ -71,6 +91,7 @@ if [[ $IMAGE_TAG == "N/A" ]]; then IMAGE_TAG=$(uuidgen); cd $UNPACKED_SPARK_TGZ + cp $HADOOP_TGZ $UNPACKED_SPARK_TGZ/ if [[ $DEPLOY_MODE == cloud ]] ; then $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG build @@ -84,7 +105,7 @@ then # -m option for minikube. 
$UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t $IMAGE_TAG build chmod +x $UNPACKED_SPARK_TGZ/kubernetes/scripts/setup-krb-integration-test-env.sh - $UNPACKED_SPARK_TGZ/kubernetes/scripts/setup-krb-integration-test-env.sh -r $IMAGE_REPO -t $IMAGE_TAG + $UNPACKED_SPARK_TGZ/kubernetes/scripts/setup-krb-integration-test-env.sh -r $IMAGE_REPO -t $IMAGE_TAG -v $HADOOP_VERSION fi cd - fi diff --git a/resource-managers/kubernetes/integration-tests/scripts/setup-krb-integration-test-env.sh b/resource-managers/kubernetes/integration-tests/scripts/setup-krb-integration-test-env.sh index f7b165cb2373..dc0cdecbffd4 100644 --- a/resource-managers/kubernetes/integration-tests/scripts/setup-krb-integration-test-env.sh +++ b/resource-managers/kubernetes/integration-tests/scripts/setup-krb-integration-test-env.sh @@ -35,7 +35,6 @@ function image_ref { } function build { - local BUILD_ARGS local IMG_PATH="kubernetes/src" if [ ! -d "$IMG_PATH" ]; then @@ -46,9 +45,16 @@ function build { --build-arg base_img=$(image_ref spark) ) - local KDOCKERFILE=${KDOCKERFILE:-"$IMG_PATH/test/dockerfiles/spark/kerberos/Dockerfile"} + local HADOOP_BUILD_ARGS=( + --build-arg + hadoop_version="$HVERSION" + ) + local HDOCKERFILE="$IMG_PATH/test/dockerfiles/hadoop/Dockerfile" + local KDOCKERFILE="$IMG_PATH/test/dockerfiles/spark/kerberos/Dockerfile" - docker pull ifilonenko/hadoop-base:latest + docker build $NOCACHEARG "${HADOOP_BUILD_ARGS[@]}" \ + -t $(image_ref hadoop-base) \ + -f "$HDOCKERFILE" . 
docker build $NOCACHEARG "${KRB_BUILD_ARGS[@]}" \ -t $(image_ref spark-kerberos) \ @@ -58,12 +64,14 @@ function build { REPO= TAG= NOCACHEARG= -while getopts r:t:n: option +HVERSION= +while getopts r:t:v:n: option do case "${option}" in r) REPO=${OPTARG};; t) TAG=${OPTARG};; + v) HVERSION=${OPTARG};; n) NOCACHEARG="--no-cache";; esac done diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala index e9883efcd83d..9047f9c67a81 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala @@ -45,7 +45,7 @@ private[spark] trait KerberosTestSuite { k8sSuite: KubernetesSuite => val expectedLogOnCompletion = Seq( "File contents: [Michael,", - "Returned length(s) of: 3") + "Returned length(s) of: 3.0") val driverPod = kubernetesClient .pods() .inNamespace(kubernetesTestComponents.namespace) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 6b1a90dff7bb..90d274b8c17a 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -38,8 +38,7 @@ import org.apache.spark.deploy.k8s.integrationtest.kerberos.{KerberizedHadoopClu import org.apache.spark.internal.Logging private[spark] class KubernetesSuite extends 
SparkFunSuite - with BeforeAndAfterAll with BeforeAndAfter with BasicTestsSuite with SecretsTestsSuite - with PythonTestsSuite with ClientModeTestsSuite + with BeforeAndAfterAll with BeforeAndAfter with KerberosTestSuite with Logging with Eventually with Matchers { @@ -48,6 +47,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite private var sparkHomeDir: Path = _ private var pyImage: String = _ private var rImage: String = _ + private var hImage: String = _ private var kImage: String = _ protected var image: String = _ @@ -93,6 +93,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite image = s"$imageRepo/spark:$imageTag" pyImage = s"$imageRepo/spark-py:$imageTag" rImage = s"$imageRepo/spark-r:$imageTag" + hImage = s"$imageRepo/hadoop-base:$imageTag" kImage = s"$imageRepo/spark-kerberos:$imageTag" val sparkDistroExamplesJarFile: File = sparkHomeDir.resolve(Paths.get("examples", "jars")) @@ -109,6 +110,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite kubernetesTestComponents.namespace) kerberosUtils = new KerberosUtils( image, + hImage, kImage, kubernetesTestComponents.serviceAccountName, kubernetesTestComponents.kubernetesClient, diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala index d7689d884c9e..27a7fc3fb2b7 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala @@ -33,6 +33,7 @@ import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.KERBEROS_LABE */ private[spark] class KerberosUtils( sparkImage: String, + hadoopImage: String, kerberosImage: String, 
serviceAccountName: String, kubernetesClient: KubernetesClient, @@ -78,187 +79,189 @@ private[spark] class KerberosUtils( new HostPathVolumeSource(s"$KRB_FILE_DIR/$namespace/$pathType")) .endSpec() .build() - private def createPVCTemplate(name: String) : PersistentVolumeClaim = - new PersistentVolumeClaimBuilder() - .withNewMetadata() - .withName(name) - .withLabels(Map( - "job" -> "kerberostest").asJava) - .endMetadata() - .withNewSpec() - .withStorageClassName(name) + private def createPVCTemplate(name: String) : PersistentVolumeClaim = + new PersistentVolumeClaimBuilder() + .withNewMetadata() + .withName(name) + .withLabels(Map( + "job" -> "kerberostest").asJava) + .endMetadata() + .withNewSpec() + .withStorageClassName(name) .withVolumeName(name) .withAccessModes("ReadWriteMany") .withNewResources() .withRequests(Map("storage" -> new Quantity("1Gi")).asJava) .endResources() - .endSpec() - .build() - private val pvNN = "nn-hadoop" - private val pvKT = "server-keytab" - private val persistentVolumeMap: Map[String, PersistentVolume] = Map( - pvNN -> createPVTemplate(pvNN, "nn"), - pvKT -> createPVTemplate(pvKT, "keytab")) - private def buildKerberosPV(pvType: String) = { - PVStorage(pvType, createPVCTemplate(pvType), persistentVolumeMap(pvType)) - } - def getNNStorage: PVStorage = buildKerberosPV(pvNN) - def getKTStorage: PVStorage = buildKerberosPV(pvKT) - def getLabels: Map[String, String] = KERBEROS_LABEL - def getKeyPaths: Seq[KeyToPath] = keyPaths - def getConfigMap: ConfigMapStorage = - ConfigMapStorage( - new ConfigMapBuilder() + .endSpec() + .build() + private val pvNN = "nn-hadoop" + private val pvKT = "server-keytab" + private val persistentVolumeMap: Map[String, PersistentVolume] = Map( + pvNN -> createPVTemplate(pvNN, "nn"), + pvKT -> createPVTemplate(pvKT, "keytab")) + private def buildKerberosPV(pvType: String) = { + PVStorage(pvType, createPVCTemplate(pvType), persistentVolumeMap(pvType)) + } + def getNNStorage: PVStorage = buildKerberosPV(pvNN) + 
def getKTStorage: PVStorage = buildKerberosPV(pvKT) + def getLabels: Map[String, String] = KERBEROS_LABEL + def getKeyPaths: Seq[KeyToPath] = keyPaths + def getConfigMap: ConfigMapStorage = + ConfigMapStorage( + new ConfigMapBuilder() .withNewMetadata() .withName(KRB_CONFIG_MAP_NAME) .endMetadata() .addToData(kerberosConfTupList.toMap.asJava) - .build()) - private val kdcNode = Seq("kerberos-deployment", "kerberos-service") - private val nnNode = Seq("nn-deployment", "nn-service") - private val dnNode = Seq("dn1-deployment", "dn1-service") - private val dataPopulator = Seq("data-populator-deployment", "data-populator-service") - private def buildKerberosDeployment(name: String, seqPair: Seq[String]) = { - val deployment = - kubernetesClient.load(loadFromYaml(seqPair.head)).get().get(0).asInstanceOf[Deployment] - ServiceStorage( - name, - new DeploymentBuilder(deployment) - .editSpec() - .editTemplate() - .editSpec() - .addNewVolume() + .build() + ) + private val kdcNode = Seq("kerberos-deployment", "kerberos-service") + private val nnNode = Seq("nn-deployment", "nn-service") + private val dnNode = Seq("dn1-deployment", "dn1-service") + private val dataPopulator = Seq("data-populator-deployment", "data-populator-service") + private def buildHadoopClusterDeployment(name: String, seqPair: Seq[String]) = { + val deployment = + kubernetesClient.load(loadFromYaml(seqPair.head)).get().get(0).asInstanceOf[Deployment] + ServiceStorage( + name, + new DeploymentBuilder(deployment) + .editSpec() + .editTemplate() + .editSpec() + .addNewVolume() + .withName(KRB_VOLUME) + .withNewConfigMap() + .withName(KRB_CONFIG_MAP_NAME) + .withItems(keyPaths.asJava) + .endConfigMap() + .endVolume() + .editMatchingContainer(new ContainerNameEqualityPredicate( + deployment.getMetadata.getName)) + .addNewEnv() + .withName("NAMESPACE") + .withValue(namespace) + .endEnv() + .addNewEnv() + .withName("TMP_KRB_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles.head}") + .endEnv() + .addNewEnv() + 
.withName("TMP_KRB_DP_LOC") + .withValue(s"$KRB_FILE_DIR/krb5-dp.conf") + .endEnv() + .addNewEnv() + .withName("TMP_CORE_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles(1)}") + .endEnv() + .addNewEnv() + .withName("TMP_HDFS_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles(2)}") + .endEnv() + .addNewVolumeMount() + .withName(KRB_VOLUME) + .withMountPath(KRB_FILE_DIR) + .endVolumeMount() + .withImage(hadoopImage) + .endContainer() + .endSpec() + .endTemplate() + .endSpec() + .build(), + kubernetesClient.load(loadFromYaml(seqPair(1))).get().get(0).asInstanceOf[Service] ) + } + def getKDC: ServiceStorage = buildHadoopClusterDeployment("kerberos", kdcNode) + def getNN: ServiceStorage = buildHadoopClusterDeployment("nn", nnNode) + def getDN: ServiceStorage = buildHadoopClusterDeployment("dn1", dnNode) + def getDP: ServiceStorage = buildHadoopClusterDeployment("data-populator", dataPopulator) + private val HADOOP_CONF_DIR_PATH = "/opt/spark/hconf" + private val krb5TestkeyPaths = + kerberosFiles.map { file => + new KeyToPathBuilder() + .withKey(file) + .withPath(file) + .build() + }.toList + def getKerberosTest( + resource: String, + className: String, + appLabel: String, + yamlLocation: String): DeploymentStorage = { + kubernetesClient.load(new FileInputStream(new File(yamlLocation))) + .get().get(0) match { + case deployment: Deployment => + DeploymentStorage( + new DeploymentBuilder(deployment) + .editSpec() + .editTemplate() + .editOrNewMetadata() + .addToLabels(Map("name" -> "kerberos-test").asJava) + .endMetadata() + .editSpec() + .withServiceAccountName(serviceAccountName) + .addNewVolume() .withName(KRB_VOLUME) .withNewConfigMap() .withName(KRB_CONFIG_MAP_NAME) - .withItems(keyPaths.asJava) + .withItems(krb5TestkeyPaths.asJava) .endConfigMap() .endVolume() - .editMatchingContainer(new ContainerNameEqualityPredicate( - deployment.getMetadata.getName)) - .addNewEnv() - .withName("NAMESPACE") - .withValue(namespace) - .endEnv() - .addNewEnv() - 
.withName("TMP_KRB_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles.head}") - .endEnv() - .addNewEnv() - .withName("TMP_KRB_DP_LOC") - .withValue(s"$KRB_FILE_DIR/krb5-dp.conf") - .endEnv() - .addNewEnv() - .withName("TMP_CORE_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles(1)}") - .endEnv() - .addNewEnv() - .withName("TMP_HDFS_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles(2)}") - .endEnv() - .addNewVolumeMount() - .withName(KRB_VOLUME) - .withMountPath(KRB_FILE_DIR) - .endVolumeMount() - .endContainer() + .editMatchingContainer(new ContainerNameEqualityPredicate( + deployment.getMetadata.getName)) + .addNewEnv() + .withName("NAMESPACE") + .withValue(namespace) + .endEnv() + .addNewEnv() + .withName("MASTER_URL") + .withValue(kubernetesClient.getMasterUrl.toString) + .endEnv() + .addNewEnv() + .withName("SUBMIT_RESOURCE") + .withValue(resource) + .endEnv() + .addNewEnv() + .withName("CLASS_NAME") + .withValue(className) + .endEnv() + .addNewEnv() + .withName("HADOOP_CONF_DIR") + .withValue(HADOOP_CONF_DIR_PATH) + .endEnv() + .addNewEnv() + .withName("APP_LOCATOR_LABEL") + .withValue(appLabel) + .endEnv() + .addNewEnv() + .withName("SPARK_PRINT_LAUNCH_COMMAND") + .withValue("true") + .endEnv() + .addNewEnv() + .withName("TMP_KRB_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles.head}") + .endEnv() + .addNewEnv() + .withName("TMP_CORE_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles(1)}") + .endEnv() + .addNewEnv() + .withName("TMP_HDFS_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles(2)}") + .endEnv() + .addNewEnv() + .withName("BASE_SPARK_IMAGE") + .withValue(sparkImage) + .endEnv() + .addNewVolumeMount() + .withName(KRB_VOLUME) + .withMountPath(KRB_FILE_DIR) + .endVolumeMount() + .withImage(kerberosImage) + .endContainer() + .endSpec() + .endTemplate() .endSpec() - .endTemplate() - .endSpec() - .build(), - kubernetesClient.load(loadFromYaml(seqPair(1))).get().get(0).asInstanceOf[Service] ) - } - def getKDC: ServiceStorage = 
buildKerberosDeployment("kerberos", kdcNode) - def getNN: ServiceStorage = buildKerberosDeployment("nn", nnNode) - def getDN: ServiceStorage = buildKerberosDeployment("dn1", dnNode) - def getDP: ServiceStorage = buildKerberosDeployment("data-populator", dataPopulator) - private val HADOOP_CONF_DIR_PATH = "/opt/spark/hconf" - private val krb5TestkeyPaths = - kerberosFiles.map { file => - new KeyToPathBuilder() - .withKey(file) - .withPath(file) - .build() - }.toList - def getKerberosTest( - resource: String, - className: String, - appLabel: String, - yamlLocation: String): DeploymentStorage = { - kubernetesClient.load(new FileInputStream(new File(yamlLocation))) - .get().get(0) match { - case deployment: Deployment => - DeploymentStorage( - new DeploymentBuilder(deployment) - .editSpec() - .editTemplate() - .editOrNewMetadata() - .addToLabels(Map("name" -> "kerberos-test").asJava) - .endMetadata() - .editSpec() - .withServiceAccountName(serviceAccountName) - .addNewVolume() - .withName(KRB_VOLUME) - .withNewConfigMap() - .withName(KRB_CONFIG_MAP_NAME) - .withItems(krb5TestkeyPaths.asJava) - .endConfigMap() - .endVolume() - .editMatchingContainer(new ContainerNameEqualityPredicate( - deployment.getMetadata.getName)) - .addNewEnv() - .withName("NAMESPACE") - .withValue(namespace) - .endEnv() - .addNewEnv() - .withName("MASTER_URL") - .withValue(kubernetesClient.getMasterUrl.toString) - .endEnv() - .addNewEnv() - .withName("SUBMIT_RESOURCE") - .withValue(resource) - .endEnv() - .addNewEnv() - .withName("CLASS_NAME") - .withValue(className) - .endEnv() - .addNewEnv() - .withName("HADOOP_CONF_DIR") - .withValue(HADOOP_CONF_DIR_PATH) - .endEnv() - .addNewEnv() - .withName("APP_LOCATOR_LABEL") - .withValue(appLabel) - .endEnv() - .addNewEnv() - .withName("SPARK_PRINT_LAUNCH_COMMAND") - .withValue("true") - .endEnv() - .addNewEnv() - .withName("TMP_KRB_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles.head}") - .endEnv() - .addNewEnv() - .withName("TMP_CORE_LOC") - 
.withValue(s"$KRB_FILE_DIR/${kerberosFiles(1)}") - .endEnv() - .addNewEnv() - .withName("TMP_HDFS_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles(2)}") - .endEnv() - .addNewEnv() - .withName("BASE_SPARK_IMAGE") - .withValue(sparkImage) - .endEnv() - .addNewVolumeMount() - .withName(KRB_VOLUME) - .withMountPath(KRB_FILE_DIR) - .endVolumeMount() - .withImage(kerberosImage) - .endContainer() - .endSpec() - .endTemplate() - .endSpec() - .build()) - } + .build()) } + } } diff --git a/resource-managers/kubernetes/integration-tests/test-data/hadoop-conf/yarn-site.xml b/resource-managers/kubernetes/integration-tests/test-data/hadoop-conf/yarn-site.xml deleted file mode 100755 index b8ff146d98a3..000000000000 --- a/resource-managers/kubernetes/integration-tests/test-data/hadoop-conf/yarn-site.xml +++ /dev/null @@ -1,26 +0,0 @@ - - - - - - - - - - - yarn.resourcemanager.principal - yarn/_HOST@CLUSTER.LOCAL - - From 0639099f35ad249fc3b1149f92cf2b3453aae47d Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Thu, 25 Oct 2018 15:27:53 -0700 Subject: [PATCH 17/18] bring back old tests..hehe oops --- .../spark/deploy/k8s/integrationtest/KubernetesSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 90d274b8c17a..cfd5796c4a76 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -38,7 +38,8 @@ import org.apache.spark.deploy.k8s.integrationtest.kerberos.{KerberizedHadoopClu import org.apache.spark.internal.Logging private[spark] class KubernetesSuite extends SparkFunSuite - 
with BeforeAndAfterAll with BeforeAndAfter + with BeforeAndAfterAll with BeforeAndAfter with BasicTestsSuite with SecretsTestsSuite + with PythonTestsSuite with ClientModeTestsSuite with KerberosTestSuite with Logging with Eventually with Matchers { From a32ec4a8765ea2e9d3279fb9dc48f204e4eb7ba1 Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Tue, 30 Oct 2018 14:29:00 -0700 Subject: [PATCH 18/18] resolve comments --- .../docker/src/test/scripts/populate-data.sh | 11 +- .../src/test/scripts/run-kerberos-test.sh | 2 +- .../docker/src/test/scripts/start-kdc.sh | 4 +- ...-deployment.yml => data-populator-job.yml} | 9 +- .../kerberos-yml/data-populator-service.yml | 33 --- .../{dn1-deployment.yml => dn1-set.yml} | 2 +- ...rberos-deployment.yml => kerberos-set.yml} | 2 +- .../kerberos-yml/kerberos-test.yml | 9 +- .../{nn-deployment.yml => nn-set.yml} | 2 +- .../integrationtest/KerberosTestSuite.scala | 5 +- .../k8s/integrationtest/KubernetesSuite.scala | 21 ++ .../minikube/MinikubeTestBackend.scala | 18 -- .../KerberizedHadoopClusterLauncher.scala | 8 +- ...he.scala => KerberosJobWatcherCache.scala} | 34 ++- .../kerberos/KerberosPodWatcherCache.scala | 6 +- .../kerberos/KerberosStorage.scala | 9 +- .../kerberos/KerberosUtils.scala | 263 ++++++++++-------- 17 files changed, 225 insertions(+), 213 deletions(-) rename resource-managers/kubernetes/integration-tests/kerberos-yml/{data-populator-deployment.yml => data-populator-job.yml} (93%) delete mode 100755 resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-service.yml rename resource-managers/kubernetes/integration-tests/kerberos-yml/{dn1-deployment.yml => dn1-set.yml} (98%) rename resource-managers/kubernetes/integration-tests/kerberos-yml/{kerberos-deployment.yml => kerberos-set.yml} (98%) rename resource-managers/kubernetes/integration-tests/kerberos-yml/{nn-deployment.yml => nn-set.yml} (98%) rename 
resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/{KerberosDriverWatcherCache.scala => KerberosJobWatcherCache.scala} (73%) diff --git a/resource-managers/kubernetes/docker/src/test/scripts/populate-data.sh b/resource-managers/kubernetes/docker/src/test/scripts/populate-data.sh index 300158969496..4e2d8f3254d3 100644 --- a/resource-managers/kubernetes/docker/src/test/scripts/populate-data.sh +++ b/resource-managers/kubernetes/docker/src/test/scripts/populate-data.sh @@ -32,11 +32,8 @@ until (echo > /dev/tcp/nn.${NAMESPACE}.svc.cluster.local/9000) >/dev/null 2>&1; hdfs dfsadmin -safemode wait -hdfs dfs -mkdir -p /user/ifilonenko/ -hdfs dfs -copyFromLocal /people.txt /user/ifilonenko +hdfs dfs -mkdir -p /user/userone/ +hdfs dfs -copyFromLocal /people.txt /user/userone -hdfs dfs -chmod -R 755 /user/ifilonenko -hdfs dfs -chown -R ifilonenko /user/ifilonenko - - -sleep 60 +hdfs dfs -chmod -R 755 /user/userone +hdfs dfs -chown -R ifilonenko /user/userone diff --git a/resource-managers/kubernetes/docker/src/test/scripts/run-kerberos-test.sh b/resource-managers/kubernetes/docker/src/test/scripts/run-kerberos-test.sh index 16d4ebeaef95..56542fed6622 100644 --- a/resource-managers/kubernetes/docker/src/test/scripts/run-kerberos-test.sh +++ b/resource-managers/kubernetes/docker/src/test/scripts/run-kerberos-test.sh @@ -37,4 +37,4 @@ mkdir -p /etc/krb5.conf.d --conf spark.kerberos.principal=hdfs/nn.${NAMESPACE}.svc.cluster.local@CLUSTER.LOCAL \ --conf spark.kubernetes.driver.label.spark-app-locator=${APP_LOCATOR_LABEL} \ ${SUBMIT_RESOURCE} \ - hdfs://nn.${NAMESPACE}.svc.cluster.local:9000/user/ifilonenko/people.txt + hdfs://nn.${NAMESPACE}.svc.cluster.local:9000/user/userone/people.txt diff --git a/resource-managers/kubernetes/docker/src/test/scripts/start-kdc.sh b/resource-managers/kubernetes/docker/src/test/scripts/start-kdc.sh index 8b5fb8553f38..820ee29650b4 100644 --- 
a/resource-managers/kubernetes/docker/src/test/scripts/start-kdc.sh +++ b/resource-managers/kubernetes/docker/src/test/scripts/start-kdc.sh @@ -27,8 +27,8 @@ cp ${TMP_HDFS_LOC} /hadoop/etc/hadoop/hdfs-site.xml ## password only user -/usr/sbin/kadmin.local -q "addprinc -randkey ifilonenko" -/usr/sbin/kadmin.local -q "ktadd -k /var/keytabs/ifilonenko.keytab ifilonenko" +/usr/sbin/kadmin.local -q "addprinc -randkey userone" +/usr/sbin/kadmin.local -q "ktadd -k /var/keytabs/userone.keytab userone" /usr/sbin/kadmin.local -q "addprinc -randkey HTTP/server.${NAMESPACE}.svc.cluster.local" /usr/sbin/kadmin.local -q "ktadd -k /var/keytabs/server.keytab HTTP/server.${NAMESPACE}.svc.cluster.local" diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-job.yml similarity index 93% rename from resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml rename to resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-job.yml index 8a554b9bbffa..294506f7965d 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-deployment.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-job.yml @@ -14,12 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -apiVersion: apps/v1 -kind: Deployment +apiVersion: batch/v1 +kind: Job metadata: name: data-populator spec: - replicas: 1 + manualSelector: true + backoffLimit: 4 selector: matchLabels: name: hdfs-data-populator @@ -42,7 +43,7 @@ spec: volumeMounts: - mountPath: /var/keytabs name: data-populator-keytab - restartPolicy: Always + restartPolicy: OnFailure volumes: - name: data-populator-keytab persistentVolumeClaim: diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-service.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-service.yml deleted file mode 100755 index fb319ac13f7e..000000000000 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/data-populator-service.yml +++ /dev/null @@ -1,33 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -apiVersion: v1 -kind: Service -metadata: - annotations: - service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" - labels: - kerberosService: data-populator - job: kerberostest - name: data-populator -spec: - clusterIP: None - ports: - - protocol: TCP - port: 55555 - targetPort: 0 - selector: - kerberosService: data-populator diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-set.yml similarity index 98% rename from resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml rename to resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-set.yml index 75498928b2ed..77484be5c6e0 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-deployment.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/dn1-set.yml @@ -15,7 +15,7 @@ # limitations under the License. # apiVersion: apps/v1 -kind: Deployment +kind: StatefulSet metadata: name: dn1 spec: diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-set.yml similarity index 98% rename from resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml rename to resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-set.yml index 439cbf36ffa6..5000e115c364 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-deployment.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-set.yml @@ -15,7 +15,7 @@ # limitations under the License. 
# apiVersion: apps/v1 -kind: Deployment +kind: StatefulSet metadata: name: kerberos spec: diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-test.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-test.yml index 970d025fe743..493de8ce6ba7 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-test.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/kerberos-test.yml @@ -14,12 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. # -apiVersion: apps/v1 -kind: Deployment +apiVersion: batch/v1 +kind: Job metadata: name: kerberos-test spec: - replicas: 1 + manualSelector: true + backoffLimit: 1 selector: matchLabels: name: kerberos-test @@ -36,7 +37,7 @@ spec: volumeMounts: - mountPath: /var/keytabs name: kerberos-test-keytab - restartPolicy: Always + restartPolicy: OnFailure volumes: - name: kerberos-test-keytab persistentVolumeClaim: diff --git a/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml b/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-set.yml similarity index 98% rename from resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml rename to resource-managers/kubernetes/integration-tests/kerberos-yml/nn-set.yml index 411e55fc477a..9329edc79164 100755 --- a/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-deployment.yml +++ b/resource-managers/kubernetes/integration-tests/kerberos-yml/nn-set.yml @@ -15,7 +15,7 @@ # limitations under the License. 
# apiVersion: apps/v1 -kind: Deployment +kind: StatefulSet metadata: name: nn spec: diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala index 9047f9c67a81..5dea0f998b4c 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KerberosTestSuite.scala @@ -31,9 +31,10 @@ private[spark] trait KerberosTestSuite { k8sSuite: KubernetesSuite => kerberizedHadoopClusterLauncher.launchKerberizedCluster(kerberosUtils) // Launches Kerberos test - val driverWatcherCache = new KerberosDriverWatcherCache( + val driverWatcherCache = new KerberosJobWatcherCache( kerberosUtils, Map("spark-app-locator" -> appLocator)) + driverWatcherCache.deploy( kerberosUtils.getKerberosTest( containerLocalSparkDistroExamplesJar, @@ -67,5 +68,5 @@ private[spark] trait KerberosTestSuite { k8sSuite: KubernetesSuite => private[spark] object KerberosTestSuite { val HDFS_TEST_CLASS = "org.apache.spark.examples.HdfsTest" - val KERB_YAML_LOCATION = "kerberos-yml/kerberos-test.yml" + val KERB_YAML_LOCATION = "kerberos-test" } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 90d274b8c17a..4b5fc13b0eec 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala 
@@ -143,6 +143,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite kubernetesTestComponents.deleteNamespace() } deleteDriverPod() + deleteKubernetesPVs() } protected def runSparkPiAndVerifyCompletion( @@ -363,6 +364,26 @@ private[spark] class KubernetesSuite extends SparkFunSuite .get() == null) } } + + private def deleteKubernetesPVs(): Unit = { + // Temporary hack until client library for fabric8 is updated to get around + // the NPE that comes about when I do .list().getItems().asScala + try { + val pvList = kubernetesTestComponents.kubernetesClient + .persistentVolumes().withLabels(KERBEROS_LABEL.asJava) + .list().getItems.asScala + if (pvList.nonEmpty) { + kubernetesTestComponents.kubernetesClient + .persistentVolumes().withLabels(KERBEROS_LABEL.asJava).delete() + } + Eventually.eventually(TIMEOUT, INTERVAL) { + kubernetesTestComponents.kubernetesClient + .persistentVolumes().withLabels(KERBEROS_LABEL.asJava) + .list().getItems.asScala.isEmpty should be (true) } + } catch { + case ex: java.lang.NullPointerException => + } + } } private[spark] object KubernetesSuite { diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala index 808368ddffaa..921dc1685795 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala @@ -38,28 +38,10 @@ private[spark] object MinikubeTestBackend } override def cleanUp(): Unit = { - deleteKubernetesPVs() super.cleanUp() } override def getKubernetesClient: DefaultKubernetesClient = { defaultClient } - - private def 
deleteKubernetesPVs(): Unit = { - // Temporary hack until client library for fabric8 is updated to get around - // the NPE that comes about when I do .list().getItems().asScala - try { - val pvList = defaultClient.persistentVolumes().withLabels(KERBEROS_LABEL.asJava) - .list().getItems.asScala - if (pvList.nonEmpty) { - defaultClient.persistentVolumes().delete() - } - Eventually.eventually(TIMEOUT, INTERVAL) { - defaultClient.persistentVolumes().withLabels(KERBEROS_LABEL.asJava) - .list().getItems.asScala.isEmpty should be (true) } - } catch { - case ex: java.lang.NullPointerException => - } - } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberizedHadoopClusterLauncher.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberizedHadoopClusterLauncher.scala index b34470d7700b..99eeccac36ea 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberizedHadoopClusterLauncher.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberizedHadoopClusterLauncher.scala @@ -47,12 +47,16 @@ private[spark] class KerberizedHadoopClusterLauncher( cmWatcherCache.deploy(kerberosUtils.getConfigMap) cmWatcherCache.stopWatch() - // Launches the Hadoop cluster pods: KDC --> NN --> DN1 --> Data-Populator + // Launches the Hadoop cluster pods: KDC --> NN --> DN1 val podWatcherCache = new KerberosPodWatcherCache(kerberosUtils, labels) podWatcherCache.deploy(kerberosUtils.getKDC) podWatcherCache.deploy(kerberosUtils.getNN) podWatcherCache.deploy(kerberosUtils.getDN) - podWatcherCache.deploy(kerberosUtils.getDP) podWatcherCache.stopWatch() + + // Launch the Data populator pod to populate HDFS + val jobWatcherCache = new KerberosJobWatcherCache(kerberosUtils, labels) + 
jobWatcherCache.deploy(kerberosUtils.getDP) + jobWatcherCache.stopWatch() } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosJobWatcherCache.scala similarity index 73% rename from resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala rename to resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosJobWatcherCache.scala index fc93130fabdc..2e9ed256b913 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosDriverWatcherCache.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosJobWatcherCache.scala @@ -31,20 +31,20 @@ import org.apache.spark.internal.Logging * This class is responsible for ensuring that the driver-pod launched by the KerberosTestPod * is running before trying to grab its logs for the sake of monitoring success of completition. 
*/ -private[spark] class KerberosDriverWatcherCache( +private[spark] class KerberosJobWatcherCache( kerberosUtils: KerberosUtils, labels: Map[String, String]) - extends WatcherCacheConfiguration[DeploymentStorage] with Logging with Eventually with Matchers { + extends WatcherCacheConfiguration[JobStorage] with Logging with Eventually with Matchers { private val kubernetesClient = kerberosUtils.getClient private val namespace = kerberosUtils.getNamespace - private var driverName: String = "" + private var jobName: String = "" private val podCache = scala.collection.mutable.Map[String, String]() private val watcher: Watch = kubernetesClient .pods() .withLabels(labels.asJava) .watch(new Watcher[Pod] { override def onClose(cause: KubernetesClientException): Unit = - logInfo("Ending the watch of Driver pod") + logInfo("Ending the watch of Job pod") override def eventReceived(action: Watcher.Action, resource: Pod): Unit = { val name = resource.getMetadata.getName action match { @@ -55,19 +55,27 @@ private[spark] class KerberosDriverWatcherCache( val phase = resource.getStatus.getPhase logInfo(s"$name is as $phase") podCache(name) = phase - if (name.contains("driver")) { - driverName = name - } + jobName = name } } }) - override def check(name: String): Boolean = podCache.get(name).contains("Running") + private def additionalCheck(name: String): Boolean = { + name match { + case _ if name.startsWith("data-populator") + => hasInLogs(name, "Entered Krb5Context.initSecContext") + case _ => true + } + } + + override def check(name: String): Boolean = + podCache.get(name).contains("Succeeded") && + additionalCheck(name) - override def deploy(storage: DeploymentStorage) : Unit = { - kubernetesClient.apps().deployments().inNamespace(namespace).create(storage.resource) + override def deploy(storage: JobStorage) : Unit = { + kubernetesClient.batch().jobs().inNamespace(namespace).create(storage.resource) Eventually.eventually(TIMEOUT, INTERVAL) { - check(driverName) should be 
(true) + check(jobName) should be (true) } } @@ -75,4 +83,8 @@ private[spark] class KerberosDriverWatcherCache( // Closing Watch watcher.close() } + + def hasInLogs(name: String, expectation: String): Boolean = { + kubernetesClient.pods().withName(name).getLog().contains(expectation) + } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPodWatcherCache.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPodWatcherCache.scala index ca0b2cb0c352..f1f8b2533228 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPodWatcherCache.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosPodWatcherCache.scala @@ -44,7 +44,6 @@ private[spark] class KerberosPodWatcherCache( private var kdcName: String = _ private var nnName: String = _ private var dnName: String = _ - private var dpName: String = _ private val podWatcher: Watch = kubernetesClient .pods() .withLabels(labels.asJava) @@ -64,7 +63,6 @@ private[spark] class KerberosPodWatcherCache( if (keyName == "kerberos") { kdcName = name } if (keyName == "nn") { nnName = name } if (keyName == "dn1") { dnName = name } - if (keyName == "data-populator") { dpName = name } podCache(keyName) = phase } } @@ -92,7 +90,6 @@ private[spark] class KerberosPodWatcherCache( case "kerberos" => hasInLogs(kdcName, "krb5kdc: starting") case "nn" => hasInLogs(nnName, "createNameNode") case "dn1" => hasInLogs(dnName, "Got finalize command for block pool") - case "data-populator" => hasInLogs(dpName, "Entered Krb5Context.initSecContext") } } @@ -105,7 +102,7 @@ private[spark] class KerberosPodWatcherCache( override def deploy(srvc: ServiceStorage) : Unit = { logInfo("Launching the Deployment") kubernetesClient - 
.apps().deployments().inNamespace(namespace).create(srvc.podDeployment) + .apps().statefulSets().inNamespace(namespace).create(srvc.podSet) // Making sure Pod is running Eventually.eventually(TIMEOUT, INTERVAL) { (podCache(srvc.name) == "Running") should be (true) @@ -127,7 +124,6 @@ private[spark] class KerberosPodWatcherCache( case _ if name.startsWith("kerberos") => "kerberos" case _ if name.startsWith("nn") => "nn" case _ if name.startsWith("dn1") => "dn1" - case _ if name.startsWith("data-populator") => "data-populator" } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala index c1310c214840..4e0b5ec5b749 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosStorage.scala @@ -17,7 +17,8 @@ package org.apache.spark.deploy.k8s.integrationtest.kerberos import io.fabric8.kubernetes.api.model._ -import io.fabric8.kubernetes.api.model.apps.Deployment +import io.fabric8.kubernetes.api.model.apps.StatefulSet +import io.fabric8.kubernetes.api.model.batch.Job private[spark] sealed trait KerberosStorage @@ -29,12 +30,12 @@ private[spark] case class PVStorage( private[spark] case class ServiceStorage( name: String, - podDeployment: Deployment, + podSet: StatefulSet, service: Service) extends KerberosStorage -private[spark] case class DeploymentStorage( - resource: Deployment) +private[spark] case class JobStorage( + resource: Job) extends KerberosStorage private[spark] case class ConfigMapStorage( diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala index d9fa2d082152..8eb1481c8e25 100755 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/kerberos/KerberosUtils.scala @@ -21,7 +21,8 @@ import java.io.{File, FileInputStream} import scala.collection.JavaConverters._ import io.fabric8.kubernetes.api.model._ -import io.fabric8.kubernetes.api.model.apps.{Deployment, DeploymentBuilder} +import io.fabric8.kubernetes.api.model.apps.{StatefulSet, StatefulSetBuilder} +import io.fabric8.kubernetes.api.model.batch.{Job, JobBuilder} import io.fabric8.kubernetes.client.KubernetesClient import org.apache.commons.io.FileUtils.readFileToString @@ -72,7 +73,7 @@ private[spark] class KerberosUtils( .endMetadata() .withNewSpec() .withStorageClassName(name) - .withCapacity(Map("storage" -> new Quantity("1Gi")).asJava) + .withCapacity(Map("storage" -> new Quantity("200Mi")).asJava) .withAccessModes("ReadWriteMany") .withHostPath(new HostPathVolumeSource(s"$KRB_FILE_DIR/$namespace/$pathType", "")) .endSpec() @@ -89,7 +90,7 @@ private[spark] class KerberosUtils( .withVolumeName(name) .withAccessModes("ReadWriteMany") .withNewResources() - .withRequests(Map("storage" -> new Quantity("1Gi")).asJava) + .withRequests(Map("storage" -> new Quantity("200Mi")).asJava) .endResources() .endSpec() .build() @@ -98,9 +99,11 @@ private[spark] class KerberosUtils( private val persistentVolumeMap: Map[String, PersistentVolume] = Map( pvNN -> createPVTemplate(pvNN, "nn"), pvKT -> createPVTemplate(pvKT, "keytab")) + private def buildKerberosPV(pvType: String) = { PVStorage(pvType, 
createPVCTemplate(pvType), persistentVolumeMap(pvType)) } + def getNNStorage: PVStorage = buildKerberosPV(pvNN) def getKTStorage: PVStorage = buildKerberosPV(pvKT) def getLabels: Map[String, String] = KERBEROS_LABEL @@ -114,52 +117,60 @@ private[spark] class KerberosUtils( .addToData(kerberosConfTupList.toMap.asJava) .build() ) - private val kdcNode = Seq("kerberos-deployment", "kerberos-service") - private val nnNode = Seq("nn-deployment", "nn-service") - private val dnNode = Seq("dn1-deployment", "dn1-service") - private val dataPopulator = Seq("data-populator-deployment", "data-populator-service") + private val kdcNode = Seq("kerberos-set", "kerberos-service") + private val nnNode = Seq("nn-set", "nn-service") + private val dnNode = Seq("dn1-set", "dn1-service") + private val dataPopulator = "data-populator-job" + private val hadoopContainerEnvs = Seq( + new EnvVarBuilder() + .withName("NAMESPACE") + .withValue(namespace) + .build(), + new EnvVarBuilder() + .withName("TMP_KRB_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles.head}") + .build(), + new EnvVarBuilder() + .withName("TMP_KRB_DP_LOC") + .withValue(s"$KRB_FILE_DIR/krb5-dp.conf") + .build(), + new EnvVarBuilder() + .withName("TMP_CORE_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles(1)}") + .build(), + new EnvVarBuilder() + .withName("TMP_HDFS_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles(2)}") + .build() + ).asJava + private val krbVolume = + new VolumeBuilder() + .withName(KRB_VOLUME) + .withNewConfigMap() + .withName(KRB_CONFIG_MAP_NAME) + .withItems(keyPaths.asJava) + .endConfigMap() + .build() + private val krbVolumeMount = + new VolumeMountBuilder() + .withName(KRB_VOLUME) + .withMountPath(KRB_FILE_DIR) + .build() + private def buildHadoopClusterDeployment(name: String, seqPair: Seq[String]) = { - val deployment = - kubernetesClient.load(loadFromYaml(seqPair.head)).get().get(0).asInstanceOf[Deployment] + val statefulSet = + 
kubernetesClient.load(loadFromYaml(seqPair.head)).get().get(0).asInstanceOf[StatefulSet] ServiceStorage( name, - new DeploymentBuilder(deployment) + new StatefulSetBuilder(statefulSet) .editSpec() .editTemplate() .editSpec() - .addNewVolume() - .withName(KRB_VOLUME) - .withNewConfigMap() - .withName(KRB_CONFIG_MAP_NAME) - .withItems(keyPaths.asJava) - .endConfigMap() - .endVolume() + .addNewVolumeLike(krbVolume).endVolume() .editMatchingContainer(new ContainerNameEqualityPredicate( - deployment.getMetadata.getName)) - .addNewEnv() - .withName("NAMESPACE") - .withValue(namespace) - .endEnv() - .addNewEnv() - .withName("TMP_KRB_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles.head}") - .endEnv() - .addNewEnv() - .withName("TMP_KRB_DP_LOC") - .withValue(s"$KRB_FILE_DIR/krb5-dp.conf") - .endEnv() - .addNewEnv() - .withName("TMP_CORE_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles(1)}") - .endEnv() - .addNewEnv() - .withName("TMP_HDFS_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles(2)}") - .endEnv() - .addNewVolumeMount() - .withName(KRB_VOLUME) - .withMountPath(KRB_FILE_DIR) - .endVolumeMount() + statefulSet.getMetadata.getName)) + .addAllToEnv(hadoopContainerEnvs) + .addNewVolumeMountLike(krbVolumeMount).endVolumeMount() .withImage(hadoopImage) .endContainer() .endSpec() @@ -168,10 +179,30 @@ private[spark] class KerberosUtils( .build(), kubernetesClient.load(loadFromYaml(seqPair(1))).get().get(0).asInstanceOf[Service] ) } + private def buildDP(yamlLocation: String): JobStorage = { + val job = kubernetesClient.load(loadFromYaml(yamlLocation)).get().get(0).asInstanceOf[Job] + JobStorage( + new JobBuilder(job) + .editSpec() + .editTemplate() + .editSpec() + .addNewVolumeLike(krbVolume).endVolume() + .editMatchingContainer(new ContainerNameEqualityPredicate( + job.getMetadata.getName)) + .addAllToEnv(hadoopContainerEnvs) + .addNewVolumeMountLike(krbVolumeMount).endVolumeMount() + .withImage(hadoopImage) + .endContainer() + .endSpec() + .endTemplate() + .endSpec() 
+ .build() + ) + } def getKDC: ServiceStorage = buildHadoopClusterDeployment("kerberos", kdcNode) def getNN: ServiceStorage = buildHadoopClusterDeployment("nn", nnNode) def getDN: ServiceStorage = buildHadoopClusterDeployment("dn1", dnNode) - def getDP: ServiceStorage = buildHadoopClusterDeployment("data-populator", dataPopulator) + def getDP: JobStorage = buildDP(dataPopulator) private val HADOOP_CONF_DIR_PATH = "/opt/spark/hconf" private val krb5TestkeyPaths = kerberosFiles.map { file => @@ -180,86 +211,84 @@ private[spark] class KerberosUtils( .withPath(file) .build() }.toList + def getKerberosTest( resource: String, className: String, appLabel: String, - yamlLocation: String): DeploymentStorage = { - kubernetesClient.load(new FileInputStream(new File(yamlLocation))) - .get().get(0) match { - case deployment: Deployment => - DeploymentStorage( - new DeploymentBuilder(deployment) + yamlLocation: String): JobStorage = { + val job = kubernetesClient.load(loadFromYaml(yamlLocation)).get().get(0).asInstanceOf[Job] + JobStorage( + new JobBuilder(job) + .editSpec() + .editTemplate() + .editOrNewMetadata() + .addToLabels(Map("name" -> "kerberos-test").asJava) + .endMetadata() .editSpec() - .editTemplate() - .editOrNewMetadata() - .addToLabels(Map("name" -> "kerberos-test").asJava) - .endMetadata() - .editSpec() - .withServiceAccountName(serviceAccountName) - .addNewVolume() + .withServiceAccountName(serviceAccountName) + .addNewVolume() + .withName(KRB_VOLUME) + .withNewConfigMap() + .withName(KRB_CONFIG_MAP_NAME) + .withItems(krb5TestkeyPaths.asJava) + .endConfigMap() + .endVolume() + .editMatchingContainer(new ContainerNameEqualityPredicate( + job.getMetadata.getName)) + .addNewEnv() + .withName("NAMESPACE") + .withValue(namespace) + .endEnv() + .addNewEnv() + .withName("MASTER_URL") + .withValue(kubernetesClient.getMasterUrl.toString) + .endEnv() + .addNewEnv() + .withName("SUBMIT_RESOURCE") + .withValue(resource) + .endEnv() + .addNewEnv() + .withName("CLASS_NAME") 
+ .withValue(className) + .endEnv() + .addNewEnv() + .withName("HADOOP_CONF_DIR") + .withValue(HADOOP_CONF_DIR_PATH) + .endEnv() + .addNewEnv() + .withName("APP_LOCATOR_LABEL") + .withValue(appLabel) + .endEnv() + .addNewEnv() + .withName("SPARK_PRINT_LAUNCH_COMMAND") + .withValue("true") + .endEnv() + .addNewEnv() + .withName("TMP_KRB_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles.head}") + .endEnv() + .addNewEnv() + .withName("TMP_CORE_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles(1)}") + .endEnv() + .addNewEnv() + .withName("TMP_HDFS_LOC") + .withValue(s"$KRB_FILE_DIR/${kerberosFiles(2)}") + .endEnv() + .addNewEnv() + .withName("BASE_SPARK_IMAGE") + .withValue(sparkImage) + .endEnv() + .addNewVolumeMount() .withName(KRB_VOLUME) - .withNewConfigMap() - .withName(KRB_CONFIG_MAP_NAME) - .withItems(krb5TestkeyPaths.asJava) - .endConfigMap() - .endVolume() - .editMatchingContainer(new ContainerNameEqualityPredicate( - deployment.getMetadata.getName)) - .addNewEnv() - .withName("NAMESPACE") - .withValue(namespace) - .endEnv() - .addNewEnv() - .withName("MASTER_URL") - .withValue(kubernetesClient.getMasterUrl.toString) - .endEnv() - .addNewEnv() - .withName("SUBMIT_RESOURCE") - .withValue(resource) - .endEnv() - .addNewEnv() - .withName("CLASS_NAME") - .withValue(className) - .endEnv() - .addNewEnv() - .withName("HADOOP_CONF_DIR") - .withValue(HADOOP_CONF_DIR_PATH) - .endEnv() - .addNewEnv() - .withName("APP_LOCATOR_LABEL") - .withValue(appLabel) - .endEnv() - .addNewEnv() - .withName("SPARK_PRINT_LAUNCH_COMMAND") - .withValue("true") - .endEnv() - .addNewEnv() - .withName("TMP_KRB_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles.head}") - .endEnv() - .addNewEnv() - .withName("TMP_CORE_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles(1)}") - .endEnv() - .addNewEnv() - .withName("TMP_HDFS_LOC") - .withValue(s"$KRB_FILE_DIR/${kerberosFiles(2)}") - .endEnv() - .addNewEnv() - .withName("BASE_SPARK_IMAGE") - .withValue(sparkImage) - .endEnv() - 
.addNewVolumeMount() - .withName(KRB_VOLUME) - .withMountPath(KRB_FILE_DIR) - .endVolumeMount() - .withImage(kerberosImage) - .endContainer() - .endSpec() - .endTemplate() + .withMountPath(KRB_FILE_DIR) + .endVolumeMount() + .withImage(kerberosImage) + .endContainer() .endSpec() - .build()) - } + .endTemplate() + .endSpec() + .build()) } }