Skip to content

Commit 6ec5bb9

Browse files
author
Arthur Rand
authored
[Spark-483] Add Kerberos principal and secret-based key tab to the CLI, also update libmesos in Dockerfile (apache#164)
* wip, decode base64 secrets * improved logging * change makefile back * makefile... * use libmesos bundle instead of private image * update docs and remove tgt? * remove dead code * fix typo * fixed hdfs.md with tgt instructions
1 parent 0751fcf commit 6ec5bb9

File tree

7 files changed

+106
-12
lines changed

7 files changed

+106
-12
lines changed

conf/spark-env.sh

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ mkdir -p "${HADOOP_CONF_DIR}"
1414

1515
cd $MESOS_SANDBOX
1616

17-
MESOS_NATIVE_JAVA_LIBRARY=/usr/lib/libmesos.so
17+
MESOS_NATIVE_JAVA_LIBRARY=/opt/mesosphere/libmesos-bundle/lib/libmesos.so
1818

1919
# For non-CNI, tell the Spark driver to bind to LIBPROCESS_IP
2020
#
@@ -30,6 +30,23 @@ fi
3030
# But this fails now due to MESOS-6391, so I'm setting it to /tmp
3131
MESOS_DIRECTORY=/tmp
3232

33+
echo "spark-env: Printing environment" >&2
34+
env >&2
35+
echo "spark-env: User: $(whoami)" >&2
36+
37+
for f in $MESOS_SANDBOX/*.base64 ; do
38+
echo "decoding $f" >&2
39+
secret=$(basename ${f} .base64)
40+
cat ${f} | base64 -d > ${secret}
41+
done
42+
43+
if [[ -n "${KRB5_CONFIG_BASE64}" ]]; then
44+
echo "spark-env: Copying krb config from $KRB5_CONFIG_BASE64 to /etc/" >&2
45+
echo "${KRB5_CONFIG_BASE64}" | base64 -d > /etc/krb5.conf
46+
else
47+
echo "spark-env: No kerberos KDC config found" >&2
48+
fi
49+
3350
# Options read when launching programs locally with
3451
# ./bin/run-example or ./bin/spark-submit
3552
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files

dispatcher/cli/dcos_spark/cli.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
dcos spark run --help
99
dcos spark run --submit-args=<spark-args>
1010
[--dcos-space=<dcos_space>]
11+
[--kerberos-principal=<kerberos_principal>]
12+
[--keytab-secret-path=<keytab_secret>]
1113
[--docker-image=<docker-image>]
1214
[--verbose]
1315
dcos spark status <submissionId> [--verbose]
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
11
PATH_ENV = 'PATH'
2+
KERBEROS_PRINCIPAL_ARG = "--kerberos-principal"
3+
KEYTAB_SECRET_PATH_ARG = "--keytab-secret-path"
4+
ENCODED_SUFFIX = ".base64"

dispatcher/cli/dcos_spark/spark_submit.py

Lines changed: 65 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -154,20 +154,80 @@ def show_help():
154154
return 0
155155

156156

157+
def format_kerberos_args(args):
158+
def check_args():
159+
if (args[constants.KERBEROS_PRINCIPAL_ARG] is None and
160+
args[constants.KEYTAB_SECRET_PATH_ARG] is None):
161+
return False # No Kerberos args
162+
if args[constants.KERBEROS_PRINCIPAL_ARG] is not None:
163+
if args[constants.KEYTAB_SECRET_PATH_ARG] is None:
164+
print("Missing {} argument for keytab "
165+
"secret. E.g. /hdfs.keytab"
166+
.format(constants.KERBEROS_PRINCIPAL_ARG),
167+
file=sys.stderr)
168+
exit(1)
169+
return True
170+
if args[constants.KEYTAB_SECRET_PATH_ARG] is not None:
171+
if args[constants.KERBEROS_PRINCIPAL_ARG] is None:
172+
print("Missing {} argument for Kerberos principal, e.g. "
173+
"hdfs/name-0.hdfs.autoip.dcos.thisdcos.directory@LOCAL"
174+
.format(constants.KERBEROS_PRINCIPAL_ARG),
175+
file=sys.stderr)
176+
exit(1)
177+
return True
178+
179+
def get_secret_file_from_path(encoded):
180+
if args[constants.KEYTAB_SECRET_PATH_ARG] is not None:
181+
f = args[constants.KEYTAB_SECRET_PATH_ARG].split("/")[-1]
182+
return f + constants.ENCODED_SUFFIX if encoded else f
183+
else:
184+
return None
185+
186+
def get_krb5_config():
187+
app = spark_app()
188+
if "SPARK_MESOS_KRB5_CONF_BASE64" in app["env"]:
189+
krb5 = app["env"]["SPARK_MESOS_KRB5_CONF_BASE64"]
190+
return ["--conf",
191+
"spark.mesos.driverEnv.KRB5_CONFIG_BASE64={}"
192+
.format(krb5)]
193+
else:
194+
print("WARNING: You must specify a krb5.conf that is base64 "
195+
"encoded with "
196+
"--conf spark.mesos.driverEnv.KRB5_CONFIG_BASE64",
197+
file=sys.stderr)
198+
return []
199+
200+
add_args = check_args()
201+
if add_args:
202+
return [
203+
"--principal",
204+
"{}".format(args[constants.KERBEROS_PRINCIPAL_ARG]),
205+
"--conf",
206+
"spark.yarn.keytab={}".format(
207+
get_secret_file_from_path(encoded=False)),
208+
"--conf",
209+
"spark.mesos.driver.secret.name={}".format(
210+
args[constants.KEYTAB_SECRET_PATH_ARG]),
211+
"--conf",
212+
"spark.mesos.driver.secret.filename={}".format(
213+
get_secret_file_from_path(encoded=True)),
214+
"--conf",
215+
"spark.mesos.containerizer=mesos"] + get_krb5_config()
216+
else:
217+
return []
218+
219+
157220
def submit_job(dispatcher, docker_image, args):
158221
"""
159222
Run spark-submit.
160223
161224
:param dispatcher: Spark Dispatcher URL. Used to construct --master.
162225
:type dispatcher: string
163-
:param args: --submit-args value from `dcos spark run`
226+
:param args: command line args value from `dcos spark run`
164227
:type args: dict
165228
:param docker_image: Docker image to run the driver and executors in.
166229
:type docker_image: string
167-
:param verbose: If true, prints verbose information to stdout.
168-
:type verbose: boolean
169230
"""
170-
171231
submit_args = args["--submit-args"]
172232
verbose = args["--verbose"] if args["--verbose"] is not None else False
173233
app = spark_app()
@@ -183,6 +243,7 @@ def submit_job(dispatcher, docker_image, args):
183243
"spark.mesos.task.labels=DCOS_SPACE:{}".format(dcos_space),
184244
"--conf",
185245
"spark.mesos.role={}".format(role)] + \
246+
format_kerberos_args(args) + \
186247
submit_args.split()
187248

188249
hdfs_url = _get_spark_hdfs_url()

docker/Dockerfile

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
# docker build -t spark:git-`git rev-parse --short HEAD` .
1919

2020
# Basing from Mesos image so the Mesos native library is present.
21-
FROM mesosphere/mesos-modules-private:dcos-ee-mesos-modules-1.8.5-rc2
22-
MAINTAINER Michael Gummelt <mgummelt@mesosphere.io>
21+
FROM ubuntu:14.04
22+
MAINTAINER Michael Gummelt <mgummelt@mesosphere.io>, Arthur Rand <arand@mesosphere.io>
2323

2424
# Set environment variables.
2525
ENV DEBIAN_FRONTEND "noninteractive"
@@ -37,13 +37,21 @@ RUN apt-get update && \
3737
apt-get install -y curl
3838
RUN apt-get install -y r-base
3939

40-
RUN cd /usr/lib/jvm && \
40+
RUN mkdir -p /opt/mesosphere/ && \
41+
cd /opt/mesosphere && \
42+
curl -L -O https://downloads.mesosphere.io/libmesos-bundle/libmesos-bundle-1.10-1.4-63e0814.tar.gz && \
43+
tar zxf libmesos-bundle-1.10-1.4-63e0814.tar.gz && \
44+
rm libmesos-bundle-1.10-1.4-63e0814.tar.gz
45+
46+
RUN mkdir -p /usr/lib/jvm/ && \
47+
cd /usr/lib/jvm && \
4148
curl -L -O https://downloads.mesosphere.com/java/jre-8u112-linux-x64-jce-unlimited.tar.gz && \
4249
tar zxf jre-8u112-linux-x64-jce-unlimited.tar.gz && \
4350
rm jre-8u112-linux-x64-jce-unlimited.tar.gz
4451

4552
ENV JAVA_HOME /usr/lib/jvm/jre1.8.0_112
46-
ENV MESOS_NATIVE_JAVA_LIBRARY /usr/lib/libmesos.so
53+
ENV MESOS_NATIVE_JAVA_LIBRARY /opt/mesosphere/libmesos-bundle/lib/libmesos.so
54+
ENV LD_LIBRARY_PATH /opt/mesosphere/libmesos-bundle/lib/
4755
ENV HADOOP_CONF_DIR /etc/hadoop
4856

4957
RUN mkdir /etc/hadoop

docker/runit/init.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ if [[ -f hdfs-site.xml && -f core-site.xml ]]; then
6565
fi
6666

6767
# Move kerberos config file, as specified by security.kerberos.krb5conf, into place.
68+
# this only affects the krb5.conf file for the dispatcher
6869
if [[ -n "${SPARK_MESOS_KRB5_CONF_BASE64}" ]]; then
6970
echo "${SPARK_MESOS_KRB5_CONF_BASE64}" | base64 -d > /etc/krb5.conf
7071
fi

docs/hdfs.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,13 +76,15 @@ Keytabs are valid infinitely, while tickets can expire. Especially for long-runn
7676

7777
Submit the job with the keytab:
7878

79-
dcos spark run --submit-args="--principal user@REALM --keytab <keytab-file-path>..."
79+
dcos spark run --kerberos-principal=user@REALM --keytab-secret-path=<secret_path> \
80+
--submit-args=" ... "
8081

8182
### TGT Authentication
8283

8384
Submit the job with the ticket:
84-
85-
dcos spark run --principal user@REALM --tgt <ticket-file-path>
85+
```bash
86+
dcos spark run --kerberos-principal user@REALM --submit-args="--tgt <ticket-file-path> ..."
87+
```
8688

8789
**Note:** These credentials are security-critical. We highly recommend configuring SSL encryption between the Spark components when accessing Kerberos-secured HDFS clusters. See the Security section for information on how to do this.
8890

0 commit comments

Comments
 (0)