Use the service configuration library to set up Spark on Tron #2601

Merged: 2 commits, Jan 2, 2020

Changes from all commits:
.activate.sh (1 addition, 0 deletions)

.deactivate.sh (1 addition, 0 deletions)

@@ -0,0 +1 @@
+deactivate
.dockerignore (3 additions, 0 deletions)

@@ -26,3 +26,6 @@ fake_service_uno/
 paasta_itests/mesos-cli.json
 paasta_itests/fake_etc_paasta/marathon.json
 yelp_package/bintray.json
+
+.activate.sh
+.deactivate.sh
.travis.yml (7 additions, 1 deletion)

@@ -38,7 +38,13 @@ before_install:
 - sudo apt-get update
 - sudo apt-get -o Dpkg::Options::="--force-confnew" install -y docker-engine=1.13.1-0~ubuntu-xenial devscripts --force-yes
 - 'if [ "$TRAVIS_BRANCH" = "master" -a "$TRAVIS_PULL_REQUEST" = "false" -a "$DOCKER_USERNAME" != "" ]; then docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD"; fi'
-install: pip install coveralls tox==3.2 tox-pip-extensions==1.3.0
+install:
+- pip install coveralls tox==3.2 tox-pip-extensions==1.3.0
+# There's some weird breakage between travis, python, virtualenvs, and eggs that I don't really
+# understand, but it was causing the build to fail because of ContextualVersionConflicts.
+# It seems as though we're hitting https://github.com/pypa/pip/issues/6275
+# I also don't understand why but running setup.py egg_info is a workaround that appears to fix things
+- python setup.py egg_info
 script: if [[ -n "$MAKE_TARGET" ]]; then make "$MAKE_TARGET"; else tox -i https://pypi.python.org/simple; fi
 after_success:
 - coveralls
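A note on the workaround added above: ContextualVersionConflict is what pkg_resources raises when the version it finds for an installed distribution does not satisfy another package's requirement. Running python setup.py egg_info regenerates the project's local egg-info metadata in the working copy, which evidently restores a consistent view of installed versions for pkg_resources on the Travis workers; the linked pip issue (pypa/pip#6275) tracks the underlying behavior.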
Makefile (11 additions, 8 deletions)

@@ -24,11 +24,14 @@ endif
 
 .PHONY: all docs test itest
 
+dev: .paasta/bin/activate
+	.paasta/bin/tox -i $(PIP_INDEX_URL)
+
 docs: .paasta/bin/activate
 	.paasta/bin/tox -i $(PIP_INDEX_URL) -e docs
 
 test: .paasta/bin/activate
-	.paasta/bin/tox -i $(PIP_INDEX_URL)
+	.paasta/bin/tox -i $(PIP_INDEX_URL) -e test
 
 .tox/py36-linux: .paasta/bin/activate
 	.paasta/bin/tox -i $(PIP_INDEX_URL)

@@ -60,13 +63,13 @@ release:
 	make -C yelp_package release
 
 clean:
-	rm -rf ./dist
-	make -C yelp_package clean
-	rm -rf docs/build
-	find . -name '*.pyc' -delete
-	find . -name '__pycache__' -delete
-	rm -rf .tox
-	rm -rf .paasta
+	-rm -rf ./dist
+	-make -C yelp_package clean
+	-rm -rf docs/build
+	-find . -name '*.pyc' -delete
+	-find . -name '__pycache__' -delete
+	-rm -rf .tox
+	-rm -rf .paasta
 
 yelpy: ## Installs the yelp-internal packages into the default tox environment
 	.tox/py36-linux/bin/pip-custom-platform install -i https://pypi.yelpcorp.com/simple -r yelp_package/extra_requirements_yelp.txt -r ./extra-linux-requirements.txt
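One easy-to-miss detail in the clean target above: the leading '-' on each new recipe line is Make's ignore-errors prefix, not a diff marker. It tells make to keep executing the remaining commands even if one exits non-zero, so a failing sub-make or an unexpected filesystem state no longer aborts the whole cleanup. A minimal illustration with a hypothetical target:

# Hypothetical target: without the '-' prefix, the first failing command
# aborts the recipe; with it, make reports the error and carries on.
tidy:
	-make -C some_subdir clean   # tolerated even if this sub-make fails
	rm -rf build/                # still runs afterwards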
docs/source/generated/paasta_tools.rst (2 additions, 0 deletions)

@@ -11,6 +11,7 @@ Subpackages
    paasta_tools.cli
    paasta_tools.deployd
    paasta_tools.frameworks
+   paasta_tools.instance
    paasta_tools.kubernetes
    paasta_tools.mesos
    paasta_tools.metrics

@@ -91,6 +92,7 @@ Submodules
    paasta_tools.setup_tron_namespace
    paasta_tools.slack
    paasta_tools.smartstack_tools
+   paasta_tools.spark_tools
    paasta_tools.synapse_srv_namespaces_fact
    paasta_tools.tron_tools
    paasta_tools.utils
paasta_tools/cli/cmds/spark_run.py (10 additions, 102 deletions)

@@ -6,9 +6,7 @@
 import sys
 import time
 
-import boto3
 from boto3.exceptions import Boto3Error
-from botocore.session import Session
 from ruamel.yaml import YAML
 
 from paasta_tools.cli.cmds.check import makefile_responds_to
@@ -19,7 +17,10 @@
 from paasta_tools.cli.utils import pick_random_port
 from paasta_tools.clusterman import get_clusterman_metrics
 from paasta_tools.mesos_tools import find_mesos_leader
-from paasta_tools.mesos_tools import MESOS_MASTER_PORT
+from paasta_tools.spark_tools import DEFAULT_SPARK_SERVICE
+from paasta_tools.spark_tools import get_aws_credentials
+from paasta_tools.spark_tools import get_default_event_log_dir
+from paasta_tools.spark_tools import load_mesos_secret_for_spark
 from paasta_tools.utils import _run
 from paasta_tools.utils import DEFAULT_SOA_DIR
 from paasta_tools.utils import get_possible_launched_by_user_variable_from_env
@@ -35,14 +36,10 @@
 from paasta_tools.utils import SystemPaastaConfig
 
 
-AWS_CREDENTIALS_DIR = "/etc/boto_cfg/"
-DEFAULT_SPARK_RUN_CONFIG = "/nail/srv/configs/spark.yaml"
 DEFAULT_AWS_REGION = "us-west-2"
-DEFAULT_SERVICE = "spark"
 DEFAULT_SPARK_WORK_DIR = "/spark_driver"
 DEFAULT_SPARK_DOCKER_IMAGE_PREFIX = "paasta-spark-run"
 DEFAULT_SPARK_DOCKER_REGISTRY = "docker-dev.yelpcorp.com"
-DEFAULT_SPARK_MESOS_SECRET_FILE = "/nail/etc/paasta_spark_secret"
 SENSITIVE_ENV = ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"]
 clusterman_metrics, CLUSTERMAN_YAML_FILE_PATH = get_clusterman_metrics()
 
@@ -112,7 +109,7 @@ def add_subparser(subparsers):
         "-s",
         "--service",
         help="The name of the service from which the Spark image is built.",
-        default=DEFAULT_SERVICE,
+        default=DEFAULT_SPARK_SERVICE,
     ).completer = lazy_choices_completer(list_services)
 
     list_parser.add_argument(
@@ -352,36 +349,6 @@ def get_smart_paasta_instance_name(args):
     return f"{args.instance}_{get_username()}_{how_submitted}"
 
 
-def get_default_event_log_dir(access_key, secret_key):
-    if access_key is None:
-        log.warning(
-            "Since no AWS credentials were provided, spark event logging "
-            "will be disabled"
-        )
-        return None
-
-    with open(DEFAULT_SPARK_RUN_CONFIG) as fp:
-        spark_run_conf = YAML().load(fp.read())
-    try:
-        account_id = (
-            boto3.client(
-                "sts", aws_access_key_id=access_key, aws_secret_access_key=secret_key
-            )
-            .get_caller_identity()
-            .get("Account")
-        )
-    except Exception as e:
-        log.warning("Failed to identify account ID, error: {}".format(str(e)))
-        return None
-
-    for conf in spark_run_conf["environments"].values():
-        if account_id == conf["account_id"]:
-            default_event_log_dir = conf["default_event_log_dir"]
-            paasta_print(f"default event logging at: {default_event_log_dir}")
-            return default_event_log_dir
-    return None
-
-
 def get_spark_env(args, spark_conf, spark_ui_port, access_key, secret_key):
     spark_env = {}
 
@@ -422,52 +389,6 @@ def get_spark_env(args, spark_conf, spark_ui_port, access_key, secret_key):
     return spark_env
 
 
-def get_aws_credentials(args):
-    if args.no_aws_credentials:
-        return None, None
-    elif args.aws_credentials_yaml:
-        return load_aws_credentials_from_yaml(args.aws_credentials_yaml)
-    elif args.service != DEFAULT_SERVICE:
-        service_credentials_path = get_service_aws_credentials_path(args.service)
-        if os.path.exists(service_credentials_path):
-            return load_aws_credentials_from_yaml(service_credentials_path)
-        else:
-            paasta_print(
-                PaastaColors.yellow(
-                    "Did not find service AWS credentials at %s. Falling back to "
-                    "user credentials." % (service_credentials_path)
-                )
-            )
-
-    creds = Session(profile=args.aws_profile).get_credentials()
-    return creds.access_key, creds.secret_key
-
-
-def get_service_aws_credentials_path(service_name):
-    service_yaml = "%s.yaml" % service_name
-    return os.path.join(AWS_CREDENTIALS_DIR, service_yaml)
-
-
-def load_aws_credentials_from_yaml(yaml_file_path):
-    with open(yaml_file_path, "r") as yaml_file:
-        try:
-            credentials_yaml = YAML().load(yaml_file.read())
-        except Exception as e:
-            paasta_print(
-                PaastaColors.red(
-                    "Encountered %s when trying to parse AWS credentials yaml %s. "
-                    "Suppressing further output to avoid leaking credentials."
-                    % (type(e), yaml_file_path)
-                )
-            )
-            sys.exit(1)
-
-    return (
-        credentials_yaml["aws_access_key_id"],
-        credentials_yaml["aws_secret_access_key"],
-    )
-
-
 def get_spark_config(
     args,
     spark_app_name,
@@ -490,16 +411,15 @@
         "spark.mesos.executor.docker.forcePullImage": "true",
     }
 
-    default_event_log_dir = get_default_event_log_dir(access_key, secret_key)
+    default_event_log_dir = get_default_event_log_dir(
+        access_key=access_key, secret_key=secret_key
+    )
     if default_event_log_dir is not None:
         user_args["spark.eventLog.enabled"] = "true"
         user_args["spark.eventLog.dir"] = default_event_log_dir
 
     # Spark options managed by PaaSTA
-    cluster_fqdn = system_paasta_config.get_cluster_fqdn_format().format(
-        cluster=args.cluster
-    )
-    mesos_address = "{}:{}".format(find_mesos_leader(cluster_fqdn), MESOS_MASTER_PORT)
+    mesos_address = find_mesos_leader(args.cluster)
     paasta_instance = get_smart_paasta_instance_name(args)
     non_user_args = {
         "spark.master": "mesos://%s" % mesos_address,
@@ -512,7 +432,7 @@
         "spark.mesos.executor.docker.volumes": ",".join(volumes),
         "spark.mesos.executor.docker.image": docker_img,
         "spark.mesos.principal": "spark",
-        "spark.mesos.secret": _load_mesos_secret(),
+        "spark.mesos.secret": load_mesos_secret_for_spark(),
     }
 
     if not args.build and not args.image:
@@ -587,18 +507,6 @@ def get_spark_config(
     return dict(non_user_args, **user_args)
 
 
-def _load_mesos_secret():
-    try:
-        with open(DEFAULT_SPARK_MESOS_SECRET_FILE, "r") as f:
-            return f.read()
-    except IOError:
-        paasta_print(
-            "Cannot load mesos secret from %s" % DEFAULT_SPARK_MESOS_SECRET_FILE,
-            file=sys.stderr,
-        )
-        sys.exit(1)
-
-
 def create_spark_config_str(spark_config_dict, is_mrjob):
     conf_option = "--jobconf" if is_mrjob else "--conf"
     spark_config_entries = list()
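The helpers deleted above are not gone: the new imports at the top of the file pull DEFAULT_SPARK_SERVICE, get_aws_credentials, get_default_event_log_dir, and load_mesos_secret_for_spark from paasta_tools.spark_tools instead. That module is not shown in this diff, so the following is only a sketch of what it plausibly contains, reconstructed from the removed bodies and the new keyword-argument call site; treat anything beyond the four imported names as an assumption. get_aws_credentials and its YAML-loading helpers presumably moved across with the same logic and are omitted here for brevity.

    # paasta_tools/spark_tools.py (reconstructed sketch, not the verbatim module)
    import logging

    import boto3
    from ruamel.yaml import YAML

    DEFAULT_SPARK_RUN_CONFIG = "/nail/srv/configs/spark.yaml"
    DEFAULT_SPARK_SERVICE = "spark"
    DEFAULT_SPARK_MESOS_SECRET_FILE = "/nail/etc/paasta_spark_secret"

    log = logging.getLogger(__name__)


    def get_default_event_log_dir(access_key, secret_key):
        """Pick the Spark event log dir configured for the caller's AWS account.

        spark.yaml is expected to map environments to an account_id and a
        default_event_log_dir, roughly (values hypothetical):

            environments:
              dev:
                account_id: "123456789012"
                default_event_log_dir: s3a://example-bucket/spark-logs
        """
        if access_key is None:
            log.warning("No AWS credentials provided; Spark event logging disabled")
            return None
        with open(DEFAULT_SPARK_RUN_CONFIG) as fp:
            spark_run_conf = YAML().load(fp.read())
        # Ask STS which account these credentials belong to, then match it
        # against the configured environments.
        account_id = (
            boto3.client(
                "sts", aws_access_key_id=access_key, aws_secret_access_key=secret_key
            )
            .get_caller_identity()
            .get("Account")
        )
        for conf in spark_run_conf["environments"].values():
            if account_id == conf["account_id"]:
                return conf["default_event_log_dir"]
        return None


    def load_mesos_secret_for_spark():
        # The renamed _load_mesos_secret: reads the shared secret used as
        # spark.mesos.secret when registering the Spark framework with Mesos.
        with open(DEFAULT_SPARK_MESOS_SECRET_FILE) as f:
            return f.read()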
paasta_tools/mesos_tools.py (6 additions, 2 deletions)

@@ -145,15 +145,19 @@ def is_mesos_leader(hostname: str = MY_HOSTNAME) -> bool:
     return get_mesos_leader() == hostname
 
 
-def find_mesos_leader(master):
+def find_mesos_leader(cluster):
     """ Find the leader with redirect given one mesos master.
     """
+    master = (
+        load_system_paasta_config().get_cluster_fqdn_format().format(cluster=cluster)
+    )
     if master is None:
         raise ValueError("Mesos master is required to find leader")
 
     url = f"http://{master}:{MESOS_MASTER_PORT}/redirect"
     response = requests.get(url)
-    return urlparse(response.url).hostname
+    hostname = urlparse(response.url).hostname
+    return f"{hostname}:{MESOS_MASTER_PORT}"
 
 
 async def get_current_tasks(job_id: str) -> List[Task]:
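The net effect is that callers now pass a PaaSTA cluster name and get back a ready-to-use host:port string, with FQDN resolution folded into mesos_tools, as the simplified call sites in spark_run.py above and paasta_remote_run.py below show. A usage sketch (the cluster name here is hypothetical):

    from paasta_tools.mesos_tools import find_mesos_leader

    # Resolves the master FQDN for the cluster from system PaaSTA config,
    # follows the Mesos /redirect endpoint, and returns "<leader-hostname>:5050".
    mesos_address = find_mesos_leader("norcal-devc")  # hypothetical cluster name
    spark_master = f"mesos://{mesos_address}"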
paasta_tools/paasta_remote_run.py (1 addition, 6 deletions)

@@ -216,12 +216,7 @@ def create_mesos_executor(
     """ Create a Mesos executor specific to our cluster """
     MesosExecutor = processor.executor_cls("mesos_task")
 
-    cluster_fqdn = system_paasta_config.get_cluster_fqdn_format().format(
-        cluster=cluster
-    )
-    mesos_address = "{}:{}".format(
-        mesos_tools.find_mesos_leader(cluster_fqdn), mesos_tools.MESOS_MASTER_PORT
-    )
+    mesos_address = mesos_tools.find_mesos_leader(cluster)
 
     return MesosExecutor(
         role=role,