From 6f8887e96d5a5aae488048038e05a0f573326b79 Mon Sep 17 00:00:00 2001
From: cheng chang
Date: Thu, 5 Mar 2015 15:11:39 +0800
Subject: [PATCH 1/4] make tachyon version configurable

improve pep8

error when lacking argument

pep8 again...
---
Review note: the "need to manually specify --tachyon-version" error is
written to sys.stderr. The hunk previously named `std_error`, which no
hunk in this series defines, so reaching that branch would raise
NameError instead of printing the message and exiting with status 1.
`sys` is already in scope there (the next statement is sys.exit(1)).
The blob ids on the `index` lines were not recomputed after this change.

 .../root/spark-ec2/ec2-variables.sh |  3 ++-
 ec2/spark_ec2.py                    | 26 +++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh b/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh
index 740c267fd9866..0857657152ec7 100644
--- a/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh
+++ b/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh
@@ -26,9 +26,10 @@ export SPARK_LOCAL_DIRS="{{spark_local_dirs}}"
 export MODULES="{{modules}}"
 export SPARK_VERSION="{{spark_version}}"
 export SHARK_VERSION="{{shark_version}}"
+export TACHYON_VERSION="{{tachyon_version}}"
 export HADOOP_MAJOR_VERSION="{{hadoop_major_version}}"
 export SWAP_MB="{{swap}}"
 export SPARK_WORKER_INSTANCES="{{spark_worker_instances}}"
 export SPARK_MASTER_OPTS="{{spark_master_opts}}"
 export AWS_ACCESS_KEY_ID="{{aws_access_key_id}}"
-export AWS_SECRET_ACCESS_KEY="{{aws_secret_access_key}}"
\ No newline at end of file
+export AWS_SECRET_ACCESS_KEY="{{aws_secret_access_key}}"
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index c59ab565c6862..e2807a783c2e4 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -61,6 +61,16 @@
     "1.2.1",
 ])
 
+SPARK_TACHYON_MAP = {
+    "1.0.0": "0.4.1",
+    "1.0.1": "0.4.1",
+    "1.0.2": "0.4.1",
+    "1.1.0": "0.5.0",
+    "1.1.1": "0.5.0",
+    "1.2.0": "0.5.0",
+    "1.2.1": "0.5.0",
+}
+
 DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION
 DEFAULT_SPARK_GITHUB_REPO = "https://github.com/apache/spark"
 
@@ -147,6 +157,10 @@ def parse_args():
     parser.add_option(
         "-v", "--spark-version", default=DEFAULT_SPARK_VERSION,
         help="Version of Spark to use: 'X.Y.Z' or a specific git hash (default: %default)")
+    parser.add_option(
+        "--tachyon-version",
+        help="If --spark-version is a git hash, this will be used as the version of Tachyon. " +
+             "Otherwise, this field does not need to be specified")
     parser.add_option(
         "--spark-git-repo",
         default=DEFAULT_SPARK_GITHUB_REPO,
@@ -341,6 +355,10 @@ def is_active(instance):
 }
 
 
+def get_tachyon_version(spark_version):
+    return SPARK_TACHYON_MAP.get(spark_version, "")
+
+
 # Attempt to resolve an appropriate AMI given the architecture and region of the request.
 def get_spark_ami(opts):
     if opts.instance_type in EC2_INSTANCE_TYPES:
@@ -872,9 +890,16 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
     if "." in opts.spark_version:
         # Pre-built Spark deploy
         spark_v = get_validate_spark_version(opts.spark_version, opts.spark_git_repo)
+        tachyon_v = get_tachyon_version(spark_v)
     else:
         # Spark-only custom deploy
         spark_v = "%s|%s" % (opts.spark_git_repo, opts.spark_version)
+        if opts.tachyon_version is None:
+            print >>sys.stderr,\
+                "You have used github hash as --spark-version, " + \
+                "need to manually specify --tachyon-version"
+            sys.exit(1)
+        tachyon_v = opts.tachyon_version
 
     template_vars = {
         "master_list": '\n'.join([i.public_dns_name for i in master_nodes]),
@@ -887,6 +912,7 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
         "swap": str(opts.swap),
         "modules": '\n'.join(modules),
         "spark_version": spark_v,
+        "tachyon_version": tachyon_v,
         "hadoop_major_version": opts.hadoop_major_version,
         "spark_worker_instances": "%d" % opts.worker_instances,
         "spark_master_opts": opts.master_opts

From 1d53c5c5a15c94765abf689de30731ea654c99ce Mon Sep 17 00:00:00 2001
From: cheng chang
Date: Sun, 8 Mar 2015 12:16:12 +0800
Subject: [PATCH 2/4] add default value to --tachyon-version

---
 ec2/spark_ec2.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index e2807a783c2e4..184a76563defe 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -71,6 +71,8 @@
     "1.2.1": "0.5.0",
 }
 
+DEFAULT_TACHYON_VERSION = "0.6.0"
+
 DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION
 DEFAULT_SPARK_GITHUB_REPO = "https://github.com/apache/spark"
 
@@ -159,8 +161,9 @@ def parse_args():
         help="Version of Spark to use: 'X.Y.Z' or a specific git hash (default: %default)")
     parser.add_option(
         "--tachyon-version",
+        default=DEFAULT_TACHYON_VERSION,
         help="If --spark-version is a git hash, this will be used as the version of Tachyon. " +
-             "Otherwise, this field does not need to be specified")
+             "Otherwise, this field does not need to be specified. Default to 0.6.0")
     parser.add_option(
         "--spark-git-repo",
         default=DEFAULT_SPARK_GITHUB_REPO,

From fd2a48e4c4743fd7cc108562d54cda2fc1e4e18e Mon Sep 17 00:00:00 2001
From: cheng chang
Date: Mon, 9 Mar 2015 10:03:30 +0800
Subject: [PATCH 3/4] Remove Tachyon when deploying through git hash

---
Review note: the removed `print >>sys.stderr,\` line below matches the
text added by patch 1/4 of this series, where the undefined name
`std_error` was replaced by sys.stderr. The blob ids on the `index`
line were not recomputed after that change.

 ec2/spark_ec2.py | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index 184a76563defe..17c2dcebd5507 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -71,8 +71,6 @@
     "1.2.1": "0.5.0",
 }
 
-DEFAULT_TACHYON_VERSION = "0.6.0"
-
 DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION
 DEFAULT_SPARK_GITHUB_REPO = "https://github.com/apache/spark"
 
@@ -159,11 +157,6 @@ def parse_args():
     parser.add_option(
         "-v", "--spark-version", default=DEFAULT_SPARK_VERSION,
         help="Version of Spark to use: 'X.Y.Z' or a specific git hash (default: %default)")
-    parser.add_option(
-        "--tachyon-version",
-        default=DEFAULT_TACHYON_VERSION,
-        help="If --spark-version is a git hash, this will be used as the version of Tachyon. " +
-             "Otherwise, this field does not need to be specified. Default to 0.6.0")
     parser.add_option(
         "--spark-git-repo",
         default=DEFAULT_SPARK_GITHUB_REPO,
@@ -897,12 +890,9 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
     else:
         # Spark-only custom deploy
         spark_v = "%s|%s" % (opts.spark_git_repo, opts.spark_version)
-        if opts.tachyon_version is None:
-            print >>sys.stderr,\
-                "You have used github hash as --spark-version, " + \
-                "need to manually specify --tachyon-version"
-            sys.exit(1)
-        tachyon_v = opts.tachyon_version
+        tachyon_v = ""
+        print "Deploy spark via git hash, Tachyon won't be set up"
+        modules = filter(lambda x: x != "tachyon", modules)
 
     template_vars = {
         "master_list": '\n'.join([i.public_dns_name for i in master_nodes]),

From 313aa36dde9db80bb008e4a4cbdf7df08f3c547c Mon Sep 17 00:00:00 2001
From: cheng chang
Date: Tue, 10 Mar 2015 07:29:26 +0800
Subject: [PATCH 4/4] minor re-wording

---
 ec2/spark_ec2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index 17c2dcebd5507..00035bf255351 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -891,7 +891,7 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
         # Spark-only custom deploy
         spark_v = "%s|%s" % (opts.spark_git_repo, opts.spark_version)
         tachyon_v = ""
-        print "Deploy spark via git hash, Tachyon won't be set up"
+        print "Deploying spark via git hash, Tachyon won't be set up"
         modules = filter(lambda x: x != "tachyon", modules)
 
     template_vars = {