Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions bin/interpreter.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ bin=$(cd "${bin}">/dev/null; pwd)


function usage() {
echo "usage) $0 -p <port> -d <directory to load>"
echo "usage) $0 -p <port> -d <interpreter dir to load> -l <local interpreter repo dir to load>"
}

while getopts "hp:d:" o; do
while getopts "hp:d:l:" o; do
case ${o} in
h)
usage
Expand All @@ -36,6 +36,9 @@ while getopts "hp:d:" o; do
p)
PORT=${OPTARG}
;;
l)
LOCAL_INTERPRETER_REPO=${OPTARG}
;;
esac
done

Expand Down Expand Up @@ -128,6 +131,8 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
fi
fi

addJarInDir "${LOCAL_INTERPRETER_REPO}"

CLASSPATH+=":${ZEPPELIN_CLASSPATH}"

if [[ -n "${SPARK_SUBMIT}" ]]; then
Expand Down
5 changes: 3 additions & 2 deletions conf/zeppelin-env.sh.template
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@
# export ZEPPELIN_NOTEBOOK_DIR # Where notebook saved
# export ZEPPELIN_NOTEBOOK_HOMESCREEN # Id of notebook to be displayed in homescreen. ex) 2A94M5J1Z
# export ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE # hide homescreen notebook from list when this value set to "true". default "false"
# export ZEPPELIN_NOTEBOOK_S3_BUCKET # Bucket where notebook saved
# export ZEPPELIN_NOTEBOOK_S3_USER # User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json
# export ZEPPELIN_NOTEBOOK_S3_BUCKET # Bucket where notebook saved
# export ZEPPELIN_NOTEBOOK_S3_USER # User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json
# export ZEPPELIN_IDENT_STRING # A string representing this instance of zeppelin. $USER by default.
# export ZEPPELIN_NICENESS # The scheduling priority for daemons. Defaults to 0.
# export ZEPPELIN_INTERPRETER_LOCALREPO # Local repository for interpreter's additional dependency loading


#### Spark interpreter configuration ####
Expand Down
6 changes: 6 additions & 0 deletions conf/zeppelin-site.xml.template
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@
<description>Interpreter implementation base directory</description>
</property>

<property>
<name>zeppelin.interpreter.localRepo</name>
<value>local-repo</value>
<description>Local repository for interpreter's additional dependency loading</description>
</property>

<property>
<name>zeppelin.interpreters</name>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.hive.HiveInterpreter,org.apache.zeppelin.tajo.TajoInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.phoenix.PhoenixInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.tachyon.TachyonInterpreter</value>
Expand Down
4 changes: 3 additions & 1 deletion docs/_includes/themes/zeppelin/_navigation.html
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
<a href="#" data-toggle="dropdown" class="dropdown-toggle">Interpreter <b class="caret"></b></a>
<ul class="dropdown-menu">
<li><a href="{{BASE_PATH}}/manual/interpreters.html">Overview</a></li>
<li><a href="{{BASE_PATH}}/manual/dynamicinterpreterload.html">Dynamic Interpreter Loading</a></li>
<li role="separator" class="divider"></li>
<li><a href="{{BASE_PATH}}/interpreter/cassandra.html">Cassandra</a></li>
<li><a href="{{BASE_PATH}}/interpreter/elasticsearch.html">Elasticsearch</a></li>
Expand All @@ -53,6 +52,9 @@
<li><a href="{{BASE_PATH}}/interpreter/spark.html">Spark</a></li>
<li><a href="{{BASE_PATH}}/interpreter/tachyon.html">Tachyon</a></li>
<li><a href="{{BASE_PATH}}/pleasecontribute.html">Tajo</a></li>
<li role="separator" class="divider"></li>
<li><a href="{{BASE_PATH}}/manual/dynamicinterpreterload.html">Dynamic Interpreter Loading</a></li>
<li><a href="{{BASE_PATH}}/manual/dependencymanagement.html">Interpreter Dependency Management</a></li>
</ul>
</li>
<li>
Expand Down
4 changes: 3 additions & 1 deletion docs/_includes/themes/zeppelin/default.html
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@

<!-- Le HTML5 shim, for IE6-8 support of HTML elements -->
<!--[if lt IE 9]>
<script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
<script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
<![endif]-->

<link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet">

<!-- Le styles -->
<link href="{{ ASSET_PATH }}/bootstrap/css/bootstrap.css" rel="stylesheet">
<link href="{{ ASSET_PATH }}/css/style.css?body=1" rel="stylesheet" type="text/css">
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
96 changes: 51 additions & 45 deletions docs/interpreter/spark.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,58 @@ SparkContext, SQLContext, ZeppelinContext are automatically created and exposed
<a name="dependencyloading"> </a>

## Dependency Management
There are two ways to load external library in spark interpreter. First is using Zeppelin's `%dep` interpreter and second is loading Spark properties.
There are two ways to load external libraries in the Spark interpreter. The first is using the interpreter setting menu and the second is loading Spark properties.

### 1. Setting Dependencies via Interpreter Setting
Please see [Dependency Management](../manual/dependencymanagement.html) for the details.

### 2. Loading Spark Properties
Once `SPARK_HOME` is set in `conf/zeppelin-env.sh`, Zeppelin uses `spark-submit` as spark interpreter runner. `spark-submit` supports two ways to load configurations. The first is command line options such as --master and Zeppelin can pass these options to `spark-submit` by exporting `SPARK_SUBMIT_OPTIONS` in conf/zeppelin-env.sh. Second is reading configuration options from `SPARK_HOME/conf/spark-defaults.conf`. Spark properties that users can set to distribute libraries are:

<table class="table-configuration">
<tr>
<th>spark-defaults.conf</th>
<th>SPARK_SUBMIT_OPTIONS</th>
<th>Applicable Interpreter</th>
<th>Description</th>
</tr>
<tr>
<td>spark.jars</td>
<td>--jars</td>
<td>%spark</td>
<td>Comma-separated list of local jars to include on the driver and executor classpaths.</td>
</tr>
<tr>
<td>spark.jars.packages</td>
<td>--packages</td>
<td>%spark</td>
<td>Comma-separated list of maven coordinates of jars to include on the driver and executor classpaths. Will search the local maven repo, then maven central and any additional remote repositories given by --repositories. The format for the coordinates should be groupId:artifactId:version.</td>
</tr>
<tr>
<td>spark.files</td>
<td>--files</td>
<td>%pyspark</td>
<td>Comma-separated list of files to be placed in the working directory of each executor.</td>
</tr>
</table>
> Note that adding jar to pyspark is only available via `%dep` interpreter at the moment.

Here are a few examples:

* SPARK\_SUBMIT\_OPTIONS in conf/zeppelin-env.sh

export SPARK_SUBMIT_OPTIONS="--packages com.databricks:spark-csv_2.10:1.2.0 --jars /path/mylib1.jar,/path/mylib2.jar --files /path/mylib1.py,/path/mylib2.zip,/path/mylib3.egg"

* SPARK_HOME/conf/spark-defaults.conf

spark.jars /path/mylib1.jar,/path/mylib2.jar
spark.jars.packages com.databricks:spark-csv_2.10:1.2.0
spark.files /path/mylib1.py,/path/mylib2.egg,/path/mylib3.zip

### 3. Dynamic Dependency Loading via %dep interpreter
> Note: `%dep` interpreter is deprecated since v0.6.0-incubating.
The `%dep` interpreter loads libraries to `%spark` and `%pyspark` but not to the `%spark.sql` interpreter, so we recommend using the first option instead.

### 1. Dynamic Dependency Loading via %dep interpreter
When your code requires external library, instead of doing download/copy/restart Zeppelin, you can easily do following jobs using `%dep` interpreter.

* Load libraries recursively from Maven repository
Expand Down Expand Up @@ -129,49 +178,6 @@ z.load("groupId:artifactId:version").exclude("groupId:*")
z.load("groupId:artifactId:version").local()
```

### 2. Loading Spark Properties
Once `SPARK_HOME` is set in `conf/zeppelin-env.sh`, Zeppelin uses `spark-submit` as spark interpreter runner. `spark-submit` supports two ways to load configurations. The first is command line options such as --master and Zeppelin can pass these options to `spark-submit` by exporting `SPARK_SUBMIT_OPTIONS` in conf/zeppelin-env.sh. Second is reading configuration options from `SPARK_HOME/conf/spark-defaults.conf`. Spark properties that users can set to distribute libraries are:

<table class="table-configuration">
<tr>
<th>spark-defaults.conf</th>
<th>SPARK_SUBMIT_OPTIONS</th>
<th>Applicable Interpreter</th>
<th>Description</th>
</tr>
<tr>
<td>spark.jars</td>
<td>--jars</td>
<td>%spark</td>
<td>Comma-separated list of local jars to include on the driver and executor classpaths.</td>
</tr>
<tr>
<td>spark.jars.packages</td>
<td>--packages</td>
<td>%spark</td>
<td>Comma-separated list of maven coordinates of jars to include on the driver and executor classpaths. Will search the local maven repo, then maven central and any additional remote repositories given by --repositories. The format for the coordinates should be groupId:artifactId:version.</td>
</tr>
<tr>
<td>spark.files</td>
<td>--files</td>
<td>%pyspark</td>
<td>Comma-separated list of files to be placed in the working directory of each executor.</td>
</tr>
</table>
> Note that adding jar to pyspark is only available via `%dep` interpreter at the moment.

Here are a few examples:

* SPARK\_SUBMIT\_OPTIONS in conf/zeppelin-env.sh

export SPARK_SUBMIT_OPTIONS="--packages com.databricks:spark-csv_2.10:1.2.0 --jars /path/mylib1.jar,/path/mylib2.jar --files /path/mylib1.py,/path/mylib2.zip,/path/mylib3.egg"

* SPARK_HOME/conf/spark-defaults.conf

spark.jars /path/mylib1.jar,/path/mylib2.jar
spark.jars.packages com.databricks:spark-csv_2.10:1.2.0
spark.files /path/mylib1.py,/path/mylib2.egg,/path/mylib3.zip

## ZeppelinContext
Zeppelin automatically injects ZeppelinContext as variable 'z' in your scala/python environment. ZeppelinContext provides some additional functions and utility.

Expand Down
74 changes: 74 additions & 0 deletions docs/manual/dependencymanagement.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
---
layout: page
title: "Dependency Management"
description: ""
group: manual
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}

## Dependency Management for Interpreter

You can include external libraries in an interpreter by setting dependencies in the interpreter menu.

When your code requires external library, instead of doing download/copy/restart Zeppelin, you can easily do following jobs in this menu.

* Load libraries recursively from Maven repository
* Load libraries from local filesystem
* Add additional maven repository
* Automatically add libraries to SparkCluster

<hr>
<div class="row">
<div class="col-md-6">
<a data-lightbox="compiler" href="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/interpreter-dependency-loading.png">
<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/interpreter-dependency-loading.png" />
</a>
</div>
<div class="col-md-6" style="padding-top:30px">
<b> Load Dependencies to Interpreter </b>
<br /><br />
<ol>
<li> Click 'Interpreter' menu in navigation bar. </li>
<li> Click 'edit' button of the interpreter which you want to load dependencies to. </li>
<li> Fill artifact and exclude field to your needs.
You can enter not only groupId:artifactId:version but also local file in artifact field. </li>
<li> Press 'Save' to restart the interpreter with loaded libraries. </li>
</ol>
</div>
</div>
<hr>
<div class="row">
<div class="col-md-6">
<a data-lightbox="compiler" href="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/interpreter-add-repo1.png">
<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/interpreter-add-repo1.png" />
</a>
<a data-lightbox="compiler" href="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/interpreter-add-repo2.png">
<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/interpreter-add-repo2.png" />
</a>
</div>
<div class="col-md-6" style="padding-top:30px">
<b> Add repository for dependency resolving </b>
<br /><br />
<ol>
<li> Press <i class="fa fa-cog"></i> icon in 'Interpreter' menu on the top right side.
It will show you available repository lists.</li>
<li> If you need to resolve dependencies from other than central maven repository or
local ~/.m2 repository, hit <i class="fa fa-plus"></i> icon next to repository lists. </li>
<li> Fill out the form and click 'Add' button, then you will be able to see that new repository is added. </li>
</ol>
</div>
</div>

Loading