This repository has been archived by the owner on May 12, 2021. It is now read-only.

[PIO-209] Upgrade Elasticsearch to 6.8 for pre-built binary distribution #516

Merged 14 commits on Nov 5, 2019
5 changes: 1 addition & 4 deletions .travis.yml
@@ -49,10 +49,7 @@ env:
METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL
- BUILD_TYPE=Integration
METADATA_REP=ELASTICSEARCH EVENTDATA_REP=ELASTICSEARCH MODELDATA_REP=S3
PIO_ELASTICSEARCH_VERSION=5.6.9
- BUILD_TYPE=Integration
METADATA_REP=ELASTICSEARCH EVENTDATA_REP=ELASTICSEARCH MODELDATA_REP=S3
PIO_ELASTICSEARCH_VERSION=6.4.2
PIO_ELASTICSEARCH_VERSION=6.8.1
- BUILD_TYPE=Integration
METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS
PIO_HBASE_VERSION=1.2.6
1 change: 0 additions & 1 deletion LICENSE.txt
@@ -1708,7 +1708,6 @@ Binary distribution bundles
org.scala-lang.modules # scala-parser-combinators_2.11 # 1.0.6 (http://scala-lang.org/)
org.scala-lang.modules # scala-parser-combinators_2.11 # 1.1.0 (http://scala-lang.org/)
org.scala-lang.modules # scala-xml_2.11 # 1.0.5 (http://scala-lang.org/)
org.scala-lang.modules # scala-xml_2.11 # 1.0.6 (http://scala-lang.org/)

which is available under the BSD license (http://www.scala-lang.org/downloads/license.html)

4 changes: 2 additions & 2 deletions build.sbt
@@ -26,7 +26,7 @@ scalaVersion in ThisBuild := sys.props.getOrElse("scala.version", "2.11.12")

scalaBinaryVersion in ThisBuild := binaryVersion(scalaVersion.value)

crossScalaVersions in ThisBuild := Seq("2.11.12")
crossScalaVersions in ThisBuild := Seq(scalaVersion.value)

scalacOptions in ThisBuild ++= Seq("-deprecation", "-unchecked", "-feature")

@@ -45,7 +45,7 @@ hadoopVersion in ThisBuild := sys.props.getOrElse("hadoop.version", "2.7.7")

akkaVersion in ThisBuild := sys.props.getOrElse("akka.version", "2.5.17")

elasticsearchVersion in ThisBuild := sys.props.getOrElse("elasticsearch.version", "5.6.9")
elasticsearchVersion in ThisBuild := sys.props.getOrElse("elasticsearch.version", "6.8.1")

hbaseVersion in ThisBuild := sys.props.getOrElse("hbase.version", "1.2.6")
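
Because these settings are read through `sys.props`, the new 6.8.1 default can still be overridden at build time. A minimal sketch, assuming the repository's existing `make-distribution.sh` wrapper, which forwards `-D` properties to sbt:

```
# Build a distribution against an explicit Elasticsearch release; any
# property left unspecified falls back to the defaults in build.sbt.
./make-distribution.sh \
  -Dscala.version=2.11.12 \
  -Delasticsearch.version=6.8.1
```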

7 changes: 3 additions & 4 deletions conf/pio-env.sh.template
@@ -24,7 +24,6 @@
# you need to change these to fit your site.

# SPARK_HOME: Apache Spark is a hard dependency and must be configured.
# SPARK_HOME=$PIO_HOME/vendors/spark-2.0.2-bin-hadoop2.7
SPARK_HOME=$PIO_HOME/vendors/spark-2.1.1-bin-hadoop2.6

POSTGRES_JDBC_DRIVER=$PIO_HOME/lib/postgresql-42.0.0.jar
@@ -40,7 +39,7 @@ MYSQL_JDBC_DRIVER=$PIO_HOME/lib/mysql-connector-java-5.1.41.jar

# HBASE_CONF_DIR: You must configure this if you intend to run PredictionIO
# with HBase on a remote cluster.
# HBASE_CONF_DIR=$PIO_HOME/vendors/hbase-1.0.0/conf
# HBASE_CONF_DIR=$PIO_HOME/vendors/hbase-1.2.6/conf

# Filesystem paths where PredictionIO uses as block storage.
PIO_FS_BASEDIR=$HOME/.pio_store
@@ -89,7 +88,7 @@ PIO_STORAGE_SOURCES_PGSQL_PASSWORD=pio
# PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=localhost
# PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9200
# PIO_STORAGE_SOURCES_ELASTICSEARCH_SCHEMES=http
# PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$PIO_HOME/vendors/elasticsearch-5.6.9
# PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$PIO_HOME/vendors/elasticsearch-6.8.1
# Optional basic HTTP auth
# PIO_STORAGE_SOURCES_ELASTICSEARCH_USERNAME=my-name
# PIO_STORAGE_SOURCES_ELASTICSEARCH_PASSWORD=my-secret
@@ -100,7 +99,7 @@ PIO_STORAGE_SOURCES_PGSQL_PASSWORD=pio

# HBase Example
# PIO_STORAGE_SOURCES_HBASE_TYPE=hbase
# PIO_STORAGE_SOURCES_HBASE_HOME=$PIO_HOME/vendors/hbase-1.0.0
# PIO_STORAGE_SOURCES_HBASE_HOME=$PIO_HOME/vendors/hbase-1.2.6

# AWS S3 Example
# PIO_STORAGE_SOURCES_S3_TYPE=s3
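
Uncommented, the Elasticsearch block above yields a storage source along these lines. A sketch: the `_TYPE` line is assumed to follow the same `PIO_STORAGE_SOURCES_*` convention as the PGSQL and HBase entries in this template:

```
# Elasticsearch 6.8 as a PredictionIO storage source
PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE=elasticsearch
PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=localhost
PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9200
PIO_STORAGE_SOURCES_ELASTICSEARCH_SCHEMES=http
PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$PIO_HOME/vendors/elasticsearch-6.8.1
```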
2 changes: 1 addition & 1 deletion conf/pio-vendors.sh
@@ -32,7 +32,7 @@ if [ -z "$PIO_HADOOP_VERSION" ]; then
fi

if [ -z "$PIO_ELASTICSEARCH_VERSION" ]; then
PIO_ELASTICSEARCH_VERSION="5.6.9"
PIO_ELASTICSEARCH_VERSION="6.8.1"
fi

if [ -z "$PIO_HBASE_VERSION" ]; then
2 changes: 1 addition & 1 deletion docs/manual/data/versions.yml
@@ -1,7 +1,7 @@
pio: 0.14.0
spark: 2.4.0
spark_download_filename: spark-2.4.0-bin-hadoop2.7
elasticsearch_download_filename: elasticsearch-5.6.9
elasticsearch_download_filename: elasticsearch-6.8.1
hbase_version: 1.2.6
hbase_basename: hbase-1.2.6
hbase_variant: bin
52 changes: 16 additions & 36 deletions docs/manual/source/install/index.html.md.erb
@@ -21,54 +21,34 @@ limitations under the License.

## Prerequisites

It is **very important** to meet the minimum version of the following
It is **very important** to use supported versions of the following
technologies that power Apache PredictionIO®.

* Apache Hadoop 2.6.5 (optional, required only if YARN and HDFS are needed)
* Apache Spark 2.0.2 for Hadoop 2.6
* Java SE Development Kit 8
* Apache Spark 2.0+
* Apache Hadoop 2.6, 2.7

and one of the following sets:

* PostgreSQL 9.1

or

* MySQL 5.1

or

* Apache HBase 0.98.5
* Elasticsearch 5.6.9

WARNING: **Note that support for Scala 2.10 and Spark 1.6 were removed as of PredictionIO 0.14.0.**

If you are running on a single machine, we recommend a minimum of 2GB memory.

INFO: If you are using Linux, Apache Spark local mode, which is the default
operation mode without further configuration, may not work. In that case,
configure your Apache Spark to run in [standalone cluster
mode](http://spark.apache.org/docs/latest/spark-standalone.html).
* PostgreSQL 9.6 or MySQL 5.1
* Apache HBase 1.2
* Elasticsearch 6.x, or 5.6 (deprecated)

## Installation

* [Installing Apache PredictionIO](install-sourcecode.html)
Pre-built for the following versions:

You may also use Docker to install Apache PredictionIO®
* Scala 2.11
* Apache Spark 2.4
* Apache Hadoop 2.7
* Elasticsearch 6.8

* [Installing Apache PredictionIO with Docker](install-docker.html)


[//]: # (* *(coming soon)* Installing Apache PredictionIO with Homebrew)
* [Downloading Binary Distribution](install-sourcecode.html#downloading-binary-distribution)

Building Apache PredictionIO

* [Downloading Source Code](install-sourcecode.html#downloading-source-code)

WARNING: **0.8.2 contains schema changes from the previous versions, if you have
installed the previous versions, you may need to clear both HBase and
Elasticsearch. See more [here](/resources/upgrade/).**
Docker


[//]: # (## Production Deployment)

[//]: # (For production environment setup, please refer to [Production)
[//]: # (Deployment](/production/deploy.html) guide.)
* [Installing Apache PredictionIO with Docker](install-docker.html)
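
Putting those versions together, installing the pre-built binary reduces to downloading and unpacking a single tarball. A sketch assuming PredictionIO 0.14.0 (per `versions.yml` above) and the usual Apache archive layout:

```
# Fetch and unpack the pre-built distribution (Scala 2.11 / Spark 2.4 /
# Hadoop 2.7 / Elasticsearch 6.8, per the list above).
curl -LO https://archive.apache.org/dist/predictionio/0.14.0/apache-predictionio-0.14.0-bin.tar.gz
tar zxvf apache-predictionio-0.14.0-bin.tar.gz
```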
19 changes: 0 additions & 19 deletions docs/manual/source/install/install-sourcecode.html.md.erb
@@ -24,14 +24,6 @@ replace `/home/abc` with your own home directory wherever you see it.

## Downloading Binary Distribution

You can use pre-built binary distribution for Apache PredictionIO® if you are
building against

* Scala 2.11.12
* Spark 2.1.3
* Hadoop 2.7.7
* Elasticsearch 5.6.9

Download [binary release from an Apache
mirror](https://www.apache.org/dyn/closer.lua/predictionio/<%= data.versions.pio
%>/apache-predictionio-<%= data.versions.pio %>-bin.tar.gz).
@@ -127,17 +119,6 @@ Extract the binary distribution you have just built.

```
$ tar zxvf PredictionIO-<%= data.versions.pio %>.tar.gz
```

### Building against Different Versions of Dependencies

Starting from version 0.11.0, PredictionIO can be built against different
versions of dependencies. As of writing, one could build PredictionIO against
these different dependencies:

* Scala 2.11.x
* Spark 2.0.x, 2.1.x, 2.2.x, 2.3.x, 2.4.x
* Hadoop 2.6.x, 2.7.x
* Elasticsearch 5.6.x, 6.x

## Installing Dependencies

Let us install dependencies inside a subdirectory of the Apache PredictionIO
5 changes: 2 additions & 3 deletions docs/manual/source/partials/shared/install/_postgres.html.erb
@@ -54,6 +54,5 @@ $ psql -c "create user pio with password 'pio'"
Starting from 0.11.0, PredictionIO no longer bundles JDBC drivers. Download the
PostgreSQL JDBC driver from the [official web
site](https://jdbc.postgresql.org/), and put the JAR file in the `lib`
subdirectory. By default, `conf/pio-env.sh` assumes version 42.0.0 JDBC 4.2. If
you use a different version, modify `POSTGRES_JDBC_DRIVER` to point to the
correct JAR.
subdirectory. Afterwards, edit `conf/pio-env.sh` and change the
`POSTGRES_JDBC_DRIVER` variable to point to the correct JAR.
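
Concretely, that manual step could look like the following sketch; the 42.2.8 driver version here is only an example, not a tested recommendation:

```
# Put a PostgreSQL JDBC driver into PredictionIO's lib directory...
curl -o $PIO_HOME/lib/postgresql-42.2.8.jar \
  https://jdbc.postgresql.org/download/postgresql-42.2.8.jar

# ...then point conf/pio-env.sh at it:
# POSTGRES_JDBC_DRIVER=$PIO_HOME/lib/postgresql-42.2.8.jar
```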
2 changes: 1 addition & 1 deletion project/assembly.sbt
@@ -1 +1 @@
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.7")
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.9")
2 changes: 1 addition & 1 deletion project/build.properties
@@ -1 +1 @@
sbt.version=1.2.3
sbt.version=1.2.8
6 changes: 3 additions & 3 deletions project/plugins.sbt
@@ -2,16 +2,16 @@ addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.9.0")

addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.1.2")

addSbtPlugin("com.typesafe.sbt" % "sbt-twirl" % "1.3.15")
addSbtPlugin("com.typesafe.sbt" % "sbt-twirl" % "1.4.1")

addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "2.3")
addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "2.5")

addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0")

resolvers += "sonatype-releases" at "https://oss.sonatype.org/content/repositories/releases/"

addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.5.1")

addSbtPlugin("com.typesafe.sbt" % "sbt-native-packager" % "1.3.6")
addSbtPlugin("com.typesafe.sbt" % "sbt-native-packager" % "1.3.22")

addSbtPlugin("com.typesafe.sbt" % "sbt-license-report" % "1.2.0")
17 changes: 4 additions & 13 deletions storage/elasticsearch/build.sbt
@@ -19,29 +19,20 @@ import PIOBuild._

name := "apache-predictionio-data-elasticsearch"

elasticsearchVersion := (if (majorVersion(elasticsearchVersion.value) < 5) "5.6.9" else elasticsearchVersion.value)

libraryDependencies ++= Seq(
"org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
"org.apache.spark" %% "spark-core" % sparkVersion.value % "provided",
"org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
"org.apache.spark" %% "spark-core" % sparkVersion.value % "provided",
"org.elasticsearch.client" % "elasticsearch-rest-client" % elasticsearchVersion.value,
"org.elasticsearch" %% "elasticsearch-spark-20" % elasticsearchVersion.value
"org.elasticsearch" %% "elasticsearch-spark-20" % elasticsearchVersion.value
exclude("org.apache.spark", "*"),
"org.elasticsearch" % "elasticsearch-hadoop-mr" % elasticsearchVersion.value,
"org.specs2" %% "specs2" % "2.3.13" % "test")
"org.specs2" %% "specs2" % "2.3.13" % "test")

parallelExecution in Test := false

pomExtra := childrenPomExtra.value

assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)

assemblyShadeRules in assembly := Seq(
ShadeRule.rename("org.apache.http.**" ->
"org.apache.predictionio.shaded.org.apache.http.@1").inAll,
ShadeRule.rename("org.elasticsearch.client.**" ->
"org.apache.predictionio.shaded.org.elasticsearch.client.@1").inAll)

// skip test in assembly
test in assembly := {}

ESAccessKeys.scala (storage/elasticsearch)
@@ -36,19 +36,20 @@ import org.json4s.native.Serialization.write
import grizzled.slf4j.Logging

/** Elasticsearch implementation of AccessKeys. */
class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: String)
class ESAccessKeys(client: RestClient, config: StorageClientConfig, metadataName: String)
extends AccessKeys with Logging {
implicit val formats = DefaultFormats.lossless
private val estype = "accesskeys"
private val internalIndex = index + "_" + estype

ESUtils.createIndex(client, internalIndex)
val mappingJson =
(estype ->
("properties" ->
("key" -> ("type" -> "keyword")) ~
("events" -> ("type" -> "keyword"))))
ESUtils.createMapping(client, internalIndex, estype, compact(render(mappingJson)))
private val metadataKey = "accesskeys"
private val index = metadataName + "_" + metadataKey
private val estype = {
val mappingJson =
("mappings" ->
("properties" ->
("key" -> ("type" -> "keyword")) ~
("events" -> ("type" -> "keyword"))))

ESUtils.createIndex(client, index, compact(render(mappingJson)))
}

def insert(accessKey: AccessKey): Option[String] = {
val key = if (accessKey.key.isEmpty) generateKey else accessKey.key
@@ -63,7 +64,7 @@
try {
val response = client.performRequest(
"GET",
s"/$internalIndex/$estype/$id",
s"/$index/$estype/$id",
Map.empty[String, String].asJava)
val jsonResponse = parse(EntityUtils.toString(response.getEntity))
(jsonResponse \ "found").extract[Boolean] match {
@@ -77,11 +78,11 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: String)
e.getResponse.getStatusLine.getStatusCode match {
case 404 => None
case _ =>
error(s"Failed to access to /$internalIndex/$estype/$id", e)
error(s"Failed to access to /$index/$estype/$id", e)
None
}
case e: IOException =>
error(s"Failed to access to /$internalIndex/$estype/$id", e)
error(s"Failed to access to /$index/$estype/$id", e)
None
}
}
@@ -91,10 +92,10 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: String)
val json =
("query" ->
("match_all" -> List.empty))
ESUtils.getAll[AccessKey](client, internalIndex, estype, compact(render(json)))
ESUtils.getAll[AccessKey](client, index, compact(render(json)))
} catch {
case e: IOException =>
error("Failed to access to /$internalIndex/$estype/_search", e)
error(s"Failed to access to /$index/_search", e)
Nil
}
}
@@ -105,10 +106,10 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: String)
("query" ->
("term" ->
("appid" -> appid)))
ESUtils.getAll[AccessKey](client, internalIndex, estype, compact(render(json)))
ESUtils.getAll[AccessKey](client, index, compact(render(json)))
} catch {
case e: IOException =>
error("Failed to access to /$internalIndex/$estype/_search", e)
error(s"Failed to access to /$index/_search", e)
Nil
}
}
@@ -118,8 +119,8 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: String)
try {
val entity = new NStringEntity(write(accessKey), ContentType.APPLICATION_JSON)
val response = client.performRequest(
"POST",
s"/$internalIndex/$estype/$id",
"PUT",
s"/$index/$estype/$id",
Map("refresh" -> "true").asJava,
entity)
val jsonResponse = parse(EntityUtils.toString(response.getEntity))
Expand All @@ -128,30 +129,30 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin
case "created" =>
case "updated" =>
case _ =>
error(s"[$result] Failed to update $internalIndex/$estype/$id")
error(s"[$result] Failed to update $index/$estype/$id")
}
} catch {
case e: IOException =>
error(s"Failed to update $internalIndex/$estype/$id", e)
error(s"Failed to update $index/$estype/$id", e)
}
}

def delete(id: String): Unit = {
try {
val response = client.performRequest(
"DELETE",
s"/$internalIndex/$estype/$id",
s"/$index/$estype/$id",
Map("refresh" -> "true").asJava)
val json = parse(EntityUtils.toString(response.getEntity))
val result = (json \ "result").extract[String]
result match {
case "deleted" =>
case _ =>
error(s"[$result] Failed to update $internalIndex/$estype/id")
error(s"[$result] Failed to delete $index/$estype/$id")
}
} catch {
case e: IOException =>
error(s"Failed to update $internalIndex/$estype/id", e)
error(s"Failed to delete $index/$estype/$id", e)
}
}
}
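
The rewrite above tracks Elasticsearch 6.x dropping multiple mapping types per index: instead of sharing one metadata index across types, each metadata kind now gets its own index (`<metadataName>_accesskeys`) with its mapping supplied at creation time, and documents are written with idempotent `PUT` requests. In raw REST terms the bootstrap is roughly equivalent to the sketch below; the concrete index name, the `include_type_name=false` parameter, and the `_doc` type are assumptions, since the actual request is built inside `ESUtils.createIndex`, which this diff does not show:

```
# Create a dedicated access-keys index with a typeless mapping (ES 6.8).
curl -XPUT 'http://localhost:9200/pio_meta_accesskeys?include_type_name=false' \
  -H 'Content-Type: application/json' -d'
{
  "mappings": {
    "properties": {
      "key":    { "type": "keyword" },
      "events": { "type": "keyword" }
    }
  }
}'

# Upsert one key by id; PUT (rather than POST) keeps retries idempotent.
curl -XPUT 'http://localhost:9200/pio_meta_accesskeys/_doc/my-key?refresh=true' \
  -H 'Content-Type: application/json' \
  -d'{"key": "my-key", "appid": 1, "events": []}'
```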