From d9a1b581a35f8b8f9a3b89415d10469b01dbd3aa Mon Sep 17 00:00:00 2001 From: shimamoto Date: Thu, 13 Jun 2019 19:10:14 +0900 Subject: [PATCH 01/14] Upgrade Spark to 2.4 for pre-built binary distribution --- .travis.yml | 6 +- build.sbt | 18 +- common/build.sbt | 10 +- conf/pio-env.sh.template | 11 +- conf/pio-vendors.sh | 6 +- core/build.sbt | 21 +- data/build.sbt | 17 +- docs/manual/data/versions.yml | 10 +- docs/manual/source/install/index.html.md.erb | 52 ++-- .../install/install-sourcecode.html.md.erb | 19 -- .../shared/install/_postgres.html.erb | 5 +- project/PIOBuild.scala | 1 - project/assembly.sbt | 2 +- project/build.properties | 2 +- project/plugins.sbt | 6 +- storage/elasticsearch/build.sbt | 18 +- .../storage/elasticsearch/ESAccessKeys.scala | 26 +- .../data/storage/elasticsearch/ESApps.scala | 35 +-- .../storage/elasticsearch/ESChannels.scala | 26 +- .../elasticsearch/ESEngineInstances.scala | 36 ++- .../elasticsearch/ESEvaluationInstances.scala | 26 +- .../storage/elasticsearch/ESLEvents.scala | 64 +++-- .../storage/elasticsearch/ESPEvents.scala | 14 +- .../storage/elasticsearch/ESSequences.scala | 14 +- .../data/storage/elasticsearch/ESUtils.scala | 58 ++--- storage/hbase/build.sbt | 3 +- .../data/storage/hbase/HBEventsUtil.scala | 12 +- .../data/storage/hbase/HBLEvents.scala | 8 +- .../data/storage/hbase/HBPEvents.scala | 1 + .../data/storage/hbase/StorageClient.scala | 8 +- .../data/storage/hbase/upgrade/HB_0_8_0.scala | 193 --------------- .../data/storage/hbase/upgrade/Upgrade.scala | 75 ------ .../storage/hbase/upgrade/Upgrade_0_8_3.scala | 224 ------------------ storage/hdfs/build.sbt | 3 +- storage/jdbc/build.sbt | 6 +- .../storage/jdbc/JDBCEngineInstances.scala | 1 + .../jdbc/JDBCEvaluationInstances.scala | 1 + .../data/storage/jdbc/JDBCLEvents.scala | 1 + storage/localfs/build.sbt | 3 +- storage/s3/build.sbt | 9 +- tools/build.sbt | 15 +- .../predictionio/tools/console/Pio.scala | 7 +- .../tools/export/EventsToFile.scala | 4 +- 43 files changed, 221 insertions(+), 856 deletions(-) delete mode 100644 storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/HB_0_8_0.scala delete mode 100644 storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade.scala delete mode 100644 storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade_0_8_3.scala diff --git a/.travis.yml b/.travis.yml index c8071655fc..d383d87179 100644 --- a/.travis.yml +++ b/.travis.yml @@ -52,10 +52,10 @@ env: PIO_ELASTICSEARCH_VERSION=5.6.9 - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=ELASTICSEARCH MODELDATA_REP=S3 - PIO_ELASTICSEARCH_VERSION=6.4.2 + PIO_ELASTICSEARCH_VERSION=6.8.0 - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS - PIO_HBASE_VERSION=1.2.6 + PIO_HBASE_VERSION=1.4.10 - BUILD_TYPE=Integration METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL @@ -101,7 +101,7 @@ env: - BUILD_TYPE=Integration METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS PIO_SCALA_VERSION=2.11.12 - PIO_SPARK_VERSION=2.4.0 + PIO_SPARK_VERSION=2.4.3 PIO_HADOOP_VERSION=2.7.7 - BUILD_TYPE=LicenseCheck diff --git a/build.sbt b/build.sbt index 082521b929..1b580562e7 100644 --- a/build.sbt +++ b/build.sbt @@ -26,7 +26,7 @@ scalaVersion in ThisBuild := sys.props.getOrElse("scala.version", "2.11.12") scalaBinaryVersion in ThisBuild := binaryVersion(scalaVersion.value) -crossScalaVersions in ThisBuild := Seq("2.11.12") +crossScalaVersions in ThisBuild := 
Seq(scalaVersion.value) scalacOptions in ThisBuild ++= Seq("-deprecation", "-unchecked", "-feature") @@ -37,17 +37,15 @@ javacOptions in (ThisBuild, compile) ++= Seq("-source", "1.8", "-target", "1.8", "-Xlint:deprecation", "-Xlint:unchecked") // Ignore differentiation of Spark patch levels -sparkVersion in ThisBuild := sys.props.getOrElse("spark.version", "2.1.3") +sparkVersion in ThisBuild := sys.props.getOrElse("spark.version", "2.4.3") sparkBinaryVersion in ThisBuild := binaryVersion(sparkVersion.value) hadoopVersion in ThisBuild := sys.props.getOrElse("hadoop.version", "2.7.7") -akkaVersion in ThisBuild := sys.props.getOrElse("akka.version", "2.5.17") +elasticsearchVersion in ThisBuild := sys.props.getOrElse("elasticsearch.version", "6.8.0") -elasticsearchVersion in ThisBuild := sys.props.getOrElse("elasticsearch.version", "5.6.9") - -hbaseVersion in ThisBuild := sys.props.getOrElse("hbase.version", "1.2.6") +hbaseVersion in ThisBuild := sys.props.getOrElse("hbase.version", "1.4.10") json4sVersion in ThisBuild := { sparkBinaryVersion.value match { @@ -65,11 +63,6 @@ val commonSettings = Seq( unmanagedClasspath in Test += conf, unmanagedClasspath in Test += baseDirectory.value.getParentFile / s"storage/jdbc/target/scala-${scalaBinaryVersion.value}/classes") -val commonTestSettings = Seq( - libraryDependencies ++= Seq( - "org.postgresql" % "postgresql" % "9.4-1204-jdbc41" % "test", - "org.scalikejdbc" %% "scalikejdbc" % "3.1.0" % "test")) - val dataElasticsearch = (project in file("storage/elasticsearch")). settings(commonSettings: _*) @@ -101,14 +94,12 @@ val common = (project in file("common")). val data = (project in file("data")). dependsOn(common). settings(commonSettings: _*). - settings(commonTestSettings: _*). enablePlugins(GenJavadocPlugin). disablePlugins(sbtassembly.AssemblyPlugin) val core = (project in file("core")). dependsOn(data). settings(commonSettings: _*). - settings(commonTestSettings: _*). enablePlugins(GenJavadocPlugin). enablePlugins(BuildInfoPlugin). settings( @@ -134,7 +125,6 @@ val e2 = (project in file("e2")). val tools = (project in file("tools")). dependsOn(e2). settings(commonSettings: _*). - settings(commonTestSettings: _*). settings(skip in publish := true). enablePlugins(GenJavadocPlugin). enablePlugins(SbtTwirl) diff --git a/common/build.sbt b/common/build.sbt index f9fd97bfea..311b152998 100644 --- a/common/build.sbt +++ b/common/build.sbt @@ -20,11 +20,11 @@ import PIOBuild._ name := "apache-predictionio-common" libraryDependencies ++= Seq( - "com.typesafe.akka" %% "akka-actor" % akkaVersion.value, - "com.typesafe.akka" %% "akka-slf4j" % akkaVersion.value, - "com.typesafe.akka" %% "akka-http" % "10.1.5", - "org.json4s" %% "json4s-native" % json4sVersion.value, - "com.typesafe.akka" %% "akka-stream" % "2.5.12" + "com.typesafe.akka" %% "akka-actor" % "2.5.23", + "com.typesafe.akka" %% "akka-slf4j" % "2.5.23", + "com.typesafe.akka" %% "akka-stream" % "2.5.23", + "com.typesafe.akka" %% "akka-http" % "10.1.8", + "org.json4s" %% "json4s-native" % json4sVersion.value ) pomExtra := childrenPomExtra.value diff --git a/conf/pio-env.sh.template b/conf/pio-env.sh.template index 5fbad4b426..8de5f651cf 100644 --- a/conf/pio-env.sh.template +++ b/conf/pio-env.sh.template @@ -24,10 +24,9 @@ # you need to change these to fit your site. # SPARK_HOME: Apache Spark is a hard dependency and must be configured. 
-# SPARK_HOME=$PIO_HOME/vendors/spark-2.0.2-bin-hadoop2.7 -SPARK_HOME=$PIO_HOME/vendors/spark-2.1.1-bin-hadoop2.6 +SPARK_HOME=$PIO_HOME/vendors/spark-2.4.3-bin-hadoop2.7 -POSTGRES_JDBC_DRIVER=$PIO_HOME/lib/postgresql-42.0.0.jar +POSTGRES_JDBC_DRIVER=$PIO_HOME/lib/postgresql-42.2.5.jar MYSQL_JDBC_DRIVER=$PIO_HOME/lib/mysql-connector-java-5.1.41.jar # ES_CONF_DIR: You must configure this if you have advanced configuration for @@ -40,7 +39,7 @@ MYSQL_JDBC_DRIVER=$PIO_HOME/lib/mysql-connector-java-5.1.41.jar # HBASE_CONF_DIR: You must configure this if you intend to run PredictionIO # with HBase on a remote cluster. -# HBASE_CONF_DIR=$PIO_HOME/vendors/hbase-1.0.0/conf +# HBASE_CONF_DIR=$PIO_HOME/vendors/hbase-1.4.10/conf # Filesystem paths where PredictionIO uses as block storage. PIO_FS_BASEDIR=$HOME/.pio_store @@ -89,7 +88,7 @@ PIO_STORAGE_SOURCES_PGSQL_PASSWORD=pio # PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=localhost # PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9200 # PIO_STORAGE_SOURCES_ELASTICSEARCH_SCHEMES=http -# PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$PIO_HOME/vendors/elasticsearch-5.6.9 +# PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$PIO_HOME/vendors/elasticsearch-6.8.0 # Optional basic HTTP auth # PIO_STORAGE_SOURCES_ELASTICSEARCH_USERNAME=my-name # PIO_STORAGE_SOURCES_ELASTICSEARCH_PASSWORD=my-secret @@ -100,7 +99,7 @@ PIO_STORAGE_SOURCES_PGSQL_PASSWORD=pio # HBase Example # PIO_STORAGE_SOURCES_HBASE_TYPE=hbase -# PIO_STORAGE_SOURCES_HBASE_HOME=$PIO_HOME/vendors/hbase-1.0.0 +# PIO_STORAGE_SOURCES_HBASE_HOME=$PIO_HOME/vendors/hbase-1.4.10 # AWS S3 Example # PIO_STORAGE_SOURCES_S3_TYPE=s3 diff --git a/conf/pio-vendors.sh b/conf/pio-vendors.sh index 959c395f88..bb067d76f1 100644 --- a/conf/pio-vendors.sh +++ b/conf/pio-vendors.sh @@ -24,7 +24,7 @@ if [ -z "$PIO_SCALA_VERSION" ]; then fi if [ -z "$PIO_SPARK_VERSION" ]; then - PIO_SPARK_VERSION="2.1.3" + PIO_SPARK_VERSION="2.4.3" fi if [ -z "$PIO_HADOOP_VERSION" ]; then @@ -32,7 +32,7 @@ if [ -z "$PIO_HADOOP_VERSION" ]; then fi if [ -z "$PIO_ELASTICSEARCH_VERSION" ]; then - PIO_ELASTICSEARCH_VERSION="5.6.9" + PIO_ELASTICSEARCH_VERSION="6.8.0" fi if [ -z "$PIO_HBASE_VERSION" ]; then @@ -45,7 +45,7 @@ export ES_TAG="$PIO_ELASTICSEARCH_VERSION" HBASE_MAJOR=`echo $PIO_HBASE_VERSION | awk -F. '{print $1 "." $2}'` export HBASE_TAG="$HBASE_MAJOR" -PGSQL_JAR=postgresql-9.4-1204.jdbc41.jar +PGSQL_JAR=postgresql-42.2.5.jar PGSQL_DOWNLOAD=https://jdbc.postgresql.org/download/${PGSQL_JAR} HADOOP_MAJOR=`echo $PIO_HADOOP_VERSION | awk -F. '{print $1 "." 
$2}'` diff --git a/core/build.sbt b/core/build.sbt index 14b3449744..5692dd9f78 100644 --- a/core/build.sbt +++ b/core/build.sbt @@ -20,19 +20,18 @@ import PIOBuild._ name := "apache-predictionio-core" libraryDependencies ++= Seq( - "com.github.scopt" %% "scopt" % "3.5.0", - "com.google.code.gson" % "gson" % "2.5", - "com.twitter" %% "chill-bijection" % "0.7.2", - "de.javakaffee" % "kryo-serializers" % "0.37", - "net.jodah" % "typetools" % "0.3.1", + "com.github.scopt" %% "scopt" % "3.7.0", + "com.google.code.gson" % "gson" % "2.8.5", + "com.twitter" %% "chill-bijection" % "0.9.3", + "de.javakaffee" % "kryo-serializers" % "0.45", + "net.jodah" % "typetools" % "0.6.1", "org.apache.spark" %% "spark-core" % sparkVersion.value % "provided", "org.json4s" %% "json4s-ext" % json4sVersion.value, - "org.scalaj" %% "scalaj-http" % "1.1.6", - "org.slf4j" % "slf4j-log4j12" % "1.7.18", - "org.scalatest" %% "scalatest" % "2.1.7" % "test", - "org.specs2" %% "specs2" % "2.3.13" % "test", - "org.scalamock" %% "scalamock-scalatest-support" % "3.5.0" % "test", - "com.h2database" % "h2" % "1.4.196" % "test" + "org.scalaj" %% "scalaj-http" % "2.4.1", + "org.slf4j" % "slf4j-log4j12" % "1.7.26", + "org.scalatest" %% "scalatest" % "3.0.8" % "test", + "org.scalamock" %% "scalamock" % "4.2.0" % "test", + "com.h2database" % "h2" % "1.4.199" % "test" ) parallelExecution in Test := false diff --git a/data/build.sbt b/data/build.sbt index 65925360a3..89bca2de4c 100644 --- a/data/build.sbt +++ b/data/build.sbt @@ -21,16 +21,15 @@ name := "apache-predictionio-data" libraryDependencies ++= Seq( "org.scala-lang" % "scala-reflect" % scalaVersion.value, - "com.github.nscala-time" %% "nscala-time" % "2.6.0", - "com.google.guava" % "guava" % "14.0.1", - "com.typesafe.akka" %% "akka-http-testkit" % "10.1.5" % "test", + "com.github.nscala-time" %% "nscala-time" % "2.22.0", + "com.google.guava" % "guava" % "27.1-jre", + "com.typesafe.akka" %% "akka-http-testkit" % "10.1.8" % "test", "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided", - "org.clapper" %% "grizzled-slf4j" % "1.0.2", - "org.scalatest" %% "scalatest" % "2.1.7" % "test", - "org.specs2" %% "specs2" % "3.3.1" % "test" - exclude("org.scalaz.stream", s"scalaz-stream_${scalaBinaryVersion.value}"), - "org.scalamock" %% "scalamock-specs2-support" % "3.5.0" % "test", - "com.h2database" % "h2" % "1.4.196" % "test") + "org.clapper" %% "grizzled-slf4j" % "1.3.3", + "org.scalatest" %% "scalatest" % "3.0.8" % "test", + "org.specs2" %% "specs2-core" % "4.5.1" % "test", + "org.scalamock" %% "scalamock" % "4.2.0" % "test", + "com.h2database" % "h2" % "1.4.199" % "test") parallelExecution in Test := false diff --git a/docs/manual/data/versions.yml b/docs/manual/data/versions.yml index 54fa46968b..0ef7d808f9 100644 --- a/docs/manual/data/versions.yml +++ b/docs/manual/data/versions.yml @@ -1,7 +1,7 @@ pio: 0.14.0 -spark: 2.4.0 -spark_download_filename: spark-2.4.0-bin-hadoop2.7 -elasticsearch_download_filename: elasticsearch-5.6.9 -hbase_version: 1.2.6 -hbase_basename: hbase-1.2.6 +spark: 2.4.3 +spark_download_filename: spark-2.4.3-bin-hadoop2.7 +elasticsearch_download_filename: elasticsearch-6.8.0 +hbase_version: 1.4.10 +hbase_basename: hbase-1.4.10 hbase_variant: bin diff --git a/docs/manual/source/install/index.html.md.erb b/docs/manual/source/install/index.html.md.erb index ce2023b993..d7263b0114 100644 --- a/docs/manual/source/install/index.html.md.erb +++ b/docs/manual/source/install/index.html.md.erb @@ -21,54 +21,34 @@ limitations under the License. 
## Prerequisites -It is **very important** to meet the minimum version of the following +It is **very important** to meet the version of the following technologies that power Apache PredictionIO®. -* Apache Hadoop 2.6.5 (optional, required only if YARN and HDFS are needed) -* Apache Spark 2.0.2 for Hadoop 2.6 * Java SE Development Kit 8 +* Apache Spark 2.0+ +* Apache Hadoop 2.6, 2.7 and one of the following sets: -* PostgreSQL 9.1 - -or - -* MySQL 5.1 - -or - -* Apache HBase 0.98.5 -* Elasticsearch 5.6.9 - -WARNING: **Note that support for Scala 2.10 and Spark 1.6 were removed as of PredictionIO 0.14.0.** - -If you are running on a single machine, we recommend a minimum of 2GB memory. - -INFO: If you are using Linux, Apache Spark local mode, which is the default -operation mode without further configuration, may not work. In that case, -configure your Apache Spark to run in [standalone cluster -mode](http://spark.apache.org/docs/latest/spark-standalone.html). +* PostgreSQL 9.6 or MySQL 5.1 +* Apache HBase 1.4 +* Elasticsearch 6.x, 5.6(deprecated) ## Installation -* [Installing Apache PredictionIO](install-sourcecode.html) +Pre-built for the following versions -You may also use Docker to install Apache PredictionIO® +* Scala 2.11 +* Apache Spark 2.4 +* Apache Hadoop 2.7 +* Elasticsearch 6.8 -* [Installing Apache PredictionIO with Docker](install-docker.html) - - -[//]: # (* *(coming soon)* Installing Apache PredictionIO with Homebrew) +* [Downloading Binary Distribution](install-sourcecode.html#downloading-binary-distribution) +Building Apache PredictionIO +* [Downloading Source Code](install-sourcecode.html#downloading-source-code) -WARNING: **0.8.2 contains schema changes from the previous versions, if you have -installed the previous versions, you may need to clear both HBase and -Elasticsearch. See more [here](/resources/upgrade/).** +Docker - -[//]: # (## Production Deployment) - -[//]: # (For production environment setup, please refer to [Production) -[//]: # (Deployment](/production/deploy.html) guide.) +* [Installing Apache PredictionIO with Docker](install-docker.html) diff --git a/docs/manual/source/install/install-sourcecode.html.md.erb b/docs/manual/source/install/install-sourcecode.html.md.erb index 60f8772850..cfb49298f1 100644 --- a/docs/manual/source/install/install-sourcecode.html.md.erb +++ b/docs/manual/source/install/install-sourcecode.html.md.erb @@ -24,14 +24,6 @@ replace `/home/abc` with your own home directory wherever you see it. ## Downloading Binary Distribution -You can use pre-built binary distribution for Apache PredictionIO® if you are -building against - -* Scala 2.11.12 -* Spark 2.1.3 -* Hadoop 2.7.7 -* Elasticsearch 5.6.9 - Download [binary release from an Apache mirror](https://www.apache.org/dyn/closer.lua/predictionio/<%= data.versions.pio %>/apache-predictionio-<%= data.versions.pio %>-bin.tar.gz). @@ -127,17 +119,6 @@ Extract the binary distribution you have just built. $ tar zxvf PredictionIO-<%= data.versions.pio %>.tar.gz ``` -### Building against Different Versions of Dependencies - -Starting from version 0.11.0, PredictionIO can be built against different -versions of dependencies. 
As of writing, one could build PredictionIO against -these different dependencies: - -* Scala 2.11.x -* Spark 2.0.x, 2.1.x, 2.2.x, 2.3.x, 2.4.x -* Hadoop 2.6.x, 2.7.x -* Elasticsearch 5.6.x, 6.x - ## Installing Dependencies Let us install dependencies inside a subdirectory of the Apache PredictionIO diff --git a/docs/manual/source/partials/shared/install/_postgres.html.erb b/docs/manual/source/partials/shared/install/_postgres.html.erb index a2e6e99b77..3de4f3a799 100644 --- a/docs/manual/source/partials/shared/install/_postgres.html.erb +++ b/docs/manual/source/partials/shared/install/_postgres.html.erb @@ -54,6 +54,5 @@ $ psql -c "create user pio with password 'pio'" Starting from 0.11.0, PredictionIO no longer bundles JDBC drivers. Download the PostgreSQL JDBC driver from the [official web site](https://jdbc.postgresql.org/), and put the JAR file in the `lib` -subdirectory. By default, `conf/pio-env.sh` assumes version 42.0.0 JDBC 4.2. If -you use a different version, modify `POSTGRES_JDBC_DRIVER` to point to the -correct JAR. +subdirectory. Afterwords, you need to edit `conf/pio-env.sh` and change the +`POSTGRES_JDBC_DRIVER` variable to point to the correct JAR. diff --git a/project/PIOBuild.scala b/project/PIOBuild.scala index 615efc0998..91709d6baa 100644 --- a/project/PIOBuild.scala +++ b/project/PIOBuild.scala @@ -24,7 +24,6 @@ object PIOBuild { val sparkVersion = settingKey[String]("The version of Apache Spark used for building") val sparkBinaryVersion = settingKey[String]("The binary version of Apache Spark used for building") val hadoopVersion = settingKey[String]("The version of Apache Hadoop used for building") - val akkaVersion = settingKey[String]("The version of Akka used for building") val childrenPomExtra = settingKey[scala.xml.NodeSeq]("Extra POM data for children projects") diff --git a/project/assembly.sbt b/project/assembly.sbt index d95475f16f..9c014713d3 100644 --- a/project/assembly.sbt +++ b/project/assembly.sbt @@ -1 +1 @@ -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.7") +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.9") diff --git a/project/build.properties b/project/build.properties index 5f528e4747..1fc4b8093e 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=1.2.3 \ No newline at end of file +sbt.version=1.2.8 \ No newline at end of file diff --git a/project/plugins.sbt b/project/plugins.sbt index fece7e4235..0c9832c959 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -2,9 +2,9 @@ addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.9.0") addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.1.2") -addSbtPlugin("com.typesafe.sbt" % "sbt-twirl" % "1.3.15") +addSbtPlugin("com.typesafe.sbt" % "sbt-twirl" % "1.4.1") -addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "2.3") +addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "2.5") addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") @@ -12,6 +12,6 @@ resolvers += "sonatype-releases" at "https://oss.sonatype.org/content/repositori addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.5.1") -addSbtPlugin("com.typesafe.sbt" % "sbt-native-packager" % "1.3.6") +addSbtPlugin("com.typesafe.sbt" % "sbt-native-packager" % "1.3.22") addSbtPlugin("com.typesafe.sbt" % "sbt-license-report" % "1.2.0") \ No newline at end of file diff --git a/storage/elasticsearch/build.sbt b/storage/elasticsearch/build.sbt index 3dbd3dec96..f474330382 100644 --- a/storage/elasticsearch/build.sbt +++ b/storage/elasticsearch/build.sbt @@ -19,16 +19,14 @@ import PIOBuild._ 
name := "apache-predictionio-data-elasticsearch" -elasticsearchVersion := (if (majorVersion(elasticsearchVersion.value) < 5) "5.6.9" else elasticsearchVersion.value) - libraryDependencies ++= Seq( - "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided", - "org.apache.spark" %% "spark-core" % sparkVersion.value % "provided", + "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided", + "org.apache.spark" %% "spark-core" % sparkVersion.value % "provided", "org.elasticsearch.client" % "elasticsearch-rest-client" % elasticsearchVersion.value, - "org.elasticsearch" %% "elasticsearch-spark-20" % elasticsearchVersion.value + "org.elasticsearch" %% "elasticsearch-spark-20" % elasticsearchVersion.value exclude("org.apache.spark", "*"), - "org.elasticsearch" % "elasticsearch-hadoop-mr" % elasticsearchVersion.value, - "org.specs2" %% "specs2" % "2.3.13" % "test") +// "org.elasticsearch" % "elasticsearch-hadoop-mr" % elasticsearchVersion.value, + "org.specs2" %% "specs2-core" % "4.5.1" % "test") parallelExecution in Test := false @@ -36,12 +34,6 @@ pomExtra := childrenPomExtra.value assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false) -assemblyShadeRules in assembly := Seq( - ShadeRule.rename("org.apache.http.**" -> - "org.apache.predictionio.shaded.org.apache.http.@1").inAll, - ShadeRule.rename("org.elasticsearch.client.**" -> - "org.apache.predictionio.shaded.org.elasticsearch.client.@1").inAll) - // skip test in assembly test in assembly := {} diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala index eef83e4f68..7c5fb74bef 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala @@ -19,15 +19,13 @@ package org.apache.predictionio.data.storage.elasticsearch import java.io.IOException -import scala.collection.JavaConverters.mapAsJavaMapConverter - import org.apache.http.entity.ContentType import org.apache.http.nio.entity.NStringEntity import org.apache.http.util.EntityUtils import org.apache.predictionio.data.storage.AccessKey import org.apache.predictionio.data.storage.AccessKeys import org.apache.predictionio.data.storage.StorageClientConfig -import org.elasticsearch.client.{ResponseException, RestClient} +import org.elasticsearch.client.{Request, ResponseException, RestClient} import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.native.JsonMethods._ @@ -62,9 +60,8 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin } try { val response = client.performRequest( - "GET", - s"/$internalIndex/$estype/$id", - Map.empty[String, String].asJava) + new Request("GET", s"/$internalIndex/$estype/$id") + ) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "found").extract[Boolean] match { case true => @@ -116,12 +113,10 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin def update(accessKey: AccessKey): Unit = { val id = accessKey.key try { - val entity = new NStringEntity(write(accessKey), ContentType.APPLICATION_JSON) - val response = client.performRequest( - "POST", - s"/$internalIndex/$estype/$id", - Map("refresh" -> "true").asJava, - entity) + val 
request = new Request("POST", s"/$internalIndex/$estype/$id") + request.addParameter("refresh", "true") + request.setEntity(new NStringEntity(write(accessKey), ContentType.APPLICATION_JSON)) + val response = client.performRequest(request) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) val result = (jsonResponse \ "result").extract[String] result match { @@ -138,10 +133,9 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin def delete(id: String): Unit = { try { - val response = client.performRequest( - "DELETE", - s"/$internalIndex/$estype/$id", - Map("refresh" -> "true").asJava) + val request = new Request("DELETE", s"/$internalIndex/$estype/$id") + request.addParameter("refresh", "true") + val response = client.performRequest(request) val json = parse(EntityUtils.toString(response.getEntity)) val result = (json \ "result").extract[String] result match { diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala index 26621cff35..3ecc85e48b 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala @@ -19,15 +19,13 @@ package org.apache.predictionio.data.storage.elasticsearch import java.io.IOException -import scala.collection.JavaConverters.mapAsJavaMapConverter - import org.apache.http.entity.ContentType import org.apache.http.nio.entity.NStringEntity import org.apache.http.util.EntityUtils import org.apache.predictionio.data.storage.App import org.apache.predictionio.data.storage.Apps import org.apache.predictionio.data.storage.StorageClientConfig -import org.elasticsearch.client.{ResponseException, RestClient} +import org.elasticsearch.client.{Request, ResponseException, RestClient} import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.native.JsonMethods._ @@ -71,9 +69,8 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) def get(id: Int): Option[App] = { try { val response = client.performRequest( - "GET", - s"/$internalIndex/$estype/$id", - Map.empty[String, String].asJava) + new Request("GET", s"/$internalIndex/$estype/$id") + ) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "found").extract[Boolean] match { case true => @@ -101,12 +98,9 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) ("query" -> ("term" -> ("name" -> name))) - val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) - val response = client.performRequest( - "POST", - s"/$internalIndex/$estype/_search", - Map.empty[String, String].asJava, - entity) + val request = new Request("POST", s"/$internalIndex/$estype/_search") + request.setEntity(new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)) + val response = client.performRequest(request) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "hits" \ "total").extract[Long] match { case 0 => None @@ -138,12 +132,10 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) def update(app: App): Unit = { val id = app.id.toString try { - val entity = new NStringEntity(write(app), ContentType.APPLICATION_JSON); - val response = client.performRequest( - "POST", - s"/$internalIndex/$estype/$id", - 
Map("refresh" -> "true").asJava, - entity) + val request = new Request("POST", s"/$internalIndex/$estype/$id") + request.addParameter("refresh", "true") + request.setEntity(new NStringEntity(write(app), ContentType.APPLICATION_JSON)) + val response = client.performRequest(request) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) val result = (jsonResponse \ "result").extract[String] result match { @@ -160,10 +152,9 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) def delete(id: Int): Unit = { try { - val response = client.performRequest( - "DELETE", - s"/$internalIndex/$estype/$id", - Map("refresh" -> "true").asJava) + val request = new Request("DELETE", s"/$internalIndex/$estype/$id") + request.addParameter("refresh", "true") + val response = client.performRequest(request) val json = parse(EntityUtils.toString(response.getEntity)) val result = (json \ "result").extract[String] result match { diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala index ac248debf4..dec94642d8 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala @@ -19,15 +19,13 @@ package org.apache.predictionio.data.storage.elasticsearch import java.io.IOException -import scala.collection.JavaConverters.mapAsJavaMapConverter - import org.apache.http.entity.ContentType import org.apache.http.nio.entity.NStringEntity import org.apache.http.util.EntityUtils import org.apache.predictionio.data.storage.Channel import org.apache.predictionio.data.storage.Channels import org.apache.predictionio.data.storage.StorageClientConfig -import org.elasticsearch.client.{ResponseException, RestClient} +import org.elasticsearch.client.{Request, ResponseException, RestClient} import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.native.JsonMethods._ @@ -69,9 +67,8 @@ class ESChannels(client: RestClient, config: StorageClientConfig, index: String) def get(id: Int): Option[Channel] = { try { val response = client.performRequest( - "GET", - s"/$internalIndex/$estype/$id", - Map.empty[String, String].asJava) + new Request("GET", s"/$internalIndex/$estype/$id") + ) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "found").extract[Boolean] match { case true => @@ -110,12 +107,10 @@ class ESChannels(client: RestClient, config: StorageClientConfig, index: String) def update(channel: Channel): Boolean = { val id = channel.id.toString try { - val entity = new NStringEntity(write(channel), ContentType.APPLICATION_JSON) - val response = client.performRequest( - "POST", - s"/$internalIndex/$estype/$id", - Map("refresh" -> "true").asJava, - entity) + val request = new Request("POST", s"/$internalIndex/$estype/$id") + request.addParameter("refresh", "true") + request.setEntity(new NStringEntity(write(channel), ContentType.APPLICATION_JSON)) + val response = client.performRequest(request) val json = parse(EntityUtils.toString(response.getEntity)) val result = (json \ "result").extract[String] result match { @@ -134,10 +129,9 @@ class ESChannels(client: RestClient, config: StorageClientConfig, index: String) def delete(id: Int): Unit = { try { - val response = client.performRequest( - "DELETE", - 
s"/$internalIndex/$estype/$id", - Map("refresh" -> "true").asJava) + val request = new Request("DELETE", s"/$internalIndex/$estype/$id") + request.addParameter("refresh", "true") + val response = client.performRequest(request) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) val result = (jsonResponse \ "result").extract[String] result match { diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala index 96f8a6720c..7030b578a4 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala @@ -19,8 +19,6 @@ package org.apache.predictionio.data.storage.elasticsearch import java.io.IOException -import scala.collection.JavaConverters.mapAsJavaMapConverter - import org.apache.http.entity.ContentType import org.apache.http.nio.entity.NStringEntity import org.apache.http.util.EntityUtils @@ -28,7 +26,7 @@ import org.apache.predictionio.data.storage.EngineInstance import org.apache.predictionio.data.storage.EngineInstanceSerializer import org.apache.predictionio.data.storage.EngineInstances import org.apache.predictionio.data.storage.StorageClientConfig -import org.elasticsearch.client.{ResponseException, RestClient} +import org.elasticsearch.client.{Request, ResponseException, RestClient} import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.native.JsonMethods._ @@ -81,12 +79,10 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: def preInsert(): Option[String] = { try { - val entity = new NStringEntity("{}", ContentType.APPLICATION_JSON) - val response = client.performRequest( - "POST", - s"/$internalIndex/$estype/", - Map("refresh" -> "true").asJava, - entity) + val request = new Request("POST", s"/$internalIndex/$estype/") + request.addParameter("refresh", "true") + request.setEntity(new NStringEntity("{}", ContentType.APPLICATION_JSON)) + val response = client.performRequest(request) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) val result = (jsonResponse \ "result").extract[String] result match { @@ -106,9 +102,8 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: def get(id: String): Option[EngineInstance] = { try { val response = client.performRequest( - "GET", - s"/$internalIndex/$estype/$id", - Map.empty[String, String].asJava) + new Request("GET", s"/$internalIndex/$estype/$id") + ) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "found").extract[Boolean] match { case true => @@ -183,12 +178,10 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: def update(i: EngineInstance): Unit = { val id = i.id try { - val entity = new NStringEntity(write(i), ContentType.APPLICATION_JSON) - val response = client.performRequest( - "POST", - s"/$internalIndex/$estype/$id", - Map("refresh" -> "true").asJava, - entity) + val request = new Request("POST", s"/$internalIndex/$estype/$id") + request.addParameter("refresh", "true") + request.setEntity(new NStringEntity(write(i), ContentType.APPLICATION_JSON)) + val response = client.performRequest(request) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) val result = (jsonResponse \ "result").extract[String] 
result match { @@ -205,10 +198,9 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: def delete(id: String): Unit = { try { - val response = client.performRequest( - "DELETE", - s"/$internalIndex/$estype/$id", - Map("refresh" -> "true").asJava) + val request = new Request("DELETE", s"/$internalIndex/$estype/$id") + request.addParameter("refresh", "true") + val response = client.performRequest(request) val json = parse(EntityUtils.toString(response.getEntity)) val result = (json \ "result").extract[String] result match { diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala index 0025950d03..1e7b2d74be 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala @@ -19,8 +19,6 @@ package org.apache.predictionio.data.storage.elasticsearch import java.io.IOException -import scala.collection.JavaConverters._ - import org.apache.http.entity.ContentType import org.apache.http.nio.entity.NStringEntity import org.apache.http.util.EntityUtils @@ -28,7 +26,7 @@ import org.apache.predictionio.data.storage.EvaluationInstance import org.apache.predictionio.data.storage.EvaluationInstanceSerializer import org.apache.predictionio.data.storage.EvaluationInstances import org.apache.predictionio.data.storage.StorageClientConfig -import org.elasticsearch.client.{ResponseException, RestClient} +import org.elasticsearch.client.{Request, ResponseException, RestClient} import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.native.JsonMethods._ @@ -78,9 +76,8 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind def get(id: String): Option[EvaluationInstance] = { try { val response = client.performRequest( - "GET", - s"/$internalIndex/$estype/$id", - Map.empty[String, String].asJava) + new Request("GET", s"/$internalIndex/$estype/$id") + ) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "found").extract[Boolean] match { case true => @@ -135,12 +132,10 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind def update(i: EvaluationInstance): Unit = { val id = i.id try { - val entity = new NStringEntity(write(i), ContentType.APPLICATION_JSON) - val response = client.performRequest( - "POST", - s"/$internalIndex/$estype/$id", - Map("refresh" -> "true").asJava, - entity) + val request = new Request("POST", s"/$internalIndex/$estype/$id") + request.addParameter("refresh", "true") + request.setEntity(new NStringEntity(write(i), ContentType.APPLICATION_JSON)) + val response = client.performRequest(request) val json = parse(EntityUtils.toString(response.getEntity)) val result = (json \ "result").extract[String] result match { @@ -157,10 +152,9 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind def delete(id: String): Unit = { try { - val response = client.performRequest( - "DELETE", - s"/$internalIndex/$estype/$id", - Map("refresh" -> "true").asJava) + val request = new Request("DELETE", s"/$internalIndex/$estype/$id") + request.addParameter("refresh", "true") + val response = client.performRequest(request) val json = parse(EntityUtils.toString(response.getEntity)) val result = 
(json \ "result").extract[String] result match { diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala index 708d3d33b3..c524f75759 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala @@ -19,7 +19,6 @@ package org.apache.predictionio.data.storage.elasticsearch import java.io.IOException -import scala.collection.JavaConverters._ import scala.concurrent.ExecutionContext import scala.concurrent.Future import org.apache.http.entity.{ContentType, StringEntity} @@ -28,7 +27,7 @@ import org.apache.http.util.EntityUtils import org.apache.predictionio.data.storage.Event import org.apache.predictionio.data.storage.LEvents import org.apache.predictionio.data.storage.StorageClientConfig -import org.elasticsearch.client.{ResponseException, RestClient} +import org.elasticsearch.client.{Request, RequestOptions, ResponseException, RestClient} import org.joda.time.DateTime import org.json4s._ import org.json4s.JsonDSL._ @@ -36,7 +35,6 @@ import org.json4s.native.JsonMethods._ import org.json4s.native.Serialization.write import org.json4s.ext.JodaTimeSerializers import grizzled.slf4j.Logging -import org.apache.http.message.BasicHeader class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseIndex: String) extends LEvents with Logging { @@ -80,12 +78,11 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd val json = ("query" -> ("match_all" -> List.empty)) - val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) - client.performRequest( - "POST", - s"/$index/$estype/_delete_by_query", - Map("refresh" -> ESUtils.getEventDataRefresh(config)).asJava, - entity).getStatusLine.getStatusCode match { + val request = new Request("POST", s"/$index/$estype/_delete_by_query") + request.addParameter("refresh", ESUtils.getEventDataRefresh(config)) + request.setEntity(new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)) + client.performRequest(request) + .getStatusLine.getStatusCode match { case 200 => true case _ => error(s"Failed to remove $index/$estype") @@ -123,12 +120,10 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd ("prId" -> event.prId) ~ ("creationTime" -> ESUtils.formatUTCDateTime(event.creationTime)) ~ ("properties" -> write(event.properties.toJObject)) - val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) - val response = client.performRequest( - "POST", - s"/$index/$estype/$id", - Map("refresh" -> ESUtils.getEventDataRefresh(config)).asJava, - entity) + val request = new Request("POST", s"/$index/$estype/$id") + request.addParameter("refresh", ESUtils.getEventDataRefresh(config)) + request.setEntity(new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)) + val response = client.performRequest(request) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) val result = (jsonResponse \ "result").extract[String] result match { @@ -183,13 +178,14 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd }.mkString("", "\n", "\n") - val entity = new StringEntity(json) - val response = client.performRequest( - "POST", - "/_bulk", - Map("refresh" -> 
ESUtils.getEventDataRefresh(config)).asJava, - entity, - new BasicHeader("Content-Type", "application/x-ndjson")) + val request = new Request("POST", "/_bulk") + request.addParameter("refresh", ESUtils.getEventDataRefresh(config)) + request.setEntity(new StringEntity(json)) + val options = request.getOptions().toBuilder() + options.addHeader("Content-Type", "application/x-ndjson") + options.setHttpAsyncResponseConsumerFactory(RequestOptions.DEFAULT.getHttpAsyncResponseConsumerFactory) + request.setOptions(options) + val response = client.performRequest(request) val responseJson = parse(EntityUtils.toString(response.getEntity)) val items = (responseJson \ "items").asInstanceOf[JArray] @@ -218,9 +214,8 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd val index = baseIndex + "_" + estype try { client.performRequest( - "GET", - s"/$index/$estype/$id", - Map.empty[String, String].asJava).getStatusLine.getStatusCode match { + new Request("GET", s"/$index/$estype/$id") + ).getStatusLine.getStatusCode match { case 200 => true case _ => false } @@ -250,12 +245,9 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd ("query" -> ("term" -> ("eventId" -> eventId))) - val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) - val response = client.performRequest( - "POST", - s"/$index/$estype/_search", - Map.empty[String, String].asJava, - entity) + val request = new Request("POST", s"/$index/$estype/_search") + request.setEntity(new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)) + val response = client.performRequest(request) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "hits" \ "total").extract[Long] match { case 0 => None @@ -284,12 +276,10 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd ("query" -> ("term" -> ("eventId" -> eventId))) - val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) - val response = client.performRequest( - "POST", - s"/$index/$estype/_delete_by_query", - Map("refresh" -> ESUtils.getEventDataRefresh(config)).asJava, - entity) + val request = new Request("POST", s"/$index/$estype/_delete_by_query") + request.addParameter("refresh", ESUtils.getEventDataRefresh(config)) + request.setEntity(new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)) + val response = client.performRequest(request) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "deleted").extract[Int] > 0 } catch { diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala index a86d378331..44dee6d320 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala @@ -17,8 +17,6 @@ package org.apache.predictionio.data.storage.elasticsearch -import scala.collection.JavaConverters._ - import org.apache.hadoop.conf.Configuration import org.apache.hadoop.io.MapWritable import org.apache.hadoop.io.Text @@ -27,7 +25,7 @@ import org.apache.predictionio.data.storage.PEvents import org.apache.predictionio.data.storage.StorageClientConfig import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD -import 
org.elasticsearch.client.RestClient +import org.elasticsearch.client.{Request, RestClient} import org.elasticsearch.hadoop.mr.EsInputFormat import org.elasticsearch.spark._ import org.joda.time.DateTime @@ -117,12 +115,10 @@ class ESPEvents(client: RestClient, config: StorageClientConfig, baseIndex: Stri ("query" -> ("term" -> ("eventId" -> eventId))) - val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) - val response = client.performRequest( - "POST", - s"/$index/$estype/_delete_by_query", - Map("refresh" -> ESUtils.getEventDataRefresh(config)).asJava, - entity) + val request = new Request("POST", s"/$index/$estype/_delete_by_query") + request.addParameter("refresh", ESUtils.getEventDataRefresh(config)) + request.setEntity(new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)) + val response = client.performRequest(request) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) val result = (jsonResponse \ "result").extract[String] result match { diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala index ade0f40ce9..06e12e73cb 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala @@ -19,14 +19,12 @@ package org.apache.predictionio.data.storage.elasticsearch import java.io.IOException -import scala.collection.JavaConverters._ - import org.apache.http.entity.ContentType import org.apache.http.nio.entity.NStringEntity import org.apache.http.util.EntityUtils import org.apache.predictionio.data.storage.StorageClientConfig import org.apache.predictionio.data.storage.StorageClientException -import org.elasticsearch.client.RestClient +import org.elasticsearch.client.{Request, RestClient} import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.native.JsonMethods._ @@ -48,12 +46,10 @@ class ESSequences(client: RestClient, config: StorageClientConfig, index: String def genNext(name: String): Long = { try { - val entity = new NStringEntity(write("n" -> name), ContentType.APPLICATION_JSON) - val response = client.performRequest( - "POST", - s"/$internalIndex/$estype/$name", - Map("refresh" -> "false").asJava, - entity) + val request = new Request("POST", s"/$internalIndex/$estype/$name") + request.addParameter("refresh", "false") + request.setEntity(new NStringEntity(write("n" -> name), ContentType.APPLICATION_JSON)) + val response = client.performRequest(request) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) val result = (jsonResponse \ "result").extract[String] result match { diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala index 93d5d94912..794f4f892d 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala @@ -17,12 +17,9 @@ package org.apache.predictionio.data.storage.elasticsearch -import scala.collection.JavaConversions._ -import scala.collection.JavaConverters._ - import org.apache.http.entity.ContentType import 
org.apache.http.nio.entity.NStringEntity -import org.elasticsearch.client.RestClient +import org.elasticsearch.client.{Request, RestClient} import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.native.JsonMethods._ @@ -96,12 +93,10 @@ object ESUtils { query: String, size: Int)( implicit formats: Formats): Seq[JValue] = { - val entity = new NStringEntity(query, ContentType.APPLICATION_JSON) - val response = client.performRequest( - "POST", - s"/$index/$estype/_search", - Map("size" -> s"${size}"), - entity) + val request = new Request("POST", s"/$index/$estype/_search") + request.addParameter("size", s"$size") + request.setEntity(new NStringEntity(query, ContentType.APPLICATION_JSON)) + val response = client.performRequest(request) val responseJValue = parse(EntityUtils.toString(response.getEntity)) val hits = (responseJValue \ "hits" \ "hits").extract[Seq[JValue]] hits.map(h => (h \ "_source")) @@ -137,12 +132,9 @@ object ESUtils { if (hits.isEmpty) results else { val json = ("scroll" -> scrollLife) ~ ("scroll_id" -> scrollId) - val scrollBody = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) - val response = client.performRequest( - "POST", - "/_search/scroll", - Map[String, String](), - scrollBody) + val request = new Request("POST", "/_search/scroll") + request.setEntity(new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)) + val response = client.performRequest(request) val responseJValue = parse(EntityUtils.toString(response.getEntity)) scroll((responseJValue \ "_scroll_id").extract[String], (responseJValue \ "hits" \ "hits").extract[Seq[JValue]], @@ -150,12 +142,10 @@ object ESUtils { } } - val entity = new NStringEntity(query, ContentType.APPLICATION_JSON) - val response = client.performRequest( - "POST", - s"/$index/$estype/_search", - Map("scroll" -> scrollLife), - entity) + val request = new Request("POST", s"/$index/$estype/_search") + request.addParameter("scroll", scrollLife) + request.setEntity(new NStringEntity(query, ContentType.APPLICATION_JSON)) + val response = client.performRequest(request) val responseJValue = parse(EntityUtils.toString(response.getEntity)) scroll((responseJValue \ "_scroll_id").extract[String], (responseJValue \ "hits" \ "hits").extract[Seq[JValue]], @@ -166,14 +156,12 @@ object ESUtils { client: RestClient, index: String): Unit = { client.performRequest( - "HEAD", - s"/$index", - Map.empty[String, String].asJava).getStatusLine.getStatusCode match { + new Request("HEAD", s"/$index") + ).getStatusLine.getStatusCode match { case 404 => client.performRequest( - "PUT", - s"/$index", - Map.empty[String, String].asJava) + new Request("PUT", s"/$index") + ) case 200 => case _ => throw new IllegalStateException(s"/$index is invalid.") @@ -186,16 +174,12 @@ object ESUtils { estype: String, json: String): Unit = { client.performRequest( - "HEAD", - s"/$index/_mapping/$estype", - Map.empty[String, String].asJava).getStatusLine.getStatusCode match { + new Request("HEAD", s"/$index/_mapping/$estype") + ).getStatusLine.getStatusCode match { case 404 => - val entity = new NStringEntity(json, ContentType.APPLICATION_JSON) - client.performRequest( - "PUT", - s"/$index/_mapping/$estype", - Map.empty[String, String].asJava, - entity) + val request = new Request("PUT", s"/$index/_mapping/$estype") + request.setEntity(new NStringEntity(json, ContentType.APPLICATION_JSON)) + client.performRequest(request) case 200 => case _ => throw new IllegalStateException(s"/$index/$estype is invalid: $json") diff --git 
a/storage/hbase/build.sbt b/storage/hbase/build.sbt index 5e412b06cf..2a2d55c58e 100644 --- a/storage/hbase/build.sbt +++ b/storage/hbase/build.sbt @@ -29,11 +29,10 @@ libraryDependencies ++= Seq( "org.apache.hbase" % "hbase-server" % hbaseVersion.value exclude("org.apache.hbase", "hbase-client") exclude("org.apache.zookeeper", "zookeeper") - exclude("javax.servlet", "servlet-api") exclude("org.mortbay.jetty", "servlet-api-2.5") exclude("org.mortbay.jetty", "jsp-api-2.1") exclude("org.mortbay.jetty", "jsp-2.1"), - "org.specs2" %% "specs2" % "2.3.13" % "test") + "org.specs2" %% "specs2-core" % "4.5.1" % "test") parallelExecution in Test := false diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBEventsUtil.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBEventsUtil.scala index 4b0ad9a5a2..8115209f91 100644 --- a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBEventsUtil.scala +++ b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBEventsUtil.scala @@ -163,11 +163,11 @@ object HBEventsUtil { val put = new Put(rowKey.toBytes, event.eventTime.getMillis) def addStringToE(col: Array[Byte], v: String): Put = { - put.add(eBytes, col, Bytes.toBytes(v)) + put.addColumn(eBytes, col, Bytes.toBytes(v)) } def addLongToE(col: Array[Byte], v: Long): Put = { - put.add(eBytes, col, Bytes.toBytes(v)) + put.addColumn(eBytes, col, Bytes.toBytes(v)) } addStringToE(colNames("event"), event.event) @@ -310,12 +310,12 @@ object HBEventsUtil { // If you specify a startRow and stopRow, // to scan in reverse, the startRow needs to be lexicographically // after the stopRow. - scan.setStartRow(stop) - scan.setStopRow(start) + scan.withStartRow(stop) + scan.withStopRow(start) scan.setReversed(true) } else { - scan.setStartRow(start) - scan.setStopRow(stop) + scan.withStartRow(start) + scan.withStopRow(stop) } } case (_, _) => { diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBLEvents.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBLEvents.scala index e95e7e82b1..97ce9ba990 100644 --- a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBLEvents.scala +++ b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBLEvents.scala @@ -42,8 +42,10 @@ class HBLEvents(val client: HBClient, config: StorageClientConfig, val namespace def resultToEvent(result: Result, appId: Int): Event = HBEventsUtil.resultToEvent(result, appId) - def getTable(appId: Int, channelId: Option[Int] = None): HTableInterface = - client.connection.getTable(HBEventsUtil.tableName(namespace, appId, channelId)) + def getTable(appId: Int, channelId: Option[Int] = None): Table = { + val tableName = TableName.valueOf(HBEventsUtil.tableName(namespace, appId, channelId)) + client.connection.getTable(tableName) + } override def init(appId: Int, channelId: Option[Int] = None): Boolean = { @@ -103,7 +105,6 @@ class HBLEvents(val client: HBClient, config: StorageClientConfig, val namespace val table = getTable(appId, channelId) val (put, rowKey) = HBEventsUtil.eventToPut(event, appId) table.put(put) - table.flushCommits() table.close() rowKey.toString } @@ -117,7 +118,6 @@ class HBLEvents(val client: HBClient, config: StorageClientConfig, val namespace val table = getTable(appId, channelId) val (puts, rowKeys) = events.map { event => HBEventsUtil.eventToPut(event, appId) }.unzip table.put(puts) - table.flushCommits() table.close() 
rowKeys.map(_.toString) } diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBPEvents.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBPEvents.scala index 7324fa68e9..66d406665f 100644 --- a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBPEvents.scala +++ b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBPEvents.scala @@ -119,6 +119,7 @@ class HBPEvents(client: HBClient, config: StorageClientConfig, namespace: String conf.set(TableOutputFormat.OUTPUT_TABLE, tableName) + val table = new HTable(conf, tableName) iter.foreach { id => val rowKey = HBEventsUtil.RowKey(id) diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/StorageClient.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/StorageClient.scala index 1720410150..df1a3f9911 100644 --- a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/StorageClient.scala +++ b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/StorageClient.scala @@ -25,15 +25,15 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hbase.HBaseConfiguration import org.apache.hadoop.hbase.MasterNotRunningException import org.apache.hadoop.hbase.ZooKeeperConnectionException -import org.apache.hadoop.hbase.client.HConnectionManager -import org.apache.hadoop.hbase.client.HConnection +import org.apache.hadoop.hbase.client.ConnectionFactory +import org.apache.hadoop.hbase.client.Connection import org.apache.hadoop.hbase.client.HBaseAdmin import grizzled.slf4j.Logging case class HBClient( val conf: Configuration, - val connection: HConnection, + val connection: Connection, val admin: HBaseAdmin ) @@ -73,7 +73,7 @@ class StorageClient(val config: StorageClientConfig) } } - val connection = HConnectionManager.createConnection(conf) + val connection = ConnectionFactory.createConnection(conf) val client = HBClient( conf = conf, diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/HB_0_8_0.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/HB_0_8_0.scala deleted file mode 100644 index 795cf7e290..0000000000 --- a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/HB_0_8_0.scala +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -package org.apache.predictionio.data.storage.hbase.upgrade - -import org.apache.predictionio.annotation.Experimental - -import org.apache.predictionio.data.storage.Event -import org.apache.predictionio.data.storage.EventValidation -import org.apache.predictionio.data.storage.DataMap - -import org.apache.hadoop.hbase.client.Scan -import org.apache.hadoop.hbase.client.HConnection -import org.apache.hadoop.hbase.client.Result -import org.apache.hadoop.hbase.TableName -import org.apache.hadoop.hbase.util.Bytes - -import org.joda.time.DateTime -import org.joda.time.DateTimeZone - -import org.json4s.DefaultFormats -import org.json4s.JObject -import org.json4s.native.Serialization.{ read, write } - -import org.apache.commons.codec.binary.Base64 - -import scala.collection.JavaConversions._ - -/** :: Experimental :: */ -@Experimental -object HB_0_8_0 { - - implicit val formats = DefaultFormats - - def getByAppId( - connection: HConnection, - namespace: String, - appId: Int): Iterator[Event] = { - val tableName = TableName.valueOf(namespace, "events") - val table = connection.getTable(tableName) - val start = PartialRowKey(appId) - val stop = PartialRowKey(appId + 1) - val scan = new Scan(start.toBytes, stop.toBytes) - val scanner = table.getScanner(scan) - table.close() - scanner.iterator().map { resultToEvent(_) } - } - - val colNames: Map[String, Array[Byte]] = Map( - "event" -> "e", - "entityType" -> "ety", - "entityId" -> "eid", - "targetEntityType" -> "tety", - "targetEntityId" -> "teid", - "properties" -> "p", - "prId" -> "pk", // columna name is 'pk' in 0.8.0/0.8.1 - "eventTimeZone" -> "etz", - "creationTimeZone" -> "ctz" - ).mapValues(Bytes.toBytes(_)) - - - class RowKey( - val appId: Int, - val millis: Long, - val uuidLow: Long - ) { - lazy val toBytes: Array[Byte] = { - // add UUID least significant bits for multiple actions at the same time - // (UUID's most significant bits are actually timestamp, - // use eventTime instead). - Bytes.toBytes(appId) ++ Bytes.toBytes(millis) ++ Bytes.toBytes(uuidLow) - } - override def toString: String = { - Base64.encodeBase64URLSafeString(toBytes) - } - } - - object RowKey { - // get RowKey from string representation - def apply(s: String): RowKey = { - try { - apply(Base64.decodeBase64(s)) - } catch { - case e: Exception => throw new RowKeyException( - s"Failed to convert String ${s} to RowKey because ${e}", e) - } - } - - def apply(b: Array[Byte]): RowKey = { - if (b.size != 20) { - val bString = b.mkString(",") - throw new RowKeyException( - s"Incorrect byte array size. Bytes: ${bString}.") - } - - new RowKey( - appId = Bytes.toInt(b.slice(0, 4)), - millis = Bytes.toLong(b.slice(4, 12)), - uuidLow = Bytes.toLong(b.slice(12, 20)) - ) - } - } - - class RowKeyException(msg: String, cause: Exception) - extends Exception(msg, cause) { - def this(msg: String) = this(msg, null) - } - - case class PartialRowKey(val appId: Int, val millis: Option[Long] = None) { - val toBytes: Array[Byte] = { - Bytes.toBytes(appId) ++ - (millis.map(Bytes.toBytes(_)).getOrElse(Array[Byte]())) - } - } - - def resultToEvent(result: Result): Event = { - val rowKey = RowKey(result.getRow()) - - val eBytes = Bytes.toBytes("e") - // val e = result.getFamilyMap(eBytes) - - def getStringCol(col: String): String = { - val r = result.getValue(eBytes, colNames(col)) - require(r != null, - s"Failed to get value for column ${col}. 
" + - s"Rowkey: ${rowKey.toString} " + - s"StringBinary: ${Bytes.toStringBinary(result.getRow())}.") - - Bytes.toString(r) - } - - def getOptStringCol(col: String): Option[String] = { - val r = result.getValue(eBytes, colNames(col)) - if (r == null) { - None - } else { - Some(Bytes.toString(r)) - } - } - - def getTimestamp(col: String): Long = { - result.getColumnLatestCell(eBytes, colNames(col)).getTimestamp() - } - - val event = getStringCol("event") - val entityType = getStringCol("entityType") - val entityId = getStringCol("entityId") - val targetEntityType = getOptStringCol("targetEntityType") - val targetEntityId = getOptStringCol("targetEntityId") - val properties: DataMap = getOptStringCol("properties") - .map(s => DataMap(read[JObject](s))).getOrElse(DataMap()) - val prId = getOptStringCol("prId") - val eventTimeZone = getOptStringCol("eventTimeZone") - .map(DateTimeZone.forID(_)) - .getOrElse(EventValidation.defaultTimeZone) - val creationTimeZone = getOptStringCol("creationTimeZone") - .map(DateTimeZone.forID(_)) - .getOrElse(EventValidation.defaultTimeZone) - - val creationTime: DateTime = new DateTime( - getTimestamp("event"), creationTimeZone - ) - - Event( - eventId = Some(RowKey(result.getRow()).toString), - event = event, - entityType = entityType, - entityId = entityId, - targetEntityType = targetEntityType, - targetEntityId = targetEntityId, - properties = properties, - eventTime = new DateTime(rowKey.millis, eventTimeZone), - tags = Nil, - prId = prId, - creationTime = creationTime - ) - } -} diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade.scala deleted file mode 100644 index 1759561207..0000000000 --- a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade.scala +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -package org.apache.predictionio.data.storage.hbase.upgrade - -import org.apache.predictionio.annotation.Experimental - -import org.apache.predictionio.data.storage.Storage -import org.apache.predictionio.data.storage.hbase.HBLEvents -import org.apache.predictionio.data.storage.hbase.HBEventsUtil - -import scala.collection.JavaConversions._ - -/** :: Experimental :: */ -@Experimental -object Upgrade { - - def main(args: Array[String]) { - val fromAppId = args(0).toInt - val toAppId = args(1).toInt - val batchSize = args.lift(2).map(_.toInt).getOrElse(100) - val fromNamespace = args.lift(3).getOrElse("predictionio_eventdata") - - upgrade(fromAppId, toAppId, batchSize, fromNamespace) - } - - /* For upgrade from 0.8.0 or 0.8.1 to 0.8.2 only */ - def upgrade( - fromAppId: Int, - toAppId: Int, - batchSize: Int, - fromNamespace: String) { - - val events = Storage.getLEvents().asInstanceOf[HBLEvents] - - // Assume already run "pio app new " (new app already created) - // TODO: check if new table empty and warn user if not - val newTable = events.getTable(toAppId) - - val newTableName = newTable.getName().getNameAsString() - println(s"Copying data from ${fromNamespace}:events for app ID ${fromAppId}" - + s" to new HBase table ${newTableName}...") - - HB_0_8_0.getByAppId( - events.client.connection, - fromNamespace, - fromAppId).grouped(batchSize).foreach { eventGroup => - val puts = eventGroup.map{ e => - val (put, rowkey) = HBEventsUtil.eventToPut(e, toAppId) - put - } - newTable.put(puts.toList) - } - - newTable.flushCommits() - newTable.close() - println("Done.") - } - -} diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade_0_8_3.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade_0_8_3.scala deleted file mode 100644 index de74d46dce..0000000000 --- a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade_0_8_3.scala +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -package org.apache.predictionio.data.storage.hbase.upgrade - -import org.apache.predictionio.annotation.Experimental - -import grizzled.slf4j.Logger -import org.apache.predictionio.data.storage.Storage -import org.apache.predictionio.data.storage.DataMap -import org.apache.predictionio.data.storage.hbase.HBLEvents -import org.apache.predictionio.data.storage.hbase.HBEventsUtil - -import scala.collection.JavaConversions._ - -import scala.concurrent._ -import ExecutionContext.Implicits.global -import org.apache.predictionio.data.storage.LEvents -import scala.concurrent.Await -import scala.concurrent.duration.Duration -import java.lang.Thread - -object CheckDistribution { - def entityType(eventClient: LEvents, appId: Int) - : Map[(String, Option[String]), Int] = { - eventClient - .find(appId = appId) - .foldLeft(Map[(String, Option[String]), Int]().withDefaultValue(0)) { - case (m, e) => { - val k = (e.entityType, e.targetEntityType) - m.updated(k, m(k) + 1) - } - } - } - - def runMain(appId: Int) { - val eventClient = Storage.getLEvents().asInstanceOf[HBLEvents] - - entityType(eventClient, appId) - .toSeq - .sortBy(-_._2) - .foreach { println } - - } - - def main(args: Array[String]) { - runMain(args(0).toInt) - } - -} - -/** :: Experimental :: */ -@Experimental -object Upgrade_0_8_3 { - val NameMap = Map( - "pio_user" -> "user", - "pio_item" -> "item") - val RevNameMap = NameMap.toSeq.map(_.swap).toMap - - val logger = Logger[this.type] - - def main(args: Array[String]) { - val fromAppId = args(0).toInt - val toAppId = args(1).toInt - - runMain(fromAppId, toAppId) - } - - def runMain(fromAppId: Int, toAppId: Int): Unit = { - upgrade(fromAppId, toAppId) - } - - - val obsEntityTypes = Set("pio_user", "pio_item") - val obsProperties = Set( - "pio_itypes", "pio_starttime", "pio_endtime", - "pio_inactive", "pio_price", "pio_rating") - - def hasPIOPrefix(eventClient: LEvents, appId: Int): Boolean = { - eventClient.find(appId = appId).filter( e => - (obsEntityTypes.contains(e.entityType) || - e.targetEntityType.map(obsEntityTypes.contains(_)).getOrElse(false) || - (!e.properties.keySet.forall(!obsProperties.contains(_))) - ) - ).hasNext - } - - def isEmpty(eventClient: LEvents, appId: Int): Boolean = - !eventClient.find(appId = appId).hasNext - - - def upgradeCopy(eventClient: LEvents, fromAppId: Int, toAppId: Int) { - val fromDist = CheckDistribution.entityType(eventClient, fromAppId) - - logger.info("FromAppId Distribution") - fromDist.toSeq.sortBy(-_._2).foreach { e => logger.info(e) } - - val events = eventClient - .find(appId = fromAppId) - .zipWithIndex - .foreach { case (fromEvent, index) => { - if (index % 50000 == 0) { - // logger.info(s"Progress: $fromEvent $index") - logger.info(s"Progress: $index") - } - - - val fromEntityType = fromEvent.entityType - val toEntityType = NameMap.getOrElse(fromEntityType, fromEntityType) - - val fromTargetEntityType = fromEvent.targetEntityType - val toTargetEntityType = fromTargetEntityType - .map { et => NameMap.getOrElse(et, et) } - - val toProperties = DataMap(fromEvent.properties.fields.map { - case (k, v) => - val newK = if (obsProperties.contains(k)) { - val nK = k.stripPrefix("pio_") - logger.info(s"property ${k} will be renamed to ${nK}") - nK - } else k - (newK, v) - }) - - val toEvent = fromEvent.copy( - entityType = toEntityType, - targetEntityType = toTargetEntityType, - properties = toProperties) - - eventClient.insert(toEvent, toAppId) - }} - - - val toDist = CheckDistribution.entityType(eventClient, toAppId) - - 
logger.info("Recap fromAppId Distribution") - fromDist.toSeq.sortBy(-_._2).foreach { e => logger.info(e) } - - logger.info("ToAppId Distribution") - toDist.toSeq.sortBy(-_._2).foreach { e => logger.info(e) } - - val fromGood = fromDist - .toSeq - .forall { case (k, c) => { - val (et, tet) = k - val net = NameMap.getOrElse(et, et) - val ntet = tet.map(tet => NameMap.getOrElse(tet, tet)) - val nk = (net, ntet) - val nc = toDist.getOrElse(nk, -1) - val checkMatch = (c == nc) - if (!checkMatch) { - logger.info(s"${k} doesn't match: old has ${c}. new has ${nc}.") - } - checkMatch - }} - - val toGood = toDist - .toSeq - .forall { case (k, c) => { - val (et, tet) = k - val oet = RevNameMap.getOrElse(et, et) - val otet = tet.map(tet => RevNameMap.getOrElse(tet, tet)) - val ok = (oet, otet) - val oc = fromDist.getOrElse(ok, -1) - val checkMatch = (c == oc) - if (!checkMatch) { - logger.info(s"${k} doesn't match: new has ${c}. old has ${oc}.") - } - checkMatch - }} - - if (!fromGood || !toGood) { - logger.error("Doesn't match!! There is an import error.") - } else { - logger.info("Count matches. Looks like we are good to go.") - } - } - - /* For upgrade from 0.8.2 to 0.8.3 only */ - def upgrade(fromAppId: Int, toAppId: Int) { - - val eventClient = Storage.getLEvents().asInstanceOf[HBLEvents] - - require(fromAppId != toAppId, - s"FromAppId: $fromAppId must be different from toAppId: $toAppId") - - if (hasPIOPrefix(eventClient, fromAppId)) { - require( - isEmpty(eventClient, toAppId), - s"Target appId: $toAppId is not empty. Please run " + - "`pio app data-delete ` to clean the data before upgrading") - - logger.info(s"$fromAppId isEmpty: " + isEmpty(eventClient, fromAppId)) - - upgradeCopy(eventClient, fromAppId, toAppId) - - } else { - logger.info(s"From appId: ${fromAppId} doesn't contain" - + s" obsolete entityTypes ${obsEntityTypes} or" - + s" obsolete properties ${obsProperties}." - + " No need data migration." 
- + s" You can continue to use appId ${fromAppId}.") - } - - logger.info("Done.") - } - - -} diff --git a/storage/hdfs/build.sbt b/storage/hdfs/build.sbt index 50ec37928d..d9da828ed9 100644 --- a/storage/hdfs/build.sbt +++ b/storage/hdfs/build.sbt @@ -23,8 +23,7 @@ libraryDependencies ++= Seq( "org.apache.hadoop" % "hadoop-common" % hadoopVersion.value exclude("commons-beanutils", "*"), "org.apache.hadoop" % "hadoop-hdfs" % hadoopVersion.value, - "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided", - "org.scalatest" %% "scalatest" % "2.1.7" % "test") + "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided") parallelExecution in Test := false diff --git a/storage/jdbc/build.sbt b/storage/jdbc/build.sbt index 90265405fb..44076b4b19 100644 --- a/storage/jdbc/build.sbt +++ b/storage/jdbc/build.sbt @@ -22,9 +22,9 @@ name := "apache-predictionio-data-jdbc" libraryDependencies ++= Seq( "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided", "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided", - "org.scalikejdbc" %% "scalikejdbc" % "3.1.0", - "org.postgresql" % "postgresql" % "9.4-1204-jdbc41" % "test", - "org.specs2" %% "specs2" % "2.3.13" % "test") + "org.scalikejdbc" %% "scalikejdbc-joda-time" % "3.3.4", + "org.postgresql" % "postgresql" % "42.2.5" % "test", + "org.specs2" %% "specs2-core" % "4.5.1" % "test") parallelExecution in Test := false diff --git a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEngineInstances.scala b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEngineInstances.scala index 13c374d92a..d99bd1038b 100644 --- a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEngineInstances.scala +++ b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEngineInstances.scala @@ -23,6 +23,7 @@ import org.apache.predictionio.data.storage.EngineInstance import org.apache.predictionio.data.storage.EngineInstances import org.apache.predictionio.data.storage.StorageClientConfig import scalikejdbc._ +import scalikejdbc.jodatime.JodaWrappedResultSet._ /** JDBC implementation of [[EngineInstances]] */ class JDBCEngineInstances(client: String, config: StorageClientConfig, prefix: String) diff --git a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEvaluationInstances.scala b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEvaluationInstances.scala index 90eb5f3adb..5bd877f668 100644 --- a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEvaluationInstances.scala +++ b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEvaluationInstances.scala @@ -23,6 +23,7 @@ import org.apache.predictionio.data.storage.EvaluationInstance import org.apache.predictionio.data.storage.EvaluationInstances import org.apache.predictionio.data.storage.StorageClientConfig import scalikejdbc._ +import scalikejdbc.jodatime.JodaWrappedResultSet._ /** JDBC implementations of [[EvaluationInstances]] */ class JDBCEvaluationInstances(client: String, config: StorageClientConfig, prefix: String) diff --git a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCLEvents.scala b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCLEvents.scala index b4230ccd11..17222c9783 100644 --- a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCLEvents.scala +++ 
b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCLEvents.scala @@ -29,6 +29,7 @@ import org.json4s.JObject import org.json4s.native.Serialization.read import org.json4s.native.Serialization.write import scalikejdbc._ +import scalikejdbc.jodatime.JodaWrappedResultSet._ import scala.concurrent.ExecutionContext import scala.concurrent.Future diff --git a/storage/localfs/build.sbt b/storage/localfs/build.sbt index 2306d24f7c..61f07ebeb3 100644 --- a/storage/localfs/build.sbt +++ b/storage/localfs/build.sbt @@ -20,8 +20,7 @@ import PIOBuild._ name := "apache-predictionio-data-localfs" libraryDependencies ++= Seq( - "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided", - "org.scalatest" %% "scalatest" % "2.1.7" % "test") + "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided") parallelExecution in Test := false diff --git a/storage/s3/build.sbt b/storage/s3/build.sbt index 40222090ab..f587b90627 100644 --- a/storage/s3/build.sbt +++ b/storage/s3/build.sbt @@ -21,9 +21,7 @@ name := "apache-predictionio-data-s3" libraryDependencies ++= Seq( "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided", - "com.google.guava" % "guava" % "14.0.1" % "provided", - "com.amazonaws" % "aws-java-sdk-s3" % "1.11.132", - "org.scalatest" %% "scalatest" % "2.1.7" % "test") + "com.amazonaws" % "aws-java-sdk-s3" % "1.11.571") parallelExecution in Test := false @@ -31,11 +29,6 @@ pomExtra := childrenPomExtra.value assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false) -assemblyShadeRules in assembly := Seq( - ShadeRule.rename("org.apache.http.**" -> "shadeio.data.s3.http.@1").inAll, - ShadeRule.rename("com.fasterxml.**" -> "shadeio.data.s3.fasterxml.@1").inAll -) - // skip test in assembly test in assembly := {} diff --git a/tools/build.sbt b/tools/build.sbt index acdb1fe4a4..069a3134c6 100644 --- a/tools/build.sbt +++ b/tools/build.sbt @@ -23,9 +23,8 @@ name := "apache-predictionio-tools" libraryDependencies ++= Seq( "com.github.zafarkhaja" % "java-semver" % "0.9.0", "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided", - "com.typesafe.akka" %% "akka-slf4j" % akkaVersion.value, - "com.typesafe.akka" %% "akka-http-testkit" % "10.1.5" % "test", - "org.specs2" %% "specs2-core" % "4.2.0" % "test") + "com.typesafe.akka" %% "akka-http-testkit" % "10.1.8" % "test", + "org.specs2" %% "specs2-core" % "4.5.1" % "test") assemblyMergeStrategy in assembly := { case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat @@ -38,18 +37,12 @@ assemblyMergeStrategy in assembly := { assemblyExcludedJars in assembly := { val cp = (fullClasspath in assembly).value cp filter { _.data.getName match { - case "reflectasm-1.10.1.jar" => true - case "kryo-3.0.3.jar" => true + case "reflectasm-1.11.7.jar" => true + case "kryo-5.0.0-RC1.jar" => true case _ => false }} } -assemblyShadeRules in assembly := Seq( - ShadeRule.rename("org.objenesis.**" -> "shadeio.@1").inLibrary("com.esotericsoftware.kryo" % "kryo" % "2.21").inProject, - ShadeRule.rename("com.esotericsoftware.reflectasm.**" -> "shadeio.@1").inLibrary("com.esotericsoftware.kryo" % "kryo" % "2.21").inProject, - ShadeRule.rename("com.esotericsoftware.minlog.**" -> "shadeio.@1").inLibrary("com.esotericsoftware.kryo" % "kryo" % "2.21").inProject -) - // skip test in assembly test in assembly := {} diff --git a/tools/src/main/scala/org/apache/predictionio/tools/console/Pio.scala 
b/tools/src/main/scala/org/apache/predictionio/tools/console/Pio.scala index 1b4c8a86ec..a3691d009a 100644 --- a/tools/src/main/scala/org/apache/predictionio/tools/console/Pio.scala +++ b/tools/src/main/scala/org/apache/predictionio/tools/console/Pio.scala @@ -24,6 +24,7 @@ import grizzled.slf4j.Logging import scala.concurrent.Await import scala.concurrent.duration.Duration +import scala.io.StdIn import scala.language.implicitConversions import scala.sys.process._ @@ -238,7 +239,7 @@ object Pio extends Logging { info(f" ${ch.name}%16s | ${ch.id}%10s") } - val choice = if(force) "YES" else readLine("Enter 'YES' to proceed: ") + val choice = if(force) "YES" else StdIn.readLine("Enter 'YES' to proceed: ") choice match { case "YES" => AppCmd.delete(name) @@ -278,7 +279,7 @@ object Pio extends Logging { info(s" App ID: ${appDesc.app.id}") info(s" Description: ${appDesc.app.description}") - val choice = if(force) "YES" else readLine("Enter 'YES' to proceed: ") + val choice = if(force) "YES" else StdIn.readLine("Enter 'YES' to proceed: ") choice match { case "YES" => AppCmd.dataDelete(name, channel, all) @@ -307,7 +308,7 @@ object Pio extends Logging { info(s" Channel ID: ${chan.id}") info(s" App Name: ${appDesc.app.name}") info(s" App ID: ${appDesc.app.id}") - val choice = if(force) "YES" else readLine("Enter 'YES' to proceed: ") + val choice = if(force) "YES" else StdIn.readLine("Enter 'YES' to proceed: ") choice match { case "YES" => AppCmd.channelDelete(appName, deleteChannel) diff --git a/tools/src/main/scala/org/apache/predictionio/tools/export/EventsToFile.scala b/tools/src/main/scala/org/apache/predictionio/tools/export/EventsToFile.scala index 9b6dbb5316..fcbf29897e 100644 --- a/tools/src/main/scala/org/apache/predictionio/tools/export/EventsToFile.scala +++ b/tools/src/main/scala/org/apache/predictionio/tools/export/EventsToFile.scala @@ -26,7 +26,7 @@ import org.apache.predictionio.workflow.WorkflowContext import org.apache.predictionio.workflow.WorkflowUtils import org.apache.predictionio.workflow.CleanupFunctions import grizzled.slf4j.Logging -import org.apache.spark.sql.{SaveMode, SparkSession} +import org.apache.spark.sql.{Encoders, SaveMode, SparkSession} import org.json4s.native.Serialization._ case class EventsToFileArgs( @@ -98,7 +98,7 @@ object EventsToFile extends Logging { if (args.format == "json") { jsonStringRdd.saveAsTextFile(args.outputPath) } else { - val jsonDf = sqlSession.read.json(jsonStringRdd) + val jsonDf = sqlSession.read.json(sqlSession.createDataset(jsonStringRdd)(Encoders.STRING)) jsonDf.write.mode(SaveMode.ErrorIfExists).parquet(args.outputPath) } info(s"Events are exported to ${args.outputPath}/.") From 3aff7a514c8373d66cdf8c718602c6fae3ceeb74 Mon Sep 17 00:00:00 2001 From: shimamoto Date: Tue, 18 Jun 2019 10:46:07 +0900 Subject: [PATCH 02/14] fixup --- conf/pio-vendors.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/pio-vendors.sh b/conf/pio-vendors.sh index bb067d76f1..14457f798d 100644 --- a/conf/pio-vendors.sh +++ b/conf/pio-vendors.sh @@ -36,7 +36,7 @@ if [ -z "$PIO_ELASTICSEARCH_VERSION" ]; then fi if [ -z "$PIO_HBASE_VERSION" ]; then - PIO_HBASE_VERSION="1.2.6" + PIO_HBASE_VERSION="1.4.10" fi export ES_IMAGE="docker.elastic.co/elasticsearch/elasticsearch" From 8d4c68b0d68a31833e624fff3099b618128d6b68 Mon Sep 17 00:00:00 2001 From: shimamoto Date: Wed, 19 Jun 2019 17:46:00 +0900 Subject: [PATCH 03/14] fixup --- storage/elasticsearch/build.sbt | 1 - .../storage/elasticsearch/ESAccessKeys.scala | 26 +++++--- 
.../data/storage/elasticsearch/ESApps.scala | 35 ++++++---- .../storage/elasticsearch/ESChannels.scala | 26 +++++--- .../elasticsearch/ESEngineInstances.scala | 36 +++++++---- .../elasticsearch/ESEvaluationInstances.scala | 26 +++++--- .../storage/elasticsearch/ESLEvents.scala | 64 +++++++++++-------- .../storage/elasticsearch/ESPEvents.scala | 14 ++-- .../storage/elasticsearch/ESSequences.scala | 14 ++-- .../data/storage/elasticsearch/ESUtils.scala | 58 +++++++++++------ .../tools/export/EventsToFile.scala | 4 +- 11 files changed, 186 insertions(+), 118 deletions(-) diff --git a/storage/elasticsearch/build.sbt b/storage/elasticsearch/build.sbt index f474330382..5b31854e2c 100644 --- a/storage/elasticsearch/build.sbt +++ b/storage/elasticsearch/build.sbt @@ -25,7 +25,6 @@ libraryDependencies ++= Seq( "org.elasticsearch.client" % "elasticsearch-rest-client" % elasticsearchVersion.value, "org.elasticsearch" %% "elasticsearch-spark-20" % elasticsearchVersion.value exclude("org.apache.spark", "*"), -// "org.elasticsearch" % "elasticsearch-hadoop-mr" % elasticsearchVersion.value, "org.specs2" %% "specs2-core" % "4.5.1" % "test") parallelExecution in Test := false diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala index 7c5fb74bef..eef83e4f68 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala @@ -19,13 +19,15 @@ package org.apache.predictionio.data.storage.elasticsearch import java.io.IOException +import scala.collection.JavaConverters.mapAsJavaMapConverter + import org.apache.http.entity.ContentType import org.apache.http.nio.entity.NStringEntity import org.apache.http.util.EntityUtils import org.apache.predictionio.data.storage.AccessKey import org.apache.predictionio.data.storage.AccessKeys import org.apache.predictionio.data.storage.StorageClientConfig -import org.elasticsearch.client.{Request, ResponseException, RestClient} +import org.elasticsearch.client.{ResponseException, RestClient} import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.native.JsonMethods._ @@ -60,8 +62,9 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin } try { val response = client.performRequest( - new Request("GET", s"/$internalIndex/$estype/$id") - ) + "GET", + s"/$internalIndex/$estype/$id", + Map.empty[String, String].asJava) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "found").extract[Boolean] match { case true => @@ -113,10 +116,12 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin def update(accessKey: AccessKey): Unit = { val id = accessKey.key try { - val request = new Request("POST", s"/$internalIndex/$estype/$id") - request.addParameter("refresh", "true") - request.setEntity(new NStringEntity(write(accessKey), ContentType.APPLICATION_JSON)) - val response = client.performRequest(request) + val entity = new NStringEntity(write(accessKey), ContentType.APPLICATION_JSON) + val response = client.performRequest( + "POST", + s"/$internalIndex/$estype/$id", + Map("refresh" -> "true").asJava, + entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) val result = (jsonResponse \ "result").extract[String] result match { @@ -133,9 
+138,10 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin def delete(id: String): Unit = { try { - val request = new Request("DELETE", s"/$internalIndex/$estype/$id") - request.addParameter("refresh", "true") - val response = client.performRequest(request) + val response = client.performRequest( + "DELETE", + s"/$internalIndex/$estype/$id", + Map("refresh" -> "true").asJava) val json = parse(EntityUtils.toString(response.getEntity)) val result = (json \ "result").extract[String] result match { diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala index 3ecc85e48b..26621cff35 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala @@ -19,13 +19,15 @@ package org.apache.predictionio.data.storage.elasticsearch import java.io.IOException +import scala.collection.JavaConverters.mapAsJavaMapConverter + import org.apache.http.entity.ContentType import org.apache.http.nio.entity.NStringEntity import org.apache.http.util.EntityUtils import org.apache.predictionio.data.storage.App import org.apache.predictionio.data.storage.Apps import org.apache.predictionio.data.storage.StorageClientConfig -import org.elasticsearch.client.{Request, ResponseException, RestClient} +import org.elasticsearch.client.{ResponseException, RestClient} import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.native.JsonMethods._ @@ -69,8 +71,9 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) def get(id: Int): Option[App] = { try { val response = client.performRequest( - new Request("GET", s"/$internalIndex/$estype/$id") - ) + "GET", + s"/$internalIndex/$estype/$id", + Map.empty[String, String].asJava) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "found").extract[Boolean] match { case true => @@ -98,9 +101,12 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) ("query" -> ("term" -> ("name" -> name))) - val request = new Request("POST", s"/$internalIndex/$estype/_search") - request.setEntity(new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)) - val response = client.performRequest(request) + val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) + val response = client.performRequest( + "POST", + s"/$internalIndex/$estype/_search", + Map.empty[String, String].asJava, + entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "hits" \ "total").extract[Long] match { case 0 => None @@ -132,10 +138,12 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) def update(app: App): Unit = { val id = app.id.toString try { - val request = new Request("POST", s"/$internalIndex/$estype/$id") - request.addParameter("refresh", "true") - request.setEntity(new NStringEntity(write(app), ContentType.APPLICATION_JSON)) - val response = client.performRequest(request) + val entity = new NStringEntity(write(app), ContentType.APPLICATION_JSON); + val response = client.performRequest( + "POST", + s"/$internalIndex/$estype/$id", + Map("refresh" -> "true").asJava, + entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) val result = (jsonResponse \ 
"result").extract[String] result match { @@ -152,9 +160,10 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) def delete(id: Int): Unit = { try { - val request = new Request("DELETE", s"/$internalIndex/$estype/$id") - request.addParameter("refresh", "true") - val response = client.performRequest(request) + val response = client.performRequest( + "DELETE", + s"/$internalIndex/$estype/$id", + Map("refresh" -> "true").asJava) val json = parse(EntityUtils.toString(response.getEntity)) val result = (json \ "result").extract[String] result match { diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala index dec94642d8..ac248debf4 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala @@ -19,13 +19,15 @@ package org.apache.predictionio.data.storage.elasticsearch import java.io.IOException +import scala.collection.JavaConverters.mapAsJavaMapConverter + import org.apache.http.entity.ContentType import org.apache.http.nio.entity.NStringEntity import org.apache.http.util.EntityUtils import org.apache.predictionio.data.storage.Channel import org.apache.predictionio.data.storage.Channels import org.apache.predictionio.data.storage.StorageClientConfig -import org.elasticsearch.client.{Request, ResponseException, RestClient} +import org.elasticsearch.client.{ResponseException, RestClient} import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.native.JsonMethods._ @@ -67,8 +69,9 @@ class ESChannels(client: RestClient, config: StorageClientConfig, index: String) def get(id: Int): Option[Channel] = { try { val response = client.performRequest( - new Request("GET", s"/$internalIndex/$estype/$id") - ) + "GET", + s"/$internalIndex/$estype/$id", + Map.empty[String, String].asJava) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "found").extract[Boolean] match { case true => @@ -107,10 +110,12 @@ class ESChannels(client: RestClient, config: StorageClientConfig, index: String) def update(channel: Channel): Boolean = { val id = channel.id.toString try { - val request = new Request("POST", s"/$internalIndex/$estype/$id") - request.addParameter("refresh", "true") - request.setEntity(new NStringEntity(write(channel), ContentType.APPLICATION_JSON)) - val response = client.performRequest(request) + val entity = new NStringEntity(write(channel), ContentType.APPLICATION_JSON) + val response = client.performRequest( + "POST", + s"/$internalIndex/$estype/$id", + Map("refresh" -> "true").asJava, + entity) val json = parse(EntityUtils.toString(response.getEntity)) val result = (json \ "result").extract[String] result match { @@ -129,9 +134,10 @@ class ESChannels(client: RestClient, config: StorageClientConfig, index: String) def delete(id: Int): Unit = { try { - val request = new Request("DELETE", s"/$internalIndex/$estype/$id") - request.addParameter("refresh", "true") - val response = client.performRequest(request) + val response = client.performRequest( + "DELETE", + s"/$internalIndex/$estype/$id", + Map("refresh" -> "true").asJava) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) val result = (jsonResponse \ "result").extract[String] result match { diff --git 
a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala index 7030b578a4..96f8a6720c 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala @@ -19,6 +19,8 @@ package org.apache.predictionio.data.storage.elasticsearch import java.io.IOException +import scala.collection.JavaConverters.mapAsJavaMapConverter + import org.apache.http.entity.ContentType import org.apache.http.nio.entity.NStringEntity import org.apache.http.util.EntityUtils @@ -26,7 +28,7 @@ import org.apache.predictionio.data.storage.EngineInstance import org.apache.predictionio.data.storage.EngineInstanceSerializer import org.apache.predictionio.data.storage.EngineInstances import org.apache.predictionio.data.storage.StorageClientConfig -import org.elasticsearch.client.{Request, ResponseException, RestClient} +import org.elasticsearch.client.{ResponseException, RestClient} import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.native.JsonMethods._ @@ -79,10 +81,12 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: def preInsert(): Option[String] = { try { - val request = new Request("POST", s"/$internalIndex/$estype/") - request.addParameter("refresh", "true") - request.setEntity(new NStringEntity("{}", ContentType.APPLICATION_JSON)) - val response = client.performRequest(request) + val entity = new NStringEntity("{}", ContentType.APPLICATION_JSON) + val response = client.performRequest( + "POST", + s"/$internalIndex/$estype/", + Map("refresh" -> "true").asJava, + entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) val result = (jsonResponse \ "result").extract[String] result match { @@ -102,8 +106,9 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: def get(id: String): Option[EngineInstance] = { try { val response = client.performRequest( - new Request("GET", s"/$internalIndex/$estype/$id") - ) + "GET", + s"/$internalIndex/$estype/$id", + Map.empty[String, String].asJava) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "found").extract[Boolean] match { case true => @@ -178,10 +183,12 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: def update(i: EngineInstance): Unit = { val id = i.id try { - val request = new Request("POST", s"/$internalIndex/$estype/$id") - request.addParameter("refresh", "true") - request.setEntity(new NStringEntity(write(i), ContentType.APPLICATION_JSON)) - val response = client.performRequest(request) + val entity = new NStringEntity(write(i), ContentType.APPLICATION_JSON) + val response = client.performRequest( + "POST", + s"/$internalIndex/$estype/$id", + Map("refresh" -> "true").asJava, + entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) val result = (jsonResponse \ "result").extract[String] result match { @@ -198,9 +205,10 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: def delete(id: String): Unit = { try { - val request = new Request("DELETE", s"/$internalIndex/$estype/$id") - request.addParameter("refresh", "true") - val response = client.performRequest(request) + val response = client.performRequest( + "DELETE", + 
s"/$internalIndex/$estype/$id", + Map("refresh" -> "true").asJava) val json = parse(EntityUtils.toString(response.getEntity)) val result = (json \ "result").extract[String] result match { diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala index 1e7b2d74be..0025950d03 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala @@ -19,6 +19,8 @@ package org.apache.predictionio.data.storage.elasticsearch import java.io.IOException +import scala.collection.JavaConverters._ + import org.apache.http.entity.ContentType import org.apache.http.nio.entity.NStringEntity import org.apache.http.util.EntityUtils @@ -26,7 +28,7 @@ import org.apache.predictionio.data.storage.EvaluationInstance import org.apache.predictionio.data.storage.EvaluationInstanceSerializer import org.apache.predictionio.data.storage.EvaluationInstances import org.apache.predictionio.data.storage.StorageClientConfig -import org.elasticsearch.client.{Request, ResponseException, RestClient} +import org.elasticsearch.client.{ResponseException, RestClient} import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.native.JsonMethods._ @@ -76,8 +78,9 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind def get(id: String): Option[EvaluationInstance] = { try { val response = client.performRequest( - new Request("GET", s"/$internalIndex/$estype/$id") - ) + "GET", + s"/$internalIndex/$estype/$id", + Map.empty[String, String].asJava) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "found").extract[Boolean] match { case true => @@ -132,10 +135,12 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind def update(i: EvaluationInstance): Unit = { val id = i.id try { - val request = new Request("POST", s"/$internalIndex/$estype/$id") - request.addParameter("refresh", "true") - request.setEntity(new NStringEntity(write(i), ContentType.APPLICATION_JSON)) - val response = client.performRequest(request) + val entity = new NStringEntity(write(i), ContentType.APPLICATION_JSON) + val response = client.performRequest( + "POST", + s"/$internalIndex/$estype/$id", + Map("refresh" -> "true").asJava, + entity) val json = parse(EntityUtils.toString(response.getEntity)) val result = (json \ "result").extract[String] result match { @@ -152,9 +157,10 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind def delete(id: String): Unit = { try { - val request = new Request("DELETE", s"/$internalIndex/$estype/$id") - request.addParameter("refresh", "true") - val response = client.performRequest(request) + val response = client.performRequest( + "DELETE", + s"/$internalIndex/$estype/$id", + Map("refresh" -> "true").asJava) val json = parse(EntityUtils.toString(response.getEntity)) val result = (json \ "result").extract[String] result match { diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala index c524f75759..708d3d33b3 100644 --- 
a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala @@ -19,6 +19,7 @@ package org.apache.predictionio.data.storage.elasticsearch import java.io.IOException +import scala.collection.JavaConverters._ import scala.concurrent.ExecutionContext import scala.concurrent.Future import org.apache.http.entity.{ContentType, StringEntity} @@ -27,7 +28,7 @@ import org.apache.http.util.EntityUtils import org.apache.predictionio.data.storage.Event import org.apache.predictionio.data.storage.LEvents import org.apache.predictionio.data.storage.StorageClientConfig -import org.elasticsearch.client.{Request, RequestOptions, ResponseException, RestClient} +import org.elasticsearch.client.{ResponseException, RestClient} import org.joda.time.DateTime import org.json4s._ import org.json4s.JsonDSL._ @@ -35,6 +36,7 @@ import org.json4s.native.JsonMethods._ import org.json4s.native.Serialization.write import org.json4s.ext.JodaTimeSerializers import grizzled.slf4j.Logging +import org.apache.http.message.BasicHeader class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseIndex: String) extends LEvents with Logging { @@ -78,11 +80,12 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd val json = ("query" -> ("match_all" -> List.empty)) - val request = new Request("POST", s"/$index/$estype/_delete_by_query") - request.addParameter("refresh", ESUtils.getEventDataRefresh(config)) - request.setEntity(new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)) - client.performRequest(request) - .getStatusLine.getStatusCode match { + val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) + client.performRequest( + "POST", + s"/$index/$estype/_delete_by_query", + Map("refresh" -> ESUtils.getEventDataRefresh(config)).asJava, + entity).getStatusLine.getStatusCode match { case 200 => true case _ => error(s"Failed to remove $index/$estype") @@ -120,10 +123,12 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd ("prId" -> event.prId) ~ ("creationTime" -> ESUtils.formatUTCDateTime(event.creationTime)) ~ ("properties" -> write(event.properties.toJObject)) - val request = new Request("POST", s"/$index/$estype/$id") - request.addParameter("refresh", ESUtils.getEventDataRefresh(config)) - request.setEntity(new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)) - val response = client.performRequest(request) + val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) + val response = client.performRequest( + "POST", + s"/$index/$estype/$id", + Map("refresh" -> ESUtils.getEventDataRefresh(config)).asJava, + entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) val result = (jsonResponse \ "result").extract[String] result match { @@ -178,14 +183,13 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd }.mkString("", "\n", "\n") - val request = new Request("POST", "/_bulk") - request.addParameter("refresh", ESUtils.getEventDataRefresh(config)) - request.setEntity(new StringEntity(json)) - val options = request.getOptions().toBuilder() - options.addHeader("Content-Type", "application/x-ndjson") - options.setHttpAsyncResponseConsumerFactory(RequestOptions.DEFAULT.getHttpAsyncResponseConsumerFactory) - request.setOptions(options) - val response = 
client.performRequest(request) + val entity = new StringEntity(json) + val response = client.performRequest( + "POST", + "/_bulk", + Map("refresh" -> ESUtils.getEventDataRefresh(config)).asJava, + entity, + new BasicHeader("Content-Type", "application/x-ndjson")) val responseJson = parse(EntityUtils.toString(response.getEntity)) val items = (responseJson \ "items").asInstanceOf[JArray] @@ -214,8 +218,9 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd val index = baseIndex + "_" + estype try { client.performRequest( - new Request("GET", s"/$index/$estype/$id") - ).getStatusLine.getStatusCode match { + "GET", + s"/$index/$estype/$id", + Map.empty[String, String].asJava).getStatusLine.getStatusCode match { case 200 => true case _ => false } @@ -245,9 +250,12 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd ("query" -> ("term" -> ("eventId" -> eventId))) - val request = new Request("POST", s"/$index/$estype/_search") - request.setEntity(new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)) - val response = client.performRequest(request) + val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) + val response = client.performRequest( + "POST", + s"/$index/$estype/_search", + Map.empty[String, String].asJava, + entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "hits" \ "total").extract[Long] match { case 0 => None @@ -276,10 +284,12 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd ("query" -> ("term" -> ("eventId" -> eventId))) - val request = new Request("POST", s"/$index/$estype/_delete_by_query") - request.addParameter("refresh", ESUtils.getEventDataRefresh(config)) - request.setEntity(new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)) - val response = client.performRequest(request) + val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) + val response = client.performRequest( + "POST", + s"/$index/$estype/_delete_by_query", + Map("refresh" -> ESUtils.getEventDataRefresh(config)).asJava, + entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "deleted").extract[Int] > 0 } catch { diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala index 44dee6d320..a86d378331 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala @@ -17,6 +17,8 @@ package org.apache.predictionio.data.storage.elasticsearch +import scala.collection.JavaConverters._ + import org.apache.hadoop.conf.Configuration import org.apache.hadoop.io.MapWritable import org.apache.hadoop.io.Text @@ -25,7 +27,7 @@ import org.apache.predictionio.data.storage.PEvents import org.apache.predictionio.data.storage.StorageClientConfig import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD -import org.elasticsearch.client.{Request, RestClient} +import org.elasticsearch.client.RestClient import org.elasticsearch.hadoop.mr.EsInputFormat import org.elasticsearch.spark._ import org.joda.time.DateTime @@ -115,10 +117,12 @@ class ESPEvents(client: RestClient, config: StorageClientConfig, baseIndex: Stri ("query" -> ("term" -> 
("eventId" -> eventId))) - val request = new Request("POST", s"/$index/$estype/_delete_by_query") - request.addParameter("refresh", ESUtils.getEventDataRefresh(config)) - request.setEntity(new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)) - val response = client.performRequest(request) + val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) + val response = client.performRequest( + "POST", + s"/$index/$estype/_delete_by_query", + Map("refresh" -> ESUtils.getEventDataRefresh(config)).asJava, + entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) val result = (jsonResponse \ "result").extract[String] result match { diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala index 06e12e73cb..ade0f40ce9 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala @@ -19,12 +19,14 @@ package org.apache.predictionio.data.storage.elasticsearch import java.io.IOException +import scala.collection.JavaConverters._ + import org.apache.http.entity.ContentType import org.apache.http.nio.entity.NStringEntity import org.apache.http.util.EntityUtils import org.apache.predictionio.data.storage.StorageClientConfig import org.apache.predictionio.data.storage.StorageClientException -import org.elasticsearch.client.{Request, RestClient} +import org.elasticsearch.client.RestClient import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.native.JsonMethods._ @@ -46,10 +48,12 @@ class ESSequences(client: RestClient, config: StorageClientConfig, index: String def genNext(name: String): Long = { try { - val request = new Request("POST", s"/$internalIndex/$estype/$name") - request.addParameter("refresh", "false") - request.setEntity(new NStringEntity(write("n" -> name), ContentType.APPLICATION_JSON)) - val response = client.performRequest(request) + val entity = new NStringEntity(write("n" -> name), ContentType.APPLICATION_JSON) + val response = client.performRequest( + "POST", + s"/$internalIndex/$estype/$name", + Map("refresh" -> "false").asJava, + entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) val result = (jsonResponse \ "result").extract[String] result match { diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala index 794f4f892d..93d5d94912 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala @@ -17,9 +17,12 @@ package org.apache.predictionio.data.storage.elasticsearch +import scala.collection.JavaConversions._ +import scala.collection.JavaConverters._ + import org.apache.http.entity.ContentType import org.apache.http.nio.entity.NStringEntity -import org.elasticsearch.client.{Request, RestClient} +import org.elasticsearch.client.RestClient import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.native.JsonMethods._ @@ -93,10 +96,12 @@ object ESUtils { query: String, size: Int)( implicit formats: Formats): Seq[JValue] = { - val 
request = new Request("POST", s"/$index/$estype/_search") - request.addParameter("size", s"$size") - request.setEntity(new NStringEntity(query, ContentType.APPLICATION_JSON)) - val response = client.performRequest(request) + val entity = new NStringEntity(query, ContentType.APPLICATION_JSON) + val response = client.performRequest( + "POST", + s"/$index/$estype/_search", + Map("size" -> s"${size}"), + entity) val responseJValue = parse(EntityUtils.toString(response.getEntity)) val hits = (responseJValue \ "hits" \ "hits").extract[Seq[JValue]] hits.map(h => (h \ "_source")) @@ -132,9 +137,12 @@ object ESUtils { if (hits.isEmpty) results else { val json = ("scroll" -> scrollLife) ~ ("scroll_id" -> scrollId) - val request = new Request("POST", "/_search/scroll") - request.setEntity(new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)) - val response = client.performRequest(request) + val scrollBody = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) + val response = client.performRequest( + "POST", + "/_search/scroll", + Map[String, String](), + scrollBody) val responseJValue = parse(EntityUtils.toString(response.getEntity)) scroll((responseJValue \ "_scroll_id").extract[String], (responseJValue \ "hits" \ "hits").extract[Seq[JValue]], @@ -142,10 +150,12 @@ object ESUtils { } } - val request = new Request("POST", s"/$index/$estype/_search") - request.addParameter("scroll", scrollLife) - request.setEntity(new NStringEntity(query, ContentType.APPLICATION_JSON)) - val response = client.performRequest(request) + val entity = new NStringEntity(query, ContentType.APPLICATION_JSON) + val response = client.performRequest( + "POST", + s"/$index/$estype/_search", + Map("scroll" -> scrollLife), + entity) val responseJValue = parse(EntityUtils.toString(response.getEntity)) scroll((responseJValue \ "_scroll_id").extract[String], (responseJValue \ "hits" \ "hits").extract[Seq[JValue]], @@ -156,12 +166,14 @@ object ESUtils { client: RestClient, index: String): Unit = { client.performRequest( - new Request("HEAD", s"/$index") - ).getStatusLine.getStatusCode match { + "HEAD", + s"/$index", + Map.empty[String, String].asJava).getStatusLine.getStatusCode match { case 404 => client.performRequest( - new Request("PUT", s"/$index") - ) + "PUT", + s"/$index", + Map.empty[String, String].asJava) case 200 => case _ => throw new IllegalStateException(s"/$index is invalid.") @@ -174,12 +186,16 @@ object ESUtils { estype: String, json: String): Unit = { client.performRequest( - new Request("HEAD", s"/$index/_mapping/$estype") - ).getStatusLine.getStatusCode match { + "HEAD", + s"/$index/_mapping/$estype", + Map.empty[String, String].asJava).getStatusLine.getStatusCode match { case 404 => - val request = new Request("PUT", s"/$index/_mapping/$estype") - request.setEntity(new NStringEntity(json, ContentType.APPLICATION_JSON)) - client.performRequest(request) + val entity = new NStringEntity(json, ContentType.APPLICATION_JSON) + client.performRequest( + "PUT", + s"/$index/_mapping/$estype", + Map.empty[String, String].asJava, + entity) case 200 => case _ => throw new IllegalStateException(s"/$index/$estype is invalid: $json") diff --git a/tools/src/main/scala/org/apache/predictionio/tools/export/EventsToFile.scala b/tools/src/main/scala/org/apache/predictionio/tools/export/EventsToFile.scala index fcbf29897e..9b6dbb5316 100644 --- a/tools/src/main/scala/org/apache/predictionio/tools/export/EventsToFile.scala +++ 
b/tools/src/main/scala/org/apache/predictionio/tools/export/EventsToFile.scala @@ -26,7 +26,7 @@ import org.apache.predictionio.workflow.WorkflowContext import org.apache.predictionio.workflow.WorkflowUtils import org.apache.predictionio.workflow.CleanupFunctions import grizzled.slf4j.Logging -import org.apache.spark.sql.{Encoders, SaveMode, SparkSession} +import org.apache.spark.sql.{SaveMode, SparkSession} import org.json4s.native.Serialization._ case class EventsToFileArgs( @@ -98,7 +98,7 @@ object EventsToFile extends Logging { if (args.format == "json") { jsonStringRdd.saveAsTextFile(args.outputPath) } else { - val jsonDf = sqlSession.read.json(sqlSession.createDataset(jsonStringRdd)(Encoders.STRING)) + val jsonDf = sqlSession.read.json(jsonStringRdd) jsonDf.write.mode(SaveMode.ErrorIfExists).parquet(args.outputPath) } info(s"Events are exported to ${args.outputPath}/.") From 9a6e7094265f8067be3da689309d95ad77071c2d Mon Sep 17 00:00:00 2001 From: shimamoto Date: Thu, 20 Jun 2019 16:47:41 +0900 Subject: [PATCH 04/14] fixup --- build.sbt | 9 ++++----- core/build.sbt | 4 +--- data/build.sbt | 4 ++-- .../storage/elasticsearch/ESEvaluationInstances.scala | 4 ++-- .../data/storage/elasticsearch/ESLEvents.scala | 2 +- .../data/storage/elasticsearch/ESSequences.scala | 2 +- .../predictionio/data/storage/jdbc/LEventsSpec.scala | 1 - .../predictionio/data/storage/jdbc/PEventsSpec.scala | 7 +++---- .../predictionio/data/storage/jdbc/TestEvents.scala | 2 ++ tools/build.sbt | 1 - 10 files changed, 16 insertions(+), 20 deletions(-) diff --git a/build.sbt b/build.sbt index 1b580562e7..c1d6aacfb4 100644 --- a/build.sbt +++ b/build.sbt @@ -60,8 +60,7 @@ val commonSettings = Seq( autoAPIMappings := true, licenseConfigurations := Set("compile"), licenseReportTypes := Seq(Csv), - unmanagedClasspath in Test += conf, - unmanagedClasspath in Test += baseDirectory.value.getParentFile / s"storage/jdbc/target/scala-${scalaBinaryVersion.value}/classes") + unmanagedClasspath in Test += conf) val dataElasticsearch = (project in file("storage/elasticsearch")). settings(commonSettings: _*) @@ -92,13 +91,13 @@ val common = (project in file("common")). disablePlugins(sbtassembly.AssemblyPlugin) val data = (project in file("data")). - dependsOn(common). + dependsOn(common, dataJdbc % "compile->test;test->test"). settings(commonSettings: _*). enablePlugins(GenJavadocPlugin). disablePlugins(sbtassembly.AssemblyPlugin) val core = (project in file("core")). - dependsOn(data). + dependsOn(data % "compile->compile;test->test"). settings(commonSettings: _*). enablePlugins(GenJavadocPlugin). enablePlugins(BuildInfoPlugin). @@ -123,7 +122,7 @@ val e2 = (project in file("e2")). disablePlugins(sbtassembly.AssemblyPlugin) val tools = (project in file("tools")). - dependsOn(e2). + dependsOn(core % "compile->compile;test->test"). settings(commonSettings: _*). settings(skip in publish := true). enablePlugins(GenJavadocPlugin). 
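[Reviewer note, not part of the patch] The `dependsOn(... % "compile->compile;test->test")` scopes introduced in this build.sbt hunk are what let the removed `unmanagedClasspath in Test` workaround go away: mapping a project's Test configuration onto a dependency's Test configuration puts the dependency's test classes (shared fixtures) on the test classpath directly. A minimal sbt sketch with hypothetical project names `util` and `app` (names and paths are illustrative only, not from this patch):

    // build.sbt sketch: configuration-to-configuration project dependencies
    lazy val util = (project in file("util"))

    lazy val app = (project in file("app"))
      // app's Compile sees util's Compile output; app's Test also sees
      // util's Test output, so test helpers in util can be reused in app's tests.
      .dependsOn(util % "compile->compile;test->test")

A mapping such as "compile->test" (as used for the JDBC storage project above) additionally exposes the dependency's test output to the depending project's main compile classpath.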
diff --git a/core/build.sbt b/core/build.sbt index 5692dd9f78..5788ec6a5b 100644 --- a/core/build.sbt +++ b/core/build.sbt @@ -29,9 +29,7 @@ libraryDependencies ++= Seq( "org.json4s" %% "json4s-ext" % json4sVersion.value, "org.scalaj" %% "scalaj-http" % "2.4.1", "org.slf4j" % "slf4j-log4j12" % "1.7.26", - "org.scalatest" %% "scalatest" % "3.0.8" % "test", - "org.scalamock" %% "scalamock" % "4.2.0" % "test", - "com.h2database" % "h2" % "1.4.199" % "test" + "org.scalatest" %% "scalatest" % "3.0.8" % "test" ) parallelExecution in Test := false diff --git a/data/build.sbt b/data/build.sbt index 89bca2de4c..ac1e38105f 100644 --- a/data/build.sbt +++ b/data/build.sbt @@ -25,8 +25,8 @@ libraryDependencies ++= Seq( "com.google.guava" % "guava" % "27.1-jre", "com.typesafe.akka" %% "akka-http-testkit" % "10.1.8" % "test", "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided", - "org.clapper" %% "grizzled-slf4j" % "1.3.3", - "org.scalatest" %% "scalatest" % "3.0.8" % "test", + "org.clapper" %% "grizzled-slf4j" % "1.3.3" + exclude("org.slf4j", "slf4j-api"), "org.specs2" %% "specs2-core" % "4.5.1" % "test", "org.scalamock" %% "scalamock" % "4.2.0" % "test", "com.h2database" % "h2" % "1.4.199" % "test") diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala index 0025950d03..e97ee97c3f 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala @@ -54,8 +54,8 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind ("engineParamsGeneratorClass" -> ("type" -> "keyword")) ~ ("batch" -> ("type" -> "keyword")) ~ ("evaluatorResults" -> ("type" -> "text")) ~ - ("evaluatorResultsHTML" -> ("enabled" -> false)) ~ - ("evaluatorResultsJSON" -> ("enabled" -> false)))) + ("evaluatorResultsHTML" -> (("type" -> "object") ~ ("enabled" -> false))) ~ + ("evaluatorResultsJSON" -> (("type" -> "object") ~ ("enabled" -> false))))) ESUtils.createMapping(client, internalIndex, estype, compact(render(mappingJson))) def insert(i: EvaluationInstance): String = { diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala index 708d3d33b3..391f7ae960 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala @@ -64,7 +64,7 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd ("entityId" -> ("type" -> "keyword")) ~ ("targetEntityType" -> ("type" -> "keyword")) ~ ("targetEntityId" -> ("type" -> "keyword")) ~ - ("properties" -> ("enabled" -> false)) ~ + ("properties" -> (("type" -> "object") ~ ("enabled" -> false))) ~ ("eventTime" -> ("type" -> "date")) ~ ("tags" -> ("type" -> "keyword")) ~ ("prId" -> ("type" -> "keyword")) ~ diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala index 
ade0f40ce9..94697a847c 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala @@ -43,7 +43,7 @@ class ESSequences(client: RestClient, config: StorageClientConfig, index: String val mappingJson = (estype -> ("properties" -> - ("n" -> ("enabled" -> false)))) + ("n" -> (("type" -> "object") ~ ("enabled" -> false))))) ESUtils.createMapping(client, internalIndex, estype, compact(render(mappingJson))) def genNext(name: String): Long = { diff --git a/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/LEventsSpec.scala b/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/LEventsSpec.scala index d723d07908..29906ef0d7 100644 --- a/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/LEventsSpec.scala +++ b/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/LEventsSpec.scala @@ -20,7 +20,6 @@ package org.apache.predictionio.data.storage.jdbc import org.apache.predictionio.data.storage.{Event, LEvents, PropertyMap, Storage} import org.specs2._ -import org.specs2.specification.Step class LEventsSpec extends Specification with TestEvents { def is = s2""" diff --git a/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/PEventsSpec.scala b/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/PEventsSpec.scala index 71ebf5ff7a..71875eac1f 100644 --- a/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/PEventsSpec.scala +++ b/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/PEventsSpec.scala @@ -22,7 +22,6 @@ import org.apache.predictionio.data.storage._ import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.specs2._ -import org.specs2.specification.Step class PEventsSpec extends Specification with TestEvents { @@ -54,7 +53,7 @@ class PEventsSpec extends Specification with TestEvents { PEvents can be implemented by: - JDBCPEvents ${jdbcPEvents} - - (stop Spark) ${Step(sc.stop())} + - (stop Spark) ${step(sc.stop())} """ @@ -62,8 +61,8 @@ class PEventsSpec extends Specification with TestEvents { JDBCPEvents should - behave like any PEvents implementation ${events(jdbcLocal, jdbcPar)} - - (table cleanup) ${Step(StorageTestUtils.dropJDBCTable(s"${dbName}_$appId"))} - - (table cleanup) ${Step(StorageTestUtils.dropJDBCTable(s"${dbName}_${appId}_$channelId"))} + - (table cleanup) ${step(StorageTestUtils.dropJDBCTable(s"${dbName}_$appId"))} + - (table cleanup) ${step(StorageTestUtils.dropJDBCTable(s"${dbName}_${appId}_$channelId"))} """ diff --git a/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/TestEvents.scala b/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/TestEvents.scala index 2cb08e5beb..f94c0d070c 100644 --- a/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/TestEvents.scala +++ b/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/TestEvents.scala @@ -23,6 +23,8 @@ import org.joda.time.{DateTime, DateTimeZone} trait TestEvents { + protected implicit val testExecutionContext = concurrent.ExecutionContext.Implicits.global + val u1BaseTime = new DateTime(654321) val u2BaseTime = new DateTime(6543210) val u3BaseTime = new DateTime(6543410) diff --git a/tools/build.sbt b/tools/build.sbt index 069a3134c6..876e49b2e2 100644 --- a/tools/build.sbt +++ b/tools/build.sbt @@ -23,7 +23,6 @@ 
name := "apache-predictionio-tools" libraryDependencies ++= Seq( "com.github.zafarkhaja" % "java-semver" % "0.9.0", "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided", - "com.typesafe.akka" %% "akka-http-testkit" % "10.1.8" % "test", "org.specs2" %% "specs2-core" % "4.5.1" % "test") assemblyMergeStrategy in assembly := { From 7f538cedfbbd1d6969e86535e55330d2c76c2650 Mon Sep 17 00:00:00 2001 From: shimamoto Date: Thu, 20 Jun 2019 18:24:26 +0900 Subject: [PATCH 05/14] fixup --- build.sbt | 18 ++++++++++++++---- core/build.sbt | 4 +++- .../data/storage/hbase/HBPEvents.scala | 1 - storage/jdbc/build.sbt | 1 - tools/build.sbt | 5 +++-- 5 files changed, 20 insertions(+), 9 deletions(-) diff --git a/build.sbt b/build.sbt index c1d6aacfb4..c63711baa7 100644 --- a/build.sbt +++ b/build.sbt @@ -60,7 +60,13 @@ val commonSettings = Seq( autoAPIMappings := true, licenseConfigurations := Set("compile"), licenseReportTypes := Seq(Csv), - unmanagedClasspath in Test += conf) + unmanagedClasspath in Test += conf, + unmanagedClasspath in Test += baseDirectory.value.getParentFile / s"storage/jdbc/target/scala-${scalaBinaryVersion.value}/classes") + +val commonTestSettings = Seq( + libraryDependencies ++= Seq( + "org.postgresql" % "postgresql" % "42.2.5" % "test", + "org.scalikejdbc" %% "scalikejdbc-joda-time" % "3.3.4" % "test")) val dataElasticsearch = (project in file("storage/elasticsearch")). settings(commonSettings: _*) @@ -75,6 +81,7 @@ val dataHdfs = (project in file("storage/hdfs")). val dataJdbc = (project in file("storage/jdbc")). settings(commonSettings: _*). + settings(commonTestSettings: _*). enablePlugins(GenJavadocPlugin) val dataLocalfs = (project in file("storage/localfs")). @@ -91,14 +98,16 @@ val common = (project in file("common")). disablePlugins(sbtassembly.AssemblyPlugin) val data = (project in file("data")). - dependsOn(common, dataJdbc % "compile->test;test->test"). + dependsOn(common). settings(commonSettings: _*). + settings(commonTestSettings: _*). enablePlugins(GenJavadocPlugin). disablePlugins(sbtassembly.AssemblyPlugin) val core = (project in file("core")). - dependsOn(data % "compile->compile;test->test"). + dependsOn(data). settings(commonSettings: _*). + settings(commonTestSettings: _*). enablePlugins(GenJavadocPlugin). enablePlugins(BuildInfoPlugin). settings( @@ -122,8 +131,9 @@ val e2 = (project in file("e2")). disablePlugins(sbtassembly.AssemblyPlugin) val tools = (project in file("tools")). - dependsOn(core % "compile->compile;test->test"). + dependsOn(e2). settings(commonSettings: _*). + settings(commonTestSettings: _*). settings(skip in publish := true). enablePlugins(GenJavadocPlugin). 
enablePlugins(SbtTwirl) diff --git a/core/build.sbt b/core/build.sbt index 5788ec6a5b..5692dd9f78 100644 --- a/core/build.sbt +++ b/core/build.sbt @@ -29,7 +29,9 @@ libraryDependencies ++= Seq( "org.json4s" %% "json4s-ext" % json4sVersion.value, "org.scalaj" %% "scalaj-http" % "2.4.1", "org.slf4j" % "slf4j-log4j12" % "1.7.26", - "org.scalatest" %% "scalatest" % "3.0.8" % "test" + "org.scalatest" %% "scalatest" % "3.0.8" % "test", + "org.scalamock" %% "scalamock" % "4.2.0" % "test", + "com.h2database" % "h2" % "1.4.199" % "test" ) parallelExecution in Test := false diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBPEvents.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBPEvents.scala index 66d406665f..7324fa68e9 100644 --- a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBPEvents.scala +++ b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBPEvents.scala @@ -119,7 +119,6 @@ class HBPEvents(client: HBClient, config: StorageClientConfig, namespace: String conf.set(TableOutputFormat.OUTPUT_TABLE, tableName) - val table = new HTable(conf, tableName) iter.foreach { id => val rowKey = HBEventsUtil.RowKey(id) diff --git a/storage/jdbc/build.sbt b/storage/jdbc/build.sbt index 44076b4b19..d18a3b9c3f 100644 --- a/storage/jdbc/build.sbt +++ b/storage/jdbc/build.sbt @@ -23,7 +23,6 @@ libraryDependencies ++= Seq( "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided", "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided", "org.scalikejdbc" %% "scalikejdbc-joda-time" % "3.3.4", - "org.postgresql" % "postgresql" % "42.2.5" % "test", "org.specs2" %% "specs2-core" % "4.5.1" % "test") parallelExecution in Test := false diff --git a/tools/build.sbt b/tools/build.sbt index 876e49b2e2..f8d14fb684 100644 --- a/tools/build.sbt +++ b/tools/build.sbt @@ -23,6 +23,7 @@ name := "apache-predictionio-tools" libraryDependencies ++= Seq( "com.github.zafarkhaja" % "java-semver" % "0.9.0", "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided", + "com.typesafe.akka" %% "akka-http-testkit" % "10.1.8" % "test", "org.specs2" %% "specs2-core" % "4.5.1" % "test") assemblyMergeStrategy in assembly := { @@ -36,8 +37,8 @@ assemblyMergeStrategy in assembly := { assemblyExcludedJars in assembly := { val cp = (fullClasspath in assembly).value cp filter { _.data.getName match { - case "reflectasm-1.11.7.jar" => true - case "kryo-5.0.0-RC1.jar" => true + case "reflectasm-1.11.7.jar" => true + case "kryo-5.0.0-RC1.jar" => true case _ => false }} } From 07b56de6e8f4aa6e874e2cc2a9dc8755f8c50e08 Mon Sep 17 00:00:00 2001 From: shimamoto Date: Wed, 26 Jun 2019 10:29:48 +0900 Subject: [PATCH 06/14] fixup --- build.sbt | 12 +++++++++--- core/build.sbt | 5 +---- data/build.sbt | 6 +----- e2/build.sbt | 4 ++-- storage/jdbc/build.sbt | 4 +++- tools/build.sbt | 4 ++-- 6 files changed, 18 insertions(+), 17 deletions(-) diff --git a/build.sbt b/build.sbt index c63711baa7..d595b60d2d 100644 --- a/build.sbt +++ b/build.sbt @@ -65,8 +65,14 @@ val commonSettings = Seq( val commonTestSettings = Seq( libraryDependencies ++= Seq( - "org.postgresql" % "postgresql" % "42.2.5" % "test", - "org.scalikejdbc" %% "scalikejdbc-joda-time" % "3.3.4" % "test")) + "com.typesafe.akka" %% "akka-http-testkit" % "10.1.8" % "test", + "com.typesafe.akka" %% "akka-stream-testkit" % "2.5.23" % "test", + "org.specs2" %% "specs2-core" % "4.5.1" % "test", + "org.scalatest" %% "scalatest" % "3.0.8" % 
"test", + "org.scalamock" %% "scalamock" % "4.2.0" % "test", + "com.h2database" % "h2" % "1.4.199" % "test", + "org.postgresql" % "postgresql" % "42.2.5" % "test", + "org.scalikejdbc" %% "scalikejdbc-joda-time" % "3.3.4" % "test")) val dataElasticsearch = (project in file("storage/elasticsearch")). settings(commonSettings: _*) @@ -81,7 +87,6 @@ val dataHdfs = (project in file("storage/hdfs")). val dataJdbc = (project in file("storage/jdbc")). settings(commonSettings: _*). - settings(commonTestSettings: _*). enablePlugins(GenJavadocPlugin) val dataLocalfs = (project in file("storage/localfs")). @@ -127,6 +132,7 @@ val core = (project in file("core")). val e2 = (project in file("e2")). dependsOn(core). settings(commonSettings: _*). + settings(commonTestSettings: _*). enablePlugins(GenJavadocPlugin). disablePlugins(sbtassembly.AssemblyPlugin) diff --git a/core/build.sbt b/core/build.sbt index 5692dd9f78..f018dec0ee 100644 --- a/core/build.sbt +++ b/core/build.sbt @@ -28,10 +28,7 @@ libraryDependencies ++= Seq( "org.apache.spark" %% "spark-core" % sparkVersion.value % "provided", "org.json4s" %% "json4s-ext" % json4sVersion.value, "org.scalaj" %% "scalaj-http" % "2.4.1", - "org.slf4j" % "slf4j-log4j12" % "1.7.26", - "org.scalatest" %% "scalatest" % "3.0.8" % "test", - "org.scalamock" %% "scalamock" % "4.2.0" % "test", - "com.h2database" % "h2" % "1.4.199" % "test" + "org.slf4j" % "slf4j-log4j12" % "1.7.26" ) parallelExecution in Test := false diff --git a/data/build.sbt b/data/build.sbt index ac1e38105f..de24d6fe0b 100644 --- a/data/build.sbt +++ b/data/build.sbt @@ -23,13 +23,9 @@ libraryDependencies ++= Seq( "org.scala-lang" % "scala-reflect" % scalaVersion.value, "com.github.nscala-time" %% "nscala-time" % "2.22.0", "com.google.guava" % "guava" % "27.1-jre", - "com.typesafe.akka" %% "akka-http-testkit" % "10.1.8" % "test", "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided", "org.clapper" %% "grizzled-slf4j" % "1.3.3" - exclude("org.slf4j", "slf4j-api"), - "org.specs2" %% "specs2-core" % "4.5.1" % "test", - "org.scalamock" %% "scalamock" % "4.2.0" % "test", - "com.h2database" % "h2" % "1.4.199" % "test") +) parallelExecution in Test := false diff --git a/e2/build.sbt b/e2/build.sbt index 29c1d48667..b89d736170 100644 --- a/e2/build.sbt +++ b/e2/build.sbt @@ -22,7 +22,7 @@ name := "apache-predictionio-e2" parallelExecution in Test := false libraryDependencies ++= Seq( - "org.apache.spark" %% "spark-mllib" % sparkVersion.value % "provided", - "org.scalatest" %% "scalatest" % "2.2.5" % "test") + "org.apache.spark" %% "spark-mllib" % sparkVersion.value % "provided" +) pomExtra := childrenPomExtra.value diff --git a/storage/jdbc/build.sbt b/storage/jdbc/build.sbt index d18a3b9c3f..5944d137f7 100644 --- a/storage/jdbc/build.sbt +++ b/storage/jdbc/build.sbt @@ -23,7 +23,9 @@ libraryDependencies ++= Seq( "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided", "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided", "org.scalikejdbc" %% "scalikejdbc-joda-time" % "3.3.4", - "org.specs2" %% "specs2-core" % "4.5.1" % "test") + "org.postgresql" % "postgresql" % "42.2.5" % "test", + "org.specs2" %% "specs2-core" % "4.5.1" % "test" +) parallelExecution in Test := false diff --git a/tools/build.sbt b/tools/build.sbt index f8d14fb684..c900b7fc1e 100644 --- a/tools/build.sbt +++ b/tools/build.sbt @@ -23,8 +23,8 @@ name := "apache-predictionio-tools" libraryDependencies ++= Seq( "com.github.zafarkhaja" % "java-semver" % "0.9.0", "org.apache.spark" %% 
"spark-sql" % sparkVersion.value % "provided", - "com.typesafe.akka" %% "akka-http-testkit" % "10.1.8" % "test", - "org.specs2" %% "specs2-core" % "4.5.1" % "test") + "com.typesafe.akka" %% "akka-slf4j" % "2.5.23" +) assemblyMergeStrategy in assembly := { case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat From 94b77e56ce436700e3a206c22cd21cbc4a681924 Mon Sep 17 00:00:00 2001 From: shimamoto Date: Wed, 26 Jun 2019 16:15:31 +0900 Subject: [PATCH 07/14] Revert hbase --- .travis.yml | 2 +- build.sbt | 2 +- conf/pio-env.sh.template | 4 ++-- conf/pio-vendors.sh | 2 +- docs/manual/data/versions.yml | 4 ++-- docs/manual/source/install/index.html.md.erb | 2 +- storage/hbase/build.sbt | 1 + .../data/storage/hbase/HBEventsUtil.scala | 12 ++++++------ .../predictionio/data/storage/hbase/HBLEvents.scala | 8 ++++---- .../data/storage/hbase/StorageClient.scala | 8 ++++---- 10 files changed, 23 insertions(+), 22 deletions(-) diff --git a/.travis.yml b/.travis.yml index d383d87179..2c3b6c4de5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -55,7 +55,7 @@ env: PIO_ELASTICSEARCH_VERSION=6.8.0 - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS - PIO_HBASE_VERSION=1.4.10 + PIO_HBASE_VERSION=1.2.6 - BUILD_TYPE=Integration METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL diff --git a/build.sbt b/build.sbt index d595b60d2d..a3f420942c 100644 --- a/build.sbt +++ b/build.sbt @@ -45,7 +45,7 @@ hadoopVersion in ThisBuild := sys.props.getOrElse("hadoop.version", "2.7.7") elasticsearchVersion in ThisBuild := sys.props.getOrElse("elasticsearch.version", "6.8.0") -hbaseVersion in ThisBuild := sys.props.getOrElse("hbase.version", "1.4.10") +hbaseVersion in ThisBuild := sys.props.getOrElse("hbase.version", "1.2.6") json4sVersion in ThisBuild := { sparkBinaryVersion.value match { diff --git a/conf/pio-env.sh.template b/conf/pio-env.sh.template index 8de5f651cf..803ee2dbf5 100644 --- a/conf/pio-env.sh.template +++ b/conf/pio-env.sh.template @@ -39,7 +39,7 @@ MYSQL_JDBC_DRIVER=$PIO_HOME/lib/mysql-connector-java-5.1.41.jar # HBASE_CONF_DIR: You must configure this if you intend to run PredictionIO # with HBase on a remote cluster. -# HBASE_CONF_DIR=$PIO_HOME/vendors/hbase-1.4.10/conf +# HBASE_CONF_DIR=$PIO_HOME/vendors/hbase-1.2.6/conf # Filesystem paths where PredictionIO uses as block storage. 
PIO_FS_BASEDIR=$HOME/.pio_store @@ -99,7 +99,7 @@ PIO_STORAGE_SOURCES_PGSQL_PASSWORD=pio # HBase Example # PIO_STORAGE_SOURCES_HBASE_TYPE=hbase -# PIO_STORAGE_SOURCES_HBASE_HOME=$PIO_HOME/vendors/hbase-1.4.10 +# PIO_STORAGE_SOURCES_HBASE_HOME=$PIO_HOME/vendors/hbase-1.2.6 # AWS S3 Example # PIO_STORAGE_SOURCES_S3_TYPE=s3 diff --git a/conf/pio-vendors.sh b/conf/pio-vendors.sh index 14457f798d..bb067d76f1 100644 --- a/conf/pio-vendors.sh +++ b/conf/pio-vendors.sh @@ -36,7 +36,7 @@ if [ -z "$PIO_ELASTICSEARCH_VERSION" ]; then fi if [ -z "$PIO_HBASE_VERSION" ]; then - PIO_HBASE_VERSION="1.4.10" + PIO_HBASE_VERSION="1.2.6" fi export ES_IMAGE="docker.elastic.co/elasticsearch/elasticsearch" diff --git a/docs/manual/data/versions.yml b/docs/manual/data/versions.yml index 0ef7d808f9..358359a1f7 100644 --- a/docs/manual/data/versions.yml +++ b/docs/manual/data/versions.yml @@ -2,6 +2,6 @@ pio: 0.14.0 spark: 2.4.3 spark_download_filename: spark-2.4.3-bin-hadoop2.7 elasticsearch_download_filename: elasticsearch-6.8.0 -hbase_version: 1.4.10 -hbase_basename: hbase-1.4.10 +hbase_version: 1.2.6 +hbase_basename: hbase-1.2.6 hbase_variant: bin diff --git a/docs/manual/source/install/index.html.md.erb b/docs/manual/source/install/index.html.md.erb index d7263b0114..9d5240919e 100644 --- a/docs/manual/source/install/index.html.md.erb +++ b/docs/manual/source/install/index.html.md.erb @@ -31,7 +31,7 @@ technologies that power Apache PredictionIO®. and one of the following sets: * PostgreSQL 9.6 or MySQL 5.1 -* Apache HBase 1.4 +* Apache HBase 1.2 * Elasticsearch 6.x, 5.6(deprecated) ## Installation diff --git a/storage/hbase/build.sbt b/storage/hbase/build.sbt index 2a2d55c58e..28cd391f48 100644 --- a/storage/hbase/build.sbt +++ b/storage/hbase/build.sbt @@ -29,6 +29,7 @@ libraryDependencies ++= Seq( "org.apache.hbase" % "hbase-server" % hbaseVersion.value exclude("org.apache.hbase", "hbase-client") exclude("org.apache.zookeeper", "zookeeper") + exclude("javax.servlet", "servlet-api") exclude("org.mortbay.jetty", "servlet-api-2.5") exclude("org.mortbay.jetty", "jsp-api-2.1") exclude("org.mortbay.jetty", "jsp-2.1"), diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBEventsUtil.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBEventsUtil.scala index 8115209f91..4b0ad9a5a2 100644 --- a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBEventsUtil.scala +++ b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBEventsUtil.scala @@ -163,11 +163,11 @@ object HBEventsUtil { val put = new Put(rowKey.toBytes, event.eventTime.getMillis) def addStringToE(col: Array[Byte], v: String): Put = { - put.addColumn(eBytes, col, Bytes.toBytes(v)) + put.add(eBytes, col, Bytes.toBytes(v)) } def addLongToE(col: Array[Byte], v: Long): Put = { - put.addColumn(eBytes, col, Bytes.toBytes(v)) + put.add(eBytes, col, Bytes.toBytes(v)) } addStringToE(colNames("event"), event.event) @@ -310,12 +310,12 @@ object HBEventsUtil { // If you specify a startRow and stopRow, // to scan in reverse, the startRow needs to be lexicographically // after the stopRow. 
- scan.withStartRow(stop) - scan.withStopRow(start) + scan.setStartRow(stop) + scan.setStopRow(start) scan.setReversed(true) } else { - scan.withStartRow(start) - scan.withStopRow(stop) + scan.setStartRow(start) + scan.setStopRow(stop) } } case (_, _) => { diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBLEvents.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBLEvents.scala index 97ce9ba990..e95e7e82b1 100644 --- a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBLEvents.scala +++ b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBLEvents.scala @@ -42,10 +42,8 @@ class HBLEvents(val client: HBClient, config: StorageClientConfig, val namespace def resultToEvent(result: Result, appId: Int): Event = HBEventsUtil.resultToEvent(result, appId) - def getTable(appId: Int, channelId: Option[Int] = None): Table = { - val tableName = TableName.valueOf(HBEventsUtil.tableName(namespace, appId, channelId)) - client.connection.getTable(tableName) - } + def getTable(appId: Int, channelId: Option[Int] = None): HTableInterface = + client.connection.getTable(HBEventsUtil.tableName(namespace, appId, channelId)) override def init(appId: Int, channelId: Option[Int] = None): Boolean = { @@ -105,6 +103,7 @@ class HBLEvents(val client: HBClient, config: StorageClientConfig, val namespace val table = getTable(appId, channelId) val (put, rowKey) = HBEventsUtil.eventToPut(event, appId) table.put(put) + table.flushCommits() table.close() rowKey.toString } @@ -118,6 +117,7 @@ class HBLEvents(val client: HBClient, config: StorageClientConfig, val namespace val table = getTable(appId, channelId) val (puts, rowKeys) = events.map { event => HBEventsUtil.eventToPut(event, appId) }.unzip table.put(puts) + table.flushCommits() table.close() rowKeys.map(_.toString) } diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/StorageClient.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/StorageClient.scala index df1a3f9911..1720410150 100644 --- a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/StorageClient.scala +++ b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/StorageClient.scala @@ -25,15 +25,15 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hbase.HBaseConfiguration import org.apache.hadoop.hbase.MasterNotRunningException import org.apache.hadoop.hbase.ZooKeeperConnectionException -import org.apache.hadoop.hbase.client.ConnectionFactory -import org.apache.hadoop.hbase.client.Connection +import org.apache.hadoop.hbase.client.HConnectionManager +import org.apache.hadoop.hbase.client.HConnection import org.apache.hadoop.hbase.client.HBaseAdmin import grizzled.slf4j.Logging case class HBClient( val conf: Configuration, - val connection: Connection, + val connection: HConnection, val admin: HBaseAdmin ) @@ -73,7 +73,7 @@ class StorageClient(val config: StorageClientConfig) } } - val connection = ConnectionFactory.createConnection(conf) + val connection = HConnectionManager.createConnection(conf) val client = HBClient( conf = conf, From 83108663950449567669260c953ad5769d6f8807 Mon Sep 17 00:00:00 2001 From: shimamoto Date: Thu, 27 Jun 2019 12:25:17 +0900 Subject: [PATCH 08/14] fixup --- .travis.yml | 4 ++-- build.sbt | 17 ++++++----------- common/build.sbt | 10 +++++----- conf/pio-env.sh.template | 6 +++--- conf/pio-vendors.sh | 6 +++--- core/build.sbt | 18 +++++++++++------- 
data/build.sbt | 13 +++++++++---- docs/manual/data/versions.yml | 6 +++--- e2/build.sbt | 4 ++-- project/PIOBuild.scala | 1 + storage/elasticsearch/build.sbt | 2 +- storage/hbase/build.sbt | 2 +- storage/hdfs/build.sbt | 3 ++- storage/jdbc/build.sbt | 7 +++---- .../storage/jdbc/JDBCEngineInstances.scala | 1 - .../storage/jdbc/JDBCEvaluationInstances.scala | 1 - .../data/storage/jdbc/JDBCLEvents.scala | 1 - .../data/storage/jdbc/LEventsSpec.scala | 1 + .../data/storage/jdbc/PEventsSpec.scala | 7 ++++--- .../data/storage/jdbc/TestEvents.scala | 2 -- storage/localfs/build.sbt | 3 ++- storage/s3/build.sbt | 9 ++++++++- tools/build.sbt | 15 +++++++++++---- 23 files changed, 78 insertions(+), 61 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2c3b6c4de5..813bf38897 100644 --- a/.travis.yml +++ b/.travis.yml @@ -52,7 +52,7 @@ env: PIO_ELASTICSEARCH_VERSION=5.6.9 - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=ELASTICSEARCH MODELDATA_REP=S3 - PIO_ELASTICSEARCH_VERSION=6.8.0 + PIO_ELASTICSEARCH_VERSION=6.8.1 - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS PIO_HBASE_VERSION=1.2.6 @@ -101,7 +101,7 @@ env: - BUILD_TYPE=Integration METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS PIO_SCALA_VERSION=2.11.12 - PIO_SPARK_VERSION=2.4.3 + PIO_SPARK_VERSION=2.4.0 PIO_HADOOP_VERSION=2.7.7 - BUILD_TYPE=LicenseCheck diff --git a/build.sbt b/build.sbt index a3f420942c..f4fd47e2f9 100644 --- a/build.sbt +++ b/build.sbt @@ -37,13 +37,15 @@ javacOptions in (ThisBuild, compile) ++= Seq("-source", "1.8", "-target", "1.8", "-Xlint:deprecation", "-Xlint:unchecked") // Ignore differentiation of Spark patch levels -sparkVersion in ThisBuild := sys.props.getOrElse("spark.version", "2.4.3") +sparkVersion in ThisBuild := sys.props.getOrElse("spark.version", "2.1.3") sparkBinaryVersion in ThisBuild := binaryVersion(sparkVersion.value) hadoopVersion in ThisBuild := sys.props.getOrElse("hadoop.version", "2.7.7") -elasticsearchVersion in ThisBuild := sys.props.getOrElse("elasticsearch.version", "6.8.0") +akkaVersion in ThisBuild := sys.props.getOrElse("akka.version", "2.5.17") + +elasticsearchVersion in ThisBuild := sys.props.getOrElse("elasticsearch.version", "6.8.1") hbaseVersion in ThisBuild := sys.props.getOrElse("hbase.version", "1.2.6") @@ -65,14 +67,8 @@ val commonSettings = Seq( val commonTestSettings = Seq( libraryDependencies ++= Seq( - "com.typesafe.akka" %% "akka-http-testkit" % "10.1.8" % "test", - "com.typesafe.akka" %% "akka-stream-testkit" % "2.5.23" % "test", - "org.specs2" %% "specs2-core" % "4.5.1" % "test", - "org.scalatest" %% "scalatest" % "3.0.8" % "test", - "org.scalamock" %% "scalamock" % "4.2.0" % "test", - "com.h2database" % "h2" % "1.4.199" % "test", - "org.postgresql" % "postgresql" % "42.2.5" % "test", - "org.scalikejdbc" %% "scalikejdbc-joda-time" % "3.3.4" % "test")) + "org.postgresql" % "postgresql" % "9.4-1204-jdbc41" % "test", + "org.scalikejdbc" %% "scalikejdbc" % "3.1.0" % "test")) val dataElasticsearch = (project in file("storage/elasticsearch")). settings(commonSettings: _*) @@ -132,7 +128,6 @@ val core = (project in file("core")). val e2 = (project in file("e2")). dependsOn(core). settings(commonSettings: _*). - settings(commonTestSettings: _*). enablePlugins(GenJavadocPlugin). 
disablePlugins(sbtassembly.AssemblyPlugin) diff --git a/common/build.sbt b/common/build.sbt index 311b152998..f9fd97bfea 100644 --- a/common/build.sbt +++ b/common/build.sbt @@ -20,11 +20,11 @@ import PIOBuild._ name := "apache-predictionio-common" libraryDependencies ++= Seq( - "com.typesafe.akka" %% "akka-actor" % "2.5.23", - "com.typesafe.akka" %% "akka-slf4j" % "2.5.23", - "com.typesafe.akka" %% "akka-stream" % "2.5.23", - "com.typesafe.akka" %% "akka-http" % "10.1.8", - "org.json4s" %% "json4s-native" % json4sVersion.value + "com.typesafe.akka" %% "akka-actor" % akkaVersion.value, + "com.typesafe.akka" %% "akka-slf4j" % akkaVersion.value, + "com.typesafe.akka" %% "akka-http" % "10.1.5", + "org.json4s" %% "json4s-native" % json4sVersion.value, + "com.typesafe.akka" %% "akka-stream" % "2.5.12" ) pomExtra := childrenPomExtra.value diff --git a/conf/pio-env.sh.template b/conf/pio-env.sh.template index 803ee2dbf5..bb18b9872f 100644 --- a/conf/pio-env.sh.template +++ b/conf/pio-env.sh.template @@ -24,9 +24,9 @@ # you need to change these to fit your site. # SPARK_HOME: Apache Spark is a hard dependency and must be configured. -SPARK_HOME=$PIO_HOME/vendors/spark-2.4.3-bin-hadoop2.7 +SPARK_HOME=$PIO_HOME/vendors/spark-2.1.1-bin-hadoop2.6 -POSTGRES_JDBC_DRIVER=$PIO_HOME/lib/postgresql-42.2.5.jar +POSTGRES_JDBC_DRIVER=$PIO_HOME/lib/postgresql-42.0.0.jar MYSQL_JDBC_DRIVER=$PIO_HOME/lib/mysql-connector-java-5.1.41.jar # ES_CONF_DIR: You must configure this if you have advanced configuration for @@ -88,7 +88,7 @@ PIO_STORAGE_SOURCES_PGSQL_PASSWORD=pio # PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=localhost # PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9200 # PIO_STORAGE_SOURCES_ELASTICSEARCH_SCHEMES=http -# PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$PIO_HOME/vendors/elasticsearch-6.8.0 +# PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$PIO_HOME/vendors/elasticsearch-6.8.1 # Optional basic HTTP auth # PIO_STORAGE_SOURCES_ELASTICSEARCH_USERNAME=my-name # PIO_STORAGE_SOURCES_ELASTICSEARCH_PASSWORD=my-secret diff --git a/conf/pio-vendors.sh b/conf/pio-vendors.sh index bb067d76f1..a29faab008 100644 --- a/conf/pio-vendors.sh +++ b/conf/pio-vendors.sh @@ -24,7 +24,7 @@ if [ -z "$PIO_SCALA_VERSION" ]; then fi if [ -z "$PIO_SPARK_VERSION" ]; then - PIO_SPARK_VERSION="2.4.3" + PIO_SPARK_VERSION="2.1.3" fi if [ -z "$PIO_HADOOP_VERSION" ]; then @@ -32,7 +32,7 @@ if [ -z "$PIO_HADOOP_VERSION" ]; then fi if [ -z "$PIO_ELASTICSEARCH_VERSION" ]; then - PIO_ELASTICSEARCH_VERSION="6.8.0" + PIO_ELASTICSEARCH_VERSION="6.8.1" fi if [ -z "$PIO_HBASE_VERSION" ]; then @@ -45,7 +45,7 @@ export ES_TAG="$PIO_ELASTICSEARCH_VERSION" HBASE_MAJOR=`echo $PIO_HBASE_VERSION | awk -F. '{print $1 "." $2}'` export HBASE_TAG="$HBASE_MAJOR" -PGSQL_JAR=postgresql-42.2.5.jar +PGSQL_JAR=postgresql-9.4-1204.jdbc41.jar PGSQL_DOWNLOAD=https://jdbc.postgresql.org/download/${PGSQL_JAR} HADOOP_MAJOR=`echo $PIO_HADOOP_VERSION | awk -F. '{print $1 "." 
$2}'` diff --git a/core/build.sbt b/core/build.sbt index f018dec0ee..14b3449744 100644 --- a/core/build.sbt +++ b/core/build.sbt @@ -20,15 +20,19 @@ import PIOBuild._ name := "apache-predictionio-core" libraryDependencies ++= Seq( - "com.github.scopt" %% "scopt" % "3.7.0", - "com.google.code.gson" % "gson" % "2.8.5", - "com.twitter" %% "chill-bijection" % "0.9.3", - "de.javakaffee" % "kryo-serializers" % "0.45", - "net.jodah" % "typetools" % "0.6.1", + "com.github.scopt" %% "scopt" % "3.5.0", + "com.google.code.gson" % "gson" % "2.5", + "com.twitter" %% "chill-bijection" % "0.7.2", + "de.javakaffee" % "kryo-serializers" % "0.37", + "net.jodah" % "typetools" % "0.3.1", "org.apache.spark" %% "spark-core" % sparkVersion.value % "provided", "org.json4s" %% "json4s-ext" % json4sVersion.value, - "org.scalaj" %% "scalaj-http" % "2.4.1", - "org.slf4j" % "slf4j-log4j12" % "1.7.26" + "org.scalaj" %% "scalaj-http" % "1.1.6", + "org.slf4j" % "slf4j-log4j12" % "1.7.18", + "org.scalatest" %% "scalatest" % "2.1.7" % "test", + "org.specs2" %% "specs2" % "2.3.13" % "test", + "org.scalamock" %% "scalamock-scalatest-support" % "3.5.0" % "test", + "com.h2database" % "h2" % "1.4.196" % "test" ) parallelExecution in Test := false diff --git a/data/build.sbt b/data/build.sbt index de24d6fe0b..65925360a3 100644 --- a/data/build.sbt +++ b/data/build.sbt @@ -21,11 +21,16 @@ name := "apache-predictionio-data" libraryDependencies ++= Seq( "org.scala-lang" % "scala-reflect" % scalaVersion.value, - "com.github.nscala-time" %% "nscala-time" % "2.22.0", - "com.google.guava" % "guava" % "27.1-jre", + "com.github.nscala-time" %% "nscala-time" % "2.6.0", + "com.google.guava" % "guava" % "14.0.1", + "com.typesafe.akka" %% "akka-http-testkit" % "10.1.5" % "test", "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided", - "org.clapper" %% "grizzled-slf4j" % "1.3.3" -) + "org.clapper" %% "grizzled-slf4j" % "1.0.2", + "org.scalatest" %% "scalatest" % "2.1.7" % "test", + "org.specs2" %% "specs2" % "3.3.1" % "test" + exclude("org.scalaz.stream", s"scalaz-stream_${scalaBinaryVersion.value}"), + "org.scalamock" %% "scalamock-specs2-support" % "3.5.0" % "test", + "com.h2database" % "h2" % "1.4.196" % "test") parallelExecution in Test := false diff --git a/docs/manual/data/versions.yml b/docs/manual/data/versions.yml index 358359a1f7..432fa4def8 100644 --- a/docs/manual/data/versions.yml +++ b/docs/manual/data/versions.yml @@ -1,7 +1,7 @@ pio: 0.14.0 -spark: 2.4.3 -spark_download_filename: spark-2.4.3-bin-hadoop2.7 -elasticsearch_download_filename: elasticsearch-6.8.0 +spark: 2.4.0 +spark_download_filename: spark-2.4.0-bin-hadoop2.7 +elasticsearch_download_filename: elasticsearch-6.8.1 hbase_version: 1.2.6 hbase_basename: hbase-1.2.6 hbase_variant: bin diff --git a/e2/build.sbt b/e2/build.sbt index b89d736170..29c1d48667 100644 --- a/e2/build.sbt +++ b/e2/build.sbt @@ -22,7 +22,7 @@ name := "apache-predictionio-e2" parallelExecution in Test := false libraryDependencies ++= Seq( - "org.apache.spark" %% "spark-mllib" % sparkVersion.value % "provided" -) + "org.apache.spark" %% "spark-mllib" % sparkVersion.value % "provided", + "org.scalatest" %% "scalatest" % "2.2.5" % "test") pomExtra := childrenPomExtra.value diff --git a/project/PIOBuild.scala b/project/PIOBuild.scala index 91709d6baa..615efc0998 100644 --- a/project/PIOBuild.scala +++ b/project/PIOBuild.scala @@ -24,6 +24,7 @@ object PIOBuild { val sparkVersion = settingKey[String]("The version of Apache Spark used for building") val sparkBinaryVersion = 
settingKey[String]("The binary version of Apache Spark used for building") val hadoopVersion = settingKey[String]("The version of Apache Hadoop used for building") + val akkaVersion = settingKey[String]("The version of Akka used for building") val childrenPomExtra = settingKey[scala.xml.NodeSeq]("Extra POM data for children projects") diff --git a/storage/elasticsearch/build.sbt b/storage/elasticsearch/build.sbt index 5b31854e2c..b7362f68df 100644 --- a/storage/elasticsearch/build.sbt +++ b/storage/elasticsearch/build.sbt @@ -25,7 +25,7 @@ libraryDependencies ++= Seq( "org.elasticsearch.client" % "elasticsearch-rest-client" % elasticsearchVersion.value, "org.elasticsearch" %% "elasticsearch-spark-20" % elasticsearchVersion.value exclude("org.apache.spark", "*"), - "org.specs2" %% "specs2-core" % "4.5.1" % "test") + "org.specs2" %% "specs2" % "2.3.13" % "test") parallelExecution in Test := false diff --git a/storage/hbase/build.sbt b/storage/hbase/build.sbt index 28cd391f48..5e412b06cf 100644 --- a/storage/hbase/build.sbt +++ b/storage/hbase/build.sbt @@ -33,7 +33,7 @@ libraryDependencies ++= Seq( exclude("org.mortbay.jetty", "servlet-api-2.5") exclude("org.mortbay.jetty", "jsp-api-2.1") exclude("org.mortbay.jetty", "jsp-2.1"), - "org.specs2" %% "specs2-core" % "4.5.1" % "test") + "org.specs2" %% "specs2" % "2.3.13" % "test") parallelExecution in Test := false diff --git a/storage/hdfs/build.sbt b/storage/hdfs/build.sbt index d9da828ed9..50ec37928d 100644 --- a/storage/hdfs/build.sbt +++ b/storage/hdfs/build.sbt @@ -23,7 +23,8 @@ libraryDependencies ++= Seq( "org.apache.hadoop" % "hadoop-common" % hadoopVersion.value exclude("commons-beanutils", "*"), "org.apache.hadoop" % "hadoop-hdfs" % hadoopVersion.value, - "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided") + "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided", + "org.scalatest" %% "scalatest" % "2.1.7" % "test") parallelExecution in Test := false diff --git a/storage/jdbc/build.sbt b/storage/jdbc/build.sbt index 5944d137f7..90265405fb 100644 --- a/storage/jdbc/build.sbt +++ b/storage/jdbc/build.sbt @@ -22,10 +22,9 @@ name := "apache-predictionio-data-jdbc" libraryDependencies ++= Seq( "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided", "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided", - "org.scalikejdbc" %% "scalikejdbc-joda-time" % "3.3.4", - "org.postgresql" % "postgresql" % "42.2.5" % "test", - "org.specs2" %% "specs2-core" % "4.5.1" % "test" -) + "org.scalikejdbc" %% "scalikejdbc" % "3.1.0", + "org.postgresql" % "postgresql" % "9.4-1204-jdbc41" % "test", + "org.specs2" %% "specs2" % "2.3.13" % "test") parallelExecution in Test := false diff --git a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEngineInstances.scala b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEngineInstances.scala index d99bd1038b..13c374d92a 100644 --- a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEngineInstances.scala +++ b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEngineInstances.scala @@ -23,7 +23,6 @@ import org.apache.predictionio.data.storage.EngineInstance import org.apache.predictionio.data.storage.EngineInstances import org.apache.predictionio.data.storage.StorageClientConfig import scalikejdbc._ -import scalikejdbc.jodatime.JodaWrappedResultSet._ /** JDBC implementation of [[EngineInstances]] */ class 
JDBCEngineInstances(client: String, config: StorageClientConfig, prefix: String) diff --git a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEvaluationInstances.scala b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEvaluationInstances.scala index 5bd877f668..90eb5f3adb 100644 --- a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEvaluationInstances.scala +++ b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEvaluationInstances.scala @@ -23,7 +23,6 @@ import org.apache.predictionio.data.storage.EvaluationInstance import org.apache.predictionio.data.storage.EvaluationInstances import org.apache.predictionio.data.storage.StorageClientConfig import scalikejdbc._ -import scalikejdbc.jodatime.JodaWrappedResultSet._ /** JDBC implementations of [[EvaluationInstances]] */ class JDBCEvaluationInstances(client: String, config: StorageClientConfig, prefix: String) diff --git a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCLEvents.scala b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCLEvents.scala index 17222c9783..b4230ccd11 100644 --- a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCLEvents.scala +++ b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCLEvents.scala @@ -29,7 +29,6 @@ import org.json4s.JObject import org.json4s.native.Serialization.read import org.json4s.native.Serialization.write import scalikejdbc._ -import scalikejdbc.jodatime.JodaWrappedResultSet._ import scala.concurrent.ExecutionContext import scala.concurrent.Future diff --git a/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/LEventsSpec.scala b/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/LEventsSpec.scala index 29906ef0d7..d723d07908 100644 --- a/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/LEventsSpec.scala +++ b/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/LEventsSpec.scala @@ -20,6 +20,7 @@ package org.apache.predictionio.data.storage.jdbc import org.apache.predictionio.data.storage.{Event, LEvents, PropertyMap, Storage} import org.specs2._ +import org.specs2.specification.Step class LEventsSpec extends Specification with TestEvents { def is = s2""" diff --git a/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/PEventsSpec.scala b/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/PEventsSpec.scala index 71875eac1f..71ebf5ff7a 100644 --- a/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/PEventsSpec.scala +++ b/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/PEventsSpec.scala @@ -22,6 +22,7 @@ import org.apache.predictionio.data.storage._ import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.specs2._ +import org.specs2.specification.Step class PEventsSpec extends Specification with TestEvents { @@ -53,7 +54,7 @@ class PEventsSpec extends Specification with TestEvents { PEvents can be implemented by: - JDBCPEvents ${jdbcPEvents} - - (stop Spark) ${step(sc.stop())} + - (stop Spark) ${Step(sc.stop())} """ @@ -61,8 +62,8 @@ class PEventsSpec extends Specification with TestEvents { JDBCPEvents should - behave like any PEvents implementation ${events(jdbcLocal, jdbcPar)} - - (table cleanup) ${step(StorageTestUtils.dropJDBCTable(s"${dbName}_$appId"))} - - (table cleanup) 
${step(StorageTestUtils.dropJDBCTable(s"${dbName}_${appId}_$channelId"))} + - (table cleanup) ${Step(StorageTestUtils.dropJDBCTable(s"${dbName}_$appId"))} + - (table cleanup) ${Step(StorageTestUtils.dropJDBCTable(s"${dbName}_${appId}_$channelId"))} """ diff --git a/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/TestEvents.scala b/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/TestEvents.scala index f94c0d070c..2cb08e5beb 100644 --- a/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/TestEvents.scala +++ b/storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/TestEvents.scala @@ -23,8 +23,6 @@ import org.joda.time.{DateTime, DateTimeZone} trait TestEvents { - protected implicit val testExecutionContext = concurrent.ExecutionContext.Implicits.global - val u1BaseTime = new DateTime(654321) val u2BaseTime = new DateTime(6543210) val u3BaseTime = new DateTime(6543410) diff --git a/storage/localfs/build.sbt b/storage/localfs/build.sbt index 61f07ebeb3..2306d24f7c 100644 --- a/storage/localfs/build.sbt +++ b/storage/localfs/build.sbt @@ -20,7 +20,8 @@ import PIOBuild._ name := "apache-predictionio-data-localfs" libraryDependencies ++= Seq( - "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided") + "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided", + "org.scalatest" %% "scalatest" % "2.1.7" % "test") parallelExecution in Test := false diff --git a/storage/s3/build.sbt b/storage/s3/build.sbt index f587b90627..40222090ab 100644 --- a/storage/s3/build.sbt +++ b/storage/s3/build.sbt @@ -21,7 +21,9 @@ name := "apache-predictionio-data-s3" libraryDependencies ++= Seq( "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided", - "com.amazonaws" % "aws-java-sdk-s3" % "1.11.571") + "com.google.guava" % "guava" % "14.0.1" % "provided", + "com.amazonaws" % "aws-java-sdk-s3" % "1.11.132", + "org.scalatest" %% "scalatest" % "2.1.7" % "test") parallelExecution in Test := false @@ -29,6 +31,11 @@ pomExtra := childrenPomExtra.value assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false) +assemblyShadeRules in assembly := Seq( + ShadeRule.rename("org.apache.http.**" -> "shadeio.data.s3.http.@1").inAll, + ShadeRule.rename("com.fasterxml.**" -> "shadeio.data.s3.fasterxml.@1").inAll +) + // skip test in assembly test in assembly := {} diff --git a/tools/build.sbt b/tools/build.sbt index c900b7fc1e..acdb1fe4a4 100644 --- a/tools/build.sbt +++ b/tools/build.sbt @@ -23,8 +23,9 @@ name := "apache-predictionio-tools" libraryDependencies ++= Seq( "com.github.zafarkhaja" % "java-semver" % "0.9.0", "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided", - "com.typesafe.akka" %% "akka-slf4j" % "2.5.23" -) + "com.typesafe.akka" %% "akka-slf4j" % akkaVersion.value, + "com.typesafe.akka" %% "akka-http-testkit" % "10.1.5" % "test", + "org.specs2" %% "specs2-core" % "4.2.0" % "test") assemblyMergeStrategy in assembly := { case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat @@ -37,12 +38,18 @@ assemblyMergeStrategy in assembly := { assemblyExcludedJars in assembly := { val cp = (fullClasspath in assembly).value cp filter { _.data.getName match { - case "reflectasm-1.11.7.jar" => true - case "kryo-5.0.0-RC1.jar" => true + case "reflectasm-1.10.1.jar" => true + case "kryo-3.0.3.jar" => true case _ => false }} } +assemblyShadeRules in assembly := Seq( + ShadeRule.rename("org.objenesis.**" -> 
"shadeio.@1").inLibrary("com.esotericsoftware.kryo" % "kryo" % "2.21").inProject, + ShadeRule.rename("com.esotericsoftware.reflectasm.**" -> "shadeio.@1").inLibrary("com.esotericsoftware.kryo" % "kryo" % "2.21").inProject, + ShadeRule.rename("com.esotericsoftware.minlog.**" -> "shadeio.@1").inLibrary("com.esotericsoftware.kryo" % "kryo" % "2.21").inProject +) + // skip test in assembly test in assembly := {} From cadd37203295b401eb3451d4f08a05fe994450e2 Mon Sep 17 00:00:00 2001 From: shimamoto Date: Thu, 4 Jul 2019 11:55:51 +0900 Subject: [PATCH 09/14] fixup --- LICENSE.txt | 1 - .../storage/elasticsearch/ESEvaluationInstances.scala | 4 ++-- .../data/storage/elasticsearch/ESLEvents.scala | 2 +- .../data/storage/elasticsearch/ESSequences.scala | 2 +- .../pio_tests/engines/recommendation-engine/build.sbt | 10 +++------- .../engines/recommendation-engine/project/assembly.sbt | 2 +- 6 files changed, 8 insertions(+), 13 deletions(-) diff --git a/LICENSE.txt b/LICENSE.txt index 6e02f9bccb..e06a1bcca3 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1708,7 +1708,6 @@ Binary distribution bundles org.scala-lang.modules # scala-parser-combinators_2.11 # 1.0.6 (http://scala-lang.org/) org.scala-lang.modules # scala-parser-combinators_2.11 # 1.1.0 (http://scala-lang.org/) org.scala-lang.modules # scala-xml_2.11 # 1.0.5 (http://scala-lang.org/) - org.scala-lang.modules # scala-xml_2.11 # 1.0.6 (http://scala-lang.org/) which is available under the BSD license (http://www.scala-lang.org/downloads/license.html) diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala index e97ee97c3f..0025950d03 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala @@ -54,8 +54,8 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind ("engineParamsGeneratorClass" -> ("type" -> "keyword")) ~ ("batch" -> ("type" -> "keyword")) ~ ("evaluatorResults" -> ("type" -> "text")) ~ - ("evaluatorResultsHTML" -> (("type" -> "object") ~ ("enabled" -> false))) ~ - ("evaluatorResultsJSON" -> (("type" -> "object") ~ ("enabled" -> false))))) + ("evaluatorResultsHTML" -> ("enabled" -> false)) ~ + ("evaluatorResultsJSON" -> ("enabled" -> false)))) ESUtils.createMapping(client, internalIndex, estype, compact(render(mappingJson))) def insert(i: EvaluationInstance): String = { diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala index 391f7ae960..708d3d33b3 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala @@ -64,7 +64,7 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd ("entityId" -> ("type" -> "keyword")) ~ ("targetEntityType" -> ("type" -> "keyword")) ~ ("targetEntityId" -> ("type" -> "keyword")) ~ - ("properties" -> (("type" -> "object") ~ ("enabled" -> false))) ~ + ("properties" -> ("enabled" -> false)) ~ ("eventTime" -> ("type" -> "date")) ~ ("tags" -> 
("type" -> "keyword")) ~ ("prId" -> ("type" -> "keyword")) ~ diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala index 94697a847c..ade0f40ce9 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala @@ -43,7 +43,7 @@ class ESSequences(client: RestClient, config: StorageClientConfig, index: String val mappingJson = (estype -> ("properties" -> - ("n" -> (("type" -> "object") ~ ("enabled" -> false))))) + ("n" -> ("enabled" -> false)))) ESUtils.createMapping(client, internalIndex, estype, compact(render(mappingJson))) def genNext(name: String): Long = { diff --git a/tests/pio_tests/engines/recommendation-engine/build.sbt b/tests/pio_tests/engines/recommendation-engine/build.sbt index 14454179d5..c76c1f2c79 100644 --- a/tests/pio_tests/engines/recommendation-engine/build.sbt +++ b/tests/pio_tests/engines/recommendation-engine/build.sbt @@ -15,10 +15,6 @@ * limitations under the License. */ -import AssemblyKeys._ - -assemblySettings - scalaVersion in ThisBuild := sys.env.getOrElse("PIO_SCALA_VERSION", "2.11.12") name := "template-scala-parallel-recommendation" @@ -26,6 +22,6 @@ name := "template-scala-parallel-recommendation" organization := "org.apache.predictionio" libraryDependencies ++= Seq( - "org.apache.predictionio" %% "apache-predictionio-core" % "0.14.0" % "provided", - "org.apache.spark" %% "spark-core" % sys.env.getOrElse("PIO_SPARK_VERSION", "2.1.1") % "provided", - "org.apache.spark" %% "spark-mllib" % sys.env.getOrElse("PIO_SPARK_VERSION", "2.1.1") % "provided") + "org.apache.predictionio" %% "apache-predictionio-core" % "0.15.0-SNAPSHOT" % "provided", + "org.apache.spark" %% "spark-core" % sys.env.getOrElse("PIO_SPARK_VERSION", "2.1.3") % "provided", + "org.apache.spark" %% "spark-mllib" % sys.env.getOrElse("PIO_SPARK_VERSION", "2.1.3") % "provided") diff --git a/tests/pio_tests/engines/recommendation-engine/project/assembly.sbt b/tests/pio_tests/engines/recommendation-engine/project/assembly.sbt index 54c32528e9..9c014713d3 100644 --- a/tests/pio_tests/engines/recommendation-engine/project/assembly.sbt +++ b/tests/pio_tests/engines/recommendation-engine/project/assembly.sbt @@ -1 +1 @@ -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2") +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.9") From c044311551a2f74ee642ef59ed38726b98544461 Mon Sep 17 00:00:00 2001 From: shimamoto Date: Thu, 4 Jul 2019 15:16:44 +0900 Subject: [PATCH 10/14] fixup --- .../storage/elasticsearch/ESAccessKeys.scala | 6 ++-- .../data/storage/elasticsearch/ESApps.scala | 6 ++-- .../storage/elasticsearch/ESChannels.scala | 2 +- .../elasticsearch/ESEngineInstances.scala | 4 +-- .../elasticsearch/ESEvaluationInstances.scala | 6 ++-- .../storage/elasticsearch/ESLEvents.scala | 34 ++++++++----------- .../storage/elasticsearch/ESPEvents.scala | 8 ++--- .../storage/elasticsearch/ESSequences.scala | 2 +- 8 files changed, 30 insertions(+), 38 deletions(-) diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala index eef83e4f68..cac2613253 100644 --- 
a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala @@ -94,7 +94,7 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin ESUtils.getAll[AccessKey](client, internalIndex, estype, compact(render(json))) } catch { case e: IOException => - error("Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$internalIndex/$estype/_search", e) Nil } } @@ -108,7 +108,7 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin ESUtils.getAll[AccessKey](client, internalIndex, estype, compact(render(json))) } catch { case e: IOException => - error("Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$internalIndex/$estype/_search", e) Nil } } @@ -118,7 +118,7 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin try { val entity = new NStringEntity(write(accessKey), ContentType.APPLICATION_JSON) val response = client.performRequest( - "POST", + "PUT", s"/$internalIndex/$estype/$id", Map("refresh" -> "true").asJava, entity) diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala index 26621cff35..e121ef650d 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala @@ -130,7 +130,7 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) ESUtils.getAll[App](client, internalIndex, estype, compact(render(json))) } catch { case e: IOException => - error("Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$internalIndex/$estype/_search", e) Nil } } @@ -138,9 +138,9 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) def update(app: App): Unit = { val id = app.id.toString try { - val entity = new NStringEntity(write(app), ContentType.APPLICATION_JSON); + val entity = new NStringEntity(write(app), ContentType.APPLICATION_JSON) val response = client.performRequest( - "POST", + "PUT", s"/$internalIndex/$estype/$id", Map("refresh" -> "true").asJava, entity) diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala index ac248debf4..e58dca815d 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala @@ -112,7 +112,7 @@ class ESChannels(client: RestClient, config: StorageClientConfig, index: String) try { val entity = new NStringEntity(write(channel), ContentType.APPLICATION_JSON) val response = client.performRequest( - "POST", + "PUT", s"/$internalIndex/$estype/$id", Map("refresh" -> "true").asJava, entity) diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala index 
96f8a6720c..4cf1876078 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala @@ -84,7 +84,7 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: val entity = new NStringEntity("{}", ContentType.APPLICATION_JSON) val response = client.performRequest( "POST", - s"/$internalIndex/$estype/", + s"/$internalIndex/$estype", Map("refresh" -> "true").asJava, entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) @@ -185,7 +185,7 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: try { val entity = new NStringEntity(write(i), ContentType.APPLICATION_JSON) val response = client.performRequest( - "POST", + "PUT", s"/$internalIndex/$estype/$id", Map("refresh" -> "true").asJava, entity) diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala index 0025950d03..284a165b78 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala @@ -110,7 +110,7 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind ESUtils.getAll[EvaluationInstance](client, internalIndex, estype, compact(render(json))) } catch { case e: IOException => - error("Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$internalIndex/$estype/_search", e) Nil } } @@ -127,7 +127,7 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind ESUtils.getAll[EvaluationInstance](client, internalIndex, estype, compact(render(json))) } catch { case e: IOException => - error("Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$internalIndex/$estype/_search", e) Nil } } @@ -137,7 +137,7 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind try { val entity = new NStringEntity(write(i), ContentType.APPLICATION_JSON) val response = client.performRequest( - "POST", + "PUT", s"/$internalIndex/$estype/$id", Map("refresh" -> "true").asJava, entity) diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala index 708d3d33b3..287cc8719e 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala @@ -77,20 +77,16 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd val estype = getEsType(appId, channelId) val index = baseIndex + "_" + estype try { - val json = - ("query" -> - ("match_all" -> List.empty)) - val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) client.performRequest( - "POST", - s"/$index/$estype/_delete_by_query", - Map("refresh" -> ESUtils.getEventDataRefresh(config)).asJava, - entity).getStatusLine.getStatusCode match { - case 200 => true - case _ => - error(s"Failed 
to remove $index/$estype") - false - } + "DELETE", + s"/$index", + Map("refresh" -> ESUtils.getEventDataRefresh(config)).asJava + ).getStatusLine.getStatusCode match { + case 200 => true + case _ => + error(s"Failed to remove $index/$estype") + false + } } catch { case e: Exception => error(s"Failed to remove $index/$estype", e) @@ -125,7 +121,7 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd ("properties" -> write(event.properties.toJObject)) val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) val response = client.performRequest( - "POST", + "PUT", s"/$index/$estype/$id", Map("refresh" -> ESUtils.getEventDataRefresh(config)).asJava, entity) @@ -133,7 +129,6 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd val result = (jsonResponse \ "result").extract[String] result match { case "created" => id - case "updated" => id case _ => error(s"[$result] Failed to update $index/$estype/$id") "" @@ -160,7 +155,7 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd val json = events.zip(ids).map { case (event, id) => val commandJson = - ("index" -> ( + ("create" -> ( ("_index" -> index) ~ ("_type" -> estype) ~ ("_id" -> id) @@ -195,12 +190,11 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd val items = (responseJson \ "items").asInstanceOf[JArray] items.arr.map { case value: JObject => - val result = (value \ "index" \ "result").extract[String] - val id = (value \ "index" \ "_id").extract[String] + val result = (value \ "create" \ "result").extract[String] + val id = (value \ "create" \ "_id").extract[String] result match { case "created" => id - case "updated" => id case _ => error(s"[$result] Failed to update $index/$estype/$id") "" @@ -266,7 +260,7 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd } } catch { case e: IOException => - error("Failed to access to /$index/$estype/_search", e) + error(s"Failed to access to /$index/$estype/_search", e) None } } diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala index a86d378331..87c0c2aba3 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala @@ -124,11 +124,9 @@ class ESPEvents(client: RestClient, config: StorageClientConfig, baseIndex: Stri Map("refresh" -> ESUtils.getEventDataRefresh(config)).asJava, entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) - val result = (jsonResponse \ "result").extract[String] - result match { - case "deleted" => - case _ => - logger.error(s"[$result] Failed to update $index/$estype:$eventId") + if ((jsonResponse \ "deleted").extract[Int] == 0) { + logger.warn("The number of documents that were successfully deleted is 0. 
" + + s"$index/$estype:$eventId") } } catch { case e: IOException => diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala index ade0f40ce9..1dad7e9a7a 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala @@ -50,7 +50,7 @@ class ESSequences(client: RestClient, config: StorageClientConfig, index: String try { val entity = new NStringEntity(write("n" -> name), ContentType.APPLICATION_JSON) val response = client.performRequest( - "POST", + "PUT", s"/$internalIndex/$estype/$name", Map("refresh" -> "false").asJava, entity) From f9060af8fee500b9b3bba0569aa38ec3884b6d6c Mon Sep 17 00:00:00 2001 From: shimamoto Date: Thu, 4 Jul 2019 18:35:20 +0900 Subject: [PATCH 11/14] fixup --- .../predictionio/data/storage/elasticsearch/ESLEvents.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala index 287cc8719e..f656ac1fd5 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala @@ -80,7 +80,7 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd client.performRequest( "DELETE", s"/$index", - Map("refresh" -> ESUtils.getEventDataRefresh(config)).asJava + Map.empty[String, String].asJava ).getStatusLine.getStatusCode match { case 200 => true case _ => From 2a61b763e13c0f6b8e80c71faa9e53f3f3361936 Mon Sep 17 00:00:00 2001 From: shimamoto Date: Tue, 16 Jul 2019 18:42:19 +0900 Subject: [PATCH 12/14] fixup --- .travis.yml | 4 +- .../storage/elasticsearch/ESAccessKeys.scala | 49 +++++----- .../data/storage/elasticsearch/ESApps.scala | 60 ++++++------ .../storage/elasticsearch/ESChannels.scala | 45 ++++----- .../elasticsearch/ESEngineInstances.scala | 75 +++++++-------- .../elasticsearch/ESEvaluationInstances.scala | 65 ++++++------- .../storage/elasticsearch/ESLEvents.scala | 83 +++++----------- .../storage/elasticsearch/ESPEvents.scala | 22 ++--- .../storage/elasticsearch/ESSequences.scala | 25 ++--- .../data/storage/elasticsearch/ESUtils.scala | 95 ++++++++++--------- 10 files changed, 249 insertions(+), 274 deletions(-) diff --git a/.travis.yml b/.travis.yml index 813bf38897..6dd923f596 100644 --- a/.travis.yml +++ b/.travis.yml @@ -49,10 +49,10 @@ env: METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=ELASTICSEARCH MODELDATA_REP=S3 - PIO_ELASTICSEARCH_VERSION=5.6.9 + PIO_ELASTICSEARCH_VERSION=6.8.1 - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=ELASTICSEARCH MODELDATA_REP=S3 - PIO_ELASTICSEARCH_VERSION=6.8.1 + PIO_ELASTICSEARCH_VERSION=7.2.0 - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS PIO_HBASE_VERSION=1.2.6 diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala 
b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala index cac2613253..6661257fcf 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala @@ -36,19 +36,20 @@ import org.json4s.native.Serialization.write import grizzled.slf4j.Logging /** Elasticsearch implementation of AccessKeys. */ -class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: String) +class ESAccessKeys(client: RestClient, config: StorageClientConfig, metadataName: String) extends AccessKeys with Logging { implicit val formats = DefaultFormats.lossless - private val estype = "accesskeys" - private val internalIndex = index + "_" + estype - - ESUtils.createIndex(client, internalIndex) - val mappingJson = - (estype -> - ("properties" -> - ("key" -> ("type" -> "keyword")) ~ - ("events" -> ("type" -> "keyword")))) - ESUtils.createMapping(client, internalIndex, estype, compact(render(mappingJson))) + private val metadataKey = "accesskeys" + private val index = metadataName + "_" + metadataKey + private val estype = { + val mappingJson = + ("mappings" -> + ("properties" -> + ("key" -> ("type" -> "keyword")) ~ + ("events" -> ("type" -> "keyword")))) + + ESUtils.createIndex(client, index, compact(render(mappingJson))) + } def insert(accessKey: AccessKey): Option[String] = { val key = if (accessKey.key.isEmpty) generateKey else accessKey.key @@ -63,7 +64,7 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin try { val response = client.performRequest( "GET", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map.empty[String, String].asJava) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "found").extract[Boolean] match { @@ -77,11 +78,11 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin e.getResponse.getStatusLine.getStatusCode match { case 404 => None case _ => - error(s"Failed to access to /$internalIndex/$estype/$id", e) + error(s"Failed to access to /$index/$estype/$id", e) None } case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/$id", e) + error(s"Failed to access to /$index/$estype/$id", e) None } } @@ -91,10 +92,10 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin val json = ("query" -> ("match_all" -> List.empty)) - ESUtils.getAll[AccessKey](client, internalIndex, estype, compact(render(json))) + ESUtils.getAll[AccessKey](client, index, compact(render(json))) } catch { case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$index/_search", e) Nil } } @@ -105,10 +106,10 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin ("query" -> ("term" -> ("appid" -> appid))) - ESUtils.getAll[AccessKey](client, internalIndex, estype, compact(render(json))) + ESUtils.getAll[AccessKey](client, index, compact(render(json))) } catch { case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$index/_search", e) Nil } } @@ -119,7 +120,7 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin val entity = new NStringEntity(write(accessKey), ContentType.APPLICATION_JSON) val response = client.performRequest( "PUT", - 
s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map("refresh" -> "true").asJava, entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) @@ -128,11 +129,11 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin case "created" => case "updated" => case _ => - error(s"[$result] Failed to update $internalIndex/$estype/$id") + error(s"[$result] Failed to update $index/$estype/$id") } } catch { case e: IOException => - error(s"Failed to update $internalIndex/$estype/$id", e) + error(s"Failed to update $index/$estype/$id", e) } } @@ -140,18 +141,18 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin try { val response = client.performRequest( "DELETE", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map("refresh" -> "true").asJava) val json = parse(EntityUtils.toString(response.getEntity)) val result = (json \ "result").extract[String] result match { case "deleted" => case _ => - error(s"[$result] Failed to update $internalIndex/$estype/id") + error(s"[$result] Failed to delete $index/$estype/$id") } } catch { case e: IOException => - error(s"Failed to update $internalIndex/$estype/id", e) + error(s"Failed to delete $index/$estype/$id", e) } } } diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala index e121ef650d..bb7adf2aaf 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala @@ -36,27 +36,28 @@ import org.json4s.native.Serialization.write import grizzled.slf4j.Logging /** Elasticsearch implementation of Items. 
*/ -class ESApps(client: RestClient, config: StorageClientConfig, index: String) +class ESApps(client: RestClient, config: StorageClientConfig, metadataName: String) extends Apps with Logging { implicit val formats = DefaultFormats.lossless - private val estype = "apps" - private val seq = new ESSequences(client, config, index) - private val internalIndex = index + "_" + estype + private val seq = new ESSequences(client, config, metadataName) + private val metadataKey = "apps" + private val index = metadataName + "_" + metadataKey + private val estype = { + val mappingJson = + ("mappings" -> + ("properties" -> + ("id" -> ("type" -> "keyword")) ~ + ("name" -> ("type" -> "keyword")))) - ESUtils.createIndex(client, internalIndex) - val mappingJson = - (estype -> - ("properties" -> - ("id" -> ("type" -> "keyword")) ~ - ("name" -> ("type" -> "keyword")))) - ESUtils.createMapping(client, internalIndex, estype, compact(render(mappingJson))) + ESUtils.createIndex(client, index, compact(render(mappingJson))) + } def insert(app: App): Option[Int] = { val id = app.id match { case v if v == 0 => @scala.annotation.tailrec def generateId: Int = { - seq.genNext(estype).toInt match { + seq.genNext(metadataKey).toInt match { case x if !get(x).isEmpty => generateId case x => x } @@ -72,7 +73,7 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) try { val response = client.performRequest( "GET", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map.empty[String, String].asJava) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "found").extract[Boolean] match { @@ -86,11 +87,11 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) e.getResponse.getStatusLine.getStatusCode match { case 404 => None case _ => - error(s"Failed to access to /$internalIndex/$estype/$id", e) + error(s"Failed to access to /$index/$estype/$id", e) None } case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/$id", e) + error(s"Failed to access to /$index/$estype/$id", e) None } } @@ -104,20 +105,17 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) val response = client.performRequest( "POST", - s"/$internalIndex/$estype/_search", + s"/$index/_search", Map.empty[String, String].asJava, entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) - (jsonResponse \ "hits" \ "total").extract[Long] match { - case 0 => None - case _ => - val results = (jsonResponse \ "hits" \ "hits").extract[Seq[JValue]] - val result = (results.head \ "_source").extract[App] - Some(result) + val results = (jsonResponse \ "hits" \ "hits").extract[Seq[JValue]] + results.headOption.map { jv => + (jv \ "_source").extract[App] } } catch { case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$index/_search", e) None } } @@ -127,10 +125,10 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) val json = ("query" -> ("match_all" -> Nil)) - ESUtils.getAll[App](client, internalIndex, estype, compact(render(json))) + ESUtils.getAll[App](client, index, compact(render(json))) } catch { case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$index/_search", e) Nil } } @@ -141,7 +139,7 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) val 
entity = new NStringEntity(write(app), ContentType.APPLICATION_JSON) val response = client.performRequest( "PUT", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map("refresh" -> "true").asJava, entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) @@ -150,11 +148,11 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) case "created" => case "updated" => case _ => - error(s"[$result] Failed to update $internalIndex/$estype/$id") + error(s"[$result] Failed to update $index/$estype/$id") } } catch { case e: IOException => - error(s"Failed to update $internalIndex/$estype/$id", e) + error(s"Failed to update $index/$estype/$id", e) } } @@ -162,18 +160,18 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) try { val response = client.performRequest( "DELETE", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map("refresh" -> "true").asJava) val json = parse(EntityUtils.toString(response.getEntity)) val result = (json \ "result").extract[String] result match { case "deleted" => case _ => - error(s"[$result] Failed to update $internalIndex/$estype/$id") + error(s"[$result] Failed to delete $index/$estype/$id") } } catch { case e: IOException => - error(s"Failed to update $internalIndex/$estype/id", e) + error(s"Failed to delete $index/$estype/$id", e) } } } diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala index e58dca815d..ebba755dc0 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala @@ -35,26 +35,27 @@ import org.json4s.native.Serialization.write import grizzled.slf4j.Logging -class ESChannels(client: RestClient, config: StorageClientConfig, index: String) +class ESChannels(client: RestClient, config: StorageClientConfig, metadataName: String) extends Channels with Logging { implicit val formats = DefaultFormats.lossless - private val estype = "channels" - private val seq = new ESSequences(client, config, index) - private val internalIndex = index + "_" + estype + private val seq = new ESSequences(client, config, metadataName) + private val metadataKey = "channels" + private val index = metadataName + "_" + metadataKey + private val estype = { + val mappingJson = + ("mappings" -> + ("properties" -> + ("name" -> ("type" -> "keyword")))) - ESUtils.createIndex(client, internalIndex) - val mappingJson = - (estype -> - ("properties" -> - ("name" -> ("type" -> "keyword")))) - ESUtils.createMapping(client, internalIndex, estype, compact(render(mappingJson))) + ESUtils.createIndex(client, index, compact(render(mappingJson))) + } def insert(channel: Channel): Option[Int] = { val id = channel.id match { case v if v == 0 => @scala.annotation.tailrec def generateId: Int = { - seq.genNext(estype).toInt match { + seq.genNext(metadataKey).toInt match { case x if !get(x).isEmpty => generateId case x => x } @@ -70,7 +71,7 @@ class ESChannels(client: RestClient, config: StorageClientConfig, index: String) try { val response = client.performRequest( "GET", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map.empty[String, String].asJava) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "found").extract[Boolean] match { @@ 
-84,11 +85,11 @@ class ESChannels(client: RestClient, config: StorageClientConfig, index: String) e.getResponse.getStatusLine.getStatusCode match { case 404 => None case _ => - error(s"Failed to access to /$internalIndex/$estype/$id", e) + error(s"Failed to access to /$index/$estype/$id", e) None } case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/$id", e) + error(s"Failed to access to /$index/$estype/$id", e) None } } @@ -99,10 +100,10 @@ class ESChannels(client: RestClient, config: StorageClientConfig, index: String) ("query" -> ("term" -> ("appid" -> appid))) - ESUtils.getAll[Channel](client, internalIndex, estype, compact(render(json))) + ESUtils.getAll[Channel](client, index, compact(render(json))) } catch { case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$index/_search", e) Nil } } @@ -113,7 +114,7 @@ class ESChannels(client: RestClient, config: StorageClientConfig, index: String) val entity = new NStringEntity(write(channel), ContentType.APPLICATION_JSON) val response = client.performRequest( "PUT", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map("refresh" -> "true").asJava, entity) val json = parse(EntityUtils.toString(response.getEntity)) @@ -122,12 +123,12 @@ class ESChannels(client: RestClient, config: StorageClientConfig, index: String) case "created" => true case "updated" => true case _ => - error(s"[$result] Failed to update $internalIndex/$estype/$id") + error(s"[$result] Failed to update $index/$estype/$id") false } } catch { case e: IOException => - error(s"Failed to update $internalIndex/$estype/$id", e) + error(s"Failed to update $index/$estype/$id", e) false } } @@ -136,18 +137,18 @@ class ESChannels(client: RestClient, config: StorageClientConfig, index: String) try { val response = client.performRequest( "DELETE", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map("refresh" -> "true").asJava) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) val result = (jsonResponse \ "result").extract[String] result match { case "deleted" => case _ => - error(s"[$result] Failed to update $internalIndex/$estype/$id") + error(s"[$result] Failed to delete $index/$estype/$id") } } catch { case e: IOException => - error(s"Failed to update $internalIndex/$estype/$id", e) + error(s"Failed to delete $index/$estype/$id", e) } } } diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala index 4cf1876078..850bdb325e 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala @@ -36,30 +36,31 @@ import org.json4s.native.Serialization.write import grizzled.slf4j.Logging -class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: String) +class ESEngineInstances(client: RestClient, config: StorageClientConfig, metadataName: String) extends EngineInstances with Logging { implicit val formats = DefaultFormats + new EngineInstanceSerializer - private val estype = "engine_instances" - private val internalIndex = index + "_" + estype - - ESUtils.createIndex(client, internalIndex) - val mappingJson = - (estype -> - ("properties" -> - ("status" -> ("type" -> "keyword")) ~ - 
("startTime" -> ("type" -> "date")) ~ - ("endTime" -> ("type" -> "date")) ~ - ("engineId" -> ("type" -> "keyword")) ~ - ("engineVersion" -> ("type" -> "keyword")) ~ - ("engineVariant" -> ("type" -> "keyword")) ~ - ("engineFactory" -> ("type" -> "keyword")) ~ - ("batch" -> ("type" -> "keyword")) ~ - ("dataSourceParams" -> ("type" -> "keyword")) ~ - ("preparatorParams" -> ("type" -> "keyword")) ~ - ("algorithmsParams" -> ("type" -> "keyword")) ~ - ("servingParams" -> ("type" -> "keyword")) + private val metadataKey = "engine_instances" + private val index = metadataName + "_" + metadataKey + private val estype = { + val mappingJson = + ("mappings" -> + ("properties" -> + ("status" -> ("type" -> "keyword")) ~ + ("startTime" -> ("type" -> "date")) ~ + ("endTime" -> ("type" -> "date")) ~ + ("engineId" -> ("type" -> "keyword")) ~ + ("engineVersion" -> ("type" -> "keyword")) ~ + ("engineVariant" -> ("type" -> "keyword")) ~ + ("engineFactory" -> ("type" -> "keyword")) ~ + ("batch" -> ("type" -> "keyword")) ~ + ("dataSourceParams" -> ("type" -> "keyword")) ~ + ("preparatorParams" -> ("type" -> "keyword")) ~ + ("algorithmsParams" -> ("type" -> "keyword")) ~ + ("servingParams" -> ("type" -> "keyword")) )) - ESUtils.createMapping(client, internalIndex, estype, compact(render(mappingJson))) + + ESUtils.createIndex(client, index, compact(render(mappingJson))) + } def insert(i: EngineInstance): String = { val id = i.id match { @@ -84,7 +85,7 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: val entity = new NStringEntity("{}", ContentType.APPLICATION_JSON) val response = client.performRequest( "POST", - s"/$internalIndex/$estype", + s"/$index/$estype", Map("refresh" -> "true").asJava, entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) @@ -93,12 +94,12 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: case "created" => Some((jsonResponse \ "_id").extract[String]) case _ => - error(s"[$result] Failed to create $internalIndex/$estype") + error(s"[$result] Failed to create $index/$estype") None } } catch { case e: IOException => - error(s"Failed to create $internalIndex/$estype", e) + error(s"Failed to create $index/$estype", e) None } } @@ -107,7 +108,7 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: try { val response = client.performRequest( "GET", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map.empty[String, String].asJava) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "found").extract[Boolean] match { @@ -121,11 +122,11 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: e.getResponse.getStatusLine.getStatusCode match { case 404 => None case _ => - error(s"Failed to access to /$internalIndex/$estype/$id", e) + error(s"Failed to access to /$index/$estype/$id", e) None } case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/$id", e) + error(s"Failed to access to /$index/$estype/$id", e) None } } @@ -135,10 +136,10 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: val json = ("query" -> ("match_all" -> List.empty)) - ESUtils.getAll[EngineInstance](client, internalIndex, estype, compact(render(json))) + ESUtils.getAll[EngineInstance](client, index, compact(render(json))) } catch { case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$index/_search", e) Nil } } @@ -163,10 +164,10 
@@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: ("sort" -> List( ("startTime" -> ("order" -> "desc")))) - ESUtils.getAll[EngineInstance](client, internalIndex, estype, compact(render(json))) + ESUtils.getAll[EngineInstance](client, index, compact(render(json))) } catch { case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$index/_search", e) Nil } } @@ -186,7 +187,7 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: val entity = new NStringEntity(write(i), ContentType.APPLICATION_JSON) val response = client.performRequest( "PUT", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map("refresh" -> "true").asJava, entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) @@ -195,11 +196,11 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: case "created" => case "updated" => case _ => - error(s"[$result] Failed to update $internalIndex/$estype/$id") + error(s"[$result] Failed to update $index/$estype/$id") } } catch { case e: IOException => - error(s"Failed to update $internalIndex/$estype/$id", e) + error(s"Failed to update $index/$estype/$id", e) } } @@ -207,18 +208,18 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: try { val response = client.performRequest( "DELETE", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map("refresh" -> "true").asJava) val json = parse(EntityUtils.toString(response.getEntity)) val result = (json \ "result").extract[String] result match { case "deleted" => case _ => - error(s"[$result] Failed to update $internalIndex/$estype/$id") + error(s"[$result] Failed to delete $index/$estype/$id") } } catch { case e: IOException => - error(s"Failed to update $internalIndex/$estype/$id", e) + error(s"Failed to delete $index/$estype/$id", e) } } } diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala index 284a165b78..93c3e33c8c 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala @@ -36,34 +36,35 @@ import org.json4s.native.Serialization.write import grizzled.slf4j.Logging -class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, index: String) +class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, metadataName: String) extends EvaluationInstances with Logging { implicit val formats = DefaultFormats + new EvaluationInstanceSerializer - private val estype = "evaluation_instances" - private val seq = new ESSequences(client, config, index) - private val internalIndex = index + "_" + estype + private val seq = new ESSequences(client, config, metadataName) + private val metadataKey = "evaluation_instances" + private val index = metadataName + "_" + metadataKey + private val estype = { + val mappingJson = + ("mappings" -> + ("properties" -> + ("status" -> ("type" -> "keyword")) ~ + ("startTime" -> ("type" -> "date")) ~ + ("endTime" -> ("type" -> "date")) ~ + ("evaluationClass" -> ("type" -> "keyword")) ~ + ("engineParamsGeneratorClass" -> ("type" -> "keyword")) ~ + ("batch" -> ("type" -> "keyword")) ~ + 
("evaluatorResults" -> ("type" -> "text")) ~ + ("evaluatorResultsHTML" -> ("enabled" -> false)) ~ + ("evaluatorResultsJSON" -> ("enabled" -> false)))) - ESUtils.createIndex(client, internalIndex) - val mappingJson = - (estype -> - ("properties" -> - ("status" -> ("type" -> "keyword")) ~ - ("startTime" -> ("type" -> "date")) ~ - ("endTime" -> ("type" -> "date")) ~ - ("evaluationClass" -> ("type" -> "keyword")) ~ - ("engineParamsGeneratorClass" -> ("type" -> "keyword")) ~ - ("batch" -> ("type" -> "keyword")) ~ - ("evaluatorResults" -> ("type" -> "text")) ~ - ("evaluatorResultsHTML" -> ("enabled" -> false)) ~ - ("evaluatorResultsJSON" -> ("enabled" -> false)))) - ESUtils.createMapping(client, internalIndex, estype, compact(render(mappingJson))) + ESUtils.createIndex(client, index, compact(render(mappingJson))) + } def insert(i: EvaluationInstance): String = { val id = i.id match { case v if v.isEmpty => @scala.annotation.tailrec def generateId: String = { - seq.genNext(estype).toString match { + seq.genNext(metadataKey).toString match { case x if !get(x).isEmpty => generateId case x => x } @@ -79,7 +80,7 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind try { val response = client.performRequest( "GET", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map.empty[String, String].asJava) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "found").extract[Boolean] match { @@ -93,11 +94,11 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind e.getResponse.getStatusLine.getStatusCode match { case 404 => None case _ => - error(s"Failed to access to /$internalIndex/$estype/$id", e) + error(s"Failed to access to /$index/$estype/$id", e) None } case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/$id", e) + error(s"Failed to access to /$index/$estype/$id", e) None } } @@ -107,10 +108,10 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind val json = ("query" -> ("match_all" -> List.empty)) - ESUtils.getAll[EvaluationInstance](client, internalIndex, estype, compact(render(json))) + ESUtils.getAll[EvaluationInstance](client, index, compact(render(json))) } catch { case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$index/_search", e) Nil } } @@ -124,10 +125,10 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind ("sort" -> ("startTime" -> ("order" -> "desc"))) - ESUtils.getAll[EvaluationInstance](client, internalIndex, estype, compact(render(json))) + ESUtils.getAll[EvaluationInstance](client, index, compact(render(json))) } catch { case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$index/_search", e) Nil } } @@ -138,7 +139,7 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind val entity = new NStringEntity(write(i), ContentType.APPLICATION_JSON) val response = client.performRequest( "PUT", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map("refresh" -> "true").asJava, entity) val json = parse(EntityUtils.toString(response.getEntity)) @@ -147,11 +148,11 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind case "created" => case "updated" => case _ => - error(s"[$result] Failed to update $internalIndex/$estype/$id") + error(s"[$result] Failed to update $index/$estype/$id") } } catch { case e: 
IOException => - error(s"Failed to update $internalIndex/$estype/$id", e) + error(s"Failed to update $index/$estype/$id", e) } } @@ -159,18 +160,18 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind try { val response = client.performRequest( "DELETE", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map("refresh" -> "true").asJava) val json = parse(EntityUtils.toString(response.getEntity)) val result = (json \ "result").extract[String] result match { case "deleted" => case _ => - error(s"[$result] Failed to update $internalIndex/$estype/$id") + error(s"[$result] Failed to delete $index/$estype/$id") } } catch { case e: IOException => - error(s"Failed to update $internalIndex/$estype/$id", e) + error(s"Failed to delete $index/$estype/$id", e) } } } diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala index f656ac1fd5..8cd14344f6 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala @@ -28,7 +28,7 @@ import org.apache.http.util.EntityUtils import org.apache.predictionio.data.storage.Event import org.apache.predictionio.data.storage.LEvents import org.apache.predictionio.data.storage.StorageClientConfig -import org.elasticsearch.client.{ResponseException, RestClient} +import org.elasticsearch.client.RestClient import org.joda.time.DateTime import org.json4s._ import org.json4s.JsonDSL._ @@ -38,11 +38,11 @@ import org.json4s.ext.JodaTimeSerializers import grizzled.slf4j.Logging import org.apache.http.message.BasicHeader -class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseIndex: String) +class ESLEvents(val client: RestClient, config: StorageClientConfig, val eventdataName: String) extends LEvents with Logging { implicit val formats = DefaultFormats.lossless ++ JodaTimeSerializers.all - def getEsType(appId: Int, channelId: Option[Int] = None): String = { + def eventdataKey(appId: Int, channelId: Option[Int] = None): String = { channelId.map { ch => s"${appId}_${ch}" }.getOrElse { @@ -51,11 +51,9 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd } override def init(appId: Int, channelId: Option[Int] = None): Boolean = { - val estype = getEsType(appId, channelId) - val index = baseIndex + "_" + estype - ESUtils.createIndex(client, index) + val index = eventdataName + "_" + eventdataKey(appId, channelId) val json = - (estype -> + ("mappings" -> ("properties" -> ("name" -> ("type" -> "keyword")) ~ ("eventId" -> ("type" -> "keyword")) ~ @@ -69,13 +67,12 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd ("tags" -> ("type" -> "keyword")) ~ ("prId" -> ("type" -> "keyword")) ~ ("creationTime" -> ("type" -> "date")))) - ESUtils.createMapping(client, index, estype, compact(render(json))) + ESUtils.createIndex(client, index, compact(render(json))) true } override def remove(appId: Int, channelId: Option[Int] = None): Boolean = { - val estype = getEsType(appId, channelId) - val index = baseIndex + "_" + estype + val index = eventdataName + "_" + eventdataKey(appId, channelId) try { client.performRequest( "DELETE", @@ -84,12 +81,12 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd 
).getStatusLine.getStatusCode match { case 200 => true case _ => - error(s"Failed to remove $index/$estype") + error(s"Failed to remove $index") false } } catch { case e: Exception => - error(s"Failed to remove $index/$estype", e) + error(s"Failed to remove $index", e) false } } @@ -101,8 +98,8 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd appId: Int, channelId: Option[Int])(implicit ec: ExecutionContext): Future[String] = { Future { - val estype = getEsType(appId, channelId) - val index = baseIndex + "_" + estype + val index = eventdataName + "_" + eventdataKey(appId, channelId) + val estype = ESUtils.esType(client, index) try { val id = event.eventId.getOrElse { ESEventsUtil.getBase64UUID @@ -146,8 +143,8 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd appId: Int, channelId: Option[Int])(implicit ec: ExecutionContext): Future[Seq[String]] = { Future { - val estype = getEsType(appId, channelId) - val index = baseIndex + "_" + estype + val index = eventdataName + "_" + eventdataKey(appId, channelId) + val estype = ESUtils.esType(client, index) try { val ids = events.map { event => event.eventId.getOrElse(ESEventsUtil.getBase64UUID) @@ -208,37 +205,12 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd } } - private def exists(client: RestClient, estype: String, id: Int): Boolean = { - val index = baseIndex + "_" + estype - try { - client.performRequest( - "GET", - s"/$index/$estype/$id", - Map.empty[String, String].asJava).getStatusLine.getStatusCode match { - case 200 => true - case _ => false - } - } catch { - case e: ResponseException => - e.getResponse.getStatusLine.getStatusCode match { - case 404 => false - case _ => - error(s"Failed to access to /$index/$estype/$id", e) - false - } - case e: IOException => - error(s"Failed to access to $index/$estype/$id", e) - false - } - } - override def futureGet( eventId: String, appId: Int, channelId: Option[Int])(implicit ec: ExecutionContext): Future[Option[Event]] = { Future { - val estype = getEsType(appId, channelId) - val index = baseIndex + "_" + estype + val index = eventdataName + "_" + eventdataKey(appId, channelId) try { val json = ("query" -> @@ -247,20 +219,17 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) val response = client.performRequest( "POST", - s"/$index/$estype/_search", + s"/$index/_search", Map.empty[String, String].asJava, entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) - (jsonResponse \ "hits" \ "total").extract[Long] match { - case 0 => None - case _ => - val results = (jsonResponse \ "hits" \ "hits").extract[Seq[JValue]] - val result = (results.head \ "_source").extract[Event] - Some(result) + val results = (jsonResponse \ "hits" \ "hits").extract[Seq[JValue]] + results.headOption.map { jv => + (jv \ "_source").extract[Event] } } catch { case e: IOException => - error(s"Failed to access to /$index/$estype/_search", e) + error(s"Failed to access to /$index/_search", e) None } } @@ -271,8 +240,7 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd appId: Int, channelId: Option[Int])(implicit ec: ExecutionContext): Future[Boolean] = { Future { - val estype = getEsType(appId, channelId) - val index = baseIndex + "_" + estype + val index = eventdataName + "_" + eventdataKey(appId, channelId) try { val json = ("query" -> @@ -281,14 +249,14 @@ 
class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) val response = client.performRequest( "POST", - s"/$index/$estype/_delete_by_query", + s"/$index/_delete_by_query", Map("refresh" -> ESUtils.getEventDataRefresh(config)).asJava, entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "deleted").extract[Int] > 0 } catch { case e: IOException => - error(s"Failed to delete $index/$estype:$eventId", e) + error(s"Failed to delete $index:$eventId", e) false } } @@ -308,15 +276,14 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd reversed: Option[Boolean] = None) (implicit ec: ExecutionContext): Future[Iterator[Event]] = { Future { - val estype = getEsType(appId, channelId) - val index = baseIndex + "_" + estype + val index = eventdataName + "_" + eventdataKey(appId, channelId) try { val query = ESUtils.createEventQuery( startTime, untilTime, entityType, entityId, eventNames, targetEntityType, targetEntityId, reversed) limit.getOrElse(20) match { - case -1 => ESUtils.getEventAll(client, index, estype, query).toIterator - case size => ESUtils.getEvents(client, index, estype, query, size).toIterator + case -1 => ESUtils.getEventAll(client, index, query).toIterator + case size => ESUtils.getEvents(client, index, query, size).toIterator } } catch { case e: IOException => diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala index 87c0c2aba3..f54456f2bf 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala @@ -41,11 +41,11 @@ import org.json4s.native.JsonMethods._ import org.json4s.ext.JodaTimeSerializers -class ESPEvents(client: RestClient, config: StorageClientConfig, baseIndex: String) +class ESPEvents(client: RestClient, config: StorageClientConfig, eventdataName: String) extends PEvents { implicit val formats = DefaultFormats.lossless ++ JodaTimeSerializers.all - def getEsType(appId: Int, channelId: Option[Int] = None): String = { + def eventdataKey(appId: Int, channelId: Option[Int] = None): String = { channelId.map { ch => s"${appId}_${ch}" }.getOrElse { @@ -77,10 +77,9 @@ class ESPEvents(client: RestClient, config: StorageClientConfig, baseIndex: Stri startTime, untilTime, entityType, entityId, eventNames, targetEntityType, targetEntityId, None) - val estype = getEsType(appId, channelId) - val index = baseIndex + "_" + estype + val index = eventdataName + "_" + eventdataKey(appId, channelId) val conf = new Configuration() - conf.set("es.resource", s"$index/$estype") + conf.set("es.resource", index) conf.set("es.query", query) conf.set("es.nodes", getESNodes()) @@ -97,8 +96,8 @@ class ESPEvents(client: RestClient, config: StorageClientConfig, baseIndex: Stri override def write( events: RDD[Event], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = { - val estype = getEsType(appId, channelId) - val index = baseIndex + "_" + estype + val index = eventdataName + "_" + eventdataKey(appId, channelId) + val estype = ESUtils.esType(client, index) val conf = Map("es.resource" -> s"$index/$estype", "es.nodes" -> getESNodes()) events.map { event => ESEventsUtil.eventToPut(event, 
appId) @@ -108,8 +107,7 @@ class ESPEvents(client: RestClient, config: StorageClientConfig, baseIndex: Stri override def delete( eventIds: RDD[String], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = { - val estype = getEsType(appId, channelId) - val index = baseIndex + "_" + estype + val index = eventdataName + "_" + eventdataKey(appId, channelId) eventIds.foreachPartition { iter => iter.foreach { eventId => try { @@ -120,17 +118,17 @@ class ESPEvents(client: RestClient, config: StorageClientConfig, baseIndex: Stri val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) val response = client.performRequest( "POST", - s"/$index/$estype/_delete_by_query", + s"/$index/_delete_by_query", Map("refresh" -> ESUtils.getEventDataRefresh(config)).asJava, entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) if ((jsonResponse \ "deleted").extract[Int] == 0) { logger.warn("The number of documents that were successfully deleted is 0. " - + s"$index/$estype:$eventId") + + s"$index:$eventId") } } catch { case e: IOException => - logger.error(s"Failed to update $index/$estype:$eventId", e) + logger.error(s"Failed to update $index:$eventId", e) } } } diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala index 1dad7e9a7a..0fb1a73a76 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala @@ -34,24 +34,25 @@ import org.json4s.native.Serialization.write import grizzled.slf4j.Logging -class ESSequences(client: RestClient, config: StorageClientConfig, index: String) extends Logging { +class ESSequences(client: RestClient, config: StorageClientConfig, metadataName: String) extends Logging { implicit val formats = DefaultFormats - private val estype = "sequences" - private val internalIndex = index + "_" + estype + private val metadataKey = "sequences" + private val index = metadataName + "_" + metadataKey + private val estype = { + val mappingJson = + ("mappings" -> + ("properties" -> + ("n" -> ("enabled" -> false)))) - ESUtils.createIndex(client, internalIndex) - val mappingJson = - (estype -> - ("properties" -> - ("n" -> ("enabled" -> false)))) - ESUtils.createMapping(client, internalIndex, estype, compact(render(mappingJson))) + ESUtils.createIndex(client, index, compact(render(mappingJson))) + } def genNext(name: String): Long = { try { val entity = new NStringEntity(write("n" -> name), ContentType.APPLICATION_JSON) val response = client.performRequest( "PUT", - s"/$internalIndex/$estype/$name", + s"/$index/$estype/$name", Map("refresh" -> "false").asJava, entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) @@ -62,11 +63,11 @@ class ESSequences(client: RestClient, config: StorageClientConfig, index: String case "updated" => (jsonResponse \ "_version").extract[Long] case _ => - throw new IllegalStateException(s"[$result] Failed to update $internalIndex/$estype/$name") + throw new IllegalStateException(s"[$result] Failed to update $index/$estype/$name") } } catch { case e: IOException => - throw new StorageClientException(s"Failed to update $internalIndex/$estype/$name", e) + throw new StorageClientException(s"Failed to update $index/$estype/$name", e) } } } diff --git 
a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala index 93d5d94912..50193f0c53 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala @@ -19,10 +19,9 @@ package org.apache.predictionio.data.storage.elasticsearch import scala.collection.JavaConversions._ import scala.collection.JavaConverters._ - import org.apache.http.entity.ContentType import org.apache.http.nio.entity.NStringEntity -import org.elasticsearch.client.RestClient +import org.elasticsearch.client.{Response, RestClient} import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.native.JsonMethods._ @@ -82,24 +81,22 @@ object ESUtils { def getEvents( client: RestClient, index: String, - estype: String, query: String, size: Int)( implicit formats: Formats): Seq[Event] = { - getDocList(client, index, estype, query, size).map(x => toEvent(x)) + getDocList(client, index, query, size).map(x => toEvent(x)) } def getDocList( client: RestClient, index: String, - estype: String, query: String, size: Int)( implicit formats: Formats): Seq[JValue] = { val entity = new NStringEntity(query, ContentType.APPLICATION_JSON) val response = client.performRequest( "POST", - s"/$index/$estype/_search", + s"/$index/_search", Map("size" -> s"${size}"), entity) val responseJValue = parse(EntityUtils.toString(response.getEntity)) @@ -110,25 +107,22 @@ object ESUtils { def getAll[T: Manifest]( client: RestClient, index: String, - estype: String, query: String)( implicit formats: Formats): Seq[T] = { - getDocAll(client, index, estype, query).map(x => x.extract[T]) + getDocAll(client, index, query).map(x => x.extract[T]) } def getEventAll( client: RestClient, index: String, - estype: String, query: String)( implicit formats: Formats): Seq[Event] = { - getDocAll(client, index, estype, query).map(x => toEvent(x)) + getDocAll(client, index, query).map(x => toEvent(x)) } def getDocAll( client: RestClient, index: String, - estype: String, query: String)( implicit formats: Formats): Seq[JValue] = { @@ -153,7 +147,7 @@ object ESUtils { val entity = new NStringEntity(query, ContentType.APPLICATION_JSON) val response = client.performRequest( "POST", - s"/$index/$estype/_search", + s"/$index/_search", Map("scroll" -> scrollLife), entity) val responseJValue = parse(EntityUtils.toString(response.getEntity)) @@ -164,42 +158,55 @@ object ESUtils { def createIndex( client: RestClient, - index: String): Unit = { - client.performRequest( - "HEAD", + index: String, + json: String)( + implicit formats: Formats): String = { + val response = client.performRequest( + "GET", s"/$index", - Map.empty[String, String].asJava).getStatusLine.getStatusCode match { - case 404 => - client.performRequest( - "PUT", - s"/$index", - Map.empty[String, String].asJava) - case 200 => - case _ => - throw new IllegalStateException(s"/$index is invalid.") - } + Map.empty[String, String].asJava) + response.getStatusLine.getStatusCode match { + case 404 => + val entity = new NStringEntity(json, ContentType.APPLICATION_JSON) + client.performRequest( + "PUT", + s"/$index", + Map("include_type_name" -> "false"), + entity).getStatusLine.getStatusCode match { + case 200 => + "_doc" + case _ => + throw new IllegalStateException(s"/$index is invalid: $json") + } + case 200 => + 
typeName(index, response) + case _ => + throw new IllegalStateException(s"/$index is invalid: $json") + } } - def createMapping( + def esType( client: RestClient, - index: String, - estype: String, - json: String): Unit = { - client.performRequest( - "HEAD", - s"/$index/_mapping/$estype", - Map.empty[String, String].asJava).getStatusLine.getStatusCode match { - case 404 => - val entity = new NStringEntity(json, ContentType.APPLICATION_JSON) - client.performRequest( - "PUT", - s"/$index/_mapping/$estype", - Map.empty[String, String].asJava, - entity) - case 200 => - case _ => - throw new IllegalStateException(s"/$index/$estype is invalid: $json") - } + index: String)( + implicit formats: Formats): String = { + val response = client.performRequest( + "GET", + s"/$index", + Map.empty[String, String].asJava) + response.getStatusLine.getStatusCode match { + case 200 => + typeName(index, response) + case _ => + throw new IllegalStateException(s"/$index is invalid.") + } + } + + private def typeName(index: String, response: Response)( + implicit formats: Formats) = { + (parse(EntityUtils.toString(response.getEntity)) \ index \ "mappings") + .extract[JObject].values.collectFirst { + case (name, _) if name != "_doc" && name != "properties" => name + }.getOrElse("_doc") } def formatUTCDateTime(dt: DateTime): String = { From d9b2f65d02567c2f0344da8c585aa60e79ce0487 Mon Sep 17 00:00:00 2001 From: shimamoto Date: Tue, 16 Jul 2019 19:47:49 +0900 Subject: [PATCH 13/14] fixup --- .travis.yml | 3 -- .../data/storage/elasticsearch/ESUtils.scala | 46 ++++++++----------- 2 files changed, 20 insertions(+), 29 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6dd923f596..355f3d1855 100644 --- a/.travis.yml +++ b/.travis.yml @@ -50,9 +50,6 @@ env: - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=ELASTICSEARCH MODELDATA_REP=S3 PIO_ELASTICSEARCH_VERSION=6.8.1 - - BUILD_TYPE=Integration - METADATA_REP=ELASTICSEARCH EVENTDATA_REP=ELASTICSEARCH MODELDATA_REP=S3 - PIO_ELASTICSEARCH_VERSION=7.2.0 - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS PIO_HBASE_VERSION=1.2.6 diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala index 50193f0c53..453f78053e 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala @@ -161,25 +161,24 @@ object ESUtils { index: String, json: String)( implicit formats: Formats): String = { - val response = client.performRequest( - "GET", + client.performRequest( + "HEAD", s"/$index", - Map.empty[String, String].asJava) - response.getStatusLine.getStatusCode match { - case 404 => - val entity = new NStringEntity(json, ContentType.APPLICATION_JSON) - client.performRequest( - "PUT", - s"/$index", - Map("include_type_name" -> "false"), - entity).getStatusLine.getStatusCode match { - case 200 => - "_doc" - case _ => - throw new IllegalStateException(s"/$index is invalid: $json") - } + Map.empty[String, String].asJava).getStatusLine.getStatusCode match { + case 404 => + val entity = new NStringEntity(json, ContentType.APPLICATION_JSON) + client.performRequest( + "PUT", + s"/$index", + Map("include_type_name" -> "false"), + entity).getStatusLine.getStatusCode match { + case 200 => + "_doc" + case _ => + throw 
new IllegalStateException(s"/$index is invalid: $json") + } case 200 => - typeName(index, response) + esType(client, index) case _ => throw new IllegalStateException(s"/$index is invalid: $json") } @@ -195,20 +194,15 @@ object ESUtils { Map.empty[String, String].asJava) response.getStatusLine.getStatusCode match { case 200 => - typeName(index, response) + (parse(EntityUtils.toString(response.getEntity)) \ index \ "mappings") + .extract[JObject].values.collectFirst { + case (name, _) if name != "_doc" && name != "properties" => name + }.getOrElse("_doc") case _ => throw new IllegalStateException(s"/$index is invalid.") } } - private def typeName(index: String, response: Response)( - implicit formats: Formats) = { - (parse(EntityUtils.toString(response.getEntity)) \ index \ "mappings") - .extract[JObject].values.collectFirst { - case (name, _) if name != "_doc" && name != "properties" => name - }.getOrElse("_doc") - } - def formatUTCDateTime(dt: DateTime): String = { DateTimeFormat .forPattern("yyyy-MM-dd'T'HH:mm:ss.SSSZ").print(dt.withZone(DateTimeZone.UTC)) From 4bd1d9ae7037f094b544914de48ad880eeac24d1 Mon Sep 17 00:00:00 2001 From: shimamoto Date: Wed, 17 Jul 2019 11:21:12 +0900 Subject: [PATCH 14/14] fixup --- .../data/storage/elasticsearch/ESUtils.scala | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala index 453f78053e..80079e319d 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala @@ -18,10 +18,10 @@ package org.apache.predictionio.data.storage.elasticsearch import scala.collection.JavaConversions._ -import scala.collection.JavaConverters._ + import org.apache.http.entity.ContentType import org.apache.http.nio.entity.NStringEntity -import org.elasticsearch.client.{Response, RestClient} +import org.elasticsearch.client.RestClient import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.native.JsonMethods._ @@ -161,10 +161,10 @@ object ESUtils { index: String, json: String)( implicit formats: Formats): String = { - client.performRequest( + client.performRequest( "HEAD", s"/$index", - Map.empty[String, String].asJava).getStatusLine.getStatusCode match { + Map("include_type_name" -> "false")).getStatusLine.getStatusCode match { case 404 => val entity = new NStringEntity(json, ContentType.APPLICATION_JSON) client.performRequest( @@ -176,14 +176,17 @@ object ESUtils { "_doc" case _ => throw new IllegalStateException(s"/$index is invalid: $json") - } - case 200 => - esType(client, index) - case _ => - throw new IllegalStateException(s"/$index is invalid: $json") - } + } + case 200 => + esType(client, index) + case _ => + throw new IllegalStateException(s"/$index is invalid: $json") + } } + // We cannot have several types within a single index as of ES 6.0, so + // continue to add or update a document under the current type. This code is + // a step towards ES 7.0 support (removal of mapping types). 
def esType( client: RestClient, index: String)( @@ -191,7 +194,7 @@ object ESUtils { val response = client.performRequest( "GET", s"/$index", - Map.empty[String, String].asJava) + Map("include_type_name" -> "true")) response.getStatusLine.getStatusCode match { case 200 => (parse(EntityUtils.toString(response.getEntity)) \ index \ "mappings")