From 3fad195066ecf59412e636a7cbd1873b8c7ab4ce Mon Sep 17 00:00:00 2001 From: chie8842 Date: Thu, 27 Oct 2016 11:46:33 +0900 Subject: [PATCH 01/10] first commit --- dev/pr-deps/spark-deps-hadoop-2.2 | 167 ++++++++++++++++ dev/pr-deps/spark-deps-hadoop-2.3 | 175 +++++++++++++++++ dev/pr-deps/spark-deps-hadoop-2.4 | 175 +++++++++++++++++ dev/pr-deps/spark-deps-hadoop-2.6 | 184 +++++++++++++++++ dev/pr-deps/spark-deps-hadoop-2.7 | 185 ++++++++++++++++++ docs/ml-features.md | 15 ++ .../examples/ml/JavaInteractionExample.java | 73 +++++++ .../src/main/python/ml/interaction_example.py | 50 +++++ .../examples/ml/InteractionExample.scala | 55 ++++++ 9 files changed, 1079 insertions(+) create mode 100644 dev/pr-deps/spark-deps-hadoop-2.2 create mode 100644 dev/pr-deps/spark-deps-hadoop-2.3 create mode 100644 dev/pr-deps/spark-deps-hadoop-2.4 create mode 100644 dev/pr-deps/spark-deps-hadoop-2.6 create mode 100644 dev/pr-deps/spark-deps-hadoop-2.7 create mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java create mode 100644 examples/src/main/python/ml/interaction_example.py create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala diff --git a/dev/pr-deps/spark-deps-hadoop-2.2 b/dev/pr-deps/spark-deps-hadoop-2.2 new file mode 100644 index 000000000000..99279a4ca8be --- /dev/null +++ b/dev/pr-deps/spark-deps-hadoop-2.2 @@ -0,0 +1,167 @@ +JavaEWAH-0.3.2.jar +RoaringBitmap-0.5.11.jar +ST4-4.0.4.jar +antlr-2.7.7.jar +antlr-runtime-3.4.jar +antlr4-runtime-4.5.3.jar +aopalliance-1.0.jar +aopalliance-repackaged-2.4.0-b34.jar +apache-log4j-extras-1.2.17.jar +arpack_combined_all-0.1.jar +avro-1.7.7.jar +avro-ipc-1.7.7.jar +avro-mapred-1.7.7-hadoop2.jar +bonecp-0.8.0.RELEASE.jar +breeze-macros_2.11-0.12.jar +breeze_2.11-0.12.jar +calcite-avatica-1.2.0-incubating.jar +calcite-core-1.2.0-incubating.jar +calcite-linq4j-1.2.0-incubating.jar +chill-java-0.8.0.jar +chill_2.11-0.8.0.jar +commons-beanutils-1.7.0.jar 
+commons-beanutils-core-1.8.0.jar +commons-cli-1.2.jar +commons-codec-1.10.jar +commons-collections-3.2.2.jar +commons-compiler-2.7.6.jar +commons-compress-1.4.1.jar +commons-configuration-1.6.jar +commons-crypto-1.0.0.jar +commons-dbcp-1.4.jar +commons-digester-1.8.jar +commons-httpclient-3.1.jar +commons-io-2.4.jar +commons-lang-2.6.jar +commons-lang3-3.5.jar +commons-logging-1.1.3.jar +commons-math-2.1.jar +commons-math3-3.4.1.jar +commons-net-2.2.jar +commons-pool-1.5.4.jar +compress-lzf-1.0.3.jar +core-1.1.2.jar +curator-client-2.4.0.jar +curator-framework-2.4.0.jar +curator-recipes-2.4.0.jar +datanucleus-api-jdo-3.2.6.jar +datanucleus-core-3.2.10.jar +datanucleus-rdbms-3.2.9.jar +derby-10.12.1.1.jar +eigenbase-properties-1.1.5.jar +guava-14.0.1.jar +guice-3.0.jar +guice-servlet-3.0.jar +hadoop-annotations-2.2.0.jar +hadoop-auth-2.2.0.jar +hadoop-client-2.2.0.jar +hadoop-common-2.2.0.jar +hadoop-hdfs-2.2.0.jar +hadoop-mapreduce-client-app-2.2.0.jar +hadoop-mapreduce-client-common-2.2.0.jar +hadoop-mapreduce-client-core-2.2.0.jar +hadoop-mapreduce-client-jobclient-2.2.0.jar +hadoop-mapreduce-client-shuffle-2.2.0.jar +hadoop-yarn-api-2.2.0.jar +hadoop-yarn-client-2.2.0.jar +hadoop-yarn-common-2.2.0.jar +hadoop-yarn-server-common-2.2.0.jar +hadoop-yarn-server-web-proxy-2.2.0.jar +hk2-api-2.4.0-b34.jar +hk2-locator-2.4.0-b34.jar +hk2-utils-2.4.0-b34.jar +httpclient-4.5.2.jar +httpcore-4.4.4.jar +ivy-2.4.0.jar +jackson-annotations-2.6.5.jar +jackson-core-2.6.5.jar +jackson-core-asl-1.9.13.jar +jackson-databind-2.6.5.jar +jackson-mapper-asl-1.9.13.jar +jackson-module-paranamer-2.6.5.jar +jackson-module-scala_2.11-2.6.5.jar +janino-3.0.0.jar +javassist-3.18.1-GA.jar +javax.annotation-api-1.2.jar +javax.inject-1.jar +javax.inject-2.4.0-b34.jar +javax.servlet-api-3.1.0.jar +javax.ws.rs-api-2.0.1.jar +javolution-5.5.1.jar +jcl-over-slf4j-1.7.16.jar +jdo-api-3.0.1.jar +jersey-client-2.22.2.jar +jersey-common-2.22.2.jar +jersey-container-servlet-2.22.2.jar 
+jersey-container-servlet-core-2.22.2.jar +jersey-guava-2.22.2.jar +jersey-media-jaxb-2.22.2.jar +jersey-server-2.22.2.jar +jets3t-0.7.1.jar +jetty-util-6.1.26.jar +jline-2.12.1.jar +joda-time-2.9.3.jar +jodd-core-3.5.2.jar +jpam-1.1.jar +json-20090211.jar +json4s-ast_2.11-3.2.11.jar +json4s-core_2.11-3.2.11.jar +json4s-jackson_2.11-3.2.11.jar +jsr305-1.3.9.jar +jta-1.1.jar +jtransforms-2.4.0.jar +jul-to-slf4j-1.7.16.jar +kryo-shaded-3.0.3.jar +leveldbjni-all-1.8.jar +libfb303-0.9.2.jar +libthrift-0.9.2.jar +log4j-1.2.17.jar +lz4-1.3.0.jar +mesos-1.0.0-shaded-protobuf.jar +metrics-core-3.1.2.jar +metrics-graphite-3.1.2.jar +metrics-json-3.1.2.jar +metrics-jvm-3.1.2.jar +minlog-1.3.0.jar +netty-3.8.0.Final.jar +netty-all-4.0.41.Final.jar +objenesis-2.1.jar +opencsv-2.3.jar +oro-2.0.8.jar +osgi-resource-locator-1.0.1.jar +paranamer-2.3.jar +parquet-column-1.8.1.jar +parquet-common-1.8.1.jar +parquet-encoding-1.8.1.jar +parquet-format-2.3.0-incubating.jar +parquet-hadoop-1.8.1.jar +parquet-hadoop-bundle-1.6.0.jar +parquet-jackson-1.8.1.jar +pmml-model-1.2.15.jar +pmml-schema-1.2.15.jar +protobuf-java-2.5.0.jar +py4j-0.10.4.jar +pyrolite-4.13.jar +scala-compiler-2.11.8.jar +scala-library-2.11.8.jar +scala-parser-combinators_2.11-1.0.4.jar +scala-reflect-2.11.8.jar +scala-xml_2.11-1.0.2.jar +scalap-2.11.8.jar +shapeless_2.11-2.0.0.jar +slf4j-api-1.7.16.jar +slf4j-log4j12-1.7.16.jar +snappy-0.2.jar +snappy-java-1.1.2.6.jar +spire-macros_2.11-0.7.4.jar +spire_2.11-0.7.4.jar +stax-api-1.0.1.jar +stream-2.7.0.jar +stringtemplate-3.2.1.jar +super-csv-2.2.0.jar +univocity-parsers-2.2.1.jar +validation-api-1.1.0.Final.jar +xbean-asm5-shaded-4.4.jar +xmlenc-0.52.jar +xz-1.0.jar +zookeeper-3.4.5.jar diff --git a/dev/pr-deps/spark-deps-hadoop-2.3 b/dev/pr-deps/spark-deps-hadoop-2.3 new file mode 100644 index 000000000000..f094b4a7e167 --- /dev/null +++ b/dev/pr-deps/spark-deps-hadoop-2.3 @@ -0,0 +1,175 @@ +JavaEWAH-0.3.2.jar +RoaringBitmap-0.5.11.jar +ST4-4.0.4.jar 
+activation-1.1.1.jar +antlr-2.7.7.jar +antlr-runtime-3.4.jar +antlr4-runtime-4.5.3.jar +aopalliance-1.0.jar +aopalliance-repackaged-2.4.0-b34.jar +apache-log4j-extras-1.2.17.jar +arpack_combined_all-0.1.jar +avro-1.7.7.jar +avro-ipc-1.7.7.jar +avro-mapred-1.7.7-hadoop2.jar +base64-2.3.8.jar +bcprov-jdk15on-1.51.jar +bonecp-0.8.0.RELEASE.jar +breeze-macros_2.11-0.12.jar +breeze_2.11-0.12.jar +calcite-avatica-1.2.0-incubating.jar +calcite-core-1.2.0-incubating.jar +calcite-linq4j-1.2.0-incubating.jar +chill-java-0.8.0.jar +chill_2.11-0.8.0.jar +commons-beanutils-1.7.0.jar +commons-beanutils-core-1.8.0.jar +commons-cli-1.2.jar +commons-codec-1.10.jar +commons-collections-3.2.2.jar +commons-compiler-2.7.6.jar +commons-compress-1.4.1.jar +commons-configuration-1.6.jar +commons-crypto-1.0.0.jar +commons-dbcp-1.4.jar +commons-digester-1.8.jar +commons-httpclient-3.1.jar +commons-io-2.4.jar +commons-lang-2.6.jar +commons-lang3-3.5.jar +commons-logging-1.1.3.jar +commons-math3-3.4.1.jar +commons-net-2.2.jar +commons-pool-1.5.4.jar +compress-lzf-1.0.3.jar +core-1.1.2.jar +curator-client-2.4.0.jar +curator-framework-2.4.0.jar +curator-recipes-2.4.0.jar +datanucleus-api-jdo-3.2.6.jar +datanucleus-core-3.2.10.jar +datanucleus-rdbms-3.2.9.jar +derby-10.12.1.1.jar +eigenbase-properties-1.1.5.jar +guava-14.0.1.jar +guice-3.0.jar +guice-servlet-3.0.jar +hadoop-annotations-2.3.0.jar +hadoop-auth-2.3.0.jar +hadoop-client-2.3.0.jar +hadoop-common-2.3.0.jar +hadoop-hdfs-2.3.0.jar +hadoop-mapreduce-client-app-2.3.0.jar +hadoop-mapreduce-client-common-2.3.0.jar +hadoop-mapreduce-client-core-2.3.0.jar +hadoop-mapreduce-client-jobclient-2.3.0.jar +hadoop-mapreduce-client-shuffle-2.3.0.jar +hadoop-yarn-api-2.3.0.jar +hadoop-yarn-client-2.3.0.jar +hadoop-yarn-common-2.3.0.jar +hadoop-yarn-server-common-2.3.0.jar +hadoop-yarn-server-web-proxy-2.3.0.jar +hk2-api-2.4.0-b34.jar +hk2-locator-2.4.0-b34.jar +hk2-utils-2.4.0-b34.jar +httpclient-4.5.2.jar +httpcore-4.4.4.jar +ivy-2.4.0.jar 
+jackson-annotations-2.6.5.jar +jackson-core-2.6.5.jar +jackson-core-asl-1.9.13.jar +jackson-databind-2.6.5.jar +jackson-mapper-asl-1.9.13.jar +jackson-module-paranamer-2.6.5.jar +jackson-module-scala_2.11-2.6.5.jar +janino-3.0.0.jar +java-xmlbuilder-1.0.jar +javassist-3.18.1-GA.jar +javax.annotation-api-1.2.jar +javax.inject-1.jar +javax.inject-2.4.0-b34.jar +javax.servlet-api-3.1.0.jar +javax.ws.rs-api-2.0.1.jar +javolution-5.5.1.jar +jaxb-api-2.2.2.jar +jcl-over-slf4j-1.7.16.jar +jdo-api-3.0.1.jar +jersey-client-2.22.2.jar +jersey-common-2.22.2.jar +jersey-container-servlet-2.22.2.jar +jersey-container-servlet-core-2.22.2.jar +jersey-guava-2.22.2.jar +jersey-media-jaxb-2.22.2.jar +jersey-server-2.22.2.jar +jets3t-0.9.3.jar +jetty-6.1.26.jar +jetty-util-6.1.26.jar +jline-2.12.1.jar +joda-time-2.9.3.jar +jodd-core-3.5.2.jar +jpam-1.1.jar +json-20090211.jar +json4s-ast_2.11-3.2.11.jar +json4s-core_2.11-3.2.11.jar +json4s-jackson_2.11-3.2.11.jar +jsr305-1.3.9.jar +jta-1.1.jar +jtransforms-2.4.0.jar +jul-to-slf4j-1.7.16.jar +kryo-shaded-3.0.3.jar +leveldbjni-all-1.8.jar +libfb303-0.9.2.jar +libthrift-0.9.2.jar +log4j-1.2.17.jar +lz4-1.3.0.jar +mail-1.4.7.jar +mesos-1.0.0-shaded-protobuf.jar +metrics-core-3.1.2.jar +metrics-graphite-3.1.2.jar +metrics-json-3.1.2.jar +metrics-jvm-3.1.2.jar +minlog-1.3.0.jar +mx4j-3.0.2.jar +netty-3.8.0.Final.jar +netty-all-4.0.41.Final.jar +objenesis-2.1.jar +opencsv-2.3.jar +oro-2.0.8.jar +osgi-resource-locator-1.0.1.jar +paranamer-2.3.jar +parquet-column-1.8.1.jar +parquet-common-1.8.1.jar +parquet-encoding-1.8.1.jar +parquet-format-2.3.0-incubating.jar +parquet-hadoop-1.8.1.jar +parquet-hadoop-bundle-1.6.0.jar +parquet-jackson-1.8.1.jar +pmml-model-1.2.15.jar +pmml-schema-1.2.15.jar +protobuf-java-2.5.0.jar +py4j-0.10.4.jar +pyrolite-4.13.jar +scala-compiler-2.11.8.jar +scala-library-2.11.8.jar +scala-parser-combinators_2.11-1.0.4.jar +scala-reflect-2.11.8.jar +scala-xml_2.11-1.0.2.jar +scalap-2.11.8.jar +shapeless_2.11-2.0.0.jar 
+slf4j-api-1.7.16.jar +slf4j-log4j12-1.7.16.jar +snappy-0.2.jar +snappy-java-1.1.2.6.jar +spire-macros_2.11-0.7.4.jar +spire_2.11-0.7.4.jar +stax-api-1.0-2.jar +stax-api-1.0.1.jar +stream-2.7.0.jar +stringtemplate-3.2.1.jar +super-csv-2.2.0.jar +univocity-parsers-2.2.1.jar +validation-api-1.1.0.Final.jar +xbean-asm5-shaded-4.4.jar +xmlenc-0.52.jar +xz-1.0.jar +zookeeper-3.4.5.jar diff --git a/dev/pr-deps/spark-deps-hadoop-2.4 b/dev/pr-deps/spark-deps-hadoop-2.4 new file mode 100644 index 000000000000..7f0ef98680a1 --- /dev/null +++ b/dev/pr-deps/spark-deps-hadoop-2.4 @@ -0,0 +1,175 @@ +JavaEWAH-0.3.2.jar +RoaringBitmap-0.5.11.jar +ST4-4.0.4.jar +activation-1.1.1.jar +antlr-2.7.7.jar +antlr-runtime-3.4.jar +antlr4-runtime-4.5.3.jar +aopalliance-1.0.jar +aopalliance-repackaged-2.4.0-b34.jar +apache-log4j-extras-1.2.17.jar +arpack_combined_all-0.1.jar +avro-1.7.7.jar +avro-ipc-1.7.7.jar +avro-mapred-1.7.7-hadoop2.jar +base64-2.3.8.jar +bcprov-jdk15on-1.51.jar +bonecp-0.8.0.RELEASE.jar +breeze-macros_2.11-0.12.jar +breeze_2.11-0.12.jar +calcite-avatica-1.2.0-incubating.jar +calcite-core-1.2.0-incubating.jar +calcite-linq4j-1.2.0-incubating.jar +chill-java-0.8.0.jar +chill_2.11-0.8.0.jar +commons-beanutils-1.7.0.jar +commons-beanutils-core-1.8.0.jar +commons-cli-1.2.jar +commons-codec-1.10.jar +commons-collections-3.2.2.jar +commons-compiler-2.7.6.jar +commons-compress-1.4.1.jar +commons-configuration-1.6.jar +commons-crypto-1.0.0.jar +commons-dbcp-1.4.jar +commons-digester-1.8.jar +commons-httpclient-3.1.jar +commons-io-2.4.jar +commons-lang-2.6.jar +commons-lang3-3.5.jar +commons-logging-1.1.3.jar +commons-math3-3.4.1.jar +commons-net-2.2.jar +commons-pool-1.5.4.jar +compress-lzf-1.0.3.jar +core-1.1.2.jar +curator-client-2.4.0.jar +curator-framework-2.4.0.jar +curator-recipes-2.4.0.jar +datanucleus-api-jdo-3.2.6.jar +datanucleus-core-3.2.10.jar +datanucleus-rdbms-3.2.9.jar +derby-10.12.1.1.jar +eigenbase-properties-1.1.5.jar +guava-14.0.1.jar +guice-3.0.jar 
+guice-servlet-3.0.jar +hadoop-annotations-2.4.1.jar +hadoop-auth-2.4.1.jar +hadoop-client-2.4.1.jar +hadoop-common-2.4.1.jar +hadoop-hdfs-2.4.1.jar +hadoop-mapreduce-client-app-2.4.1.jar +hadoop-mapreduce-client-common-2.4.1.jar +hadoop-mapreduce-client-core-2.4.1.jar +hadoop-mapreduce-client-jobclient-2.4.1.jar +hadoop-mapreduce-client-shuffle-2.4.1.jar +hadoop-yarn-api-2.4.1.jar +hadoop-yarn-client-2.4.1.jar +hadoop-yarn-common-2.4.1.jar +hadoop-yarn-server-common-2.4.1.jar +hadoop-yarn-server-web-proxy-2.4.1.jar +hk2-api-2.4.0-b34.jar +hk2-locator-2.4.0-b34.jar +hk2-utils-2.4.0-b34.jar +httpclient-4.5.2.jar +httpcore-4.4.4.jar +ivy-2.4.0.jar +jackson-annotations-2.6.5.jar +jackson-core-2.6.5.jar +jackson-core-asl-1.9.13.jar +jackson-databind-2.6.5.jar +jackson-mapper-asl-1.9.13.jar +jackson-module-paranamer-2.6.5.jar +jackson-module-scala_2.11-2.6.5.jar +janino-3.0.0.jar +java-xmlbuilder-1.0.jar +javassist-3.18.1-GA.jar +javax.annotation-api-1.2.jar +javax.inject-1.jar +javax.inject-2.4.0-b34.jar +javax.servlet-api-3.1.0.jar +javax.ws.rs-api-2.0.1.jar +javolution-5.5.1.jar +jaxb-api-2.2.2.jar +jcl-over-slf4j-1.7.16.jar +jdo-api-3.0.1.jar +jersey-client-2.22.2.jar +jersey-common-2.22.2.jar +jersey-container-servlet-2.22.2.jar +jersey-container-servlet-core-2.22.2.jar +jersey-guava-2.22.2.jar +jersey-media-jaxb-2.22.2.jar +jersey-server-2.22.2.jar +jets3t-0.9.3.jar +jetty-6.1.26.jar +jetty-util-6.1.26.jar +jline-2.12.1.jar +joda-time-2.9.3.jar +jodd-core-3.5.2.jar +jpam-1.1.jar +json-20090211.jar +json4s-ast_2.11-3.2.11.jar +json4s-core_2.11-3.2.11.jar +json4s-jackson_2.11-3.2.11.jar +jsr305-1.3.9.jar +jta-1.1.jar +jtransforms-2.4.0.jar +jul-to-slf4j-1.7.16.jar +kryo-shaded-3.0.3.jar +leveldbjni-all-1.8.jar +libfb303-0.9.2.jar +libthrift-0.9.2.jar +log4j-1.2.17.jar +lz4-1.3.0.jar +mail-1.4.7.jar +mesos-1.0.0-shaded-protobuf.jar +metrics-core-3.1.2.jar +metrics-graphite-3.1.2.jar +metrics-json-3.1.2.jar +metrics-jvm-3.1.2.jar +minlog-1.3.0.jar +mx4j-3.0.2.jar 
+netty-3.8.0.Final.jar +netty-all-4.0.41.Final.jar +objenesis-2.1.jar +opencsv-2.3.jar +oro-2.0.8.jar +osgi-resource-locator-1.0.1.jar +paranamer-2.3.jar +parquet-column-1.8.1.jar +parquet-common-1.8.1.jar +parquet-encoding-1.8.1.jar +parquet-format-2.3.0-incubating.jar +parquet-hadoop-1.8.1.jar +parquet-hadoop-bundle-1.6.0.jar +parquet-jackson-1.8.1.jar +pmml-model-1.2.15.jar +pmml-schema-1.2.15.jar +protobuf-java-2.5.0.jar +py4j-0.10.4.jar +pyrolite-4.13.jar +scala-compiler-2.11.8.jar +scala-library-2.11.8.jar +scala-parser-combinators_2.11-1.0.4.jar +scala-reflect-2.11.8.jar +scala-xml_2.11-1.0.2.jar +scalap-2.11.8.jar +shapeless_2.11-2.0.0.jar +slf4j-api-1.7.16.jar +slf4j-log4j12-1.7.16.jar +snappy-0.2.jar +snappy-java-1.1.2.6.jar +spire-macros_2.11-0.7.4.jar +spire_2.11-0.7.4.jar +stax-api-1.0-2.jar +stax-api-1.0.1.jar +stream-2.7.0.jar +stringtemplate-3.2.1.jar +super-csv-2.2.0.jar +univocity-parsers-2.2.1.jar +validation-api-1.1.0.Final.jar +xbean-asm5-shaded-4.4.jar +xmlenc-0.52.jar +xz-1.0.jar +zookeeper-3.4.5.jar diff --git a/dev/pr-deps/spark-deps-hadoop-2.6 b/dev/pr-deps/spark-deps-hadoop-2.6 new file mode 100644 index 000000000000..4a27bf3deecb --- /dev/null +++ b/dev/pr-deps/spark-deps-hadoop-2.6 @@ -0,0 +1,184 @@ +JavaEWAH-0.3.2.jar +RoaringBitmap-0.5.11.jar +ST4-4.0.4.jar +activation-1.1.1.jar +antlr-2.7.7.jar +antlr-runtime-3.4.jar +antlr4-runtime-4.5.3.jar +aopalliance-1.0.jar +aopalliance-repackaged-2.4.0-b34.jar +apache-log4j-extras-1.2.17.jar +apacheds-i18n-2.0.0-M15.jar +apacheds-kerberos-codec-2.0.0-M15.jar +api-asn1-api-1.0.0-M20.jar +api-util-1.0.0-M20.jar +arpack_combined_all-0.1.jar +avro-1.7.7.jar +avro-ipc-1.7.7.jar +avro-mapred-1.7.7-hadoop2.jar +base64-2.3.8.jar +bcprov-jdk15on-1.51.jar +bonecp-0.8.0.RELEASE.jar +breeze-macros_2.11-0.12.jar +breeze_2.11-0.12.jar +calcite-avatica-1.2.0-incubating.jar +calcite-core-1.2.0-incubating.jar +calcite-linq4j-1.2.0-incubating.jar +chill-java-0.8.0.jar +chill_2.11-0.8.0.jar 
+commons-beanutils-1.7.0.jar +commons-beanutils-core-1.8.0.jar +commons-cli-1.2.jar +commons-codec-1.10.jar +commons-collections-3.2.2.jar +commons-compiler-2.7.6.jar +commons-compress-1.4.1.jar +commons-configuration-1.6.jar +commons-crypto-1.0.0.jar +commons-dbcp-1.4.jar +commons-digester-1.8.jar +commons-httpclient-3.1.jar +commons-io-2.4.jar +commons-lang-2.6.jar +commons-lang3-3.5.jar +commons-logging-1.1.3.jar +commons-math3-3.4.1.jar +commons-net-2.2.jar +commons-pool-1.5.4.jar +compress-lzf-1.0.3.jar +core-1.1.2.jar +curator-client-2.6.0.jar +curator-framework-2.6.0.jar +curator-recipes-2.6.0.jar +datanucleus-api-jdo-3.2.6.jar +datanucleus-core-3.2.10.jar +datanucleus-rdbms-3.2.9.jar +derby-10.12.1.1.jar +eigenbase-properties-1.1.5.jar +gson-2.2.4.jar +guava-14.0.1.jar +guice-3.0.jar +guice-servlet-3.0.jar +hadoop-annotations-2.6.4.jar +hadoop-auth-2.6.4.jar +hadoop-client-2.6.4.jar +hadoop-common-2.6.4.jar +hadoop-hdfs-2.6.4.jar +hadoop-mapreduce-client-app-2.6.4.jar +hadoop-mapreduce-client-common-2.6.4.jar +hadoop-mapreduce-client-core-2.6.4.jar +hadoop-mapreduce-client-jobclient-2.6.4.jar +hadoop-mapreduce-client-shuffle-2.6.4.jar +hadoop-yarn-api-2.6.4.jar +hadoop-yarn-client-2.6.4.jar +hadoop-yarn-common-2.6.4.jar +hadoop-yarn-server-common-2.6.4.jar +hadoop-yarn-server-web-proxy-2.6.4.jar +hk2-api-2.4.0-b34.jar +hk2-locator-2.4.0-b34.jar +hk2-utils-2.4.0-b34.jar +htrace-core-3.0.4.jar +httpclient-4.5.2.jar +httpcore-4.4.4.jar +ivy-2.4.0.jar +jackson-annotations-2.6.5.jar +jackson-core-2.6.5.jar +jackson-core-asl-1.9.13.jar +jackson-databind-2.6.5.jar +jackson-jaxrs-1.9.13.jar +jackson-mapper-asl-1.9.13.jar +jackson-module-paranamer-2.6.5.jar +jackson-module-scala_2.11-2.6.5.jar +jackson-xc-1.9.13.jar +janino-3.0.0.jar +java-xmlbuilder-1.0.jar +javassist-3.18.1-GA.jar +javax.annotation-api-1.2.jar +javax.inject-1.jar +javax.inject-2.4.0-b34.jar +javax.servlet-api-3.1.0.jar +javax.ws.rs-api-2.0.1.jar +javolution-5.5.1.jar +jaxb-api-2.2.2.jar 
+jcl-over-slf4j-1.7.16.jar +jdo-api-3.0.1.jar +jersey-client-2.22.2.jar +jersey-common-2.22.2.jar +jersey-container-servlet-2.22.2.jar +jersey-container-servlet-core-2.22.2.jar +jersey-guava-2.22.2.jar +jersey-media-jaxb-2.22.2.jar +jersey-server-2.22.2.jar +jets3t-0.9.3.jar +jetty-6.1.26.jar +jetty-util-6.1.26.jar +jline-2.12.1.jar +joda-time-2.9.3.jar +jodd-core-3.5.2.jar +jpam-1.1.jar +json-20090211.jar +json4s-ast_2.11-3.2.11.jar +json4s-core_2.11-3.2.11.jar +json4s-jackson_2.11-3.2.11.jar +jsr305-1.3.9.jar +jta-1.1.jar +jtransforms-2.4.0.jar +jul-to-slf4j-1.7.16.jar +kryo-shaded-3.0.3.jar +leveldbjni-all-1.8.jar +libfb303-0.9.2.jar +libthrift-0.9.2.jar +log4j-1.2.17.jar +lz4-1.3.0.jar +mail-1.4.7.jar +mesos-1.0.0-shaded-protobuf.jar +metrics-core-3.1.2.jar +metrics-graphite-3.1.2.jar +metrics-json-3.1.2.jar +metrics-jvm-3.1.2.jar +minlog-1.3.0.jar +mx4j-3.0.2.jar +netty-3.8.0.Final.jar +netty-all-4.0.41.Final.jar +objenesis-2.1.jar +opencsv-2.3.jar +oro-2.0.8.jar +osgi-resource-locator-1.0.1.jar +paranamer-2.3.jar +parquet-column-1.8.1.jar +parquet-common-1.8.1.jar +parquet-encoding-1.8.1.jar +parquet-format-2.3.0-incubating.jar +parquet-hadoop-1.8.1.jar +parquet-hadoop-bundle-1.6.0.jar +parquet-jackson-1.8.1.jar +pmml-model-1.2.15.jar +pmml-schema-1.2.15.jar +protobuf-java-2.5.0.jar +py4j-0.10.4.jar +pyrolite-4.13.jar +scala-compiler-2.11.8.jar +scala-library-2.11.8.jar +scala-parser-combinators_2.11-1.0.4.jar +scala-reflect-2.11.8.jar +scala-xml_2.11-1.0.2.jar +scalap-2.11.8.jar +shapeless_2.11-2.0.0.jar +slf4j-api-1.7.16.jar +slf4j-log4j12-1.7.16.jar +snappy-0.2.jar +snappy-java-1.1.2.6.jar +spire-macros_2.11-0.7.4.jar +spire_2.11-0.7.4.jar +stax-api-1.0-2.jar +stax-api-1.0.1.jar +stream-2.7.0.jar +stringtemplate-3.2.1.jar +super-csv-2.2.0.jar +univocity-parsers-2.2.1.jar +validation-api-1.1.0.Final.jar +xbean-asm5-shaded-4.4.jar +xercesImpl-2.9.1.jar +xmlenc-0.52.jar +xz-1.0.jar +zookeeper-3.4.6.jar diff --git a/dev/pr-deps/spark-deps-hadoop-2.7 
b/dev/pr-deps/spark-deps-hadoop-2.7 new file mode 100644 index 000000000000..151670a8e23e --- /dev/null +++ b/dev/pr-deps/spark-deps-hadoop-2.7 @@ -0,0 +1,185 @@ +JavaEWAH-0.3.2.jar +RoaringBitmap-0.5.11.jar +ST4-4.0.4.jar +activation-1.1.1.jar +antlr-2.7.7.jar +antlr-runtime-3.4.jar +antlr4-runtime-4.5.3.jar +aopalliance-1.0.jar +aopalliance-repackaged-2.4.0-b34.jar +apache-log4j-extras-1.2.17.jar +apacheds-i18n-2.0.0-M15.jar +apacheds-kerberos-codec-2.0.0-M15.jar +api-asn1-api-1.0.0-M20.jar +api-util-1.0.0-M20.jar +arpack_combined_all-0.1.jar +avro-1.7.7.jar +avro-ipc-1.7.7.jar +avro-mapred-1.7.7-hadoop2.jar +base64-2.3.8.jar +bcprov-jdk15on-1.51.jar +bonecp-0.8.0.RELEASE.jar +breeze-macros_2.11-0.12.jar +breeze_2.11-0.12.jar +calcite-avatica-1.2.0-incubating.jar +calcite-core-1.2.0-incubating.jar +calcite-linq4j-1.2.0-incubating.jar +chill-java-0.8.0.jar +chill_2.11-0.8.0.jar +commons-beanutils-1.7.0.jar +commons-beanutils-core-1.8.0.jar +commons-cli-1.2.jar +commons-codec-1.10.jar +commons-collections-3.2.2.jar +commons-compiler-2.7.6.jar +commons-compress-1.4.1.jar +commons-configuration-1.6.jar +commons-crypto-1.0.0.jar +commons-dbcp-1.4.jar +commons-digester-1.8.jar +commons-httpclient-3.1.jar +commons-io-2.4.jar +commons-lang-2.6.jar +commons-lang3-3.5.jar +commons-logging-1.1.3.jar +commons-math3-3.4.1.jar +commons-net-2.2.jar +commons-pool-1.5.4.jar +compress-lzf-1.0.3.jar +core-1.1.2.jar +curator-client-2.6.0.jar +curator-framework-2.6.0.jar +curator-recipes-2.6.0.jar +datanucleus-api-jdo-3.2.6.jar +datanucleus-core-3.2.10.jar +datanucleus-rdbms-3.2.9.jar +derby-10.12.1.1.jar +eigenbase-properties-1.1.5.jar +gson-2.2.4.jar +guava-14.0.1.jar +guice-3.0.jar +guice-servlet-3.0.jar +hadoop-annotations-2.7.3.jar +hadoop-auth-2.7.3.jar +hadoop-client-2.7.3.jar +hadoop-common-2.7.3.jar +hadoop-hdfs-2.7.3.jar +hadoop-mapreduce-client-app-2.7.3.jar +hadoop-mapreduce-client-common-2.7.3.jar +hadoop-mapreduce-client-core-2.7.3.jar 
+hadoop-mapreduce-client-jobclient-2.7.3.jar +hadoop-mapreduce-client-shuffle-2.7.3.jar +hadoop-yarn-api-2.7.3.jar +hadoop-yarn-client-2.7.3.jar +hadoop-yarn-common-2.7.3.jar +hadoop-yarn-server-common-2.7.3.jar +hadoop-yarn-server-web-proxy-2.7.3.jar +hk2-api-2.4.0-b34.jar +hk2-locator-2.4.0-b34.jar +hk2-utils-2.4.0-b34.jar +htrace-core-3.1.0-incubating.jar +httpclient-4.5.2.jar +httpcore-4.4.4.jar +ivy-2.4.0.jar +jackson-annotations-2.6.5.jar +jackson-core-2.6.5.jar +jackson-core-asl-1.9.13.jar +jackson-databind-2.6.5.jar +jackson-jaxrs-1.9.13.jar +jackson-mapper-asl-1.9.13.jar +jackson-module-paranamer-2.6.5.jar +jackson-module-scala_2.11-2.6.5.jar +jackson-xc-1.9.13.jar +janino-3.0.0.jar +java-xmlbuilder-1.0.jar +javassist-3.18.1-GA.jar +javax.annotation-api-1.2.jar +javax.inject-1.jar +javax.inject-2.4.0-b34.jar +javax.servlet-api-3.1.0.jar +javax.ws.rs-api-2.0.1.jar +javolution-5.5.1.jar +jaxb-api-2.2.2.jar +jcl-over-slf4j-1.7.16.jar +jdo-api-3.0.1.jar +jersey-client-2.22.2.jar +jersey-common-2.22.2.jar +jersey-container-servlet-2.22.2.jar +jersey-container-servlet-core-2.22.2.jar +jersey-guava-2.22.2.jar +jersey-media-jaxb-2.22.2.jar +jersey-server-2.22.2.jar +jets3t-0.9.3.jar +jetty-6.1.26.jar +jetty-util-6.1.26.jar +jline-2.12.1.jar +joda-time-2.9.3.jar +jodd-core-3.5.2.jar +jpam-1.1.jar +json-20090211.jar +json4s-ast_2.11-3.2.11.jar +json4s-core_2.11-3.2.11.jar +json4s-jackson_2.11-3.2.11.jar +jsp-api-2.1.jar +jsr305-1.3.9.jar +jta-1.1.jar +jtransforms-2.4.0.jar +jul-to-slf4j-1.7.16.jar +kryo-shaded-3.0.3.jar +leveldbjni-all-1.8.jar +libfb303-0.9.2.jar +libthrift-0.9.2.jar +log4j-1.2.17.jar +lz4-1.3.0.jar +mail-1.4.7.jar +mesos-1.0.0-shaded-protobuf.jar +metrics-core-3.1.2.jar +metrics-graphite-3.1.2.jar +metrics-json-3.1.2.jar +metrics-jvm-3.1.2.jar +minlog-1.3.0.jar +mx4j-3.0.2.jar +netty-3.8.0.Final.jar +netty-all-4.0.41.Final.jar +objenesis-2.1.jar +opencsv-2.3.jar +oro-2.0.8.jar +osgi-resource-locator-1.0.1.jar +paranamer-2.3.jar 
+parquet-column-1.8.1.jar +parquet-common-1.8.1.jar +parquet-encoding-1.8.1.jar +parquet-format-2.3.0-incubating.jar +parquet-hadoop-1.8.1.jar +parquet-hadoop-bundle-1.6.0.jar +parquet-jackson-1.8.1.jar +pmml-model-1.2.15.jar +pmml-schema-1.2.15.jar +protobuf-java-2.5.0.jar +py4j-0.10.4.jar +pyrolite-4.13.jar +scala-compiler-2.11.8.jar +scala-library-2.11.8.jar +scala-parser-combinators_2.11-1.0.4.jar +scala-reflect-2.11.8.jar +scala-xml_2.11-1.0.2.jar +scalap-2.11.8.jar +shapeless_2.11-2.0.0.jar +slf4j-api-1.7.16.jar +slf4j-log4j12-1.7.16.jar +snappy-0.2.jar +snappy-java-1.1.2.6.jar +spire-macros_2.11-0.7.4.jar +spire_2.11-0.7.4.jar +stax-api-1.0-2.jar +stax-api-1.0.1.jar +stream-2.7.0.jar +stringtemplate-3.2.1.jar +super-csv-2.2.0.jar +univocity-parsers-2.2.1.jar +validation-api-1.1.0.Final.jar +xbean-asm5-shaded-4.4.jar +xercesImpl-2.9.1.jar +xmlenc-0.52.jar +xz-1.0.jar +zookeeper-3.4.6.jar diff --git a/docs/ml-features.md b/docs/ml-features.md index a7f710fa52e6..99e353eb9f35 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -729,6 +729,21 @@ for more details on the API. +## Interaction + +`Implements` is a `Transformer` which implements interaction transform. + This transformer takes in Double and Vector type columns and outputs a flattened vector of their feature interactions. + +
+
+ +Refer to the [Normalizer Scala docs](api/scala/index.html#org.apache.spark.ml.feature.Interaction) +for more details on the API. + +{% include_example scala/org/apache/spark/examples/ml/InteractionExample.scala %} +
+ + ## Normalizer diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java new file mode 100644 index 000000000000..f798083f8c3c --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.examples.ml; + +import org.apache.spark.ml.feature.Interaction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +import java.util.Arrays; +import java.util.List; +import java.lang.StringBuffer; + +// $example on$ +// $example off$ + +public class JavaInteractionExample { + public static void main(String[] args) { + SparkSession spark = SparkSession + .builder() + .appName("JavaInteractionExample") + .getOrCreate(); + + // $example on$ + List data = Arrays.asList( + RowFactory.create(0, 1, 2), + RowFactory.create(1, 4, 3), + RowFactory.create(2, 6, 1), + RowFactory.create(3, 10, 8), + RowFactory.create(4, 9, 2), + RowFactory.create(5, 1, 1) + ); + + StructType schema = new StructType(new StructField[]{ + new StructField("id1", DataTypes.IntegerType, false, Metadata.empty()), + new StructField("id2", DataTypes.IntegerType, false, Metadata.empty()), + new StructField("id3", DataTypes.IntegerType, false, Metadata.empty()) + }); + + Dataset df = spark.createDataFrame(data, schema); + + Interaction interaction = new Interaction() + .setInputCols(new String[]{"id1","id2","id3"}) + .setOutputCol("interactedCol"); + Dataset interacted = interaction.transform(df); + + interacted.show(); + // $example off$ + + spark.stop(); + } +} + diff --git a/examples/src/main/python/ml/interaction_example.py b/examples/src/main/python/ml/interaction_example.py new file mode 100644 index 000000000000..1ac0c71ba69c --- /dev/null +++ b/examples/src/main/python/ml/interaction_example.py @@ -0,0 +1,50 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from __future__ import print_function + +# $example on$ +from pyspark.ml.feature import Interaction +# $example off$ +from pyspark.sql import SparkSession + +if __name__ == "__main__": + spark = SparkSession\ + .builder\ + .appName("InteractionExample")\ + .getOrCreate() + + # $example on$ + df = spark.createDataFrame([ + (0, 1, 2), + (1, 4, 3), + (2, 6, 1), + (3, 10, 8), + (4, 9, 2), + (5, 1, 1) + ], ["id1", "id2", "id3"]) + + interaction = StringIndexer(inputCol="category", outputCol="categoryIndex") + model = stringIndexer.fit(df) + indexed = model.transform(df) + + encoder = OneHotEncoder(inputCol="categoryIndex", outputCol="categoryVec") + encoded = encoder.transform(indexed) + encoded.show() + # $example off$ + + spark.stop() diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala new file mode 100644 index 000000000000..a3a6462e4278 --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// scalastyle:off println +package org.apache.spark.examples.ml + +// $example on$ +import org.apache.spark.ml.feature.Interaction +// $example off$ +import org.apache.spark.sql.SparkSession + +object InteractionExample { + def main(args: Array[String]): Unit = { + val spark = SparkSession + .builder + .appName("InteractionExample") + .getOrCreate() + + // $example on$ + val df = spark.createDataFrame(Seq( + (0, 1, 2), + (1, 4, 3), + (2, 6, 1), + (3, 10, 8), + (4, 9, 2), + (5, 1, 1) + )).toDF("id1", "id2", "id3") + + val interaction = new Interaction() + .setInputCols(Array("id1", "id2", "id3")) + .setOutputCol("interactedCol") + + val interacted = interaction.transform(df) + + interacted.show() + // $example off$ + + spark.stop() + } +} +// scalastyle:on println \ No newline at end of file From 57b347c466f03478aea678383d7e4b5da056e421 Mon Sep 17 00:00:00 2001 From: chie8842 Date: Thu, 27 Oct 2016 11:48:00 +0900 Subject: [PATCH 02/10] Revert "first commit" This reverts commit 3fad195066ecf59412e636a7cbd1873b8c7ab4ce. 
--- dev/pr-deps/spark-deps-hadoop-2.2 | 167 ---------------- dev/pr-deps/spark-deps-hadoop-2.3 | 175 ----------------- dev/pr-deps/spark-deps-hadoop-2.4 | 175 ----------------- dev/pr-deps/spark-deps-hadoop-2.6 | 184 ----------------- dev/pr-deps/spark-deps-hadoop-2.7 | 185 ------------------ docs/ml-features.md | 15 -- .../examples/ml/JavaInteractionExample.java | 73 ------- .../src/main/python/ml/interaction_example.py | 50 ----- .../examples/ml/InteractionExample.scala | 55 ------ 9 files changed, 1079 deletions(-) delete mode 100644 dev/pr-deps/spark-deps-hadoop-2.2 delete mode 100644 dev/pr-deps/spark-deps-hadoop-2.3 delete mode 100644 dev/pr-deps/spark-deps-hadoop-2.4 delete mode 100644 dev/pr-deps/spark-deps-hadoop-2.6 delete mode 100644 dev/pr-deps/spark-deps-hadoop-2.7 delete mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java delete mode 100644 examples/src/main/python/ml/interaction_example.py delete mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala diff --git a/dev/pr-deps/spark-deps-hadoop-2.2 b/dev/pr-deps/spark-deps-hadoop-2.2 deleted file mode 100644 index 99279a4ca8be..000000000000 --- a/dev/pr-deps/spark-deps-hadoop-2.2 +++ /dev/null @@ -1,167 +0,0 @@ -JavaEWAH-0.3.2.jar -RoaringBitmap-0.5.11.jar -ST4-4.0.4.jar -antlr-2.7.7.jar -antlr-runtime-3.4.jar -antlr4-runtime-4.5.3.jar -aopalliance-1.0.jar -aopalliance-repackaged-2.4.0-b34.jar -apache-log4j-extras-1.2.17.jar -arpack_combined_all-0.1.jar -avro-1.7.7.jar -avro-ipc-1.7.7.jar -avro-mapred-1.7.7-hadoop2.jar -bonecp-0.8.0.RELEASE.jar -breeze-macros_2.11-0.12.jar -breeze_2.11-0.12.jar -calcite-avatica-1.2.0-incubating.jar -calcite-core-1.2.0-incubating.jar -calcite-linq4j-1.2.0-incubating.jar -chill-java-0.8.0.jar -chill_2.11-0.8.0.jar -commons-beanutils-1.7.0.jar -commons-beanutils-core-1.8.0.jar -commons-cli-1.2.jar -commons-codec-1.10.jar -commons-collections-3.2.2.jar -commons-compiler-2.7.6.jar 
-commons-compress-1.4.1.jar -commons-configuration-1.6.jar -commons-crypto-1.0.0.jar -commons-dbcp-1.4.jar -commons-digester-1.8.jar -commons-httpclient-3.1.jar -commons-io-2.4.jar -commons-lang-2.6.jar -commons-lang3-3.5.jar -commons-logging-1.1.3.jar -commons-math-2.1.jar -commons-math3-3.4.1.jar -commons-net-2.2.jar -commons-pool-1.5.4.jar -compress-lzf-1.0.3.jar -core-1.1.2.jar -curator-client-2.4.0.jar -curator-framework-2.4.0.jar -curator-recipes-2.4.0.jar -datanucleus-api-jdo-3.2.6.jar -datanucleus-core-3.2.10.jar -datanucleus-rdbms-3.2.9.jar -derby-10.12.1.1.jar -eigenbase-properties-1.1.5.jar -guava-14.0.1.jar -guice-3.0.jar -guice-servlet-3.0.jar -hadoop-annotations-2.2.0.jar -hadoop-auth-2.2.0.jar -hadoop-client-2.2.0.jar -hadoop-common-2.2.0.jar -hadoop-hdfs-2.2.0.jar -hadoop-mapreduce-client-app-2.2.0.jar -hadoop-mapreduce-client-common-2.2.0.jar -hadoop-mapreduce-client-core-2.2.0.jar -hadoop-mapreduce-client-jobclient-2.2.0.jar -hadoop-mapreduce-client-shuffle-2.2.0.jar -hadoop-yarn-api-2.2.0.jar -hadoop-yarn-client-2.2.0.jar -hadoop-yarn-common-2.2.0.jar -hadoop-yarn-server-common-2.2.0.jar -hadoop-yarn-server-web-proxy-2.2.0.jar -hk2-api-2.4.0-b34.jar -hk2-locator-2.4.0-b34.jar -hk2-utils-2.4.0-b34.jar -httpclient-4.5.2.jar -httpcore-4.4.4.jar -ivy-2.4.0.jar -jackson-annotations-2.6.5.jar -jackson-core-2.6.5.jar -jackson-core-asl-1.9.13.jar -jackson-databind-2.6.5.jar -jackson-mapper-asl-1.9.13.jar -jackson-module-paranamer-2.6.5.jar -jackson-module-scala_2.11-2.6.5.jar -janino-3.0.0.jar -javassist-3.18.1-GA.jar -javax.annotation-api-1.2.jar -javax.inject-1.jar -javax.inject-2.4.0-b34.jar -javax.servlet-api-3.1.0.jar -javax.ws.rs-api-2.0.1.jar -javolution-5.5.1.jar -jcl-over-slf4j-1.7.16.jar -jdo-api-3.0.1.jar -jersey-client-2.22.2.jar -jersey-common-2.22.2.jar -jersey-container-servlet-2.22.2.jar -jersey-container-servlet-core-2.22.2.jar -jersey-guava-2.22.2.jar -jersey-media-jaxb-2.22.2.jar -jersey-server-2.22.2.jar -jets3t-0.7.1.jar 
-jetty-util-6.1.26.jar -jline-2.12.1.jar -joda-time-2.9.3.jar -jodd-core-3.5.2.jar -jpam-1.1.jar -json-20090211.jar -json4s-ast_2.11-3.2.11.jar -json4s-core_2.11-3.2.11.jar -json4s-jackson_2.11-3.2.11.jar -jsr305-1.3.9.jar -jta-1.1.jar -jtransforms-2.4.0.jar -jul-to-slf4j-1.7.16.jar -kryo-shaded-3.0.3.jar -leveldbjni-all-1.8.jar -libfb303-0.9.2.jar -libthrift-0.9.2.jar -log4j-1.2.17.jar -lz4-1.3.0.jar -mesos-1.0.0-shaded-protobuf.jar -metrics-core-3.1.2.jar -metrics-graphite-3.1.2.jar -metrics-json-3.1.2.jar -metrics-jvm-3.1.2.jar -minlog-1.3.0.jar -netty-3.8.0.Final.jar -netty-all-4.0.41.Final.jar -objenesis-2.1.jar -opencsv-2.3.jar -oro-2.0.8.jar -osgi-resource-locator-1.0.1.jar -paranamer-2.3.jar -parquet-column-1.8.1.jar -parquet-common-1.8.1.jar -parquet-encoding-1.8.1.jar -parquet-format-2.3.0-incubating.jar -parquet-hadoop-1.8.1.jar -parquet-hadoop-bundle-1.6.0.jar -parquet-jackson-1.8.1.jar -pmml-model-1.2.15.jar -pmml-schema-1.2.15.jar -protobuf-java-2.5.0.jar -py4j-0.10.4.jar -pyrolite-4.13.jar -scala-compiler-2.11.8.jar -scala-library-2.11.8.jar -scala-parser-combinators_2.11-1.0.4.jar -scala-reflect-2.11.8.jar -scala-xml_2.11-1.0.2.jar -scalap-2.11.8.jar -shapeless_2.11-2.0.0.jar -slf4j-api-1.7.16.jar -slf4j-log4j12-1.7.16.jar -snappy-0.2.jar -snappy-java-1.1.2.6.jar -spire-macros_2.11-0.7.4.jar -spire_2.11-0.7.4.jar -stax-api-1.0.1.jar -stream-2.7.0.jar -stringtemplate-3.2.1.jar -super-csv-2.2.0.jar -univocity-parsers-2.2.1.jar -validation-api-1.1.0.Final.jar -xbean-asm5-shaded-4.4.jar -xmlenc-0.52.jar -xz-1.0.jar -zookeeper-3.4.5.jar diff --git a/dev/pr-deps/spark-deps-hadoop-2.3 b/dev/pr-deps/spark-deps-hadoop-2.3 deleted file mode 100644 index f094b4a7e167..000000000000 --- a/dev/pr-deps/spark-deps-hadoop-2.3 +++ /dev/null @@ -1,175 +0,0 @@ -JavaEWAH-0.3.2.jar -RoaringBitmap-0.5.11.jar -ST4-4.0.4.jar -activation-1.1.1.jar -antlr-2.7.7.jar -antlr-runtime-3.4.jar -antlr4-runtime-4.5.3.jar -aopalliance-1.0.jar -aopalliance-repackaged-2.4.0-b34.jar 
-apache-log4j-extras-1.2.17.jar -arpack_combined_all-0.1.jar -avro-1.7.7.jar -avro-ipc-1.7.7.jar -avro-mapred-1.7.7-hadoop2.jar -base64-2.3.8.jar -bcprov-jdk15on-1.51.jar -bonecp-0.8.0.RELEASE.jar -breeze-macros_2.11-0.12.jar -breeze_2.11-0.12.jar -calcite-avatica-1.2.0-incubating.jar -calcite-core-1.2.0-incubating.jar -calcite-linq4j-1.2.0-incubating.jar -chill-java-0.8.0.jar -chill_2.11-0.8.0.jar -commons-beanutils-1.7.0.jar -commons-beanutils-core-1.8.0.jar -commons-cli-1.2.jar -commons-codec-1.10.jar -commons-collections-3.2.2.jar -commons-compiler-2.7.6.jar -commons-compress-1.4.1.jar -commons-configuration-1.6.jar -commons-crypto-1.0.0.jar -commons-dbcp-1.4.jar -commons-digester-1.8.jar -commons-httpclient-3.1.jar -commons-io-2.4.jar -commons-lang-2.6.jar -commons-lang3-3.5.jar -commons-logging-1.1.3.jar -commons-math3-3.4.1.jar -commons-net-2.2.jar -commons-pool-1.5.4.jar -compress-lzf-1.0.3.jar -core-1.1.2.jar -curator-client-2.4.0.jar -curator-framework-2.4.0.jar -curator-recipes-2.4.0.jar -datanucleus-api-jdo-3.2.6.jar -datanucleus-core-3.2.10.jar -datanucleus-rdbms-3.2.9.jar -derby-10.12.1.1.jar -eigenbase-properties-1.1.5.jar -guava-14.0.1.jar -guice-3.0.jar -guice-servlet-3.0.jar -hadoop-annotations-2.3.0.jar -hadoop-auth-2.3.0.jar -hadoop-client-2.3.0.jar -hadoop-common-2.3.0.jar -hadoop-hdfs-2.3.0.jar -hadoop-mapreduce-client-app-2.3.0.jar -hadoop-mapreduce-client-common-2.3.0.jar -hadoop-mapreduce-client-core-2.3.0.jar -hadoop-mapreduce-client-jobclient-2.3.0.jar -hadoop-mapreduce-client-shuffle-2.3.0.jar -hadoop-yarn-api-2.3.0.jar -hadoop-yarn-client-2.3.0.jar -hadoop-yarn-common-2.3.0.jar -hadoop-yarn-server-common-2.3.0.jar -hadoop-yarn-server-web-proxy-2.3.0.jar -hk2-api-2.4.0-b34.jar -hk2-locator-2.4.0-b34.jar -hk2-utils-2.4.0-b34.jar -httpclient-4.5.2.jar -httpcore-4.4.4.jar -ivy-2.4.0.jar -jackson-annotations-2.6.5.jar -jackson-core-2.6.5.jar -jackson-core-asl-1.9.13.jar -jackson-databind-2.6.5.jar -jackson-mapper-asl-1.9.13.jar 
-jackson-module-paranamer-2.6.5.jar -jackson-module-scala_2.11-2.6.5.jar -janino-3.0.0.jar -java-xmlbuilder-1.0.jar -javassist-3.18.1-GA.jar -javax.annotation-api-1.2.jar -javax.inject-1.jar -javax.inject-2.4.0-b34.jar -javax.servlet-api-3.1.0.jar -javax.ws.rs-api-2.0.1.jar -javolution-5.5.1.jar -jaxb-api-2.2.2.jar -jcl-over-slf4j-1.7.16.jar -jdo-api-3.0.1.jar -jersey-client-2.22.2.jar -jersey-common-2.22.2.jar -jersey-container-servlet-2.22.2.jar -jersey-container-servlet-core-2.22.2.jar -jersey-guava-2.22.2.jar -jersey-media-jaxb-2.22.2.jar -jersey-server-2.22.2.jar -jets3t-0.9.3.jar -jetty-6.1.26.jar -jetty-util-6.1.26.jar -jline-2.12.1.jar -joda-time-2.9.3.jar -jodd-core-3.5.2.jar -jpam-1.1.jar -json-20090211.jar -json4s-ast_2.11-3.2.11.jar -json4s-core_2.11-3.2.11.jar -json4s-jackson_2.11-3.2.11.jar -jsr305-1.3.9.jar -jta-1.1.jar -jtransforms-2.4.0.jar -jul-to-slf4j-1.7.16.jar -kryo-shaded-3.0.3.jar -leveldbjni-all-1.8.jar -libfb303-0.9.2.jar -libthrift-0.9.2.jar -log4j-1.2.17.jar -lz4-1.3.0.jar -mail-1.4.7.jar -mesos-1.0.0-shaded-protobuf.jar -metrics-core-3.1.2.jar -metrics-graphite-3.1.2.jar -metrics-json-3.1.2.jar -metrics-jvm-3.1.2.jar -minlog-1.3.0.jar -mx4j-3.0.2.jar -netty-3.8.0.Final.jar -netty-all-4.0.41.Final.jar -objenesis-2.1.jar -opencsv-2.3.jar -oro-2.0.8.jar -osgi-resource-locator-1.0.1.jar -paranamer-2.3.jar -parquet-column-1.8.1.jar -parquet-common-1.8.1.jar -parquet-encoding-1.8.1.jar -parquet-format-2.3.0-incubating.jar -parquet-hadoop-1.8.1.jar -parquet-hadoop-bundle-1.6.0.jar -parquet-jackson-1.8.1.jar -pmml-model-1.2.15.jar -pmml-schema-1.2.15.jar -protobuf-java-2.5.0.jar -py4j-0.10.4.jar -pyrolite-4.13.jar -scala-compiler-2.11.8.jar -scala-library-2.11.8.jar -scala-parser-combinators_2.11-1.0.4.jar -scala-reflect-2.11.8.jar -scala-xml_2.11-1.0.2.jar -scalap-2.11.8.jar -shapeless_2.11-2.0.0.jar -slf4j-api-1.7.16.jar -slf4j-log4j12-1.7.16.jar -snappy-0.2.jar -snappy-java-1.1.2.6.jar -spire-macros_2.11-0.7.4.jar -spire_2.11-0.7.4.jar 
-stax-api-1.0-2.jar -stax-api-1.0.1.jar -stream-2.7.0.jar -stringtemplate-3.2.1.jar -super-csv-2.2.0.jar -univocity-parsers-2.2.1.jar -validation-api-1.1.0.Final.jar -xbean-asm5-shaded-4.4.jar -xmlenc-0.52.jar -xz-1.0.jar -zookeeper-3.4.5.jar diff --git a/dev/pr-deps/spark-deps-hadoop-2.4 b/dev/pr-deps/spark-deps-hadoop-2.4 deleted file mode 100644 index 7f0ef98680a1..000000000000 --- a/dev/pr-deps/spark-deps-hadoop-2.4 +++ /dev/null @@ -1,175 +0,0 @@ -JavaEWAH-0.3.2.jar -RoaringBitmap-0.5.11.jar -ST4-4.0.4.jar -activation-1.1.1.jar -antlr-2.7.7.jar -antlr-runtime-3.4.jar -antlr4-runtime-4.5.3.jar -aopalliance-1.0.jar -aopalliance-repackaged-2.4.0-b34.jar -apache-log4j-extras-1.2.17.jar -arpack_combined_all-0.1.jar -avro-1.7.7.jar -avro-ipc-1.7.7.jar -avro-mapred-1.7.7-hadoop2.jar -base64-2.3.8.jar -bcprov-jdk15on-1.51.jar -bonecp-0.8.0.RELEASE.jar -breeze-macros_2.11-0.12.jar -breeze_2.11-0.12.jar -calcite-avatica-1.2.0-incubating.jar -calcite-core-1.2.0-incubating.jar -calcite-linq4j-1.2.0-incubating.jar -chill-java-0.8.0.jar -chill_2.11-0.8.0.jar -commons-beanutils-1.7.0.jar -commons-beanutils-core-1.8.0.jar -commons-cli-1.2.jar -commons-codec-1.10.jar -commons-collections-3.2.2.jar -commons-compiler-2.7.6.jar -commons-compress-1.4.1.jar -commons-configuration-1.6.jar -commons-crypto-1.0.0.jar -commons-dbcp-1.4.jar -commons-digester-1.8.jar -commons-httpclient-3.1.jar -commons-io-2.4.jar -commons-lang-2.6.jar -commons-lang3-3.5.jar -commons-logging-1.1.3.jar -commons-math3-3.4.1.jar -commons-net-2.2.jar -commons-pool-1.5.4.jar -compress-lzf-1.0.3.jar -core-1.1.2.jar -curator-client-2.4.0.jar -curator-framework-2.4.0.jar -curator-recipes-2.4.0.jar -datanucleus-api-jdo-3.2.6.jar -datanucleus-core-3.2.10.jar -datanucleus-rdbms-3.2.9.jar -derby-10.12.1.1.jar -eigenbase-properties-1.1.5.jar -guava-14.0.1.jar -guice-3.0.jar -guice-servlet-3.0.jar -hadoop-annotations-2.4.1.jar -hadoop-auth-2.4.1.jar -hadoop-client-2.4.1.jar -hadoop-common-2.4.1.jar 
-hadoop-hdfs-2.4.1.jar -hadoop-mapreduce-client-app-2.4.1.jar -hadoop-mapreduce-client-common-2.4.1.jar -hadoop-mapreduce-client-core-2.4.1.jar -hadoop-mapreduce-client-jobclient-2.4.1.jar -hadoop-mapreduce-client-shuffle-2.4.1.jar -hadoop-yarn-api-2.4.1.jar -hadoop-yarn-client-2.4.1.jar -hadoop-yarn-common-2.4.1.jar -hadoop-yarn-server-common-2.4.1.jar -hadoop-yarn-server-web-proxy-2.4.1.jar -hk2-api-2.4.0-b34.jar -hk2-locator-2.4.0-b34.jar -hk2-utils-2.4.0-b34.jar -httpclient-4.5.2.jar -httpcore-4.4.4.jar -ivy-2.4.0.jar -jackson-annotations-2.6.5.jar -jackson-core-2.6.5.jar -jackson-core-asl-1.9.13.jar -jackson-databind-2.6.5.jar -jackson-mapper-asl-1.9.13.jar -jackson-module-paranamer-2.6.5.jar -jackson-module-scala_2.11-2.6.5.jar -janino-3.0.0.jar -java-xmlbuilder-1.0.jar -javassist-3.18.1-GA.jar -javax.annotation-api-1.2.jar -javax.inject-1.jar -javax.inject-2.4.0-b34.jar -javax.servlet-api-3.1.0.jar -javax.ws.rs-api-2.0.1.jar -javolution-5.5.1.jar -jaxb-api-2.2.2.jar -jcl-over-slf4j-1.7.16.jar -jdo-api-3.0.1.jar -jersey-client-2.22.2.jar -jersey-common-2.22.2.jar -jersey-container-servlet-2.22.2.jar -jersey-container-servlet-core-2.22.2.jar -jersey-guava-2.22.2.jar -jersey-media-jaxb-2.22.2.jar -jersey-server-2.22.2.jar -jets3t-0.9.3.jar -jetty-6.1.26.jar -jetty-util-6.1.26.jar -jline-2.12.1.jar -joda-time-2.9.3.jar -jodd-core-3.5.2.jar -jpam-1.1.jar -json-20090211.jar -json4s-ast_2.11-3.2.11.jar -json4s-core_2.11-3.2.11.jar -json4s-jackson_2.11-3.2.11.jar -jsr305-1.3.9.jar -jta-1.1.jar -jtransforms-2.4.0.jar -jul-to-slf4j-1.7.16.jar -kryo-shaded-3.0.3.jar -leveldbjni-all-1.8.jar -libfb303-0.9.2.jar -libthrift-0.9.2.jar -log4j-1.2.17.jar -lz4-1.3.0.jar -mail-1.4.7.jar -mesos-1.0.0-shaded-protobuf.jar -metrics-core-3.1.2.jar -metrics-graphite-3.1.2.jar -metrics-json-3.1.2.jar -metrics-jvm-3.1.2.jar -minlog-1.3.0.jar -mx4j-3.0.2.jar -netty-3.8.0.Final.jar -netty-all-4.0.41.Final.jar -objenesis-2.1.jar -opencsv-2.3.jar -oro-2.0.8.jar 
-osgi-resource-locator-1.0.1.jar -paranamer-2.3.jar -parquet-column-1.8.1.jar -parquet-common-1.8.1.jar -parquet-encoding-1.8.1.jar -parquet-format-2.3.0-incubating.jar -parquet-hadoop-1.8.1.jar -parquet-hadoop-bundle-1.6.0.jar -parquet-jackson-1.8.1.jar -pmml-model-1.2.15.jar -pmml-schema-1.2.15.jar -protobuf-java-2.5.0.jar -py4j-0.10.4.jar -pyrolite-4.13.jar -scala-compiler-2.11.8.jar -scala-library-2.11.8.jar -scala-parser-combinators_2.11-1.0.4.jar -scala-reflect-2.11.8.jar -scala-xml_2.11-1.0.2.jar -scalap-2.11.8.jar -shapeless_2.11-2.0.0.jar -slf4j-api-1.7.16.jar -slf4j-log4j12-1.7.16.jar -snappy-0.2.jar -snappy-java-1.1.2.6.jar -spire-macros_2.11-0.7.4.jar -spire_2.11-0.7.4.jar -stax-api-1.0-2.jar -stax-api-1.0.1.jar -stream-2.7.0.jar -stringtemplate-3.2.1.jar -super-csv-2.2.0.jar -univocity-parsers-2.2.1.jar -validation-api-1.1.0.Final.jar -xbean-asm5-shaded-4.4.jar -xmlenc-0.52.jar -xz-1.0.jar -zookeeper-3.4.5.jar diff --git a/dev/pr-deps/spark-deps-hadoop-2.6 b/dev/pr-deps/spark-deps-hadoop-2.6 deleted file mode 100644 index 4a27bf3deecb..000000000000 --- a/dev/pr-deps/spark-deps-hadoop-2.6 +++ /dev/null @@ -1,184 +0,0 @@ -JavaEWAH-0.3.2.jar -RoaringBitmap-0.5.11.jar -ST4-4.0.4.jar -activation-1.1.1.jar -antlr-2.7.7.jar -antlr-runtime-3.4.jar -antlr4-runtime-4.5.3.jar -aopalliance-1.0.jar -aopalliance-repackaged-2.4.0-b34.jar -apache-log4j-extras-1.2.17.jar -apacheds-i18n-2.0.0-M15.jar -apacheds-kerberos-codec-2.0.0-M15.jar -api-asn1-api-1.0.0-M20.jar -api-util-1.0.0-M20.jar -arpack_combined_all-0.1.jar -avro-1.7.7.jar -avro-ipc-1.7.7.jar -avro-mapred-1.7.7-hadoop2.jar -base64-2.3.8.jar -bcprov-jdk15on-1.51.jar -bonecp-0.8.0.RELEASE.jar -breeze-macros_2.11-0.12.jar -breeze_2.11-0.12.jar -calcite-avatica-1.2.0-incubating.jar -calcite-core-1.2.0-incubating.jar -calcite-linq4j-1.2.0-incubating.jar -chill-java-0.8.0.jar -chill_2.11-0.8.0.jar -commons-beanutils-1.7.0.jar -commons-beanutils-core-1.8.0.jar -commons-cli-1.2.jar -commons-codec-1.10.jar 
-commons-collections-3.2.2.jar -commons-compiler-2.7.6.jar -commons-compress-1.4.1.jar -commons-configuration-1.6.jar -commons-crypto-1.0.0.jar -commons-dbcp-1.4.jar -commons-digester-1.8.jar -commons-httpclient-3.1.jar -commons-io-2.4.jar -commons-lang-2.6.jar -commons-lang3-3.5.jar -commons-logging-1.1.3.jar -commons-math3-3.4.1.jar -commons-net-2.2.jar -commons-pool-1.5.4.jar -compress-lzf-1.0.3.jar -core-1.1.2.jar -curator-client-2.6.0.jar -curator-framework-2.6.0.jar -curator-recipes-2.6.0.jar -datanucleus-api-jdo-3.2.6.jar -datanucleus-core-3.2.10.jar -datanucleus-rdbms-3.2.9.jar -derby-10.12.1.1.jar -eigenbase-properties-1.1.5.jar -gson-2.2.4.jar -guava-14.0.1.jar -guice-3.0.jar -guice-servlet-3.0.jar -hadoop-annotations-2.6.4.jar -hadoop-auth-2.6.4.jar -hadoop-client-2.6.4.jar -hadoop-common-2.6.4.jar -hadoop-hdfs-2.6.4.jar -hadoop-mapreduce-client-app-2.6.4.jar -hadoop-mapreduce-client-common-2.6.4.jar -hadoop-mapreduce-client-core-2.6.4.jar -hadoop-mapreduce-client-jobclient-2.6.4.jar -hadoop-mapreduce-client-shuffle-2.6.4.jar -hadoop-yarn-api-2.6.4.jar -hadoop-yarn-client-2.6.4.jar -hadoop-yarn-common-2.6.4.jar -hadoop-yarn-server-common-2.6.4.jar -hadoop-yarn-server-web-proxy-2.6.4.jar -hk2-api-2.4.0-b34.jar -hk2-locator-2.4.0-b34.jar -hk2-utils-2.4.0-b34.jar -htrace-core-3.0.4.jar -httpclient-4.5.2.jar -httpcore-4.4.4.jar -ivy-2.4.0.jar -jackson-annotations-2.6.5.jar -jackson-core-2.6.5.jar -jackson-core-asl-1.9.13.jar -jackson-databind-2.6.5.jar -jackson-jaxrs-1.9.13.jar -jackson-mapper-asl-1.9.13.jar -jackson-module-paranamer-2.6.5.jar -jackson-module-scala_2.11-2.6.5.jar -jackson-xc-1.9.13.jar -janino-3.0.0.jar -java-xmlbuilder-1.0.jar -javassist-3.18.1-GA.jar -javax.annotation-api-1.2.jar -javax.inject-1.jar -javax.inject-2.4.0-b34.jar -javax.servlet-api-3.1.0.jar -javax.ws.rs-api-2.0.1.jar -javolution-5.5.1.jar -jaxb-api-2.2.2.jar -jcl-over-slf4j-1.7.16.jar -jdo-api-3.0.1.jar -jersey-client-2.22.2.jar -jersey-common-2.22.2.jar 
-jersey-container-servlet-2.22.2.jar -jersey-container-servlet-core-2.22.2.jar -jersey-guava-2.22.2.jar -jersey-media-jaxb-2.22.2.jar -jersey-server-2.22.2.jar -jets3t-0.9.3.jar -jetty-6.1.26.jar -jetty-util-6.1.26.jar -jline-2.12.1.jar -joda-time-2.9.3.jar -jodd-core-3.5.2.jar -jpam-1.1.jar -json-20090211.jar -json4s-ast_2.11-3.2.11.jar -json4s-core_2.11-3.2.11.jar -json4s-jackson_2.11-3.2.11.jar -jsr305-1.3.9.jar -jta-1.1.jar -jtransforms-2.4.0.jar -jul-to-slf4j-1.7.16.jar -kryo-shaded-3.0.3.jar -leveldbjni-all-1.8.jar -libfb303-0.9.2.jar -libthrift-0.9.2.jar -log4j-1.2.17.jar -lz4-1.3.0.jar -mail-1.4.7.jar -mesos-1.0.0-shaded-protobuf.jar -metrics-core-3.1.2.jar -metrics-graphite-3.1.2.jar -metrics-json-3.1.2.jar -metrics-jvm-3.1.2.jar -minlog-1.3.0.jar -mx4j-3.0.2.jar -netty-3.8.0.Final.jar -netty-all-4.0.41.Final.jar -objenesis-2.1.jar -opencsv-2.3.jar -oro-2.0.8.jar -osgi-resource-locator-1.0.1.jar -paranamer-2.3.jar -parquet-column-1.8.1.jar -parquet-common-1.8.1.jar -parquet-encoding-1.8.1.jar -parquet-format-2.3.0-incubating.jar -parquet-hadoop-1.8.1.jar -parquet-hadoop-bundle-1.6.0.jar -parquet-jackson-1.8.1.jar -pmml-model-1.2.15.jar -pmml-schema-1.2.15.jar -protobuf-java-2.5.0.jar -py4j-0.10.4.jar -pyrolite-4.13.jar -scala-compiler-2.11.8.jar -scala-library-2.11.8.jar -scala-parser-combinators_2.11-1.0.4.jar -scala-reflect-2.11.8.jar -scala-xml_2.11-1.0.2.jar -scalap-2.11.8.jar -shapeless_2.11-2.0.0.jar -slf4j-api-1.7.16.jar -slf4j-log4j12-1.7.16.jar -snappy-0.2.jar -snappy-java-1.1.2.6.jar -spire-macros_2.11-0.7.4.jar -spire_2.11-0.7.4.jar -stax-api-1.0-2.jar -stax-api-1.0.1.jar -stream-2.7.0.jar -stringtemplate-3.2.1.jar -super-csv-2.2.0.jar -univocity-parsers-2.2.1.jar -validation-api-1.1.0.Final.jar -xbean-asm5-shaded-4.4.jar -xercesImpl-2.9.1.jar -xmlenc-0.52.jar -xz-1.0.jar -zookeeper-3.4.6.jar diff --git a/dev/pr-deps/spark-deps-hadoop-2.7 b/dev/pr-deps/spark-deps-hadoop-2.7 deleted file mode 100644 index 151670a8e23e..000000000000 --- 
a/dev/pr-deps/spark-deps-hadoop-2.7 +++ /dev/null @@ -1,185 +0,0 @@ -JavaEWAH-0.3.2.jar -RoaringBitmap-0.5.11.jar -ST4-4.0.4.jar -activation-1.1.1.jar -antlr-2.7.7.jar -antlr-runtime-3.4.jar -antlr4-runtime-4.5.3.jar -aopalliance-1.0.jar -aopalliance-repackaged-2.4.0-b34.jar -apache-log4j-extras-1.2.17.jar -apacheds-i18n-2.0.0-M15.jar -apacheds-kerberos-codec-2.0.0-M15.jar -api-asn1-api-1.0.0-M20.jar -api-util-1.0.0-M20.jar -arpack_combined_all-0.1.jar -avro-1.7.7.jar -avro-ipc-1.7.7.jar -avro-mapred-1.7.7-hadoop2.jar -base64-2.3.8.jar -bcprov-jdk15on-1.51.jar -bonecp-0.8.0.RELEASE.jar -breeze-macros_2.11-0.12.jar -breeze_2.11-0.12.jar -calcite-avatica-1.2.0-incubating.jar -calcite-core-1.2.0-incubating.jar -calcite-linq4j-1.2.0-incubating.jar -chill-java-0.8.0.jar -chill_2.11-0.8.0.jar -commons-beanutils-1.7.0.jar -commons-beanutils-core-1.8.0.jar -commons-cli-1.2.jar -commons-codec-1.10.jar -commons-collections-3.2.2.jar -commons-compiler-2.7.6.jar -commons-compress-1.4.1.jar -commons-configuration-1.6.jar -commons-crypto-1.0.0.jar -commons-dbcp-1.4.jar -commons-digester-1.8.jar -commons-httpclient-3.1.jar -commons-io-2.4.jar -commons-lang-2.6.jar -commons-lang3-3.5.jar -commons-logging-1.1.3.jar -commons-math3-3.4.1.jar -commons-net-2.2.jar -commons-pool-1.5.4.jar -compress-lzf-1.0.3.jar -core-1.1.2.jar -curator-client-2.6.0.jar -curator-framework-2.6.0.jar -curator-recipes-2.6.0.jar -datanucleus-api-jdo-3.2.6.jar -datanucleus-core-3.2.10.jar -datanucleus-rdbms-3.2.9.jar -derby-10.12.1.1.jar -eigenbase-properties-1.1.5.jar -gson-2.2.4.jar -guava-14.0.1.jar -guice-3.0.jar -guice-servlet-3.0.jar -hadoop-annotations-2.7.3.jar -hadoop-auth-2.7.3.jar -hadoop-client-2.7.3.jar -hadoop-common-2.7.3.jar -hadoop-hdfs-2.7.3.jar -hadoop-mapreduce-client-app-2.7.3.jar -hadoop-mapreduce-client-common-2.7.3.jar -hadoop-mapreduce-client-core-2.7.3.jar -hadoop-mapreduce-client-jobclient-2.7.3.jar -hadoop-mapreduce-client-shuffle-2.7.3.jar -hadoop-yarn-api-2.7.3.jar 
-hadoop-yarn-client-2.7.3.jar -hadoop-yarn-common-2.7.3.jar -hadoop-yarn-server-common-2.7.3.jar -hadoop-yarn-server-web-proxy-2.7.3.jar -hk2-api-2.4.0-b34.jar -hk2-locator-2.4.0-b34.jar -hk2-utils-2.4.0-b34.jar -htrace-core-3.1.0-incubating.jar -httpclient-4.5.2.jar -httpcore-4.4.4.jar -ivy-2.4.0.jar -jackson-annotations-2.6.5.jar -jackson-core-2.6.5.jar -jackson-core-asl-1.9.13.jar -jackson-databind-2.6.5.jar -jackson-jaxrs-1.9.13.jar -jackson-mapper-asl-1.9.13.jar -jackson-module-paranamer-2.6.5.jar -jackson-module-scala_2.11-2.6.5.jar -jackson-xc-1.9.13.jar -janino-3.0.0.jar -java-xmlbuilder-1.0.jar -javassist-3.18.1-GA.jar -javax.annotation-api-1.2.jar -javax.inject-1.jar -javax.inject-2.4.0-b34.jar -javax.servlet-api-3.1.0.jar -javax.ws.rs-api-2.0.1.jar -javolution-5.5.1.jar -jaxb-api-2.2.2.jar -jcl-over-slf4j-1.7.16.jar -jdo-api-3.0.1.jar -jersey-client-2.22.2.jar -jersey-common-2.22.2.jar -jersey-container-servlet-2.22.2.jar -jersey-container-servlet-core-2.22.2.jar -jersey-guava-2.22.2.jar -jersey-media-jaxb-2.22.2.jar -jersey-server-2.22.2.jar -jets3t-0.9.3.jar -jetty-6.1.26.jar -jetty-util-6.1.26.jar -jline-2.12.1.jar -joda-time-2.9.3.jar -jodd-core-3.5.2.jar -jpam-1.1.jar -json-20090211.jar -json4s-ast_2.11-3.2.11.jar -json4s-core_2.11-3.2.11.jar -json4s-jackson_2.11-3.2.11.jar -jsp-api-2.1.jar -jsr305-1.3.9.jar -jta-1.1.jar -jtransforms-2.4.0.jar -jul-to-slf4j-1.7.16.jar -kryo-shaded-3.0.3.jar -leveldbjni-all-1.8.jar -libfb303-0.9.2.jar -libthrift-0.9.2.jar -log4j-1.2.17.jar -lz4-1.3.0.jar -mail-1.4.7.jar -mesos-1.0.0-shaded-protobuf.jar -metrics-core-3.1.2.jar -metrics-graphite-3.1.2.jar -metrics-json-3.1.2.jar -metrics-jvm-3.1.2.jar -minlog-1.3.0.jar -mx4j-3.0.2.jar -netty-3.8.0.Final.jar -netty-all-4.0.41.Final.jar -objenesis-2.1.jar -opencsv-2.3.jar -oro-2.0.8.jar -osgi-resource-locator-1.0.1.jar -paranamer-2.3.jar -parquet-column-1.8.1.jar -parquet-common-1.8.1.jar -parquet-encoding-1.8.1.jar -parquet-format-2.3.0-incubating.jar 
-parquet-hadoop-1.8.1.jar -parquet-hadoop-bundle-1.6.0.jar -parquet-jackson-1.8.1.jar -pmml-model-1.2.15.jar -pmml-schema-1.2.15.jar -protobuf-java-2.5.0.jar -py4j-0.10.4.jar -pyrolite-4.13.jar -scala-compiler-2.11.8.jar -scala-library-2.11.8.jar -scala-parser-combinators_2.11-1.0.4.jar -scala-reflect-2.11.8.jar -scala-xml_2.11-1.0.2.jar -scalap-2.11.8.jar -shapeless_2.11-2.0.0.jar -slf4j-api-1.7.16.jar -slf4j-log4j12-1.7.16.jar -snappy-0.2.jar -snappy-java-1.1.2.6.jar -spire-macros_2.11-0.7.4.jar -spire_2.11-0.7.4.jar -stax-api-1.0-2.jar -stax-api-1.0.1.jar -stream-2.7.0.jar -stringtemplate-3.2.1.jar -super-csv-2.2.0.jar -univocity-parsers-2.2.1.jar -validation-api-1.1.0.Final.jar -xbean-asm5-shaded-4.4.jar -xercesImpl-2.9.1.jar -xmlenc-0.52.jar -xz-1.0.jar -zookeeper-3.4.6.jar diff --git a/docs/ml-features.md b/docs/ml-features.md index 99e353eb9f35..a7f710fa52e6 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -729,21 +729,6 @@ for more details on the API.
-## Interaction - -`Implements` is a `Transformer` which implements interaction transform. - This transformer takes in Double and Vector type columns and outputs a flattened vector of their feature interactions. - -
-
- -Refer to the [Normalizer Scala docs](api/scala/index.html#org.apache.spark.ml.feature.Interaction) -for more details on the API. - -{% include_example scala/org/apache/spark/examples/ml/InteractionExample.scala %} -
- - ## Normalizer diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java deleted file mode 100644 index f798083f8c3c..000000000000 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.examples.ml; - -import org.apache.spark.ml.feature.Interaction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.SparkSession; -import org.apache.spark.sql.types.DataTypes; -import org.apache.spark.sql.types.Metadata; -import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.StructType; - -import java.util.Arrays; -import java.util.List; -import java.lang.StringBuffer; - -// $example on$ -// $example off$ - -public class JavaInteractionExample { - public static void main(String[] args) { - SparkSession spark = SparkSession - .builder() - .appName("JavaInteractionExample") - .getOrCreate(); - - // $example on$ - List data = Arrays.asList( - RowFactory.create(0, 1, 2), - RowFactory.create(1, 4, 3), - RowFactory.create(2, 6, 1), - RowFactory.create(3, 10, 8), - RowFactory.create(4, 9, 2), - RowFactory.create(5, 1, 1) - ); - - StructType schema = new StructType(new StructField[]{ - new StructField("id1", DataTypes.IntegerType, false, Metadata.empty()), - new StructField("id2", DataTypes.IntegerType, false, Metadata.empty()), - new StructField("id3", DataTypes.IntegerType, false, Metadata.empty()) - }); - - Dataset df = spark.createDataFrame(data, schema); - - Interaction interaction = new Interaction() - .setInputCols(new String[]{"id1","id2","id3"}) - .setOutputCol("interactedCol"); - Dataset interacted = interaction.transform(df); - - interacted.show(); - // $example off$ - - spark.stop(); - } -} - diff --git a/examples/src/main/python/ml/interaction_example.py b/examples/src/main/python/ml/interaction_example.py deleted file mode 100644 index 1ac0c71ba69c..000000000000 --- a/examples/src/main/python/ml/interaction_example.py +++ /dev/null @@ -1,50 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from __future__ import print_function - -# $example on$ -from pyspark.ml.feature import Interaction -# $example off$ -from pyspark.sql import SparkSession - -if __name__ == "__main__": - spark = SparkSession\ - .builder\ - .appName("InteractionExample")\ - .getOrCreate() - - # $example on$ - df = spark.createDataFrame([ - (0, 1, 2), - (1, 4, 3), - (2, 6, 1), - (3, 10, 8), - (4, 9, 2), - (5, 1, 1) - ], ["id1", "id2", "id3"]) - - interaction = StringIndexer(inputCol="category", outputCol="categoryIndex") - model = stringIndexer.fit(df) - indexed = model.transform(df) - - encoder = OneHotEncoder(inputCol="categoryIndex", outputCol="categoryVec") - encoded = encoder.transform(indexed) - encoded.show() - # $example off$ - - spark.stop() diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala deleted file mode 100644 index a3a6462e4278..000000000000 --- a/examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// scalastyle:off println -package org.apache.spark.examples.ml - -// $example on$ -import org.apache.spark.ml.feature.Interaction -// $example off$ -import org.apache.spark.sql.SparkSession - -object InteractionExample { - def main(args: Array[String]): Unit = { - val spark = SparkSession - .builder - .appName("InteractionExample") - .getOrCreate() - - // $example on$ - val df = spark.createDataFrame(Seq( - (0, 1, 2), - (1, 4, 3), - (2, 6, 1), - (3, 10, 8), - (4, 9, 2), - (5, 1, 1) - )).toDF("id1", "id2", "id3") - - val interaction = new Interaction() - .setInputCols(Array("id1", "id2", "id3")) - .setOutputCol("interactedCol") - - val interacted = interaction.transform(df) - - interacted.show() - // $example off$ - - spark.stop() - } -} -// scalastyle:on println \ No newline at end of file From 1e194734005dad06c46a9dd288644a542efa2b04 Mon Sep 17 00:00:00 2001 From: chie8842 Date: Thu, 27 Oct 2016 11:50:27 +0900 Subject: [PATCH 03/10] first commit --- docs/ml-features.md | 15 ++++ .../examples/ml/JavaInteractionExample.java | 73 +++++++++++++++++++ .../src/main/python/ml/interaction_example.py | 50 +++++++++++++ .../examples/ml/InteractionExample.scala | 55 ++++++++++++++ 4 files changed, 193 insertions(+) create mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java create mode 100644 examples/src/main/python/ml/interaction_example.py create 
mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala diff --git a/docs/ml-features.md b/docs/ml-features.md index a7f710fa52e6..99e353eb9f35 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -729,6 +729,21 @@ for more details on the API.
+## Interaction + +`Implements` is a `Transformer` which implements interaction transform. + This transformer takes in Double and Vector type columns and outputs a flattened vector of their feature interactions. + +
+
+ +Refer to the [Normalizer Scala docs](api/scala/index.html#org.apache.spark.ml.feature.Interaction) +for more details on the API. + +{% include_example scala/org/apache/spark/examples/ml/InteractionExample.scala %} +
+ + ## Normalizer diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java new file mode 100644 index 000000000000..f798083f8c3c --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.examples.ml; + +import org.apache.spark.ml.feature.Interaction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +import java.util.Arrays; +import java.util.List; +import java.lang.StringBuffer; + +// $example on$ +// $example off$ + +public class JavaInteractionExample { + public static void main(String[] args) { + SparkSession spark = SparkSession + .builder() + .appName("JavaInteractionExample") + .getOrCreate(); + + // $example on$ + List data = Arrays.asList( + RowFactory.create(0, 1, 2), + RowFactory.create(1, 4, 3), + RowFactory.create(2, 6, 1), + RowFactory.create(3, 10, 8), + RowFactory.create(4, 9, 2), + RowFactory.create(5, 1, 1) + ); + + StructType schema = new StructType(new StructField[]{ + new StructField("id1", DataTypes.IntegerType, false, Metadata.empty()), + new StructField("id2", DataTypes.IntegerType, false, Metadata.empty()), + new StructField("id3", DataTypes.IntegerType, false, Metadata.empty()) + }); + + Dataset df = spark.createDataFrame(data, schema); + + Interaction interaction = new Interaction() + .setInputCols(new String[]{"id1","id2","id3"}) + .setOutputCol("interactedCol"); + Dataset interacted = interaction.transform(df); + + interacted.show(); + // $example off$ + + spark.stop(); + } +} + diff --git a/examples/src/main/python/ml/interaction_example.py b/examples/src/main/python/ml/interaction_example.py new file mode 100644 index 000000000000..1ac0c71ba69c --- /dev/null +++ b/examples/src/main/python/ml/interaction_example.py @@ -0,0 +1,50 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from __future__ import print_function + +# $example on$ +from pyspark.ml.feature import Interaction +# $example off$ +from pyspark.sql import SparkSession + +if __name__ == "__main__": + spark = SparkSession\ + .builder\ + .appName("InteractionExample")\ + .getOrCreate() + + # $example on$ + df = spark.createDataFrame([ + (0, 1, 2), + (1, 4, 3), + (2, 6, 1), + (3, 10, 8), + (4, 9, 2), + (5, 1, 1) + ], ["id1", "id2", "id3"]) + + interaction = StringIndexer(inputCol="category", outputCol="categoryIndex") + model = stringIndexer.fit(df) + indexed = model.transform(df) + + encoder = OneHotEncoder(inputCol="categoryIndex", outputCol="categoryVec") + encoded = encoder.transform(indexed) + encoded.show() + # $example off$ + + spark.stop() diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala new file mode 100644 index 000000000000..a3a6462e4278 --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// scalastyle:off println +package org.apache.spark.examples.ml + +// $example on$ +import org.apache.spark.ml.feature.Interaction +// $example off$ +import org.apache.spark.sql.SparkSession + +object InteractionExample { + def main(args: Array[String]): Unit = { + val spark = SparkSession + .builder + .appName("InteractionExample") + .getOrCreate() + + // $example on$ + val df = spark.createDataFrame(Seq( + (0, 1, 2), + (1, 4, 3), + (2, 6, 1), + (3, 10, 8), + (4, 9, 2), + (5, 1, 1) + )).toDF("id1", "id2", "id3") + + val interaction = new Interaction() + .setInputCols(Array("id1", "id2", "id3")) + .setOutputCol("interactedCol") + + val interacted = interaction.transform(df) + + interacted.show() + // $example off$ + + spark.stop() + } +} +// scalastyle:on println \ No newline at end of file From 91adc6cf291f09918e2fecc6fd4a32c50dc5a306 Mon Sep 17 00:00:00 2001 From: chie8842 Date: Thu, 27 Oct 2016 16:08:40 +0900 Subject: [PATCH 04/10] updated example files --- .../examples/ml/JavaInteractionExample.java | 1 - .../src/main/python/ml/interaction_example.py | 50 ------------------- .../examples/ml/InteractionExample.scala | 2 +- 3 files changed, 1 insertion(+), 52 deletions(-) delete mode 100644 examples/src/main/python/ml/interaction_example.py diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java 
b/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java index f798083f8c3c..782bb327472f 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java @@ -29,7 +29,6 @@ import java.util.Arrays; import java.util.List; -import java.lang.StringBuffer; // $example on$ // $example off$ diff --git a/examples/src/main/python/ml/interaction_example.py b/examples/src/main/python/ml/interaction_example.py deleted file mode 100644 index 1ac0c71ba69c..000000000000 --- a/examples/src/main/python/ml/interaction_example.py +++ /dev/null @@ -1,50 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from __future__ import print_function - -# $example on$ -from pyspark.ml.feature import Interaction -# $example off$ -from pyspark.sql import SparkSession - -if __name__ == "__main__": - spark = SparkSession\ - .builder\ - .appName("InteractionExample")\ - .getOrCreate() - - # $example on$ - df = spark.createDataFrame([ - (0, 1, 2), - (1, 4, 3), - (2, 6, 1), - (3, 10, 8), - (4, 9, 2), - (5, 1, 1) - ], ["id1", "id2", "id3"]) - - interaction = StringIndexer(inputCol="category", outputCol="categoryIndex") - model = stringIndexer.fit(df) - indexed = model.transform(df) - - encoder = OneHotEncoder(inputCol="categoryIndex", outputCol="categoryVec") - encoded = encoder.transform(indexed) - encoded.show() - # $example off$ - - spark.stop() diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala index a3a6462e4278..996cf9855b2d 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala @@ -52,4 +52,4 @@ object InteractionExample { spark.stop() } } -// scalastyle:on println \ No newline at end of file +// scalastyle:on println From 0ff184f3a695819f1522dd2a60d32fa0a213992e Mon Sep 17 00:00:00 2001 From: chie8842 Date: Thu, 27 Oct 2016 16:09:36 +0900 Subject: [PATCH 05/10] updated doc file --- docs/ml-features.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/ml-features.md b/docs/ml-features.md index 99e353eb9f35..18eb952ef917 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -737,13 +737,20 @@ for more details on the API.
-Refer to the [Normalizer Scala docs](api/scala/index.html#org.apache.spark.ml.feature.Interaction) +Refer to the [Interaction Scala docs](api/scala/index.html#org.apache.spark.ml.feature.Interaction) for more details on the API. {% include_example scala/org/apache/spark/examples/ml/InteractionExample.scala %}
+
+ +Refer to the [Interaction Java docs](api/java/org/apache/spark/ml/feature/Interaction.html) +for more details on the API. +{% include_example java/org/apache/spark/examples/ml/JavaInteractionExample.java %} +
+
## Normalizer From 97154a5aeadb10b16777fd2e5fa55694de39f056 Mon Sep 17 00:00:00 2001 From: chie8842 Date: Thu, 27 Oct 2016 21:34:16 +0900 Subject: [PATCH 06/10] added example. --- docs/ml-features.md | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/docs/ml-features.md b/docs/ml-features.md index 18eb952ef917..b20c47ea21b7 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -731,8 +731,41 @@ for more details on the API. ## Interaction -`Implements` is a `Transformer` which implements interaction transform. - This transformer takes in Double and Vector type columns and outputs a flattened vector of their feature interactions. +`Interaction` is a `Transformer` which takes a list of vector/double columns, and generate a single vector column +that contains the interactions (multiplication) among them with proper handling of feature names. + +**Examples** + +Assume that we have the following DataFrame with columns tree input column: + +~~~~ + +id1 | id2 | id3 +----|-----|----- + 0 | 1 | 2 + 1 | 4 | 3 + 2 | 6 | 1 + 3 | 10 | 8 + 4 | 9 | 2 + 5 | 1 | 1 +~~~~ + +Applying `Interaction` with `id1`, `id2`, `id3` as the input columns, +then `interactedCol` as the output column contains: + +~~~~ +id1 | id2 | id3 | interactedCol +----|-----|-----|--------------- + 0 | 1 | 2 | [0.0] + 1 | 4 | 3 | [0.0] + 2 | 6 | 1 | [12.0] + 3 | 10 | 8 | [240.0] + 4 | 9 | 2 | [72.0] + 5 | 1 | 1 | [5.0] + +~~~~ + +Each vector represents the token counts of the document over the vocabulary.
From 03794a94292e5be6b82d1b4958238d1601f81720 Mon Sep 17 00:00:00 2001 From: chie8842 Date: Thu, 3 Nov 2016 20:26:22 +0900 Subject: [PATCH 07/10] changed column contents --- .../examples/ml/JavaInteractionExample.java | 48 ++++++++++++------- .../examples/ml/InteractionExample.scala | 33 +++++++++---- 2 files changed, 55 insertions(+), 26 deletions(-) diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java index 782bb327472f..4213c05703cc 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java @@ -18,10 +18,9 @@ package org.apache.spark.examples.ml; import org.apache.spark.ml.feature.Interaction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.SparkSession; +import org.apache.spark.ml.feature.VectorAssembler; +import org.apache.spark.ml.linalg.Vectors; +import org.apache.spark.sql.*; import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.StructField; @@ -42,28 +41,45 @@ public static void main(String[] args) { // $example on$ List data = Arrays.asList( - RowFactory.create(0, 1, 2), - RowFactory.create(1, 4, 3), - RowFactory.create(2, 6, 1), - RowFactory.create(3, 10, 8), - RowFactory.create(4, 9, 2), - RowFactory.create(5, 1, 1) + RowFactory.create(1, 1, 2, 3, 8, 4, 5), + RowFactory.create(2, 4, 3, 8, 7, 9, 8), + RowFactory.create(3, 6, 1, 9, 2, 3, 6), + RowFactory.create(4, 10, 8, 6, 9, 4, 5), + RowFactory.create(5, 9, 2, 7, 10, 7, 3), + RowFactory.create(6, 1, 1, 4, 2, 8, 4) ); - + StructType schema = new StructType(new StructField[]{ new StructField("id1", DataTypes.IntegerType, false, Metadata.empty()), new StructField("id2", DataTypes.IntegerType, false, 
Metadata.empty()), - new StructField("id3", DataTypes.IntegerType, false, Metadata.empty()) + new StructField("id3", DataTypes.IntegerType, false, Metadata.empty()), + new StructField("id4", DataTypes.IntegerType, false, Metadata.empty()), + new StructField("id5", DataTypes.IntegerType, false, Metadata.empty()), + new StructField("id6", DataTypes.IntegerType, false, Metadata.empty()), + new StructField("id7", DataTypes.IntegerType, false, Metadata.empty()) }); Dataset df = spark.createDataFrame(data, schema); + VectorAssembler assembler1 = new VectorAssembler() + .setInputCols(new String[]{"id2", "id3", "id4"}) + .setOutputCol("vec1"); + + Dataset assembled1 = assembler1.transform(df); + + VectorAssembler assembler2 = new VectorAssembler() + .setInputCols(new String[]{"id5", "id6", "id7"}) + .setOutputCol("vec2"); + + Dataset assembled2 = assembler2.transform(assembled1).select("id1", "vec1", "vec2"); + Interaction interaction = new Interaction() - .setInputCols(new String[]{"id1","id2","id3"}) - .setOutputCol("interactedCol"); - Dataset interacted = interaction.transform(df); + .setInputCols(new String[]{"id1","vec1","vec2"}) + .setOutputCol("interactedCol"); + + Dataset interacted = interaction.transform(assembled2); - interacted.show(); + interacted.show(false); // $example off$ spark.stop(); diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala index 996cf9855b2d..8113c992b1d6 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala @@ -20,6 +20,7 @@ package org.apache.spark.examples.ml // $example on$ import org.apache.spark.ml.feature.Interaction +import org.apache.spark.ml.feature.VectorAssembler // $example off$ import org.apache.spark.sql.SparkSession @@ -32,21 +33,33 @@ object InteractionExample { // $example on$ val df = 
spark.createDataFrame(Seq( - (0, 1, 2), - (1, 4, 3), - (2, 6, 1), - (3, 10, 8), - (4, 9, 2), - (5, 1, 1) - )).toDF("id1", "id2", "id3") + (1, 1, 2, 3, 8, 4, 5), + (2, 4, 3, 8, 7, 9, 8), + (3, 6, 1, 9, 2, 3, 6), + (4, 10, 8, 6, 9, 4, 5), + (5, 9, 2, 7, 10, 7, 3), + (6, 1, 1, 4, 2, 8, 4) + )).toDF("id1", "id2", "id3", "id4", "id5", "id6", "id7") + + val assembler1 = new VectorAssembler(). + setInputCols(Array("id2", "id3", "id4")). + setOutputCol("vec1") + + val assembled1 = assembler1.transform(df) + + val assembler2 = new VectorAssembler(). + setInputCols(Array("id5", "id6", "id7")). + setOutputCol("vec2") + + val assembled2 = assembler2.transform(assembled1).select("id1", "vec1", "vec2") val interaction = new Interaction() - .setInputCols(Array("id1", "id2", "id3")) + .setInputCols(Array("id1", "vec1", "vec2")) .setOutputCol("interactedCol") - val interacted = interaction.transform(df) + val interacted = interaction.transform(assembled2) - interacted.show() + interacted.show(truncate = false) // $example off$ spark.stop() From a619091d20e680cd05e12ea6f22e2281a27057bf Mon Sep 17 00:00:00 2001 From: chie8842 Date: Sat, 5 Nov 2016 18:44:51 +0900 Subject: [PATCH 08/10] fixed documentation about description of interaction function --- docs/ml-features.md | 45 +++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/docs/ml-features.md b/docs/ml-features.md index b20c47ea21b7..d1e7f7246a66 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -731,42 +731,39 @@ for more details on the API. ## Interaction -`Interaction` is a `Transformer` which takes a list of vector/double columns, and generate a single vector column -that contains the interactions (multiplication) among them with proper handling of feature names. +`Interaction` is a `Transformer` which takes a vector/double columns, and generate a single vector column that contains multiplication results of all combination of each vector/double values. 
+ +For example, if you have two vector type columns each of which contains three double type values as input columns, then you'll get a vector with 9 double type values as the output column. **Examples** -Assume that we have the following DataFrame with columns tree input column: +Assume that we have the following DataFrame with the columns "id1", "vec1", and "vec2": ~~~~ - -id1 | id2 | id3 -----|-----|----- - 0 | 1 | 2 - 1 | 4 | 3 - 2 | 6 | 1 - 3 | 10 | 8 - 4 | 9 | 2 - 5 | 1 | 1 + id1|vec1 |vec2 + ---|--------------|-------------- + 1 |[1.0,2.0,3.0] |[8.0,4.0,5.0] + 2 |[4.0,3.0,8.0] |[7.0,9.0,8.0] + 3 |[6.0,1.0,9.0] |[2.0,3.0,6.0] + 4 |[10.0,8.0,6.0]|[9.0,4.0,5.0] + 5 |[9.0,2.0,7.0] |[10.0,7.0,3.0] + 6 |[1.0,1.0,4.0] |[2.0,8.0,4.0] ~~~~ -Applying `Interaction` with `id1`, `id2`, `id3` as the input columns, +Applying `Interaction` with those input columns, then `interactedCol` as the output column contains: ~~~~ -id1 | id2 | id3 | interactedCol -----|-----|-----|--------------- - 0 | 1 | 2 | [0.0] - 1 | 4 | 3 | [0.0] - 2 | 6 | 1 | [12.0] - 3 | 10 | 8 | [240.0] - 4 | 9 | 2 | [72.0] - 5 | 1 | 1 | [5.0] - + id1|vec1 |vec2 |interactedCol + ---|--------------|--------------|------------------------------------------------------ + 1 |[1.0,2.0,3.0] |[8.0,4.0,5.0] |[8.0,4.0,5.0,16.0,8.0,10.0,24.0,12.0,15.0] + 2 |[4.0,3.0,8.0] |[7.0,9.0,8.0] |[56.0,72.0,64.0,42.0,54.0,48.0,112.0,144.0,128.0] + 3 |[6.0,1.0,9.0] |[2.0,3.0,6.0] |[36.0,54.0,108.0,6.0,9.0,18.0,54.0,81.0,162.0] + 4 |[10.0,8.0,6.0]|[9.0,4.0,5.0] |[360.0,160.0,200.0,288.0,128.0,160.0,216.0,96.0,120.0] + 5 |[9.0,2.0,7.0] |[10.0,7.0,3.0]|[450.0,315.0,135.0,100.0,70.0,30.0,350.0,245.0,105.0] + 6 |[1.0,1.0,4.0] |[2.0,8.0,4.0] |[12.0,48.0,24.0,12.0,48.0,24.0,48.0,192.0,96.0] ~~~~ -Each vector represents the token counts of the document over the vocabulary. -
From 322ee9f498ed621bce79ca206f58542e02d9e3be Mon Sep 17 00:00:00 2001 From: chie8842 Date: Sat, 5 Nov 2016 20:06:39 +0900 Subject: [PATCH 09/10] changed description of interaction function --- docs/ml-features.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ml-features.md b/docs/ml-features.md index d1e7f7246a66..ee7b1474907d 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -733,7 +733,7 @@ for more details on the API. `Interaction` is a `Transformer` which takes a vector/double columns, and generate a single vector column that contains multiplication results of all combination of each vector/double values. -For example, if you have two vector type columns each of which contains three double type values as input columns, then you'll get a vector with 9 double type values as the output column. +For example, if you have 2 vector type columns each of which has 3 dimensions as input columns, then you'll get a 9-dimensional vector as the output column. **Examples** From 10eb5cdaedb588c8404d7c4a862ac12d223c459f Mon Sep 17 00:00:00 2001 From: chie8842 Date: Sun, 6 Nov 2016 11:16:22 +0900 Subject: [PATCH 10/10] changed description of interaction function --- docs/ml-features.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ml-features.md b/docs/ml-features.md index ee7b1474907d..ffe2bb6066d3 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -731,7 +731,7 @@ for more details on the API. ## Interaction -`Interaction` is a `Transformer` which takes a vector/double columns, and generate a single vector column that contains multiplication results of all combination of each vector/double values. +`Interaction` is a `Transformer` which takes vector or double-valued columns, and generates a single vector column that contains the product of all combinations of one value from each input column.
For example, if you have 2 vector type columns each of which has 3 dimensions as input columns, then you'll get a 9-dimensional vector as the output column.