diff --git a/dev/pr-deps/spark-deps-hadoop-2.2 b/dev/pr-deps/spark-deps-hadoop-2.2 new file mode 100644 index 000000000000..b5c38a6c056e --- /dev/null +++ b/dev/pr-deps/spark-deps-hadoop-2.2 @@ -0,0 +1,165 @@ +JavaEWAH-0.3.2.jar +RoaringBitmap-0.5.11.jar +ST4-4.0.4.jar +antlr-2.7.7.jar +antlr-runtime-3.4.jar +antlr4-runtime-4.5.3.jar +aopalliance-1.0.jar +aopalliance-repackaged-2.4.0-b34.jar +apache-log4j-extras-1.2.17.jar +arpack_combined_all-0.1.jar +avro-1.7.7.jar +avro-ipc-1.7.7.jar +avro-mapred-1.7.7-hadoop2.jar +bonecp-0.8.0.RELEASE.jar +breeze-macros_2.11-0.11.2.jar +breeze_2.11-0.11.2.jar +calcite-avatica-1.2.0-incubating.jar +calcite-core-1.2.0-incubating.jar +calcite-linq4j-1.2.0-incubating.jar +chill-java-0.8.0.jar +chill_2.11-0.8.0.jar +commons-beanutils-1.7.0.jar +commons-beanutils-core-1.8.0.jar +commons-cli-1.2.jar +commons-codec-1.10.jar +commons-collections-3.2.2.jar +commons-compiler-2.7.6.jar +commons-compress-1.4.1.jar +commons-configuration-1.6.jar +commons-dbcp-1.4.jar +commons-digester-1.8.jar +commons-httpclient-3.1.jar +commons-io-2.4.jar +commons-lang-2.6.jar +commons-lang3-3.3.2.jar +commons-logging-1.1.3.jar +commons-math-2.1.jar +commons-math3-3.4.1.jar +commons-net-2.2.jar +commons-pool-1.5.4.jar +compress-lzf-1.0.3.jar +core-1.1.2.jar +curator-client-2.4.0.jar +curator-framework-2.4.0.jar +curator-recipes-2.4.0.jar +datanucleus-api-jdo-3.2.6.jar +datanucleus-core-3.2.10.jar +datanucleus-rdbms-3.2.9.jar +derby-10.11.1.1.jar +eigenbase-properties-1.1.5.jar +guava-14.0.1.jar +guice-3.0.jar +guice-servlet-3.0.jar +hadoop-annotations-2.2.0.jar +hadoop-auth-2.2.0.jar +hadoop-client-2.2.0.jar +hadoop-common-2.2.0.jar +hadoop-hdfs-2.2.0.jar +hadoop-mapreduce-client-app-2.2.0.jar +hadoop-mapreduce-client-common-2.2.0.jar +hadoop-mapreduce-client-core-2.2.0.jar +hadoop-mapreduce-client-jobclient-2.2.0.jar +hadoop-mapreduce-client-shuffle-2.2.0.jar +hadoop-yarn-api-2.2.0.jar +hadoop-yarn-client-2.2.0.jar +hadoop-yarn-common-2.2.0.jar 
+hadoop-yarn-server-common-2.2.0.jar +hadoop-yarn-server-web-proxy-2.2.0.jar +hk2-api-2.4.0-b34.jar +hk2-locator-2.4.0-b34.jar +hk2-utils-2.4.0-b34.jar +httpclient-4.5.2.jar +httpcore-4.4.4.jar +ivy-2.4.0.jar +jackson-annotations-2.6.5.jar +jackson-core-2.6.5.jar +jackson-core-asl-1.9.13.jar +jackson-databind-2.6.5.jar +jackson-mapper-asl-1.9.13.jar +jackson-module-paranamer-2.6.5.jar +jackson-module-scala_2.11-2.6.5.jar +janino-2.7.8.jar +javassist-3.18.1-GA.jar +javax.annotation-api-1.2.jar +javax.inject-1.jar +javax.inject-2.4.0-b34.jar +javax.servlet-api-3.1.0.jar +javax.ws.rs-api-2.0.1.jar +javolution-5.5.1.jar +jcl-over-slf4j-1.7.16.jar +jdo-api-3.0.1.jar +jersey-client-2.22.2.jar +jersey-common-2.22.2.jar +jersey-container-servlet-2.22.2.jar +jersey-container-servlet-core-2.22.2.jar +jersey-guava-2.22.2.jar +jersey-media-jaxb-2.22.2.jar +jersey-server-2.22.2.jar +jets3t-0.7.1.jar +jetty-util-6.1.26.jar +jline-2.12.jar +joda-time-2.9.3.jar +jodd-core-3.5.2.jar +jpam-1.1.jar +json-20090211.jar +json4s-ast_2.11-3.2.11.jar +json4s-core_2.11-3.2.11.jar +json4s-jackson_2.11-3.2.11.jar +jsr305-1.3.9.jar +jta-1.1.jar +jtransforms-2.4.0.jar +jul-to-slf4j-1.7.16.jar +kryo-shaded-3.0.3.jar +leveldbjni-all-1.8.jar +libfb303-0.9.2.jar +libthrift-0.9.2.jar +log4j-1.2.17.jar +lz4-1.3.0.jar +mesos-0.21.1-shaded-protobuf.jar +metrics-core-3.1.2.jar +metrics-graphite-3.1.2.jar +metrics-json-3.1.2.jar +metrics-jvm-3.1.2.jar +minlog-1.3.0.jar +netty-3.8.0.Final.jar +netty-all-4.0.29.Final.jar +objenesis-2.1.jar +opencsv-2.3.jar +oro-2.0.8.jar +osgi-resource-locator-1.0.1.jar +paranamer-2.3.jar +parquet-column-1.8.1.jar +parquet-common-1.8.1.jar +parquet-encoding-1.8.1.jar +parquet-format-2.3.0-incubating.jar +parquet-hadoop-1.8.1.jar +parquet-hadoop-bundle-1.6.0.jar +parquet-jackson-1.8.1.jar +pmml-model-1.2.15.jar +pmml-schema-1.2.15.jar +protobuf-java-2.5.0.jar +py4j-0.10.1.jar +pyrolite-4.9.jar +scala-compiler-2.11.8.jar +scala-library-2.11.8.jar 
+scala-parser-combinators_2.11-1.0.4.jar +scala-reflect-2.11.8.jar +scala-xml_2.11-1.0.2.jar +scalap-2.11.8.jar +slf4j-api-1.7.16.jar +slf4j-log4j12-1.7.16.jar +snappy-0.2.jar +snappy-java-1.1.2.4.jar +spire-macros_2.11-0.7.4.jar +spire_2.11-0.7.4.jar +stax-api-1.0.1.jar +stream-2.7.0.jar +stringtemplate-3.2.1.jar +super-csv-2.2.0.jar +univocity-parsers-2.1.1.jar +validation-api-1.1.0.Final.jar +xbean-asm5-shaded-4.4.jar +xmlenc-0.52.jar +xz-1.0.jar +zookeeper-3.4.5.jar diff --git a/dev/pr-deps/spark-deps-hadoop-2.3 b/dev/pr-deps/spark-deps-hadoop-2.3 new file mode 100644 index 000000000000..969df0495d4c --- /dev/null +++ b/dev/pr-deps/spark-deps-hadoop-2.3 @@ -0,0 +1,173 @@ +JavaEWAH-0.3.2.jar +RoaringBitmap-0.5.11.jar +ST4-4.0.4.jar +activation-1.1.1.jar +antlr-2.7.7.jar +antlr-runtime-3.4.jar +antlr4-runtime-4.5.3.jar +aopalliance-1.0.jar +aopalliance-repackaged-2.4.0-b34.jar +apache-log4j-extras-1.2.17.jar +arpack_combined_all-0.1.jar +avro-1.7.7.jar +avro-ipc-1.7.7.jar +avro-mapred-1.7.7-hadoop2.jar +base64-2.3.8.jar +bcprov-jdk15on-1.51.jar +bonecp-0.8.0.RELEASE.jar +breeze-macros_2.11-0.11.2.jar +breeze_2.11-0.11.2.jar +calcite-avatica-1.2.0-incubating.jar +calcite-core-1.2.0-incubating.jar +calcite-linq4j-1.2.0-incubating.jar +chill-java-0.8.0.jar +chill_2.11-0.8.0.jar +commons-beanutils-1.7.0.jar +commons-beanutils-core-1.8.0.jar +commons-cli-1.2.jar +commons-codec-1.10.jar +commons-collections-3.2.2.jar +commons-compiler-2.7.6.jar +commons-compress-1.4.1.jar +commons-configuration-1.6.jar +commons-dbcp-1.4.jar +commons-digester-1.8.jar +commons-httpclient-3.1.jar +commons-io-2.4.jar +commons-lang-2.6.jar +commons-lang3-3.3.2.jar +commons-logging-1.1.3.jar +commons-math3-3.4.1.jar +commons-net-2.2.jar +commons-pool-1.5.4.jar +compress-lzf-1.0.3.jar +core-1.1.2.jar +curator-client-2.4.0.jar +curator-framework-2.4.0.jar +curator-recipes-2.4.0.jar +datanucleus-api-jdo-3.2.6.jar +datanucleus-core-3.2.10.jar +datanucleus-rdbms-3.2.9.jar +derby-10.11.1.1.jar 
+eigenbase-properties-1.1.5.jar +guava-14.0.1.jar +guice-3.0.jar +guice-servlet-3.0.jar +hadoop-annotations-2.3.0.jar +hadoop-auth-2.3.0.jar +hadoop-client-2.3.0.jar +hadoop-common-2.3.0.jar +hadoop-hdfs-2.3.0.jar +hadoop-mapreduce-client-app-2.3.0.jar +hadoop-mapreduce-client-common-2.3.0.jar +hadoop-mapreduce-client-core-2.3.0.jar +hadoop-mapreduce-client-jobclient-2.3.0.jar +hadoop-mapreduce-client-shuffle-2.3.0.jar +hadoop-yarn-api-2.3.0.jar +hadoop-yarn-client-2.3.0.jar +hadoop-yarn-common-2.3.0.jar +hadoop-yarn-server-common-2.3.0.jar +hadoop-yarn-server-web-proxy-2.3.0.jar +hk2-api-2.4.0-b34.jar +hk2-locator-2.4.0-b34.jar +hk2-utils-2.4.0-b34.jar +httpclient-4.5.2.jar +httpcore-4.4.4.jar +ivy-2.4.0.jar +jackson-annotations-2.6.5.jar +jackson-core-2.6.5.jar +jackson-core-asl-1.9.13.jar +jackson-databind-2.6.5.jar +jackson-mapper-asl-1.9.13.jar +jackson-module-paranamer-2.6.5.jar +jackson-module-scala_2.11-2.6.5.jar +janino-2.7.8.jar +java-xmlbuilder-1.0.jar +javassist-3.18.1-GA.jar +javax.annotation-api-1.2.jar +javax.inject-1.jar +javax.inject-2.4.0-b34.jar +javax.servlet-api-3.1.0.jar +javax.ws.rs-api-2.0.1.jar +javolution-5.5.1.jar +jaxb-api-2.2.2.jar +jcl-over-slf4j-1.7.16.jar +jdo-api-3.0.1.jar +jersey-client-2.22.2.jar +jersey-common-2.22.2.jar +jersey-container-servlet-2.22.2.jar +jersey-container-servlet-core-2.22.2.jar +jersey-guava-2.22.2.jar +jersey-media-jaxb-2.22.2.jar +jersey-server-2.22.2.jar +jets3t-0.9.3.jar +jetty-6.1.26.jar +jetty-util-6.1.26.jar +jline-2.12.jar +joda-time-2.9.3.jar +jodd-core-3.5.2.jar +jpam-1.1.jar +json-20090211.jar +json4s-ast_2.11-3.2.11.jar +json4s-core_2.11-3.2.11.jar +json4s-jackson_2.11-3.2.11.jar +jsr305-1.3.9.jar +jta-1.1.jar +jtransforms-2.4.0.jar +jul-to-slf4j-1.7.16.jar +kryo-shaded-3.0.3.jar +leveldbjni-all-1.8.jar +libfb303-0.9.2.jar +libthrift-0.9.2.jar +log4j-1.2.17.jar +lz4-1.3.0.jar +mail-1.4.7.jar +mesos-0.21.1-shaded-protobuf.jar +metrics-core-3.1.2.jar +metrics-graphite-3.1.2.jar 
+metrics-json-3.1.2.jar +metrics-jvm-3.1.2.jar +minlog-1.3.0.jar +mx4j-3.0.2.jar +netty-3.8.0.Final.jar +netty-all-4.0.29.Final.jar +objenesis-2.1.jar +opencsv-2.3.jar +oro-2.0.8.jar +osgi-resource-locator-1.0.1.jar +paranamer-2.3.jar +parquet-column-1.8.1.jar +parquet-common-1.8.1.jar +parquet-encoding-1.8.1.jar +parquet-format-2.3.0-incubating.jar +parquet-hadoop-1.8.1.jar +parquet-hadoop-bundle-1.6.0.jar +parquet-jackson-1.8.1.jar +pmml-model-1.2.15.jar +pmml-schema-1.2.15.jar +protobuf-java-2.5.0.jar +py4j-0.10.1.jar +pyrolite-4.9.jar +scala-compiler-2.11.8.jar +scala-library-2.11.8.jar +scala-parser-combinators_2.11-1.0.4.jar +scala-reflect-2.11.8.jar +scala-xml_2.11-1.0.2.jar +scalap-2.11.8.jar +slf4j-api-1.7.16.jar +slf4j-log4j12-1.7.16.jar +snappy-0.2.jar +snappy-java-1.1.2.4.jar +spire-macros_2.11-0.7.4.jar +spire_2.11-0.7.4.jar +stax-api-1.0-2.jar +stax-api-1.0.1.jar +stream-2.7.0.jar +stringtemplate-3.2.1.jar +super-csv-2.2.0.jar +univocity-parsers-2.1.1.jar +validation-api-1.1.0.Final.jar +xbean-asm5-shaded-4.4.jar +xmlenc-0.52.jar +xz-1.0.jar +zookeeper-3.4.5.jar diff --git a/dev/pr-deps/spark-deps-hadoop-2.4 b/dev/pr-deps/spark-deps-hadoop-2.4 new file mode 100644 index 000000000000..f0491ece7c2b --- /dev/null +++ b/dev/pr-deps/spark-deps-hadoop-2.4 @@ -0,0 +1,173 @@ +JavaEWAH-0.3.2.jar +RoaringBitmap-0.5.11.jar +ST4-4.0.4.jar +activation-1.1.1.jar +antlr-2.7.7.jar +antlr-runtime-3.4.jar +antlr4-runtime-4.5.3.jar +aopalliance-1.0.jar +aopalliance-repackaged-2.4.0-b34.jar +apache-log4j-extras-1.2.17.jar +arpack_combined_all-0.1.jar +avro-1.7.7.jar +avro-ipc-1.7.7.jar +avro-mapred-1.7.7-hadoop2.jar +base64-2.3.8.jar +bcprov-jdk15on-1.51.jar +bonecp-0.8.0.RELEASE.jar +breeze-macros_2.11-0.11.2.jar +breeze_2.11-0.11.2.jar +calcite-avatica-1.2.0-incubating.jar +calcite-core-1.2.0-incubating.jar +calcite-linq4j-1.2.0-incubating.jar +chill-java-0.8.0.jar +chill_2.11-0.8.0.jar +commons-beanutils-1.7.0.jar +commons-beanutils-core-1.8.0.jar +commons-cli-1.2.jar 
+commons-codec-1.10.jar +commons-collections-3.2.2.jar +commons-compiler-2.7.6.jar +commons-compress-1.4.1.jar +commons-configuration-1.6.jar +commons-dbcp-1.4.jar +commons-digester-1.8.jar +commons-httpclient-3.1.jar +commons-io-2.4.jar +commons-lang-2.6.jar +commons-lang3-3.3.2.jar +commons-logging-1.1.3.jar +commons-math3-3.4.1.jar +commons-net-2.2.jar +commons-pool-1.5.4.jar +compress-lzf-1.0.3.jar +core-1.1.2.jar +curator-client-2.4.0.jar +curator-framework-2.4.0.jar +curator-recipes-2.4.0.jar +datanucleus-api-jdo-3.2.6.jar +datanucleus-core-3.2.10.jar +datanucleus-rdbms-3.2.9.jar +derby-10.11.1.1.jar +eigenbase-properties-1.1.5.jar +guava-14.0.1.jar +guice-3.0.jar +guice-servlet-3.0.jar +hadoop-annotations-2.4.0.jar +hadoop-auth-2.4.0.jar +hadoop-client-2.4.0.jar +hadoop-common-2.4.0.jar +hadoop-hdfs-2.4.0.jar +hadoop-mapreduce-client-app-2.4.0.jar +hadoop-mapreduce-client-common-2.4.0.jar +hadoop-mapreduce-client-core-2.4.0.jar +hadoop-mapreduce-client-jobclient-2.4.0.jar +hadoop-mapreduce-client-shuffle-2.4.0.jar +hadoop-yarn-api-2.4.0.jar +hadoop-yarn-client-2.4.0.jar +hadoop-yarn-common-2.4.0.jar +hadoop-yarn-server-common-2.4.0.jar +hadoop-yarn-server-web-proxy-2.4.0.jar +hk2-api-2.4.0-b34.jar +hk2-locator-2.4.0-b34.jar +hk2-utils-2.4.0-b34.jar +httpclient-4.5.2.jar +httpcore-4.4.4.jar +ivy-2.4.0.jar +jackson-annotations-2.6.5.jar +jackson-core-2.6.5.jar +jackson-core-asl-1.9.13.jar +jackson-databind-2.6.5.jar +jackson-mapper-asl-1.9.13.jar +jackson-module-paranamer-2.6.5.jar +jackson-module-scala_2.11-2.6.5.jar +janino-2.7.8.jar +java-xmlbuilder-1.0.jar +javassist-3.18.1-GA.jar +javax.annotation-api-1.2.jar +javax.inject-1.jar +javax.inject-2.4.0-b34.jar +javax.servlet-api-3.1.0.jar +javax.ws.rs-api-2.0.1.jar +javolution-5.5.1.jar +jaxb-api-2.2.2.jar +jcl-over-slf4j-1.7.16.jar +jdo-api-3.0.1.jar +jersey-client-2.22.2.jar +jersey-common-2.22.2.jar +jersey-container-servlet-2.22.2.jar +jersey-container-servlet-core-2.22.2.jar +jersey-guava-2.22.2.jar 
+jersey-media-jaxb-2.22.2.jar +jersey-server-2.22.2.jar +jets3t-0.9.3.jar +jetty-6.1.26.jar +jetty-util-6.1.26.jar +jline-2.12.jar +joda-time-2.9.3.jar +jodd-core-3.5.2.jar +jpam-1.1.jar +json-20090211.jar +json4s-ast_2.11-3.2.11.jar +json4s-core_2.11-3.2.11.jar +json4s-jackson_2.11-3.2.11.jar +jsr305-1.3.9.jar +jta-1.1.jar +jtransforms-2.4.0.jar +jul-to-slf4j-1.7.16.jar +kryo-shaded-3.0.3.jar +leveldbjni-all-1.8.jar +libfb303-0.9.2.jar +libthrift-0.9.2.jar +log4j-1.2.17.jar +lz4-1.3.0.jar +mail-1.4.7.jar +mesos-0.21.1-shaded-protobuf.jar +metrics-core-3.1.2.jar +metrics-graphite-3.1.2.jar +metrics-json-3.1.2.jar +metrics-jvm-3.1.2.jar +minlog-1.3.0.jar +mx4j-3.0.2.jar +netty-3.8.0.Final.jar +netty-all-4.0.29.Final.jar +objenesis-2.1.jar +opencsv-2.3.jar +oro-2.0.8.jar +osgi-resource-locator-1.0.1.jar +paranamer-2.3.jar +parquet-column-1.8.1.jar +parquet-common-1.8.1.jar +parquet-encoding-1.8.1.jar +parquet-format-2.3.0-incubating.jar +parquet-hadoop-1.8.1.jar +parquet-hadoop-bundle-1.6.0.jar +parquet-jackson-1.8.1.jar +pmml-model-1.2.15.jar +pmml-schema-1.2.15.jar +protobuf-java-2.5.0.jar +py4j-0.10.1.jar +pyrolite-4.9.jar +scala-compiler-2.11.8.jar +scala-library-2.11.8.jar +scala-parser-combinators_2.11-1.0.4.jar +scala-reflect-2.11.8.jar +scala-xml_2.11-1.0.2.jar +scalap-2.11.8.jar +slf4j-api-1.7.16.jar +slf4j-log4j12-1.7.16.jar +snappy-0.2.jar +snappy-java-1.1.2.4.jar +spire-macros_2.11-0.7.4.jar +spire_2.11-0.7.4.jar +stax-api-1.0-2.jar +stax-api-1.0.1.jar +stream-2.7.0.jar +stringtemplate-3.2.1.jar +super-csv-2.2.0.jar +univocity-parsers-2.1.1.jar +validation-api-1.1.0.Final.jar +xbean-asm5-shaded-4.4.jar +xmlenc-0.52.jar +xz-1.0.jar +zookeeper-3.4.5.jar diff --git a/dev/pr-deps/spark-deps-hadoop-2.6 b/dev/pr-deps/spark-deps-hadoop-2.6 new file mode 100644 index 000000000000..b3dced63b9e7 --- /dev/null +++ b/dev/pr-deps/spark-deps-hadoop-2.6 @@ -0,0 +1,182 @@ +JavaEWAH-0.3.2.jar +RoaringBitmap-0.5.11.jar +ST4-4.0.4.jar +activation-1.1.1.jar +antlr-2.7.7.jar 
+antlr-runtime-3.4.jar +antlr4-runtime-4.5.3.jar +aopalliance-1.0.jar +aopalliance-repackaged-2.4.0-b34.jar +apache-log4j-extras-1.2.17.jar +apacheds-i18n-2.0.0-M15.jar +apacheds-kerberos-codec-2.0.0-M15.jar +api-asn1-api-1.0.0-M20.jar +api-util-1.0.0-M20.jar +arpack_combined_all-0.1.jar +avro-1.7.7.jar +avro-ipc-1.7.7.jar +avro-mapred-1.7.7-hadoop2.jar +base64-2.3.8.jar +bcprov-jdk15on-1.51.jar +bonecp-0.8.0.RELEASE.jar +breeze-macros_2.11-0.11.2.jar +breeze_2.11-0.11.2.jar +calcite-avatica-1.2.0-incubating.jar +calcite-core-1.2.0-incubating.jar +calcite-linq4j-1.2.0-incubating.jar +chill-java-0.8.0.jar +chill_2.11-0.8.0.jar +commons-beanutils-1.7.0.jar +commons-beanutils-core-1.8.0.jar +commons-cli-1.2.jar +commons-codec-1.10.jar +commons-collections-3.2.2.jar +commons-compiler-2.7.6.jar +commons-compress-1.4.1.jar +commons-configuration-1.6.jar +commons-dbcp-1.4.jar +commons-digester-1.8.jar +commons-httpclient-3.1.jar +commons-io-2.4.jar +commons-lang-2.6.jar +commons-lang3-3.3.2.jar +commons-logging-1.1.3.jar +commons-math3-3.4.1.jar +commons-net-2.2.jar +commons-pool-1.5.4.jar +compress-lzf-1.0.3.jar +core-1.1.2.jar +curator-client-2.6.0.jar +curator-framework-2.6.0.jar +curator-recipes-2.6.0.jar +datanucleus-api-jdo-3.2.6.jar +datanucleus-core-3.2.10.jar +datanucleus-rdbms-3.2.9.jar +derby-10.11.1.1.jar +eigenbase-properties-1.1.5.jar +gson-2.2.4.jar +guava-14.0.1.jar +guice-3.0.jar +guice-servlet-3.0.jar +hadoop-annotations-2.6.0.jar +hadoop-auth-2.6.0.jar +hadoop-client-2.6.0.jar +hadoop-common-2.6.0.jar +hadoop-hdfs-2.6.0.jar +hadoop-mapreduce-client-app-2.6.0.jar +hadoop-mapreduce-client-common-2.6.0.jar +hadoop-mapreduce-client-core-2.6.0.jar +hadoop-mapreduce-client-jobclient-2.6.0.jar +hadoop-mapreduce-client-shuffle-2.6.0.jar +hadoop-yarn-api-2.6.0.jar +hadoop-yarn-client-2.6.0.jar +hadoop-yarn-common-2.6.0.jar +hadoop-yarn-server-common-2.6.0.jar +hadoop-yarn-server-web-proxy-2.6.0.jar +hk2-api-2.4.0-b34.jar +hk2-locator-2.4.0-b34.jar 
+hk2-utils-2.4.0-b34.jar +htrace-core-3.0.4.jar +httpclient-4.5.2.jar +httpcore-4.4.4.jar +ivy-2.4.0.jar +jackson-annotations-2.6.5.jar +jackson-core-2.6.5.jar +jackson-core-asl-1.9.13.jar +jackson-databind-2.6.5.jar +jackson-jaxrs-1.9.13.jar +jackson-mapper-asl-1.9.13.jar +jackson-module-paranamer-2.6.5.jar +jackson-module-scala_2.11-2.6.5.jar +jackson-xc-1.9.13.jar +janino-2.7.8.jar +java-xmlbuilder-1.0.jar +javassist-3.18.1-GA.jar +javax.annotation-api-1.2.jar +javax.inject-1.jar +javax.inject-2.4.0-b34.jar +javax.servlet-api-3.1.0.jar +javax.ws.rs-api-2.0.1.jar +javolution-5.5.1.jar +jaxb-api-2.2.2.jar +jcl-over-slf4j-1.7.16.jar +jdo-api-3.0.1.jar +jersey-client-2.22.2.jar +jersey-common-2.22.2.jar +jersey-container-servlet-2.22.2.jar +jersey-container-servlet-core-2.22.2.jar +jersey-guava-2.22.2.jar +jersey-media-jaxb-2.22.2.jar +jersey-server-2.22.2.jar +jets3t-0.9.3.jar +jetty-6.1.26.jar +jetty-util-6.1.26.jar +jline-2.12.jar +joda-time-2.9.3.jar +jodd-core-3.5.2.jar +jpam-1.1.jar +json-20090211.jar +json4s-ast_2.11-3.2.11.jar +json4s-core_2.11-3.2.11.jar +json4s-jackson_2.11-3.2.11.jar +jsr305-1.3.9.jar +jta-1.1.jar +jtransforms-2.4.0.jar +jul-to-slf4j-1.7.16.jar +kryo-shaded-3.0.3.jar +leveldbjni-all-1.8.jar +libfb303-0.9.2.jar +libthrift-0.9.2.jar +log4j-1.2.17.jar +lz4-1.3.0.jar +mail-1.4.7.jar +mesos-0.21.1-shaded-protobuf.jar +metrics-core-3.1.2.jar +metrics-graphite-3.1.2.jar +metrics-json-3.1.2.jar +metrics-jvm-3.1.2.jar +minlog-1.3.0.jar +mx4j-3.0.2.jar +netty-3.8.0.Final.jar +netty-all-4.0.29.Final.jar +objenesis-2.1.jar +opencsv-2.3.jar +oro-2.0.8.jar +osgi-resource-locator-1.0.1.jar +paranamer-2.3.jar +parquet-column-1.8.1.jar +parquet-common-1.8.1.jar +parquet-encoding-1.8.1.jar +parquet-format-2.3.0-incubating.jar +parquet-hadoop-1.8.1.jar +parquet-hadoop-bundle-1.6.0.jar +parquet-jackson-1.8.1.jar +pmml-model-1.2.15.jar +pmml-schema-1.2.15.jar +protobuf-java-2.5.0.jar +py4j-0.10.1.jar +pyrolite-4.9.jar +scala-compiler-2.11.8.jar 
+scala-library-2.11.8.jar +scala-parser-combinators_2.11-1.0.4.jar +scala-reflect-2.11.8.jar +scala-xml_2.11-1.0.2.jar +scalap-2.11.8.jar +slf4j-api-1.7.16.jar +slf4j-log4j12-1.7.16.jar +snappy-0.2.jar +snappy-java-1.1.2.4.jar +spire-macros_2.11-0.7.4.jar +spire_2.11-0.7.4.jar +stax-api-1.0-2.jar +stax-api-1.0.1.jar +stream-2.7.0.jar +stringtemplate-3.2.1.jar +super-csv-2.2.0.jar +univocity-parsers-2.1.1.jar +validation-api-1.1.0.Final.jar +xbean-asm5-shaded-4.4.jar +xercesImpl-2.9.1.jar +xmlenc-0.52.jar +xz-1.0.jar +zookeeper-3.4.6.jar diff --git a/dev/pr-deps/spark-deps-hadoop-2.7 b/dev/pr-deps/spark-deps-hadoop-2.7 new file mode 100644 index 000000000000..16f60f29ffbb --- /dev/null +++ b/dev/pr-deps/spark-deps-hadoop-2.7 @@ -0,0 +1,183 @@ +JavaEWAH-0.3.2.jar +RoaringBitmap-0.5.11.jar +ST4-4.0.4.jar +activation-1.1.1.jar +antlr-2.7.7.jar +antlr-runtime-3.4.jar +antlr4-runtime-4.5.3.jar +aopalliance-1.0.jar +aopalliance-repackaged-2.4.0-b34.jar +apache-log4j-extras-1.2.17.jar +apacheds-i18n-2.0.0-M15.jar +apacheds-kerberos-codec-2.0.0-M15.jar +api-asn1-api-1.0.0-M20.jar +api-util-1.0.0-M20.jar +arpack_combined_all-0.1.jar +avro-1.7.7.jar +avro-ipc-1.7.7.jar +avro-mapred-1.7.7-hadoop2.jar +base64-2.3.8.jar +bcprov-jdk15on-1.51.jar +bonecp-0.8.0.RELEASE.jar +breeze-macros_2.11-0.11.2.jar +breeze_2.11-0.11.2.jar +calcite-avatica-1.2.0-incubating.jar +calcite-core-1.2.0-incubating.jar +calcite-linq4j-1.2.0-incubating.jar +chill-java-0.8.0.jar +chill_2.11-0.8.0.jar +commons-beanutils-1.7.0.jar +commons-beanutils-core-1.8.0.jar +commons-cli-1.2.jar +commons-codec-1.10.jar +commons-collections-3.2.2.jar +commons-compiler-2.7.6.jar +commons-compress-1.4.1.jar +commons-configuration-1.6.jar +commons-dbcp-1.4.jar +commons-digester-1.8.jar +commons-httpclient-3.1.jar +commons-io-2.4.jar +commons-lang-2.6.jar +commons-lang3-3.3.2.jar +commons-logging-1.1.3.jar +commons-math3-3.4.1.jar +commons-net-2.2.jar +commons-pool-1.5.4.jar +compress-lzf-1.0.3.jar +core-1.1.2.jar 
+curator-client-2.6.0.jar +curator-framework-2.6.0.jar +curator-recipes-2.6.0.jar +datanucleus-api-jdo-3.2.6.jar +datanucleus-core-3.2.10.jar +datanucleus-rdbms-3.2.9.jar +derby-10.11.1.1.jar +eigenbase-properties-1.1.5.jar +gson-2.2.4.jar +guava-14.0.1.jar +guice-3.0.jar +guice-servlet-3.0.jar +hadoop-annotations-2.7.0.jar +hadoop-auth-2.7.0.jar +hadoop-client-2.7.0.jar +hadoop-common-2.7.0.jar +hadoop-hdfs-2.7.0.jar +hadoop-mapreduce-client-app-2.7.0.jar +hadoop-mapreduce-client-common-2.7.0.jar +hadoop-mapreduce-client-core-2.7.0.jar +hadoop-mapreduce-client-jobclient-2.7.0.jar +hadoop-mapreduce-client-shuffle-2.7.0.jar +hadoop-yarn-api-2.7.0.jar +hadoop-yarn-client-2.7.0.jar +hadoop-yarn-common-2.7.0.jar +hadoop-yarn-server-common-2.7.0.jar +hadoop-yarn-server-web-proxy-2.7.0.jar +hk2-api-2.4.0-b34.jar +hk2-locator-2.4.0-b34.jar +hk2-utils-2.4.0-b34.jar +htrace-core-3.1.0-incubating.jar +httpclient-4.5.2.jar +httpcore-4.4.4.jar +ivy-2.4.0.jar +jackson-annotations-2.6.5.jar +jackson-core-2.6.5.jar +jackson-core-asl-1.9.13.jar +jackson-databind-2.6.5.jar +jackson-jaxrs-1.9.13.jar +jackson-mapper-asl-1.9.13.jar +jackson-module-paranamer-2.6.5.jar +jackson-module-scala_2.11-2.6.5.jar +jackson-xc-1.9.13.jar +janino-2.7.8.jar +java-xmlbuilder-1.0.jar +javassist-3.18.1-GA.jar +javax.annotation-api-1.2.jar +javax.inject-1.jar +javax.inject-2.4.0-b34.jar +javax.servlet-api-3.1.0.jar +javax.ws.rs-api-2.0.1.jar +javolution-5.5.1.jar +jaxb-api-2.2.2.jar +jcl-over-slf4j-1.7.16.jar +jdo-api-3.0.1.jar +jersey-client-2.22.2.jar +jersey-common-2.22.2.jar +jersey-container-servlet-2.22.2.jar +jersey-container-servlet-core-2.22.2.jar +jersey-guava-2.22.2.jar +jersey-media-jaxb-2.22.2.jar +jersey-server-2.22.2.jar +jets3t-0.9.3.jar +jetty-6.1.26.jar +jetty-util-6.1.26.jar +jline-2.12.jar +joda-time-2.9.3.jar +jodd-core-3.5.2.jar +jpam-1.1.jar +json-20090211.jar +json4s-ast_2.11-3.2.11.jar +json4s-core_2.11-3.2.11.jar +json4s-jackson_2.11-3.2.11.jar +jsp-api-2.1.jar 
+jsr305-1.3.9.jar +jta-1.1.jar +jtransforms-2.4.0.jar +jul-to-slf4j-1.7.16.jar +kryo-shaded-3.0.3.jar +leveldbjni-all-1.8.jar +libfb303-0.9.2.jar +libthrift-0.9.2.jar +log4j-1.2.17.jar +lz4-1.3.0.jar +mail-1.4.7.jar +mesos-0.21.1-shaded-protobuf.jar +metrics-core-3.1.2.jar +metrics-graphite-3.1.2.jar +metrics-json-3.1.2.jar +metrics-jvm-3.1.2.jar +minlog-1.3.0.jar +mx4j-3.0.2.jar +netty-3.8.0.Final.jar +netty-all-4.0.29.Final.jar +objenesis-2.1.jar +opencsv-2.3.jar +oro-2.0.8.jar +osgi-resource-locator-1.0.1.jar +paranamer-2.3.jar +parquet-column-1.8.1.jar +parquet-common-1.8.1.jar +parquet-encoding-1.8.1.jar +parquet-format-2.3.0-incubating.jar +parquet-hadoop-1.8.1.jar +parquet-hadoop-bundle-1.6.0.jar +parquet-jackson-1.8.1.jar +pmml-model-1.2.15.jar +pmml-schema-1.2.15.jar +protobuf-java-2.5.0.jar +py4j-0.10.1.jar +pyrolite-4.9.jar +scala-compiler-2.11.8.jar +scala-library-2.11.8.jar +scala-parser-combinators_2.11-1.0.4.jar +scala-reflect-2.11.8.jar +scala-xml_2.11-1.0.2.jar +scalap-2.11.8.jar +slf4j-api-1.7.16.jar +slf4j-log4j12-1.7.16.jar +snappy-0.2.jar +snappy-java-1.1.2.4.jar +spire-macros_2.11-0.7.4.jar +spire_2.11-0.7.4.jar +stax-api-1.0-2.jar +stax-api-1.0.1.jar +stream-2.7.0.jar +stringtemplate-3.2.1.jar +super-csv-2.2.0.jar +univocity-parsers-2.1.1.jar +validation-api-1.1.0.Final.jar +xbean-asm5-shaded-4.4.jar +xercesImpl-2.9.1.jar +xmlenc-0.52.jar +xz-1.0.jar +zookeeper-3.4.6.jar diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala index 1855eab96eaa..d49466457cee 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala @@ -179,6 +179,45 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { StatFunctions.crossTabulate(df, col1, col2) } + /** + * One-way table of frequencies + * + * @param col the name of the column over 
which to tabulate frequencies + * @param maybeWeightCol optional weight column name; if None, every row gets weight 1.0 + * @param frequencyColumnName the name to give the column with the frequencies + * @param proportionColumnName the name to give the column with the proportions + * @return a dataframe with the tabulation + */ + def tab(col: String, + maybeWeightCol: Option[String] = None, + frequencyColumnName: String = "Frequency", + proportionColumnName: String = "Proportion"): DataFrame = + StatFunctions.tabulate(df, col, maybeWeightCol, frequencyColumnName, proportionColumnName) + + /** + * Weighted one-way table of frequencies with explicitly named output columns + * + * @param col the name of the column over which to tabulate frequencies + * @param weightCol the name of the column with the weights + * @param frequencyColumnName the name to give the column with the frequencies + * @param proportionColumnName the name to give the column with the proportions + * @return a dataframe with the tabulation + */ + def tab(col: String, + weightCol: String, + frequencyColumnName: String, + proportionColumnName: String): DataFrame = + tab(col, Some(weightCol), frequencyColumnName, proportionColumnName) + + /** + * Weighted one-way table of frequencies with the default output column names + * + * @param col the name of the column over which to tabulate frequencies + * @param weightCol the name of the column with the weights + * @return a dataframe with the tabulation + */ + def tab(col: String, weightCol: String): DataFrame = tab(col, Some(weightCol)) + /** * Finding frequent items for columns, possibly with false positives.
Using the * frequent element count algorithm described in diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala index 9c0406168e6e..bfe008882312 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala @@ -55,7 +55,6 @@ private[sql] object StatFunctions extends Logging { * @param relativeError The relative target precision to achieve (>= 0). * If set to zero, the exact quantiles are computed, which could be very expensive. * Note that values greater than 1 are accepted but give the same result as 1. - * * @return for each column, returns the requested approximations */ def multipleApproxQuantiles( @@ -129,6 +128,7 @@ private[sql] object StatFunctions extends Logging { * Returns a summary with the given observation inserted into the summary. * This method may either modify in place the current summary (and return the same summary, * modified in place), or it may create a new summary from scratch it necessary. + * * @param x the new observation to insert into the summary */ def insert(x: Double): QuantileSummaries = { @@ -297,6 +297,7 @@ private[sql] object StatFunctions extends Logging { /** * Statistics from the Greenwald-Khanna paper. + * * @param value the sampled value * @param g the minimum rank jump from the previous value's minimum rank * @param delta the maximum span of the rank. @@ -403,6 +404,7 @@ private[sql] object StatFunctions extends Logging { /** * Calculate the covariance of two numerical columns of a DataFrame. + * * @param df The DataFrame * @param cols the column names * @return the covariance of the two columns. @@ -412,6 +414,47 @@ private[sql] object StatFunctions extends Logging { counts.cov } + /** + * Build a one-way frequency table for a column, optionally weighting each row.
+ * + * Frequencies are the per-group weight sums rescaled so that they add up to the number of + * non-null weights; proportions are those frequencies divided by that number. + * + * @param df The DataFrame + * @param col1 the name of the column over which to tabulate the values + * @param maybeWeightCol the column with the weights; when None every row has weight 1.0 + * @param frequencyColumnName the name of the column created with the frequency tabulation + * @param proportionColumnName the name of the column created with the proportion tabulation + * @return a dataframe with the tabulation. + */ + private[sql] def tabulate( + df: DataFrame, + col1: String, + maybeWeightCol: Option[String], + frequencyColumnName: String, + proportionColumnName: String): DataFrame = { + + /* Without a weight column, a synthetic constant column makes every row count once. */ + val createdWeightColName = "__weight__" + val (dfWithWeightColumn, weightCol) = maybeWeightCol match { + case Some(name) => (df, name) + case None => (df.withColumn(createdWeightColName, lit(1d)), createdWeightColName) + } + + /* A global aggregate always yields exactly one row. The sum is cast to double so integral + * weight columns work; it is null when there is no non-null weight (e.g. an empty df). */ + val totals = dfWithWeightColumn + .agg(sum(col(weightCol).cast("double")), count(weightCol)) + .head() + val sumOfWeights = if (totals.isNullAt(0)) 0d else totals.getDouble(0) + val numberOfObservations = totals.getLong(1) + + dfWithWeightColumn.groupBy(col1).agg(count(col1), sum(weightCol) as weightCol) + .withColumn(frequencyColumnName, col(weightCol) * numberOfObservations / sumOfWeights) + .withColumn(proportionColumnName, col(frequencyColumnName) / numberOfObservations) + .drop(weightCol) + } + + /** Generate a table of frequencies for the elements of two columns.
*/ private[sql] def crossTabulate(df: DataFrame, col1: String, col2: String): DataFrame = { val tableName = s"${col1}_$col2" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala index 73026c749db4..5aff9592f1c3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala @@ -152,6 +152,33 @@ class DataFrameStatSuite extends QueryTest with SharedSQLContext { } } + test("tabulation of single variable, with and without weights") { + + val obs1 = ("1", "M", 10, "P", 2d) + val obs2 = ("2", "M", 12, "S", 4d) + val obs3 = ("3", "M", 13, "B", 1d) + val obs4 = ("4", "F", 11, "P", 1d) + val obs5 = ("5", "F", 13, "M", 3d) + val df = Seq(obs1, obs2, obs3, obs4, obs5).toDF("id", "gender", "age", "educ", "w") + + /* Reads one double column of a tabulation and compares it with the expected values, in order + * and with a small tolerance because the values come out of floating-point arithmetic. */ + def checkColumn(tab: DataFrame, name: String, expected: Seq[Double]): Unit = { + val actual = tab.select(name).collect().map(_.getDouble(0)) + assert(actual.length === expected.length) + actual.zip(expected).foreach { case (a, e) => assert(math.abs(a - e) < 1e-9) } + } + + /* groupBy gives no ordering guarantee, so sort on the key ("F" before "M") first. */ + val tabWithoutWeights = df.stat.tab("gender").orderBy("gender") + checkColumn(tabWithoutWeights, "Frequency", Seq(2d, 3d)) + checkColumn(tabWithoutWeights, "Proportion", Seq(0.4, 0.6)) + + val tabWithWeights = df.stat.tab("gender", "w").orderBy("gender") + checkColumn(tabWithWeights, "Frequency", Seq(4d * 5 / 11, 7d * 5 / 11)) + checkColumn(tabWithWeights, "Proportion", Seq(4d / 11, 7d / 11)) + } + test("crosstab") { val rng = new Random() val data = Seq.tabulate(25)(i => (rng.nextInt(5), rng.nextInt(10)))