diff --git a/README.md b/README.md index aa76fdec3ad..11f2b47a9e8 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # RAPIDS Accelerator For Apache Spark -NOTE: For the latest stable [README.md](https://github.com/nvidia/spark-rapids/blob/main/README.md) ensure you are on the main branch. The RAPIDS Accelerator for Apache Spark provides a set of plugins for Apache Spark that leverage GPUs to accelerate processing via the RAPIDS libraries and UCX. Documentation on the current release can be found [here](https://nvidia.github.io/spark-rapids/). +NOTE: For the latest stable [README.md](https://github.com/nvidia/spark-rapids/blob/main/README.md) ensure you are on the main branch. The RAPIDS Accelerator for Apache Spark provides a set of plugins for Apache Spark that leverage GPUs to accelerate processing via the RAPIDS libraries and UCX. Documentation on the current release can be found [here](https://nvidia.github.io/spark-rapids/). -The RAPIDS Accelerator for Apache Spark provides a set of plugins for +The RAPIDS Accelerator for Apache Spark provides a set of plugins for [Apache Spark](https://spark.apache.org) that leverage GPUs to accelerate processing via the [RAPIDS](https://rapids.ai) libraries and [UCX](https://www.openucx.org/). @@ -19,7 +19,7 @@ To get started tuning your job and get the most performance out of it please sta ## Configuration -The plugin has a set of Spark configs that control its behavior and are documented +The plugin has a set of Spark configs that control its behavior and are documented [here](docs/configs.md). ## Issues @@ -30,13 +30,13 @@ may file one [here](https://github.com/NVIDIA/spark-rapids/issues/new/choose). ## Download The jar files for the most recent release can be retrieved from the [download](docs/download.md) -page. +page. ## Building From Source See the [build instructions in the contributing guide](CONTRIBUTING.md#building-from-source). -## Testing +## Testing Tests are described [here](tests/README.md). 
@@ -45,7 +45,7 @@ The RAPIDS Accelerator For Apache Spark does provide some APIs for doing zero co transfer into other GPU enabled applications. It is described [here](docs/ml-integration.md). -Currently, we are working with XGBoost to try to provide this integration out of the box. +Currently, we are working with XGBoost to try to provide this integration out of the box. You may need to disable RMM caching when exporting data to an ML library as that library will likely want to use all of the GPU's memory and if it is not aware of RMM it will not have @@ -60,6 +60,21 @@ The profiling tool generates information which can be used for debugging and pro Information such as Spark version, executor information, properties and so on. This runs on either CPU or GPU generated event logs. -Please refer to [spark qualification tool documentation](docs/spark-qualification-tool.md) +Please refer to [spark qualification tool documentation](docs/spark-qualification-tool.md) and [spark profiling tool documentation](docs/spark-profiling-tool.md) -for more details on how to use the tools. \ No newline at end of file +for more details on how to use the tools. + +## Dependency for External Projects + +If you need to develop some functionality on top of RAPIDS Accelerator For Apache Spark (we currently +limit support to GPU-accelerated UDFs) we recommend you declare our distribution artifact +as a `provided` dependency. 
+ +```xml +<dependency> + <groupId>com.nvidia</groupId> + <artifactId>rapids-4-spark_2.12</artifactId> + <version>21.12.0-SNAPSHOT</version> + <scope>provided</scope> +</dependency> +``` \ No newline at end of file diff --git a/aggregator/pom.xml b/aggregator/pom.xml index 240f7175326..a28af74f149 100644 --- a/aggregator/pom.xml +++ b/aggregator/pom.xml @@ -210,6 +210,31 @@ org.apache.rat apache-rat-plugin + + + maven-clean-plugin + 3.1.0 + + + clean-reduced-dependency-poms + clean + + clean + + + ${skipDrpClean} + + + ${project.basedir} + + dependency-reduced-pom*.xml + + + + + + + diff --git a/dist/pom.xml b/dist/pom.xml index 35dadf2e8c9..c505e9cdb03 100644 --- a/dist/pom.xml +++ b/dist/pom.xml @@ -34,8 +34,23 @@ rapids-4-spark-aggregator_${scala.binary.version} ${project.version} ${spark.version.classifier} + provided + + + + ai.rapids + cudf + ${cudf.version} + ${cuda.version} + compile + @@ -223,7 +238,14 @@ default-jar + none + + + create-parallel-worlds-jar package + + jar + ${project.build.directory}/parallel-world @@ -336,20 +358,6 @@ - - org.apache.maven.plugins - maven-install-plugin - 3.0.0-M1 - - - default-install - install - - ${project.build.directory}/dependency-reduced-pom.xml - - - - diff --git a/dist/scripts/binary-dedupe.sh b/dist/scripts/binary-dedupe.sh index 1415d014c4a..88672e15983 100755 --- a/dist/scripts/binary-dedupe.sh +++ b/dist/scripts/binary-dedupe.sh @@ -220,9 +220,5 @@ time ( echo "$((++STEP))/ deleting all class files listed in $DELETE_DUPLICATES_TXT" time (< "$DELETE_DUPLICATES_TXT" sort -u | xargs rm) 2>&1 -echo "Generating dependency-reduced-pom.xml" -# which is just delete the dependencies list altogether -sed -e '/<dependencies>/,/<\/dependencies>/d' ../pom.xml > dependency-reduced-pom.xml - end_time=$(date +%s) echo "binary-dedupe completed in $((end_time - start_time)) seconds"