Merge branch 'branch-0.17' into enh-weakcc-flags

rapidsai · Nov 12, 2020 · a6d4fdb · a6d4fdb
2 parents 9490b16 + a6622de
commit a6d4fdb
Show file tree

Hide file tree

Showing 394 changed files with 2,194 additions and 13,008 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,8 @@
 ## New Features
 
 ## Improvements
+- PR #3077: Improve runtime for test_kmeans
+- PR #3070: Speed up dask/test_datasets tests
 - PR #3075: Speed up test_linear_model tests
 - PR #3078: Speed up test_incremental_pca tests
 - PR #2902: `matrix/matrix.cuh` in RAFT namespacing
@@ -15,10 +17,19 @@
 - PR #2996: Removing the max_depth restriction for switching to the batched backend
 - PR #3004: Remove Single Process Multi GPU (SPMG) code
 - PR #3044: Move leftover `linalg` and `stats` to RAFT namespaces
+- PR #3067: Deleting prims moved to RAFT and updating header paths
 - PR #3074: Reducing dask coordinate descent test runtime
 - PR #3096: Avoid memory transfers in CSR WeakCC for DBSCAN
+- PR #3088: More readable and robust FIL C++ test management
+- PR #3052: Speeding up MNMG KNN Cl&Re testing
+- PR #3115: Speeding up MNMG UMAP testing
+- PR #3112: Speed up test_array
+- PR #3111: Adding Cython to Code Coverage
+- PR #3129: Update notebooks README
 
 ## Bug Fixes
+- PR #3065: Refactoring prims metrics function names from camelcase to underscore format
+- PR #3033: Splitting ml metrics to individual files
 - PR #3072: Fusing metrics and score directories in src_prims
 - PR #3037: Avoid logging deadlock in multi-threaded C code
 - PR #2983: Fix seeding of KISS99 RNG
@@ -27,9 +38,16 @@
 - PR #3012: Increasing learning rate for SGD log loss and invscaling pytests
 - PR #3021: Fix a hang in cuML RF experimental backend
 - PR #3039: Update RF and decision tree parameter initializations in benchmark codes
+- PR #3060: Speed up test suite `test_fil`
 - PR #3061: Handle C++ exception thrown from FIL predict
 - PR #3073: Update mathjax CDN URL for documentation
 - PR #3062: Bumping xgboost version to match cuml version
+- PR #3084: Fix artifacts in t-SNE results
+- PR #3086: Reverting FIL Notebook Testing
+- PR #3114: Fixed a typo in SVC's predict_proba AttributeError
+- PR #3117: Fix two crashes in experimental RF backend
+- PR #3119: Fix memset args for benchmark 
+- PR #3130: Return Python string from `dump_as_json()` of RF
 
 # cuML 0.16.0 (Date TBD)
 

diff --git a/build.sh b/build.sh
@@ -19,7 +19,7 @@ ARGS=$*
 REPODIR=$(cd $(dirname $0); pwd)
 
 VALIDTARGETS="clean libcuml cuml cpp-mgtests prims bench prims-bench cppdocs pydocs"
-VALIDFLAGS="-v -g -n --allgpuarch --buildfaiss --buildgtest --singlegpu --nvtx --show_depr_warn -h --help "
+VALIDFLAGS="-v -g -n --allgpuarch --buildfaiss --buildgtest --singlegpu --nvtx --show_depr_warn --codecov -h --help "
 VALIDARGS="${VALIDTARGETS} ${VALIDFLAGS}"
 HELP="$0 [<target> ...] [<flag> ...]
  where <target> is:
@@ -43,6 +43,8 @@ HELP="$0 [<target> ...] [<flag> ...]
    --singlegpu      - Build libcuml and cuml without multigpu components
    --nvtx           - Enable nvtx for profiling support
    --show_depr_warn - show cmake deprecation warnings
+   --codecov        - Enable code coverage support by compiling with Cython linetracing
+                      and profiling enabled (WARNING: Impacts performance)
    -h               - print this text
 
  default action (no args) is to build and install 'libcuml', 'cuml', and 'prims' targets only for the detected GPU arch
@@ -58,7 +60,7 @@ BUILD_TYPE=Release
 INSTALL_TARGET=install
 BUILD_ALL_GPU_ARCH=0
 SINGLEGPU_CPP_FLAG=""
-SINGLEGPU_PYTHON_FLAG=""
+BUILD_PYTHON_ARGS=${BUILD_PYTHON_ARGS:=""}
 NVTX=OFF
 CLEAN=0
 BUILD_DISABLE_DEPRECATION_WARNING=ON
@@ -115,7 +117,7 @@ if hasArg --allgpuarch; then
     BUILD_ALL_GPU_ARCH=1
 fi
 if hasArg --singlegpu; then
-    SINGLEGPU_PYTHON_FLAG="--singlegpu"
+    BUILD_PYTHON_ARGS="${BUILD_PYTHON_ARGS} --singlegpu"
     SINGLEGPU_CPP_FLAG=ON
 fi
 if hasArg cpp-mgtests; then
@@ -133,6 +135,9 @@ fi
 if hasArg --show_depr_warn; then
     BUILD_DISABLE_DEPRECATION_WARNING=OFF
 fi
+if hasArg --codecov; then
+    BUILD_PYTHON_ARGS="${BUILD_PYTHON_ARGS} --linetrace=1 --profile"
+fi
 if hasArg clean; then
     CLEAN=1
 fi
@@ -224,9 +229,9 @@ fi
 if completeBuild || hasArg cuml || hasArg pydocs; then
     cd ${REPODIR}/python
     if [[ ${INSTALL_TARGET} != "" ]]; then
-        python setup.py build_ext -j${PARALLEL_LEVEL:-1} ${SINGLEGPU_PYTHON_FLAG} --library-dir=${LIBCUML_BUILD_DIR} install --single-version-externally-managed --record=record.txt
+        python setup.py build_ext -j${PARALLEL_LEVEL:-1} ${BUILD_PYTHON_ARGS} --library-dir=${LIBCUML_BUILD_DIR} install --single-version-externally-managed --record=record.txt
     else
-        python setup.py build_ext -j${PARALLEL_LEVEL:-1} --library-dir=${LIBCUML_BUILD_DIR} ${SINGLEGPU_PYTHON_FLAG}
+        python setup.py build_ext -j${PARALLEL_LEVEL:-1} ${BUILD_PYTHON_ARGS} --library-dir=${LIBCUML_BUILD_DIR}
     fi
 
     if hasArg pydocs; then

diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
@@ -98,7 +98,7 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
     ################################################################################
 
     gpuci_logger "Build from source"
-    $WORKSPACE/build.sh clean libcuml cuml prims bench -v
+    $WORKSPACE/build.sh clean libcuml cuml prims bench -v --codecov
 
     gpuci_logger "Resetting LD_LIBRARY_PATH"
 
@@ -190,7 +190,7 @@ else
     conda install -c $WORKSPACE/ci/artifacts/cuml/cpu/conda-bld/ libcuml
 
     gpuci_logger "Building cuml"
-    "$WORKSPACE/build.sh" -v cuml
+    "$WORKSPACE/build.sh" -v cuml --codecov
 
     gpuci_logger "Python pytest for cuml"
     cd $WORKSPACE/python

diff --git a/ci/gpu/test-notebooks.sh b/ci/gpu/test-notebooks.sh
@@ -10,9 +10,7 @@ TOPLEVEL_NB_FOLDERS=$(find . -name *.ipynb |cut -d'/' -f2|sort -u)
 # Add notebooks that should be skipped here
 # (space-separated list of filenames without paths)
 
-# TODO: (MDD) Temporarily adding forest_inference_demo.ipynb since xgboost is broken in 0.17
-#       Remove once xgboost is working again.
-SKIPNBS="cuml_benchmarks.ipynb forest_inference_demo.ipynb"
+SKIPNBS="cuml_benchmarks.ipynb"
 
 ## Check env
 env

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -401,8 +401,19 @@ if(BUILD_CUML_CPP_LIBRARY)
     src/holtwinters/holtwinters.cu
     src/kmeans/kmeans.cu
     src/knn/knn.cu
-    src/metrics/metrics.cu
+    src/metrics/accuracy_score.cu
+    src/metrics/adjusted_rand_index.cu
+    src/metrics/completeness_score.cu
+    src/metrics/entropy.cu
+    src/metrics/homogeneity_score.cu
+    src/metrics/kl_divergence.cu
+    src/metrics/mutual_info_score.cu
+    src/metrics/pairwise_distance.cu
+    src/metrics/r2_score.cu
+    src/metrics/rand_index.cu
+    src/metrics/silhouette_score.cu
     src/metrics/trustworthiness.cu
+    src/metrics/v_measure.cu
     src/pca/pca.cu
     src/randomforest/randomforest.cu
     src/random_projection/rproj.cu

diff --git a/cpp/bench/common/ml_benchmark.hpp b/cpp/bench/common/ml_benchmark.hpp
@@ -17,8 +17,8 @@
 #pragma once
 
 #include <benchmark/benchmark.h>
-#include <common/cudart_utils.h>
 #include <cuda_runtime.h>
+#include <raft/cudart_utils.h>
 #include <cuml/common/cuml_allocator.hpp>
 #include <cuml/common/logger.hpp>
 #include <cuml/common/utils.hpp>
@@ -55,14 +55,14 @@ struct CudaEventTimer {
     CUDA_CHECK(cudaEventCreate(&stop));
     // flush L2?
     if (ptr != nullptr && l2CacheSize > 0) {
-      CUDA_CHECK(cudaMemsetAsync(ptr, sizeof(char) * l2CacheSize, 0, s));
+      CUDA_CHECK(cudaMemsetAsync(ptr, 0, sizeof(char) * l2CacheSize, s));
       CUDA_CHECK(cudaStreamSynchronize(stream));
     }
     CUDA_CHECK(cudaEventRecord(start, stream));
   }
   CudaEventTimer() = delete;
 
-  /** 
+  /**
   * @brief The dtor stops the timer and performs a synchronization. Time of
    *       the benchmark::State object provided to the ctor will be set to the
    *       value given by `cudaEventElapsedTime()`.

diff --git a/cpp/bench/prims/add.cu b/cpp/bench/prims/add.cu
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include <linalg/add.cuh>
+#include <raft/linalg/add.cuh>
 #include "../common/ml_benchmark.hpp"
 
 namespace MLCommon {

diff --git a/cpp/bench/prims/distance_common.cuh b/cpp/bench/prims/distance_common.cuh
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include <common/cudart_utils.h>
+#include <raft/cudart_utils.h>
 #include <distance/distance.cuh>
 #include "../common/ml_benchmark.hpp"
 

diff --git a/cpp/bench/prims/fused_l2_nn.cu b/cpp/bench/prims/fused_l2_nn.cu
@@ -14,11 +14,11 @@
  * limitations under the License.
  */
 
-#include <common/cudart_utils.h>
+#include <raft/cudart_utils.h>
 #include <distance/fused_l2_nn.cuh>
 #include <limits>
-#include <linalg/norm.cuh>
-#include <random/rng.cuh>
+#include <raft/linalg/norm.cuh>
+#include <raft/random/rng.cuh>
 #include "../common/ml_benchmark.hpp"
 
 namespace MLCommon {

diff --git a/cpp/bench/prims/gram_matrix.cu b/cpp/bench/prims/gram_matrix.cu
@@ -18,7 +18,7 @@
 #include <matrix/grammatrix.cuh>
 #include <matrix/kernelfactory.cuh>
 #include <memory>
-#include <random/rng.cuh>
+#include <raft/random/rng.cuh>
 #include <sstream>
 #include <string>
 #include <vector>

diff --git a/cpp/bench/prims/map_then_reduce.cu b/cpp/bench/prims/map_then_reduce.cu
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include <linalg/map_then_reduce.cuh>
+#include <raft/linalg/map_then_reduce.cuh>
 #include "../common/ml_benchmark.hpp"
 
 namespace MLCommon {

diff --git a/cpp/bench/prims/matrix_vector_op.cu b/cpp/bench/prims/matrix_vector_op.cu
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include <linalg/matrix_vector_op.cuh>
+#include <raft/linalg/matrix_vector_op.cuh>
 #include "../common/ml_benchmark.hpp"
 
 namespace MLCommon {

diff --git a/cpp/bench/prims/permute.cu b/cpp/bench/prims/permute.cu
@@ -14,9 +14,9 @@
  * limitations under the License.
  */
 
-#include <common/cudart_utils.h>
+#include <raft/cudart_utils.h>
+#include <raft/random/rng.cuh>
 #include <random/permute.cuh>
-#include <random/rng.cuh>
 #include "../common/ml_benchmark.hpp"
 
 namespace MLCommon {

diff --git a/cpp/bench/prims/reduce.cu b/cpp/bench/prims/reduce.cu
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include <linalg/reduce.cuh>
+#include <raft/linalg/reduce.cuh>
 #include "../common/ml_benchmark.hpp"
 
 namespace MLCommon {

diff --git a/cpp/bench/prims/rng.cu b/cpp/bench/prims/rng.cu
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#include <common/cudart_utils.h>
-#include <random/rng.cuh>
+#include <raft/cudart_utils.h>
+#include <raft/random/rng.cuh>
 #include "../common/ml_benchmark.hpp"
 
 namespace MLCommon {

diff --git a/cpp/bench/sg/arima_loglikelihood.cu b/cpp/bench/sg/arima_loglikelihood.cu
@@ -22,9 +22,9 @@
 
 #include <cuml/tsa/arima_common.h>
 #include <cuml/tsa/batched_arima.hpp>
-#include <random/rng.cuh>
+#include <raft/random/rng.cuh>
 
-#include <common/cudart_utils.h>
+#include <raft/cudart_utils.h>
 #include "benchmark.cuh"
 
 namespace ML {

diff --git a/cpp/bench/sg/benchmark.cuh b/cpp/bench/sg/benchmark.cuh
@@ -17,8 +17,8 @@
 #pragma once
 
 #include <benchmark/benchmark.h>
-#include <common/cudart_utils.h>
 #include <cuda_runtime.h>
+#include <raft/cudart_utils.h>
 #include <cuml/common/logger.hpp>
 #include <cuml/cuml.hpp>
 #include "../common/ml_benchmark.hpp"

diff --git a/cpp/bench/sg/dataset.cuh b/cpp/bench/sg/dataset.cuh
@@ -16,15 +16,15 @@
 
 #pragma once
 
-#include <common/cudart_utils.h>
-#include <linalg/transpose.h>
+#include <raft/cudart_utils.h>
+#include <raft/linalg/transpose.h>
 #include <common/cumlHandle.hpp>
-#include <cuda_utils.cuh>
 #include <cuml/cuml.hpp>
 #include <cuml/datasets/make_blobs.hpp>
 #include <fstream>
 #include <iostream>
-#include <linalg/unary_op.cuh>
+#include <raft/cuda_utils.cuh>
+#include <raft/linalg/unary_op.cuh>
 #include <random/make_regression.cuh>
 #include <sstream>
 #include <string>

diff --git a/cpp/bench/sg/dataset_ts.cuh b/cpp/bench/sg/dataset_ts.cuh
@@ -17,11 +17,11 @@
 #pragma once
 
 #include <common/cumlHandle.hpp>
-#include <cuda_utils.cuh>
 #include <cuml/cuml.hpp>
+#include <raft/cuda_utils.cuh>
 
-#include <common/cudart_utils.h>
-#include <random/rng.cuh>
+#include <raft/cudart_utils.h>
+#include <raft/random/rng.cuh>
 
 namespace ML {
 namespace Bench {

diff --git a/cpp/bench/sg/umap.cu b/cpp/bench/sg/umap.cu
@@ -14,9 +14,9 @@
  * limitations under the License.
  */
 
-#include <cuda_utils.cuh>
 #include <cuml/cuml.hpp>
 #include <cuml/manifold/umap.hpp>
+#include <raft/cuda_utils.cuh>
 #include <utility>
 #include "benchmark.cuh"
 

diff --git a/cpp/cmake/Dependencies.cmake b/cpp/cmake/Dependencies.cmake
@@ -39,7 +39,7 @@ else(DEFINED ENV{RAFT_PATH})
 
   ExternalProject_Add(raft
     GIT_REPOSITORY    https://github.com/rapidsai/raft.git
-    GIT_TAG           3f8a4bf6f81289f1fdaae9a5bd4a10de8674aa5c
+    GIT_TAG           9b3afe67895fbea397fb2c72375157aadfc132d8
     PREFIX            ${RAFT_DIR}
     CONFIGURE_COMMAND ""
     BUILD_COMMAND     ""

diff --git a/cpp/include/cuml/fil/fil.h b/cpp/include/cuml/fil/fil.h
@@ -71,6 +71,9 @@ enum output_t {
   /** output class label: either apply threshold to the output of the previous stage (for binary classification),
       or select the class with the most votes to get the class label (for multi-class classification).  */
   CLASS = 0x100,
+  SIGMOID_CLASS = SIGMOID | CLASS,
+  AVG_CLASS = AVG | CLASS,
+  AVG_SIGMOID_CLASS = AVG | SIGMOID | CLASS,
 };
 
 /** storage_type_t defines whether to import the forests as dense or sparse */