Test that RAPIDS_NO_INITIALIZE means no cuInit #12361

Closed
19 changes: 19 additions & 0 deletions cpp/CMakeLists.txt
@@ -669,6 +669,25 @@ target_link_libraries(
$<TARGET_NAME_IF_EXISTS:cuFile_interface>
)

add_library(cudfcuinit_intercept SHARED src/utilities/cuinit_intercept.cpp)
set_target_properties(
cudfcuinit_intercept
PROPERTIES BUILD_RPATH "\$ORIGIN"
INSTALL_RPATH "\$ORIGIN"
# set target compile options
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
CUDA_STANDARD 17
CUDA_STANDARD_REQUIRED ON
POSITION_INDEPENDENT_CODE ON
INTERFACE_POSITION_INDEPENDENT_CODE ON
)

if(TARGET conda_env)
target_link_libraries(cudfcuinit_intercept PRIVATE conda_env)
endif()
target_link_libraries(cudfcuinit_intercept PUBLIC CUDA::cudart cuda dl)

Contributor Author:

Need some help here: I'm completely flying blind, and this is wrong AFAICT.

Basically, I have a single file that I want to compile into a shared library and link against libdl and libcuda.

Contributor:

Why do you need to link to libdl? I think linking to libc is sufficient for your purposes (dlfcn). Linking to CUDA seems reasonable here (although if you care specifically about whether it is dynamically or statically linked, you will want to set the CUDA_RUNTIME_LIBRARY property).

Contributor Author:

I think it is only glibc 2.34 and later where you don't need to link libdl to get access to dlsym and friends (see https://sourceware.org/pipermail/libc-alpha/2021-August/129718.html and bminor/glibc@77f876c) unless I am misunderstanding something.

In any case, I'd be very happy for someone who knows what they are doing to help rewrite this part of the patch completely.

Contributor Author:

I actually don't need cudart at all, only -lcuda, which I think I should get with CUDA::cuda_driver?

# Add Conda library, and include paths if specified
if(TARGET conda_env)
target_link_libraries(cudf PRIVATE conda_env)
118 changes: 118 additions & 0 deletions cpp/src/utilities/cuinit_intercept.cpp
@@ -0,0 +1,118 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <cuda.h>
#include <dlfcn.h>
#include <iostream>

#if defined(__GLIBC__) && __GLIBC__ >= 2 && defined(__GLIBC_MINOR__) && __GLIBC_MINOR__ >= 1
namespace {
static int cuInitCount{0};
using init_t = CUresult (*)(unsigned int);
using proc_t = CUresult (*)(const char*,
void**,
int,
cuuint64_t
#if CUDA_VERSION >= 12000
,
CUdriverProcAddressQueryResult*
#endif
);
using dlsym_t = void* (*)(void*, const char*);
static init_t original_cuInit{nullptr};
static proc_t original_cuGetProcAddress{nullptr};
static dlsym_t original_dlsym{nullptr};

static __attribute__((constructor)) void init_cuInit_hack()
{
// Hack hack hack, only for glibc, this magic number can be found in
// glibc's sysdeps/unix/sysv/linux/x86_64/64/libc.abilist (glibc >=
// 2.34) (or libdl.abilist (glibc < 2.34).
original_dlsym = (dlsym_t)dlvsym(RTLD_NEXT, "dlsym", "GLIBC_2.2.5");
if (original_dlsym) {
original_cuGetProcAddress = (proc_t)original_dlsym(RTLD_NEXT, "cuGetProcAddress");
}
Contributor Author:

For driver calls there are two ways python libraries resolve them:

  1. [numba does this] dlopen libcuda.so and then dlsym on the handle
  2. [cuda-python does this] dlopen libcuda, dlsym cuGetProcAddress and then call cuGetProcAddress to get the driver symbol

So unfortunately, it's not sufficient to just define cuInit in this shared library and override the symbol resolution via LD_PRELOAD. We instead have to patch into dlsym and cuGetProcAddress. The latter is easy; the former is hard (we can't just dlsym(RTLD_NEXT, ...) here because that would call the local function). Instead, we use glibc's versioned lookup dlvsym, but now we need to match the glibc symbol version exactly in the running environment (this is the one my conda environment has).

I guess I could spin over a bunch of versions until I find the right one.

Any other suggestions gratefully received.
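
For illustration, here is a minimal sketch (not part of this patch) of the two lookup paths described above, assuming the pre-CUDA-12 four-argument cuGetProcAddress signature and omitting error handling:

#include <cuda.h>
#include <dlfcn.h>

// Path 1 (numba-style): dlopen the driver, then dlsym the symbol by name.
void* resolve_cuinit_via_dlsym()
{
  void* handle = dlopen("libcuda.so.1", RTLD_NOW);
  return dlsym(handle, "cuInit");
}

// Path 2 (cuda-python-style): dlsym only cuGetProcAddress, then ask the
// driver itself for the cuInit entry point.
void* resolve_cuinit_via_getprocaddress()
{
  using proc_t  = CUresult (*)(const char*, void**, int, cuuint64_t);
  void* handle  = dlopen("libcuda.so.1", RTLD_NOW);
  auto get_proc = (proc_t)dlsym(handle, "cuGetProcAddress");
  void* fn      = nullptr;
  get_proc("cuInit", &fn, CUDA_VERSION, CU_GET_PROC_ADDRESS_DEFAULT);
  return fn;
}

// An LD_PRELOAD shim that only exports cuInit intercepts neither path, which
// is why the patch also wraps dlsym and cuGetProcAddress.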

Contributor Author (@wence-, Dec 12, 2022):

> but now we need to match the glibc version exactly in the running environment (this is the one my conda environment has).

Versioning is not quite as bad as this: 2.2.5 is a magic number, but it will be stable forever (due to glibc's forward-compatibility guarantee).
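
If the hard-coded tag ever needed generalizing (for example on architectures whose glibc baseline differs from x86_64), one hypothetical fallback would be to probe a small list of known version tags. A sketch, not something this patch does:

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <dlfcn.h>

using dlsym_t = void* (*)(void*, const char*);

static dlsym_t find_real_dlsym()
{
  // GLIBC_2.2.5 is the x86_64 baseline; other architectures baseline at later
  // versions (e.g. GLIBC_2.17 on aarch64), and glibc 2.34 moved dlsym into
  // libc proper while keeping the old versioned aliases around.
  const char* versions[] = {"GLIBC_2.2.5", "GLIBC_2.17", "GLIBC_2.34"};
  for (const char* version : versions) {
    if (void* sym = dlvsym(RTLD_NEXT, "dlsym", version)) { return (dlsym_t)sym; }
  }
  return nullptr;
}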

Contributor:

Could you patch into numba and cuda-python instead?

Contributor Author:

> Could you patch into numba and cuda-python instead?

I can patch into numba, because its CUDA interface is implemented in Python, but I can't do that for cuda-python (or cupy) because their CUDA interfaces are implemented in Cython (so compiled), and hence monkey-patching won't work.

I also want to avoid a situation where some further third-party dependency is pulled in that also brings up a cuda context (perhaps directly via the C API). Since eventually everyone actually calls into the driver API, this seems like the best place to hook in.

Contributor:

There are some Linux options that I think are more reliable and do not require patching.
How about something like this: https://stackoverflow.com/questions/5103443/how-to-check-what-shared-libraries-are-loaded-at-run-time-for-a-given-process ?
I could try to work up a script based on this if you'd like.
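
For reference, the check suggested in that SO thread amounts to scanning the process's memory mappings. A minimal Linux-only sketch (an assumption of what such a script would do, not part of this PR):

#include <fstream>
#include <string>

// Returns true if the CUDA driver library is mapped into this process.
// Note: this only shows that libcuda was loaded, not that cuInit was called.
bool libcuda_is_loaded()
{
  std::ifstream maps{"/proc/self/maps"};
  std::string line;
  while (std::getline(maps, line)) {
    if (line.find("libcuda.so") != std::string::npos) { return true; }
  }
  return false;
}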

Contributor Author:

Will that tell me if cuInit is called? I think not.

Contributor Author (@wence-, Dec 13, 2022):

I guess we could run the process, inspect it with NVML, and try to match that way.

Contributor:

This SO post seems to settle on basically the same thing that you do (funnily enough, there's another post about how Citrix copy-pasted this solution disregarding the issues and broke some users).

Due to the extensive dlopening/dlsyming happening, I am not sure that strace or ltrace (or anything like them) will be sufficient to detect the calls, which would have been the easier route here, as David suggests. If all functions were called by name then I think ltrace would have been sufficient, but as it is you'll only see the dlopen of libcuda.so and then dlsym returning some arbitrary memory address. You could hope that the dlsym calls always pass a symbol name that includes cuInit; I think that would show up? It would probably only catch a subset of cases, though.

}

extern "C" {
CUresult cuInit(unsigned int flags)
{
if (!original_cuInit) {
void* ptr{nullptr};
CUresult err = original_cuGetProcAddress("cuInit",
&ptr,
CUDA_VERSION,
CU_GET_PROC_ADDRESS_DEFAULT
#if CUDA_VERSION >= 12000
Contributor Author:

ABI change.

,
nullptr
#endif
);
if (err != CUDA_SUCCESS) { return err; }
if (ptr) { original_cuInit = (init_t)(ptr); }
}
std::cerr << "cuInit has been called " << ++cuInitCount << " times" << std::endl;
if (original_cuInit) {
return original_cuInit(flags);
} else {
return CUDA_ERROR_NOT_INITIALIZED;
}
}

CUresult cuGetProcAddress(const char* symbol,
void** pfn,
int cudaVersion,
cuuint64_t flags
#if CUDA_VERSION >= 12000
,
CUdriverProcAddressQueryResult* symbolStatus
#endif
)
{
if (!original_cuGetProcAddress) { return CUDA_ERROR_NOT_SUPPORTED; }
CUresult err = original_cuGetProcAddress(symbol,
pfn,
cudaVersion,
flags
#if CUDA_VERSION >= 12000
,
symbolStatus
#endif
);
if (std::string{symbol} == "cuInit") {
original_cuInit = (init_t)(*pfn);
*pfn = (void*)cuInit;
}
return err;
}

void* dlsym(void* handle, const char* name_)
{
std::string name{name_};
Contributor Author:

TODO: error handling in all these wrappers in case the resolution of the original functions failed (at which point we can only abort)

if (name == "cuInit") {
return (void*)cuInit;
} else if (name == "cuGetProcAddress") {
return (void*)cuGetProcAddress;
} else {
return original_dlsym(handle, name_);
}
}
}
} // namespace
#endif
Contributor Author:

This same stuff could easily be extended to address @jrhemstad's request in #11546 that we test that RMM is the only allocator of memory.
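
As a hypothetical illustration (not part of this PR), the same dlsym/cuGetProcAddress hooks could redirect lookups of the driver allocation entry points (e.g. cuMemAlloc / cuMemAlloc_v2) to a counting wrapper along these lines:

#include <cuda.h>
#include <iostream>

namespace {
using mem_alloc_t = CUresult (*)(CUdeviceptr*, size_t);
// Captured the same way original_cuInit is, via the dlsym/cuGetProcAddress hooks.
static mem_alloc_t original_cuMemAlloc{nullptr};
static int cuMemAllocCount{0};
}  // namespace

extern "C" CUresult intercepted_cuMemAlloc(CUdeviceptr* dptr, size_t bytesize)
{
  std::cerr << "cuMemAlloc has been called " << ++cuMemAllocCount << " times" << std::endl;
  if (!original_cuMemAlloc) { return CUDA_ERROR_NOT_INITIALIZED; }
  return original_cuMemAlloc(dptr, bytesize);
}

A pytest similar to test_import_no_cuinit could then assert on the wrapper's output.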

28 changes: 28 additions & 0 deletions python/cudf/cudf/tests/test_nocuinit.py
@@ -0,0 +1,28 @@
# Copyright (c) 2022, NVIDIA CORPORATION.

import os
import subprocess
import sys
from pathlib import Path

import pytest

location = Path(__file__)
cpp_build_dir = location / ".." / ".." / ".." / ".." / ".." / "cpp" / "build"
libintercept = (cpp_build_dir / "libcudfcuinit_intercept.so").resolve()
Comment on lines +10 to +12
Contributor Author:

What's the right way to reference this? Right now I'm assuming the build directory exists (because I didn't manage to wrangle cmake to install the library). Equally, however, I'm not sure we really want to install this library?

Contributor:

Oh boy, this is fun. I don't think there is a perfect solution here. FWIW my approach to this in #11875 was to move building the preload lib out of the main libcudf build, build it separately as part of CI, and then just launch tests with the preload library directly from the CLI in CI. That functionality was disabled as part of the Jenkins->GHA migration. Given that you're working on this, it may be time to investigate how to reenable that functionality within GHA.

@robertmaynard do you think that preload libraries like this or the stream verification lib should be built within the main CMakeLists.txt for the library, or shipped along with the conda packages? I had avoided that mostly because in the end we need the paths to the library anyway in order to preload, so it's not a great fit, but I know others had expressed different opinions. Depending on what direction we take with that we will need to adapt the solution in this pytest for how the library is discovered I think.

Contributor Author:

I presume the stream verification lib is also a single library. My first thought had been to just compile the .so as part of the test, referencing the source directory. But then I realised that I would need someone to provide information about the compiler configuration and so forth.



@pytest.mark.skipif(
not libintercept.exists(),
reason="libcudfcuinit_intercept.so not built, can't check for cuInit",
)
def test_import_no_cuinit():
env = os.environ.copy()
env["RAPIDS_NO_INITIALIZE"] = "1"
env["LD_PRELOAD"] = str(libintercept)
output = subprocess.check_output(
[sys.executable, "-c", "import cudf"],
env=env,
stderr=subprocess.STDOUT,
)
assert "cuInit has been called" not in output.decode()