NVIDIA · wmaxey · Dec 13, 2022 · Dec 8, 2022 · Dec 8, 2022 · Dec 8, 2022
diff --git a/.upstream-tests/test/CMakeLists.txt b/.upstream-tests/test/CMakeLists.txt
@@ -70,3 +70,6 @@ configure_lit_site_cfg(
 add_lit_testsuite(check-cudacxx
   "Running libcu++ tests"
   ${CMAKE_CURRENT_BINARY_DIR})
+
+# Add the host-only targets that compile each globbed header as a standalone translation unit
+add_subdirectory(host_only)
diff --git a/.upstream-tests/test/host_only/CMakeLists.txt b/.upstream-tests/test/host_only/CMakeLists.txt
@@ -0,0 +1,40 @@
+# Creates one host-only executable that does nothing but include a single header,
+# so the build fails if that header cannot be compiled on its own.
+#
+#   test_file_output_var - name of a variable in the caller's scope that receives
+#                          the path of the generated .cpp file
+#   header_under_test    - header path as written in the #include, e.g. cuda/std/version
+#
+# Example: libcudacxx_add_standalone_header_test(test_file cuda/std/version)
+function(libcudacxx_add_standalone_header_test test_file_output_var header_under_test)
+    # ex: build/.../header_tests/cuda/std/version.cpp
+    set(test_cpp "${CMAKE_CURRENT_BINARY_DIR}/header_tests/${header_under_test}.cpp")
+
+    # @ONLY: substitute just the @header_under_test@ placeholder, never ${...} text.
+    configure_file(
+        "${CMAKE_CURRENT_SOURCE_DIR}/header_test.cpp.in"
+        "${test_cpp}"
+        @ONLY
+        )
+
+    set(${test_file_output_var} "${test_cpp}" PARENT_SCOPE)
+    # ex: cuda/std/version -> cuda_std_version
+    string(REPLACE "/" "_" executable_name "${header_under_test}")
+    add_executable(${executable_name} "${test_cpp}")
+    target_include_directories(${executable_name} PRIVATE "${CMAKE_SOURCE_DIR}/include")
+    # Warnings are promoted to errors so a header that only builds with diagnostics still fails.
+    # Each generator expression is one quoted argument so CMake cannot split it at spaces or ';'.
+    target_compile_options(${executable_name} PRIVATE
+        "$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:-Wall;-Werror>"
+        "$<$<CXX_COMPILER_ID:MSVC>:/W4;/WX>")
+    set_target_properties(${executable_name} PROPERTIES CXX_STANDARD 11)
+endfunction()
+
+# Headers directly under include/cuda/ are skipped for now: their standalone targets currently fail.
+# file(GLOB cuda_headers LIST_DIRECTORIES false RELATIVE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/include/cuda/*)
+file(GLOB cuda_std_headers LIST_DIRECTORIES false RELATIVE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/include/cuda/std/*)
+# cuda_headers is not set in this file (its glob is commented out); foreach() treats an unset list as empty.
+foreach(header IN LISTS cuda_headers cuda_std_headers)
+    libcudacxx_add_standalone_header_test(test_file ${header})
+    message(STATUS "Detected ${header} ... Writing ${test_file}")
+endforeach()
diff --git a/.upstream-tests/test/host_only/header_test.cpp.in b/.upstream-tests/test/host_only/header_test.cpp.in
@@ -0,0 +1,13 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// The include below is filled in per header by configure_file(); building this file is the whole test.
+#include <@header_under_test@>
+
+int main() {
+    return 0;
+}
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -230,4 +230,3 @@ if (_libcudacxx_enable_tests)
     ARGS ${LLVM_LIT_EXTRA_ARGS}
   )
 endif ()
-
diff --git a/environments/linux/docker/compose.yml b/environments/linux/docker/compose.yml
@@ -13,6 +13,7 @@ services:
       target: devenv
       args: &ubuntu22-args
         BUILDKIT_INLINE_CACHE: "1"
+        USE_LLVM_INSTALLER: "1"
         ROOT_IMAGE: "nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04"
         COMPILERS: "g++-11 g++-12 clang-11 clang-12 clang-13 clang-14 clang-15"
     deploy: &deploy-template
@@ -36,6 +37,7 @@ services:
       target: devenv
       args: &ubuntu20-args
         BUILDKIT_INLINE_CACHE: "1"
+        USE_LLVM_INSTALLER: "0"  # skip llvm.sh; COMPILERS are installed with apt-get only
         ROOT_IMAGE: "nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu20.04"
         COMPILERS: "g++-8 g++-9 g++-10 clang-8 clang-9 clang-10"
     deploy: *deploy-template
@@ -53,8 +55,9 @@ services:
       target: devenv
       args: &ubuntu18-args
         BUILDKIT_INLINE_CACHE: "1"
+        USE_LLVM_INSTALLER: "0"  # skip llvm.sh; COMPILERS are installed with apt-get only
         ROOT_IMAGE: "nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu18.04"
-        COMPILERS: "g++-5 g++-6 g++-7 clang-7"
+        COMPILERS: "g++-6 g++-7 clang-7"
     deploy: *deploy-template
 
 ##### Not used yet
@@ -246,20 +249,6 @@ services:
 
 
 ### Ubuntu 18.04 images:
-
-  gcc-5:
-    extends:
-      service: ubuntu18.04
-    image: "libcudacxx/gcc-5"
-    container_name: gcc-5
-    profiles: ["gcc-5"]
-    build:
-      context: ../../../
-      dockerfile: environments/linux/docker/ubuntu.base.Dockerfile
-      target: libcudacxx-configured
-      args:
-        HOST_CXX: "g++-5"
-
   gcc-6:
     extends:
       service: ubuntu18.04

diff --git a/environments/linux/docker/ubuntu.base.Dockerfile b/environments/linux/docker/ubuntu.base.Dockerfile
@@ -16,6 +16,8 @@ ARG UBUNTU_TOOL_DEB_REPO=https://ppa.launchpadcontent.net/ubuntu-toolchain-r/ppa
 ARG UBUNTU_TOOL_FINGER=60C317803A41BA51845E371A1E9377A2BA9EF27F
 
 ARG LLVM_INSTALLER=https://apt.llvm.org/llvm.sh
+ARG USE_LLVM_INSTALLER=1
+
 # `-y` answers yes to any interactive prompts.
 # `-qq` because apt is noisy
 ARG APT_GET="apt-get -y -qq"
@@ -44,7 +46,7 @@ RUN function comment() { :; }; \
         python3 python3-wheel python3-pip; \
     comment "Install GCC and Clang"; \
     # Unattended installation hack
-    echo "\n" | bash /tmp/llvm.sh all; \
+    if [ "${USE_LLVM_INSTALLER}" -eq "1" ]; then echo "\n" | bash /tmp/llvm.sh all; fi; \
     ${APT_GET} install gcc g++ ${COMPILERS}; \
     comment "Install CMake"; \
     sh /tmp/cmake.sh --skip-license --prefix=/usr; \
@@ -87,6 +89,7 @@ RUN cmake -S /libcudacxx -B /build \
         -DCMAKE_CUDA_FLAGS="-allow-unsupported-compiler"
 
 RUN make -j -C /build/libcxx
+RUN make -j -C /build/test/host_only
 
 ENV LIBCUDACXX_SITE_CONFIG=/build/test/lit.site.cfg
 ENV LIBCXX_SITE_CONFIG=/build/libcxx/test/lit.site.cfg
diff --git a/include/cuda/std/array b/include/cuda/std/array
@@ -9,6 +9,10 @@
 #ifndef _CUDA_ARRAY
 #define _CUDA_ARRAY
 
+#ifndef __CUDACC_RTC__
+# include <cstdlib>
+#endif
+
 #include "cassert"
 #include "cstdint"
 #include "limits"
@@ -30,5 +34,3 @@
 #include "detail/__pragma_pop"
 
 #endif //_CUDA_ARRAY
-
-
diff --git a/include/cuda/std/barrier b/include/cuda/std/barrier
@@ -11,6 +11,10 @@
 #  error "CUDA synchronization primitives are only supported for sm_70 and up."
 #endif
 
+#ifndef __CUDACC_RTC__
+# include <new>
+#endif
+
 #ifndef _CUDA_BARRIER
 #define _CUDA_BARRIER
 

diff --git a/include/cuda/std/cmath b/include/cuda/std/cmath
@@ -9,19 +9,18 @@
 #ifndef _CUDA_CMATH
 #define _CUDA_CMATH
 
+#ifndef __CUDACC_RTC__
+#include <math.h>
+#endif
+
 #include "limits"
 #include "type_traits"
 
 #include "detail/__config"
 
 #include "detail/__pragma_push"
-
-#ifndef _LIBCUDACXX_COMPILER_NVRTC
-#include <math.h>
-#endif
 #include "detail/libcxx/include/cmath"
 
 #include "detail/__pragma_pop"
 
 #endif //_CUDA_CMATH
-
diff --git a/include/cuda/std/detail/libcxx/include/__config b/include/cuda/std/detail/libcxx/include/__config
@@ -796,8 +796,8 @@ typedef __char32_t char32_t;
 #define _LIBCUDACXX_HAS_NO_ASAN
 #endif
 
-#if _GNUC_VER < 600 && defined(_LIBCUDACXX_COMPILER_NVCC)
-#define _LIBCUDACXX_MISSING_GCC_MATH_INTRINSICS
+#if _GNUC_VER < 600
+#define _LIBCUDACXX_GCC_MATH_IN_STD
 #endif
 
 #if _GNUC_VER >= 700

diff --git a/include/cuda/std/detail/libcxx/include/atomic b/include/cuda/std/detail/libcxx/include/atomic
@@ -599,6 +599,28 @@ void atomic_signal_fence(memory_order m) noexcept;
 #  include <string.h>
 #endif
 
+#if !defined(__CLANG_ATOMIC_BOOL_LOCK_FREE) && !defined(__GCC_ATOMIC_BOOL_LOCK_FREE)
+#define ATOMIC_BOOL_LOCK_FREE      2
+#define ATOMIC_CHAR_LOCK_FREE      2
+#define ATOMIC_CHAR16_T_LOCK_FREE  2
+#define ATOMIC_CHAR32_T_LOCK_FREE  2
+#define ATOMIC_WCHAR_T_LOCK_FREE   2
+#define ATOMIC_SHORT_LOCK_FREE     2
+#define ATOMIC_INT_LOCK_FREE       2
+#define ATOMIC_LONG_LOCK_FREE      2
+#define ATOMIC_LLONG_LOCK_FREE     2
+#define ATOMIC_POINTER_LOCK_FREE   2
+#endif //!defined(__CLANG_ATOMIC_BOOL_LOCK_FREE) && !defined(__GCC_ATOMIC_BOOL_LOCK_FREE)
+
+#ifndef __ATOMIC_RELAXED
+#define __ATOMIC_RELAXED 0
+#define __ATOMIC_CONSUME 1
+#define __ATOMIC_ACQUIRE 2
+#define __ATOMIC_RELEASE 3
+#define __ATOMIC_ACQ_REL 4
+#define __ATOMIC_SEQ_CST 5
+#endif //__ATOMIC_RELAXED
+
 _LIBCUDACXX_BEGIN_NAMESPACE_STD
 
 // Figure out what the underlying type for `memory_order` would be if it were

diff --git a/include/cuda/std/detail/libcxx/include/bit b/include/cuda/std/detail/libcxx/include/bit
@@ -171,7 +171,7 @@ int __fallback_popc8(uint64_t __x) {
 inline _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_BIT_CONSTEXPR
 int __fallback_popc16(uint64_t __x) {
   return __fallback_popc8(
-      __x + (__x >> 4) & 0x0f0f0f0f0f0f0f0f);
+      (__x + (__x >> 4)) & 0x0f0f0f0f0f0f0f0f);
 }
 
 inline _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_BIT_CONSTEXPR

diff --git a/include/cuda/std/detail/libcxx/include/cmath b/include/cuda/std/detail/libcxx/include/cmath
@@ -318,12 +318,8 @@ long double    truncl(long double x);
 #define NAN      __builtin_nan()
 #endif
 
-_LIBCUDACXX_BEGIN_NAMESPACE_STD
 
-using ::signbit;
-using ::isfinite;
-using ::isinf;
-using ::isnan;
+_LIBCUDACXX_BEGIN_NAMESPACE_STD
 
 using ::acos;
 using ::acosf;
@@ -377,18 +373,20 @@ using ::abs;
 #endif
 
 #ifndef _LIBCUDACXX_COMPILER_NVRTC
-// GCC <= 50X is missing some math intrinsics in the global namespace
-#if !defined(_LIBCUDACXX_MISSING_GCC_MATH_INTRINSICS)
+
 using ::fpclassify;
-using ::isnormal;
 using ::isgreater;
 using ::isgreaterequal;
 using ::isless;
 using ::islessequal;
 using ::islessgreater;
+
+using ::signbit;
+using ::isnan;
+using ::isfinite;
+using ::isinf;
+using ::isnormal;
 using ::isunordered;
-using ::isunordered;
-#endif
 
 using ::float_t;
 using ::double_t;

diff --git a/include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda.h b/include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda.h
@@ -11,28 +11,6 @@
 #  error "CUDA atomics are only supported for sm_60 and up on *nix and sm_70 and up on Windows."
 #endif
 
-#if !defined(__CLANG_ATOMIC_BOOL_LOCK_FREE) && !defined(__GCC_ATOMIC_BOOL_LOCK_FREE)
-#define ATOMIC_BOOL_LOCK_FREE      2
-#define ATOMIC_CHAR_LOCK_FREE      2
-#define ATOMIC_CHAR16_T_LOCK_FREE  2
-#define ATOMIC_CHAR32_T_LOCK_FREE  2
-#define ATOMIC_WCHAR_T_LOCK_FREE   2
-#define ATOMIC_SHORT_LOCK_FREE     2
-#define ATOMIC_INT_LOCK_FREE       2
-#define ATOMIC_LONG_LOCK_FREE      2
-#define ATOMIC_LLONG_LOCK_FREE     2
-#define ATOMIC_POINTER_LOCK_FREE   2
-#endif //!defined(__CLANG_ATOMIC_BOOL_LOCK_FREE) && !defined(__GCC_ATOMIC_BOOL_LOCK_FREE)
-
-#ifndef __ATOMIC_RELAXED
-#define __ATOMIC_RELAXED 0
-#define __ATOMIC_CONSUME 1
-#define __ATOMIC_ACQUIRE 2
-#define __ATOMIC_RELEASE 3
-#define __ATOMIC_ACQ_REL 4
-#define __ATOMIC_SEQ_CST 5
-#endif //__ATOMIC_RELAXED
-
 inline __host__ __device__ int __stronger_order_cuda(int __a, int __b) {
     int const __max = __a > __b ? __a : __b;
     if(__max != __ATOMIC_RELEASE)