diff --git a/src/compiler/mod.rs b/src/compiler/mod.rs index 051ccc2c8..0b434c5a0 100644 --- a/src/compiler/mod.rs +++ b/src/compiler/mod.rs @@ -32,5 +32,6 @@ mod tasking_vx; #[macro_use] mod counted_array; +pub use crate::compiler::c::CCompilerKind; pub use crate::compiler::compiler::*; pub use crate::compiler::preprocessor_cache::PreprocessorCacheEntry; diff --git a/src/compiler/nvcc.rs b/src/compiler/nvcc.rs index 5da0a53a4..bc24d0e59 100644 --- a/src/compiler/nvcc.rs +++ b/src/compiler/nvcc.rs @@ -461,7 +461,20 @@ pub fn generate_compile_commands( output_file_name: output.file_name().unwrap().to_owned(), }; - Ok((command, None, Cacheable::Yes)) + Ok(( + command, + None, + // Never assume the outer `nvcc` call is cacheable. We must decompose the nvcc call into + // its constituent subcommands with `--dryrun` and only cache the final build product. + // + // Always decomposing `nvcc --dryrun` is the only way to ensure caching nvcc invocations + // is fully sound, because the `nvcc -E` preprocessor output is not sufficient to detect + // all source code changes. + // + // Specifically, `nvcc -E` always defines __CUDA_ARCH__, which means changes to host-only + // code guarded by an `#ifndef __CUDA_ARCH__` will _not_ be captured in `nvcc -E` output. + Cacheable::No, + )) } #[derive(Clone, Debug)] @@ -811,19 +824,28 @@ where ) } } else { - // Returns Cacheable::Yes to indicate we _do_ want to run this host - // compiler call through sccache (because it may be distributed), - // but we _do not_ want to cache its output. The output file will - // be cached as the result of the outer `nvcc` command. Caching - // here would store the same object twice under two different hashes, - // unnecessarily bloating the cache size. + // Cache the host compiler calls, since we've marked the outer `nvcc` call + // as non-cacheable. This ensures `sccache nvcc ...` _always_ decomposes the + // nvcc call into its constituent subcommands with `--dryrun`, but only caches + // the final build product once. + // + // Always decomposing `nvcc --dryrun` is the only way to ensure caching nvcc invocations + // is fully sound, because the `nvcc -E` preprocessor output is not sufficient to detect + // all source code changes. + // + // Specifically, `nvcc -E` always defines __CUDA_ARCH__, which means changes to host-only + // code guarded by an `#ifndef __CUDA_ARCH__` will _not_ be captured in `nvcc -E` output. ( env_vars .iter() .chain( [ - // Do not cache host compiler calls - ("SCCACHE_NO_CACHE".into(), "true".into()), + // HACK: This compilation will look like a C/C++ compilation, + // but we want to report it in the stats as a CUDA compilation. + // The SccacheService API doesn't have a great way to specify this + // case, so we set a special envvar here that it can read when the + // compilation is finished. + ("__SCCACHE_THIS_IS_A_CUDA_COMPILATION__".into(), "".into()), ] .iter(), ) diff --git a/src/server.rs b/src/server.rs index 14207b48c..0620e61b9 100644 --- a/src/server.rs +++ b/src/server.rs @@ -1305,8 +1305,22 @@ where let out_pretty = hasher.output_pretty().into_owned(); let color_mode = hasher.color_mode(); - let kind = compiler.kind(); - let lang = hasher.language(); + + let (kind, lang) = { + // HACK: See note in src/compiler/nvcc.rs + if env_vars + .iter() + .any(|(k, _)| k == "__SCCACHE_THIS_IS_A_CUDA_COMPILATION__") + { + ( + CompilerKind::C(crate::compiler::CCompilerKind::Nvcc), + Language::Cuda, + ) + } else { + (compiler.kind(), hasher.language()) + } + }; + let me = self.clone(); self.rt diff --git a/tests/system.rs b/tests/system.rs index 75fa004a6..de480dfa9 100644 --- a/tests/system.rs +++ b/tests/system.rs @@ -703,12 +703,12 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) { trace!("compile A request stats"); get_stats(|info| { assert_eq!(2, info.stats.compile_requests); - assert_eq!(5, info.stats.requests_executed); - assert_eq!(1, info.stats.cache_hits.all()); + assert_eq!(8, info.stats.requests_executed); + assert_eq!(3, info.stats.cache_hits.all()); assert_eq!(3, info.stats.cache_misses.all()); assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap()); - assert!(info.stats.cache_hits.get("PTX").is_none()); - assert!(info.stats.cache_hits.get("CUBIN").is_none()); + assert_eq!(&1, info.stats.cache_hits.get("PTX").unwrap()); + assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap()); assert_eq!(&1, info.stats.cache_misses.get("CUDA").unwrap()); assert_eq!(&1, info.stats.cache_misses.get("PTX").unwrap()); assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap()); @@ -717,8 +717,8 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) { let adv_ptx_key = adv_key_kind("ptx", compiler.name); let adv_cubin_key = adv_key_kind("cubin", compiler.name); assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap()); - assert!(info.stats.cache_hits.get_adv(&adv_ptx_key).is_none()); - assert!(info.stats.cache_hits.get_adv(&adv_cubin_key).is_none()); + assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap()); + assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap()); assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap()); assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap()); assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap()); @@ -747,12 +747,12 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) { trace!("compile B request stats"); get_stats(|info| { assert_eq!(3, info.stats.compile_requests); - assert_eq!(9, info.stats.requests_executed); - assert_eq!(2, info.stats.cache_hits.all()); + assert_eq!(12, info.stats.requests_executed); + assert_eq!(4, info.stats.cache_hits.all()); assert_eq!(5, info.stats.cache_misses.all()); assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap()); - assert!(info.stats.cache_hits.get("PTX").is_none()); - assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap()); + assert_eq!(&1, info.stats.cache_hits.get("PTX").unwrap()); + assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap()); assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap()); assert_eq!(&2, info.stats.cache_misses.get("PTX").unwrap()); assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap()); @@ -761,8 +761,8 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) { let adv_ptx_key = adv_key_kind("ptx", compiler.name); let adv_cubin_key = adv_key_kind("cubin", compiler.name); assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap()); - assert!(info.stats.cache_hits.get_adv(&adv_ptx_key).is_none()); - assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap()); + assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap()); + assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap()); assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap()); assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap()); assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap()); @@ -789,13 +789,13 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) { trace!("compile ptx request stats"); get_stats(|info| { assert_eq!(4, info.stats.compile_requests); - assert_eq!(11, info.stats.requests_executed); - assert_eq!(3, info.stats.cache_hits.all()); - assert_eq!(6, info.stats.cache_misses.all()); + assert_eq!(14, info.stats.requests_executed); + assert_eq!(5, info.stats.cache_hits.all()); + assert_eq!(5, info.stats.cache_misses.all()); assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap()); - assert_eq!(&1, info.stats.cache_hits.get("PTX").unwrap()); - assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap()); - assert_eq!(&3, info.stats.cache_misses.get("CUDA").unwrap()); + assert_eq!(&2, info.stats.cache_hits.get("PTX").unwrap()); + assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap()); + assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap()); assert_eq!(&2, info.stats.cache_misses.get("PTX").unwrap()); assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap()); assert!(info.stats.cache_misses.get("C/C++").is_none()); @@ -803,9 +803,9 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) { let adv_ptx_key = adv_key_kind("ptx", compiler.name); let adv_cubin_key = adv_key_kind("cubin", compiler.name); assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap()); - assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap()); - assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap()); - assert_eq!(&3, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap()); + assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap()); + assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap()); + assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap()); assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap()); assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap()); }); @@ -831,13 +831,13 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) { trace!("compile cubin request stats"); get_stats(|info| { assert_eq!(5, info.stats.compile_requests); - assert_eq!(14, info.stats.requests_executed); - assert_eq!(5, info.stats.cache_hits.all()); - assert_eq!(7, info.stats.cache_misses.all()); + assert_eq!(17, info.stats.requests_executed); + assert_eq!(7, info.stats.cache_hits.all()); + assert_eq!(5, info.stats.cache_misses.all()); assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap()); - assert_eq!(&2, info.stats.cache_hits.get("PTX").unwrap()); - assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap()); - assert_eq!(&4, info.stats.cache_misses.get("CUDA").unwrap()); + assert_eq!(&3, info.stats.cache_hits.get("PTX").unwrap()); + assert_eq!(&3, info.stats.cache_hits.get("CUBIN").unwrap()); + assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap()); assert_eq!(&2, info.stats.cache_misses.get("PTX").unwrap()); assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap()); assert!(info.stats.cache_misses.get("C/C++").is_none()); @@ -845,9 +845,9 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) { let adv_ptx_key = adv_key_kind("ptx", compiler.name); let adv_cubin_key = adv_key_kind("cubin", compiler.name); assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap()); - assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap()); - assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap()); - assert_eq!(&4, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap()); + assert_eq!(&3, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap()); + assert_eq!(&3, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap()); + assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap()); assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap()); assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap()); }); @@ -914,14 +914,14 @@ fn test_nvcc_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) { trace!("request stats"); get_stats(|info| { assert_eq!(4, info.stats.compile_requests); - assert_eq!(8, info.stats.requests_executed); - assert_eq!(3, info.stats.cache_hits.all()); + assert_eq!(12, info.stats.requests_executed); + assert_eq!(5, info.stats.cache_hits.all()); assert_eq!(3, info.stats.cache_misses.all()); - assert_eq!(&1, info.stats.cache_hits.get("C/C++").unwrap()); - assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap()); - assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap()); - assert_eq!(&1, info.stats.cache_misses.get("C/C++").unwrap()); - assert_eq!(&1, info.stats.cache_misses.get("CUDA").unwrap()); + assert!(info.stats.cache_hits.get("C/C++").is_none()); + assert_eq!(&2, info.stats.cache_hits.get("CUDA").unwrap()); + assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap()); + assert!(info.stats.cache_misses.get("C/C++").is_none()); + assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap()); assert_eq!(&1, info.stats.cache_misses.get("PTX").unwrap()); }); }