Skip to content

Commit

Permalink
Never cache the outer CUDA compilation (because nvcc -E can't be trusted).
Browse files Browse the repository at this point in the history
Always decompose via `nvcc --dryrun`, then cache and report the host compiler call as a CUDA compilation
  • Loading branch information
trxcllnt committed Dec 21, 2024
1 parent 6bc8154 commit a03b43c
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 48 deletions.
1 change: 1 addition & 0 deletions src/compiler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,6 @@ mod tasking_vx;
#[macro_use]
mod counted_array;

pub use crate::compiler::c::CCompilerKind;
pub use crate::compiler::compiler::*;
pub use crate::compiler::preprocessor_cache::PreprocessorCacheEntry;
40 changes: 31 additions & 9 deletions src/compiler/nvcc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,20 @@ pub fn generate_compile_commands(
output_file_name: output.file_name().unwrap().to_owned(),
};

Ok((command, None, Cacheable::Yes))
Ok((
command,
None,
// Never assume the outer `nvcc` call is cacheable. We must decompose the nvcc call into
// its constituent subcommands with `--dryrun` and only cache the final build product.
//
// Always decomposing the call via `nvcc --dryrun` is the only way to ensure that caching
// nvcc invocations is fully sound, because the `nvcc -E` preprocessor output is not
// sufficient to detect all source code changes.
//
// Specifically, `nvcc -E` always defines __CUDA_ARCH__, which means changes to host-only
// code guarded by an `#ifndef __CUDA_ARCH__` will _not_ be captured in `nvcc -E` output.
Cacheable::No,
))
}

#[derive(Clone, Debug)]
Expand Down Expand Up @@ -811,19 +824,28 @@ where
)
}
} else {
// Returns Cacheable::Yes to indicate we _do_ want to run this host
// compiler call through sccache (because it may be distributed),
// but we _do not_ want to cache its output. The output file will
// be cached as the result of the outer `nvcc` command. Caching
// here would store the same object twice under two different hashes,
// unnecessarily bloating the cache size.
// Cache the host compiler calls, since we've marked the outer `nvcc` call
// as non-cacheable. This ensures `sccache nvcc ...` _always_ decomposes the
// nvcc call into its constituent subcommands with `--dryrun`, but only caches
// the final build product once.
//
// Always decomposing the call via `nvcc --dryrun` is the only way to ensure that caching
// nvcc invocations is fully sound, because the `nvcc -E` preprocessor output is not
// sufficient to detect all source code changes.
//
// Specifically, `nvcc -E` always defines __CUDA_ARCH__, which means changes to host-only
// code guarded by an `#ifndef __CUDA_ARCH__` will _not_ be captured in `nvcc -E` output.
(
env_vars
.iter()
.chain(
[
// Do not cache host compiler calls
("SCCACHE_NO_CACHE".into(), "true".into()),
// HACK: This compilation will look like a C/C++ compilation,
// but we want to report it in the stats as a CUDA compilation.
// The SccacheService API doesn't have a great way to specify this
// case, so we set a special envvar here that it can read when the
// compilation is finished.
("__SCCACHE_THIS_IS_A_CUDA_COMPILATION__".into(), "".into()),
]
.iter(),
)
Expand Down
18 changes: 16 additions & 2 deletions src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1305,8 +1305,22 @@ where

let out_pretty = hasher.output_pretty().into_owned();
let color_mode = hasher.color_mode();
let kind = compiler.kind();
let lang = hasher.language();

let (kind, lang) = {
// HACK: See note in src/compiler/nvcc.rs
if env_vars
.iter()
.any(|(k, _)| k == "__SCCACHE_THIS_IS_A_CUDA_COMPILATION__")
{
(
CompilerKind::C(crate::compiler::CCompilerKind::Nvcc),
Language::Cuda,
)
} else {
(compiler.kind(), hasher.language())
}
};

let me = self.clone();

self.rt
Expand Down
74 changes: 37 additions & 37 deletions tests/system.rs
Original file line number Diff line number Diff line change
Expand Up @@ -703,12 +703,12 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
trace!("compile A request stats");
get_stats(|info| {
assert_eq!(2, info.stats.compile_requests);
assert_eq!(5, info.stats.requests_executed);
assert_eq!(1, info.stats.cache_hits.all());
assert_eq!(8, info.stats.requests_executed);
assert_eq!(3, info.stats.cache_hits.all());
assert_eq!(3, info.stats.cache_misses.all());
assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
assert!(info.stats.cache_hits.get("PTX").is_none());
assert!(info.stats.cache_hits.get("CUBIN").is_none());
assert_eq!(&1, info.stats.cache_hits.get("PTX").unwrap());
assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("CUDA").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("PTX").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
Expand All @@ -717,8 +717,8 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
let adv_ptx_key = adv_key_kind("ptx", compiler.name);
let adv_cubin_key = adv_key_kind("cubin", compiler.name);
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
assert!(info.stats.cache_hits.get_adv(&adv_ptx_key).is_none());
assert!(info.stats.cache_hits.get_adv(&adv_cubin_key).is_none());
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
Expand Down Expand Up @@ -747,12 +747,12 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
trace!("compile B request stats");
get_stats(|info| {
assert_eq!(3, info.stats.compile_requests);
assert_eq!(9, info.stats.requests_executed);
assert_eq!(2, info.stats.cache_hits.all());
assert_eq!(12, info.stats.requests_executed);
assert_eq!(4, info.stats.cache_hits.all());
assert_eq!(5, info.stats.cache_misses.all());
assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
assert!(info.stats.cache_hits.get("PTX").is_none());
assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap());
assert_eq!(&1, info.stats.cache_hits.get("PTX").unwrap());
assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap());
assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap());
assert_eq!(&2, info.stats.cache_misses.get("PTX").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
Expand All @@ -761,8 +761,8 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
let adv_ptx_key = adv_key_kind("ptx", compiler.name);
let adv_cubin_key = adv_key_kind("cubin", compiler.name);
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
assert!(info.stats.cache_hits.get_adv(&adv_ptx_key).is_none());
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
Expand All @@ -789,23 +789,23 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
trace!("compile ptx request stats");
get_stats(|info| {
assert_eq!(4, info.stats.compile_requests);
assert_eq!(11, info.stats.requests_executed);
assert_eq!(3, info.stats.cache_hits.all());
assert_eq!(6, info.stats.cache_misses.all());
assert_eq!(14, info.stats.requests_executed);
assert_eq!(5, info.stats.cache_hits.all());
assert_eq!(5, info.stats.cache_misses.all());
assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
assert_eq!(&1, info.stats.cache_hits.get("PTX").unwrap());
assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap());
assert_eq!(&3, info.stats.cache_misses.get("CUDA").unwrap());
assert_eq!(&2, info.stats.cache_hits.get("PTX").unwrap());
assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap());
assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap());
assert_eq!(&2, info.stats.cache_misses.get("PTX").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
assert!(info.stats.cache_misses.get("C/C++").is_none());
let adv_cuda_key = adv_key_kind("cuda", compiler.name);
let adv_ptx_key = adv_key_kind("ptx", compiler.name);
let adv_cubin_key = adv_key_kind("cubin", compiler.name);
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
assert_eq!(&3, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
});
Expand All @@ -831,23 +831,23 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
trace!("compile cubin request stats");
get_stats(|info| {
assert_eq!(5, info.stats.compile_requests);
assert_eq!(14, info.stats.requests_executed);
assert_eq!(5, info.stats.cache_hits.all());
assert_eq!(7, info.stats.cache_misses.all());
assert_eq!(17, info.stats.requests_executed);
assert_eq!(7, info.stats.cache_hits.all());
assert_eq!(5, info.stats.cache_misses.all());
assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
assert_eq!(&2, info.stats.cache_hits.get("PTX").unwrap());
assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap());
assert_eq!(&4, info.stats.cache_misses.get("CUDA").unwrap());
assert_eq!(&3, info.stats.cache_hits.get("PTX").unwrap());
assert_eq!(&3, info.stats.cache_hits.get("CUBIN").unwrap());
assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap());
assert_eq!(&2, info.stats.cache_misses.get("PTX").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
assert!(info.stats.cache_misses.get("C/C++").is_none());
let adv_cuda_key = adv_key_kind("cuda", compiler.name);
let adv_ptx_key = adv_key_kind("ptx", compiler.name);
let adv_cubin_key = adv_key_kind("cubin", compiler.name);
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
assert_eq!(&4, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
assert_eq!(&3, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
assert_eq!(&3, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
});
Expand Down Expand Up @@ -914,14 +914,14 @@ fn test_nvcc_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) {
trace!("request stats");
get_stats(|info| {
assert_eq!(4, info.stats.compile_requests);
assert_eq!(8, info.stats.requests_executed);
assert_eq!(3, info.stats.cache_hits.all());
assert_eq!(12, info.stats.requests_executed);
assert_eq!(5, info.stats.cache_hits.all());
assert_eq!(3, info.stats.cache_misses.all());
assert_eq!(&1, info.stats.cache_hits.get("C/C++").unwrap());
assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("C/C++").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("CUDA").unwrap());
assert!(info.stats.cache_hits.get("C/C++").is_none());
assert_eq!(&2, info.stats.cache_hits.get("CUDA").unwrap());
assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap());
assert!(info.stats.cache_misses.get("C/C++").is_none());
assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("PTX").unwrap());
});
}
Expand Down

0 comments on commit a03b43c

Please sign in to comment.