Skip to content

Commit

Permalink
Add postprocessing of version strings and hashes to increase hit rate
Browse files Browse the repository at this point in the history
  • Loading branch information
Fredrik Nordin committed Feb 19, 2024
1 parent d2ef7d8 commit d8e90ea
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 1 deletion.
4 changes: 4 additions & 0 deletions cltcache.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ ignore_errors=false
# "-CC" => NOLINT-comments should work everywhere, but valid code may fail the preprocessor stage. Combine with ignore_errors if you are paranoid about issues with NOLINT-comments
preserve_comments=-C

# Increase cache hit rate by ignoring version numbers and hexadecimal hashes inside string literals
strip_string_versions=true
strip_string_hex_hashes=true

[behavior]
# Cache results even when clang-tidy fails
cache_failure=true
Expand Down
24 changes: 23 additions & 1 deletion src/cltcache/cltcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,25 @@ def remove_o_flag(compile_args):
return compile_args


def postprocess_source(source, config):
    """Normalize volatile string contents in preprocessed source before hashing.

    Version numbers (such as ``1.2.3``) and hexadecimal hashes that follow a
    double quote on the same line are replaced by fixed placeholders, so
    sources that differ only in those values yield the same text.

    Args:
        source: Preprocessed source text.
        config: A ``configparser``-style object. The boolean options
            ``strip_string_versions`` and ``strip_string_hex_hashes`` in the
            ``preprocessor`` section toggle each replacement; both default
            to enabled when absent.

    Returns:
        The source text with the enabled replacements applied.

    Raises:
        ValueError: If one of the options is set to a non-boolean value.
    """
    max_passes = 20

    def hash_replace(match):
        # Keep everything before the hash and overwrite the hash itself with
        # the same number of zeros.
        prefix_length = match.start(1) - match.start(0)
        return match.group(0)[:prefix_length] + "0" * len(match.group(1))

    replacements = []
    # getboolean() is required here: get() returns the raw option string, and
    # a non-empty string such as "false" is truthy.
    if config.getboolean("preprocessor", "strip_string_versions",
                         fallback=True):
        replacements.append(
            (r'("[^"\n]*?)([0-9]+(\.[0-9]+)+)', r'\1<version>'))
    if config.getboolean("preprocessor", "strip_string_hex_hashes",
                         fallback=True):
        replacements.append((r'"[^"\n]*?([0-9a-fA-F]{5,128})', hash_replace))

    for pattern, replacement in replacements:
        # A single pass only rewrites the first occurrence after each quote,
        # so re-apply until the text is stable or the pass limit is reached.
        # NOTE: for hashes the zeros match again on the next pass, so only
        # the first hex run after each quote is ever zeroed.
        for _ in range(max_passes):
            changed_source = re.sub(pattern, replacement, source)
            if changed_source == source:
                break
            source = changed_source
    return source


def get_preproc_hash(compile_args, config):
compile_args = remove_o_flag(compile_args)
preproc_flag = "-E"
Expand All @@ -85,7 +104,10 @@ def get_preproc_hash(compile_args, config):
verbose = config.getboolean("behavior", "verbose", fallback=False)
if verbose:
print("cltcache length of preproccesed source:", len(preproc_source))
preproc_hash = sha256(preproc_source)
postproc_source = postprocess_source(preproc_source, config)
if verbose:
print("cltcache length of postproccesed source:", len(postproc_source))
preproc_hash = sha256(postproc_source)
return preproc_hash


Expand Down

0 comments on commit d8e90ea

Please sign in to comment.