Skip to content

Commit

Permalink
Refine cache stats: distinguish legit misses from miss errors
Browse files Browse the repository at this point in the history
Cache misses due to failures turn out to be fairly frequent, at least judging from the logs.
Distinguishing legitimate misses from errors would help us quantify this and take appropriate
action.

This PR attaches a cause to each miss.

Testing Done:
https://travis-ci.org/peiyuwang/pants/builds/93991253

Bugs closed: 2638

Reviewed at https://rbcommons.com/s/twitter/r/3190/
  • Loading branch information
peiyuwang authored and ericzundel committed Dec 2, 2015
1 parent 6285e4f commit fb30d2d
Show file tree
Hide file tree
Showing 9 changed files with 169 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,7 @@ def check_cache(vts):
"""
if not self.artifact_cache_reads_enabled():
return False
cached_vts, uncached_vts = self.check_artifact_cache([vts])
cached_vts, _, _ = self.check_artifact_cache([vts])
if not cached_vts:
self.context.log.debug('Missed cache during double check for {}'
.format(vts.target.address.spec))
Expand Down
1 change: 1 addition & 0 deletions src/python/pants/cache/restful_artifact_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def use_cached_files(self, cache_key, results_dir=None):
return self._localcache.store_and_use_artifact(cache_key, byte_iter, results_dir)
except Exception as e:
logger.warn('\nError while reading from remote artifact cache: {0}\n'.format(e))
# TODO(peiyu): clean up partially downloaded local file if any
return UnreadableArtifact(cache_key, e)

return False
Expand Down
2 changes: 1 addition & 1 deletion src/python/pants/goal/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ python_library(
]
)

# why is this in goal?
# this is in goal because of run_tracker
python_library(
name = 'artifact_cache_stats',
sources = ['artifact_cache_stats.py'],
Expand Down
30 changes: 21 additions & 9 deletions src/python/pants/goal/artifact_cache_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import os
from collections import defaultdict, namedtuple

from pants.cache.artifact_cache import UnreadableArtifact
from pants.util.dirutil import safe_mkdir


Expand All @@ -28,11 +29,11 @@ def init_stat():
safe_mkdir(self._dir)

def add_hits(self, cache_name, targets):
self._add_stat(0, cache_name, targets)
self._add_stat(0, cache_name, targets, None)

# any cache misses, whether legit or due to error.
def add_misses(self, cache_name, targets):
self._add_stat(1, cache_name, targets)
# any cache misses, each target is paired with its cause for the miss.
def add_misses(self, cache_name, targets, causes):
self._add_stat(1, cache_name, targets, causes)

def get_all(self):
"""Returns the cache stats as a list of dicts."""
Expand All @@ -48,12 +49,23 @@ def get_all(self):
return ret

# hit_or_miss is the appropriate index in CacheStat, i.e., 0 for hit, 1 for miss.
def _add_stat(self, hit_or_miss, cache_name, targets):
for tgt in targets:
self.stats_per_cache[cache_name][hit_or_miss].append(tgt.address.reference())
def _add_stat(self, hit_or_miss, cache_name, targets, causes):
def format_vts(tgt, cause):
"""Format into (target, cause) tuple."""
target_address = tgt.address.reference()
if isinstance(cause, UnreadableArtifact):
return (target_address, cause.err.message)
elif cause == False:
return (target_address, 'uncached')
else:
return (target_address, '')

causes = causes or [True] * len(targets)
target_with_causes = [format_vts(tgt, cause) for tgt, cause in zip(targets, causes)]
self.stats_per_cache[cache_name][hit_or_miss].extend(target_with_causes)
suffix = 'misses' if hit_or_miss else 'hits'
if self._dir and os.path.exists(self._dir): # Check existence in case of a clean-all.
suffix = 'misses' if hit_or_miss else 'hits'
with open(os.path.join(self._dir, '{}.{}'.format(cache_name, suffix)), 'a') as f:
f.write('\n'.join([tgt.address.reference() for tgt in targets]))
f.write('\n'.join([' '.join(target_with_cause).strip()
for target_with_cause in target_with_causes]))
f.write('\n')
7 changes: 5 additions & 2 deletions src/python/pants/reporting/html_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,12 +285,15 @@ def fix_detail_id(e, _id):

msg_elements = []
for cache_name, stat in artifact_cache_stats.stats_per_cache.items():
# TODO consider display causes for hit/miss targets
hit_targets = [tgt for tgt, cause in stat.hit_targets]
miss_targets = [tgt for tgt, cause in stat.miss_targets]
msg_elements.extend([
cache_name + ' artifact cache: ',
# Explicitly set the detail ids, so their displayed/hidden state survives a refresh.
fix_detail_id(items_to_report_element(stat.hit_targets, 'hit'), 'cache-hit-details'),
fix_detail_id(items_to_report_element(hit_targets, 'hit'), 'cache-hit-details'),
', ',
fix_detail_id(items_to_report_element(stat.miss_targets, 'miss'), 'cache-miss-details'),
fix_detail_id(items_to_report_element(miss_targets, 'miss'), 'cache-miss-details'),
'.'
])
if not msg_elements:
Expand Down
51 changes: 27 additions & 24 deletions src/python/pants/task/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,12 @@
from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)

import itertools
import os
import sys
from abc import abstractmethod
from contextlib import contextmanager
from hashlib import sha1

from twitter.common.collections.orderedset import OrderedSet
from itertools import chain, repeat

from pants.base.exceptions import TaskError
from pants.base.fingerprint_strategy import TaskIdentityFingerprintStrategy
Expand Down Expand Up @@ -349,7 +347,7 @@ def invalidated(self,

if invalidation_check.invalid_vts and self.artifact_cache_reads_enabled():
with self.context.new_workunit('cache'):
cached_vts, uncached_vts = \
cached_vts, uncached_vts, uncached_causes = \
self.check_artifact_cache(self.check_artifact_cache_for(invalidation_check))
if cached_vts:
cached_targets = [vt.target for vt in cached_vts]
Expand All @@ -360,7 +358,8 @@ def invalidated(self,
if uncached_vts:
uncached_targets = [vt.target for vt in uncached_vts]
self.context.run_tracker.artifact_cache_stats.add_misses(cache_manager.task_name,
uncached_targets)
uncached_targets,
uncached_causes)
if not silent:
self._report_targets('No cached artifacts for ', uncached_targets, '.')
# Now that we've checked the cache, re-partition whatever is still invalid.
Expand Down Expand Up @@ -435,8 +434,10 @@ def check_artifact_cache_for(self, invalidation_check):
def check_artifact_cache(self, vts):
"""Checks the artifact cache for the specified list of VersionedTargetSets.
Returns a pair (cached, uncached) of VersionedTargets that were
satisfied/unsatisfied from the cache.
Returns a tuple (cached, uncached, uncached_causes) of VersionedTargets that were
satisfied/unsatisfied from the cache. Uncached VTS are also attached with their
causes for the miss: `False` indicates a legit miss while `UnreadableArtifact`
is due to either local or remote cache failures.
"""
return self.do_check_artifact_cache(vts)

Expand All @@ -447,36 +448,38 @@ def do_check_artifact_cache(self, vts, post_process_cached_vts=None):
satisfied/unsatisfied from the cache.
"""
if not vts:
return [], []

cached_vts = []
uncached_vts = OrderedSet(vts)
return [], [], []

read_cache = self._cache_factory.get_read_cache()
items = [(read_cache, vt.cache_key, vt.results_dir if vt.has_results_dir else None)
for vt in vts]

res = self.context.subproc_map(call_use_cached_files, items)

for vt, was_in_cache in zip(vts, res):
if was_in_cache:
cached_vts.append(vt)
uncached_vts.discard(vt)
elif isinstance(was_in_cache, UnreadableArtifact):
self._cache_key_errors.add(was_in_cache.key)

self._maybe_create_results_dirs(vts)

cached_vts = []
uncached_vts = []
uncached_causes = []

# Note that while the input vts may represent multiple targets (for tasks that overrride
# check_artifact_cache_for), the ones we return must represent single targets.
def flatten(vts):
return list(itertools.chain.from_iterable([vt.versioned_targets for vt in vts]))
all_cached_vts, all_uncached_vts = flatten(cached_vts), flatten(uncached_vts)
# Once flattened, cached/uncached vts are in separate lists. Each uncached vts is paired
# with why it is missed for stat reporting purpose.
for vt, was_in_cache in zip(vts, res):
if was_in_cache:
cached_vts.extend(vt.versioned_targets)
else:
uncached_vts.extend(vt.versioned_targets)
uncached_causes.extend(repeat(was_in_cache, len(vt.versioned_targets)))
if isinstance(was_in_cache, UnreadableArtifact):
self._cache_key_errors.update(was_in_cache.key)

if post_process_cached_vts:
post_process_cached_vts(all_cached_vts)
for vt in all_cached_vts:
post_process_cached_vts(cached_vts)
for vt in cached_vts:
vt.update()
return all_cached_vts, all_uncached_vts
return cached_vts, uncached_vts, uncached_causes

def update_artifact_cache(self, vts_artifactfiles_pairs):
"""Write to the artifact cache, if we're configured to.
Expand Down
2 changes: 1 addition & 1 deletion tests/python/pants_test/base/context_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class DummyRunTracker(object):
class DummyArtifactCacheStats(object):
def add_hits(self, cache_name, targets): pass

def add_misses(self, cache_name, targets): pass
def add_misses(self, cache_name, targets, causes): pass

artifact_cache_stats = DummyArtifactCacheStats()

Expand Down
30 changes: 27 additions & 3 deletions tests/python/pants_test/goal/BUILD
Original file line number Diff line number Diff line change
@@ -1,9 +1,33 @@
# Copyright 2015 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

target(
name = 'goal',
dependencies = [
':artifact_cache_stats',
':other',
]
)

python_tests(
name='goal',
sources=globs('*.py'),
name='artifact_cache_stats',
sources= ['test_artifact_cache_stats.py'],
dependencies=[
'src/python/pants/cache',
'src/python/pants/goal:artifact_cache_stats',
'src/python/pants/util:contextutil',
'tests/python/pants_test:base_test',
]
)

python_tests(
name='other',
sources=[
'test_context.py',
'test_products.py',
'test_run_tracker.py',
'test_union_products.py',
],
dependencies=[
'3rdparty/python/twitter/commons:twitter.common.collections',
'src/python/pants/build_graph',
Expand All @@ -13,4 +37,4 @@ python_tests(
'src/python/pants/util:dirutil',
'tests/python/pants_test:base_test',
]
)
)
85 changes: 85 additions & 0 deletions tests/python/pants_test/goal/test_artifact_cache_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# coding=utf-8
# Copyright 2015 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)

import os
from contextlib import contextmanager

import requests

from pants.cache.artifact import ArtifactError
from pants.cache.artifact_cache import UnreadableArtifact
from pants.goal.artifact_cache_stats import ArtifactCacheStats
from pants.util.contextutil import temporary_dir
from pants_test.base_test import BaseTest


class ArtifactCacheStatsTest(BaseTest):
  """Tests ArtifactCacheStats hit/miss accounting, including per-miss causes.

  Misses may carry a cause: an UnreadableArtifact wrapping either a local
  error (e.g. a CRC failure) or a remote one (e.g. a connection error).
  """

  TEST_CACHE_NAME_1 = 'ZincCompile'
  TEST_CACHE_NAME_2 = 'Checkstyle_test_checkstyle'
  TEST_LOCAL_ERROR = UnreadableArtifact('foo', ArtifactError('CRC check failed'))
  TEST_REMOTE_ERROR = UnreadableArtifact('bar', requests.exceptions.ConnectionError('Read time out'))
  TEST_SPEC_A = 'src/scala/a'
  TEST_SPEC_B = 'src/scala/b'
  TEST_SPEC_C = 'src/java/c'

  def setUp(self):
    super(ArtifactCacheStatsTest, self).setUp()

    self.target_a = self.make_target(spec=self.TEST_SPEC_A)
    self.target_b = self.make_target(spec=self.TEST_SPEC_B)
    self.target_c = self.make_target(spec=self.TEST_SPEC_C)

  def test_add_hits(self):
    # NOTE: despite the name, this exercises both hits and misses, verifying
    # that each miss is recorded together with its cause's error message.
    expected_stats = [
      {
        'cache_name': self.TEST_CACHE_NAME_2,
        'num_hits': 0,
        'num_misses': 1,
        'hits': [],
        'misses': [(self.TEST_SPEC_A, self.TEST_LOCAL_ERROR.err.message)]
      },
      {
        'cache_name': self.TEST_CACHE_NAME_1,
        'num_hits': 1,
        'num_misses': 1,
        'hits': [(self.TEST_SPEC_B, '')],
        'misses': [(self.TEST_SPEC_C, self.TEST_REMOTE_ERROR.err.message)]
      },
    ]

    expected_hit_or_miss_files = {
      '{}.misses'.format(self.TEST_CACHE_NAME_2):
        '{} {}\n'.format(self.TEST_SPEC_A, self.TEST_LOCAL_ERROR.err.message),
      '{}.hits'.format(self.TEST_CACHE_NAME_1):
        '{}\n'.format(self.TEST_SPEC_B),
      '{}.misses'.format(self.TEST_CACHE_NAME_1):
        '{} {}\n'.format(self.TEST_SPEC_C, self.TEST_REMOTE_ERROR.err.message),
    }

    with self.mock_artifact_cache_stats(expected_stats,
                                        expected_hit_or_miss_files=expected_hit_or_miss_files)\
        as artifact_cache_stats:
      artifact_cache_stats.add_hits(self.TEST_CACHE_NAME_1, [self.target_b])
      artifact_cache_stats.add_misses(self.TEST_CACHE_NAME_1, [self.target_c],
                                      [self.TEST_REMOTE_ERROR])
      artifact_cache_stats.add_misses(self.TEST_CACHE_NAME_2, [self.target_a],
                                      [self.TEST_LOCAL_ERROR])

  @contextmanager
  def mock_artifact_cache_stats(self,
                                expected_stats,
                                expected_hit_or_miss_files=None):
    """Yield an ArtifactCacheStats backed by a temp dir; verify expectations on exit.

    :param expected_stats: stats expected from get_all() after the caller's updates.
    :param expected_hit_or_miss_files: optional map of file name -> expected file
      contents. When omitted, the on-disk hit/miss files are not checked.
    """
    with temporary_dir() as tmp_dir:
      artifact_cache_stats = ArtifactCacheStats(tmp_dir)
      yield artifact_cache_stats
      self.assertEquals(expected_stats, artifact_cache_stats.get_all())

      # Bug fix: the original dereferenced expected_hit_or_miss_files
      # unconditionally, so passing the declared default of None raised
      # AttributeError. Only check files when expectations were supplied.
      if expected_hit_or_miss_files is not None:
        self.assertEquals(sorted(expected_hit_or_miss_files),
                          sorted(os.listdir(tmp_dir)))
        for hit_or_miss_file, expected_content in expected_hit_or_miss_files.items():
          with open(os.path.join(tmp_dir, hit_or_miss_file)) as hit_or_miss_saved:
            self.assertEquals(expected_content, hit_or_miss_saved.read())

0 comments on commit fb30d2d

Please sign in to comment.