diff --git a/src/python/pants/backend/jvm/tasks/jvm_compile/jvm_compile.py b/src/python/pants/backend/jvm/tasks/jvm_compile/jvm_compile.py index 8bb653b7ce3..5e1cd973cd6 100644 --- a/src/python/pants/backend/jvm/tasks/jvm_compile/jvm_compile.py +++ b/src/python/pants/backend/jvm/tasks/jvm_compile/jvm_compile.py @@ -618,7 +618,7 @@ def check_cache(vts): """ if not self.artifact_cache_reads_enabled(): return False - cached_vts, uncached_vts = self.check_artifact_cache([vts]) + cached_vts, _, _ = self.check_artifact_cache([vts]) if not cached_vts: self.context.log.debug('Missed cache during double check for {}' .format(vts.target.address.spec)) diff --git a/src/python/pants/cache/restful_artifact_cache.py b/src/python/pants/cache/restful_artifact_cache.py index e84c363c4cc..f6ab6216aa6 100644 --- a/src/python/pants/cache/restful_artifact_cache.py +++ b/src/python/pants/cache/restful_artifact_cache.py @@ -91,6 +91,7 @@ def use_cached_files(self, cache_key, results_dir=None): return self._localcache.store_and_use_artifact(cache_key, byte_iter, results_dir) except Exception as e: logger.warn('\nError while reading from remote artifact cache: {0}\n'.format(e)) + # TODO(peiyu): clean up partially downloaded local file if any return UnreadableArtifact(cache_key, e) return False diff --git a/src/python/pants/goal/BUILD b/src/python/pants/goal/BUILD index 187b6bcaec8..46931b468fb 100644 --- a/src/python/pants/goal/BUILD +++ b/src/python/pants/goal/BUILD @@ -17,7 +17,7 @@ python_library( ] ) -# why is this in goal? +# This is in goal because it is used by run_tracker. python_library( name = 'artifact_cache_stats', sources = ['artifact_cache_stats.py'], diff --git a/src/python/pants/goal/artifact_cache_stats.py b/src/python/pants/goal/artifact_cache_stats.py index dd087482a71..fe550d19803 100644 --- a/src/python/pants/goal/artifact_cache_stats.py +++ b/src/python/pants/goal/artifact_cache_stats.py @@ -8,6 +8,7 @@ import os from collections import defaultdict, namedtuple +from pants.cache.artifact_cache import UnreadableArtifact from pants.util.dirutil import safe_mkdir @@ -28,11 +29,11 @@ def init_stat(): safe_mkdir(self._dir) def add_hits(self, cache_name, targets): - self._add_stat(0, cache_name, targets) + self._add_stat(0, cache_name, targets, None) - # any cache misses, whether legit or due to error. - def add_misses(self, cache_name, targets): - self._add_stat(1, cache_name, targets) + # Records any cache miss; each target is paired with the cause of its miss. + def add_misses(self, cache_name, targets, causes): + self._add_stat(1, cache_name, targets, causes) def get_all(self): """Returns the cache stats as a list of dicts.""" @@ -48,12 +49,23 @@ def get_all(self): return ret # hit_or_miss is the appropriate index in CacheStat, i.e., 0 for hit, 1 for miss.
- def _add_stat(self, hit_or_miss, cache_name, targets): - for tgt in targets: - self.stats_per_cache[cache_name][hit_or_miss].append(tgt.address.reference()) + def _add_stat(self, hit_or_miss, cache_name, targets, causes): + def format_vts(tgt, cause): + """Format into (target, cause) tuple.""" + target_address = tgt.address.reference() + if isinstance(cause, UnreadableArtifact): + return (target_address, cause.err.message) + elif cause is False: + return (target_address, 'uncached') + else: + return (target_address, '') + causes = causes or [True] * len(targets) + target_with_causes = [format_vts(tgt, cause) for tgt, cause in zip(targets, causes)] + self.stats_per_cache[cache_name][hit_or_miss].extend(target_with_causes) + suffix = 'misses' if hit_or_miss else 'hits' if self._dir and os.path.exists(self._dir): # Check existence in case of a clean-all. - suffix = 'misses' if hit_or_miss else 'hits' with open(os.path.join(self._dir, '{}.{}'.format(cache_name, suffix)), 'a') as f: - f.write('\n'.join([tgt.address.reference() for tgt in targets])) + f.write('\n'.join([' '.join(target_with_cause).strip() + for target_with_cause in target_with_causes])) f.write('\n') diff --git a/src/python/pants/reporting/html_reporter.py b/src/python/pants/reporting/html_reporter.py index 93d11a6d2f7..bc1c480fd76 100644 --- a/src/python/pants/reporting/html_reporter.py +++ b/src/python/pants/reporting/html_reporter.py @@ -285,12 +285,15 @@ def fix_detail_id(e, _id): msg_elements = [] for cache_name, stat in artifact_cache_stats.stats_per_cache.items(): + # TODO: consider displaying causes for hit/miss targets + hit_targets = [tgt for tgt, cause in stat.hit_targets] + miss_targets = [tgt for tgt, cause in stat.miss_targets] msg_elements.extend([ cache_name + ' artifact cache: ', # Explicitly set the detail ids, so their displayed/hidden state survives a refresh. - fix_detail_id(items_to_report_element(stat.hit_targets, 'hit'), 'cache-hit-details'), + fix_detail_id(items_to_report_element(hit_targets, 'hit'), 'cache-hit-details'), ', ', - fix_detail_id(items_to_report_element(stat.miss_targets, 'miss'), 'cache-miss-details'), + fix_detail_id(items_to_report_element(miss_targets, 'miss'), 'cache-miss-details'), '.'
]) if not msg_elements: diff --git a/src/python/pants/task/task.py b/src/python/pants/task/task.py index aef58523092..d255c65e52f 100644 --- a/src/python/pants/task/task.py +++ b/src/python/pants/task/task.py @@ -5,14 +5,12 @@ from __future__ import (absolute_import, division, generators, nested_scopes, print_function, unicode_literals, with_statement) -import itertools import os import sys from abc import abstractmethod from contextlib import contextmanager from hashlib import sha1 - -from twitter.common.collections.orderedset import OrderedSet +from itertools import chain, repeat from pants.base.exceptions import TaskError from pants.base.fingerprint_strategy import TaskIdentityFingerprintStrategy @@ -349,7 +347,7 @@ def invalidated(self, if invalidation_check.invalid_vts and self.artifact_cache_reads_enabled(): with self.context.new_workunit('cache'): - cached_vts, uncached_vts = \ + cached_vts, uncached_vts, uncached_causes = \ self.check_artifact_cache(self.check_artifact_cache_for(invalidation_check)) if cached_vts: cached_targets = [vt.target for vt in cached_vts] @@ -360,7 +358,8 @@ def invalidated(self, if uncached_vts: uncached_targets = [vt.target for vt in uncached_vts] self.context.run_tracker.artifact_cache_stats.add_misses(cache_manager.task_name, - uncached_targets) + uncached_targets, + uncached_causes) if not silent: self._report_targets('No cached artifacts for ', uncached_targets, '.') # Now that we've checked the cache, re-partition whatever is still invalid. @@ -435,8 +434,10 @@ def check_artifact_cache_for(self, invalidation_check): def check_artifact_cache(self, vts): """Checks the artifact cache for the specified list of VersionedTargetSets. - Returns a pair (cached, uncached) of VersionedTargets that were - satisfied/unsatisfied from the cache. + Returns a tuple (cached, uncached, uncached_causes) of VersionedTargets that were + satisfied/unsatisfied from the cache. Each uncached VTS is paired with the cause of + its miss: `False` indicates a legitimate miss, while `UnreadableArtifact` indicates + a local or remote cache failure. """ return self.do_check_artifact_cache(vts) @@ -447,10 +448,7 @@ def do_check_artifact_cache(self, vts, post_process_cached_vts=None): satisfied/unsatisfied from the cache. """ if not vts: - return [], [] - - cached_vts = [] - uncached_vts = OrderedSet(vts) + return [], [], [] read_cache = self._cache_factory.get_read_cache() items = [(read_cache, vt.cache_key, vt.results_dir if vt.has_results_dir else None) @@ -458,25 +456,30 @@ def do_check_artifact_cache(self, vts, post_process_cached_vts=None): res = self.context.subproc_map(call_use_cached_files, items) - for vt, was_in_cache in zip(vts, res): - if was_in_cache: - cached_vts.append(vt) - uncached_vts.discard(vt) - elif isinstance(was_in_cache, UnreadableArtifact): - self._cache_key_errors.add(was_in_cache.key) - self._maybe_create_results_dirs(vts) + cached_vts = [] + uncached_vts = [] + uncached_causes = [] + # Note that while the input vts may represent multiple targets (for tasks that overrride # check_artifact_cache_for), the ones we return must represent single targets. - def flatten(vts): - return list(itertools.chain.from_iterable([vt.versioned_targets for vt in vts])) - all_cached_vts, all_uncached_vts = flatten(cached_vts), flatten(uncached_vts) + # Once flattened, cached/uncached vts are in separate lists. Each uncached vt is paired + # with the cause of its miss, for stat reporting purposes.
+ for vt, was_in_cache in zip(vts, res): + if was_in_cache: + cached_vts.extend(vt.versioned_targets) + else: + uncached_vts.extend(vt.versioned_targets) + uncached_causes.extend(repeat(was_in_cache, len(vt.versioned_targets))) + if isinstance(was_in_cache, UnreadableArtifact): + self._cache_key_errors.add(was_in_cache.key) + if post_process_cached_vts: - post_process_cached_vts(all_cached_vts) - for vt in all_cached_vts: + post_process_cached_vts(cached_vts) + for vt in cached_vts: vt.update() - return all_cached_vts, all_uncached_vts + return cached_vts, uncached_vts, uncached_causes def update_artifact_cache(self, vts_artifactfiles_pairs): """Write to the artifact cache, if we're configured to. diff --git a/tests/python/pants_test/base/context_utils.py b/tests/python/pants_test/base/context_utils.py index e93750c3475..6dc1f3b17d6 100644 --- a/tests/python/pants_test/base/context_utils.py +++ b/tests/python/pants_test/base/context_utils.py @@ -49,7 +49,7 @@ class DummyRunTracker(object): class DummyArtifactCacheStats(object): def add_hits(self, cache_name, targets): pass - def add_misses(self, cache_name, targets): pass + def add_misses(self, cache_name, targets, causes): pass artifact_cache_stats = DummyArtifactCacheStats() diff --git a/tests/python/pants_test/goal/BUILD b/tests/python/pants_test/goal/BUILD index 57d0106afa8..26fa032f4c3 100644 --- a/tests/python/pants_test/goal/BUILD +++ b/tests/python/pants_test/goal/BUILD @@ -1,9 +1,33 @@ # Copyright 2015 Pants project contributors (see CONTRIBUTORS.md). # Licensed under the Apache License, Version 2.0 (see LICENSE). +target( + name = 'goal', + dependencies = [ + ':artifact_cache_stats', + ':other', + ] +) + python_tests( - name='goal', - sources=globs('*.py'), + name='artifact_cache_stats', + sources=['test_artifact_cache_stats.py'], + dependencies=[ + 'src/python/pants/cache', + 'src/python/pants/goal:artifact_cache_stats', + 'src/python/pants/util:contextutil', + 'tests/python/pants_test:base_test', + ] +) + +python_tests( + name='other', + sources=[ + 'test_context.py', + 'test_products.py', + 'test_run_tracker.py', + 'test_union_products.py', + ], dependencies=[ '3rdparty/python/twitter/commons:twitter.common.collections', 'src/python/pants/build_graph', @@ -13,4 +37,4 @@ python_tests( 'src/python/pants/util:dirutil', 'tests/python/pants_test:base_test', ] -) +) \ No newline at end of file diff --git a/tests/python/pants_test/goal/test_artifact_cache_stats.py b/tests/python/pants_test/goal/test_artifact_cache_stats.py new file mode 100644 index 00000000000..d8752597565 --- /dev/null +++ b/tests/python/pants_test/goal/test_artifact_cache_stats.py @@ -0,0 +1,85 @@ +# coding=utf-8 +# Copyright 2015 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE).
+ +from __future__ import (absolute_import, division, generators, nested_scopes, print_function, + unicode_literals, with_statement) + +import os +from contextlib import contextmanager + +import requests + +from pants.cache.artifact import ArtifactError +from pants.cache.artifact_cache import UnreadableArtifact +from pants.goal.artifact_cache_stats import ArtifactCacheStats +from pants.util.contextutil import temporary_dir +from pants_test.base_test import BaseTest + + +class ArtifactCacheStatsTest(BaseTest): + TEST_CACHE_NAME_1 = 'ZincCompile' + TEST_CACHE_NAME_2 = 'Checkstyle_test_checkstyle' + TEST_LOCAL_ERROR = UnreadableArtifact('foo', ArtifactError('CRC check failed')) + TEST_REMOTE_ERROR = UnreadableArtifact('bar', requests.exceptions.ConnectionError('Read time out')) + TEST_SPEC_A = 'src/scala/a' + TEST_SPEC_B = 'src/scala/b' + TEST_SPEC_C = 'src/java/c' + + def setUp(self): + super(ArtifactCacheStatsTest, self).setUp() + + self.target_a = self.make_target(spec=self.TEST_SPEC_A) + self.target_b = self.make_target(spec=self.TEST_SPEC_B) + self.target_c = self.make_target(spec=self.TEST_SPEC_C) + + def test_add_hits(self): + expected_stats = [ + { + 'cache_name': self.TEST_CACHE_NAME_2, + 'num_hits': 0, + 'num_misses': 1, + 'hits': [], + 'misses': [(self.TEST_SPEC_A, self.TEST_LOCAL_ERROR.err.message)] + }, + { + 'cache_name': self.TEST_CACHE_NAME_1, + 'num_hits': 1, + 'num_misses': 1, + 'hits': [(self.TEST_SPEC_B, '')], + 'misses': [(self.TEST_SPEC_C, self.TEST_REMOTE_ERROR.err.message)] + }, + ] + + expected_hit_or_miss_files = { + '{}.misses'.format(self.TEST_CACHE_NAME_2): + '{} {}\n'.format(self.TEST_SPEC_A, self.TEST_LOCAL_ERROR.err.message), + '{}.hits'.format(self.TEST_CACHE_NAME_1): + '{}\n'.format(self.TEST_SPEC_B), + '{}.misses'.format(self.TEST_CACHE_NAME_1): + '{} {}\n'.format(self.TEST_SPEC_C, self.TEST_REMOTE_ERROR.err.message), + } + + with self.mock_artifact_cache_stats(expected_stats, + expected_hit_or_miss_files=expected_hit_or_miss_files)\ + as artifact_cache_stats: + artifact_cache_stats.add_hits(self.TEST_CACHE_NAME_1, [self.target_b]) + artifact_cache_stats.add_misses(self.TEST_CACHE_NAME_1, [self.target_c], + [self.TEST_REMOTE_ERROR]) + artifact_cache_stats.add_misses(self.TEST_CACHE_NAME_2, [self.target_a], + [self.TEST_LOCAL_ERROR]) + + @contextmanager + def mock_artifact_cache_stats(self, + expected_stats, + expected_hit_or_miss_files=None): + with temporary_dir() as tmp_dir: + artifact_cache_stats = ArtifactCacheStats(tmp_dir) + yield artifact_cache_stats + self.assertEquals(expected_stats, artifact_cache_stats.get_all()) + + self.assertEquals(sorted(list(expected_hit_or_miss_files.keys())), + sorted(os.listdir(tmp_dir))) + for hit_or_miss_file in expected_hit_or_miss_files.keys(): + with open(os.path.join(tmp_dir, hit_or_miss_file)) as hit_or_miss_saved: + self.assertEquals(expected_hit_or_miss_files[hit_or_miss_file], hit_or_miss_saved.read())
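As context for the task.py hunk above: the new do_check_artifact_cache pairs every flattened member of a missed VersionedTargetSet with the cause returned by the cache lookup, where a hit is True, a plain miss is False, and a read failure is an UnreadableArtifact (which is falsy, which is why it falls into the same else branch as False). Below is a minimal, standalone sketch of that pairing logic, not part of the patch; FakeVT and FakeUnreadable are illustrative stand-ins, not Pants APIs.

from itertools import repeat

class FakeVT(object):
  """Stands in for a VersionedTargetSet; versioned_targets holds its flattened members."""
  def __init__(self, versioned_targets):
    self.versioned_targets = versioned_targets

class FakeUnreadable(object):
  """Stands in for UnreadableArtifact: falsy, and carries the underlying error."""
  def __init__(self, err):
    self.err = err
  def __nonzero__(self):  # Python 2
    return False
  __bool__ = __nonzero__  # Python 3

# Simulated cache-lookup results, one per input set: hit, plain miss, read failure.
vts = [FakeVT(['a1', 'a2']), FakeVT(['b1']), FakeVT(['c1'])]
res = [True, False, FakeUnreadable(IOError('corrupt artifact'))]

cached_vts, uncached_vts, uncached_causes = [], [], []
for vt, was_in_cache in zip(vts, res):
  if was_in_cache:
    cached_vts.extend(vt.versioned_targets)
  else:
    uncached_vts.extend(vt.versioned_targets)
    # Every flattened member of a missed set shares the same cause.
    uncached_causes.extend(repeat(was_in_cache, len(vt.versioned_targets)))

print(cached_vts)      # ['a1', 'a2']
print(uncached_vts)    # ['b1', 'c1']
print(uncached_causes) # [False, <the FakeUnreadable instance>]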