Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Separate the resolution cache and repository cache in Ivy #5844

Merged
merged 1 commit into from
Jun 12, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 21 additions & 19 deletions src/python/pants/backend/jvm/ivy_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,16 @@ class IvyResolutionStep(object):
# NB(nh): This class is the base class for the ivy resolve and fetch steps.
# It also specifies the abstract methods that define the components of resolution steps.

def __init__(self, confs, hash_name, pinned_artifacts, soft_excludes, ivy_cache_dir,
global_ivy_workdir):
def __init__(self, confs, hash_name, pinned_artifacts, soft_excludes, ivy_resolution_cache_dir,
ivy_repository_cache_dir, global_ivy_workdir):
"""
:param confs: A tuple of string ivy confs to resolve for.
:param hash_name: A unique string name for this resolve.
:param pinned_artifacts: A tuple of "artifact-alikes" to force the versions of.
:param soft_excludes: A flag marking whether to pass excludes to Ivy or to apply them after the
fact.
:param ivy_cache_dir: The cache directory used by Ivy for this resolution step.
:param ivy_repository_cache_dir: The cache directory used by Ivy for repository cache data.
:param ivy_resolution_cache_dir: The cache directory used by Ivy for resolution cache data.
:param global_ivy_workdir: The workdir that all ivy outputs live in.
"""

Expand All @@ -56,7 +57,8 @@ def __init__(self, confs, hash_name, pinned_artifacts, soft_excludes, ivy_cache_
self.pinned_artifacts = pinned_artifacts
self.soft_excludes = soft_excludes

self.ivy_cache_dir = ivy_cache_dir
self.ivy_repository_cache_dir = ivy_repository_cache_dir
self.ivy_resolution_cache_dir = ivy_resolution_cache_dir
self.global_ivy_workdir = global_ivy_workdir

self.workdir_reports_by_conf = {c: self.resolve_report_path(c) for c in confs}
Expand Down Expand Up @@ -109,7 +111,7 @@ def resolve_report_path(self, conf):
def _construct_and_load_symlink_map(self):
artifact_paths, symlink_map = IvyUtils.construct_and_load_symlink_map(
self.symlink_dir,
self.ivy_cache_dir,
self.ivy_repository_cache_dir,
self.ivy_cache_classpath_filename,
self.symlink_classpath_filename)
return artifact_paths, symlink_map
Expand All @@ -122,7 +124,7 @@ def _call_ivy(self, executor, extra_args, ivyxml, jvm_options, hash_name_for_rep
jvm_options,
self.workdir_reports_by_conf,
self.confs,
self.ivy_cache_dir,
self.ivy_resolution_cache_dir,
self.ivy_cache_classpath_filename,
hash_name_for_report,
workunit_factory,
Expand Down Expand Up @@ -726,7 +728,7 @@ def _load_classpath_from_cachepath(path):

@classmethod
def do_resolve(cls, executor, extra_args, ivyxml, jvm_options, workdir_report_paths_by_conf,
confs, ivy_cache_dir, ivy_cache_classpath_filename, resolve_hash_name,
confs, ivy_resolution_cache_dir, ivy_cache_classpath_filename, resolve_hash_name,
workunit_factory, workunit_name):
"""Execute Ivy with the given ivy.xml and copies all relevant files into the workdir.

Expand Down Expand Up @@ -764,15 +766,15 @@ def do_resolve(cls, executor, extra_args, ivyxml, jvm_options, workdir_report_pa
raise cls.IvyError('Ivy failed to create classpath file at {}'
.format(raw_target_classpath_file_tmp))

cls._copy_ivy_reports(workdir_report_paths_by_conf, confs, ivy_cache_dir, resolve_hash_name)
cls._copy_ivy_reports(workdir_report_paths_by_conf, confs, ivy_resolution_cache_dir, resolve_hash_name)

logger.debug('Moved ivy classfile file to {dest}'
.format(dest=ivy_cache_classpath_filename))

@classmethod
def _copy_ivy_reports(cls, workdir_report_paths_by_conf, confs, ivy_cache_dir, resolve_hash_name):
def _copy_ivy_reports(cls, workdir_report_paths_by_conf, confs, ivy_resolution_cache_dir, resolve_hash_name):
for conf in confs:
ivy_cache_report_path = IvyUtils.xml_report_path(ivy_cache_dir, resolve_hash_name,
ivy_cache_report_path = IvyUtils.xml_report_path(ivy_resolution_cache_dir, resolve_hash_name,
conf)
workdir_report_path = workdir_report_paths_by_conf[conf]
try:
Expand Down Expand Up @@ -808,7 +810,7 @@ def _exec_ivy(cls, ivy, confs, ivyxml, args, jvm_options, executor,
raise IvyUtils.IvyError(e)

@classmethod
def construct_and_load_symlink_map(cls, symlink_dir, ivy_cache_dir,
def construct_and_load_symlink_map(cls, symlink_dir, ivy_repository_cache_dir,
ivy_cache_classpath_filename, symlink_classpath_filename):
# Make our actual classpath be symlinks, so that the paths are uniform across systems.
# Note that we must do this even if we read the raw_target_classpath_file from the artifact
Expand All @@ -818,26 +820,26 @@ def construct_and_load_symlink_map(cls, symlink_dir, ivy_cache_dir,
# in artifact-cached analysis files are consistent across systems.
# Note that we have one global, well-known symlink dir, again so that paths are
# consistent across builds.
symlink_map = cls._symlink_cachepath(ivy_cache_dir,
symlink_map = cls._symlink_cachepath(ivy_repository_cache_dir,
ivy_cache_classpath_filename,
symlink_dir,
symlink_classpath_filename)
classpath = cls._load_classpath_from_cachepath(symlink_classpath_filename)
return classpath, symlink_map

@classmethod
def _symlink_cachepath(cls, ivy_cache_dir, inpath, symlink_dir, outpath):
"""Symlinks all paths listed in inpath that are under ivy_cache_dir into symlink_dir.
def _symlink_cachepath(cls, ivy_repository_cache_dir, inpath, symlink_dir, outpath):
"""Symlinks all paths listed in inpath that are under ivy_repository_cache_dir into symlink_dir.

If there is an existing symlink for a file under inpath, it is used rather than creating
a new symlink. Preserves all other paths. Writes the resulting paths to outpath.
Returns a map of path -> symlink to that path.
"""
safe_mkdir(symlink_dir)
# The ivy_cache_dir might itself be a symlink. In this case, ivy may return paths that
# The ivy_repository_cache_dir might itself be a symlink. In this case, ivy may return paths that
# reference the realpath of the .jar file after it is resolved in the cache dir. To handle
# this case, add both the symlink'ed path and the realpath to the jar to the symlink map.
real_ivy_cache_dir = os.path.realpath(ivy_cache_dir)
real_ivy_cache_dir = os.path.realpath(ivy_repository_cache_dir)
symlink_map = OrderedDict()

inpaths = cls._load_classpath_from_cachepath(inpath)
Expand Down Expand Up @@ -870,7 +872,7 @@ def _symlink_cachepath(cls, ivy_cache_dir, inpath, symlink_dir, outpath):
return dict(symlink_map)

@classmethod
def xml_report_path(cls, cache_dir, resolve_hash_name, conf):
def xml_report_path(cls, resolution_cache_dir, resolve_hash_name, conf):
"""The path to the xml report ivy creates after a retrieve.

:API: public
Expand All @@ -882,8 +884,8 @@ def xml_report_path(cls, cache_dir, resolve_hash_name, conf):
:returns: The report path.
:rtype: string
"""
return os.path.join(cache_dir, '{}-{}-{}.xml'.format(IvyUtils.INTERNAL_ORG_NAME,
resolve_hash_name, conf))
return os.path.join(resolution_cache_dir, '{}-{}-{}.xml'.format(IvyUtils.INTERNAL_ORG_NAME,
resolve_hash_name, conf))

@classmethod
def parse_xml_report(cls, conf, path):
Expand Down
4 changes: 2 additions & 2 deletions src/python/pants/backend/jvm/tasks/ivy_resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def make_empty_report(report, organisation, module, conf):
report = None
org = IvyUtils.INTERNAL_ORG_NAME
name = result.resolve_hash_name
xsl = os.path.join(self.ivy_cache_dir, 'ivy-report.xsl')
xsl = os.path.join(self.ivy_resolution_cache_dir, 'ivy-report.xsl')

# Xalan needs this dir to exist - ensure that, but do no more - we have no clue where this
# points.
Expand Down Expand Up @@ -180,7 +180,7 @@ def make_empty_report(report, organisation, module, conf):
css = os.path.join(self._outdir, 'ivy-report.css')
if os.path.exists(css):
os.unlink(css)
shutil.copy(os.path.join(self.ivy_cache_dir, 'ivy-report.css'), self._outdir)
shutil.copy(os.path.join(self.ivy_resolution_cache_dir, 'ivy-report.css'), self._outdir)

if self._open and report:
try:
Expand Down
17 changes: 15 additions & 2 deletions src/python/pants/backend/jvm/tasks/ivy_task_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from pants.backend.jvm.subsystems.jar_dependency_management import JarDependencyManagement
from pants.backend.jvm.targets.jar_library import JarLibrary
from pants.backend.jvm.targets.jvm_target import JvmTarget
from pants.base.deprecated import deprecated
from pants.base.exceptions import TaskError
from pants.base.fingerprint_strategy import FingerprintStrategy
from pants.invalidation.cache_manager import VersionedTargetSet
Expand Down Expand Up @@ -101,6 +102,8 @@ def implementation_version(cls):
return super(IvyTaskMixin, cls).implementation_version() + [('IvyTaskMixin', 4)]

@memoized_property
@deprecated(removal_version='1.10.0.dev0',
hint_message='Use ivy_repository_cache_dir or ivy_resolution_cache_dir instead.')
def ivy_cache_dir(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cheister Can you clarify when ivy_cache_dir will be used instead of the two new options?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think in most cases people would use the ivy_cache_dir, since most of the time you want the resolution_cache_dir and repository_cache_dir to be the same.

I'm assuming you're referring to the cache-dir option on the ivy_subsystem and not this one on the ivy_task_mixin?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. I was kinda expecting some logic between old and new options. Maybe somewhere around https://github.com/pantsbuild/pants/pull/5844/files/edcfcf1c2206aeab9e1398bd1b5a6252b9a00cbc#diff-23cdf27cceb5831fcb8c131fd37fe4cbR61 like

if ivy_cache_dir is not set:
  use the new options

Does that make sense?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you looking for the logic in ivy_subsystem.py ?

+  def resolution_cache_dir(self):
+    if self.get_options().resolution_cache_dir:
+      return self.get_options().resolution_cache_dir
+    else:
+      return self.get_options().cache_dir
+
+  def repository_cache_dir(self):
+    if self.get_options().repository_cache_dir:
+      return self.get_options().repository_cache_dir
+    else:
+      return self.get_options().cache_dir

Copy link
Contributor

@wisechengyi wisechengyi Jun 8, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cheister ah yes, thanks!

Nit:

  1. Could you make the default value None explicitly for
+    register('--resolution-cache-dir', advanced=True,
+             help='Directory to store Ivy resolution artifacts.')
+    register('--repository-cache-dir', advanced=True,
+             help='Directory to store Ivy repository artifacts.')

?

  1. add some doc string for following
+  def resolution_cache_dir(self):
+    if self.get_options().resolution_cache_dir:
+      return self.get_options().resolution_cache_dir
+    else:
+      return self.get_options().cache_dir
+
+  def repository_cache_dir(self):
+    if self.get_options().repository_cache_dir:
+      return self.get_options().repository_cache_dir
+    else:
+      return self.get_options().cache_dir

that the logic is for option deprecation handling?

Other than that lgtm!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I originally had None for the default but was told it was redundant and removed it.

I also don't plan on deprecating --cache-dir, I think that will still be used most of the time when you don't want to share a cache between instances. I updated the documentation for the option to more closely describe how it works with ivy itself.

"""The path of the ivy cache dir used for resolves.

Expand All @@ -111,6 +114,14 @@ def ivy_cache_dir(self):
# TODO(John Sirois): Fixup the IvySubsystem to encapsulate its properties.
return IvySubsystem.global_instance().get_options().cache_dir

@memoized_property
def ivy_repository_cache_dir(self):
return IvySubsystem.global_instance().repository_cache_dir()

@memoized_property
def ivy_resolution_cache_dir(self):
return IvySubsystem.global_instance().resolution_cache_dir()

def resolve(self, executor, targets, classpath_products, confs=None, extra_args=None,
invalidate_dependents=False):
"""Resolves external classpath products (typically jars) for the given targets.
Expand Down Expand Up @@ -237,13 +248,15 @@ def _ivy_resolve(self,
resolve_hash_name,
pinned_artifacts,
self.get_options().soft_excludes,
self.ivy_cache_dir,
self.ivy_resolution_cache_dir,
self.ivy_repository_cache_dir,
global_ivy_workdir)
resolve = IvyResolveStep(confs,
resolve_hash_name,
pinned_artifacts,
self.get_options().soft_excludes,
self.ivy_cache_dir,
self.ivy_resolution_cache_dir,
self.ivy_repository_cache_dir,
global_ivy_workdir)

return self._perform_resolution(fetch, resolve, executor, extra_args, invalidation_check,
Expand Down
4 changes: 2 additions & 2 deletions src/python/pants/ivy/bootstrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def ivy(self, bootstrap_workunit_factory=None):
"""
return Ivy(self._get_classpath(bootstrap_workunit_factory),
ivy_settings=self._ivy_subsystem.get_options().ivy_settings,
ivy_cache_dir=self._ivy_subsystem.get_options().cache_dir,
ivy_resolution_cache_dir=self._ivy_subsystem.resolution_cache_dir(),
extra_jvm_options=self._ivy_subsystem.extra_jvm_options())

def _get_classpath(self, workunit_factory):
Expand Down Expand Up @@ -159,5 +159,5 @@ def _bootstrap_ivy(self, bootstrap_jar_path):

return Ivy(bootstrap_jar_path,
ivy_settings=options.bootstrap_ivy_settings or options.ivy_settings,
ivy_cache_dir=options.cache_dir,
ivy_resolution_cache_dir=self._ivy_subsystem.resolution_cache_dir(),
extra_jvm_options=self._ivy_subsystem.extra_jvm_options())
24 changes: 12 additions & 12 deletions src/python/pants/ivy/ivy.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ class Ivy(object):
class Error(Exception):
"""Indicates an error executing an ivy command."""

def __init__(self, classpath, ivy_settings=None, ivy_cache_dir=None, extra_jvm_options=None):
def __init__(self, classpath, ivy_settings=None, ivy_resolution_cache_dir=None, extra_jvm_options=None):
"""Configures an ivy wrapper for the ivy distribution at the given classpath.

:param ivy_settings: path to find settings.xml file
:param ivy_cache_dir: path to store downloaded ivy artifacts
:param ivy_resolution_cache_dir: path to store downloaded ivy artifacts
:param extra_jvm_options: list of strings to add to command line when invoking Ivy
"""
self._classpath = maybe_list(classpath)
Expand All @@ -41,14 +41,14 @@ def __init__(self, classpath, ivy_settings=None, ivy_cache_dir=None, extra_jvm_o
raise ValueError('ivy_settings must be a string, given {} of type {}'.format(
self._ivy_settings, type(self._ivy_settings)))

self._ivy_cache_dir = ivy_cache_dir
if not isinstance(self._ivy_cache_dir, string_types):
raise ValueError('ivy_cache_dir must be a string, given {} of type {}'.format(
self._ivy_cache_dir, type(self._ivy_cache_dir)))
self._ivy_resolution_cache_dir = ivy_resolution_cache_dir
if not isinstance(self._ivy_resolution_cache_dir, string_types):
raise ValueError('ivy_resolution_cache_dir must be a string, given {} of type {}'.format(
self._ivy_resolution_cache_dir, type(self._ivy_resolution_cache_dir)))

self._extra_jvm_options = extra_jvm_options or []
self._lock = OwnerPrintingInterProcessFileLock(
os.path.join(self._ivy_cache_dir, 'pants_ivy.file_lock'))
os.path.join(self._ivy_resolution_cache_dir, 'pants_ivy.file_lock'))

@property
def ivy_settings(self):
Expand All @@ -60,14 +60,14 @@ def ivy_settings(self):
return self._ivy_settings

@property
def ivy_cache_dir(self):
def ivy_resolution_cache_dir(self):
"""Returns the ivy cache dir used by this `Ivy` instance."""
return self._ivy_cache_dir
return self._ivy_resolution_cache_dir

@property
@contextmanager
def resolution_lock(self):
safe_mkdir(self._ivy_cache_dir)
safe_mkdir(self._ivy_resolution_cache_dir)
with self._lock:
yield

Expand Down Expand Up @@ -100,12 +100,12 @@ def runner(self, jvm_options=None, args=None, executor=None):
args = ['-settings', self._ivy_settings] + args

options = list(jvm_options) if jvm_options else []
if self._ivy_cache_dir and '-cache' not in args:
if self._ivy_resolution_cache_dir and '-cache' not in args:
# TODO(John Sirois): Currently this is a magic property to support hand-crafted <caches/> in
# ivysettings.xml. Ideally we'd support either simple -caches or these hand-crafted cases
# instead of just hand-crafted. Clean this up by taking over ivysettings.xml and generating
# it from BUILD constructs.
options += ['-Divy.cache.dir={}'.format(self._ivy_cache_dir)]
options += ['-Divy.cache.dir={}'.format(self._ivy_resolution_cache_dir)]
options += self._extra_jvm_options

executor = executor or SubprocessExecutor(DistributionLocator.cached())
Expand Down
21 changes: 20 additions & 1 deletion src/python/pants/ivy/ivy_subsystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,14 @@ def register_options(cls, register):
register('--ivy-profile', advanced=True, default=cls._DEFAULT_VERSION,
help='The version of ivy to fetch.')
register('--cache-dir', advanced=True, default=os.path.expanduser('~/.ivy2/pants'),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should deprecate this option probably?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, based on our discussion, I'm fine with not deprecating this. But you should expand the description of all of these options to explain how they interact, and you should add validation that both cache-dir and the more specific options aren't set at the same time (see my previous comments regarding is_default).

help='Directory to store artifacts retrieved by Ivy.')
help='The default directory used for both the Ivy resolution and repository caches.'
'If you want to isolate the resolution cache from the repository cache, we '
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Is it worth requiring that both resolution and repository is set if either are?

'recommend setting both the --resolution-cache-dir and --repository-cache-dir '
'instead of using --cache-dir')
register('--resolution-cache-dir', advanced=True,
help='Directory to store Ivy resolution artifacts.')
register('--repository-cache-dir', advanced=True,
help='Directory to store Ivy repository artifacts.')
register('--ivy-settings', advanced=True,
help='Location of XML configuration file for Ivy settings.')
register('--bootstrap-ivy-settings', advanced=True,
Expand Down Expand Up @@ -93,3 +100,15 @@ def extra_jvm_options(self):
def _parse_proxy_string(self, proxy_string):
parse_result = urllib.parse.urlparse(proxy_string)
return parse_result.hostname, parse_result.port

def resolution_cache_dir(self):
if self.get_options().resolution_cache_dir:
return self.get_options().resolution_cache_dir
else:
return self.get_options().cache_dir

def repository_cache_dir(self):
if self.get_options().repository_cache_dir:
return self.get_options().repository_cache_dir
else:
return self.get_options().cache_dir
7 changes: 5 additions & 2 deletions tests/python/pants_test/backend/jvm/tasks/test_ivy_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,8 @@ def test_if_not_all_symlinked_files_exist_after_successful_resolve_fail(self):
'hash_name',
None,
False,
'cache_dir',
'resolution_cache_dir',
'repository_cache_dir',
'workdir')

# Stub resolving and creating the result, returning one missing artifacts.
Expand All @@ -608,7 +609,9 @@ def test_if_not_all_symlinked_files_exist_after_successful_fetch_fail(self):
'hash_name',
False,
None,
'ivy_cache_dir', 'global_ivy_workdir')
'ivy_resolution_cache_dir',
'ivy_repository_cache_dir',
'global_ivy_workdir')

# Stub resolving and creating the result, returning one missing artifacts.
fetch._do_fetch = do_nothing
Expand Down
4 changes: 2 additions & 2 deletions tests/python/pants_test/ivy/test_bootstrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def test_simple(self):
ivy_subsystem = IvySubsystem.global_instance()
bootstrapper = Bootstrapper(ivy_subsystem=ivy_subsystem)
ivy = bootstrapper.ivy()
self.assertIsNotNone(ivy.ivy_cache_dir)
self.assertIsNotNone(ivy.ivy_resolution_cache_dir)
self.assertIsNone(ivy.ivy_settings)
bootstrap_jar_path = os.path.join(ivy_subsystem.get_options().pants_bootstrapdir,
'tools', 'jvm', 'ivy', 'bootstrap.jar')
Expand All @@ -36,5 +36,5 @@ def test_reset(self):

def test_default_ivy(self):
ivy = Bootstrapper.default_ivy()
self.assertIsNotNone(ivy.ivy_cache_dir)
self.assertIsNotNone(ivy.ivy_resolution_cache_dir)
self.assertIsNone(ivy.ivy_settings)