bpo-40495: compileall option to hardlink duplicate pyc files #19901

Merged (13 commits) on May 14, 2020
Doc/library/compileall.rst (21 changes: 16 additions, 5 deletions)
@@ -113,6 +113,11 @@ compile Python sources.

Ignore symlinks pointing outside the given directory.

+.. cmdoption:: --hardlink-dupes
+
+   Use hardlinks to prevent duplicates if ``.pyc`` files for multiple
+   optimization levels have the same content.
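
For example, the following invocation (paths are illustrative) compiles a
tree at three optimization levels and hardlinks any byte-identical
results::

    python -m compileall -o 0 -o 1 -o 2 --hardlink-dupes Lib/mypackage

Outputs that differ between levels (for example, when ``assert``
statements or docstrings are present) are left as separate files.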

.. versionchanged:: 3.2
Added the ``-i``, ``-b`` and ``-h`` options.

@@ -125,7 +130,7 @@ compile Python sources.
Added the ``--invalidation-mode`` option.

.. versionchanged:: 3.9
-   Added the ``-s``, ``-p``, ``-e`` options.
+   Added the ``-s``, ``-p``, ``-e`` and ``--hardlink-dupes`` options.
Raised the default recursion limit from 10 to
:py:func:`sys.getrecursionlimit()`.
Added the possibility to specify the ``-o`` option multiple times.
@@ -143,7 +148,7 @@ runtime.
Public functions
----------------

-.. function:: compile_dir(dir, maxlevels=sys.getrecursionlimit(), ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, workers=1, invalidation_mode=None, \*, stripdir=None, prependdir=None, limit_sl_dest=None)
+.. function:: compile_dir(dir, maxlevels=sys.getrecursionlimit(), ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, workers=1, invalidation_mode=None, \*, stripdir=None, prependdir=None, limit_sl_dest=None, hardlink_dupes=False)

Recursively descend the directory tree named by *dir*, compiling all :file:`.py`
files along the way. Return a true value if all the files compiled successfully,
@@ -193,6 +198,9 @@ Public functions
the ``-s``, ``-p`` and ``-e`` options described above.
They may be specified as ``str``, ``bytes`` or :py:class:`os.PathLike`.

+   If *hardlink_dupes* is ``True``, hardlinks are used to prevent duplicates
+   if ``.pyc`` files for multiple optimization levels have the same content.
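
As a minimal sketch of the Python API (the directory name is a
placeholder), deduplicating a tree across three optimization levels could
look like::

    import compileall

    # Compile every .py file under mypackage/ at levels 0, 1 and 2;
    # byte-identical .pyc outputs are hardlinked instead of duplicated.
    compileall.compile_dir('mypackage', optimize=[0, 1, 2],
                           hardlink_dupes=True, quiet=1)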

.. versionchanged:: 3.2
Added the *legacy* and *optimize* parameters.

@@ -219,9 +227,9 @@ Public functions
Setting *workers* to 0 now chooses the optimal number of cores.

.. versionchanged:: 3.9
-   Added *stripdir*, *prependdir* and *limit_sl_dest* arguments.
+   Added *stripdir*, *prependdir*, *limit_sl_dest* and *hardlink_dupes* arguments.

-.. function:: compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, invalidation_mode=None, \*, stripdir=None, prependdir=None, limit_sl_dest=None)
+.. function:: compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, invalidation_mode=None, \*, stripdir=None, prependdir=None, limit_sl_dest=None, hardlink_dupes=False)

Compile the file with path *fullname*. Return a true value if the file
compiled successfully, and a false value otherwise.
@@ -257,6 +265,9 @@ Public functions
the ``-s``, ``-p`` and ``-e`` options described above.
They may be specified as ``str``, ``bytes`` or :py:class:`os.PathLike`.

+   If *hardlink_dupes* is ``True``, hardlinks are used to prevent duplicates
+   if ``.pyc`` files for multiple optimization levels have the same content.
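
A short sketch (the module name is a placeholder): the per-file API
enforces the same constraint as the command line, so a single
optimization level combined with *hardlink_dupes* is rejected::

    import compileall

    # Two levels: byte-identical outputs may be hardlinked.
    compileall.compile_file('mymodule.py', optimize=[1, 2],
                            hardlink_dupes=True, quiet=1)

    # One level: nothing to deduplicate, so this raises ValueError.
    try:
        compileall.compile_file('mymodule.py', optimize=[2],
                                hardlink_dupes=True, quiet=1)
    except ValueError as exc:
        print(exc)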

.. versionadded:: 3.2

.. versionchanged:: 3.5
@@ -273,7 +284,7 @@ Public functions
The *invalidation_mode* parameter's default value is updated to None.

.. versionchanged:: 3.9
-   Added *stripdir*, *prependdir* and *limit_sl_dest* arguments.
+   Added *stripdir*, *prependdir*, *limit_sl_dest* and *hardlink_dupes* arguments.

.. function:: compile_path(skip_curdir=True, maxlevels=0, force=False, quiet=0, legacy=False, optimize=-1, invalidation_mode=None)

Lib/compileall.py (38 changes: 31 additions, 7 deletions)
@@ -15,6 +15,7 @@
import importlib.util
import py_compile
import struct
+import filecmp

from functools import partial
from pathlib import Path
@@ -47,7 +48,7 @@ def _walk_dir(dir, maxlevels, quiet=0):
def compile_dir(dir, maxlevels=None, ddir=None, force=False,
rx=None, quiet=0, legacy=False, optimize=-1, workers=1,
invalidation_mode=None, *, stripdir=None,
-prependdir=None, limit_sl_dest=None):
+prependdir=None, limit_sl_dest=None, hardlink_dupes=False):
"""Byte-compile all modules in the given directory tree.

Arguments (only dir is required):
@@ -70,6 +71,7 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False,
after stripdir
limit_sl_dest: ignore symlinks if they are pointing outside of
the defined path
+hardlink_dupes: hardlink duplicated pyc files
"""
ProcessPoolExecutor = None
if ddir is not None and (stripdir is not None or prependdir is not None):
@@ -104,22 +106,24 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False,
invalidation_mode=invalidation_mode,
stripdir=stripdir,
prependdir=prependdir,
-limit_sl_dest=limit_sl_dest),
+limit_sl_dest=limit_sl_dest,
+hardlink_dupes=hardlink_dupes),
files)
success = min(results, default=True)
else:
for file in files:
if not compile_file(file, ddir, force, rx, quiet,
legacy, optimize, invalidation_mode,
stripdir=stripdir, prependdir=prependdir,
-limit_sl_dest=limit_sl_dest):
+limit_sl_dest=limit_sl_dest,
+hardlink_dupes=hardlink_dupes):
success = False
return success

def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
legacy=False, optimize=-1,
invalidation_mode=None, *, stripdir=None, prependdir=None,
-limit_sl_dest=None):
+limit_sl_dest=None, hardlink_dupes=False):
"""Byte-compile one file.

Arguments (only fullname is required):
@@ -140,6 +144,7 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
after stripdir
limit_sl_dest: ignore symlinks if they are pointing outside of
the defined path.
+hardlink_dupes: hardlink duplicated pyc files
"""

if ddir is not None and (stripdir is not None or prependdir is not None):
@@ -176,6 +181,10 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
if isinstance(optimize, int):
optimize = [optimize]

+# Remove duplicate levels and sort them: pyc files are then created in a
+# deterministic order, and each file only needs to be compared with the
+# one produced for the previous optimization level.
+optimize = sorted(set(optimize))
+
+if hardlink_dupes and len(optimize) < 2:
+    raise ValueError("Hardlinking of duplicated bytecode makes sense "
+                     "only for more than one optimization level.")

if rx is not None:
mo = rx.search(fullname)
if mo:
@@ -220,10 +229,16 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
if not quiet:
print('Compiling {!r}...'.format(fullname))
try:
-for opt_level, cfile in opt_cfiles.items():
+for index, opt_level in enumerate(optimize):
Review comment: By the way, sorting here is smart and efficient. That's a great idea :-)

+cfile = opt_cfiles[opt_level]
ok = py_compile.compile(fullname, cfile, dfile, True,
optimize=opt_level,
invalidation_mode=invalidation_mode)
+if index > 0 and hardlink_dupes:
+    # optimize is sorted, so the only possible duplicate of this file
+    # is the one produced for the previous optimization level.
+    previous_cfile = opt_cfiles[optimize[index - 1]]
+    if filecmp.cmp(cfile, previous_cfile, shallow=False):
+        # Identical bytecode: replace the new file with a hardlink
+        # to the previous one.
+        os.unlink(cfile)
+        os.link(previous_cfile, cfile)
except py_compile.PyCompileError as err:
success = False
if quiet >= 2:
@@ -352,6 +367,9 @@ def main():
'Python interpreter itself (specified by -O).'))
parser.add_argument('-e', metavar='DIR', dest='limit_sl_dest',
help='Ignore symlinks pointing outside of the DIR')
+parser.add_argument('--hardlink-dupes', action='store_true',
+                    dest='hardlink_dupes',
+                    help='Hardlink duplicated pyc files')

args = parser.parse_args()
compile_dests = args.compile_dest
@@ -371,6 +389,10 @@ def main():
if args.opt_levels is None:
args.opt_levels = [-1]

+if len(args.opt_levels) == 1 and args.hardlink_dupes:
+    parser.error("Hardlinking of duplicated bytecode makes sense "
+                 "only for more than one optimization level.")

if args.ddir is not None and (
args.stripdir is not None or args.prependdir is not None
):
@@ -404,7 +426,8 @@ def main():
stripdir=args.stripdir,
prependdir=args.prependdir,
optimize=args.opt_levels,
-limit_sl_dest=args.limit_sl_dest):
+limit_sl_dest=args.limit_sl_dest,
+hardlink_dupes=args.hardlink_dupes):
success = False
else:
if not compile_dir(dest, maxlevels, args.ddir,
Expand All @@ -414,7 +437,8 @@ def main():
stripdir=args.stripdir,
prependdir=args.prependdir,
optimize=args.opt_levels,
-limit_sl_dest=args.limit_sl_dest):
+limit_sl_dest=args.limit_sl_dest,
+hardlink_dupes=args.hardlink_dupes):
success = False
return success
else:
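
To round off the change, here is a small end-to-end sketch (the file name
is invented for illustration) that exercises the new behaviour and checks
the result through documented APIs:

    import compileall
    import os
    from importlib.util import cache_from_source

    # A module with no asserts and no docstrings compiles to identical
    # bytecode at optimization levels 1 and 2.
    with open('demo.py', 'w') as f:
        f.write('x = 1\n')

    compileall.compile_file('demo.py', optimize=[1, 2],
                            hardlink_dupes=True, quiet=1)

    pyc_opt1 = cache_from_source('demo.py', optimization=1)
    pyc_opt2 = cache_from_source('demo.py', optimization=2)

    # Both cache files now share a single inode (on filesystems that
    # support hardlinks), so st_nlink is 2 for each of them.
    assert os.path.samefile(pyc_opt1, pyc_opt2)
    print(os.stat(pyc_opt1).st_nlink)  # -> 2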