Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvements to python script #240

Merged
merged 12 commits into from
Jul 23, 2017
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*.mo
rmlint
rmlint.sh
rmlint.json
src/config.h
docs/rmlint.1.gz
docs/rmlint.1
Expand Down
173 changes: 102 additions & 71 deletions lib/formats/py.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

# This is the python remover utility shipped inside the rmlint binary.
# The 200 lines source presented below is meant to be clean and hackable.
# It is intented to be used for corner cases where the built-in sh formatter
# It is intended to be used for corner cases where the built-in sh formatter
# is not enough or as an alternative to it. By default it works the same.

# Python2 compat:
Expand All @@ -39,14 +39,16 @@
import argparse
import subprocess

CURRENT_UID = os.geteuid()
CURRENT_GID = pwd.getpwuid(CURRENT_UID).pw_gid

USE_COLOR = sys.stdout.isatty() and sys.stderr.isatty()
COLORS = {
'red': "\x1b[31;01m" if USE_COLOR else "",
'yellow': "\x1b[33;01m" if USE_COLOR else "",
'red': "\x1b[0;31m" if USE_COLOR else "",
'blue': "\x1b[1;34m" if USE_COLOR else "",
'green': "\x1b[0;32m" if USE_COLOR else "",
'yellow': "\x1b[0;33m" if USE_COLOR else "",
'reset': "\x1b[0m" if USE_COLOR else "",
'green': "\x1b[32;01m" if USE_COLOR else "",
'blue': "\x1b[34;01m" if USE_COLOR else ""
}

Copy link
Owner

@sahib sahib Jul 2, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

original_check needs to be adjusted to work with duplicated directories (i.e. call rmlint --equal for them).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't attempted this since it would require knowing the path to rmlint, which isn't included in the json output. As you commented below, it isn't necessary for the python script to support every feature of the sh output, at least for now.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True, but imho this is needed for correctness, not just as a feature. But no blocker for this PR, I can implement that myself.


Expand Down Expand Up @@ -78,165 +80,194 @@ def original_check(path, original, be_paranoid=True):


def handle_duplicate_dir(path, original, **kwargs):
shutil.rmtree(path)
if not args.dry_run:
shutil.rmtree(path)


def handle_duplicate_file(path, original, args, **kwargs):
if original_check(path, original['path'], be_paranoid=args.paranoid):
os.remove(path)
if not args.dry_run:
os.remove(path)


def handle_unfinished_cksum(path, **kwargs):
pass # doesn't need any handling.


def handle_empty_dir(path, **kwargs):
os.rmdir(path)
if not args.dry_run:
os.rmdir(path)


def handle_empy_file(path, **kwargs):
os.remove(path)
def handle_empty_file(path, **kwargs):
if not args.dry_run:
os.remove(path)


def handle_nonstripped(path, **kwargs):
subprocess.call(["strip", "--strip-debug", path])
if not args.dry_run:
subprocess.call(["strip", "--strip-debug", path])


def handle_badlink(path, **kwargs):
os.remove(path)


CURRENT_UID = os.geteuid()
CURRENT_GID = pwd.getpwuid(CURRENT_UID).pw_gid
if not args.dry_run:
os.remove(path)


def handle_baduid(path, **kwargs):
os.chmod(path, CURRENT_UID, -1)
if not args.dry_run:
os.chown(path, kwargs['args'].user, -1)


def handle_badgid(path, **kwargs):
os.chmod(path, -1, CURRENT_GID)
if not args.dry_run:
os.chown(path, -1, kwargs['args'].group)


def handle_badugid(path, **kwargs):
os.chmod(path, CURRENT_UID, CURRENT_GID)
if not args.dry_run:
os.chown(path, kwargs['args'].user, kwargs['args'].group)


OPERATIONS = {
"duplicate_dir": handle_duplicate_dir,
"duplicate_file": handle_duplicate_file,
"unfinished_cksum": handle_unfinished_cksum,
"emptydir": handle_empty_dir,
"emptyfile": handle_empy_file,
"emptyfile": handle_empty_file,
"nonstripped": handle_nonstripped,
"badlink": handle_badlink,
"baduid": handle_baduid,
"badgid": handle_badgid,
"badugid": handle_badugid,
}

MESSAGES = {
"duplicate_dir": "removing tree",
"duplicate_file": "removing",
"unfinished_cksum": "checking",
"emptydir": "removing",
"emptyfile": "removing",
"nonstripped": "stripping",
"badlink": "removing",
"baduid": "changing uid",
"badgid": "changing gid",
"badugid": "changing uid & gid",
}


def exec_operation(item, original=None, args=None):
try:
OPERATIONS[item['type']](item['path'], original=original, item=item, args=args)
except OSError as err:
print(
'{c[red]}#{c[reset]} Error on `{item[path]}`:\n{c[red]}#{c[reset]} {err}'.format(
'{c[red]}# {err}{c[reset]}'.format(
item=item, err=err, c=COLORS
),
file=sys.stderr
)


def main(args, header, data, footer):
def main(args, data):
seen_cksums = set()
last_original_item = None

# Process header and footer, if present
header, footer = [], []
if data[0].get('description'):
header = data.pop(0)
if data[-1].get('total_files'):
footer = data.pop(-1)
# TODO: Print header and footer data here before asking for confirmation

if not args.no_ask and not args.dry_run:
print('rmlint was executed in the following way:\n',
header.get('args'),
'\n\nPress Enter to continue and perform modifications, '
'or CTRL-C to exit.'
'\nExecute this script with -d to disable this message.',
file=sys.stderr)
sys.stdin.read(1)

MESSAGES = {
'duplicate_dir': '{c[yellow]}Deleting duplicate directory'.format(c=COLORS),
'duplicate_file': '{c[yellow]}Deleting duplicate:'.format(c=COLORS),
"unfinished_cksum": "checking",
'emptydir': '{c[green]}Deleting empty directory:'.format(c=COLORS),
'emptyfile': '{c[green]}Deleting empty file:'.format(c=COLORS),
'nonstripped': '{c[green]}Stripping debug symbols:'.format(c=COLORS),
'badlink': '{c[green]}Deleting bad symlink:'.format(c=COLORS),
'baduid': '{c[green]}chown {u}'.format(c=COLORS, u=args.user),
'badgid': '{c[green]}chgrp {g}'.format(c=COLORS, g=args.group),
'badugid': '{c[green]}chown {u}:{g}'.format(c=COLORS, u=args.user, g=args.group),
}

for item in data:
if item['type'].startswith('duplicate_') and item['is_original']:
print(
"\n{c[green]}#{c[reset]} Deleting twins of {item[path]} ".format(
item=item, c=COLORS
)
print('{c[blue]}[{prog:3}%]{c[reset]} '
'{c[green]}Keeping original: {c[reset]}{path}'.format(
prog=item['progress'], path=item['path'], c=COLORS)
)
last_original_item = item

# Do not handle originals.
continue

if not args.dry_run:
exec_operation(item, original=last_original_item, args=args)

print('{c[blue]}#{c[reset]} Handling ({t} -> {v}): {p}'.format(
c=COLORS, t=item['type'], v=MESSAGES[item['type']], p=item['path'])
print('{c[blue]}[{prog:3}%]{c[reset]} {v}{c[reset]} {p}'.format(
c=COLORS,
prog=item['progress'],
v=MESSAGES[item['type']],
p=item['path'],
)
)
exec_operation(item, original=last_original_item, args=args)

print('{c[blue]}[100%] Done!{c[reset]}'.format(c=COLORS))


if __name__ == '__main__':
parser = argparse.ArgumentParser(
description='Handle the files stored in rmlints json output'
description='Handle the files in a JSON output of rmlint.'
)

parser.add_argument(
'json_docs', metavar='json_doc', type=open, nargs='*',
help='A json output of rmlint to handle (can be given many times)'
'json_docs', metavar='json_doc', nargs='*', default=['.rmlint.json'],
help='A JSON output of rmlint to handle (can be given multiple times)'
)
parser.add_argument(
'-n', '--dry-run', action='store_true',
help='Only print what would be done.'
help='Do not perform any modifications, just print what would be done. ' +
'(implies -d)'
)
parser.add_argument(
'-d', '--no-ask', action='store_true', default=False,
help='ask for confirmation before running (does nothing for -n)'
help='Do not ask for confirmation before running.'
)
parser.add_argument(
'-p', '--paranoid', action='store_true', default=False,
help='Do an extra byte-by-byte compare before deleting duplicates'
help='Recheck that files are still identical before removing duplicates.'
)
parser.add_argument(
'-u', '--user', type=int, default=CURRENT_UID,
help='Numerical uid for chown operations'
)
parser.add_argument(
'-g', '--group', type=int, default=CURRENT_GID,
help='Numerical gid for chgrp operations'
)

try:
args = parser.parse_args()
except OSError as err:
print(err)
sys.exit(-1)

if not args.json_docs:
# None given on the commandline
args = parser.parse_args()
json_docus = []
for doc in args.json_docs:
try:
args.json_docs.append(open('.rmlint.json', 'r'))
except OSError as err:
print('Cannot load default json document: ', str(err), file=sys.stderr)
sys.exit(-2)

json_docus = [json.load(doc) for doc in args.json_docs]
json_elems = [item for sublist in json_docus for item in sublist]
with open(doc) as f:
j = json.load(f)
json_docus.append(j)
except IOError as err: # Cannot open file
print(err, file=sys.stderr)
sys.exit(-1)
except ValueError as err: # File is not valid JSON
print('{}: {}'.format(err, doc), file=sys.stderr)
sys.exit(-1)

try:
if not args.no_ask and not args.dry_run:
print('\nPlease hit any key before continuing to shredder your data.', file=sys.stderr)
sys.stdin.read(1)

print('# This is a dry run. Nothing will be modified.')
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add a colored # for this one too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should only be printed if we're doing a dry_run...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry about that, it's fixed!

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

for json_doc in json_docus:
main(args, json_doc[0], json_doc[1:-1], json_doc[-1])
main(args, json_doc)

if args.dry_run:
print(
'\n{c[green]}#{c[reset]} This was a dry run. Nothing modified.'.format(
'\n{c[green]}#{c[reset]} This was a dry run. Nothing was modified.'.format(
c=COLORS
)
)
except KeyboardInterrupt:
print('canceled.')
print('\ncanceled.')