-
Notifications
You must be signed in to change notification settings - Fork 133
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improvements to python script #240
Changes from 8 commits
6a6a071
e6a3639
476fa3e
a8d9c49
5f3dbf5
7df10b6
3a8563d
545aaca
83759d2
5a0aa67
5af12bc
7560b5a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
*.mo | ||
rmlint | ||
rmlint.sh | ||
rmlint.json | ||
src/config.h | ||
docs/rmlint.1.gz | ||
docs/rmlint.1 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,7 +24,7 @@ | |
|
||
# This is the python remover utility shipped inside the rmlint binary. | ||
# The 200 lines source presented below is meant to be clean and hackable. | ||
# It is intented to be used for corner cases where the built-in sh formatter | ||
# It is intended to be used for corner cases where the built-in sh formatter | ||
# is not enough or as an alternative to it. By default it works the same. | ||
|
||
# Python2 compat: | ||
|
@@ -39,14 +39,16 @@ | |
import argparse | ||
import subprocess | ||
|
||
CURRENT_UID = os.geteuid() | ||
CURRENT_GID = pwd.getpwuid(CURRENT_UID).pw_gid | ||
|
||
USE_COLOR = sys.stdout.isatty() and sys.stderr.isatty() | ||
COLORS = { | ||
'red': "\x1b[31;01m" if USE_COLOR else "", | ||
'yellow': "\x1b[33;01m" if USE_COLOR else "", | ||
'red': "\x1b[0;31m" if USE_COLOR else "", | ||
'blue': "\x1b[1;34m" if USE_COLOR else "", | ||
'green': "\x1b[0;32m" if USE_COLOR else "", | ||
'yellow': "\x1b[0;33m" if USE_COLOR else "", | ||
'reset': "\x1b[0m" if USE_COLOR else "", | ||
'green': "\x1b[32;01m" if USE_COLOR else "", | ||
'blue': "\x1b[34;01m" if USE_COLOR else "" | ||
} | ||
|
||
|
||
|
@@ -78,165 +80,194 @@ def original_check(path, original, be_paranoid=True): | |
|
||
|
||
def handle_duplicate_dir(path, original, **kwargs): | ||
shutil.rmtree(path) | ||
if not args.dry_run: | ||
shutil.rmtree(path) | ||
|
||
|
||
def handle_duplicate_file(path, original, args, **kwargs): | ||
if original_check(path, original['path'], be_paranoid=args.paranoid): | ||
os.remove(path) | ||
if not args.dry_run: | ||
os.remove(path) | ||
|
||
|
||
def handle_unfinished_cksum(path, **kwargs): | ||
pass # doesn't need any handling. | ||
|
||
|
||
def handle_empty_dir(path, **kwargs): | ||
os.rmdir(path) | ||
if not args.dry_run: | ||
os.rmdir(path) | ||
|
||
|
||
def handle_empy_file(path, **kwargs): | ||
os.remove(path) | ||
def handle_empty_file(path, **kwargs): | ||
if not args.dry_run: | ||
os.remove(path) | ||
|
||
|
||
def handle_nonstripped(path, **kwargs): | ||
subprocess.call(["strip", "--strip-debug", path]) | ||
if not args.dry_run: | ||
subprocess.call(["strip", "--strip-debug", path]) | ||
|
||
|
||
def handle_badlink(path, **kwargs): | ||
os.remove(path) | ||
|
||
|
||
CURRENT_UID = os.geteuid() | ||
CURRENT_GID = pwd.getpwuid(CURRENT_UID).pw_gid | ||
if not args.dry_run: | ||
os.remove(path) | ||
|
||
|
||
def handle_baduid(path, **kwargs): | ||
os.chmod(path, CURRENT_UID, -1) | ||
if not args.dry_run: | ||
os.chown(path, kwargs['args'].user, -1) | ||
|
||
|
||
def handle_badgid(path, **kwargs): | ||
os.chmod(path, -1, CURRENT_GID) | ||
if not args.dry_run: | ||
os.chown(path, -1, kwargs['args'].group) | ||
|
||
|
||
def handle_badugid(path, **kwargs): | ||
os.chmod(path, CURRENT_UID, CURRENT_GID) | ||
if not args.dry_run: | ||
os.chown(path, kwargs['args'].user, kwargs['args'].group) | ||
|
||
|
||
OPERATIONS = { | ||
"duplicate_dir": handle_duplicate_dir, | ||
"duplicate_file": handle_duplicate_file, | ||
"unfinished_cksum": handle_unfinished_cksum, | ||
"emptydir": handle_empty_dir, | ||
"emptyfile": handle_empy_file, | ||
"emptyfile": handle_empty_file, | ||
"nonstripped": handle_nonstripped, | ||
"badlink": handle_badlink, | ||
"baduid": handle_baduid, | ||
"badgid": handle_badgid, | ||
"badugid": handle_badugid, | ||
} | ||
|
||
MESSAGES = { | ||
"duplicate_dir": "removing tree", | ||
"duplicate_file": "removing", | ||
"unfinished_cksum": "checking", | ||
"emptydir": "removing", | ||
"emptyfile": "removing", | ||
"nonstripped": "stripping", | ||
"badlink": "removing", | ||
"baduid": "changing uid", | ||
"badgid": "changing gid", | ||
"badugid": "changing uid & gid", | ||
} | ||
|
||
|
||
def exec_operation(item, original=None, args=None): | ||
try: | ||
OPERATIONS[item['type']](item['path'], original=original, item=item, args=args) | ||
except OSError as err: | ||
print( | ||
'{c[red]}#{c[reset]} Error on `{item[path]}`:\n{c[red]}#{c[reset]} {err}'.format( | ||
'{c[red]}# {err}{c[reset]}'.format( | ||
item=item, err=err, c=COLORS | ||
), | ||
file=sys.stderr | ||
) | ||
|
||
|
||
def main(args, header, data, footer): | ||
def main(args, data): | ||
seen_cksums = set() | ||
last_original_item = None | ||
|
||
# Process header and footer, if present | ||
header, footer = [], [] | ||
if data[0].get('description'): | ||
header = data.pop(0) | ||
if data[-1].get('total_files'): | ||
footer = data.pop(-1) | ||
# TODO: Print header and footer data here before asking for confirmation | ||
|
||
if not args.no_ask and not args.dry_run: | ||
print('rmlint was executed in the following way:\n', | ||
header.get('args'), | ||
'\n\nPress Enter to continue and perform modifications, ' | ||
'or CTRL-C to exit.' | ||
'\nExecute this script with -d to disable this message.', | ||
file=sys.stderr) | ||
sys.stdin.read(1) | ||
|
||
MESSAGES = { | ||
'duplicate_dir': '{c[yellow]}Deleting duplicate directory'.format(c=COLORS), | ||
'duplicate_file': '{c[yellow]}Deleting duplicate:'.format(c=COLORS), | ||
"unfinished_cksum": "checking", | ||
'emptydir': '{c[green]}Deleting empty directory:'.format(c=COLORS), | ||
'emptyfile': '{c[green]}Deleting empty file:'.format(c=COLORS), | ||
'nonstripped': '{c[green]}Stripping debug symbols:'.format(c=COLORS), | ||
'badlink': '{c[green]}Deleting bad symlink:'.format(c=COLORS), | ||
'baduid': '{c[green]}chown {u}'.format(c=COLORS, u=args.user), | ||
'badgid': '{c[green]}chgrp {g}'.format(c=COLORS, g=args.group), | ||
'badugid': '{c[green]}chown {u}:{g}'.format(c=COLORS, u=args.user, g=args.group), | ||
} | ||
|
||
for item in data: | ||
if item['type'].startswith('duplicate_') and item['is_original']: | ||
print( | ||
"\n{c[green]}#{c[reset]} Deleting twins of {item[path]} ".format( | ||
item=item, c=COLORS | ||
) | ||
print('{c[blue]}[{prog:3}%]{c[reset]} ' | ||
'{c[green]}Keeping original: {c[reset]}{path}'.format( | ||
prog=item['progress'], path=item['path'], c=COLORS) | ||
) | ||
last_original_item = item | ||
|
||
# Do not handle originals. | ||
continue | ||
|
||
if not args.dry_run: | ||
exec_operation(item, original=last_original_item, args=args) | ||
|
||
print('{c[blue]}#{c[reset]} Handling ({t} -> {v}): {p}'.format( | ||
c=COLORS, t=item['type'], v=MESSAGES[item['type']], p=item['path']) | ||
print('{c[blue]}[{prog:3}%]{c[reset]} {v}{c[reset]} {p}'.format( | ||
c=COLORS, | ||
prog=item['progress'], | ||
v=MESSAGES[item['type']], | ||
p=item['path'], | ||
) | ||
) | ||
exec_operation(item, original=last_original_item, args=args) | ||
|
||
print('{c[blue]}[100%] Done!{c[reset]}'.format(c=COLORS)) | ||
|
||
|
||
if __name__ == '__main__': | ||
parser = argparse.ArgumentParser( | ||
description='Handle the files stored in rmlints json output' | ||
description='Handle the files in a JSON output of rmlint.' | ||
) | ||
|
||
parser.add_argument( | ||
'json_docs', metavar='json_doc', type=open, nargs='*', | ||
help='A json output of rmlint to handle (can be given many times)' | ||
'json_docs', metavar='json_doc', nargs='*', default=['.rmlint.json'], | ||
help='A JSON output of rmlint to handle (can be given multiple times)' | ||
) | ||
parser.add_argument( | ||
'-n', '--dry-run', action='store_true', | ||
help='Only print what would be done.' | ||
help='Do not perform any modifications, just print what would be done. ' + | ||
'(implies -d)' | ||
) | ||
parser.add_argument( | ||
'-d', '--no-ask', action='store_true', default=False, | ||
help='ask for confirmation before running (does nothing for -n)' | ||
help='Do not ask for confirmation before running.' | ||
) | ||
parser.add_argument( | ||
'-p', '--paranoid', action='store_true', default=False, | ||
help='Do an extra byte-by-byte compare before deleting duplicates' | ||
help='Recheck that files are still identical before removing duplicates.' | ||
) | ||
parser.add_argument( | ||
'-u', '--user', type=int, default=CURRENT_UID, | ||
help='Numerical uid for chown operations' | ||
) | ||
parser.add_argument( | ||
'-g', '--group', type=int, default=CURRENT_GID, | ||
help='Numerical gid for chgrp operations' | ||
) | ||
|
||
try: | ||
args = parser.parse_args() | ||
except OSError as err: | ||
print(err) | ||
sys.exit(-1) | ||
|
||
if not args.json_docs: | ||
# None given on the commandline | ||
args = parser.parse_args() | ||
json_docus = [] | ||
for doc in args.json_docs: | ||
try: | ||
args.json_docs.append(open('.rmlint.json', 'r')) | ||
except OSError as err: | ||
print('Cannot load default json document: ', str(err), file=sys.stderr) | ||
sys.exit(-2) | ||
|
||
json_docus = [json.load(doc) for doc in args.json_docs] | ||
json_elems = [item for sublist in json_docus for item in sublist] | ||
with open(doc) as f: | ||
j = json.load(f) | ||
json_docus.append(j) | ||
except IOError as err: # Cannot open file | ||
print(err, file=sys.stderr) | ||
sys.exit(-1) | ||
except ValueError as err: # File is not valid JSON | ||
print('{}: {}'.format(err, doc), file=sys.stderr) | ||
sys.exit(-1) | ||
|
||
try: | ||
if not args.no_ask and not args.dry_run: | ||
print('\nPlease hit any key before continuing to shredder your data.', file=sys.stderr) | ||
sys.stdin.read(1) | ||
|
||
print('# This is a dry run. Nothing will be modified.') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe add a colored There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done! There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This should only be printed if we're doing a dry_run... There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry about that, it's fixed! There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 👍 |
||
for json_doc in json_docus: | ||
main(args, json_doc[0], json_doc[1:-1], json_doc[-1]) | ||
main(args, json_doc) | ||
|
||
if args.dry_run: | ||
print( | ||
'\n{c[green]}#{c[reset]} This was a dry run. Nothing modified.'.format( | ||
'\n{c[green]}#{c[reset]} This was a dry run. Nothing was modified.'.format( | ||
c=COLORS | ||
) | ||
) | ||
except KeyboardInterrupt: | ||
print('canceled.') | ||
print('\ncanceled.') |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`original_check` needs to be adjusted to work with duplicated directories (i.e. call `rmlint --equal` for them).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I haven't attempted this since it would require knowing the path to `rmlint`, which isn't included in the JSON output. As you commented below, it isn't necessary for the Python script to support every feature of the `sh` output, at least for now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
True, but IMHO this is needed for correctness, not just as a feature. It's no blocker for this PR, though; I can implement that myself.