sahib · sahib · Jul 23, 2017 · Jun 15, 2017 · Jun 15, 2017 · Jun 16, 2017
diff --git a/.gitignore b/.gitignore
@@ -7,6 +7,7 @@
 *.mo
 rmlint
 rmlint.sh
+rmlint.json
 src/config.h
 docs/rmlint.1.gz
 docs/rmlint.1

diff --git a/lib/formats/py.py b/lib/formats/py.py
@@ -24,7 +24,7 @@
 
 # This is the python remover utility shipped inside the rmlint binary.
 # The 200 lines source presented below is meant to be clean and hackable.
-# It is intented to be used for corner cases where the built-in sh formatter
+# It is intended to be used for corner cases where the built-in sh formatter
 # is not enough or as an alternative to it. By default it works the same.
 
 # Python2 compat:
@@ -39,14 +39,16 @@
 import argparse
 import subprocess
 
+CURRENT_UID = os.geteuid()
+CURRENT_GID = pwd.getpwuid(CURRENT_UID).pw_gid
 
 USE_COLOR = sys.stdout.isatty() and sys.stderr.isatty()
 COLORS = {
-    'red':    "\x1b[31;01m" if USE_COLOR else "",
-    'yellow': "\x1b[33;01m" if USE_COLOR else "",
+    'red':    "\x1b[0;31m" if USE_COLOR else "",
+    'blue':   "\x1b[1;34m" if USE_COLOR else "",
+    'green':  "\x1b[0;32m" if USE_COLOR else "",
+    'yellow': "\x1b[0;33m" if USE_COLOR else "",
     'reset':  "\x1b[0m" if USE_COLOR else "",
-    'green':  "\x1b[32;01m" if USE_COLOR else "",
-    'blue':   "\x1b[34;01m" if USE_COLOR else ""
 }
 
 
@@ -78,165 +80,194 @@ def original_check(path, original, be_paranoid=True):
 
 
 def handle_duplicate_dir(path, original, **kwargs):
-    shutil.rmtree(path)
+    if not kwargs['args'].dry_run:  # args is passed in by exec_operation()
+        shutil.rmtree(path)
 
 
 def handle_duplicate_file(path, original, args, **kwargs):
     if original_check(path, original['path'], be_paranoid=args.paranoid):
-        os.remove(path)
+        if not args.dry_run:
+            os.remove(path)
 
 
 def handle_unfinished_cksum(path, **kwargs):
     pass  # doesn't need any handling.
 
 
 def handle_empty_dir(path, **kwargs):
-    os.rmdir(path)
+    if not kwargs['args'].dry_run:  # args is passed in by exec_operation()
+        os.rmdir(path)
 
 
-def handle_empy_file(path, **kwargs):
-    os.remove(path)
+def handle_empty_file(path, **kwargs):
+    if not kwargs['args'].dry_run:  # args is passed in by exec_operation()
+        os.remove(path)
 
 
 def handle_nonstripped(path, **kwargs):
-    subprocess.call(["strip", "--strip-debug", path])
+    if not kwargs['args'].dry_run:  # args is passed in by exec_operation()
+        subprocess.call(["strip", "--strip-debug", path])
 
 
 def handle_badlink(path, **kwargs):
-    os.remove(path)
-
-
-CURRENT_UID = os.geteuid()
-CURRENT_GID = pwd.getpwuid(CURRENT_UID).pw_gid
+    if not kwargs['args'].dry_run:  # args is passed in by exec_operation()
+        os.remove(path)
 
 
 def handle_baduid(path, **kwargs):
-    os.chmod(path, CURRENT_UID, -1)
+    if not kwargs['args'].dry_run:  # same args object the chown below reads
+        os.chown(path, kwargs['args'].user, -1)
 
 
 def handle_badgid(path, **kwargs):
-    os.chmod(path, -1, CURRENT_GID)
+    if not kwargs['args'].dry_run:  # same args object the chown below reads
+        os.chown(path, -1, kwargs['args'].group)
 
 
 def handle_badugid(path, **kwargs):
-    os.chmod(path, CURRENT_UID, CURRENT_GID)
+    if not kwargs['args'].dry_run:  # same args object the chown below reads
+        os.chown(path, kwargs['args'].user, kwargs['args'].group)
 
 
 OPERATIONS = {
     "duplicate_dir": handle_duplicate_dir,
     "duplicate_file": handle_duplicate_file,
     "unfinished_cksum": handle_unfinished_cksum,
     "emptydir": handle_empty_dir,
-    "emptyfile": handle_empy_file,
+    "emptyfile": handle_empty_file,
     "nonstripped": handle_nonstripped,
     "badlink": handle_badlink,
     "baduid": handle_baduid,
     "badgid": handle_badgid,
     "badugid": handle_badugid,
 }
 
-MESSAGES = {
-    "duplicate_dir": "removing tree",
-    "duplicate_file": "removing",
-    "unfinished_cksum": "checking",
-    "emptydir": "removing",
-    "emptyfile": "removing",
-    "nonstripped": "stripping",
-    "badlink": "removing",
-    "baduid": "changing uid",
-    "badgid": "changing gid",
-    "badugid": "changing uid & gid",
-}
 
 
 def exec_operation(item, original=None, args=None):
     try:
         OPERATIONS[item['type']](item['path'], original=original, item=item, args=args)
     except OSError as err:
         print(
-            '{c[red]}#{c[reset]} Error on `{item[path]}`:\n{c[red]}#{c[reset]}    {err}'.format(
+            '{c[red]}# {err}{c[reset]}'.format(
                 item=item, err=err, c=COLORS
             ),
             file=sys.stderr
         )
 
 
-def main(args, header, data, footer):
+def main(args, data):
     seen_cksums = set()
     last_original_item = None
 
+    # Process header and footer, if present (dict defaults keep .get() usable)
+    header, footer = {}, {}
+    if data and 'description' in data[0]:
+        header = data.pop(0)
+    if data and 'total_files' in data[-1]:  # key test: total_files may be 0
+        footer = data.pop(-1)
+    # TODO: Print header and footer data here before asking for confirmation
+
+    if not args.no_ask and not args.dry_run:
+        print('rmlint was executed in the following way:\n',
+            header.get('args'),
+            '\n\nPress Enter to continue and perform modifications, '
+            'or CTRL-C to exit.'
+            '\nExecute this script with -d to disable this message.',
+            file=sys.stderr)
+        sys.stdin.read(1)
+
+    MESSAGES = {
+        'duplicate_dir':    '{c[yellow]}Deleting duplicate directory'.format(c=COLORS),
+        'duplicate_file':   '{c[yellow]}Deleting duplicate:'.format(c=COLORS),
+        "unfinished_cksum": "checking",
+        'emptydir':         '{c[green]}Deleting empty directory:'.format(c=COLORS),
+        'emptyfile':        '{c[green]}Deleting empty file:'.format(c=COLORS),
+        'nonstripped':      '{c[green]}Stripping debug symbols:'.format(c=COLORS),
+        'badlink':          '{c[green]}Deleting bad symlink:'.format(c=COLORS),
+        'baduid':           '{c[green]}chown {u}'.format(c=COLORS, u=args.user),
+        'badgid':           '{c[green]}chgrp {g}'.format(c=COLORS, g=args.group),
+        'badugid':          '{c[green]}chown {u}:{g}'.format(c=COLORS, u=args.user, g=args.group),
+    }
+
     for item in data:
         if item['type'].startswith('duplicate_') and item['is_original']:
-            print(
-                "\n{c[green]}#{c[reset]} Deleting twins of {item[path]} ".format(
-                    item=item, c=COLORS
-                )
+            print('{c[blue]}[{prog:3}%]{c[reset]} '
+                '{c[green]}Keeping original:   {c[reset]}{path}'.format(
+                prog=item['progress'], path=item['path'], c=COLORS)
             )
             last_original_item = item
 
             # Do not handle originals.
             continue
 
-        if not args.dry_run:
-            exec_operation(item, original=last_original_item, args=args)
-
-        print('{c[blue]}#{c[reset]} Handling ({t} -> {v}): {p}'.format(
-            c=COLORS, t=item['type'], v=MESSAGES[item['type']], p=item['path'])
+        print('{c[blue]}[{prog:3}%]{c[reset]} {v}{c[reset]} {p}'.format(
+            c=COLORS,
+            prog=item['progress'],
+            v=MESSAGES[item['type']],
+            p=item['path'],
+            )
         )
+        exec_operation(item, original=last_original_item, args=args)
+
+    print('{c[blue]}[100%] Done!{c[reset]}'.format(c=COLORS))
 
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
-        description='Handle the files stored in rmlints json output'
+        description='Handle the files in a JSON output of rmlint.'
     )
 
     parser.add_argument(
-        'json_docs', metavar='json_doc', type=open, nargs='*',
-        help='A json output of rmlint to handle (can be given many times)'
+        'json_docs', metavar='json_doc', nargs='*', default=['.rmlint.json'],
+        help='A JSON output of rmlint to handle (can be given multiple times)'
     )
     parser.add_argument(
         '-n', '--dry-run', action='store_true',
-        help='Only print what would be done.'
+        help='Do not perform any modifications, just print what would be done. ' +
+        '(implies -d)'
     )
     parser.add_argument(
         '-d', '--no-ask', action='store_true', default=False,
-        help='ask for confirmation before running (does nothing for -n)'
+        help='Do not ask for confirmation before running.'
     )
     parser.add_argument(
         '-p', '--paranoid', action='store_true', default=False,
-        help='Do an extra byte-by-byte compare before deleting duplicates'
+        help='Recheck that files are still identical before removing duplicates.'
+    )
+    parser.add_argument(
+        '-u', '--user', type=int, default=CURRENT_UID,
+        help='Numerical uid for chown operations'
+    )
+    parser.add_argument(
+        '-g', '--group', type=int, default=CURRENT_GID,
+        help='Numerical gid for chgrp operations'
     )
 
-    try:
-        args = parser.parse_args()
-    except OSError as err:
-        print(err)
-        sys.exit(-1)
-
-    if not args.json_docs:
-        # None given on the commandline
+    args = parser.parse_args()
+    json_docus = []
+    for doc in args.json_docs:
         try:
-            args.json_docs.append(open('.rmlint.json', 'r'))
-        except OSError as err:
-            print('Cannot load default json document: ', str(err), file=sys.stderr)
-            sys.exit(-2)
-
-    json_docus = [json.load(doc) for doc in args.json_docs]
-    json_elems = [item for sublist in json_docus for item in sublist]
+            with open(doc) as f:
+                j = json.load(f)
+            json_docus.append(j)
+        except IOError as err:      # Cannot open file
+            print(err, file=sys.stderr)
+            sys.exit(-1)
+        except ValueError as err:   # File is not valid JSON
+            print('{}: {}'.format(err, doc), file=sys.stderr)
+            sys.exit(-1)
 
     try:
-        if not args.no_ask and not args.dry_run:
-            print('\nPlease hit any key before continuing to shredder your data.', file=sys.stderr)
-            sys.stdin.read(1)
-
+        if args.dry_run: print('# This is a dry run. Nothing will be modified.')
         for json_doc in json_docus:
-            main(args, json_doc[0], json_doc[1:-1], json_doc[-1])
+            main(args, json_doc)
 
         if args.dry_run:
             print(
-                '\n{c[green]}#{c[reset]} This was a dry run. Nothing modified.'.format(
+                '\n{c[green]}#{c[reset]} This was a dry run. Nothing was modified.'.format(
                     c=COLORS
                 )
             )
     except KeyboardInterrupt:
-        print('canceled.')
+        print('\ncanceled.')