Merge pull request #28 from covert-encryption/armor-refactor

Refactor armor functions to use str and remove any `>` quotes.
covert-encryption · Nov 27, 2021 · e5110ff · e5110ff
2 parents 412012b + e205261
commit e5110ff
Show file tree

Hide file tree

Showing 2 changed files with 26 additions and 18 deletions.
diff --git a/covert/cli.py b/covert/cli.py
@@ -231,15 +231,15 @@ def nextfile_callback(prev, cur):
       data = util.armor_encode(data)
     if outf is not realoutf:
       if args.paste:
-        pyperclip.copy(f"```\n{data.decode()}\n```\n")
+        pyperclip.copy(f"```\n{data}\n```\n")
         return
       with realoutf:
         pretty = realoutf.isatty()
         if pretty:
           stderr.write("\x1B[1;30m```\x1B[0;34m\n")
           stderr.flush()
         try:
-          realoutf.write(data + b"\n")
+          realoutf.write(f"{data}\n".encode())
           realoutf.flush()
         finally:
           if pretty:
@@ -259,7 +259,7 @@ def main_dec(args):
   # If ASCII armored or TTY, read all input immediately (assumed to be short enough)
   total_size = os.path.getsize(args.files[0]) if args.files else 0
   if infile.isatty():
-    data = util.armor_decode((pyperclip.paste() if args.paste else tty.read_hidden("Encrypted message")).encode())
+    data = util.armor_decode(pyperclip.paste() if args.paste else tty.read_hidden("Encrypted message"))
     if not data:
       raise KeyboardInterrupt
     infile = BytesIO(data)
@@ -270,7 +270,7 @@ def main_dec(args):
     with infile:
       data = infile.read()
     try:
-      infile = BytesIO(util.armor_decode(data))
+      infile = BytesIO(util.armor_decode(data.decode()))
     except Exception:
       infile = BytesIO(data)
   else:

diff --git a/covert/util.py b/covert/util.py
@@ -1,54 +1,62 @@
+import platform
 import random
 import unicodedata
 from base64 import b64decode, b64encode
 from math import log2
 from secrets import choice, token_bytes
 
 ARMOR_MAX_SINGLELINE = 4000  # Safe limit for line input, where 4096 may be the limit
-B64_ALPHABET = b'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
+B64_ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
+IS_APPLE = platform.system() == "Darwin"
 
 
-def armor_decode(data):
+def armor_decode(data: str) -> bytes:
   """Base64 decode."""
-  # Fix CRLF, remove any surrounding whitespace and code block markers, support also urlsafe
-  data = data.replace(b'\r\n', b'\n').strip(b'\t `\n').replace(b'-', b'+').replace(b'_', b'/')
-  if not data.isascii():
+  # Fix CRLF, remove any surrounding whitespace and code block markers
+  data = data.replace('\r\n', '\n').strip('\t `\n')
+  if not data.isascii() or not data.isprintable():
     raise ValueError(f"Invalid armored encoding: data is not ASCII/Base64")
-  # Strip indent, trailing whitespace and empty lines
-  lines = [line for l in data.split(b'\n') if (line := l.strip())]
+  # Strip indent and quote marks, trailing whitespace and empty lines
+  lines = [line for l in data.split('\n') if (line := l.lstrip('\t >').rstrip())]
   # Empty input means empty output (will cause an error elsewhere)
   if not lines:
     return b''
   # Verify all lines
   for i, line in enumerate(lines):
     if any(ch not in B64_ALPHABET for ch in line):
-      raise ValueError(f"Invalid armored encoding: unrecognized data on line {i + 1}: {line!r}")
+      raise ValueError(f"Invalid armored encoding: unrecognized data on line {i + 1}")
   # Verify line lengths
   l = len(lines[0])
   for i, line in enumerate(lines[:-1]):
     l2 = len(line)
     if l2 < 76 or l2 % 4 or l2 != l:
       raise ValueError(f"Invalid armored encoding: length {l2} of line {i + 1} is invalid")
   # Not sure why we even bother to use the standard library after having handled all that...
-  data = b"".join(lines)
+  data = "".join(lines)
   padding = -len(data) % 4
-  return b64decode(data + padding*b'=', validate=True)
+  return b64decode(data + padding*'=', validate=True)
 
 
-def armor_encode(data):
+def armor_encode(data: bytes) -> str:
   """Base64 without the padding nonsense, and with adaptive line wrapping."""
-  data = b64encode(data).rstrip(b'=')
+  data = b64encode(data).decode().rstrip('=')
   if len(data) > ARMOR_MAX_SINGLELINE:
     # Make fingerprinting the encoding by line lengths a bit harder while still using >76
     splitlen = choice(range(76, 121, 4))
-    data = b'\n'.join([data[i:i + splitlen] for i in range(0, len(data), splitlen)])
+    data = '\n'.join([data[i:i + splitlen] for i in range(0, len(data), splitlen)])
   return data
 
 
-def encode(s):
+def encode(s: str) -> bytes:
+  """Unicode-normalizing UTF-8 encode."""
   return unicodedata.normalize("NFKC", s).encode()
 
 
+def decode_native(s: bytes) -> str:
+  """Restore platform-native Unicode normalization form (e.g. for filenames)."""
+  return unicodedata.normalize("NFD" if IS_APPLE else "NFKC", s.decode())
+
+
 def noncegen(nonce=None):
   nonce = token_bytes(12) if nonce is None else bytes(nonce)
   l = len(nonce)