From c72b475f567ae81ed8516d044d2559a77364f271 Mon Sep 17 00:00:00 2001
From: prascagneres
Date: Wed, 16 Oct 2024 17:07:14 +0200
Subject: [PATCH] support encrypted hwp files

Add a --password option to hwp5proc and hand its value to Hwp5File
through cli.open_hwpfile().

For documents whose header has the password flag set,
PasswordProtectedStream.open() now derives a 16-byte key from the
password (utils.genkey), decrypts the stream (utils.decrypt_data) and
raw-inflates the result with zlib instead of returning the encrypted
bytes. If inflating fails, a warning is logged and the still-encrypted
stream is returned.

hwp5.utils now imports AES from the Crypto package.
---
Review notes (everything up to the first "diff --git" line is ignored
by git am):

* The line structure of this patch was restored from a copy in which
  all newlines had been collapsed. Added and removed lines are complete
  and every hunk matches its @@ line counts, but indentation and the
  position of blank context lines were inferred. Check the context
  against the target tree before applying.
* PasswordProtectedStream.open(): "except Exception as e" was narrowed
  to "except zlib.error"; the bound name was unused.
* PASSWORD is module-level state in filestructure.py that
  Hwp5File.__init__ overwrites, so it always holds the password of the
  most recently opened protected file.
* genkey() writes into a fixed 160-byte buffer, so a password longer
  than 80 bytes raises IndexError.

 src/hwp5/cli.py           |  5 ++-
 src/hwp5/filestructure.py | 27 +++++++----
 src/hwp5/hwp5proc.py      |  4 ++
 src/hwp5/utils.py         | 95 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 121 insertions(+), 10 deletions(-)

diff --git a/src/hwp5/cli.py b/src/hwp5/cli.py
index 1fb921e7..43beaa47 100644
--- a/src/hwp5/cli.py
+++ b/src/hwp5/cli.py
@@ -86,10 +86,13 @@ def init_with_environ():
 
 def open_hwpfile(args):
     filename = args.hwp5file
+    password = ""
+    if args.password:
+        password = args.password
     if args.ole:
         hwpfile = OleStorage(filename)
     else:
-        hwpfile = Hwp5File(filename)
+        hwpfile = Hwp5File(filename, password)
     if args.vstreams:
         hwpfile = ExtraItemStorage(hwpfile)
     return hwpfile
diff --git a/src/hwp5/filestructure.py b/src/hwp5/filestructure.py
index f5fc5286..2ad8fa23 100644
--- a/src/hwp5/filestructure.py
+++ b/src/hwp5/filestructure.py
@@ -22,6 +22,7 @@
 from io import BytesIO
 import logging
 import sys
+import zlib
 
 from .bintype import read_type
 from .compressed import decompress
@@ -37,7 +38,11 @@
 from .utils import GeneratorTextReader
 from .utils import cached_property
 from .utils import transcoder
+from .utils import pad
+from .utils import decrypt_data
+from .utils import genkey
 
+PASSWORD = ""
 PY3 = sys.version_info.major == 3
 if PY3:
     basestring = str
@@ -134,13 +139,19 @@ def __getitem__(self, name):
 class PasswordProtectedStream(ItemWrapper):
 
     def open(self):
-        # TODO: 현재로선 암호화된 내용을 그냥 반환
-        logger.warning('Password-encrypted stream: currently decryption is '
-                       'not supported')
-        return self.wrapped.open()
+        pwd = genkey(PASSWORD.encode())
+        data = self.wrapped.open().read()
+        decrypted_stream = decrypt_data(pwd, pad(data))
+        try:
+            stream = zlib.decompress(decrypted_stream, -15)
+        except zlib.error:
+            logger.warning("Couldn't decrypt stream - probably bad password")
+            return self.wrapped.open()
+        return BytesIO(stream)
 
 
 class PasswordProtectedStorage(StorageWrapper):
+
     def __getitem__(self, name):
         item = self.wrapped[name]
         if is_stream(item):
@@ -533,15 +544,13 @@ class Hwp5File(ItemConversionStorage):
         stg: an instance of Storage
     '''
 
-    def __init__(self, stg):
+    def __init__(self, stg, password: str = ""):
         stg = Hwp5FileBase(stg)
 
         if stg.header.flags.password:
+            global PASSWORD
+            PASSWORD = password
             stg = Hwp5PasswordProtectedDoc(stg)
-
-            # TODO: 현재로선 decryption이 구현되지 않았으므로,
-            # 레코드 파싱은 불가능하다. 적어도 encrypted stream에
-            # 직접 접근은 가능하도록, 다음 레이어들은 bypass한다.
             ItemConversionStorage.__init__(self, stg)
             return
 
diff --git a/src/hwp5/hwp5proc.py b/src/hwp5/hwp5proc.py
index e03edc49..53922ddf 100644
--- a/src/hwp5/hwp5proc.py
+++ b/src/hwp5/hwp5proc.py
@@ -113,6 +113,10 @@ def main_argparser():
         '--logfile',
         help=_('Set log file.'),
     )
+    parser.add_argument(
+        '--password',
+        help=_('Set password for encrypted files.'),
+    )
     subcommands = parser.add_subparsers(
         title=_('subcommands'),
         description=_('valid subcommands'),
diff --git a/src/hwp5/utils.py b/src/hwp5/utils.py
index 83138f9a..c12e77d9 100644
--- a/src/hwp5/utils.py
+++ b/src/hwp5/utils.py
@@ -29,6 +29,8 @@
 import subprocess
 import sys
 import tempfile
+from Crypto.Cipher import AES
+import hashlib
 
 from .importhelper import pkg_resources_filename
 
@@ -419,3 +421,96 @@ def unlink_or_warning(path):
     except Exception as e:
         logger.exception(e)
         logger.warning('%s cannot be deleted', path)
+
+
+# NOTE - credit to junorouse -- their hwp-password-recover project contained the following functions
+# https://github.com/junorouse/hwp-password-recover
+
+
+def pad(s: bytes):
+    block_size = 16
+    size_of_last_block = len(s) % block_size
+    padding_amount = block_size - size_of_last_block
+    pad_bytes = bytes([padding_amount] * padding_amount)
+    return s + pad_bytes
+
+
+class AESCipher:
+    def __init__(self, key):
+        self.key = key
+
+    def encrypt(self, raw):
+        cipher = AES.new(self.key, AES.MODE_ECB)
+        return cipher.encrypt(raw)
+
+    def decrypt(self, enc):
+        cipher = AES.new(self.key, AES.MODE_ECB)
+        return cipher.decrypt(enc)
+
+
+def decrypt_data(pwd: bytes, data: bytearray):
+    TMP_IN = bytearray(16)
+    final_data = bytearray()
+
+    for kkk in range(0, len(data), 16):
+
+        REAL_INPUT = bytearray(data[kkk:kkk+16])
+
+        for i in range(128):
+
+            AAA = AESCipher(pwd).encrypt(TMP_IN)
+            OUT = AAA[0]
+
+            ff = i & 7
+
+            tmp = 1
+            for j in range(3):
+                v14 = TMP_IN[tmp]
+
+                TMP_IN[tmp-1] = ((2 * TMP_IN[tmp-1]) & 0xff) | (TMP_IN[tmp] >> 7)
+                v15 = TMP_IN[tmp+1]
+                v16 = ((2 * v14) & 0xff) | (TMP_IN[tmp+1] >> 7)
+
+                v17 = TMP_IN[tmp+2]
+                TMP_IN[tmp] = v16
+                v18 = ((2 * v15) & 0xff) | (v17 >> 7)
+
+                v19 = TMP_IN[tmp+3]
+                TMP_IN[tmp+1] = v18
+                v20 = ((2 * v17) & 0xff) | (v19 >> 7)
+
+                v21 = ((2 * v19) & 0xff) | (TMP_IN[tmp+4] >> 7)
+
+                TMP_IN[tmp+2] = v20
+                TMP_IN[tmp+3] = v21
+
+                tmp += 5
+
+            TMP_IN[15] = ((2 * TMP_IN[15]) & 0xff) | (REAL_INPUT[i >> 3] >> (7 - ff)) & 1
+
+            REAL_INPUT[i >> 3] ^= (OUT & 0x80) >> (i & 7)
+
+        final_data.extend(REAL_INPUT)
+
+    return final_data
+
+
+def genkey(pwd: bytes) -> bytes:
+    buf = bytearray(160)
+    password = bytearray(pwd)
+
+    for i in range(0, len(password)):
+        if i:
+            v6 = password[i-1]
+        else:
+            v6 = 0xec
+
+        v7 = (2 * v6 | (v6 >> 7)) & 0xff
+
+        buf[i*2] = v7
+        buf[i*2+1] = password[i]
+
+    sha1 = hashlib.sha1()
+    sha1.update(buf[0:len(password)*2])
+    h = sha1.digest()
+    return h[0:16]