# Plain record types used by this module.
FieldItem = namedtuple('FieldItem', 'origin type_ name')
MethodItem = namedtuple('MethodItem', 'origin type_ name')


class ClassItem(namedtuple('ClassItem', [
    'origin',
    'access_flags',
    'superclass',
    # NOTE: misspelling of "interfaces". It is part of the tuple's public
    # field names, so it is kept as-is; use the ``interfaces`` property for
    # the correctly spelled read-only alias.
    'interaces',
    'source_file',
    'annotations_off',
    'class_data_off',
    'static_values_off'
])):
    """Record describing one class definition."""
    __slots__ = ()

    @property
    def interfaces(self):
        """Correctly spelled alias for the ``interaces`` field."""
        return self.interaces


class DexFile(object):
    """
    Implements the Android DEX format.

    .. note::

        Currently support is read-only. DexFile support is a work in
        progress.

    .. note::

        All processing is done in-memory. Loading exceptionally large
        DEX files may use large amounts of memory and time. Effort was
        made to make usage simple over efficient.
    """
    # Version strings accepted by the loader.
    SUPPORTED_VERSIONS = ('035', '038')
    # NOTE(review): presumably the two possible values of the header's
    # endianness flag -- confirm against the loader.
    ENDIAN_CONSTANT = 0x12345678
    REVERSE_ENDIAN_CONSTANT = 0x78563412
    NO_INDEX = 0xffffffff

    def __init__(self, fio):
        """
        Set the default header state, then load the DEX data from ``fio``.

        :param fio: any file-like object providing ``.read()`` and
                    ``.seek()``.
        """
        # Only set when the DEX file was loaded from disk, in which case it's
        # the adler32 checksum of the file - magic - sum.
        self._checksum = None
        self._signature = None
        self._version = '038'
        self._little_endian = True

        self._link_size = 0
        self._link_off = 0
        self._map_off = None

        self._string_table = []
        self._type_table = []

        # NOTE(review): ``_from_io`` belongs to this class, but its body is
        # damaged in the reviewed copy of the file and is therefore not
        # reproduced in this block.
        self._from_io(fio)
def uleb128(self, why=None):
    """
    Read one unsigned LEB128 value using ``self.byte()``.

    Each byte contributes its low seven bits, least-significant group
    first. A set high bit means another byte follows; the first byte with
    a clear high bit ends the value.

    :param why: accepted for call-site compatibility; not used here.
    :returns: the decoded non-negative integer.
    """
    result = 0
    shift = 0

    while True:
        octet = self.byte()
        result |= (octet & 0x7F) << shift
        # Testing the bit (rather than the sign) works whether ``byte()``
        # hands back a signed or an unsigned value, and lets a 0x00 byte
        # terminate the sequence.
        if not octet & 0x80:
            return result
        shift += 7


def unpack(self, fmt, why=None):
    """
    Read and decode ``struct.calcsize(fmt)`` bytes from ``self.fio``.

    :param fmt: a :mod:`struct` format string.
    :param why: accepted for call-site compatibility; not used here.
    :returns: the tuple produced by :func:`struct.unpack`.
    """
    # Resolved locally so this method does not depend on module-level
    # ``unpack``/``calcsize`` names being imported.
    import struct

    return struct.unpack(fmt, self.fio.read(struct.calcsize(fmt)))