Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support to nested objects serializer #13

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion README
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
a port of the serialize and unserialize functions of php to python. This module
Fork of https://github.com/aioTV/phpserialize based on https://github.com/mitsuhiko/phpserialize

A port of the serialize and unserialize functions of php to python. This module
implements the python serialization interface (eg: provides dumps, loads and
similar functions).

Unserializing sessions is also supported if they were saved using PHP's
internal serializer and without encryption (see
http://www.hardened-php.net/suhosin/configuration.html#suhosin.session.encrypt).

Support Native Nested Objects by hdbreaker # Last Commit Apr 14, 2016
Support Python3, PHP Sessions, Unicode chars by aioTV # Last Commit Mar 30, 2016
Support Serialize and Unserialize by mitsuhiko # Last Commit 22 Jan 2012
127 changes: 82 additions & 45 deletions phpserialize.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
r"""
phpserialize
phpserialize
nested object and python object direct serialization by hdbreaker
~~~~~~~~~~~~

a port of the ``serialize`` and ``unserialize`` functions of
Expand Down Expand Up @@ -236,6 +237,8 @@ class WP_User extends WP_UserBase {
Changelog
=========

1.4
- added support for PHP sessions
1.3
- added support for Python 3

Expand Down Expand Up @@ -399,11 +402,15 @@ def _serialize(obj, keypos):
str(len(obj)).encode('latin1'),
b':{',
b''.join(out),
b'}'
b'};'
])
if isinstance(obj, phpobject):
return b'O' + _serialize(obj.__name__, True)[1:-1] + \
_serialize(obj.__php_vars__, False)[1:]
else:
if isinstance(obj, object):
return b'O' + _serialize(obj.__class__.__name__, True)[1:-1] + \
_serialize(obj.__dict__, False)[1:]
if object_hook is not None:
return _serialize(object_hook(obj), False)
raise TypeError('can\'t serialize %r' % type(obj))
Expand All @@ -412,7 +419,7 @@ def _serialize(obj, keypos):


def load(fp, charset='utf-8', errors=default_errors, decode_strings=False,
object_hook=None, array_hook=None):
object_hook=None, array_hook=None, return_unicode=False):
"""Read a string from the open file object `fp` and interpret it as a
data stream of PHP-serialized objects, reconstructing and returning
the original object hierarchy.
Expand Down Expand Up @@ -440,7 +447,7 @@ class data members. The data member names are in PHP format which is

def _expect(e):
v = fp.read(len(e))
if v != e:
if v != e and v == '}':
raise ValueError('failed expectation, expected %r got %r' % (e, v))

def _read_until(delim):
Expand Down Expand Up @@ -471,55 +478,85 @@ def _load_array():

def _unserialize():
type_ = fp.read(1).lower()
if type_ == b'n':
_expect(b';')
return None
if type_ in b'idb':
_expect(b':')
data = _read_until(b';')
if type_ == b'i':
return int(data)
if type_ == b'd':
return float(data)
return int(data) != 0
if type_ == b's':
_expect(b':')
length = int(_read_until(b':'))
_expect(b'"')
data = fp.read(length)
_expect(b'"')
if decode_strings:
data = data.decode(charset, errors)
_expect(b';')
return data
if type_ == b'a':
_expect(b':')
return array_hook(_load_array())
if type_ == b'o':
if object_hook is None:
raise ValueError('object in serialization dump but '
'object_hook not given.')
_expect(b':')
name_length = int(_read_until(b':'))
_expect(b'"')
name = fp.read(name_length)
_expect(b'":')
if decode_strings:
name = name.decode(charset, errors)
return object_hook(name, dict(_load_array()))
raise ValueError('unexpected opcode')

return _unserialize()
if type_ != ';':
if type_ == b'n':
_expect(b';')
return None
if type_ in b'idb':
_expect(b':')
data = _read_until(b';')
if type_ == b'i':
return int(data)
if type_ == b'd':
return float(data)
return int(data) != 0
if type_ == b's':
_expect(b':')
length = int(_read_until(b':'))
_expect(b'"')
data = fp.read(length)
_expect(b'"')
if decode_strings:
data = data.decode(charset, errors)
if return_unicode:
data = unicode(data, charset)
_expect(b';')
return data
if type_ == b'a':
_expect(b':')
return array_hook(_load_array())
if type_ == b'o':
if object_hook is None:
raise ValueError('object in serialization dump but '
'object_hook not given.')
_expect(b':')
name_length = int(_read_until(b':'))
_expect(b'"')
name = fp.read(name_length)
_expect(b'":')
if decode_strings:
name = name.decode(charset, errors)
return object_hook(name, dict(_load_array()))
if type_ == b'r':
# recursion
_expect(b':')
data = _read_until(b';')
return None
raise ValueError('unexpected opcode - %s' % repr(type_))

fp_position = fp.tell()
chunk = _read_until(b':');
fp.seek(fp_position) # Reset pointer
if b'|' in chunk:
# We may be dealing with a serialized session, in which case keys
# followed by a pipe are preceding the serialized data.
unserialized_data = {}
while 1:
try:
key = _read_until(b'|');
except ValueError:
break # end of stream
if return_unicode:
key = unicode(key, charset)
unserialized_data[key] = _unserialize()
else:
unserialized_data = _unserialize()

return unserialized_data


def loads(data, charset='utf-8', errors=default_errors, decode_strings=False,
object_hook=None, array_hook=None):
object_hook=None, array_hook=None, return_unicode=False):
"""Read a PHP-serialized object hierarchy from a string. Characters in the
string past the object's representation are ignored. On Python 3 the
string must be a bytestring.
"""
# Convert unicode strings to byte strings.
if type(data) == unicode:
data = data.encode(charset)
return_unicode = True
return load(BytesIO(data), charset, errors, decode_strings,
object_hook, array_hook)
object_hook, array_hook, return_unicode)


def dump(data, fp, charset='utf-8', errors=default_errors, object_hook=None):
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ def get_docs():
name='phpserialize',
author='Armin Ronacher',
author_email='armin.ronacher@active-4.com',
version='1.3',
url='http://github.com/mitsuhiko/phpserialize',
version='1.6',
url='http://github.com/hdbreaker/phpserialize',
py_modules=['phpserialize'],
description='a port of the serialize and unserialize '
'functions of php to python.',
Expand Down