Skip to content

More efficient resolving #16

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 10, 2017
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 59 additions & 53 deletions jsonpointer.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,28 +153,53 @@ def to_last(self, doc):
if not self.parts:
return doc, None

for part in self.parts[:-1]:
doc = self.walk(doc, part)

return doc, self.get_part(doc, self.parts[-1])


def resolve(self, doc, default=_nothing):
doc = self.resolve(doc, parts=self.parts[:-1])
last = self.parts[-1]
ptype = type(doc)
if ptype == dict:
pass
elif ptype == list or isinstance(doc, Sequence):
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't ptype == list imply isinstance(doc, Sequence) It should then be possible to reduce this to

elif isinstance(doc, Sequence):

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I miss this accidentally too. Normally, this should be used is operator since types are some sort of singletons and there is no need to invoke regular comparison routines for them.

The idea with check for list before call isinstance is an optimization for general case where doc is mostly dict or list. So the idea is to use cheaper operations for common cases and fallback to more wide, but costly, type checks: isinstance(foo, Sequence) is 100 times slower when direct type comparison because of function call and unwind all the registered types to those abstract Sequence one.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, so in that case the is operator should be used, and it should have some comment explaining that this is a performance optimization.

if not RE_ARRAY_INDEX.match(str(last)):
raise JsonPointerException("'%s' is not a valid list index" % (last, ))
last = int(last)

return doc, last

def resolve(self, doc, default=_nothing, parts=None):
"""Resolves the pointer against doc and returns the referenced object"""

for part in self.parts:

try:
doc = self.walk(doc, part)
except JsonPointerException:
if default is _nothing:
raise
if parts is None:
parts = self.parts

try:
for part in parts:
ptype = type(doc)
if ptype == dict:
doc = doc[part]
elif ptype == list or isinstance(doc, Sequence):
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above.

if part == '-':
doc = EndOfList(doc)
else:
if not RE_ARRAY_INDEX.match(str(part)):
raise JsonPointerException("'%s' is not a valid list index" % (part, ))
doc = doc[int(part)]
else:
return default
doc = doc[part]
except KeyError:
if default is not _nothing:
return default
raise JsonPointerException("member '%s' not found in %s" % (part, doc))
except IndexError:
if default is not _nothing:
return default
raise JsonPointerException("index '%s' is out of bounds" % (part, ))
except TypeError:
if default is not _nothing:
return default
raise JsonPointerException("Document '%s' does not support indexing, "
"must be dict/list or support __getitem__" % type(doc))

return doc


get = resolve

def set(self, doc, value, inplace=True):
Expand All @@ -196,56 +221,37 @@ def set(self, doc, value, inplace=True):
def get_part(self, doc, part):
""" Returns the next step in the correct type """

if isinstance(doc, Mapping):
# Optimize for common cases of doc being a dict or list, but not a
# sub-class (because isinstance() is far slower)
ptype = type(doc)
if ptype == dict:
return part

elif isinstance(doc, Sequence):

if ptype == list or isinstance(doc, Sequence):
if part == '-':
return part

if not RE_ARRAY_INDEX.match(str(part)):
raise JsonPointerException("'%s' is not a valid list index" % (part, ))

return int(part)

elif hasattr(doc, '__getitem__'):
# Allow indexing via ducktyping if the target has defined __getitem__
return part

else:
raise JsonPointerException("Document '%s' does not support indexing, "
"must be dict/list or support __getitem__" % type(doc))

return part

def walk(self, doc, part):
""" Walks one step in doc and returns the referenced part """

part = self.get_part(doc, part)

assert (type(doc) in (dict, list) or hasattr(doc, '__getitem__')), "invalid document type %s" % (type(doc),)

if isinstance(doc, Mapping):
try:
return doc[part]

except KeyError:
raise JsonPointerException("member '%s' not found in %s" % (part, doc))

elif isinstance(doc, Sequence):

if part == '-':
return EndOfList(doc)

try:
return doc[part]

except IndexError:
raise JsonPointerException("index '%s' is out of bounds" % (part, ))

else:
# Object supports __getitem__, assume custom indexing
if part == '-' and isinstance(doc, Sequence):
return EndOfList(doc)
try:
return doc[part]
except KeyError:
raise JsonPointerException("member '%s' not found in %s" % (part, doc))
except IndexError:
raise JsonPointerException("index '%s' is out of bounds" % (part, ))
except TypeError:
raise JsonPointerException("Document '%s' does not support indexing, "
"must be dict/list or support __getitem__" % type(doc))


def contains(self, ptr):
"""Returns True if self contains the given ptr"""
Expand Down