-
Notifications
You must be signed in to change notification settings - Fork 41
More efficient resolving #16
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
stefankoegl
merged 4 commits into
stefankoegl:master
from
alexdutton:more-efficient-resolving
Sep 10, 2017
Merged
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
3f460a5
Simplify `walk` method. No need to look before we leap.
alexdutton 76b542b
Reduce number of isinstance() calls in get_part
alexdutton c4372cd
Optimize in get_part for the common cases of doc being dict or list
alexdutton 9653d1b
Re-factor resolve and to_last to not make lots of function calls.
alexdutton File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -153,28 +153,53 @@ def to_last(self, doc): | |
if not self.parts: | ||
return doc, None | ||
|
||
for part in self.parts[:-1]: | ||
doc = self.walk(doc, part) | ||
|
||
return doc, self.get_part(doc, self.parts[-1]) | ||
|
||
|
||
def resolve(self, doc, default=_nothing): | ||
doc = self.resolve(doc, parts=self.parts[:-1]) | ||
last = self.parts[-1] | ||
ptype = type(doc) | ||
if ptype == dict: | ||
pass | ||
elif ptype == list or isinstance(doc, Sequence): | ||
if not RE_ARRAY_INDEX.match(str(last)): | ||
raise JsonPointerException("'%s' is not a valid list index" % (last, )) | ||
last = int(last) | ||
|
||
return doc, last | ||
|
||
def resolve(self, doc, default=_nothing, parts=None): | ||
"""Resolves the pointer against doc and returns the referenced object""" | ||
|
||
for part in self.parts: | ||
|
||
try: | ||
doc = self.walk(doc, part) | ||
except JsonPointerException: | ||
if default is _nothing: | ||
raise | ||
if parts is None: | ||
parts = self.parts | ||
|
||
try: | ||
for part in parts: | ||
ptype = type(doc) | ||
if ptype == dict: | ||
doc = doc[part] | ||
elif ptype == list or isinstance(doc, Sequence): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above. |
||
if part == '-': | ||
doc = EndOfList(doc) | ||
else: | ||
if not RE_ARRAY_INDEX.match(str(part)): | ||
raise JsonPointerException("'%s' is not a valid list index" % (part, )) | ||
doc = doc[int(part)] | ||
else: | ||
return default | ||
doc = doc[part] | ||
except KeyError: | ||
if default is not _nothing: | ||
return default | ||
raise JsonPointerException("member '%s' not found in %s" % (part, doc)) | ||
except IndexError: | ||
if default is not _nothing: | ||
return default | ||
raise JsonPointerException("index '%s' is out of bounds" % (part, )) | ||
except TypeError: | ||
if default is not _nothing: | ||
return default | ||
raise JsonPointerException("Document '%s' does not support indexing, " | ||
"must be dict/list or support __getitem__" % type(doc)) | ||
|
||
return doc | ||
|
||
|
||
get = resolve | ||
|
||
def set(self, doc, value, inplace=True): | ||
|
@@ -196,56 +221,37 @@ def set(self, doc, value, inplace=True): | |
def get_part(self, doc, part): | ||
""" Returns the next step in the correct type """ | ||
|
||
if isinstance(doc, Mapping): | ||
# Optimize for common cases of doc being a dict or list, but not a | ||
# sub-class (because isinstance() is far slower) | ||
ptype = type(doc) | ||
if ptype == dict: | ||
return part | ||
|
||
elif isinstance(doc, Sequence): | ||
|
||
if ptype == list or isinstance(doc, Sequence): | ||
if part == '-': | ||
return part | ||
|
||
if not RE_ARRAY_INDEX.match(str(part)): | ||
raise JsonPointerException("'%s' is not a valid list index" % (part, )) | ||
|
||
return int(part) | ||
|
||
elif hasattr(doc, '__getitem__'): | ||
# Allow indexing via ducktyping if the target has defined __getitem__ | ||
return part | ||
|
||
else: | ||
raise JsonPointerException("Document '%s' does not support indexing, " | ||
"must be dict/list or support __getitem__" % type(doc)) | ||
|
||
return part | ||
|
||
def walk(self, doc, part): | ||
""" Walks one step in doc and returns the referenced part """ | ||
|
||
part = self.get_part(doc, part) | ||
|
||
assert (type(doc) in (dict, list) or hasattr(doc, '__getitem__')), "invalid document type %s" % (type(doc),) | ||
|
||
if isinstance(doc, Mapping): | ||
try: | ||
return doc[part] | ||
|
||
except KeyError: | ||
raise JsonPointerException("member '%s' not found in %s" % (part, doc)) | ||
|
||
elif isinstance(doc, Sequence): | ||
|
||
if part == '-': | ||
return EndOfList(doc) | ||
|
||
try: | ||
return doc[part] | ||
|
||
except IndexError: | ||
raise JsonPointerException("index '%s' is out of bounds" % (part, )) | ||
|
||
else: | ||
# Object supports __getitem__, assume custom indexing | ||
if part == '-' and isinstance(doc, Sequence): | ||
return EndOfList(doc) | ||
try: | ||
return doc[part] | ||
except KeyError: | ||
raise JsonPointerException("member '%s' not found in %s" % (part, doc)) | ||
except IndexError: | ||
raise JsonPointerException("index '%s' is out of bounds" % (part, )) | ||
except TypeError: | ||
raise JsonPointerException("Document '%s' does not support indexing, " | ||
"must be dict/list or support __getitem__" % type(doc)) | ||
|
||
|
||
def contains(self, ptr): | ||
"""Returns True if self contains the given ptr""" | ||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doesn't
ptype == list
implyisinstance(doc, Sequence)
It should then be possible to reduce this toThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, I miss this accidentally too. Normally, this should be used
is
operator since types are some sort of singletons and there is no need to invoke regular comparison routines for them.The idea with check for list before call isinstance is an optimization for general case where doc is mostly dict or list. So the idea is to use cheaper operations for common cases and fallback to more wide, but costly, type checks:
isinstance(foo, Sequence)
is 100 times slower when direct type comparison because of function call and unwind all the registered types to those abstract Sequence one.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, so in that case the
is
operator should be used, and it should have some comment explaining that this is a performance optimization.