Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add rel-id xpath ext #100

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,6 @@ output/*/index.html

# Sphinx
docs/_build

# Pytest
/.cache
52 changes: 52 additions & 0 deletions parsel/xpathfuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def set_xpathfunc(fname, func):

def setup():
    """Register this module's XPath extension functions.

    Every ``(xpath name, implementation)`` pair listed below is handed to
    ``set_xpathfunc`` so it becomes callable from XPath expressions.
    """
    registrations = (
        ('has-class', has_class),
        ('rel-id', rel_id),
    )
    for xpath_name, implementation in registrations:
        set_xpathfunc(xpath_name, implementation)


def has_class(context, *classes):
Expand All @@ -52,3 +53,54 @@ def has_class(context, *classes):
if ' ' + cls + ' ' not in node_cls:
return False
return True


# Pre-compiled ``id()`` lookup; the ID string is supplied per call through
# the ``$node_id`` XPath variable.
_id_xpath = etree.XPath('id($node_id)')


def rel_id(context, node_id, nodeset=None):
    """Relative lookup by ID (rel-id function).

    Same as the ``id`` function, but restricted to some nodeset (the current
    node by default): an element found by ``id(node_id)`` is returned only
    if it is a member of the nodeset or a descendant of one of its members.

    For example, the following XPath expressions will return the same result
    (however, with different performance)::

        document.xpath("id('foo')")        # fastest
        document.xpath("rel-id('foo')")    # fast
        document.xpath("//*[@id='foo']")   # slow, has to iterate

    This function is useful in relative lookups, for example::

        document.xpath("rel-id('bar', id('foo'))")  # fast
        document.xpath("id('foo')//*[@id='bar']")   # slow, has to iterate

    The above can also be done with::

        document.xpath("id('foo')").xpath("rel-id('bar')")  # fast

    which showcases the fact that the current node is the default nodeset.

    As with ``id``, ``node_id`` may name several IDs separated by
    whitespace; every matched element is tested against the nodeset
    individually.

    :param context: XPath evaluation context supplied by lxml; only its
        ``context_node`` attribute is read here.
    :param node_id: the ID (or whitespace-separated IDs) to look up.
    :param nodeset: optional nodeset limiting where matches may live.
    :returns: list of matching elements inside the nodeset (possibly empty).
    :raises ValueError: if ``node_id`` is not a string or ``nodeset`` is
        given but is not a nodeset (a list).
    """
    # Both checks are constant-time, so they run on every call instead of
    # being cached in the shared evaluation context. A cached flag would let
    # a second, malformed rel-id() call site in the same expression through.
    if not isinstance(node_id, string_types):
        raise ValueError(
            'XPath error: rel-id: first argument must be a string')
    if nodeset is not None and not isinstance(nodeset, list):
        raise ValueError(
            'XPath error: rel-id: second argument must be a nodeset')

    if nodeset is None:
        scope = {context.context_node}
    else:
        scope = set(nodeset)
    if not scope:
        # Nothing can be inside an empty nodeset; skip the lookup entirely.
        return []

    def _in_scope(element):
        # An element qualifies when it, or any of its ancestors, belongs to
        # the requested nodeset.
        if element in scope:
            return True
        return any(
            ancestor in scope for ancestor in element.iterancestors())

    matches = _id_xpath(context.context_node, node_id=node_id)
    # Filter element by element: id() can return several elements, and each
    # one has to be inside the scope on its own.
    return [element for element in matches if _in_scope(element)]
76 changes: 76 additions & 0 deletions tests/test_xpathfuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,79 @@ def myfunc(ctx):
self.assertRaisesRegexp(
ValueError, 'Unregistered function in myfunc',
sel.xpath, 'myfunc()')

def test_rel_id_basic(self):
    """rel-id finds IDs relative to the context node or a given nodeset."""
    # The <bar> element is closed with </bar>; the fixture previously closed
    # it with a stray </p> and relied on parser error recovery.
    body = (
        u'\n'
        u'<foo><p id="foop">Foo</p></foo>\n'
        u'<bar><p id="barp">Bar</p></bar>\n'
    )
    sel = Selector(text=body)

    def extract_all(selectors):
        # Flatten a selector list into the extracted strings.
        return [x.extract() for x in selectors]

    # Default nodeset: the context node (here, the document root selector).
    self.assertEqual(
        extract_all(sel.xpath('rel-id("foop")/text()')),
        [u'Foo'],
    )
    self.assertEqual(
        extract_all(sel.xpath('rel-id("foop", .)/text()')),
        [u'Foo'],
    )
    # Explicit nodeset containing an ancestor of the target.
    self.assertEqual(
        extract_all(sel.xpath('rel-id("foop", //foo)/text()')),
        [u'Foo'],
    )
    # Explicit nodeset containing the target itself.
    self.assertEqual(
        extract_all(sel.xpath('rel-id("foop", //p)/text()')),
        [u'Foo'],
    )
    # Target lives outside the nodeset: nothing is returned.
    self.assertEqual(
        extract_all(sel.xpath('rel-id("foop", //bar)/text()')),
        [],
    )
    # Chained selectors: the current node becomes the default nodeset.
    self.assertEqual(
        extract_all(sel.xpath('//foo').xpath('rel-id("foop")/text()')),
        [u'Foo'],
    )
    self.assertEqual(
        extract_all(sel.xpath('//bar').xpath('rel-id("foop")/text()')),
        [],
    )
    self.assertEqual(
        extract_all(sel.xpath('rel-id("barp", //bar)/text()')),
        [u'Bar'],
    )
    # Empty nodeset (no <zzz> elements exist).
    self.assertEqual(
        extract_all(sel.xpath('rel-id("foop", //zzz)/text()')),
        [],
    )

def test_rel_id_in_conditional(self):
    """rel-id can be used as a predicate to select the enclosing element."""
    # The first line now ends with </p>, matching the second line; it
    # previously ended with </foo> although no <foo> element was open.
    # NOTE(review): HTML does not permit <p> inside <p>, so the parser
    # presumably closes the outer <p> early; the predicate may then be
    # satisfied by the element carrying the id rather than by an ancestor.
    # Confirm whether a <div> wrapper was intended.
    body = (
        u'\n'
        u'<p><p id="foop">Foo</p></p>\n'
        u'<p><p id="barp">Bar</p></p>\n'
    )
    sel = Selector(text=body)

    def extract_all(selectors):
        # Flatten a selector list into the extracted strings.
        return [x.extract() for x in selectors]

    self.assertEqual(
        extract_all(sel.xpath('//p[rel-id("foop")]//text()')),
        [u'Foo'],
    )
    self.assertEqual(
        extract_all(sel.xpath('//p[rel-id("barp")]//text()')),
        [u'Bar'],
    )

def test_rel_id_error_invalid_id(self):
    """A non-string first argument to rel-id is rejected with ValueError."""
    sel = Selector(text=u'\n<p CLASS="foo">First</p>\n')
    expected_message = 'rel-id: first argument must be a string'
    # 123 is an XPath number, not a string.
    with self.assertRaisesRegexp(ValueError, expected_message):
        sel.xpath(u'rel-id(123)')

def test_rel_id_error_invalid_nodeset(self):
    """A non-nodeset second argument to rel-id is rejected with ValueError."""
    sel = Selector(text=u'\n<p CLASS="foo">First</p>\n')
    expected_message = 'rel-id: second argument must be a nodeset'
    # true() yields an XPath boolean, not a nodeset.
    with self.assertRaisesRegexp(ValueError, expected_message):
        sel.xpath(u'rel-id("123", true())')