Skip to content

Commit

Permalink
Selector.drop and SelectorList.drop methods (#247)
Browse files Browse the repository at this point in the history
* fix: Don't remove text after deleted element

* Update test_selector.py

* fix linter

* feat: Add `drop` method, revert changes to `remove` and deprecate it

* chore: Rename remove → drop

* fix: linter

* chore: Inherit from `CannotRemoveElementWithoutParent` exception

* chore(docs): Switch `.remove` to `drop`

* chore: Change tests to use `.drop()` method
  • Loading branch information
Laerte authored Oct 28, 2022
1 parent 96fc3d7 commit 1913fb7
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 12 deletions.
4 changes: 2 additions & 2 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ Removing elements
-----------------

If for any reason you need to remove elements based on a Selector or
a SelectorList, you can do it with the ``remove()`` method, available for both
a SelectorList, you can do it with the ``drop()`` method, available for both
classes.

.. warning:: this is a destructive action and cannot be undone. The original
Expand All @@ -425,7 +425,7 @@ Example removing an ad from a blog post:
>>> sel = Selector(text=doc)
>>> sel.xpath('//div/text()').getall()
['Content paragraph...', '\n ', '\n Ad content...\n ', '\n ', '\n ', 'More content...']
>>> sel.xpath('//div[@class="ad"]').remove()
>>> sel.xpath('//div[@class="ad"]').drop()
>>> sel.xpath('//div//text()').getall()
['Content paragraph...', 'More content...']

Expand Down
52 changes: 49 additions & 3 deletions parsel/selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@

import typing
import warnings
from typing import Any, Dict, List, Optional, Mapping, Pattern, Union
from typing import Any, Dict, List, Mapping, Optional, Pattern, Union
from warnings import warn

from lxml import etree, html
from pkg_resources import parse_version

from .utils import flatten, iflatten, extract_regex, shorten
from .csstranslator import HTMLTranslator, GenericTranslator
from .csstranslator import GenericTranslator, HTMLTranslator
from .utils import extract_regex, flatten, iflatten, shorten

_SelectorType = typing.TypeVar("_SelectorType", bound="Selector")

Expand All @@ -27,6 +28,10 @@ class CannotRemoveElementWithoutParent(Exception):
pass


class CannotDropElementWithoutParent(CannotRemoveElementWithoutParent):
pass


class SafeXMLParser(etree.XMLParser):
def __init__(self, *args, **kwargs) -> None:
kwargs.setdefault("resolve_entities", False)
Expand Down Expand Up @@ -236,9 +241,21 @@ def remove(self) -> None:
"""
Remove matched nodes from the parent for each element in this list.
"""
warn(
"Method parsel.selector.SelectorList.remove is deprecated, please use parsel.selector.SelectorList.drop method instead",
category=DeprecationWarning,
stacklevel=2,
)
for x in self:
x.remove()

def drop(self) -> None:
"""
Drop matched nodes from the parent for each element in this list.
"""
for x in self:
x.drop()


class Selector:
"""
Expand Down Expand Up @@ -503,6 +520,11 @@ def remove(self) -> None:
"""
Remove matched nodes from the parent element.
"""
warn(
"Method parsel.selector.Selector.remove is deprecated, please use parsel.selector.Selector.drop method instead",
category=DeprecationWarning,
stacklevel=2,
)
try:
parent = self.root.getparent()
except AttributeError:
Expand All @@ -523,6 +545,30 @@ def remove(self) -> None:
"are you trying to remove a root element?"
)

def drop(self):
"""
Drop matched nodes from the parent element.
"""
try:
self.root.getparent()
except AttributeError:
# 'str' object has no attribute 'getparent'
raise CannotRemoveElementWithoutRoot(
"The node you're trying to drop has no root, "
"are you trying to drop a pseudo-element? "
"Try to use 'li' as a selector instead of 'li::text' or "
"'//li' instead of '//li/text()', for example."
)

try:
self.root.drop_tree()
except (AttributeError, AssertionError):
# 'NoneType' object has no attribute 'drop'
raise CannotDropElementWithoutParent(
"The node you're trying to remove has no parent, "
"are you trying to remove a root element?"
)

@property
def attrib(self) -> Dict[str, str]:
"""Return the attributes dictionary for underlying element."""
Expand Down
24 changes: 17 additions & 7 deletions tests/test_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -1050,7 +1050,7 @@ def test_remove_selector_list(self) -> None:
text="<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>"
)
sel_list = sel.css("li")
sel_list.remove()
sel_list.drop()
self.assertIsSelectorList(sel.css("li"))
self.assertEqual(sel.css("li"), [])

Expand All @@ -1059,7 +1059,7 @@ def test_remove_selector(self) -> None:
text="<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>"
)
sel_list = sel.css("li")
sel_list[0].remove()
sel_list[0].drop()
self.assertIsSelectorList(sel.css("li"))
self.assertEqual(sel.css("li::text").getall(), ["2", "3"])

Expand All @@ -1070,7 +1070,7 @@ def test_remove_pseudo_element_selector_list(self) -> None:
sel_list = sel.css("li::text")
self.assertEqual(sel_list.getall(), ["1", "2", "3"])
with self.assertRaises(CannotRemoveElementWithoutRoot):
sel_list.remove()
sel_list.drop()

self.assertIsSelectorList(sel.css("li"))
self.assertEqual(sel.css("li::text").getall(), ["1", "2", "3"])
Expand All @@ -1082,7 +1082,7 @@ def test_remove_pseudo_element_selector(self) -> None:
sel_list = sel.css("li::text")
self.assertEqual(sel_list.getall(), ["1", "2", "3"])
with self.assertRaises(CannotRemoveElementWithoutRoot):
sel_list[0].remove()
sel_list[0].drop()

self.assertIsSelectorList(sel.css("li"))
self.assertEqual(sel.css("li::text").getall(), ["1", "2", "3"])
Expand All @@ -1094,15 +1094,15 @@ def test_remove_root_element_selector(self) -> None:
sel_list = sel.css("li::text")
self.assertEqual(sel_list.getall(), ["1", "2", "3"])
with self.assertRaises(CannotRemoveElementWithoutParent):
sel.remove()
sel.drop()

with self.assertRaises(CannotRemoveElementWithoutParent):
sel.css("html").remove()
sel.css("html").drop()

self.assertIsSelectorList(sel.css("li"))
self.assertEqual(sel.css("li::text").getall(), ["1", "2", "3"])

sel.css("body").remove()
sel.css("body").drop()
self.assertEqual(sel.get(), "<html></html>")

def test_deep_nesting(self):
Expand Down Expand Up @@ -1316,3 +1316,13 @@ def test_set(self) -> None:
).extract(),
["url", "name", "startDate", "location", "offers"],
)

def test_dont_remove_text_after_deleted_element(self) -> None:
sel = self.sscls(
text="""<html><body>Text before.<span>Text in.</span> Text after.</body></html>
"""
)
sel.css("span").drop()
self.assertEqual(
sel.get(), "<html><body>Text before. Text after.</body></html>"
)

0 comments on commit 1913fb7

Please sign in to comment.