From 1913fb79987bc994057cd91bfeabd411369edc5e Mon Sep 17 00:00:00 2001 From: Laerte Pereira <5853172+Laerte@users.noreply.github.com> Date: Fri, 28 Oct 2022 05:29:17 -0300 Subject: [PATCH] Selector.drop and SelectorList.drop methods (#247) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: Don't remove text after deleted element * Update test_selector.py * fix linter * feat: Add `drop` method, revert changes to `remove` and deprecate it * chore: Rename remove → drop * fix: linter * chore: Inherit from `CannotRemoveElementWithoutParent` exception * chore(docs): Switch `.remove` to `drop` * chore: Change tests to use `.drop()` method --- docs/usage.rst | 4 ++-- parsel/selector.py | 52 +++++++++++++++++++++++++++++++++++++++--- tests/test_selector.py | 24 +++++++++++++------ 3 files changed, 68 insertions(+), 12 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index d0a6fb0b..dcef13db 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -400,7 +400,7 @@ Removing elements ----------------- If for any reason you need to remove elements based on a Selector or -a SelectorList, you can do it with the ``remove()`` method, available for both +a SelectorList, you can do it with the ``drop()`` method, available for both classes. .. warning:: this is a destructive action and cannot be undone. 
The original @@ -425,7 +425,7 @@ Example removing an ad from a blog post: >>> sel = Selector(text=doc) >>> sel.xpath('//div/text()').getall() ['Content paragraph...', '\n ', '\n Ad content...\n ', '\n ', '\n ', 'More content...'] - >>> sel.xpath('//div[@class="ad"]').remove() + >>> sel.xpath('//div[@class="ad"]').drop() >>> sel.xpath('//div//text()').getall() ['Content paragraph...', 'More content...'] diff --git a/parsel/selector.py b/parsel/selector.py index e0d5a404..b84b0308 100644 --- a/parsel/selector.py +++ b/parsel/selector.py @@ -4,13 +4,14 @@ import typing import warnings -from typing import Any, Dict, List, Optional, Mapping, Pattern, Union +from typing import Any, Dict, List, Mapping, Optional, Pattern, Union +from warnings import warn from lxml import etree, html from pkg_resources import parse_version -from .utils import flatten, iflatten, extract_regex, shorten -from .csstranslator import HTMLTranslator, GenericTranslator +from .csstranslator import GenericTranslator, HTMLTranslator +from .utils import extract_regex, flatten, iflatten, shorten _SelectorType = typing.TypeVar("_SelectorType", bound="Selector") @@ -27,6 +28,10 @@ class CannotRemoveElementWithoutParent(Exception): pass +class CannotDropElementWithoutParent(CannotRemoveElementWithoutParent): + pass + + class SafeXMLParser(etree.XMLParser): def __init__(self, *args, **kwargs) -> None: kwargs.setdefault("resolve_entities", False) @@ -236,9 +241,21 @@ def remove(self) -> None: """ Remove matched nodes from the parent for each element in this list. """ + warn( + "Method parsel.selector.SelectorList.remove is deprecated, please use parsel.selector.SelectorList.drop method instead", + category=DeprecationWarning, + stacklevel=2, + ) for x in self: x.remove() + def drop(self) -> None: + """ + Drop matched nodes from the parent for each element in this list. 
+ """ + for x in self: + x.drop() + class Selector: """ @@ -503,6 +520,11 @@ def remove(self) -> None: """ Remove matched nodes from the parent element. """ + warn( + "Method parsel.selector.Selector.remove is deprecated, please use parsel.selector.Selector.drop method instead", + category=DeprecationWarning, + stacklevel=2, + ) try: parent = self.root.getparent() except AttributeError: @@ -523,6 +545,30 @@ def remove(self) -> None: "are you trying to remove a root element?" ) + def drop(self) -> None: + """ + Drop matched nodes from the parent element. + """ + try: + self.root.getparent() + except AttributeError: + # 'str' object has no attribute 'getparent' + raise CannotRemoveElementWithoutRoot( + "The node you're trying to drop has no root, " + "are you trying to drop a pseudo-element? " + "Try to use 'li' as a selector instead of 'li::text' or " + "'//li' instead of '//li/text()', for example." + ) + + try: + self.root.drop_tree() + except (AttributeError, AssertionError): + # drop_tree is missing (AttributeError) or asserts a parent exists + raise CannotDropElementWithoutParent( + "The node you're trying to drop has no parent, " + "are you trying to drop a root element?" + ) + @property def attrib(self) -> Dict[str, str]: """Return the attributes dictionary for underlying element.""" diff --git a/tests/test_selector.py b/tests/test_selector.py index 99d9a552..d0bb2816 100644 --- a/tests/test_selector.py +++ b/tests/test_selector.py @@ -1050,7 +1050,7 @@ def test_remove_selector_list(self) -> None: text="