Skip to content

Commit

Permalink
Merge pull request #12 from messense/clean_content_tags
Browse files: browse the repository at this point in the history
Expose `clean_content_tags` option
  • Loading branch information
messense authored Jan 28, 2023
2 parents be812de + c5b54fe commit c9d5ac6
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 0 deletions.
1 change: 1 addition & 0 deletions nh3.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ from typing import Callable, Dict, Optional, Set
def clean(
html: str,
tags: Optional[Set[str]] = None,
clean_content_tags: Optional[Set[str]] = None,
attributes: Optional[Dict[str, Set[str]]] = None,
attribute_filter: Optional[Callable[[str, str, str]], Optional[str]] = None,
strip_comments: bool = True,
Expand Down
6 changes: 6 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use pyo3::types::{PyString, PyTuple};
#[pyfunction(signature = (
html,
tags = None,
clean_content_tags = None,
attributes = None,
attribute_filter = None,
strip_comments = true,
Expand All @@ -18,6 +19,7 @@ fn clean(
py: Python,
html: &str,
tags: Option<HashSet<&str>>,
clean_content_tags: Option<HashSet<&str>>,
attributes: Option<HashMap<&str, HashSet<&str>>>,
attribute_filter: Option<PyObject>,
strip_comments: bool,
Expand All @@ -31,6 +33,7 @@ fn clean(

let cleaned = py.allow_threads(|| {
if tags.is_some()
|| clean_content_tags.is_some()
|| attributes.is_some()
|| attribute_filter.is_some()
|| !strip_comments
Expand All @@ -40,6 +43,9 @@ fn clean(
if let Some(tags) = tags {
cleaner.tags(tags);
}
if let Some(tags) = clean_content_tags {
cleaner.clean_content_tags(tags);
}
if let Some(mut attrs) = attributes {
if let Some(generic_attrs) = attrs.remove("*") {
cleaner.generic_attributes(generic_attrs);
Expand Down
7 changes: 7 additions & 0 deletions tests/test_nh3.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@ def test_clean():
nh3.clean('<a href="https://baidu.com">baidu</a>', link_rel=None)
== '<a href="https://baidu.com">baidu</a>'
)
assert (
nh3.clean(
"<script>alert('hello')</script><style>a { background: #fff }</style>",
clean_content_tags={"script", "style"},
)
== ""
)


def test_clean_with_attribute_filter():
Expand Down

0 comments on commit c9d5ac6

Please sign in to comment.