Skip to content

Commit

Permalink
Merge pull request #65 from gjtorikian/test-weird-unwrap
Browse files: browse the repository at this point in the history
Stop assuming `on_end_tag` handlers are always defined
  • Loading branch information
gjtorikian authored Jul 18, 2024
2 parents 6baf62d + eedbf25 commit f92bd54
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 36 deletions.
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@
"[markdown]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[html]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
}
9 changes: 4 additions & 5 deletions ext/selma/src/rewriter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -442,13 +442,12 @@ impl SelmaRewriter {

let closure_element_stack = element_stack.clone();

el.end_tag_handlers()
.unwrap()
.push(Box::new(move |_end_tag| {
let mut stack = closure_element_stack.as_ref().borrow_mut();
stack.pop();
if let Some(end_tag_handlers) = el.end_tag_handlers() {
end_tag_handlers.push(Box::new(move |_end_tag| {
closure_element_stack.as_ref().borrow_mut().pop();
Ok(())
}));
}

Ok(())
}));
Expand Down
2 changes: 1 addition & 1 deletion lib/selma/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# frozen_string_literal: true

module Selma
VERSION = "0.4.2"
VERSION = "0.4.3"
end
34 changes: 34 additions & 0 deletions test/fixtures/docs.html

Large diffs are not rendered by default.

69 changes: 39 additions & 30 deletions test/selma_maliciousness_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -168,50 +168,59 @@ def test_sanitizer_expects_all_as_symbol
end
end

class RemoveLinkClass
SELECTOR = Selma::Selector.new(match_element: %(a:not([class="anchor"])))
class ContentExtractor
SELECTOR = Selma::Selector.new(match_element: "*", match_text_within: "title")

def selector
SELECTOR
end
attr_reader :title, :meta

def handle_element(element)
element.remove_attribute("class")
def initialize
super
@title = ""
@meta = {}
@within_title = false
end
end

class RemoveIdAttributes
SELECTOR = Selma::Selector.new(match_element: %(a[id], li[id]))

def selector
SELECTOR
end

def handle_element(element)
# footnote ids should not be removed
return if element.tag_name == "li"
return if element.tag_name == "a"

# links with generated header anchors should not be removed
return if element.tag_name == "a" && element["class"] == "anchor"
if element.tag_name == "pre" ||
element.tag_name == "code" ||
element.tag_name == "form" ||
element.tag_name == "style" ||
element.tag_name == "noscript" ||
element.tag_name == "script" ||
element.tag_name == "svg"
element.remove
elsif element.tag_name == "title"
@within_title = true
element.remove
elsif element.tag_name == "meta"
return if element.attributes["name"].nil?

@meta[element.attributes["name"]] = element.attributes["content"]
else
element.remove_and_keep_content
end
end

element.remove_attribute("id")
def handle_text_chunk(text)
if @within_title
@within_title = false
@title = text.to_s
end
end
end

class BaseRemoveRel
SELECTOR = Selma::Selector.new(match_element: %(a))
def test_rewriter_does_not_halt_on_malformed_html
html = load_fixture("docs.html")

def selector
SELECTOR
end
sanitizer_config = Selma::Sanitizer::Config::RELAXED.dup.merge({
allow_doctype: false,
})
sanitizer = Selma::Sanitizer.new(sanitizer_config)

def handle_element(element)
# we allow rel="license" to support the Rel-license microformat
# http://microformats.org/wiki/rel-license
unless element["rel"] == "license"
element.remove_attribute("rel")
end
end
Selma::Rewriter.new(sanitizer: sanitizer, handlers: [ContentExtractor.new]).rewrite(html)
end
end

0 comments on commit f92bd54

Please sign in to comment.