Skip to content

Commit 83cc451

Browse files
committed
fix: {HTML4,XML}::SAX::{Parser,ParserContext} check arg types
Previously, arguments of the wrong type might cause segfault on CRuby.
1 parent 22c9e5b commit 83cc451

10 files changed

+66
-12
lines changed

ext/java/nokogiri/Html4SaxParserContext.java

+11
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,13 @@ static EncodingType get(final int ordinal)
231231
IRubyObject data,
232232
IRubyObject encoding)
233233
{
234+
if (!(data instanceof RubyString)) {
235+
throw context.getRuntime().newTypeError("data must be kind_of String");
236+
}
237+
if (!(encoding instanceof RubyString)) {
238+
throw context.getRuntime().newTypeError("data must be kind_of String");
239+
}
240+
234241
Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klass);
235242
ctx.setInputSourceFile(context, data);
236243
String javaEncoding = findEncodingName(context, encoding);
@@ -247,6 +254,10 @@ static EncodingType get(final int ordinal)
247254
IRubyObject data,
248255
IRubyObject encoding)
249256
{
257+
if (!(encoding instanceof RubyFixnum)) {
258+
throw context.getRuntime().newTypeError("encoding must be kind_of String");
259+
}
260+
250261
Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klass);
251262
ctx.setIOInputSource(context, data, context.nil);
252263
String javaEncoding = findEncodingName(context, encoding);

ext/java/nokogiri/XmlSaxParserContext.java

+5-2
Original file line numberDiff line numberDiff line change
@@ -130,9 +130,12 @@ public class XmlSaxParserContext extends ParserContext
130130
parse_io(ThreadContext context,
131131
IRubyObject klazz,
132132
IRubyObject data,
133-
IRubyObject enc)
133+
IRubyObject encoding)
134134
{
135-
//int encoding = (int)enc.convertToInteger().getLongValue();
135+
// check the type of the unused encoding to match behavior of CRuby
136+
if (!(encoding instanceof RubyFixnum)) {
137+
throw context.getRuntime().newTypeError("encoding must be kind_of String");
138+
}
136139
final Ruby runtime = context.runtime;
137140
XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
138141
ctx.initialize(runtime);

ext/java/nokogiri/internals/ParserContext.java

+7-1
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,12 @@ public abstract class ParserContext extends RubyObject
5858
source = new InputSource();
5959
ParserContext.setUrl(context, source, url);
6060

61+
Ruby ruby = context.getRuntime();
62+
63+
if (!(data.respondsTo("read"))) {
64+
throw ruby.newTypeError("must respond to :read");
65+
}
66+
6167
source.setByteStream(new IOInputStream(data));
6268
if (java_encoding != null) {
6369
source.setEncoding(java_encoding);
@@ -73,7 +79,7 @@ public abstract class ParserContext extends RubyObject
7379
Ruby ruby = context.getRuntime();
7480

7581
if (!(data instanceof RubyString)) {
76-
throw ruby.newArgumentError("must be kind_of String");
82+
throw ruby.newTypeError("must be kind_of String");
7783
}
7884

7985
RubyString stringData = (RubyString) data;

ext/nokogiri/html4_sax_parser_context.c

+2-3
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,8 @@ parse_memory(VALUE klass, VALUE data, VALUE encoding)
1919
{
2020
htmlParserCtxtPtr ctxt;
2121

22-
if (NIL_P(data)) {
23-
rb_raise(rb_eArgError, "data cannot be nil");
24-
}
22+
Check_Type(data, T_STRING);
23+
2524
if (!(int)RSTRING_LEN(data)) {
2625
rb_raise(rb_eRuntimeError, "data cannot be empty");
2726
}

ext/nokogiri/xml_sax_parser_context.c

+10-3
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
VALUE cNokogiriXmlSaxParserContext ;
44

5+
static ID id_read;
6+
57
static void
68
deallocate(xmlParserCtxtPtr ctxt)
79
{
@@ -26,6 +28,10 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
2628
xmlParserCtxtPtr ctxt;
2729
xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
2830

31+
if (!rb_respond_to(io, id_read)) {
32+
rb_raise(rb_eTypeError, "argument expected to respond to :read");
33+
}
34+
2935
ctxt = xmlCreateIOParserCtxt(NULL, NULL,
3036
(xmlInputReadCallback)noko_io_read,
3137
(xmlInputCloseCallback)noko_io_close,
@@ -62,9 +68,8 @@ parse_memory(VALUE klass, VALUE data)
6268
{
6369
xmlParserCtxtPtr ctxt;
6470

65-
if (NIL_P(data)) {
66-
rb_raise(rb_eArgError, "data cannot be nil");
67-
}
71+
Check_Type(data, T_STRING);
72+
6873
if (!(int)RSTRING_LEN(data)) {
6974
rb_raise(rb_eRuntimeError, "data cannot be empty");
7075
}
@@ -278,4 +283,6 @@ noko_init_xml_sax_parser_context()
278283
rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
279284
rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
280285
rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
286+
287+
id_read = rb_intern("read");
281288
}

lib/nokogiri/html4/sax/parser.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ class Parser < Nokogiri::XML::SAX::Parser
2828
###
2929
# Parse html stored in +data+ using +encoding+
3030
def parse_memory(data, encoding = "UTF-8")
31-
raise ArgumentError unless data
31+
raise TypeError unless String === data
3232
return if data.empty?
3333

3434
ctx = ParserContext.memory(data, encoding)

test/html4/sax/test_parser.rb

+7-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def test_parse_file_with_dir
5454
end
5555

5656
def test_parse_memory_nil
57-
assert_raises(ArgumentError) do
57+
assert_raises(TypeError) do
5858
@parser.parse_memory(nil)
5959
end
6060
end
@@ -161,6 +161,12 @@ def test_parsing_dom_error_from_io
161161
def test_empty_processing_instruction
162162
@parser.parse_memory("<strong>this will segfault<?strong>")
163163
end
164+
165+
it "handles invalid types gracefully" do
166+
assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse(0xcafecafe) }
167+
assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse_memory(0xcafecafe) }
168+
assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse_io(0xcafecafe) }
169+
end
164170
end
165171
end
166172
end

test/html4/sax/test_parser_context.rb

+9
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,15 @@ def test_from_file
4040
ctx.parse_with(parser)
4141
# end
4242
end
43+
44+
def test_graceful_handling_of_invalid_types
45+
assert_raises(TypeError) { ParserContext.new(0xcafecafe) }
46+
assert_raises(TypeError) { ParserContext.memory(0xcafecafe, "UTF-8") }
47+
assert_raises(TypeError) { ParserContext.io(0xcafecafe, 1) }
48+
assert_raises(TypeError) { ParserContext.io(StringIO.new("asdf"), "should be an index into ENCODINGS") }
49+
assert_raises(TypeError) { ParserContext.file(0xcafecafe, "UTF-8") }
50+
assert_raises(TypeError) { ParserContext.file("path/to/file", 0xcafecafe) }
51+
end
4352
end
4453
end
4554
end

test/xml/sax/test_parser.rb

+7-1
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,12 @@ class Nokogiri::SAX::TestCase
7171
end
7272
end
7373

74+
it "handles invalid types gracefully" do
75+
assert_raises(TypeError) { Nokogiri::XML::SAX::Parser.new.parse(0xcafecafe) }
76+
assert_raises(TypeError) { Nokogiri::XML::SAX::Parser.new.parse_memory(0xcafecafe) }
77+
assert_raises(TypeError) { Nokogiri::XML::SAX::Parser.new.parse_io(0xcafecafe) }
78+
end
79+
7480
it :test_namespace_declaration_order_is_saved do
7581
parser.parse(<<~EOF)
7682
<root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
@@ -261,7 +267,7 @@ def call_parse_io_with_encoding(encoding)
261267
end
262268

263269
it :test_render_parse_nil_param do
264-
assert_raises(ArgumentError) { parser.parse_memory(nil) }
270+
assert_raises(TypeError) { parser.parse_memory(nil) }
265271
end
266272

267273
it :test_bad_encoding_args do

test/xml/sax/test_parser_context.rb

+7
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,13 @@ def test_recovery
8080
assert(pc.recovery)
8181
end
8282

83+
def test_graceful_handling_of_invalid_types
84+
assert_raises(TypeError) { ParserContext.new(0xcafecafe) }
85+
assert_raises(TypeError) { ParserContext.memory(0xcafecafe) }
86+
assert_raises(TypeError) { ParserContext.io(0xcafecafe, 1) }
87+
assert_raises(TypeError) { ParserContext.io(StringIO.new("asdf"), "should be an index into ENCODINGS") }
88+
end
89+
8390
def test_from_io
8491
ctx = ParserContext.new(StringIO.new("fo"), "UTF-8")
8592
assert(ctx)

0 commit comments

Comments
 (0)