Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ValidateInput option to ReadSettings #131

Merged
merged 2 commits into from
Apr 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 42 additions & 7 deletions etree.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@ type ReadSettings struct {
// preserve them instead of keeping only one. Default: false.
PreserveDuplicateAttrs bool

// ValidateInput forces all ReadFrom* methods to validate that the
// provided input is composed of well-formed XML before processing it. If
// invalid XML is detected, the ReadFrom* methods return an error. Because
// this option requires the input to be processed twice, it incurs a
// significant performance penalty. Default: false.
ValidateInput bool

// Entity to be passed to standard xml.Decoder. Default: nil.
Entity map[string]string

Expand All @@ -66,9 +73,6 @@ func newReadSettings() ReadSettings {
CharsetReader: func(label string, input io.Reader) (io.Reader, error) {
return input, nil
},
Permissive: false,
PreserveCData: false,
Entity: nil,
}
}

Expand Down Expand Up @@ -353,6 +357,11 @@ func (d *Document) SetRoot(e *Element) {
// ReadFrom reads XML from the reader 'r' into this document. The function
// returns the number of bytes read and any error encountered.
func (d *Document) ReadFrom(r io.Reader) (n int64, err error) {
if d.ReadSettings.ValidateInput {
if err := validateXML(r, d.ReadSettings); err != nil {
return 0, err
}
}
return d.Element.readFrom(r, d.ReadSettings)
}

Expand Down Expand Up @@ -380,6 +389,35 @@ func (d *Document) ReadFromString(s string) error {
return err
}

// validateXML determines if the data read from the reader 'r' contains
// well-formed XML according to the rules set by the go xml package.
func validateXML(r io.Reader, settings ReadSettings) error {
dec := newDecoder(r, settings)
err := dec.Decode(new(interface{}))
if err != nil {
return err
}

// If there are any trailing tokens after unmarshalling with Decode(),
// then the XML input didn't terminate properly.
_, err = dec.Token()
if err == io.EOF {
return nil
}
return ErrXML
}

// newDecoder creates an XML decoder for the reader 'r' configured using
// the provided read settings.
func newDecoder(r io.Reader, settings ReadSettings) *xml.Decoder {
d := xml.NewDecoder(r)
d.CharsetReader = settings.CharsetReader
d.Strict = !settings.Permissive
d.Entity = settings.Entity
d.AutoClose = settings.AutoClose
return d
}

// WriteTo serializes the document out to the writer 'w'. The function returns
// the number of bytes written and any error encountered.
func (d *Document) WriteTo(w io.Writer) (n int64, err error) {
Expand Down Expand Up @@ -835,10 +873,7 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er
r = newXmlSimpleReader(ri)
}

dec := xml.NewDecoder(r)
dec.CharsetReader = settings.CharsetReader
dec.Strict = !settings.Permissive
dec.Entity = settings.Entity
dec := newDecoder(r, settings)

var stack stack
stack.push(e)
Expand Down
32 changes: 32 additions & 0 deletions etree_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1524,3 +1524,35 @@ func TestNotNil(t *testing.T) {
t.Error("got:\n" + got)
}
}

func TestValidateInput(t *testing.T) {
tests := []struct {
s string
err string
}{
{`<root>x</root>`, ""},
{`<root/>`, ""},
{`<root>x`, `XML syntax error on line 1: unexpected EOF`},
{`</root><root>`, `XML syntax error on line 1: unexpected end element </root>`},
{`<>`, `XML syntax error on line 1: expected element name after <`},
{`<root>x</root>trailing`, "etree: invalid XML format"},
{`<root>x</root><`, "etree: invalid XML format"},
{`<root><child>x</child></root1>`, `XML syntax error on line 1: element <root> closed by </root1>`},
}

for i, test := range tests {
doc := NewDocument()
doc.ReadSettings.ValidateInput = true
err := doc.ReadFromString(test.s)
if err == nil {
if test.err != "" {
t.Errorf("etree: test #%d:\nExpected error:\n %s\nReceived error:\n nil", i, test.err)
}
} else {
te := err.Error()
if te != test.err {
t.Errorf("etree: test #%d:\nExpected error;\n %s\nReceived error:\n %s", i, test.err, te)
}
}
}
}
Loading