Skip to content

Commit

Permalink
Revert "encoding/xml: reject XML declaration after start of document"
Browse files Browse the repository at this point in the history
This reverts commit 8a0fbd7.

Reason for revert: Breaking real-world tests inside Google,
which means it probably breaks real-world tests outside Google.

One instance I have seen is a <!-- --> comment (often a copyright notice) before the procinst.

Another test checks that a canonicalizer can handle a test input that simply has procinsts mid-XML.

XML is full of contradictions, XML implementations more so. If we are going to start being picky, that probably needs to be controlled by a GODEBUG (and a proposal).

For #65691 (will reopen manually).

Change-Id: Ib52d0944b1478e71744a2a35b271fdf7e1c972ca
Reviewed-on: https://go-review.googlesource.com/c/go/+/570175
Reviewed-by: Than McIntosh <thanm@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Russ Cox <rsc@golang.org>
  • Loading branch information
rsc authored and gopherbot committed Mar 8, 2024
1 parent a46285f commit 32014d5
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 41 deletions.
28 changes: 4 additions & 24 deletions src/encoding/xml/marshal.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ var (
// EncodeToken allows writing a [ProcInst] with Target set to "xml" only as the first token
// in the stream.
func (enc *Encoder) EncodeToken(t Token) error {

p := &enc.p
switch t := t.(type) {
case StartElement:
Expand All @@ -227,10 +228,11 @@ func (enc *Encoder) EncodeToken(t Token) error {
p.WriteString("<!--")
p.Write(t)
p.WriteString("-->")
return p.cachedWriteError()
case ProcInst:
// First token to be encoded which is also a ProcInst with target of xml
// is the xml declaration. The only ProcInst where target of xml is allowed.
if t.Target == "xml" && p.wroteNonWS {
if t.Target == "xml" && p.w.Buffered() != 0 {
return fmt.Errorf("xml: EncodeToken of ProcInst xml target only valid for xml declaration, first token encoded")
}
if !isNameString(t.Target) {
Expand All @@ -255,30 +257,9 @@ func (enc *Encoder) EncodeToken(t Token) error {
p.WriteString(">")
default:
return fmt.Errorf("xml: EncodeToken of invalid token type")
}
if err := p.cachedWriteError(); err != nil || enc.p.wroteNonWS {
return err
}
enc.p.wroteNonWS = !isWhitespace(t)
return nil
}

// isWhitespace reports whether t is a CharData token consisting entirely of
// XML whitespace.
func isWhitespace(t Token) bool {
switch t := t.(type) {
case CharData:
for _, b := range t {
switch b {
case ' ', '\r', '\n', '\t':
default:
return false
}
}
return true
default:
return false
}
return p.cachedWriteError()
}

// isValidDirective reports whether dir is a valid directive text,
Expand Down Expand Up @@ -348,7 +329,6 @@ type printer struct {
prefixes []string
tags []Name
closed bool
wroteNonWS bool
err error
}

Expand Down
3 changes: 0 additions & 3 deletions src/encoding/xml/marshal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2356,9 +2356,6 @@ func TestProcInstEncodeToken(t *testing.T) {
var buf bytes.Buffer
enc := NewEncoder(&buf)

if err := enc.EncodeToken(CharData(" \n\r\t")); err != nil {
t.Fatal("enc.EncodeToken: expected to be able to encode whitespace as first token")
}
if err := enc.EncodeToken(ProcInst{"xml", []byte("Instruction")}); err != nil {
t.Fatalf("enc.EncodeToken: expected to be able to encode xml target ProcInst as first token, %s", err)
}
Expand Down
12 changes: 0 additions & 12 deletions src/encoding/xml/xml.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,6 @@ type Decoder struct {
line int
linestart int64
offset int64
readNonWS bool
unmarshalDepth int
}

Expand Down Expand Up @@ -564,8 +563,6 @@ func (d *Decoder) rawToken() (Token, error) {
return EndElement{d.toClose}, nil
}

readNonWS := d.readNonWS

b, ok := d.getc()
if !ok {
return nil, d.err
Expand All @@ -578,12 +575,8 @@ func (d *Decoder) rawToken() (Token, error) {
if data == nil {
return nil, d.err
}
if !d.readNonWS && !isWhitespace(CharData(data)) {
d.readNonWS = true
}
return CharData(data), nil
}
d.readNonWS = true

if b, ok = d.mustgetc(); !ok {
return nil, d.err
Expand Down Expand Up @@ -634,11 +627,6 @@ func (d *Decoder) rawToken() (Token, error) {
data = data[0 : len(data)-2] // chop ?>

if target == "xml" {
if readNonWS {
d.err = errors.New("xml: XML declaration after start of document")
return nil, d.err
}

content := string(data)
ver := procInst("version", content)
if ver != "" && ver != "1.0" {
Expand Down
2 changes: 0 additions & 2 deletions src/encoding/xml/xml_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1352,12 +1352,10 @@ func TestParseErrors(t *testing.T) {

// Header-related errors.
{`<?xml version="1.1" encoding="UTF-8"?>`, `unsupported version "1.1"; only version 1.0 is supported`},
{`<foo><?xml version="1.0"?>`, `XML declaration after start of document`},

// Cases below are for "no errors".
{withDefaultHeader(`<?ok?>`), ``},
{withDefaultHeader(`<?ok version="ok"?>`), ``},
{` <?xml version="1.0"?>`, ``},
}

for _, test := range tests {
Expand Down

0 comments on commit 32014d5

Please sign in to comment.