Skip to content

Commit b3124cf

Browse files
committed
Implement text nodes / content escaping
This removes support for re-indenting XML, but allows a document to be parsed and stringified faithfully. Beautification could be added again later if desired.
1 parent 08c0943 commit b3124cf

File tree

4 files changed

+56
-38
lines changed

4 files changed

+56
-38
lines changed

document.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,9 @@ func (this *Document) LoadStream(r io.Reader, charset CharsetFunc) (err error) {
117117
case xml.SyntaxError:
118118
return errors.New(tt.Error())
119119
case xml.CharData:
120-
ct.Value = ct.Value + strings.TrimSpace(string([]byte(tt)))
120+
t := NewNode(NT_TEXT)
121+
t.Value = string([]byte(tt))
122+
ct.AddChild(t)
121123
case xml.Comment:
122124
t := NewNode(NT_COMMENT)
123125
t.Value = strings.TrimSpace(string([]byte(tt)))

node.go

+29-31
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,14 @@ import (
99
"encoding/xml"
1010
"fmt"
1111
"strconv"
12-
"strings"
1312
)
1413

1514
const (
1615
NT_ROOT = iota
1716
NT_DIRECTIVE
1817
NT_PROCINST
1918
NT_COMMENT
19+
NT_TEXT
2020
NT_ELEMENT
2121
)
2222

@@ -51,7 +51,7 @@ func NewNode(tid byte) *Node {
5151
// This wraps the standard xml.Unmarshal function and supplies this particular
5252
// node as the content to be unmarshalled.
5353
func (this *Node) Unmarshal(obj interface{}) error {
54-
return xml.NewDecoder(bytes.NewBuffer(this.bytes(0))).Decode(obj)
54+
return xml.NewDecoder(bytes.NewBuffer(this.bytes())).Decode(obj)
5555
}
5656

5757
// Get node value as string
@@ -433,20 +433,22 @@ func (this *Node) SetAttr(name, value string) {
433433
// Note that NT_ROOT is a special-case empty node used as the root for a
434434
// Document. This one has no representation by itself. It merely forwards the
435435
// String() call to its child nodes.
436-
func (this *Node) Bytes() []byte { return this.bytes(0) }
436+
func (this *Node) Bytes() []byte { return this.bytes() }
437437

438-
func (this *Node) bytes(indent int) (b []byte) {
438+
func (this *Node) bytes() (b []byte) {
439439
switch this.Type {
440440
case NT_PROCINST:
441-
b = this.printProcInst(indent)
441+
b = this.printProcInst()
442442
case NT_COMMENT:
443-
b = this.printComment(indent)
443+
b = this.printComment()
444444
case NT_DIRECTIVE:
445-
b = this.printDirective(indent)
445+
b = this.printDirective()
446446
case NT_ELEMENT:
447-
b = this.printElement(indent)
447+
b = this.printElement()
448+
case NT_TEXT:
449+
b = this.printText()
448450
case NT_ROOT:
449-
b = this.printRoot(indent)
451+
b = this.printRoot()
450452
}
451453
return
452454
}
@@ -456,38 +458,42 @@ func (this *Node) bytes(indent int) (b []byte) {
456458
// Document. This one has no representation by itself. It merely forwards the
457459
// String() call to its child nodes.
458460
func (this *Node) String() (s string) {
459-
return string(this.bytes(0))
461+
return string(this.bytes())
460462
}
461463

462-
func (this *Node) printRoot(indent int) []byte {
464+
func (this *Node) printRoot() []byte {
463465
var b bytes.Buffer
464466
for _, v := range this.Children {
465-
b.Write(v.bytes(indent))
467+
b.Write(v.bytes())
466468
}
467469
return b.Bytes()
468470
}
469471

470-
func (this *Node) printProcInst(indent int) []byte {
472+
func (this *Node) printProcInst() []byte {
471473
return []byte("<?" + this.Target + " " + this.Value + "?>")
472474
}
473475

474-
func (this *Node) printComment(indent int) []byte {
476+
func (this *Node) printComment() []byte {
475477
return []byte("<!-- " + this.Value + " -->")
476478
}
477479

478-
func (this *Node) printDirective(indent int) []byte {
480+
func (this *Node) printDirective() []byte {
479481
return []byte("<!" + this.Value + "!>")
480482
}
481483

482-
func (this *Node) printElement(indent int) []byte {
484+
func (this *Node) printText() []byte {
485+
val := []byte(this.Value)
486+
if len(this.Parent.Children) > 1 {
487+
return val
488+
}
483489
var b bytes.Buffer
490+
xml.EscapeText(&b, val)
491+
return b.Bytes()
492+
}
484493

485-
lineSuffix, linePrefix := "", strings.Repeat(IndentPrefix, indent)
486-
if len(IndentPrefix) > 0 {
487-
lineSuffix = "\n"
488-
}
494+
func (this *Node) printElement() []byte {
495+
var b bytes.Buffer
489496

490-
b.WriteString(linePrefix)
491497
if len(this.Name.Space) > 0 {
492498
b.WriteRune('<')
493499
b.WriteString(this.Name.Space)
@@ -509,23 +515,16 @@ func (this *Node) printElement(indent int) []byte {
509515

510516
if len(this.Children) == 0 && len(this.Value) == 0 {
511517
b.WriteString(" />")
512-
b.WriteString(lineSuffix)
513518
return b.Bytes()
514519
}
515520

516521
b.WriteRune('>')
517-
if len(this.Value) == 0 {
518-
b.WriteString(lineSuffix)
519-
}
520522

521523
for _, v := range this.Children {
522-
b.Write(v.bytes(indent + 1))
524+
b.Write(v.bytes())
523525
}
524526

525-
b.WriteString(this.Value)
526-
if len(this.Value) == 0 {
527-
b.WriteString(linePrefix)
528-
}
527+
xml.EscapeText(&b, []byte(this.Value))
529528
if len(this.Name.Space) > 0 {
530529
b.WriteString("</")
531530
b.WriteString(this.Name.Space)
@@ -537,7 +536,6 @@ func (this *Node) printElement(indent int) []byte {
537536
b.WriteString(this.Name.Local)
538537
b.WriteRune('>')
539538
}
540-
b.WriteString(lineSuffix)
541539

542540
return b.Bytes()
543541
}

test4.xml

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<body>  &lt;https://example.com/file/fm/SU0vRk0xLzIwMTMwOTEwLzA1MDA0MS5ybXdhdGVzdEByZXV0ZXJzLmNvbTEzNzg4NDU1OTk4OTA/Screen%20Shot%202013-09-10%20at%2021.33.54.png&gt; File Attachment:-Screen Shot 2013-09-10 at 21.33.54.png  </body>

xmlx_test.go

+23-6
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ func TestUnmarshal(t *testing.T) {
137137
}
138138
}
139139

140-
func TestString(t *testing.T) {
140+
func TestStringNamespaces(t *testing.T) {
141141
doc := New()
142142
err := doc.LoadFile("test3.xml", nil)
143143

@@ -147,15 +147,32 @@ func TestString(t *testing.T) {
147147
}
148148

149149
expected := `<root xmlns:foo="http:/example.org/foo">
150-
<child foo:bar="1">
151-
<grandchild xmlns:foo="">
152-
<great-grandchild bar="2" />
153-
</grandchild>
154-
</child>
150+
<child foo:bar="1">
151+
<grandchild xmlns:foo="">
152+
<great-grandchild bar="2">&#xA; </great-grandchild>
153+
</grandchild>
154+
</child>
155155
</root>
156156
`
157157

158158
if got := doc.Root.String(); got != expected {
159159
t.Fatalf("expected: %s\ngot: %s\n", expected, got)
160160
}
161161
}
162+
163+
func TestStringEscaping(t *testing.T) {
164+
doc := New()
165+
err := doc.LoadFile("test4.xml", nil)
166+
167+
if err != nil {
168+
t.Errorf("LoadFile(): %s", err)
169+
return
170+
}
171+
172+
expected := `<body>  &lt;https://example.com/file/fm/SU0vRk0xLzIwMTMwOTEwLzA1MDA0MS5ybXdhdGVzdEByZXV0ZXJzLmNvbTEzNzg4NDU1OTk4OTA/Screen%20Shot%202013-09-10%20at%2021.33.54.png&gt; File Attachment:-Screen Shot 2013-09-10 at 21.33.54.png  </body>
173+
`
174+
175+
if got := doc.Root.String(); got != expected {
176+
t.Fatalf("expected: %s\ngot: %s\n", expected, got)
177+
}
178+
}

0 commit comments

Comments
 (0)