Skip to content

Commit 121dfc1

Browse files
committed
Cache reader usage improvement
1 parent: c6eae36 · commit: 121dfc1

File tree

3 files changed: 54 additions and 28 deletions.

cached_reader.go

Lines changed: 35 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -5,45 +5,48 @@ import (
55
)
66

77
type cachedReader struct {
8-
buffer *bufio.Reader
9-
cache []byte
10-
cacheCap int
11-
cacheLen int
8+
buffer *bufio.Reader
9+
cache []byte
1210
caching bool
1311
}
1412

1513
func newCachedReader(r *bufio.Reader) *cachedReader {
1614
return &cachedReader{
17-
buffer: r,
18-
cache: make([]byte, 4096),
19-
cacheCap: 4096,
20-
cacheLen: 0,
21-
caching: false,
15+
buffer: r,
16+
cache: make([]byte, 0, 4096),
17+
caching: false,
2218
}
2319
}
2420

2521
func (c *cachedReader) StartCaching() {
26-
c.cacheLen = 0
22+
c.cache = c.cache[:0]
2723
c.caching = true
2824
}
2925

30-
func (c *cachedReader) ReadByte() (byte, error) {
31-
if !c.caching {
32-
return c.buffer.ReadByte()
33-
}
34-
b, err := c.buffer.ReadByte()
26+
func (c *cachedReader) ReadByte() (b byte, err error) {
27+
b, err = c.buffer.ReadByte()
3528
if err != nil {
36-
return b, err
29+
return
3730
}
38-
if c.cacheLen < c.cacheCap {
39-
c.cache[c.cacheLen] = b
40-
c.cacheLen++
31+
if c.caching {
32+
c.cacheByte(b)
4133
}
42-
return b, err
34+
return
4335
}
4436

4537
func (c *cachedReader) Cache() []byte {
46-
return c.cache[:c.cacheLen]
38+
return c.cache
39+
}
40+
41+
func (c *cachedReader) CacheWithLimit(n int) []byte {
42+
if n < 1 {
43+
return nil
44+
}
45+
l := len(c.cache)
46+
if n > l {
47+
n = l
48+
}
49+
return c.cache[:n]
4750
}
4851

4952
func (c *cachedReader) StopCaching() {
@@ -55,15 +58,22 @@ func (c *cachedReader) Read(p []byte) (int, error) {
5558
if err != nil {
5659
return n, err
5760
}
58-
if c.caching && c.cacheLen < c.cacheCap {
61+
if c.caching {
5962
for i := 0; i < n; i++ {
60-
c.cache[c.cacheLen] = p[i]
61-
c.cacheLen++
62-
if c.cacheLen >= c.cacheCap {
63+
if !c.cacheByte(p[i]) {
6364
break
6465
}
6566
}
6667
}
6768
return n, err
6869
}
6970

71+
func (c *cachedReader) cacheByte(b byte) bool {
72+
n := len(c.cache)
73+
if n == cap(c.cache) {
74+
return false
75+
}
76+
c.cache = c.cache[:n+1]
77+
c.cache[n] = b
78+
return true
79+
}

cached_reader_test.go

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,4 +39,19 @@ func TestCaching(t *testing.T) {
3939
if !bytes.Equal(cached, []byte("BCDEF")) {
4040
t.Fatalf("Incorrect cached buffer value")
4141
}
42+
43+
cached = cachedReader.CacheWithLimit(-1)
44+
if cached != nil {
45+
t.Fatalf("Incorrect cached buffer value")
46+
}
47+
48+
cached = cachedReader.CacheWithLimit(3)
49+
if !bytes.Equal(cached, []byte("BCD")) {
50+
t.Fatalf("Incorrect cached buffer value")
51+
}
52+
53+
cached = cachedReader.CacheWithLimit(1000)
54+
if !bytes.Equal(cached, []byte("BCDEF")) {
55+
t.Fatalf("Incorrect cached buffer value")
56+
}
4257
}

parse.go

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ package xmlquery
22

33
import (
44
"bufio"
5+
"bytes"
56
"encoding/xml"
67
"fmt"
78
"io"
@@ -184,7 +185,7 @@ func (p *parser) parse() (*Node, error) {
184185

185186
if node.NamespaceURI != "" {
186187
if v, ok := p.space2prefix[node.NamespaceURI]; ok {
187-
cached := string(p.reader.Cache())
188+
cached := string(p.reader.CacheWithLimit(len(v.name) + len(node.Data) + 2))
188189
if strings.HasPrefix(cached, fmt.Sprintf("%s:%s", v.name, node.Data)) || strings.HasPrefix(cached, fmt.Sprintf("<%s:%s", v.name, node.Data)) {
189190
node.Prefix = v.name
190191
}
@@ -244,9 +245,9 @@ func (p *parser) parse() (*Node, error) {
244245
}
245246
case xml.CharData:
246247
// First, normalize the cache...
247-
cached := strings.ToUpper(string(p.reader.Cache()))
248+
cached := bytes.ToUpper(p.reader.CacheWithLimit(9))
248249
nodeType := TextNode
249-
if strings.HasPrefix(cached, "<![CDATA[") || strings.HasPrefix(cached, "![CDATA[") {
250+
if bytes.HasPrefix(cached, []byte("<![CDATA[")) || bytes.HasPrefix(cached, []byte("![CDATA[")) {
250251
nodeType = CharDataNode
251252
}
252253
node := &Node{Type: nodeType, Data: string(tok), level: p.level}

0 commit comments

Comments
 (0)