-
Notifications
You must be signed in to change notification settings - Fork 1
/
blook.go
227 lines (195 loc) · 5.03 KB
/
blook.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
package main
import (
"errors"
"os"
"strings"
)
// min returns minimum of two int64 numbers
func min(a int64, b int64) int64 {
if a > b {
return b
}
return a
}
// writeBytes writes [start; stop] bytes from fromFile to toFile
func writeBytes(fromFile *os.File, start int64, stop int64, toFile *os.File, maxBufferSize int64) (int64, error) {
var bytesWritten int64
bytesWritten = 0
if start > stop {
return bytesWritten, nil
}
fromFile.Seek(start, 0)
buffer := make([]byte, min(stop-start+1, maxBufferSize))
for current := start; current < stop; {
bufferSize := min(stop-current+1, maxBufferSize)
if bufferSize < maxBufferSize {
buffer = make([]byte, bufferSize)
}
n, err := fromFile.Read(buffer)
if err != nil {
return bytesWritten, err
} else if int64(n) < bufferSize {
return bytesWritten, errors.New("Error: unexpected end of input")
}
n, err = toFile.Write(buffer)
if err != nil {
return bytesWritten, err
}
bytesWritten += int64(n)
current += int64(bufferSize)
}
return bytesWritten, nil
}
// newLineIndex returns index of newline symbol in buffer;
// if no newline symbol found returns -1
func newLineIndex(buffer []byte, diff int64) int {
n := len(buffer)
if n == 0 {
return -1
}
idx := 0
if diff == -1 {
idx = n - 1
}
for {
if n == 0 {
return -1
}
if buffer[idx] == '\n' {
return idx
}
idx = idx + int(diff)
n--
}
}
// findBorder searches for newline symbol in [from; to]
// when diff = 1 makes forward search (`from` -> `to`)
// when diff = -1 makes backward search (`to` -> `from`)
func findBorder(file *os.File, from int64, to int64, diff int64, maxBufferSize int64) (int64, error) {
size := to - from + int64(1)
currentSize := min(size, maxBufferSize)
position := from
if diff == -1 {
position = to - currentSize + int64(1)
}
buffer := make([]byte, currentSize)
for {
if size == 0 {
return -1, nil
}
if int64(len(buffer)) != currentSize {
buffer = make([]byte, currentSize)
}
file.Seek(position, 0)
n, err := file.Read(buffer)
if err != nil {
return -1, err
} else if int64(n) < currentSize {
return -1, errors.New("Error: unexpected end of input")
}
idx := newLineIndex(buffer, diff)
if idx >= 0 {
return position + int64(idx), nil
}
position = position + diff*currentSize
size = size - currentSize
currentSize = min(size, maxBufferSize)
}
}
// findString searches string borders
// returns (leftBorder, rightBorder, error)
func findString(file *os.File, from int64, to int64) (int64, int64, error) {
maxBufferSize := int64(64 * 1024)
middle := (from + to) / 2
strFrom, err := findBorder(file, from, middle, -1, maxBufferSize)
if err != nil {
return -1, -1, err
} else if strFrom == -1 {
//no newline found, just return from position
strFrom = from
} else {
//new line found, need to increment position to omit newline byte
strFrom++
}
strTo, err := findBorder(file, middle+1, to, 1, maxBufferSize)
if err != nil {
return -1, -1, err
} else if strTo == -1 {
//no newline found, just return from position
strTo = to
} else {
//new line found, need to decrement position to omit newline byte
strTo--
}
return strFrom, strTo, nil
}
// getString returns string from `file` in [from; to]
func getString(file *os.File, from int64, to int64) (string, error) {
bufferSize := to - from + 1
buffer := make([]byte, bufferSize)
_, err := file.Seek(from, 0)
if err != nil {
return "", err
}
_, err = file.Read(buffer)
if err != nil {
return "", err
}
return string(buffer[:bufferSize]), nil
}
// eligibleString returns true
// for forward = true if pattern >= value (or pattern is prefix for value)
// for forward = false if pattern <= value (or pattern is prefix for value)
// otherwise it returns false
func eligibleString(pattern string, value string, forward bool) bool {
order := -1
if forward {
order = 1
}
return strings.Compare(value, pattern) == order || strings.HasPrefix(value, pattern)
}
// blook returns first byte number in the ordered `file` where `pattern` is occured as a prefix string
func blook(pattern string, file *os.File, size int64, forward bool) (int64, error) {
if size == 0 {
return -1, nil
}
result := int64(-1)
from := int64(0)
to := size - 1
const maxCalls = 64
currCall := 0
for {
if from < 0 || from > to || to >= size {
return result, nil
}
if currCall > maxCalls {
return -1, errors.New("MAX_CALLS_EXCEEDED")
}
strFrom, strTo, err := findString(file, from, to)
if err != nil {
return -1, err
}
value, err := getString(file, strFrom, strTo)
if err != nil {
return -1, err
}
if eligibleString(pattern, value, forward) {
//it's already result, but we need to search for more results
if forward {
result = strFrom
to = strFrom - int64(1)
} else {
result = strTo
from = strTo + int64(2) //next byte is \n, so we need to move to the bytes after \n
}
} else {
//it's not a result, we need to search for more results
if forward {
from = strTo + int64(1)
} else {
to = strFrom - int64(1)
}
}
currCall++
}
}