-
Notifications
You must be signed in to change notification settings - Fork 0
/
string_utils.go
222 lines (194 loc) · 6.12 KB
/
string_utils.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
package jsonrepair
import (
"regexp"
"strings"
)
// Code points of the characters that the helper functions in this file compare
// against. The values are untyped integer constants, so they can be compared
// directly with rune arguments.
const (
codeBackslash = 0x5c // "\"
codeSlash = 0x2f // "/"
codeAsterisk = 0x2a // "*"
codeOpeningBrace = 0x7b // "{"
codeClosingBrace = 0x7d // "}"
codeOpeningBracket = 0x5b // "["
codeClosingBracket = 0x5d // "]"
codeOpenParenthesis = 0x28 // "("
codeCloseParenthesis = 0x29 // ")"
codeSpace = 0x20 // " "
codeNewline = 0xa // "\n"
codeTab = 0x9 // "\t"
codeReturn = 0xd // "\r"
codeBackspace = 0x08 // "\b"
codeFormFeed = 0x0c // "\f"
codeDoubleQuote = 0x0022 // "
codePlus = 0x2b // "+"
codeMinus = 0x2d // "-"
codeQuote = 0x27 // "'"
codeZero = 0x30 // "0"
codeNine = 0x39 // "9"
codeComma = 0x2c // ","
codeDot = 0x2e // "." (dot, period)
codeColon = 0x3a // ":"
codeSemicolon = 0x3b // ";"
codeUppercaseA = 0x41 // "A"
codeLowercaseA = 0x61 // "a"
codeUppercaseE = 0x45 // "E"
codeLowercaseE = 0x65 // "e"
codeUppercaseF = 0x46 // "F"
codeLowercaseF = 0x66 // "f"
codeNonBreakingSpace = 0xa0 // U+00A0, non-breaking space
codeEnQuad = 0x2000 // U+2000, en quad; lower bound of the range tested by IsSpecialWhitespace
codeHairSpace = 0x200a // U+200A, hair space; upper bound of the range tested by IsSpecialWhitespace
codeNarrowNoBreakSpace = 0x202f // U+202F, narrow no-break space
codeMediumMathematicalSpace = 0x205f // U+205F, medium mathematical space
codeIdeographicSpace = 0x3000 // U+3000, ideographic space
codeDoubleQuoteLeft = 0x201c // “
codeDoubleQuoteRight = 0x201d // ”
codeQuoteLeft = 0x2018 // ‘
codeQuoteRight = 0x2019 // ’
codeGraveAccent = 0x0060 // `
codeAcuteAccent = 0x00b4 // ´
) // TODO: sort the codes
// IsHex reports whether code is an ASCII hexadecimal digit, i.e. one of
// 0-9, A-F or a-f.
func IsHex(code rune) bool {
	switch {
	case code >= codeZero && code <= codeNine:
		return true
	case code >= codeUppercaseA && code <= codeUppercaseF:
		return true
	case code >= codeLowercaseA && code <= codeLowercaseF:
		return true
	default:
		return false
	}
}
// IsDigit reports whether code is an ASCII decimal digit (0 through 9).
func IsDigit(code rune) bool {
	if code < codeZero {
		return false
	}
	return code <= codeNine
}
// IsValidStringCharacter reports whether code lies in the inclusive range
// U+0020 through U+10FFFF. Everything below the space character is rejected.
func IsValidStringCharacter(code rune) bool {
	const (
		lowest  = 0x20
		highest = 0x10ffff
	)
	return lowest <= code && code <= highest
}
var Delimiters = map[rune]bool{
',': true,
':': true,
'[': true,
']': true,
'{': true,
'}': true,
'(': true,
')': true,
'\n': true,
}
// regexDelimiter matches a string made of exactly one of the characters
// , : [ ] { } ( ) + or a newline.
//
// IsDelimiter tests the same character set with a switch rather than through
// this pattern; the variable is retained so that any other code in the package
// that refers to it keeps compiling.
var regexDelimiter = regexp.MustCompile(`^[,:[\]{}()\n+]$`)

// IsDelimiter reports whether c is one of the characters , : [ ] { } ( ) +,
// a newline, or any character accepted by IsQuote.
//
// The comparison is done on the full rune. Converting the rune to a single
// byte first would keep only its low 8 bits, so an unrelated code point such
// as U+012C (low byte 0x2C, the comma) would be misreported as a delimiter.
func IsDelimiter(c rune) bool {
	switch c {
	case ',', ':', '[', ']', '{', '}', '(', ')', '\n', '+':
		return true
	}
	return IsQuote(c)
}
// regexStartOfValue matches a string made of exactly one of: "[", "{", "-",
// or a word character (ASCII letter, ASCII digit or underscore).
//
// IsStartOfValue tests the same character set with a switch rather than
// through this pattern; the variable is retained so that any other code in the
// package that refers to it keeps compiling.
var regexStartOfValue = regexp.MustCompile(`^[[{\w-]$`)

// regexNumberWithLeadingZero matches text that begins with "0" immediately
// followed by another digit.
var regexNumberWithLeadingZero = regexp.MustCompile(`^0\d`)

// IsStartOfValue reports whether r is "[", "{", "-", an underscore, an ASCII
// letter or digit, or any character accepted by IsQuote.
//
// The comparison is done on the full rune. Converting the rune to a single
// byte first would keep only its low 8 bits, so an unrelated code point such
// as U+015B (low byte 0x5B, the opening bracket) would be misreported as the
// start of a value.
func IsStartOfValue(r rune) bool {
	switch {
	case r == '[', r == '{', r == '-', r == '_':
		return true
	case r >= '0' && r <= '9', r >= 'A' && r <= 'Z', r >= 'a' && r <= 'z':
		return true
	}
	return IsQuote(r)
}
// IsControlCharacter reports whether code is a newline, carriage return, tab,
// backspace or form feed.
func IsControlCharacter(code rune) bool {
	switch code {
	case codeNewline, codeReturn, codeTab, codeBackspace, codeFormFeed:
		return true
	default:
		return false
	}
}
// IsWhitespace reports whether code is one of the four plain whitespace
// characters: space, newline, tab or carriage return.
func IsWhitespace(code rune) bool {
	switch code {
	case codeSpace, codeNewline, codeTab, codeReturn:
		return true
	default:
		return false
	}
}
/**
* Check if the given character is a special whitespace character, some
* unicode variant
*/
func IsSpecialWhitespace(code rune) bool {
return (code == codeNonBreakingSpace ||
(code >= codeEnQuad && code <= codeHairSpace) ||
code == codeNarrowNoBreakSpace ||
code == codeMediumMathematicalSpace ||
code == codeIdeographicSpace)
}
// IsQuote reports whether code is any kind of quote character, i.e. whether
// IsDoubleQuoteLike or IsSingleQuoteLike accepts it.
func IsQuote(code rune) bool {
	// The double-quote family is tried first because it occurs most often.
	if IsDoubleQuoteLike(code) {
		return true
	}
	return IsSingleQuoteLike(code)
}
// IsDoubleQuoteLike reports whether code is the plain double quote or one of
// its curly variants (left or right double quotation mark).
func IsDoubleQuoteLike(code rune) bool {
	switch code {
	case codeDoubleQuote, codeDoubleQuoteLeft, codeDoubleQuoteRight:
		return true
	default:
		return false
	}
}
// IsDoubleQuote reports whether code is exactly the plain double quote
// character. Curly double-quote variants are not accepted here.
func IsDoubleQuote(code rune) bool {
	return codeDoubleQuote == code
}
// IsSingleQuoteLike reports whether code is the plain single quote or one of
// its look-alikes: the left or right curly single quote, the grave accent or
// the acute accent.
func IsSingleQuoteLike(code rune) bool {
	switch code {
	case codeQuote, codeQuoteLeft, codeQuoteRight, codeGraveAccent, codeAcuteAccent:
		return true
	default:
		return false
	}
}
// isSingleQuote reports whether code is exactly the plain single quote
// character. Curly quotes and accent look-alikes are not accepted here.
func isSingleQuote(code rune) bool {
	return codeQuote == code
}
// stripLastOccurrence removes the last occurrence of textToStrip from text.
//
// When stripRemainingText is true, everything from the start of that
// occurrence to the end of text is dropped; otherwise only the occurrence
// itself is removed and the text after it is kept. If textToStrip does not
// occur in text, text is returned unchanged.
func stripLastOccurrence(text, textToStrip string, stripRemainingText bool) string {
	index := strings.LastIndex(text, textToStrip)
	if index == -1 {
		return text
	}
	if stripRemainingText {
		return text[:index]
	}
	// Skip the whole needle, not just one byte: this keeps multi-byte and
	// multi-character needles from being left half in place, and avoids slicing
	// past the end of text when textToStrip is empty.
	return text[:index] + text[index+len(textToStrip):]
}
// InsertBeforeLastWhitespace inserts textToInsert in front of the trailing run
// of whitespace (as defined by IsWhitespace) at the end of text and returns
// the resulting slice. When text has no trailing whitespace, which includes
// the empty slice, textToInsert is simply appended. When text consists only of
// whitespace, textToInsert ends up at the very beginning.
//
// The result is built with append on text, so it may share text's backing
// array; callers should use the returned slice in place of the one passed in.
func InsertBeforeLastWhitespace(text []rune, textToInsert string) []rune {
	// Walk back over the trailing whitespace. The index > 0 guard keeps empty
	// and whitespace-only input from indexing before the start of the slice.
	index := len(text)
	for index > 0 && IsWhitespace(text[index-1]) {
		index--
	}

	inserted := []rune(textToInsert)
	if index == len(text) {
		// no trailing whitespace
		return append(text, inserted...)
	}

	// Copy the trailing whitespace out before appending, because the append
	// below may overwrite that region of the shared backing array.
	tail := make([]rune, 0, len(inserted)+len(text)-index)
	tail = append(tail, inserted...)
	tail = append(tail, text[index:]...)
	return append(text[:index], tail...)
}
// RemoveAtIndex returns text with the count runes starting at index start cut
// out. The kept tail is appended onto text[:start], so the elements of the
// slice passed in may be overwritten; callers should use the returned slice.
func RemoveAtIndex(text []rune, start, count int) []rune {
	head := text[:start]
	tail := text[start+count:]
	return append(head, tail...)
}
// endWithCommaOrNewlineReg matches a comma or newline that is followed only by
// spaces, tabs or carriage returns up to the end of the text.
var endWithCommaOrNewlineReg = regexp.MustCompile(`[,\n][ \t\r]*$`)

// EndsWithCommaOrNewline reports whether text ends with a comma or a newline,
// optionally followed by spaces, tabs or carriage returns.
func EndsWithCommaOrNewline(text string) bool {
	loc := endWithCommaOrNewlineReg.FindStringIndex(text)
	return loc != nil
}
// nextNonWhiteSpaceCharacter returns the first rune at or after index start in
// text for which IsWhitespace is false. It returns -1 when only whitespace
// remains or when start is at or beyond the end of text.
func nextNonWhiteSpaceCharacter(text []rune, start int) rune {
	for pos := start; pos < len(text); pos++ {
		if c := text[pos]; !IsWhitespace(c) {
			return c
		}
	}
	return -1
}