-
-
Notifications
You must be signed in to change notification settings - Fork 15
/
vis.go
177 lines (155 loc) · 4.92 KB
/
vis.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
/*
* govis: unicode aware vis(3) encoding implementation
* Copyright (C) 2017 SUSE LLC.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package govis
import (
"fmt"
"unicode"
)
// isunsafe reports whether ch is one of the three characters this package
// classifies as "unsafe": backspace, bell (0x07) or carriage return.
func isunsafe(ch rune) bool {
	switch ch {
	case '\b', '\a', '\r':
		return true
	}
	return false
}
// isglob reports whether ch is one of the characters this package treats as a
// glob metacharacter: '*', '?', '[' or '#'.
func isglob(ch rune) bool {
	switch ch {
	case '*', '?', '[', '#':
		return true
	default:
		return false
	}
}
// ishttp reports whether ch belongs to the character set defined by RFC 1808
// that is left unescaped in HTTP-style output: ASCII letters and digits, the
// "safe" punctuation ($ - _ . +) and the "extra" punctuation (! * ' ( ) ,).
func ishttp(ch rune) bool {
	// RFC 1808 does not really consider anything outside of ASCII, so to be
	// safe every non-ASCII character is reported as "not HTTP".
	if ch > unicode.MaxASCII {
		return false
	}

	switch ch {
	case '$', '-', '_', '.', '+':
		// Safe characters.
		return true
	case '!', '*', '\'', '(', ')', ',':
		// Extra characters.
		return true
	}

	// Everything else qualifies only if it is alphanumeric.
	return unicode.IsLetter(ch) || unicode.IsDigit(ch)
}
// isgraph reports whether ch is an ASCII character that unicode.IsGraphic
// accepts and unicode.IsSpace rejects. Anything above unicode.MaxASCII is
// never considered graphical here.
func isgraph(ch rune) bool {
	if ch > unicode.MaxASCII || unicode.IsSpace(ch) {
		return false
	}
	return unicode.IsGraphic(ch)
}
// vis returns the encoding of a single *byte* under the given flags.
//
// Go supports runes (and thus native UTF-8 parsing), but encoding is done one
// byte at a time on purpose: it guarantees that the bit-stream is completely
// maintained through an Unvis(Vis(...)) round-trip. The downside is that Vis()
// never outputs unicode, which is actually a benefit on the encoding side,
// since the result always works with the simple unvis(3) implementation. It
// also means different multi-byte encodings need no special handling.
//
// The returned error is always nil.
func vis(b byte, flag VisFlag) (string, error) {
	// All of the classification helpers work on runes, so widen the byte once.
	ch := rune(b)

	// XXX: This is quite a horrible thing to support.
	// In HTTP style, anything ishttp rejects becomes a %XX hex escape.
	if flag&VisHTTPStyle == VisHTTPStyle && !ishttp(ch) {
		return fmt.Sprintf("%%%.2X", ch), nil
	}

	// Work out whether the character can be emitted as itself. Most "normal"
	// (graphical) characters are, unless the flags say otherwise. Two cases
	// are *ALWAYS* encoded: everything outside ASCII, and glob characters when
	// VisGlob is set.
	// TODO: Switch this to much more logical code.
	mustEncode := ch > unicode.MaxASCII || (flag&VisGlob == VisGlob && isglob(ch))
	if !mustEncode {
		// Whitespace stays literal unless its flag asks for encoding; the
		// "unsafe" characters stay literal only when VisSafe is set.
		keepSpace := ch == ' ' && flag&VisSpace != VisSpace
		keepTab := ch == '\t' && flag&VisTab != VisTab
		keepNewline := ch == '\n' && flag&VisNewline != VisNewline
		keepUnsafe := flag&VisSafe != 0 && isunsafe(ch)

		if isgraph(ch) || keepSpace || keepTab || keepNewline || keepUnsafe {
			// A literal backslash is doubled unless VisNoSlash is set.
			if ch == '\\' && flag&VisNoSlash == 0 {
				return `\\`, nil
			}
			return string(ch), nil
		}
	}

	// Try to use C-style escapes first.
	if flag&VisCStyle == VisCStyle {
		escape := ""
		switch ch {
		case ' ':
			escape = `\s`
		case '\n':
			escape = `\n`
		case '\r':
			escape = `\r`
		case '\b':
			escape = `\b`
		case '\a':
			escape = `\a`
		case '\v':
			escape = `\v`
		case '\t':
			escape = `\t`
		case '\f':
			escape = `\f`
		case '\x00':
			// Output octal just to be safe.
			escape = `\000`
		}
		if escape != "" {
			return escape, nil
		}
	}

	// Octal is used for graphical characters, for spaces (with or without the
	// high bit set), and whenever the caller forces it with VisOctal.
	octalForced := flag&VisOctal == VisOctal
	if octalForced || isgraph(ch) || ch&0x7f == ' ' {
		// Always output three-character octal just to be safe.
		return fmt.Sprintf(`\%.3o`, ch), nil
	}

	// What remains is meta or ctrl escapes. As far as the original author
	// could tell this is not defined by any standard, so the logic is copied
	// from the original vis(3) implementation. Hopefully nobody actually
	// relies on this (octal and hex are better).
	prefix := `\`
	if flag&VisNoSlash != 0 {
		prefix = ""
	}

	// Meta characters have 0x80 set, but are otherwise identical to control
	// characters: strip the bit and mark the escape with "M".
	low := b
	if b&0x80 != 0 {
		low = b & 0x7f
		prefix += "M"
	}

	switch {
	case !unicode.IsControl(rune(low)):
		return prefix + "-" + string(rune(low)), nil
	case low == 0x7f:
		// DEL has no letter of its own in caret notation.
		return prefix + "^?", nil
	default:
		// Caret notation: shift the control code up by '@'.
		return prefix + "^" + string(rune(low+'@')), nil
	}
}
// Vis encodes the provided string to a BSD-compatible encoding using BSD's
// vis() flags. However, it will correctly handle multi-byte encoding (which is
// not done properly by BSD's vis implementation).
//
// src is processed one byte at a time and each byte is encoded by vis. An
// error is returned, together with an empty string, if flag has any bit set
// outside visMask or if encoding a byte fails.
func Vis(src string, flag VisFlag) (string, error) {
	if flag&visMask != flag {
		return "", fmt.Errorf("vis: flag %q contains unknown or unsupported flags", flag)
	}

	// Collect the output in a byte slice instead of growing a string with +=,
	// which would re-copy everything encoded so far on every iteration. The
	// input length is used as the initial capacity; escapes make it grow.
	output := make([]byte, 0, len(src))
	for i := 0; i < len(src); i++ {
		encoded, err := vis(src[i], flag)
		if err != nil {
			return "", err
		}
		output = append(output, encoded...)
	}
	return string(output), nil
}