-
Notifications
You must be signed in to change notification settings - Fork 66
/
converter.go
168 lines (136 loc) · 5.24 KB
/
converter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
package iconv
/*
#cgo darwin LDFLAGS: -liconv
#cgo freebsd LDFLAGS: -liconv
#cgo windows LDFLAGS: -liconv
#include <stdlib.h>
#include <iconv.h>
// As of GO 1.6 passing a pointer to Go pointer, will lead to panic
// Therofore we use this wrapper function, to avoid passing **char directly from go
size_t call_iconv(iconv_t ctx, char *in, size_t *size_in, char *out, size_t *size_out){
return iconv(ctx, &in, size_in, &out, size_out);
}
*/
import "C"
import "syscall"
import "unsafe"
type Converter struct {
context C.iconv_t
open bool
}
// Initialize a new Converter. If fromEncoding or toEncoding are not supported by
// iconv then an EINVAL error will be returned. An ENOMEM error maybe returned if
// there is not enough memory to initialize an iconv descriptor
func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err error) {
converter = new(Converter)
// convert to C strings
toEncodingC := C.CString(toEncoding)
fromEncodingC := C.CString(fromEncoding)
// open an iconv descriptor
converter.context, err = C.iconv_open(toEncodingC, fromEncodingC)
// free the C Strings
C.free(unsafe.Pointer(toEncodingC))
C.free(unsafe.Pointer(fromEncodingC))
// check err
if err == nil {
// no error, mark the context as open
converter.open = true
}
return
}
// destroy is called during garbage collection
func (this *Converter) destroy() {
this.Close()
}
// Close a Converter's iconv description explicitly
func (this *Converter) Close() (err error) {
if this.open {
_, err = C.iconv_close(this.context)
}
return
}
// Convert bytes from an input byte slice into a give output byte slice
//
// As many bytes that can converted and fit into the size of output will be
// processed and the number of bytes read for input as well as the number of
// bytes written to output will be returned. If not all converted bytes can fit
// into output and E2BIG error will also be returned. If input contains an invalid
// sequence of bytes for the Converter's fromEncoding an EILSEQ error will be returned
//
// For shift based output encodings, any end shift byte sequences can be generated by
// passing a 0 length byte slice as input. Also passing a 0 length byte slice for output
// will simply reset the iconv descriptor shift state without writing any bytes.
func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err error) {
// make sure we are still open
if this.open {
inputLeft := C.size_t(len(input))
outputLeft := C.size_t(len(output))
if inputLeft > 0 && outputLeft > 0 {
// we have to give iconv a pointer to a pointer of the underlying
// storage of each byte slice - so far this is the simplest
// way i've found to do that in Go, but it seems ugly
inputPointer := (*C.char)(unsafe.Pointer(&input[0]))
outputPointer := (*C.char)(unsafe.Pointer(&output[0]))
_, err = C.call_iconv(this.context, inputPointer, &inputLeft, outputPointer, &outputLeft)
// update byte counters
bytesRead = len(input) - int(inputLeft)
bytesWritten = len(output) - int(outputLeft)
} else if inputLeft == 0 && outputLeft > 0 {
// inputPointer will be nil, outputPointer is generated as above
outputPointer := (*C.char)(unsafe.Pointer(&output[0]))
_, err = C.call_iconv(this.context, nil, &inputLeft, outputPointer, &outputLeft)
// update write byte counter
bytesWritten = len(output) - int(outputLeft)
} else {
// both input and output are zero length, do a shift state reset
_, err = C.call_iconv(this.context, nil, &inputLeft, nil, &outputLeft)
}
} else {
err = syscall.EBADF
}
return bytesRead, bytesWritten, err
}
// Convert an input string
//
// EILSEQ error may be returned if input contains invalid bytes for the
// Converter's fromEncoding.
func (this *Converter) ConvertString(input string) (output string, err error) {
// make sure we are still open
if this.open {
// construct the buffers
inputBuffer := []byte(input)
outputBuffer := make([]byte, len(inputBuffer)*2) // we use a larger buffer to help avoid resizing later
// call Convert until all input bytes are read or an error occurs
var bytesRead, totalBytesRead, bytesWritten, totalBytesWritten int
for totalBytesRead < len(inputBuffer) && err == nil {
// use the totals to create buffer slices
bytesRead, bytesWritten, err = this.Convert(inputBuffer[totalBytesRead:], outputBuffer[totalBytesWritten:])
totalBytesRead += bytesRead
totalBytesWritten += bytesWritten
// check for the E2BIG error specifically, we can add to the output
// buffer to correct for it and then continue
if err == syscall.E2BIG {
// increase the size of the output buffer by another input length
// first, create a new buffer
tempBuffer := make([]byte, len(outputBuffer)+len(inputBuffer))
// copy the existing data
copy(tempBuffer, outputBuffer)
// switch the buffers
outputBuffer = tempBuffer
// forget the error
err = nil
}
}
if err == nil {
// perform a final shift state reset
_, bytesWritten, err = this.Convert([]byte{}, outputBuffer[totalBytesWritten:])
// update total count
totalBytesWritten += bytesWritten
}
// construct the final output string
output = string(outputBuffer[:totalBytesWritten])
} else {
err = syscall.EBADF
}
return output, err
}