-
Notifications
You must be signed in to change notification settings - Fork 11
/
xml_parser.go
265 lines (229 loc) · 7.93 KB
/
xml_parser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
package golibxml
/*
#cgo pkg-config: libxml-2.0
#include <libxml/parser.h>
static inline void free_string(char* s) { free(s); }
static inline xmlChar *to_xmlcharptr(const char *s) { return (xmlChar *)s; }
static inline char *to_charptr(const xmlChar *s) { return (char *)s; }
static inline int *new_int_ptr(int value) {
int *ptr = calloc(sizeof(int), 1);
*ptr = value;
return ptr;
}
static inline char **new_char_array(int size) { return calloc(sizeof(char *), size); }
static inline void set_char_array_string(char **ptr, char *str, int n) { ptr[n] = str; }
static inline char *get_char_array_string(char **ptr, int n) {
return ptr[n];
}
*/
import "C"
import "unsafe"
////////////////////////////////////////////////////////////////////////////////
// TYPES/STRUCTS
////////////////////////////////////////////////////////////////////////////////
// ParserOption is the Go-side type for libxml2 parser option values. The
// wrappers in this file convert it to a C int before handing it to libxml2.
type ParserOption int
// Parser option constants, each bound to the libxml2 constant of the same
// name. Only XML_PARSE_RECOVER is declared with the explicit ParserOption
// type; the remaining names take their type from the C constant expression.
const (
XML_PARSE_RECOVER ParserOption = C.XML_PARSE_RECOVER // recover on errors
XML_PARSE_NOENT = C.XML_PARSE_NOENT // substitute entities
XML_PARSE_DTDLOAD = C.XML_PARSE_DTDLOAD // load the external subset
XML_PARSE_DTDATTR = C.XML_PARSE_DTDATTR // default DTD attributes
XML_PARSE_DTDVALID = C.XML_PARSE_DTDVALID // validate with the DTD
XML_PARSE_NOERROR = C.XML_PARSE_NOERROR // suppress error reports
XML_PARSE_NOWARNING = C.XML_PARSE_NOWARNING // suppress warning reports
XML_PARSE_PEDANTIC = C.XML_PARSE_PEDANTIC // pedantic error reporting
XML_PARSE_NOBLANKS = C.XML_PARSE_NOBLANKS // remove blank nodes
XML_PARSE_SAX1 = C.XML_PARSE_SAX1 // use the SAX1 interface internally
XML_PARSE_XINCLUDE = C.XML_PARSE_XINCLUDE // implement XInclude substitution
XML_PARSE_NONET = C.XML_PARSE_NONET // forbid network access
XML_PARSE_NODICT = C.XML_PARSE_NODICT // do not reuse the context dictionary
XML_PARSE_NSCLEAN = C.XML_PARSE_NSCLEAN // remove redundant namespace declarations
XML_PARSE_NOCDATA = C.XML_PARSE_NOCDATA // merge CDATA as text nodes
XML_PARSE_NOXINCNODE = C.XML_PARSE_NOXINCNODE // do not generate XINCLUDE START/END nodes
XML_PARSE_COMPACT = C.XML_PARSE_COMPACT // compact small text nodes; the tree must not be modified afterwards (modifying it may crash)
XML_PARSE_OLD10 = C.XML_PARSE_OLD10 // parse using XML-1.0 before update 5
XML_PARSE_NOBASEFIX = C.XML_PARSE_NOBASEFIX // do not fix up XINCLUDE xml:base URIs
XML_PARSE_HUGE = C.XML_PARSE_HUGE // relax any hardcoded limit from the parser
XML_PARSE_OLDSAX = C.XML_PARSE_OLDSAX // parse using the SAX2 interface from before 2.7.0
)
// Parser is a thin Go handle around a libxml2 parser context.
type Parser struct {
// Ptr is the raw libxml2 parser context that every Parser method passes
// straight through to the C library.
Ptr C.xmlParserCtxtPtr
}
////////////////////////////////////////////////////////////////////////////////
// PRIVATE FUNCTIONS
////////////////////////////////////////////////////////////////////////////////
// makeParser wraps a raw libxml2 parser context in a *Parser.
// A nil context yields a nil *Parser, so results coming back from C can be
// passed through without a separate check at each call site.
func makeParser(parser C.xmlParserCtxtPtr) *Parser {
	if parser != nil {
		return &Parser{Ptr: parser}
	}
	return nil
}
////////////////////////////////////////////////////////////////////////////////
// INTERFACE
////////////////////////////////////////////////////////////////////////////////
// ByteConsumed calls libxml2's xmlByteConsumed on this parser context and
// returns the C result converted to a Go int.
func (p *Parser) ByteConsumed() int {
	consumed := C.xmlByteConsumed(p.Ptr)
	return int(consumed)
}
// CleanupParser invokes libxml2's xmlCleanupParser. It takes no arguments and
// returns nothing.
//
// NOTE(review): libxml2 documents this as a process-wide cleanup meant to be
// called once the library is no longer needed — confirm callers respect that.
func CleanupParser() {
	C.xmlCleanupParser()
}
// Clear passes this parser's context to libxml2's xmlClearParserCtxt.
func (p *Parser) Clear() {
	ctxt := p.Ptr
	C.xmlClearParserCtxt(ctxt)
}
// CreateDocParser builds a parser context for the in-memory document cur by
// calling libxml2's xmlCreateDocParserCtxt.
//
// cur is copied into a temporary C string that is freed before this function
// returns. The result is nil when libxml2 returns a NULL context.
//
// NOTE(review): the temporary C string is released on return, so this relies
// on libxml2 having taken its own copy of the input — confirm for the libxml2
// version being linked.
func CreateDocParser(cur string) *Parser {
	cDoc := C.CString(cur)
	defer C.free_string(cDoc)

	return makeParser(C.xmlCreateDocParserCtxt(C.to_xmlcharptr(cDoc)))
}
// ReadDoc parses the in-memory document input with this parser context by
// calling libxml2's xmlCtxtReadDoc, and returns the result wrapped by makeDoc.
//
// input, url and encoding are each copied into temporary C strings that are
// freed when the call returns. Because every argument is always converted, an
// empty Go string reaches libxml2 as an empty C string, never as NULL.
// options is forwarded as a C int.
func (p *Parser) ReadDoc(input string, url string, encoding string, options ParserOption) *Document {
	cInput := C.CString(input)
	defer C.free_string(cInput)

	cURL := C.CString(url)
	defer C.free_string(cURL)

	cEncoding := C.CString(encoding)
	defer C.free_string(cEncoding)

	return makeDoc(C.xmlCtxtReadDoc(p.Ptr, C.to_xmlcharptr(cInput), cURL, cEncoding, C.int(options)))
}
// Reset passes this parser's context to libxml2's xmlCtxtReset.
func (p *Parser) Reset() {
	ctxt := p.Ptr
	C.xmlCtxtReset(ctxt)
}
// UseOptions applies options to this parser context through libxml2's
// xmlCtxtUseOptions and returns the C result converted to a Go int.
func (p *Parser) UseOptions(options ParserOption) int {
	rc := C.xmlCtxtUseOptions(p.Ptr, C.int(options))
	return int(rc)
}
// Free releases the underlying libxml2 parser context with
// xmlFreeParserCtxt.
//
// The stored pointer is cleared afterwards, so calling Free again, or calling
// it on a nil Parser or one whose Ptr is already nil, does nothing instead of
// handing an already-freed context back to libxml2. The Parser must not be
// used for parsing once Free has been called.
func (p *Parser) Free() {
	if p == nil || p.Ptr == nil {
		return
	}
	C.xmlFreeParserCtxt(p.Ptr)
	// Drop the dangling pointer so a repeated Free cannot double-free.
	p.Ptr = nil
}
// GetFeaturesList returns the feature names reported by libxml2's
// xmlGetFeaturesList.
//
// A scratch array with room for maxFeatures entries is allocated in C memory,
// filled by libxml2, copied into Go strings and then released. Only the
// pointer array is freed; the strings it points to are left untouched.
//
// It returns nil when the scratch array cannot be allocated or when libxml2
// reports an error (a negative result), rather than panicking.
func GetFeaturesList() []string {
	// Capacity, in entries, of the scratch array handed to libxml2.
	const maxFeatures = 255

	// The count is passed by pointer: it carries the capacity in and is read
	// back after the call as the number of entries to copy.
	ccount := C.new_int_ptr(maxFeatures)
	defer C.free(unsafe.Pointer(ccount))

	cnames := C.new_char_array(maxFeatures)
	if cnames == nil {
		return nil
	}
	defer C.free(unsafe.Pointer(cnames))

	if C.xmlGetFeaturesList(ccount, cnames) < 0 {
		return nil
	}

	// Copy every reported name into Go-managed memory before the deferred
	// free releases the C array.
	names := make([]string, int(*ccount))
	for i := range names {
		names[i] = C.GoString(C.get_char_array_string(cnames, C.int(i)))
	}
	return names
}
// NewParser allocates a fresh parser context with libxml2's xmlNewParserCtxt
// and wraps it in a *Parser. The result is nil when libxml2 returns NULL.
func NewParser() *Parser {
	return makeParser(C.xmlNewParserCtxt())
}
// ParseDTD loads a DTD through libxml2's xmlParseDTD and returns the result
// wrapped by makeDtd.
//
// ExternalID and SystemID are copied into temporary C strings that are freed
// when the call returns. Both are always passed as non-NULL C strings, even
// when the Go string is empty.
func ParseDTD(ExternalID string, SystemID string) *Dtd {
	cExternal := C.CString(ExternalID)
	defer C.free_string(cExternal)

	cSystem := C.CString(SystemID)
	defer C.free_string(cSystem)

	return makeDtd(C.xmlParseDTD(C.to_xmlcharptr(cExternal), C.to_xmlcharptr(cSystem)))
}
// ParseDoc parses the in-memory document cur with libxml2's xmlParseDoc and
// returns the result wrapped by makeDoc. cur is copied into a temporary C
// string that is freed when the call returns.
func ParseDoc(cur string) *Document {
	cDoc := C.CString(cur)
	defer C.free_string(cDoc)

	return makeDoc(C.xmlParseDoc(C.to_xmlcharptr(cDoc)))
}
// Parse runs libxml2's xmlParseDocument on this parser context and returns
// the C status code converted to a Go int.
func (p *Parser) Parse() int {
	status := C.xmlParseDocument(p.Ptr)
	return int(status)
}
// MyDoc returns the document stored in the parser context's myDoc field,
// wrapped by makeDoc, or nil when that field is NULL.
func (p *Parser) MyDoc() *Document {
	docptr := p.Ptr.myDoc
	if docptr == nil {
		return nil
	}
	return makeDoc(docptr)
}
// ParseEntity hands filename to libxml2's xmlParseEntity and returns the
// result wrapped by makeDoc. filename is copied into a temporary C string
// that is freed when the call returns.
func ParseEntity(filename string) *Document {
	cName := C.CString(filename)
	defer C.free_string(cName)

	return makeDoc(C.xmlParseEntity(cName))
}
// ParseFile hands filename to libxml2's xmlParseFile and returns the result
// wrapped by makeDoc. filename is copied into a temporary C string that is
// freed when the call returns.
func ParseFile(filename string) *Document {
	cName := C.CString(filename)
	defer C.free_string(cName)

	return makeDoc(C.xmlParseFile(cName))
}
// ParseMemory parses the XML held in buffer with libxml2's xmlParseMemory and
// returns the result wrapped by makeDoc.
//
// It returns nil without calling into libxml2 when buffer is empty (taking
// the address of its first element would panic) or when its length does not
// fit in a C int (the size would otherwise be silently truncated).
//
// The slice's backing memory is passed to C directly, without a copy.
func ParseMemory(buffer []byte) *Document {
	size := C.int(len(buffer))
	if len(buffer) == 0 || int(size) != len(buffer) {
		return nil
	}
	doc := C.xmlParseMemory((*C.char)(unsafe.Pointer(&buffer[0])), size)
	return makeDoc(doc)
}
// ReadDoc parses the in-memory document input with libxml2's xmlReadDoc and
// returns the result wrapped by makeDoc.
//
// input, url and encoding are each copied into temporary C strings that are
// freed when the call returns. Because every argument is always converted, an
// empty Go string reaches libxml2 as an empty C string, never as NULL.
// options is forwarded as a C int.
func ReadDoc(input string, url string, encoding string, options ParserOption) *Document {
	cInput := C.CString(input)
	defer C.free_string(cInput)

	cURL := C.CString(url)
	defer C.free_string(cURL)

	cEncoding := C.CString(encoding)
	defer C.free_string(cEncoding)

	return makeDoc(C.xmlReadDoc(C.to_xmlcharptr(cInput), cURL, cEncoding, C.int(options)))
}
// ReadFile hands filename to libxml2's xmlReadFile together with encoding and
// options, and returns the result wrapped by makeDoc.
//
// filename and encoding are copied into temporary C strings that are freed
// when the call returns; an empty encoding is passed as an empty C string,
// not as NULL. options is forwarded as a C int.
func ReadFile(filename string, encoding string, options ParserOption) *Document {
	cName := C.CString(filename)
	defer C.free_string(cName)

	cEncoding := C.CString(encoding)
	defer C.free_string(cEncoding)

	return makeDoc(C.xmlReadFile(cName, cEncoding, C.int(options)))
}
// ReadMemory parses the XML held in buffer with libxml2's xmlReadMemory and
// returns the result wrapped by makeDoc.
//
// url and encoding are copied into temporary C strings that are freed when
// the call returns; empty Go strings are passed as empty C strings, not as
// NULL. options is forwarded as a C int.
//
// It returns nil without calling into libxml2 when buffer is empty (taking
// the address of its first element would panic) or when its length does not
// fit in a C int (the size would otherwise be silently truncated).
//
// The slice's backing memory is passed to C directly, without a copy.
func ReadMemory(buffer []byte, url string, encoding string, options ParserOption) *Document {
	size := C.int(len(buffer))
	if len(buffer) == 0 || int(size) != len(buffer) {
		return nil
	}

	ptru := C.CString(url)
	defer C.free_string(ptru)
	ptre := C.CString(encoding)
	defer C.free_string(ptre)

	doc := C.xmlReadMemory((*C.char)(unsafe.Pointer(&buffer[0])), size, ptru, ptre, C.int(options))
	return makeDoc(doc)
}
// RecoverDoc parses the in-memory document cur with libxml2's xmlRecoverDoc
// and returns the result wrapped by makeDoc. cur is copied into a temporary C
// string that is freed when the call returns.
func RecoverDoc(cur string) *Document {
	cDoc := C.CString(cur)
	defer C.free_string(cDoc)

	return makeDoc(C.xmlRecoverDoc(C.to_xmlcharptr(cDoc)))
}
// RecoverFile hands filename to libxml2's xmlRecoverFile and returns the
// result wrapped by makeDoc. filename is copied into a temporary C string
// that is freed when the call returns.
func RecoverFile(filename string) *Document {
	cName := C.CString(filename)
	defer C.free_string(cName)

	return makeDoc(C.xmlRecoverFile(cName))
}
// RecoverMemory parses the XML held in buffer with libxml2's xmlRecoverMemory
// and returns the result wrapped by makeDoc.
//
// It returns nil without calling into libxml2 when buffer is empty (taking
// the address of its first element would panic) or when its length does not
// fit in a C int (the size would otherwise be silently truncated).
//
// The slice's backing memory is passed to C directly, without a copy.
func RecoverMemory(buffer []byte) *Document {
	size := C.int(len(buffer))
	if len(buffer) == 0 || int(size) != len(buffer) {
		return nil
	}
	doc := C.xmlRecoverMemory((*C.char)(unsafe.Pointer(&buffer[0])), size)
	return makeDoc(doc)
}
// Stop passes this parser's context to libxml2's xmlStopParser.
func (p *Parser) Stop() {
	ctxt := p.Ptr
	C.xmlStopParser(ctxt)
}
// SubstituteEntitiesDefault forwards val to libxml2's
// xmlSubstituteEntitiesDefault and returns the C result converted to a Go int.
func SubstituteEntitiesDefault(val int) int {
	result := C.xmlSubstituteEntitiesDefault(C.int(val))
	return int(result)
}