// yaml.ado (forked from sergiocorreia/stata-misc)
// -------------------------------------------------------------------------------------------------
// YAML parser and interface
// -------------------------------------------------------------------------------------------------
capture program drop yaml
program define yaml
    * Entry point of the yaml command.
    * Peels the subcommand off the argument list, validates it, and hands the
    * remaining arguments to the matching yaml_SUBCOMMAND program.
    gettoken subcmd 0 : 0
    Assert inlist("`subcmd'", "read", "local", "global", "clear"), msg("invalid subcommand (`subcmd'). Valid are: read local global clear")
    yaml_`subcmd' `0'

    * The local and global subcommands report the target macro name in r(lcl)
    * and the looked-up value in r(value); the assignment happens here so that
    * c_local defines the macro in the program that called yaml.
    if ("`subcmd'"=="local") {
        c_local `r(lcl)' `"`r(value)'"'
    }
    else if ("`subcmd'"=="global") {
        global `r(lcl)' `"`r(value)'"'
    }
end
// -------------------------------------------------------------------------------------------------
capture program drop yaml_read
program define yaml_read
    * Usage: yaml read NAME using FILENAME [, verbose]
    * Parses FILENAME with the Mata function yaml_read() and stores the
    * returned object in a Mata variable called NAME.
    syntax name(name=dict id="name of new mata object") using/ , [Verbose]

    * Turn the presence of the verbose option into a 0/1 flag for Mata
    local is_verbose = ("`verbose'"!="")
    mata: `dict' = yaml_read("`using'", `is_verbose')
end
// -------------------------------------------------------------------------------------------------
capture program drop yaml_local
program define yaml_local, rclass
    * Usage: yaml local LCL = DICT.KEY
    * Looks up KEY in the Mata object DICT and returns
    *   r(lcl)    name of the macro that should receive the value
    *   r(value)  the value stored under KEY
    * The macro itself is assigned by the caller (the yaml program).
    Assert "`0'"!="", msg("yaml load: invalid syntax. Correct is lcl=key")

    * Parse lcl=dict.key
    * Only the first dot separates dict from key, so key may itself contain dots
    gettoken lcl 0: 0 , parse("=")
    gettoken equalsign 0: 0 , parse("=")
    gettoken dict 0: 0 , parse(".")
    gettoken equalsign key: 0 , parse(".")

    * Remove blanks (re-assigning without quotes trims the macro contents)
    local dict `dict'
    local key `key'
    Assert "`dict'"!="", msg("yaml load: dict is empty! args=<`0'>")
    Assert "`key'"!="", msg("yaml load: key is empty! args=<`0'>")

    * The Mata function yaml_local() stores the result in the local macro value
    mata: yaml_local(`dict', "`key'")
    return local lcl "`lcl'"

    * Compound quotes keep values that contain double quotes intact
    return local value `"`value'"'
end
// -------------------------------------------------------------------------------------------------
capture program drop yaml_global
program define yaml_global, rclass
    * Usage: yaml global NAME = DICT.KEY
    * Looks up KEY in the Mata object DICT and returns
    *   r(lcl)    name of the global macro that should receive the value
    *   r(value)  the value stored under KEY
    * The global itself is assigned by the caller (the yaml program).
    Assert "`0'"!="", msg("yaml load: invalid syntax. Correct is lcl=key")

    * Parse lcl=dict.key
    * Only the first dot separates dict from key, so key may itself contain dots
    gettoken lcl 0: 0 , parse("=")
    gettoken equalsign 0: 0 , parse("=")
    gettoken dict 0: 0 , parse(".")
    gettoken equalsign key: 0 , parse(".")

    * Remove blanks (re-assigning without quotes trims the macro contents)
    local dict `dict'
    local key `key'
    Assert "`dict'"!="", msg("yaml load: dict is empty! args=<`0'>")
    Assert "`key'"!="", msg("yaml load: key is empty! args=<`0'>")

    * The Mata function yaml_local() is reused for globals: it only fetches the
    * value into the local macro value of this program
    mata: yaml_local(`dict', "`key'")
    return local lcl "`lcl'"

    * Compound quotes keep values that contain double quotes intact
    return local value `"`value'"'
end
// -------------------------------------------------------------------------------------------------
capture program drop yaml_clear
program define yaml_clear
* Usage: yaml clear NAME
* Drops the Mata object called NAME
syntax name(name=dict id="name of mata object")
mata drop `dict'
end
// -------------------------------------------------------------------------------------------------
capture program drop Assert
program define Assert
    * Copied from assert_msg.ado
    * Syntax: assert_msg CONDITION , [MSG(a text message)] [RC(integer return code)]
    * Runs assert on CONDITION; when it fails, shows MSG as an error (or a
    * default message when MSG is omitted) and exits with return code RC
    * (9 unless specified).
    syntax anything(everything equalok) [if] [in] [, MSG(string asis) RC(integer 9)]
    cap assert `anything' `if' `in'
    local tmp_rc = _rc
    if (`tmp_rc') {
        if (`"`msg'"'=="") {
            * Compound quotes: the condition may itself contain double quotes,
            * which would otherwise break display and hide the requested RC
            di as error `"assertion is false: `anything' `if' `in'"'
        }
        else {
            * MSG is passed as-is, so it is expected to arrive already quoted
            di as error `msg'
        }
        exit `rc'
    }
end
// -------------------------------------------------------------------------------------------------
// Mata Code
// -------------------------------------------------------------------------------------------------
mata:
transmorphic yaml_read(string scalar fn, real scalar verbose) {
    // Parse a small subset of YAML (nested "key: value" dicts) from file fn.
    //
    // Arguments
    //   fn       path of the file to read
    //   verbose  nonzero prints every key-value pair plus a final count
    //
    // Returns an associative array whose keys are the dot-joined path of
    // nested keys (e.g. "section.subsection.key") and whose values are strings.
    //
    // Malformed input prints a message and aborts with error 100.
    //
    // Rules implemented below:
    //   - every two leading blanks add one nesting level (at most 6 levels)
    //   - blank lines and lines starting with # are skipped
    //   - everything from the first # on a line is discarded, even if that #
    //     sits inside a quoted value
    //   - a value may follow "key:" on the same line or sit alone on the next,
    //     more indented, line
    //   - one pair of surrounding "" or '' quotes is removed from a value
    fh = fopen(fn, "r")
    dict = asarray_create()
    headers = J(1, 6, "") // Up to 6 nesting levels
    oldlevel = 1
    i = 0
    hanging = 0 // 1 if prev line had key: but no value
    dict_val = ""

    while ( ( line = fget(fh) ) != J(0,0,"") ) {

        // Ignore blank lines and full-line comments
        trimline = strtrim(line)
        if ( strlen(trimline)==0 | strpos(trimline, "#")==1 ) continue

        // Drop an inline comment; trimming starts at first #
        comment_pos = strpos(line, "#")
        if (comment_pos > 0) {
            line = substr(line, 1, comment_pos - 1)
        }

        // trim right BUT NOT left (the left blanks carry the indentation).
        // Done after removing the comment so blanks between a value and its
        // trailing comment do not end up inside the value
        line = strrtrim(line)

        // get level implied by indentation
        indentation = strlen(line) - strlen(strltrim(line))
        level = 1 + trunc(indentation/2)
        is_list = mod(indentation,2) // not used yet, see the TODO below
        line = strltrim(line)

        // deal with list "- "
        // TODO

        // Can't increase more than one level!
        if (level>oldlevel+1) {
            printf("{err}yaml_read error: level = %f but previous level was %f (%s)\n", level, oldlevel, line)
            exit(error(100))
        }

        if (hanging & level<=oldlevel) {
            printf("{err}yaml_read error: last line was hanging but level hasn't increased [line=%s]\n", line)
            exit(error(100))
        }

        matched = regexm(line, "^([a-zA-Z0-9_-]+):.*$")
        is_dict = (matched!=0)

        if (is_dict) {
            dict_key = regexs(1)
            headers[level] = dict_key

            // Is it a hanging dict or does it follow with the value?
            dict_val = ""
            matched = regexm(line, "^([a-zA-Z0-9_-]+): *(.+)$")
            if (matched!=0) {
                dict_val = regexs(2)
            }
            else {
                hanging = 1
            }
        }
        else if (hanging & level!=oldlevel+1) {
            printf("{err}yaml_read error: prev. line (old level=%f) was a hanging dict; expected a nested level (but new level=%f) or a value [line=%s]\n", oldlevel, level, line)
            exit(error(100))
        }
        else if (!hanging) {
            printf("{err}yaml_read error: line is not a dict and was not preceeded by a hanging dict (%s)\n", line)
            exit(error(100))
        }
        else if (hanging) {
            dict_val = line
            --level // It was not a dict, just the value
        }

        // Add value to dict
        if (dict_val!="") {
            // Remove '' and "" quotes
            matched = regexm(dict_val, `"^"([^"]*)"$"')
            if (matched!=0) dict_val = regexs(1)
            matched = regexm(dict_val, `"^'([^']*)'$"')
            if (matched!=0) dict_val = regexs(1)

            // Full key = headers of every level down to the current one
            full_key = invtokens(headers[., (1..level)], ".")
            if (asarray_contains(dict, full_key)) {
                printf("{err}yaml_read error: repeated key <%s> (%s)\n", full_key, line)
                exit(error(100))
            }
            asarray(dict, full_key, dict_val) // dict[key] = value
            if (verbose) printf("{txt}yaml <{res}%s{txt}>=<{res}%s{txt}> (level=%f)\n", full_key, dict_val, level)
            ++i
            hanging = 0
            dict_val = ""
        }
        oldlevel = level
    }

    fclose(fh)
    if (verbose) {
        printf("{txt}(%s key-value pairs added to quipu metadata)\n", strofreal(i))
    }
    return(dict)
}
void yaml_local(transmorphic dict, string scalar key) {
    // Look up key in the associative array dict and store the result in the
    // Stata local macro "value" of the calling program.
    // A value with several columns is joined with invtokens() first.
    // Aborts with error 510 when the key is missing or the value has more
    // than one row.
    real scalar found
    transmorphic val, text

    found = asarray_contains(dict, key)
    assert(found==0 | found==1)
    if (found==0) {
        printf("{err}yaml_local error: key <%s> does not exist\n", key)
        exit(error(510))
    }

    val = asarray(dict, key)
    if (rows(val)>1) {
        printf("{err}yaml_local error: key <%s> has more than one row! it can only be a scalar or a rowvector\n", key)
        exit(error(510))
    }

    // Row vectors are collapsed into one string; anything else passes through
    text = (cols(val)>1 ? invtokens(val) : val)
    st_local("value", text)
}
end
/* USAGE GUIDE
yaml read mydict using somefile.yaml
yaml local title = mydict.title
yaml local depvar = mydict.vars.depvar.name
Possible future syntax (not implemented):
yaml locals (somelocal = key.subkey) (...)
*/
/* YAML SYNTAX GUIDE
- We only parse a small subset of YAML
- Case sensitive
- For now treat all values as strings (instead of numbers)
- Like Python, comments start with "#"
- Two objects: lists and dicts
- List items start with " - "
- Line breaks preserved with | (which trims spaces)
- Variables/repeated nodes
- Also json-style syntax
- Documents (start --- end ...)
- Each doc is in essence a dict
- How do we replicate a list that contains dicts?
- Indentation: two spaces
key: value
key:
value
for now just parse this:
section:
__subsection:
____subsubsection:
______key1: value
______key2: value
section:
__a: b
__c: d
somelist:
- item1
- item2
somelist = [item1, item2]
object:
key1: value
key2: value
object = {key1: value, key2: value}
*/