-
Notifications
You must be signed in to change notification settings - Fork 0
/
wdl.pest
271 lines (262 loc) · 12.3 KB
/
wdl.pest
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
/* document structure */
// Entry rule: the whole input must be a version statement followed by one or
// more top-level elements (SOI/EOI anchor the match to the full input).
document = { SOI ~ version ~ document_element+ ~ EOI }
version = { version_kw ~ version_identifier }
// Atomic: "1" followed by one or more ".<digits>" groups, e.g. 1.0 or 1.0.1.
version_identifier = @{ "1" ~ ( "." ~ ASCII_DIGIT+ )+ }
// Silent: the matched import/structdef/task/workflow pair appears directly
// under `document`.
document_element = _{ import | structdef | task | workflow }
// An import takes a placeholder-free string, an optional `as` namespace and
// any number of `alias X as Y` clauses.
import = { import_kw ~ simple_string ~ namespace? ~ import_alias* }
namespace = { as_kw ~ identifier }
import_alias = { alias_kw ~ identifier ~ as_kw ~ identifier }
// Struct members are unbound declarations only (no default values).
structdef = { struct_kw ~ identifier ~ block_start ~ unbound_declaration* ~ block_end }
// A task body must contain at least one element.
task = { task_kw ~ identifier ~ block_start ~ task_element+ ~ block_end }
task_element = _{ common_element | bound_declaration | command | runtime }
// A command body is either a heredoc (<<< ... >>>) or a brace-delimited block.
command = { command_kw ~ ( command_heredoc | command_block ) }
// Compound-atomic: no implicit whitespace/comment skipping inside the body,
// while the inner element rules still produce their own pairs.
command_heredoc = ${ left_heredoc ~ command_heredoc_element* ~ right_heredoc }
// Alternatives are tried in order: escape sequence, ~{...} placeholder, then
// literal text. Only the tilde placeholder form is recognised in a heredoc.
command_heredoc_element = _{
command_heredoc_escape_sequence | tilde_placeholder | command_heredoc_literal
}
command_heredoc_escape_sequence = @{ escape ~ ( ">" | "~" | "\\" ) }
// Literal text: either a run of four or more '>' characters, or one or more
// characters up to the next excluded construct.
command_heredoc_literal = @{
">"{4,} | ( !command_heredoc_literal_exclude ~ ANY )+
}
// Constructs that terminate a run of heredoc literal text.
command_heredoc_literal_exclude = _{
right_heredoc | tilde_placeholder_start | command_heredoc_escape_sequence
}
// The single-line form is tried first; the multi-line form is the fallback.
command_block = _{ single_line_command_block | multi_line_command_block }
command_block_escape_sequence = @{ escape ~ ( "}" | "~" | "$" | "\\" ) }
single_line_command_block = ${ block_start ~ single_line_command_block_element* ~ block_end }
// Brace-delimited blocks accept both ~{...} and ${...} placeholders.
single_line_command_block_element = _{
command_block_escape_sequence | tilde_placeholder | dollar_placeholder |
single_line_command_block_literal
}
single_line_command_block_literal = @{
( !single_line_command_block_literal_exclude ~ ANY )+
}
// Literal text stops at the first '}', placeholder start, escape sequence or
// newline, so a single-line block cannot contain a bare '}' or span lines.
single_line_command_block_literal_exclude = _{
block_end | tilde_placeholder_start | dollar_placeholder_start |
command_block_escape_sequence | NEWLINE
}
// Pest parsers are greedy, WDL doesn't require '}' to be escaped within a command
// block, and there is no requirement that braces be matched within a command block,
// so we must impose the limitation that the end brace is on a line by itself.
multi_line_command_block = ${
block_start ~ multi_line_command_block_element* ~ multi_line_command_block_end
}
multi_line_command_block_element = _{
command_block_escape_sequence | tilde_placeholder | dollar_placeholder |
multi_line_command_block_literal
}
// Terminator: a newline, optional spaces/tabs, then the closing '}'.
multi_line_command_block_end = @{ NEWLINE ~ indent* ~ block_end }
// NOTE(review): the trailing `"\\"` alternative looks unreachable -- a
// backslash that does not start an escape sequence is already consumed by ANY
// in the first alternative, and one that does is matched earlier by
// command_block_escape_sequence. Confirm before removing.
multi_line_command_block_literal = @{
( !multi_line_command_block_literal_exclude ~ ANY )+ | "\\"
}
// Unlike the single-line form, a bare '}' and NEWLINE are allowed in literal
// text; only the terminator sequence above ends the block.
multi_line_command_block_literal_exclude = _{
tilde_placeholder_start | dollar_placeholder_start |
command_block_escape_sequence | multi_line_command_block_end
}
// runtime section: zero or more `name: expression` attributes.
runtime = { runtime_kw ~ block_start ~ runtime_attribute* ~ block_end }
runtime_attribute = { identifier ~ kv_sep ~ expression }
// Unlike `task`, a workflow body may be empty.
workflow = {
workflow_kw ~ identifier ~ block_start ~ ( common_element | workflow_nested_element )* ~ block_end
}
// Elements that may also appear inside scatter and conditional bodies.
workflow_nested_element = _{ bound_declaration | call | scatter | conditional }
// call <target> [as <alias>] [{ input: ... }]
call = {
call_kw ~ qualified_identifier ~ call_alias? ~ call_inputs?
}
// One or more identifiers joined by '.'.
qualified_identifier = { identifier ~ ( field_sep ~ identifier )* }
call_alias = { as_kw ~ identifier }
// The braces may be empty, and `input:` may be followed by no inputs at all.
// A trailing comma after the last input is accepted.
call_inputs = { block_start ~ (
input_kw ~ kv_sep ~ ( call_input ~ ( list_sep ~ call_input )* ~ list_sep? )?
)? ~ block_end }
// The `= expression` part is optional, so a bare name is a valid input.
call_input = { identifier ~ ( assign ~ expression )? }
// scatter (<name> in <expression>) { ... }
scatter = {
scatter_kw ~ group_start ~ identifier ~ in_kw ~ expression ~ group_end ~
block_start ~ workflow_nested_element* ~ block_end
}
// if (<expression>) { ... } -- there is no else branch in this rule.
conditional = {
if_kw ~ group_start ~ expression ~ group_end ~
block_start ~ workflow_nested_element* ~ block_end
}
/* elements in common between task and workflow */
common_element = _{ input | output | meta | parameter_meta }
// Declaration without a value: <type> <name>
unbound_declaration = { typedef ~ identifier }
// Declaration with a value: <type> <name> = <expression>
bound_declaration = { typedef ~ identifier ~ assign ~ expression }
// bound_declaration is tried first; it shares the `typedef ~ identifier`
// prefix with unbound_declaration, which is the fallback when no '=' follows.
input = { input_kw ~ block_start ~ ( bound_declaration | unbound_declaration )* ~ block_end }
// Outputs must always carry a value.
output = { output_kw ~ block_start ~ bound_declaration* ~ block_end }
meta = { meta_kw ~ block_start ~ meta_attribute* ~ block_end }
parameter_meta = { parameter_meta_kw ~ block_start ~ meta_attribute* ~ block_end }
meta_attribute = { identifier ~ kv_sep ~ meta_value }
// Meta values are a restricted literal set: no expressions, and strings are
// the placeholder-free simple_string form.
meta_value = _{ null | boolean | meta_number | simple_string | meta_array | meta_object }
null = { "null" }
meta_number = { sign? ~ ( float | int ) }
// Bracketed, comma-separated list; may be empty, trailing comma accepted.
meta_array = {
list_start ~ ( meta_value ~ ( list_sep ~ meta_value )* ~ list_sep? )? ~ list_end
}
// Braced, comma-separated `name: value` fields; may be empty, trailing comma
// accepted.
meta_object = {
block_start ~ ( meta_object_field ~ ( list_sep ~ meta_object_field )* ~ list_sep? )? ~ block_end
}
meta_object_field = { identifier ~ kv_sep ~ meta_value }
/* types */
// A type is one of the alternatives below, optionally followed by '?'.
// Order matters: non_empty_array_type must precede array_type (it is
// array_type plus a trailing '+'), and user_type is last because it matches
// any identifier.
typedef = {
( primitive_type | non_empty_array_type | array_type | map_type | pair_type | user_type )
~ optional?
}
// Built-in scalar type names. The rule is atomic and ends with a negative
// lookahead on identifier characters so that a user-defined type whose name
// merely starts with a primitive name (e.g. `Interval`, `FileSet`,
// `StringMap`) is not cut short as `Int`/`File`/`String`; such names now fall
// through to user_type. The lookahead consumes nothing, so the pair still
// spans exactly the type name and has no inner pairs.
primitive_type = @{
( "Boolean" | "Int" | "Float" | "String" | "File" | "Object" ) ~ !( ASCII_ALPHANUMERIC | "_" )
}
// Array[T]+ : an array_type followed by the '+' marker.
non_empty_array_type = { array_type ~ non_empty }
// Array[T]
array_type = { array_kw ~ list_start ~ typedef ~ list_end }
non_empty = { "+" }
// Map[K, V]
map_type = { map_kw ~ list_start ~ typedef ~ list_sep ~ typedef ~ list_end }
// Pair[L, R]
pair_type = { pair_kw ~ list_start ~ typedef ~ list_sep ~ typedef ~ list_end }
// Any identifier is accepted as a type name here; the grammar does not check
// that it refers to a declared struct.
user_type = { identifier }
optional = { "?" }
/* expressions */
// Operator precedence is encoded by rule nesting, loosest binding first:
// disjunction (||) > conjunction (&&) > equality > comparison > math1 (+ -)
// > math2 (* / %) > unary > access. Each level matches one operand followed
// by zero or more (operator, operand) repetitions, so chained operators
// appear as a flat sequence of pairs under that level's rule.
expression = { ternary | disjunction }
// if <cond> then <a> else <b>; tried before disjunction.
ternary = { if_kw ~ expression ~ then_kw ~ expression ~ else_kw ~ expression }
disjunction = { conjunction ~ ( or ~ conjunction )* }
conjunction = { equality ~ ( and ~ equality )* }
equality = { comparison ~ ( ( eq | neq ) ~ comparison )* }
// gte/lte are listed before gt/lt so the two-character operators are tried
// before their one-character prefixes.
comparison = { math1 ~ ( ( gte | lte | gt | lt ) ~ math1 )* }
math1 = { math2 ~ ( ( add | sub ) ~ math2 )* }
math2 = { unary ~ ( ( mul | div | rem ) ~ unary )* }
// At most one prefix operator (+, - or !) is accepted.
unary = { ( sign | not )? ~ access }
// A function call or primary value followed by any number of [index] or
// .field suffixes. apply is tried first because it starts with an identifier.
access = { ( apply | primary ) ~ ( index | field )* }
index = { list_start ~ expression ~ list_end }
field = { field_sep ~ identifier }
// Function call: name(arg, ...); the argument list may be empty and a
// trailing comma is accepted.
apply = { identifier ~
group_start ~ ( expression ~ ( list_sep ~ expression )* ~ list_sep? )? ~ group_end
}
// Literals are tried before bare identifiers.
primary = _{ literal | identifier | group }
// float precedes int so that the integer part of a float is not matched on
// its own; pair precedes nothing that starts with '(' here, and a
// parenthesised single expression is handled by `group` in `primary`.
literal = _{ none | boolean | float | int | string | pair | array | map | object }
// The `None` literal. `literal` is tried before `identifier` in `primary`,
// so without a word-boundary check an identifier such as `None_value` or
// `NoneOrDefault` would be committed as `None` and the rest of the name would
// break the parse. The rule is atomic and uses a negative lookahead on
// identifier characters; the lookahead consumes nothing, so the pair still
// spans exactly "None".
none = @{ "None" ~ !( ASCII_ALPHANUMERIC | "_" ) }
// Expression strings may be single- or double-quoted and may contain
// placeholders (contrast with simple_string).
string = _{ squote_string | dquote_string }
// Placeholders are explicitly non-atomic (`!`), which re-enables implicit
// whitespace/comment skipping around the embedded expression even though the
// enclosing string or command rule is compound-atomic.
tilde_placeholder = !{ tilde_placeholder_start ~ expression ~ block_end }
dollar_placeholder = !{ dollar_placeholder_start ~ expression ~ block_end }
// Compound-atomic: no implicit whitespace skipping between the quotes; the
// parts still produce their own pairs.
squote_string = ${ squote ~ squote_string_part* ~ squote }
squote_string_part = _{
squote_escape_sequence | tilde_placeholder | dollar_placeholder | squote_literal
}
// Backslash followed by the quote character, n, t, ~, $, a backslash, or a
// numeric (unicode/hex/octal) escape.
squote_escape_sequence = @{
escape ~ ( "'" | "n" | "t" | "~" | "$" | "\\" | number_escape_sequence )
}
squote_literal = @{ ( !squote_literal_exclude ~ ANY )+ }
// Literal text stops at the closing quote, a placeholder start, any backslash
// or a newline. Because every backslash is excluded, a backslash that does
// not begin a recognised escape sequence makes the string fail to match.
squote_literal_exclude = _{
squote | tilde_placeholder_start | dollar_placeholder_start | escape | NEWLINE
}
// Double-quoted counterpart of squote_string; same structure.
dquote_string = ${ dquote ~ dquote_string_part* ~ dquote }
dquote_string_part = _{
dquote_escape_sequence | tilde_placeholder | dollar_placeholder | dquote_literal
}
dquote_escape_sequence = @{
escape ~ ( "\"" | "n" | "t" | "~" | "$" | "\\" | number_escape_sequence )
}
dquote_literal = @{ ( !dquote_literal_exclude ~ ANY )+ }
dquote_literal_exclude = _{
dquote | tilde_placeholder_start | dollar_placeholder_start | escape | NEWLINE
}
// (left, right) -- exactly two comma-separated expressions.
pair = { group_start ~ expression ~ list_sep ~ expression ~ group_end }
// [a, b, ...] -- may be empty; trailing comma accepted.
array = {
list_start ~ ( expression ~ ( list_sep ~ expression )* ~ list_sep? )? ~ list_end
}
// { key: value, ... } -- keys and values are arbitrary expressions; may be
// empty; trailing comma accepted.
map = {
block_start ~ ( map_entry ~ ( list_sep ~ map_entry )* ~ list_sep? )? ~ block_end
}
map_entry = { expression ~ kv_sep ~ expression }
// <identifier> { name: value, ... } -- the leading token is any identifier,
// not a fixed keyword; field names are identifiers.
object = {
identifier ~ block_start ~
( object_field ~ ( list_sep ~ object_field )* ~ list_sep? )? ~ block_end
}
object_field = { identifier ~ kv_sep ~ expression }
// A single parenthesised expression.
group = { group_start ~ expression ~ group_end }
/* primitive literals */
// Atomic: an ASCII letter followed by any number of ASCII letters, digits or
// underscores. A leading underscore or digit is not accepted. Keywords are
// not excluded by this rule.
identifier = @{ ASCII_ALPHA ~ ( ASCII_ALPHANUMERIC | "_" )* }
// Boolean literal. `literal` is tried before `identifier` in `primary`, so
// without a word-boundary check an identifier such as `true_positive` or
// `falsey` would be committed as a boolean and the rest of the name would
// break the parse. The rule is atomic and uses a negative lookahead on
// identifier characters; the lookahead consumes nothing, so the pair still
// spans exactly "true" or "false".
boolean = @{ ( "true" | "false" ) ~ !( ASCII_ALPHANUMERIC | "_" ) }
// Atomic float literal, three accepted shapes:
//   .<digits>[exponent]
//   <digits>.[digits][exponent]   (digits after the point are optional)
//   <digits><exponent>            (no point, exponent required)
float = @{
( "." ~ ASCII_DIGIT+ ~ exponent? ) |
( ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT* ~ exponent? ) |
( ASCII_DIGIT+ ~ exponent )
}
// Case-insensitive 'e', optional sign, one or more digits. Only referenced
// from the atomic `float` rule in this file.
exponent = { ^"e" ~ sign? ~ ASCII_DIGIT+ }
// Tried in order: hexadecimal, octal, decimal.
int = _{ hex_int | oct_int | dec_int }
// "0x" or "0X" followed by one or more hex digits.
hex_int = @{ "0" ~ ^"x" ~ ASCII_HEX_DIGIT+ }
// A leading "0" followed by one or more octal digits.
oct_int = @{ "0" ~ ASCII_OCT_DIGIT+ }
// A lone "0", or a non-zero digit followed by any digits.
dec_int = @{ "0" | ( ASCII_NONZERO_DIGIT ~ ASCII_DIGIT* ) }
// Strings without placeholders; used for import paths and meta values.
simple_string = _{ simple_squote_string | simple_dquote_string }
// Body of a numeric escape (the leading backslash is matched by the caller):
// uXXXX, UXXXXXXXX, xXX, or one to three octal digits.
number_escape_sequence = @{
( "u" ~ ASCII_HEX_DIGIT{4} ) |
( "U" ~ ASCII_HEX_DIGIT{8} ) |
( "x" ~ ASCII_HEX_DIGIT{2} ) |
ASCII_OCT_DIGIT{1,3}
}
simple_squote_string = ${
squote ~ ( simple_squote_escape_sequence | simple_squote_literal )* ~ squote
}
// Same escapes as squote_escape_sequence minus "~" and "$", which need no
// escaping here because simple strings have no placeholders.
simple_squote_escape_sequence = @{ escape ~ ( "'" | "t" | "n" | "\\" | number_escape_sequence ) }
// Literal text stops at the closing quote, any backslash or a newline.
simple_squote_literal = @{ ( !( squote | escape | NEWLINE ) ~ ANY )+ }
simple_dquote_string = ${
dquote ~ ( simple_dquote_escape_sequence | simple_dquote_literal )* ~ dquote
}
simple_dquote_escape_sequence = @{ escape ~ ( "\"" | "t" | "n" | "\\" | number_escape_sequence ) }
simple_dquote_literal = @{ ( !( dquote | escape | NEWLINE ) ~ ANY )+ }
/* keywords */
// All keyword rules are silent, so they produce no pairs of their own.
// NOTE(review): these are bare string matches with no word-boundary check, so
// a keyword also matches as the prefix of a longer word (e.g. `as_kw` matches
// the first two characters of "assay"). Confirm whether callers rely on the
// surrounding rule to reject such input.
alias_kw = _{ "alias" }
array_kw = _{ "Array" }
as_kw = _{ "as" }
call_kw = _{ "call" }
command_kw = _{ "command" }
else_kw = _{ "else" }
if_kw = _{ "if" }
in_kw = _{ "in" }
import_kw = _{ "import" }
input_kw = _{ "input" }
map_kw = _{ "Map" }
meta_kw = _{ "meta" }
output_kw = _{ "output" }
pair_kw = _{ "Pair" }
parameter_meta_kw = _{ "parameter_meta" }
runtime_kw = _{ "runtime" }
scatter_kw = _{ "scatter" }
struct_kw = _{ "struct" }
task_kw = _{ "task" }
then_kw = _{ "then" }
version_kw = _{ "version" }
workflow_kw = _{ "workflow" }
/* operators */
// Operator rules are non-silent, so each matched operator appears as its own
// pair between the operand pairs of the enclosing expression rule.
or = { "||" }
and = { "&&" }
eq = { "==" }
neq = { "!=" }
gte = { ">=" }
gt = { ">" }
lte = { "<=" }
lt = { "<" }
// Binary additive operators (used by math1).
add = { "+" }
sub = { "-" }
mul = { "*" }
div = { "/" }
rem = { "%" }
// Prefix logical negation (used by unary).
not = { "!" }
// Prefix sign (used by unary, meta_number and exponent). pos/neg match the
// same characters as add/sub but are distinct rules.
sign = _{ pos | neg }
pos = { "+" }
neg = { "-" }
/* hidden symbols */
// Punctuation and delimiters. Every rule here is silent, so none of them
// produces a pair in the parser output.
assign = _{ "=" }
field_sep = _{ "." }
block_start = _{ "{" }
block_end = _{ "}" }
kv_sep = _{ ":" }
group_start = _{ "(" }
group_end = _{ ")" }
list_start = _{ "[" }
list_end = _{ "]" }
list_sep = _{ "," }
// Heredoc command delimiters.
left_heredoc = _{ "<<<" }
right_heredoc = _{ ">>>" }
squote = _{ "'" }
dquote = _{ "\"" }
// A single backslash.
escape = _{ "\\" }
// Openers for the two placeholder forms; both are closed by block_end.
tilde_placeholder_start = _{ "~{" }
dollar_placeholder_start = _{ "${" }
/* comments/whitespace */
// COMMENT and WHITESPACE are pest's special rules: they are skipped
// implicitly between the terms of every non-atomic rule.
// COMMENT is non-silent because we want to retain comments in the AST.
// A comment runs from '#' to the end of the line; the terminating newline
// (or end of input) is checked with a lookahead and is not consumed.
COMMENT = @{ "#" ~ ( !NEWLINE ~ ANY )* ~ &( NEWLINE | EOI ) }
WHITESPACE = _{ indent | NEWLINE }
// A single space or tab. Also used explicitly by multi_line_command_block_end.
// NOTE(review): indent is a normal (non-silent) rule even though WHITESPACE is
// silent -- confirm whether indent pairs are meant to appear in parser output.
indent = { " " | "\t" }