-
Notifications
You must be signed in to change notification settings - Fork 858
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #236 from toml-lang/abnf
Add ABNF description of TOML.
- Loading branch information
Showing
1 changed file
with
219 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,219 @@ | ||
;; WARNING: This document is a work-in-progress and should not be considered | ||
;; authoritative until further notice. | ||
|
||
;; This is an attempt to define TOML in ABNF according to the grammar defined | ||
;; in RFC 5234 (http://www.ietf.org/rfc/rfc5234.txt). | ||
|
||
;; You can try out this grammar interactively via the online ABNF tool at | ||
;; http://www.coasttocoastresearch.com/interactiveapg | ||
;; Note that due to the limitations of ABNF parsers, in order for multi-line | ||
;; strings to work in that tool, the following rules must be ammended to | ||
;; disallow the use of unescaped double- or single-quotes: | ||
;; ml-basic-unescaped = basic-unescaped | ||
;; ml-literal-char = literal-char | ||
|
||
;; TOML | ||
|
||
toml = expression *( newline expression ) | ||
|
||
expression = ( ( ws comment ) / | ||
( ws keyval ws [ comment ] ) / | ||
( ws table ws [ comment ] ) / | ||
ws ) | ||
|
||
;; Newline | ||
|
||
newline = ( %x0A / ; LF | ||
%x0D.0A ) ; CRLF | ||
|
||
;; Whitespace | ||
|
||
ws = *wschar | ||
|
||
wschar = ( %x20 / ; Space | ||
%x09 ) ; Horizontal tab | ||
|
||
;; Comment | ||
|
||
comment-start-symbol = %x23 ; # | ||
non-eol = %x09 / %x20-10FFFF | ||
comment = comment-start-symbol *non-eol | ||
|
||
;; Key-Value pairs | ||
|
||
keyval = key keyval-sep val | ||
|
||
key = unquoted-key / quoted-key | ||
unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ | ||
quoted-key = quotation-mark 1*basic-char quotation-mark ; See Basic Strings | ||
|
||
keyval-sep = ws %x3D ws ; = | ||
|
||
val = string / boolean / array / inline-table / date-time / float / integer | ||
|
||
;; Table | ||
|
||
table = std-table / array-table | ||
|
||
;; Standard Table | ||
|
||
std-table = std-table-open key *( table-key-sep key) std-table-close | ||
|
||
std-table-open = %x5B ws ; [ Left square bracket | ||
std-table-close = ws %x5D ; ] Right square bracket | ||
table-key-sep = ws %x2E ws ; . Period | ||
|
||
;; Array Table | ||
|
||
array-table = array-table-open key *( table-key-sep key) array-table-close | ||
|
||
array-table-open = %x5B.5B ws ; [[ Double left square bracket | ||
array-table-close = ws %x5D.5D ; ]] Double right quare bracket | ||
|
||
;; Integer | ||
|
||
integer = [ minus / plus ] int | ||
|
||
minus = %x2D ; - | ||
plus = %x2B ; + | ||
|
||
int = DIGIT / digit1-9 1*( DIGIT / underscore DIGIT ) | ||
digit1-9 = %x31-39 ; 1-9 | ||
underscore = %x5F ; _ | ||
|
||
;; Float | ||
|
||
float = integer ( frac / ( frac exp ) / exp ) | ||
|
||
frac = decimal-point zero-prefixable-int | ||
decimal-point = %x2E ; . | ||
zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT ) | ||
|
||
exp = e integer | ||
e = %x65 / %x45 ; e E | ||
|
||
;; String | ||
|
||
string = ml-basic-string / basic-string / ml-literal-string / literal-string | ||
|
||
;; Basic String | ||
|
||
basic-string = quotation-mark *basic-char quotation-mark | ||
|
||
quotation-mark = %x22 ; " | ||
|
||
basic-char = basic-unescaped / escaped | ||
basic-unescaped = %x20-21 / %x23-5B / %x5D-10FFFF | ||
escaped = escape ( %x22 / ; " quotation mark U+0022 | ||
%x5C / ; \ reverse solidus U+005C | ||
%x2F / ; / solidus U+002F | ||
%x62 / ; b backspace U+0008 | ||
%x66 / ; f form feed U+000C | ||
%x6E / ; n line feed U+000A | ||
%x72 / ; r carriage return U+000D | ||
%x74 / ; t tab U+0009 | ||
%x75 4HEXDIG / ; uXXXX U+XXXX | ||
%x55 8HEXDIG ) ; UXXXXXXXX U+XXXXXXXX | ||
|
||
escape = %x5C ; \ | ||
|
||
;; Multiline Basic String | ||
|
||
ml-basic-string = ml-basic-string-delim ml-basic-body ml-basic-string-delim | ||
|
||
ml-basic-string-delim = 3quotation-mark | ||
|
||
ml-basic-body = *( ml-basic-char / newline / ( escape newline )) | ||
ml-basic-char = ml-basic-unescaped / escaped | ||
ml-basic-unescaped = %x20-5B / %x5D-10FFFF | ||
|
||
;; Literal String | ||
|
||
literal-string = apostrophe *literal-char apostrophe | ||
|
||
apostrophe = %x27 ; ' apostrophe | ||
|
||
literal-char = %x09 / %x20-26 / %x28-10FFFF | ||
|
||
;; Multiline Literal String | ||
|
||
ml-literal-string = ml-literal-string-delim ml-literal-body ml-literal-string-delim | ||
|
||
ml-literal-string-delim = 3apostrophe | ||
|
||
ml-literal-body = *( ml-literal-char / newline ) | ||
ml-literal-char = %x09 / %x20-10FFFF | ||
|
||
;; Boolean | ||
|
||
boolean = true / false | ||
|
||
true = %x74.72.75.65 ; true | ||
false = %x66.61.6C.73.65 ; false | ||
|
||
;; Date and Time (as defined in RFC 3339) | ||
|
||
date-time = offset-date-time / local-date-time / local-date / local-time | ||
|
||
date-fullyear = 4DIGIT | ||
date-month = 2DIGIT ; 01-12 | ||
date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on month/year | ||
time-hour = 2DIGIT ; 00-23 | ||
time-minute = 2DIGIT ; 00-59 | ||
time-second = 2DIGIT ; 00-58, 00-59, 00-60 based on leap second rules | ||
time-secfrac = "." 1*DIGIT | ||
time-numoffset = ( "+" / "-" ) time-hour ":" time-minute | ||
time-offset = "Z" / time-numoffset | ||
|
||
partial-time = time-hour ":" time-minute ":" time-second [time-secfrac] | ||
full-date = date-fullyear "-" date-month "-" date-mday | ||
full-time = partial-time time-offset | ||
|
||
;; Offset Date-Time | ||
|
||
offset-date-time = full-date "T" full-time | ||
|
||
;; Local Date-Time | ||
|
||
local-date-time = full-date "T" partial-time | ||
|
||
;; Local Date | ||
|
||
local-date = full-date | ||
|
||
;; Local Time | ||
|
||
local-time = partial-time | ||
|
||
;; Array | ||
|
||
array = array-open array-values array-close | ||
|
||
array-open = %x5B ws-newline ; [ | ||
array-close = ws-newline %x5D ; ] | ||
|
||
array-values = [ ( val array-sep [ ( ws-newline comment ws-newlines) / ws-newlines ] array-values ) / | ||
( val [ array-sep ] [ ( ws-newline comment ws-newlines ) ] ) ] | ||
|
||
array-sep = ws %x2C ws ; , Comma | ||
|
||
ws-newline = *( wschar / newline ) | ||
ws-newlines = newline *( wschar / newline ) | ||
|
||
;; Inline Table | ||
|
||
inline-table = inline-table-open inline-table-keyvals inline-table-close | ||
|
||
inline-table-open = %x7B ws ; { | ||
inline-table-close = ws %x7D ; } | ||
inline-table-sep = ws %x2C ws ; , Comma | ||
|
||
inline-table-keyvals = [ inline-table-keyvals-non-empty ] | ||
inline-table-keyvals-non-empty = ( key keyval-sep val inline-table-sep inline-table-keyvals-non-empty ) / | ||
( key keyval-sep val ) | ||
|
||
;; Built-in ABNF terms, reproduced here for clarity | ||
|
||
ALPHA = %x41-5A / %x61-7A ; A-Z / a-z | ||
DIGIT = %x30-39 ; 0-9 | ||
HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" |