This repository has been archived by the owner on Sep 12, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 115
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander
The parser mostly works and has a decent test suite. It parses all the queries issued by the Tofino UAS, with some caveats. Known flaws: * No support for tagged elements, comments, discarded elements or "'". * Incomplete support for escaped characters in strings and the range of characters that are allowed in keywords and symbols. * Possible whitespace handling problems.
- Loading branch information
Showing
8 changed files
with
1,058 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,3 +47,5 @@ pom.xml.asc | |
/release-node/datomish/ | ||
/release-node/goog/ | ||
/release-node/honeysql/ | ||
|
||
/edn/target/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,17 @@ | ||
[package] | ||
name = "edn" | ||
version = "0.0.1" | ||
version = "0.1.0" | ||
authors = ["Joe Walker <jwalker@mozilla.com>"] | ||
|
||
license = "Apache-2.0" | ||
repository = "https://github.com/mozilla/mentat" | ||
description = "EDN parser for Project Mentat" | ||
build = "build.rs" | ||
readme = "./README.md" | ||
|
||
[dependencies] | ||
num = "0.1.35" | ||
ordered-float = "0.3.0" | ||
|
||
[build-dependencies] | ||
peg = "0.4" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# barnardsstar | ||
An experimental EDN parser for Project Mentat. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
// Copyright 2016 Mozilla | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use | ||
// this file except in compliance with the License. You may obtain a copy of the | ||
// License at http://www.apache.org/licenses/LICENSE-2.0 | ||
// Unless required by applicable law or agreed to in writing, software distributed | ||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations under the License. | ||
|
||
extern crate peg; | ||
|
||
fn main() { | ||
peg::cargo_build("src/edn.rustpeg"); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
/* vim: set filetype=rust.rustpeg */ | ||
|
||
// Copyright 2016 Mozilla | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use | ||
// this file except in compliance with the License. You may obtain a copy of the | ||
// License at http://www.apache.org/licenses/LICENSE-2.0 | ||
// Unless required by applicable law or agreed to in writing, software distributed | ||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations under the License. | ||
|
||
use std::collections::{BTreeSet, BTreeMap, LinkedList}; | ||
use std::iter::FromIterator; | ||
use num::BigInt; | ||
use types::Value; | ||
use ordered_float::OrderedFloat; | ||
|
||
// Goal: Be able to parse https://github.com/edn-format/edn | ||
// Also extensible to help parse http://docs.datomic.com/query.html | ||
|
||
// Debugging hint: test using `cargo test --features peg/trace -- --nocapture` | ||
// to trace where the parser is failing | ||
|
||
// TODO: Support tagged elements | ||
// TODO: Support comments | ||
// TODO: Support discard | ||
|
||
// Matches the literal text `nil` and produces `Value::Nil`.
#[export] | ||
nil -> Value = "nil" { | ||
Value::Nil | ||
} | ||
|
||
// Matches the literal text `true` or `false` and produces the corresponding
// `Value::Boolean`.
#[export] | ||
boolean -> Value = | ||
"true" { Value::Boolean(true) } / | ||
"false" { Value::Boolean(false) } | ||
|
||
// Building blocks shared by the numeric rules: a single ASCII decimal digit
// and a single sign character (`-` or `+`).
digit = [0-9] | ||
sign = "-" / "+" | ||
|
||
// Arbitrary-precision integer: an optional sign and one or more digits,
// followed by a mandatory `N` suffix. Only the sign and digits are captured
// (the `N` is outside the `$(...)` slice) and parsed as a `BigInt`.
// NOTE(review): the parse result is unwrapped; this assumes `BigInt`'s parser
// accepts everything `sign? digit+` can match (including a leading `+`) -
// confirm.
#[export] | ||
bigint -> Value = b:$( sign? digit+ ) "N" { | ||
Value::BigInteger(b.parse::<BigInt>().unwrap()) | ||
} | ||
|
||
// Fixed-width integer: an optional sign and one or more digits, parsed as an
// `i64` and wrapped in `Value::Integer`.
// NOTE(review): the parse result is unwrapped, so a digit run that does not
// fit in an `i64` makes `parse` return `Err` and this action panic instead of
// reporting a parse error.
#[export] | ||
integer -> Value = i:$( sign? digit+ ) { | ||
Value::Integer(i.parse::<i64>().unwrap()) | ||
} | ||
|
||
// The three textual shapes accepted for a float. Each starts with an optional
// sign and at least one digit:
//   frac     - digits "." digits
//   exp      - digits, then `e`/`E`, an optional sign and digits
//   frac_exp - digits "." digits, then `e`/`E`, an optional sign and digits
frac = sign? digit+ "." digit+ | ||
exp = sign? digit+ ("e" / "E") sign? digit+ | ||
frac_exp = sign? digit+ "." digit+ ("e" / "E") sign? digit+ | ||
|
||
// Floating-point literal. The matched text is parsed as an `f64` and wrapped
// in `OrderedFloat` before being stored in `Value::Float`; the parse result
// is unwrapped.
// The order here is important - frac_exp must come before (exp / frac) or the | ||
// parser assumes exp or frac when the float is really a frac_exp and fails | ||
#[export] | ||
float -> Value = f:$( frac_exp / exp / frac ) { | ||
Value::Float(OrderedFloat(f.parse::<f64>().unwrap())) | ||
} | ||
|
||
// Characters allowed between the double quotes of a string literal.
//
// PEG choice is ordered, so the escape sequences must be tried BEFORE the
// catch-all `[^"]`: a backslash is itself matched by `[^"]`, which would
// otherwise consume the `\` of `\"` on its own and let the following quote
// terminate the string early.
//
// `backslash` (an escaped backslash, `\\`) is tried first so that a string
// ending in an escaped backslash, e.g. `"a\\"`, is not misread as containing
// an escaped quote.
//
// TODO: \newline, \return, \space and \tab
special_char = backslash / quote / tab
backslash = "\\\\"
quote = "\\\""
tab = "\\tab"
char = special_char / [^"]
|
||
// String literal: a double quote, zero or more `char`s, and a closing double
// quote. The text between the quotes is stored exactly as matched (the
// `$(...)` slice is copied with `to_string`); escape sequences are not
// decoded here.
#[export] | ||
text -> Value = "\"" t:$( char* ) "\"" { | ||
Value::Text(t.to_string()) | ||
} | ||
|
||
// Character classes for symbols. The first character may be an ASCII letter,
// a digit or one of `*!_?$%&=<>/.`; subsequent characters may additionally
// be `-`.
// TODO: Be more picky here | ||
symbol_char_initial = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>/.] | ||
symbol_char_subsequent = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>/.] / "-" | ||
|
||
// Symbol: one initial symbol character followed by zero or more subsequent
// symbol characters. The matched text is stored verbatim in `Value::Symbol`.
#[export] | ||
symbol -> Value = s:$( symbol_char_initial symbol_char_subsequent* ) { | ||
Value::Symbol(s.to_string()) | ||
} | ||
|
||
// Character classes for keywords: a keyword starts with `:`; the characters
// after it may be ASCII letters, digits or `/`.
keyword_char_initial = ":" | ||
// TODO: More chars here? | ||
keyword_char_subsequent = [a-z] / [A-Z] / [0-9] / "/" | ||
|
||
// Keyword: `:` followed by at least one keyword character. The captured
// slice spans the whole match, so the string stored in `Value::Keyword`
// includes the leading `:`.
#[export] | ||
keyword -> Value = k:$( keyword_char_initial keyword_char_subsequent+ ) { | ||
Value::Keyword(k.to_string()) | ||
} | ||
|
||
// List: `(`, zero or more values each optionally preceded by whitespace, and
// `)`. The parsed values are collected, in order, into a `LinkedList`.
#[export] | ||
list -> Value = "(" __ v:(__ value)* __ ")" { | ||
Value::List(LinkedList::from_iter(v)) | ||
} | ||
|
||
// Vector: `[`, zero or more values each optionally preceded by whitespace,
// and `]`. The repetition's result is stored directly in `Value::Vector`.
#[export] | ||
vector -> Value = "[" __ v:(__ value)* __ "]" { | ||
Value::Vector(v) | ||
} | ||
|
||
// Set: `#{`, zero or more values each optionally preceded by whitespace, and
// `}`. The parsed values are collected into a `BTreeSet`.
#[export] | ||
set -> Value = "#{" __ v:(__ value)* __ "}" { | ||
Value::Set(BTreeSet::from_iter(v)) | ||
} | ||
|
||
// One map entry: a key value, exactly one space character, a value, and an
// optional trailing `", "` (comma followed by one space). Yields the
// `(key, value)` tuple.
// NOTE(review): the separators are literal strings rather than `__`, so other
// whitespace between key and value, or entries separated without `", "`,
// presumably do not match - confirm against the intended EDN behaviour.
pair -> (Value, Value) = k:(value) " " v:(value) ", "? { | ||
(k, v) | ||
} | ||
|
||
// Map: `{`, optional whitespace, zero or more `pair`s, optional whitespace
// and `}`. The `(key, value)` tuples are collected into a `BTreeMap`.
#[export] | ||
map -> Value = "{" __ v:(pair)* __ "}" { | ||
Value::Map(BTreeMap::from_iter(v)) | ||
} | ||
|
||
// Any single EDN value. The alternatives are tried in the order listed and
// the first one that matches wins.
// NOTE(review): `nil` and `boolean` are tried before `symbol`, so a symbol
// that merely starts with `nil`, `true` or `false` presumably cannot be
// parsed through this rule - confirm.
// It's important that float comes before integer or the parser assumes that | ||
// floats are integers and fails to parse | ||
#[export] | ||
value -> Value | ||
= nil / boolean / float / bigint / integer / text / | ||
keyword / symbol / | ||
list / vector / map / set | ||
|
||
// A single whitespace character: space, carriage return, line feed or tab.
whitespace = (" " / "\r" / "\n" / "\t") | ||
|
||
// Optional whitespace: zero or more `whitespace` characters.
__ = whitespace* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
// Copyright 2016 Mozilla | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use | ||
// this file except in compliance with the License. You may obtain a copy of the | ||
// License at http://www.apache.org/licenses/LICENSE-2.0 | ||
// Unless required by applicable law or agreed to in writing, software distributed | ||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations under the License. | ||
|
||
use std::collections::{BTreeSet, BTreeMap, LinkedList}; | ||
use std::cmp::{Ordering, Ord, PartialOrd}; | ||
use num::BigInt; | ||
use ordered_float::OrderedFloat; | ||
|
||
/// Value represents one of the allowed values in an EDN string. | ||
///
/// Equality, hashing and `Debug` are derived; ordering is provided by the
/// hand-written `Ord`/`PartialOrd` impls in this file.
#[derive(PartialEq, Eq, Hash, Debug)] | ||
pub enum Value { | ||
/// The `nil` value.
Nil, | ||
/// `true` or `false`.
Boolean(bool), | ||
/// A 64-bit signed integer.
Integer(i64), | ||
/// An arbitrary-precision integer.
BigInteger(BigInt), | ||
// `f64` is wrapped in `OrderedFloat` so that the enum can derive `Eq` and
// `Hash` and implement `Ord`; see:
// https://users.rust-lang.org/t/hashmap-key-cant-be-float-number-type-why/7892 | ||
Float(OrderedFloat<f64>), | ||
/// A string.
Text(String), | ||
/// A symbol, stored as its text.
Symbol(String), | ||
/// A keyword, stored as its text.
Keyword(String), | ||
/// An ordered sequence backed by a `Vec`.
Vector(Vec<Value>), | ||
/// An ordered sequence backed by a `LinkedList`.
List(LinkedList<Value>), | ||
// We're using BTree{Set, Map} rather than Hash{Set, Map} because the BTree variants | ||
// implement Hash (unlike the Hash variants which don't in order to preserve O(n) hashing | ||
// time which is hard given recursive data structures) | ||
// See https://internals.rust-lang.org/t/implementing-hash-for-hashset-hashmap/3817/1 | ||
Set(BTreeSet<Value>), | ||
Map(BTreeMap<Value, Value>), | ||
} | ||
|
||
use self::Value::*; | ||
|
||
impl PartialOrd for Value { | ||
fn partial_cmp(&self, other: &Value) -> Option<Ordering> { | ||
Some(self.cmp(other)) | ||
} | ||
} | ||
|
||
// TODO: Check we follow the equality rules at the bottom of https://github.com/edn-format/edn | ||
impl Ord for Value { | ||
fn cmp(&self, other: &Value) -> Ordering { | ||
|
||
let ord_order = to_ord(self).cmp(&to_ord(other)); | ||
match *self { | ||
Nil => match *other { Nil => Ordering::Equal, _ => ord_order }, | ||
Boolean(bs) => match *other { Boolean(bo) => bo.cmp(&bs), _ => ord_order }, | ||
BigInteger(ref bs) => match *other { BigInteger(ref bo) => bo.cmp(&bs), _ => ord_order }, | ||
Integer(is) => match *other { Integer(io) => io.cmp(&is), _ => ord_order }, | ||
Float(ref fs) => match *other { Float(ref fo) => fo.cmp(&fs), _ => ord_order }, | ||
Text(ref ts) => match *other { Text(ref to) => to.cmp(&ts), _ => ord_order }, | ||
Symbol(ref ss) => match *other { Symbol(ref so) => so.cmp(&ss), _ => ord_order }, | ||
Keyword(ref ks) => match *other { Keyword(ref ko) => ko.cmp(&ks), _ => ord_order }, | ||
Vector(ref vs) => match *other { Vector(ref vo) => vo.cmp(&vs), _ => ord_order }, | ||
List(ref ls) => match *other { List(ref lo) => lo.cmp(&ls), _ => ord_order }, | ||
Set(ref ss) => match *other { Set(ref so) => so.cmp(&ss), _ => ord_order }, | ||
Map(ref ms) => match *other { Map(ref mo) => mo.cmp(&ms), _ => ord_order }, | ||
} | ||
} | ||
} | ||
|
||
fn to_ord(value: &Value) -> i32 { | ||
match *value { | ||
Nil => 0, | ||
Boolean(_) => 1, | ||
Integer(_) => 2, | ||
BigInteger(_) => 3, | ||
Float(_) => 4, | ||
Text(_) => 5, | ||
Symbol(_) => 6, | ||
Keyword(_) => 7, | ||
Vector(_) => 8, | ||
List(_) => 9, | ||
Set(_) => 10, | ||
Map(_) => 12, | ||
} | ||
} | ||
|
||
/// A tuple struct holding two `Value`s; both fields are private.
// NOTE(review): nothing visible in this change constructs `Pair` (the
// grammar's `pair` rule yields a plain `(Value, Value)` tuple) - confirm its
// intended use.
pub struct Pair(Value, Value); |
Oops, something went wrong.