Skip to content
This repository has been archived by the owner on Sep 12, 2018. It is now read-only.

Commit

Permalink
Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander
Browse files Browse the repository at this point in the history
The parser mostly works and has a decent test suite. It parses all the
queries issued by the Tofino UAS, with some caveats. Known flaws:

* No support for tagged elements, comments, discarded elements or "'".
* Incomplete support for escaped characters in strings and the range of
  characters that are allowed in keywords and symbols.
* Possible whitespace handling problems.
  • Loading branch information
joewalker authored and rnewman committed Jan 11, 2017
1 parent 3707428 commit c473511
Show file tree
Hide file tree
Showing 8 changed files with 1,058 additions and 1 deletion.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,5 @@ pom.xml.asc
/release-node/datomish/
/release-node/goog/
/release-node/honeysql/

/edn/target/
16 changes: 15 additions & 1 deletion edn/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
[package]
name = "edn"
version = "0.0.1"
version = "0.1.0"
authors = ["Joe Walker <jwalker@mozilla.com>"]

license = "Apache-2.0"
repository = "https://github.com/mozilla/mentat"
description = "EDN parser for Project Mentat"
build = "build.rs"
readme = "./README.md"

[dependencies]
num = "0.1.35"
ordered-float = "0.3.0"

[build-dependencies]
peg = "0.4"
2 changes: 2 additions & 0 deletions edn/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# barnardsstar
An experimental EDN parser for Project Mentat.
15 changes: 15 additions & 0 deletions edn/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

extern crate peg;

/// Build-script entry point: passes the EDN grammar file to `peg::cargo_build`.
fn main() {
    // Path is given relative to the crate root.
    let grammar_path = "src/edn.rustpeg";
    peg::cargo_build(grammar_path);
}
126 changes: 126 additions & 0 deletions edn/src/edn.rustpeg
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/* vim: set filetype=rust.rustpeg */

// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

use std::collections::{BTreeSet, BTreeMap, LinkedList};
use std::iter::FromIterator;
use num::BigInt;
use types::Value;
use ordered_float::OrderedFloat;

// Goal: Be able to parse https://github.com/edn-format/edn
// Also extensible to help parse http://docs.datomic.com/query.html

// Debugging hint: test using `cargo test --features peg/trace -- --nocapture`
// to trace where the parser is failing

// TODO: Support tagged elements
// TODO: Support comments
// TODO: Support discard

// Matches the literal text `nil` and produces `Value::Nil`.
// NOTE(review): there is no check on what follows the literal, so input that
// merely starts with `nil` also matches its first three characters here.
#[export]
nil -> Value = "nil" {
Value::Nil
}

// Matches the literal `true` or `false` and produces the corresponding
// `Value::Boolean`.
#[export]
boolean -> Value =
"true" { Value::Boolean(true) } /
"false" { Value::Boolean(false) }

// Building blocks for the numeric rules: a single ASCII decimal digit and an
// explicit leading sign.
digit = [0-9]
sign = "-" / "+"

// An optionally signed run of digits followed by the suffix `N`. Only the
// sign and digits are captured (the `N` is excluded) and parsed as a `BigInt`.
// NOTE(review): `unwrap` panics if `BigInt` parsing rejects the captured text;
// confirm it accepts a leading `+`.
#[export]
bigint -> Value = b:$( sign? digit+ ) "N" {
Value::BigInteger(b.parse::<BigInt>().unwrap())
}

// An optionally signed run of digits, parsed as an `i64`.
// NOTE(review): the grammar places no limit on the number of digits, so a
// value outside the `i64` range makes `parse` return an error and `unwrap`
// panic instead of reporting a parse failure.
#[export]
integer -> Value = i:$( sign? digit+ ) {
Value::Integer(i.parse::<i64>().unwrap())
}

// The three accepted float shapes, each with an optional leading sign:
//   frac     - digits "." digits             (e.g. 1.5)
//   exp      - digits e|E [sign] digits      (e.g. 1e10)
//   frac_exp - digits "." digits e|E [sign] digits
// Digits are required on both sides of the decimal point.
frac = sign? digit+ "." digit+
exp = sign? digit+ ("e" / "E") sign? digit+
frac_exp = sign? digit+ "." digit+ ("e" / "E") sign? digit+

// The order here is important - frac_exp must come before (exp / frac) or the
// parser assumes exp or frac when the float is really a frac_exp and fails
//
// The matched text is parsed as an `f64` and wrapped in `OrderedFloat`.
#[export]
float -> Value = f:$( frac_exp / exp / frac ) {
Value::Float(OrderedFloat(f.parse::<f64>().unwrap()))
}

// String literals.
//
// The text between the quotes is stored verbatim: escape sequences are only
// recognised so that the closing quote can be located; they are not decoded.
//
// TODO: \newline, \return, \space and \tab
//
// `backslash` is tried before `quote` so that the second backslash of an
// escaped backslash (`\\`) is never mistaken for the start of `\"`.
special_char = backslash / quote / tab
backslash = "\\\\"
quote = "\\\""
tab = "\\tab"

// Escapes must be tried before the catch-all class: `[^"]` also matches a
// lone backslash, so with the opposite order `\"` could never match and an
// escaped quote would terminate the string early.
char = special_char / [^"]

// A double-quoted string. Produces `Value::Text` holding the raw characters
// between the quotes (escape sequences are kept as written).
#[export]
text -> Value = "\"" t:$( char* ) "\"" {
    Value::Text(t.to_string())
}

// TODO: Be more picky here
// Characters allowed in a symbol. The two classes differ only in that `-` is
// allowed after the first character, so a symbol cannot start with `-`
// (`+` is not accepted in either position).
symbol_char_initial = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>/.]
symbol_char_subsequent = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>/.] / "-"

// One initial character followed by any number of subsequent characters; the
// matched text is stored unchanged in `Value::Symbol`.
#[export]
symbol -> Value = s:$( symbol_char_initial symbol_char_subsequent* ) {
Value::Symbol(s.to_string())
}

// A keyword is a `:` followed by at least one letter, digit or `/`.
keyword_char_initial = ":"
// TODO: More chars here?
// Characters such as `-`, `_` and `.` are not accepted, so a keyword like
// `:foo-bar` stops matching after `:foo`.
keyword_char_subsequent = [a-z] / [A-Z] / [0-9] / "/"

// The stored `Value::Keyword` text includes the leading `:`.
#[export]
keyword -> Value = k:$( keyword_char_initial keyword_char_subsequent+ ) {
Value::Keyword(k.to_string())
}

// A parenthesised sequence of values, each optionally preceded by whitespace.
// The elements are collected, in order, into a `LinkedList`.
#[export]
list -> Value = "(" __ v:(__ value)* __ ")" {
Value::List(LinkedList::from_iter(v))
}

// A square-bracketed sequence of values, each optionally preceded by
// whitespace. The elements are stored in order in `Value::Vector`.
#[export]
vector -> Value = "[" __ v:(__ value)* __ "]" {
Value::Vector(v)
}

// A `#{ ... }` sequence of values, each optionally preceded by whitespace.
// The elements are collected into a `BTreeSet`.
#[export]
set -> Value = "#{" __ v:(__ value)* __ "}" {
Value::Set(BTreeSet::from_iter(v))
}

// One key/value entry of a map.
//
// Any amount of whitespace (including none, and including newlines) may
// appear before the key, between key and value, and after the value; a single
// trailing comma is optional. The previous form demanded exactly one space
// between key and value and the literal `", "` between entries, which
// rejected maps such as `{:a 1 :b 2}` or maps spread over several lines.
// Everything the old form accepted is still accepted.
pair -> (Value, Value) = __ k:(value) __ v:(value) __ ","? {
    (k, v)
}

// A brace-delimited sequence of zero or more `pair`s, collected into a
// `BTreeMap` keyed by the first value of each pair.
#[export]
map -> Value = "{" __ v:(pair)* __ "}" {
Value::Map(BTreeMap::from_iter(v))
}

// Any single EDN value. Alternatives are tried in the order listed and the
// first one that matches wins.
//
// It's important that float comes before integer or the parser assumes that
// floats are integers and fails to parse
//
// Likewise `bigint` precedes `integer` so that the `N` suffix is consumed, and
// `map` precedes `set`.
#[export]
value -> Value
= nil / boolean / float / bigint / integer / text /
keyword / symbol /
list / vector / map / set

// A single whitespace character. EDN treats commas as whitespace, so `,` is
// accepted here as well; this lets collections such as `[1, 2, 3]` parse.
whitespace = (" " / "\r" / "\n" / "\t" / ",")

// Zero or more whitespace characters; used wherever whitespace is optional.
__ = whitespace*
10 changes: 10 additions & 0 deletions edn/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,14 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#![allow(dead_code)]

extern crate ordered_float;
extern crate num;

pub mod keyword;
pub mod types;

// Parser module. Its body is the file `edn.rs` located in Cargo's `OUT_DIR`,
// included textually at compile time.
// NOTE(review): presumably that file is generated by `build.rs` from
// `src/edn.rustpeg` — confirm.
pub mod parse {
include!(concat!(env!("OUT_DIR"), "/edn.rs"));
}
85 changes: 85 additions & 0 deletions edn/src/types.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

use std::collections::{BTreeSet, BTreeMap, LinkedList};
use std::cmp::{Ordering, Ord, PartialOrd};
use num::BigInt;
use ordered_float::OrderedFloat;

/// Value represents one of the allowed values in an EDN string.
///
/// Collections hold nested `Value`s, so the type is recursive.
#[derive(PartialEq, Eq, Hash, Debug)]
pub enum Value {
Nil,
Boolean(bool),
Integer(i64),
BigInteger(BigInt),
// A bare `f64` implements neither `Eq` nor `Hash`, which the derives above
// require, so the float is wrapped in `OrderedFloat`. See
// https://users.rust-lang.org/t/hashmap-key-cant-be-float-number-type-why/7892
Float(OrderedFloat<f64>),
Text(String),
Symbol(String),
Keyword(String),
Vector(Vec<Value>),
List(LinkedList<Value>),
// We're using BTree{Set, Map} rather than Hash{Set, Map} because the BTree variants
// implement Hash (unlike the Hash variants, which don't, in order to preserve O(n) hashing
// time, which is hard given recursive data structures).
// See https://internals.rust-lang.org/t/implementing-hash-for-hashset-hashmap/3817/1
Set(BTreeSet<Value>),
Map(BTreeMap<Value, Value>),
}

use self::Value::*;

// `Value` is totally ordered, so the partial comparison never returns `None`:
// it simply wraps the result of the `Ord` implementation.
impl PartialOrd for Value {
    fn partial_cmp(&self, other: &Value) -> Option<Ordering> {
        let total = Ord::cmp(self, other);
        Some(total)
    }
}

// TODO: Check we follow the equality rules at the bottom of https://github.com/edn-format/edn
impl Ord for Value {
fn cmp(&self, other: &Value) -> Ordering {

let ord_order = to_ord(self).cmp(&to_ord(other));
match *self {
Nil => match *other { Nil => Ordering::Equal, _ => ord_order },
Boolean(bs) => match *other { Boolean(bo) => bo.cmp(&bs), _ => ord_order },
BigInteger(ref bs) => match *other { BigInteger(ref bo) => bo.cmp(&bs), _ => ord_order },
Integer(is) => match *other { Integer(io) => io.cmp(&is), _ => ord_order },
Float(ref fs) => match *other { Float(ref fo) => fo.cmp(&fs), _ => ord_order },
Text(ref ts) => match *other { Text(ref to) => to.cmp(&ts), _ => ord_order },
Symbol(ref ss) => match *other { Symbol(ref so) => so.cmp(&ss), _ => ord_order },
Keyword(ref ks) => match *other { Keyword(ref ko) => ko.cmp(&ks), _ => ord_order },
Vector(ref vs) => match *other { Vector(ref vo) => vo.cmp(&vs), _ => ord_order },
List(ref ls) => match *other { List(ref lo) => lo.cmp(&ls), _ => ord_order },
Set(ref ss) => match *other { Set(ref so) => so.cmp(&ss), _ => ord_order },
Map(ref ms) => match *other { Map(ref mo) => mo.cmp(&ms), _ => ord_order },
}
}
}

// Rank of a value's variant, used to order values of different variants.
// Only the relative order of the ranks matters; note that the sequence skips
// 11 (`Set` is 10, `Map` is 12).
fn to_ord(value: &Value) -> i32 {
    match value {
        &Nil => 0,
        &Boolean(_) => 1,
        &Integer(_) => 2,
        &BigInteger(_) => 3,
        &Float(_) => 4,
        &Text(_) => 5,
        &Symbol(_) => 6,
        &Keyword(_) => 7,
        &Vector(_) => 8,
        &List(_) => 9,
        &Set(_) => 10,
        &Map(_) => 12,
    }
}

/// A tuple of two `Value`s. Both fields are private to this module.
// NOTE(review): presumably intended to represent a map entry (key, value) — confirm.
pub struct Pair(Value, Value);
Loading

0 comments on commit c473511

Please sign in to comment.