mozilla · joewalker · Jan 6, 2017 · rnewman · Jan 10, 2017 · rnewman
diff --git a/.gitignore b/.gitignore
@@ -47,3 +47,5 @@ pom.xml.asc
 /release-node/datomish/
 /release-node/goog/
 /release-node/honeysql/
+
+/edn/target/
diff --git a/edn/Cargo.toml b/edn/Cargo.toml
@@ -1,3 +1,17 @@
 [package]
 name = "edn"
-version = "0.0.1"
+version = "0.1.0"
+authors = ["Joe Walker <jwalker@mozilla.com>"]
+
+license = "Apache-2.0"
+repository = "https://github.com/mozilla/datomish"
+description = "EDN Parser for Datomish"
+build = "build.rs"
+readme = "./README.md"
+
+[dependencies]
+num = "0.1.35"
+ordered-float = "0.3.0"
+
+[build-dependencies]
+peg = "0.4"
diff --git a/edn/README.md b/edn/README.md
@@ -0,0 +1,2 @@
+# barnardsstar
+An experimental EDN parser for Datomish
diff --git a/edn/build.rs b/edn/build.rs
@@ -0,0 +1,15 @@
+// Copyright 2016 Mozilla
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
+// this file except in compliance with the License. You may obtain a copy of the
+// License at http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+extern crate peg;
+
+/// Build-script entry point: passes the EDN grammar file to rust-peg's Cargo
+/// helper.
+///
+/// NOTE(review): presumably this emits the generated parser as
+/// `$OUT_DIR/edn.rs`, which `src/lib.rs` pulls in via `include!` — confirm
+/// against the documentation of the pinned peg version.
+fn main() {
+    peg::cargo_build("src/edn.rustpeg");
+}
diff --git a/edn/src/edn.rustpeg b/edn/src/edn.rustpeg
@@ -0,0 +1,124 @@
+// Copyright 2016 Mozilla
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
+// this file except in compliance with the License. You may obtain a copy of the
+// License at http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+use std::collections::{BTreeSet, BTreeMap, LinkedList};
+use std::iter::FromIterator;
+use num::BigInt;
+use types::Value;
+use ordered_float::OrderedFloat;
+
+// Goal: Be able to parse https://github.com/edn-format/edn
+// Also extensible to help parse http://docs.datomic.com/query.html
+
+// Debugging hint: test using `cargo test --features peg/trace -- --nocapture`
+// to trace where the parser is failing
+
+// TODO: Support tagged elements
+// TODO: Support comments
+// TODO: Support discard
+
+// Matches the literal `nil` and yields `Value::Nil`.
+#[export]
+nil -> Value = "nil" {
+    Value::Nil
+}
+
+// Matches the literal `true` or `false` and yields the corresponding
+// `Value::Boolean`.
+#[export]
+boolean -> Value =
+    "true" { Value::Boolean(true) } /
+    "false" { Value::Boolean(false) }
+
+// A single ASCII decimal digit.
+digit = [0-9]
+// An explicit sign character; the numeric rules make it optional with `sign?`.
+sign = "-" / "+"
+
+// An optionally signed run of digits followed by the suffix `N`. Only the sign
+// and digits are captured (the `N` is outside the `$( )`), and the captured
+// text is handed to `BigInt`'s string parser.
+// NOTE(review): if the pinned `num` version rejects a leading `+`, an input
+// such as `+1N` would hit the `unwrap` below and panic — confirm.
+#[export]
+bigint -> Value = b:$( sign? digit+ ) "N" {
+    Value::BigInteger(b.parse::<BigInt>().unwrap())
+}
+
+// An optionally signed run of digits, converted to an `i64`.
+// NOTE(review): the digit run is unbounded, so a literal outside the `i64`
+// range makes `parse::<i64>()` return `Err` and the `unwrap` panic rather than
+// reporting a parse failure.
+#[export]
+integer -> Value = i:$( sign? digit+ ) {
+    Value::Integer(i.parse::<i64>().unwrap())
+}
+
+// The three accepted float shapes, each with an optional leading sign:
+//   frac     - digits "." digits             e.g. 1.5
+//   exp      - digits exponent               e.g. 1e5
+//   frac_exp - digits "." digits exponent    e.g. 1.5e-5
+// A fractional part always needs digits on both sides of the ".".
+frac =     sign? digit+ "." digit+
+exp =      sign? digit+            ("e" / "E") sign? digit+
+frac_exp = sign? digit+ "." digit+ ("e" / "E") sign? digit+
+
+// The order here is important - frac_exp must come before (exp / frac) or the
+// parser assumes exp or frac when the float is really a frac_exp and fails
+// The matched text is parsed as an `f64` and wrapped in `OrderedFloat`.
+#[export]
+float -> Value = f:$( frac_exp / exp / frac ) {
+    Value::Float(OrderedFloat(f.parse::<f64>().unwrap()))
+}
+
+// TODO: \newline, \return, \space and \tab
+// Escape sequences recognised inside a string. Each one is matched as a unit,
+// so the character following a backslash is never mistaken for the closing
+// quote. An escaped backslash is included so that a string ending in `\\`
+// still terminates at the quote that follows it.
+special_char = quote / backslash / tab
+quote = "\\\""
+backslash = "\\\\"
+tab = "\\tab"
+// The order here is important - special_char must come before [^"], otherwise
+// [^"] consumes the backslash of an escape on its own and the `"` of `\"`
+// ends the string early.
+char = special_char / [^"]
+
+// A double-quoted string. The captured text is the raw source between the
+// quotes: escape sequences are recognised but not decoded.
+#[export]
+text -> Value = "\"" t:$(  char* ) "\"" {
+    Value::Text(t.to_string())
+}
+
+// TODO: Be more picky here
+// Characters allowed at the start of a symbol, and in the rest of it. The two
+// sets differ only in that "-" is not allowed as the first character.
+symbol_char_initial = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>/.]
+symbol_char_subsequent = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>/.] / "-"
+
+// One initial character followed by any number of subsequent characters; the
+// whole match is stored as the symbol's name.
+#[export]
+symbol -> Value = s:$( symbol_char_initial symbol_char_subsequent* ) {
+    Value::Symbol(s.to_string())
+}
+
+// A keyword always starts with a colon.
+keyword_char_initial = ":"
+// TODO: More chars here?
+keyword_char_subsequent = [a-z] / [A-Z] / [0-9] / "/"
+
+// A colon followed by at least one keyword character. The capture spans the
+// colon as well, so the stored string includes the leading ":".
+#[export]
+keyword -> Value = k:$( keyword_char_initial keyword_char_subsequent+ ) {
+    Value::Keyword(k.to_string())
+}
+
+// A parenthesised sequence of values, e.g. `(1 2 3)`. Optional whitespace is
+// accepted after "(", before each element and before ")". The elements are
+// collected, in source order, into a `LinkedList`.
+#[export]
+list -> Value = "(" __ v:(__ value)* __ ")" {
+    Value::List(LinkedList::from_iter(v))
+}
+
+// A bracketed sequence of values, e.g. `[1 2 3]`, with the same whitespace
+// handling as `list`. The collected elements are stored as-is.
+#[export]
+vector -> Value = "[" __ v:(__ value)* __ "]" {
+    Value::Vector(v)
+}
+
+// A set literal, e.g. `#{1 2 3}`, with the same whitespace handling as `list`.
+// Elements are collected into a `BTreeSet`, so repeated elements collapse
+// into one.
+#[export]
+set -> Value = "#{" __ v:(__ value)* __ "}" {
+    Value::Set(BTreeSet::from_iter(v))
+}
+
+// One key/value entry of a map. Any amount of whitespace may separate the key
+// from the value, and entries may be separated by whitespace, a comma, or
+// both (EDN treats commas as whitespace), so `{:a 1 :b 2}`, `{:a 1, :b 2}`
+// and entries spread over several lines are all accepted. The trailing
+// separator is consumed here so the next entry starts directly on its key.
+pair -> (Value, Value) = k:(value) __ v:(value) __ ","? __ {
+    (k, v)
+}
+
+// A map literal: "{" followed by zero or more `pair` entries and "}". The
+// entries are collected into a `BTreeMap`, so when a key is repeated the last
+// entry for that key is the one kept.
+#[export]
+map -> Value = "{" __ v:(pair)* __ "}" {
+    Value::Map(BTreeMap::from_iter(v))
+}
+
+// Any single EDN value. The alternatives are tried in the order listed and
+// the first one that matches wins.
+// It's important that float comes before integer or the parser assumes that
+// floats are integers and fails to parse
+#[export]
+value -> Value
+    = nil / boolean / float / bigint / integer / text /
+      keyword / symbol /
+      list / vector / map / set
+
+// A single whitespace character: space, carriage return, newline or tab.
+// Note that a comma is not included here.
+whitespace = (" " / "\r" / "\n" / "\t")
+
+// Zero or more whitespace characters; used wherever whitespace is optional.
+__ = whitespace*
diff --git a/edn/src/lib.rs b/edn/src/lib.rs
@@ -8,4 +8,17 @@
 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
-pub mod keyword;
+#![allow(dead_code)]
+
+extern crate ordered_float;
+extern crate num;
+
+pub mod types;
+
+/// The EDN parser, textually included from `edn.rs` in Cargo's `OUT_DIR`.
+///
+/// NOTE(review): presumably `edn.rs` is what `build.rs` generates from
+/// `src/edn.rustpeg`, making each `#[export]` rule of the grammar a public
+/// function of this module — confirm against the peg documentation.
+pub mod parse {
+    include!(concat!(env!("OUT_DIR"), "/edn.rs"));
+}
+
+// NOTE(review): this file is a library crate root, so this private `main` is
+// never used as an entry point; it only prints a hint. Presumably the
+// crate-level `#![allow(dead_code)]` exists to silence the warning for it —
+// consider removing both.
+fn main() {
+    println!("Use cargo test");
+}
diff --git a/edn/src/types.rs b/edn/src/types.rs
@@ -0,0 +1,85 @@
+// Copyright 2016 Mozilla
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
+// this file except in compliance with the License. You may obtain a copy of the
+// License at http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+use std::collections::{BTreeSet, BTreeMap, LinkedList};
+use std::cmp::{Ordering, Ord, PartialOrd};
+use num::BigInt;
+use ordered_float::OrderedFloat;
+
+/// Value represents one of the allowed values in an EDN string.
+///
+/// Equality, hashing and `Debug` are derived; ordering comes from the
+/// hand-written `Ord` and `PartialOrd` impls in this file.
+#[derive(PartialEq, Eq, Hash, Debug)]
+pub enum Value {
+    /// The `nil` literal.
+    Nil,
+    /// `true` or `false`.
+    Boolean(bool),
+    /// An integer held as an `i64`.
+    Integer(i64),
+    /// An arbitrary-precision integer.
+    BigInteger(BigInt),
+    // https://users.rust-lang.org/t/hashmap-key-cant-be-float-number-type-why/7892
+    /// A floating-point number, wrapped in `OrderedFloat` (see the link above
+    /// for why a bare `f64` cannot be used here).
+    Float(OrderedFloat<f64>),
+    /// A string.
+    Text(String),
+    /// A symbol, stored by name.
+    Symbol(String),
+    /// A keyword, stored as a string.
+    Keyword(String),
+    /// An ordered sequence of values backed by a `Vec`.
+    Vector(Vec<Value>),
+    /// An ordered sequence of values backed by a `LinkedList`.
+    List(LinkedList<Value>),
+    // We're using BTree{Set, Map} rather than Hash{Set, Map} because the BTree variants
+    // implement Hash (unlike the Hash variants which don't in order to preserve O(n) hashing
+    // time which is hard given recursive data structures)
+    // See https://internals.rust-lang.org/t/implementing-hash-for-hashset-hashmap/3817/1
+    /// A set of distinct values.
+    Set(BTreeSet<Value>),
+    /// A mapping from values to values.
+    Map(BTreeMap<Value, Value>),
+}
+
+use self::Value::*;
+
+/// `Value` is totally ordered, so the partial comparison simply delegates to
+/// `Ord::cmp` and always returns `Some`.
+impl PartialOrd for Value {
+    fn partial_cmp(&self, other: &Value) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+// TODO: Check we follow the equality rules at the bottom of https://github.com/edn-format/edn
+/// Total ordering over `Value`s.
+///
+/// Values of different variants are ordered by the rank that `to_ord` assigns
+/// to their variant. Values of the same variant are ordered by their payload,
+/// with `self` on the left-hand side of the comparison, so that for example
+/// `Integer(1) < Integer(2)` and the direction is the same at every nesting
+/// level of a collection.
+impl Ord for Value {
+    fn cmp(&self, other: &Value) -> Ordering {
+
+        // Only used when the two values are of different variants.
+        let ord_order = to_ord(self).cmp(&to_ord(other));
+        match *self {
+            Nil             => match *other { Nil             => Ordering::Equal, _ => ord_order },
+            Boolean(bs)     => match *other { Boolean(bo)     => bs.cmp(&bo), _ => ord_order },
+            BigInteger(ref bs) => match *other { BigInteger(ref bo) => bs.cmp(bo), _ => ord_order },
+            Integer(is)     => match *other { Integer(io)     => is.cmp(&io), _ => ord_order },
+            Float(ref fs)   => match *other { Float(ref fo)   => fs.cmp(fo), _ => ord_order },
+            Text(ref ts)    => match *other { Text(ref to)    => ts.cmp(to), _ => ord_order },
+            Symbol(ref ss)  => match *other { Symbol(ref so)  => ss.cmp(so), _ => ord_order },
+            Keyword(ref ks) => match *other { Keyword(ref ko) => ks.cmp(ko), _ => ord_order },
+            Vector(ref vs)  => match *other { Vector(ref vo)  => vs.cmp(vo), _ => ord_order },
+            List(ref ls)    => match *other { List(ref lo)    => ls.cmp(lo), _ => ord_order },
+            Set(ref ss)     => match *other { Set(ref so)     => ss.cmp(so), _ => ord_order },
+            Map(ref ms)     => match *other { Map(ref mo)     => ms.cmp(mo), _ => ord_order },
+        }
+    }
+}
+
+/// Returns the rank of a value's variant, used by `Ord::cmp` to order values
+/// of different variants relative to each other.
+///
+/// Only the relative order of the ranks matters; the gap between `Set` (10)
+/// and `Map` (12) has no effect on comparisons.
+fn to_ord(value: &Value) -> i32 {
+    match *value {
+        Nil => 0,
+        Boolean(_) => 1,
+        Integer(_) => 2,
+        BigInteger(_) => 3,
+        Float(_) => 4,
+        Text(_) => 5,
+        Symbol(_) => 6,
+        Keyword(_) => 7,
+        Vector(_) => 8,
+        List(_) => 9,
+        Set(_) => 10,
+        Map(_) => 12,
+    }
+}
+
+/// A tuple of two `Value`s. Both fields are private and nothing in this file
+/// constructs or reads it.
+/// NOTE(review): presumably intended for map entries — confirm or remove.
+pub struct Pair(Value, Value);