-
Notifications
You must be signed in to change notification settings - Fork 115
Add a basic EDN parser #149
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,3 +47,5 @@ pom.xml.asc | |
/release-node/datomish/ | ||
/release-node/goog/ | ||
/release-node/honeysql/ | ||
|
||
/edn/target/ |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,17 @@ | ||
[package] | ||
name = "edn" | ||
version = "0.0.1" | ||
version = "0.1.0" | ||
authors = ["Joe Walker <jwalker@mozilla.com>"] | ||
|
||
license = "Apache-2.0" | ||
repository = "https://github.com/mozilla/datomish" | ||
description = "EDN Parser for Datomish" | ||
build = "build.rs" | ||
readme = "./README.md" | ||
|
||
[dependencies] | ||
num = "0.1.35" | ||
ordered-float = "0.3.0" | ||
|
||
[build-dependencies] | ||
peg = "0.4" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# barnardsstar | ||
An experimental EDN parser for Datomish | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
// Copyright 2016 Mozilla | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use | ||
// this file except in compliance with the License. You may obtain a copy of the | ||
// License at http://www.apache.org/licenses/LICENSE-2.0 | ||
// Unless required by applicable law or agreed to in writing, software distributed | ||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations under the License. | ||
|
||
extern crate peg; | ||
|
||
fn main() { | ||
peg::cargo_build("src/edn.rustpeg"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add Details: https://github.com/github/linguist/blob/master/README.md#using-gitattributes There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's fairly explicitly not rust though. It happens to have a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hm. I suppose I was thinking that Rust syntax highlighting, indenting, etc. would be better than nothing? Up to you. There are some syntax highlighting plugins for it: https://github.com/treycordova/rustpeg.vim with filetype |
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
// Copyright 2016 Mozilla | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use | ||
// this file except in compliance with the License. You may obtain a copy of the | ||
// License at http://www.apache.org/licenses/LICENSE-2.0 | ||
// Unless required by applicable law or agreed to in writing, software distributed | ||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations under the License. | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. License blocks in all files, please. |
||
use std::collections::{BTreeSet, BTreeMap, LinkedList}; | ||
use std::iter::FromIterator; | ||
use num::BigInt; | ||
use types::Value; | ||
use ordered_float::OrderedFloat; | ||
|
||
// Goal: Be able to parse https://github.com/edn-format/edn | ||
// Also extensible to help parse http://docs.datomic.com/query.html | ||
|
||
// Debugging hint: test using `cargo test --features peg/trace -- --nocapture` | ||
// to trace where the parser is failing | ||
|
||
// TODO: Support tagged elements | ||
// TODO: Support comments | ||
// TODO: Support discard | ||
|
||
#[export] | ||
nil -> Value = "nil" { | ||
Value::Nil | ||
} | ||
|
||
#[export] | ||
boolean -> Value = | ||
"true" { Value::Boolean(true) } / | ||
"false" { Value::Boolean(false) } | ||
|
||
digit = [0-9] | ||
sign = "-" / "+" | ||
|
||
#[export] | ||
bigint -> Value = b:$( sign? digit+ ) "N" { | ||
Value::BigInteger(b.parse::<BigInt>().unwrap()) | ||
} | ||
|
||
#[export] | ||
integer -> Value = i:$( sign? digit+ ) { | ||
Value::Integer(i.parse::<i64>().unwrap()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It occurs to me that
— forgetting the 'N' — the parser will panic. Now, we could build a strategy that always handles panics in the parser, allowing us to avoid error handling, but can we instead signal a failure to parse at this point? |
||
} | ||
|
||
frac = sign? digit+ "." digit+ | ||
exp = sign? digit+ ("e" / "E") sign? digit+ | ||
frac_exp = sign? digit+ "." digit+ ("e" / "E") sign? digit+ | ||
|
||
// The order here is important - frac_exp must come before (exp / frac) or the | ||
// parser assumes exp or frac when the float is really a frac_exp and fails | ||
#[export] | ||
float -> Value = f:$( frac_exp / exp / frac ) { | ||
Value::Float(OrderedFloat(f.parse::<f64>().unwrap())) | ||
} | ||
|
||
// Characters allowed inside a string literal.
//
// Escape sequences must be tried BEFORE the catch-all class: `[^"]` also
// matches a backslash, so with the class first the escape alternatives could
// never match and an escaped quote (`\"`) terminated the string early.
// `backslash` is listed first so that an escaped backslash directly before the
// closing quote (`"\\"`) is not misread as the start of an escaped quote.
//
// Note: the `text` rule keeps the matched slice verbatim; escapes are
// recognised here but not decoded.
// TODO: \newline, \return, \space and \tab
special_char = backslash / quote / tab
backslash = "\\\\"
quote = "\\\""
tab = "\\tab"
char = special_char / [^"]
|
||
#[export] | ||
text -> Value = "\"" t:$( char* ) "\"" { | ||
Value::Text(t.to_string()) | ||
} | ||
|
||
// TODO: Be more picky here | ||
symbol_char_initial = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>/.] | ||
symbol_char_subsequent = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>/.] / "-" | ||
|
||
#[export] | ||
symbol -> Value = s:$( symbol_char_initial symbol_char_subsequent* ) { | ||
Value::Symbol(s.to_string()) | ||
} | ||
|
||
keyword_char_initial = ":" | ||
// TODO: More chars here? | ||
keyword_char_subsequent = [a-z] / [A-Z] / [0-9] / "/" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For future correction: both keywords and symbols can contain (There are similar rules around There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
||
#[export] | ||
keyword -> Value = k:$( keyword_char_initial keyword_char_subsequent+ ) { | ||
Value::Keyword(k.to_string()) | ||
} | ||
|
||
#[export] | ||
list -> Value = "(" __ v:(__ value)* __ ")" { | ||
Value::List(LinkedList::from_iter(v)) | ||
} | ||
|
||
#[export] | ||
vector -> Value = "[" __ v:(__ value)* __ "]" { | ||
Value::Vector(v) | ||
} | ||
|
||
#[export] | ||
set -> Value = "#{" __ v:(__ value)* __ "}" { | ||
Value::Set(BTreeSet::from_iter(v)) | ||
} | ||
|
||
// A single key/value entry of a map.
//
// Any amount of whitespace (see `__`) may separate the key from the value, and
// entries may be separated by whitespace and/or a single comma. This accepts
// everything the stricter "exactly one space, then `, `" form accepted, plus
// inputs such as `{:a 1 :b 2}`, `{:a 1,:b 2}` and maps spread over several lines.
pair -> (Value, Value) = k:(value) __ v:(value) __ ","? __ {
    (k, v)
}
|
||
#[export] | ||
map -> Value = "{" __ v:(pair)* __ "}" { | ||
Value::Map(BTreeMap::from_iter(v)) | ||
} | ||
|
||
// It's important that float comes before integer or the parser assumes that | ||
// floats are integers and fails to parse | ||
#[export] | ||
value -> Value | ||
= nil / boolean / float / bigint / integer / text / | ||
keyword / symbol / | ||
list / vector / map / set | ||
|
||
whitespace = (" " / "\r" / "\n" / "\t") | ||
|
||
__ = whitespace* |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,4 +8,17 @@ | |
// CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations under the License. | ||
|
||
pub mod keyword; | ||
#![allow(dead_code)] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You've unrooted the existing |
||
|
||
extern crate ordered_float; | ||
extern crate num; | ||
|
||
pub mod types; | ||
|
||
pub mod parse { | ||
include!(concat!(env!("OUT_DIR"), "/edn.rs")); | ||
} | ||
|
||
fn main() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This shouldn't be in a |
||
println!("Use cargo test"); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
// Copyright 2016 Mozilla | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use | ||
// this file except in compliance with the License. You may obtain a copy of the | ||
// License at http://www.apache.org/licenses/LICENSE-2.0 | ||
// Unless required by applicable law or agreed to in writing, software distributed | ||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations under the License. | ||
|
||
use std::collections::{BTreeSet, BTreeMap, LinkedList}; | ||
use std::cmp::{Ordering, Ord, PartialOrd}; | ||
use num::BigInt; | ||
use ordered_float::OrderedFloat; | ||
|
||
/// Value represents one of the allowed values in an EDN string. | ||
#[derive(PartialEq, Eq, Hash, Debug)] | ||
pub enum Value { | ||
Nil, | ||
Boolean(bool), | ||
Integer(i64), | ||
BigInteger(BigInt), | ||
// https://users.rust-lang.org/t/hashmap-key-cant-be-float-number-type-why/7892 | ||
Float(OrderedFloat<f64>), | ||
Text(String), | ||
Symbol(String), | ||
Keyword(String), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably this should be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. … but that's #154. So roll on for now. |
||
Vector(Vec<Value>), | ||
List(LinkedList<Value>), | ||
// We're using BTree{Set, Map} rather than Hash{Set, Map} because the BTree variants | ||
// implement Hash. The Hash variants don't, in order to preserve O(n) hashing time, | ||
// which is hard to guarantee given recursive data structures. | ||
// See https://internals.rust-lang.org/t/implementing-hash-for-hashset-hashmap/3817/1 | ||
Set(BTreeSet<Value>), | ||
Map(BTreeMap<Value, Value>), | ||
} | ||
|
||
use self::Value::*; | ||
|
||
impl PartialOrd for Value { | ||
fn partial_cmp(&self, other: &Value) -> Option<Ordering> { | ||
Some(self.cmp(other)) | ||
} | ||
} | ||
|
||
// TODO: Check we follow the equality rules at the bottom of https://github.com/edn-format/edn | ||
impl Ord for Value { | ||
fn cmp(&self, other: &Value) -> Ordering { | ||
|
||
let ord_order = to_ord(self).cmp(&to_ord(other)); | ||
match *self { | ||
Nil => match *other { Nil => Ordering::Equal, _ => ord_order }, | ||
Boolean(bs) => match *other { Boolean(bo) => bo.cmp(&bs), _ => ord_order }, | ||
BigInteger(ref bs) => match *other { BigInteger(ref bo) => bo.cmp(&bs), _ => ord_order }, | ||
Integer(is) => match *other { Integer(io) => io.cmp(&is), _ => ord_order }, | ||
Float(ref fs) => match *other { Float(ref fo) => fo.cmp(&fs), _ => ord_order }, | ||
Text(ref ts) => match *other { Text(ref to) => to.cmp(&ts), _ => ord_order }, | ||
Symbol(ref ss) => match *other { Symbol(ref so) => so.cmp(&ss), _ => ord_order }, | ||
Keyword(ref ks) => match *other { Keyword(ref ko) => ko.cmp(&ks), _ => ord_order }, | ||
Vector(ref vs) => match *other { Vector(ref vo) => vo.cmp(&vs), _ => ord_order }, | ||
List(ref ls) => match *other { List(ref lo) => lo.cmp(&ls), _ => ord_order }, | ||
Set(ref ss) => match *other { Set(ref so) => so.cmp(&ss), _ => ord_order }, | ||
Map(ref ms) => match *other { Map(ref mo) => mo.cmp(&ms), _ => ord_order }, | ||
} | ||
} | ||
} | ||
|
||
fn to_ord(value: &Value) -> i32 { | ||
match *value { | ||
Nil => 0, | ||
Boolean(_) => 1, | ||
Integer(_) => 2, | ||
BigInteger(_) => 3, | ||
Float(_) => 4, | ||
Text(_) => 5, | ||
Symbol(_) => 6, | ||
Keyword(_) => 7, | ||
Vector(_) => 8, | ||
List(_) => 9, | ||
Set(_) => 10, | ||
Map(_) => 12, | ||
} | ||
} | ||
|
||
pub struct Pair(Value, Value); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Two 'Datomish' to replace.