Skip to content
This repository has been archived by the owner on Sep 12, 2018. It is now read-only.

Expose line/column/character span position information from parsed EDN streams #282

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions db/src/bootstrap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ lazy_static! {
:db/noHistory {:db/valueType :db.type/boolean
:db/cardinality :db.cardinality/one}}"#;
edn::parse::value(s)
.map(|v| v.without_spans())
.map_err(|_| ErrorKind::BadBootstrapDefinition("Unable to parse V1_SYMBOLIC_SCHEMA".into()))
.unwrap()
};
Expand All @@ -144,8 +145,10 @@ lazy_static! {
:db/unique :db.unique/value
:db/cardinality :db.cardinality/many}}"#;
let right = edn::parse::value(s)
.map(|v| v.without_spans())
.map_err(|_| ErrorKind::BadBootstrapDefinition("Unable to parse V2_SYMBOLIC_SCHEMA".into()))
.unwrap();

edn::utils::merge(&V1_SYMBOLIC_SCHEMA, &right)
.ok_or(ErrorKind::BadBootstrapDefinition("Unable to parse V2_SYMBOLIC_SCHEMA".into()))
.unwrap()
Expand Down
6 changes: 3 additions & 3 deletions db/src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -954,7 +954,7 @@ mod tests {
assert_eq!(transactions.0[0].0.len(), 89);

// TODO: extract a test macro simplifying this boilerplate yet further.
let value = edn::parse::value(include_str!("../../tx/fixtures/test_add.edn")).unwrap();
let value = edn::parse::value(include_str!("../../tx/fixtures/test_add.edn")).unwrap().without_spans();

let transactions = value.as_vector().unwrap();
assert_transactions(&conn, &mut db, transactions);
Expand All @@ -974,7 +974,7 @@ mod tests {
assert_eq!(transactions.0.len(), 1);
assert_eq!(transactions.0[0].0.len(), 89);

let value = edn::parse::value(include_str!("../../tx/fixtures/test_retract.edn")).unwrap();
let value = edn::parse::value(include_str!("../../tx/fixtures/test_retract.edn")).unwrap().without_spans();

let transactions = value.as_vector().unwrap();
assert_transactions(&conn, &mut db, transactions);
Expand All @@ -994,7 +994,7 @@ mod tests {
assert_eq!(transactions.0.len(), 1);
assert_eq!(transactions.0[0].0.len(), 89);

let value = edn::parse::value(include_str!("../../tx/fixtures/test_upsert_vector.edn")).unwrap();
let value = edn::parse::value(include_str!("../../tx/fixtures/test_upsert_vector.edn")).unwrap().without_spans();

let transactions = value.as_vector().unwrap();
assert_transactions(&conn, &mut db, transactions);
Expand Down
199 changes: 132 additions & 67 deletions edn/src/edn.rustpeg
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ use std::f64::{NAN, INFINITY, NEG_INFINITY};

use num::BigInt;
use ordered_float::OrderedFloat;
use types;
use types::Value;

use types::{SpannedValue, Span, ValueAndSpan};

// Goal: Be able to parse https://github.com/edn-format/edn
// Also extensible to help parse http://docs.datomic.com/query.html
Expand All @@ -28,20 +28,43 @@ use types::Value;
// TODO: Support tagged elements
// TODO: Support discard

pub nil -> Value =
"nil" { Value::Nil }
// Matches the literal `nil` and yields SpannedValue::Nil.
// The span is bounded by the `#position` values captured immediately before
// and immediately after the literal.
pub nil -> ValueAndSpan =
start:#position "nil" end:#position {
ValueAndSpan {
inner: SpannedValue::Nil,
span: Span(start, end)
}
}

pub nan -> Value =
"#f" whitespace+ "NaN" { Value::Float(OrderedFloat(NAN)) }
// Matches `#f`, at least one whitespace, then `NaN`, and yields a Float
// wrapping NAN. The span covers the whole sequence, from before `#f` to
// after `NaN`.
pub nan -> ValueAndSpan =
start:#position "#f" whitespace+ "NaN" end:#position {
ValueAndSpan {
inner: SpannedValue::Float(OrderedFloat(NAN)),
span: Span(start, end)
}
}

pub infinity -> Value =
"#f" whitespace+ s:$(sign) "Infinity" {
Value::Float(OrderedFloat(if s == "+" { INFINITY } else { NEG_INFINITY }))
// Matches `#f`, at least one whitespace, a mandatory sign, then `Infinity`.
// A "+" sign yields INFINITY; any other matched sign (i.e. "-") yields
// NEG_INFINITY. The span covers the whole sequence starting at `#f`.
pub infinity -> ValueAndSpan =
start:#position "#f" whitespace+ s:$(sign) "Infinity" end:#position {
ValueAndSpan {
inner: SpannedValue::Float(OrderedFloat(if s == "+" { INFINITY } else { NEG_INFINITY })),
span: Span(start, end)
}
}

pub boolean -> Value =
"true" { Value::Boolean(true) } /
"false" { Value::Boolean(false) }
// Matches the literal `true` or `false` (tried in that order) and yields the
// corresponding SpannedValue::Boolean. Each alternative captures its own
// start/end positions around the literal.
pub boolean -> ValueAndSpan =
start:#position "true" end:#position {
ValueAndSpan {
inner: SpannedValue::Boolean(true),
span: Span(start, end)
}
} /
start:#position "false" end:#position {
ValueAndSpan {
inner: SpannedValue::Boolean(false),
span: Span(start, end)
}
}

digit = [0-9]
alphanumeric = [0-9a-zA-Z]
Expand All @@ -50,30 +73,45 @@ validbase = [3][0-6] / [12][0-9] / [2-9]
hex = [0-9a-fA-F]
sign = "-" / "+"

pub bigint -> Value =
b:$( sign? digit+ ) "N" {
Value::BigInteger(b.parse::<BigInt>().unwrap())
// Matches an optionally signed run of digits followed by the suffix `N` and
// yields a BigInteger. Only the sign and digits are handed to the BigInt
// parser; the `N` suffix is excluded from the parsed text but included in
// the span.
// NOTE(review): the unwrap assumes BigInt's parser accepts every string the
// `sign? digit+` pattern can match (including a leading "+") -- confirm.
pub bigint -> ValueAndSpan =
start:#position b:$( sign? digit+ ) "N" end:#position {
ValueAndSpan {
inner: SpannedValue::BigInteger(b.parse::<BigInt>().unwrap()),
span: Span(start, end)
}
}

pub octalinteger -> Value =
"0" i:$( octaldigit+ ) {
Value::Integer(i64::from_str_radix(i, 8).unwrap())
// Matches a leading `0` followed by one or more octal digits and yields an
// Integer parsed in base 8. The leading `0` is not part of the parsed digits
// but is included in the span.
// NOTE(review): from_str_radix returns Err when the digits do not fit in an
// i64, so the unwrap panics on oversized input instead of failing the parse.
pub octalinteger -> ValueAndSpan =
start:#position "0" i:$( octaldigit+ ) end:#position {
ValueAndSpan {
inner: SpannedValue::Integer(i64::from_str_radix(i, 8).unwrap()),
span: Span(start, end)
}
}

pub hexinteger -> Value =
"0x" i:$( hex+ ) {
Value::Integer(i64::from_str_radix(i, 16).unwrap())
// Matches the prefix `0x` followed by one or more hex digits and yields an
// Integer parsed in base 16. The `0x` prefix is not part of the parsed digits
// but is included in the span.
// NOTE(review): from_str_radix returns Err when the digits do not fit in an
// i64, so the unwrap panics on oversized input instead of failing the parse.
pub hexinteger -> ValueAndSpan =
start:#position "0x" i:$( hex+ ) end:#position {
ValueAndSpan {
inner: SpannedValue::Integer(i64::from_str_radix(i, 16).unwrap()),
span: Span(start, end)
}
}

pub basedinteger -> Value =
pub basedinteger -> ValueAndSpan =
// Only allow values 2-36
b:$( validbase ) "r" i:$( alphanumeric+ ) {
Value::Integer(i64::from_str_radix(i, b.parse::<u32>().unwrap()).unwrap())
start:#position b:$( validbase ) "r" i:$( alphanumeric+ ) end:#position {
ValueAndSpan {
inner: SpannedValue::Integer(i64::from_str_radix(i, b.parse::<u32>().unwrap()).unwrap()),
span: Span(start, end)
}
}

pub integer -> Value =
i:$( sign? digit+ ) {
Value::Integer(i.parse::<i64>().unwrap())
// Matches an optionally signed run of decimal digits and yields an Integer.
// The span covers the sign (when present) and the digits.
// NOTE(review): parse::<i64>() returns Err for values outside the i64 range,
// so the unwrap panics on oversized input instead of failing the parse.
pub integer -> ValueAndSpan =
start:#position i:$( sign? digit+ ) end:#position {
ValueAndSpan {
inner: SpannedValue::Integer(i.parse::<i64>().unwrap()),
span: Span(start, end)
}
}

frac = sign? digit+ "." digit+
Expand All @@ -82,9 +120,12 @@ frac_exp = sign? digit+ "." digit+ ("e" / "E") sign? digit+

// The order here is important - frac_exp must come before (exp / frac) or the
// parser assumes exp or frac when the float is really a frac_exp and fails
pub float -> Value =
f:$( frac_exp / exp / frac ) {
Value::Float(OrderedFloat(f.parse::<f64>().unwrap()))
// Matches one of the three float shapes, trying frac_exp first, then exp,
// then frac, and yields a Float wrapping the text parsed as f64. The span
// covers exactly the matched float text.
pub float -> ValueAndSpan =
start:#position f:$( frac_exp / exp / frac ) end:#position {
ValueAndSpan {
inner: SpannedValue::Float(OrderedFloat(f.parse::<f64>().unwrap())),
span: Span(start, end)
}
}

// TODO: \newline, \return, \space and \tab
Expand All @@ -93,9 +134,12 @@ quote = "\\\""
tab = "\\tab"
char = [^"] / special_char

pub text -> Value =
"\"" t:$( char* ) "\"" {
Value::Text(t.to_string())
// Matches a double-quoted string and yields Text. The stored string is the
// raw matched slice between the quotes, copied verbatim (this action performs
// no unescaping); the span includes both surrounding quote characters.
pub text -> ValueAndSpan =
start:#position "\"" t:$( char* ) "\"" end:#position {
ValueAndSpan {
inner: SpannedValue::Text(t.to_string()),
span: Span(start, end)
}
}

namespace_divider = "."
Expand All @@ -112,48 +156,69 @@ symbol_name = ( symbol_char_initial+ / "." ) ( symbol_char_subsequent* / "." )

keyword_prefix = ":"

pub symbol -> Value =
ns:( sns:$(symbol_namespace) namespace_separator {
sns
})? n:$(symbol_name) {
types::to_symbol(ns, n)
}

pub keyword -> Value =
keyword_prefix ns:( sns:$(symbol_namespace) namespace_separator {
sns
})? n:$(symbol_name) {
types::to_keyword(ns, n)
}

pub list -> Value =
"(" __ v:(value)* __ ")" {
Value::List(LinkedList::from_iter(v))
}

pub vector -> Value =
"[" __ v:(value)* __ "]" {
Value::Vector(v)
}

pub set -> Value =
"#{" __ v:(value)* __ "}" {
Value::Set(BTreeSet::from_iter(v))
}

pair -> (Value, Value) =
// Matches a symbol: an optional namespace (which must be followed by the
// namespace separator) and then a name. `ns` is the optional namespace text
// without the separator; both parts are handed to SpannedValue::from_symbol.
// The span covers the namespace, separator and name.
pub symbol -> ValueAndSpan =
start:#position
ns:( sns:$(symbol_namespace) namespace_separator { sns })?
n:$(symbol_name)
end:#position {
ValueAndSpan {
inner: SpannedValue::from_symbol(ns, n),
span: Span(start, end)
}
}

// Matches a keyword: the keyword prefix, an optional namespace (which must be
// followed by the namespace separator) and then a name. The prefix is not
// passed to SpannedValue::from_keyword, but the span starts before it, so the
// span includes the prefix.
pub keyword -> ValueAndSpan =
start:#position
keyword_prefix
ns:( sns:$(symbol_namespace) namespace_separator { sns })?
n:$(symbol_name)
end:#position {
ValueAndSpan {
inner: SpannedValue::from_keyword(ns, n),
span: Span(start, end)
}
}

// Matches a parenthesised sequence of zero or more values and yields a List
// holding the children, in source order, as ValueAndSpan items. The span runs
// from before "(" to after ")".
// (`__` is defined outside this view -- presumably optional whitespace.)
pub list -> ValueAndSpan =
start:#position "(" __ v:(value)* __ ")" end:#position {
ValueAndSpan {
inner: SpannedValue::List(LinkedList::from_iter(v)),
span: Span(start, end)
}
}

// Matches a square-bracketed sequence of zero or more values and yields a
// Vector holding the children, in source order, as ValueAndSpan items. The
// span runs from before "[" to after "]".
pub vector -> ValueAndSpan =
start:#position "[" __ v:(value)* __ "]" end:#position {
ValueAndSpan {
inner: SpannedValue::Vector(v),
span: Span(start, end)
}
}

// Matches `#{ ... }` containing zero or more values and yields a Set. The
// children are collected into a BTreeSet, so elements that compare equal
// under ValueAndSpan's ordering (defined elsewhere) are stored only once.
// The span runs from before "#{" to after "}".
pub set -> ValueAndSpan =
start:#position "#{" __ v:(value)* __ "}" end:#position {
ValueAndSpan {
inner: SpannedValue::Set(BTreeSet::from_iter(v)),
span: Span(start, end)
}
}

// Private helper for `map`: matches two consecutive values and returns them
// as a (key, value) tuple. No span is recorded for the pair itself; each
// element carries its own span.
pair -> (ValueAndSpan, ValueAndSpan) =
k:(value) v:(value) {
(k, v)
}

pub map -> Value =
"{" __ v:(pair)* __ "}" {
Value::Map(BTreeMap::from_iter(v))
// Matches a brace-delimited sequence of zero or more key/value pairs and
// yields a Map. The pairs are collected into a BTreeMap keyed by the first
// element of each pair. The span runs from before "{" to after "}".
pub map -> ValueAndSpan =
start:#position "{" __ v:(pair)* __ "}" end:#position {
ValueAndSpan {
inner: SpannedValue::Map(BTreeMap::from_iter(v)),
span: Span(start, end)
}
}

// It's important that float comes before integer or the parser assumes that
// floats are integers and fails to parse
pub value -> Value =
pub value -> ValueAndSpan =
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it not possible to do all of the #position stuff here, in value? That is, capture the positions before and after the __ markers below, parse a plain Value in the inner rules, and then convert the result to ValueAndSpan? Maybe not, because of the internal whitespace that gets ignored?

Ah, as I think more about this, I think not: the recursive nature of ValueAndSpan denies that possibility, since the children of a container need to have the right type. So sad. (This might be possible with a ValueAndSpan<S> parameterized by an optional span type.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As you've figured out, this is not possible, because SpannedValue has ValueAndSpan children in its collections.

__ v:(nil / nan / infinity / boolean / float / octalinteger / hexinteger / basedinteger / bigint / integer / text / keyword / symbol / list / vector / map / set) __ {
v
}
Expand Down
Loading