diff --git a/core/src/eval/cache/lazy.rs b/core/src/eval/cache/lazy.rs index 55e97ed0a9..d70a8f3128 100644 --- a/core/src/eval/cache/lazy.rs +++ b/core/src/eval/cache/lazy.rs @@ -572,11 +572,11 @@ impl Thunk { /// Determine if a thunk is worth being put on the stack for future update. /// - /// Typically, WHNFs and annotated values will not be evaluated to a simpler expression and are - /// not worth updating. + /// Typically, expressions in weak head normal form (more precisely, in [effective weak head + /// normal form][crate::term::Term::is_eff_whnf]) won't evaluate further and their update can + /// be skipped. pub fn should_update(&self) -> bool { - let term = &self.borrow().body.term; - !term.is_whnf() && !term.is_annotated() + !self.borrow().body.term.is_eff_whnf() } /// Return a clone of the potential field dependencies stored in a revertible thunk. See diff --git a/core/src/eval/mod.rs b/core/src/eval/mod.rs index 7985ee81d8..f6a51a073d 100644 --- a/core/src/eval/mod.rs +++ b/core/src/eval/mod.rs @@ -666,6 +666,21 @@ impl VirtualMachine { } } } + // Closurize the argument of an enum variant if it's not already done. Usually this is done at the first + // time this enum variant is evaluated. + Term::EnumVariant { tag, arg, attrs } if !attrs.closurized => { + Closure { + body: RichTerm::new( + Term::EnumVariant { + tag, + arg: arg.closurize(&mut self.cache, env), + attrs: attrs.closurized(), + }, + pos, + ), + env: Environment::new(), + } + } // Closurize the record if it's not already done. Usually this is done at the first // time this record is evaluated. Term::Record(data) if !data.attrs.closurized => { @@ -1111,10 +1126,10 @@ pub fn subst( // substitution. Not recursing should be fine, though, because a type in term position // turns into a contract, and we don't substitute inside contracts either currently.
| v @ Term::Type(_) => RichTerm::new(v, pos), - Term::EnumVariant(tag, t) => { - let t = subst(cache, t, initial_env, env); + Term::EnumVariant { tag, arg, attrs } => { + let arg = subst(cache, arg, initial_env, env); - RichTerm::new(Term::EnumVariant(tag, t), pos) + RichTerm::new(Term::EnumVariant { tag, arg, attrs }, pos) } Term::Let(id, t1, t2, attrs) => { let t1 = subst(cache, t1, initial_env, env); diff --git a/core/src/eval/operation.rs b/core/src/eval/operation.rs index 594c3c9451..4d95c883b9 100644 --- a/core/src/eval/operation.rs +++ b/core/src/eval/operation.rs @@ -3127,9 +3127,18 @@ fn eq( (Term::Lbl(l1), Term::Lbl(l2)) => Ok(EqResult::Bool(l1 == l2)), (Term::SealingKey(s1), Term::SealingKey(s2)) => Ok(EqResult::Bool(s1 == s2)), (Term::Enum(id1), Term::Enum(id2)) => Ok(EqResult::Bool(id1.ident() == id2.ident())), - (Term::EnumVariant(id1, arg1), Term::EnumVariant(id2, arg2)) - if id1.ident() == id2.ident() => - { + ( + Term::EnumVariant { + tag: tag1, + arg: arg1, + .. + }, + Term::EnumVariant { + tag: tag2, + arg: arg2, + .. + }, + ) if tag1.ident() == tag2.ident() => { Ok(gen_eqs(cache, std::iter::once((arg1, arg2)), env1, env2)) } (Term::Record(r1), Term::Record(r2)) => { diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop index 35e66addf6..47f47e6607 100644 --- a/core/src/parser/grammar.lalrpop +++ b/core/src/parser/grammar.lalrpop @@ -702,7 +702,7 @@ EnumTag: LocIdent = { EnumVariant: RichTerm = ".." 
"(" ")" => - RichTerm::from(Term::EnumVariant(tag, arg)); + RichTerm::from(Term::EnumVariant { tag, arg, attrs: Default::default() }); ChunkLiteralPart: ChunkLiteralPart = { "str literal" => ChunkLiteralPart::Str(<>), diff --git a/core/src/pretty.rs b/core/src/pretty.rs index 0cf99dad5f..4bf617c89c 100644 --- a/core/src/pretty.rs +++ b/core/src/pretty.rs @@ -770,11 +770,11 @@ where }, Var(id) => allocator.as_string(id), Enum(id) => allocator.text("'").append(allocator.text(ident_quoted(id))), - EnumVariant(id, data) => allocator + EnumVariant { tag, arg, attrs: _ } => allocator .text("'") - .append(allocator.text(ident_quoted(id))) + .append(allocator.text(ident_quoted(tag))) .append("..(") - .append(data.pretty(allocator)) + .append(arg.pretty(allocator)) .append(")"), Record(record_data) => allocator.record(record_data, &[]), RecRecord(record_data, dyn_fields, _) => allocator.record(record_data, dyn_fields), diff --git a/core/src/term/mod.rs b/core/src/term/mod.rs index 8b9961f7a6..67e8c2a381 100644 --- a/core/src/term/mod.rs +++ b/core/src/term/mod.rs @@ -123,7 +123,11 @@ pub enum Term { /// version with one argument. Note that one can just use a record to store multiple named /// values in the argument. #[serde(skip)] - EnumVariant(LocIdent, RichTerm), + EnumVariant { + tag: LocIdent, + arg: RichTerm, + attrs: EnumVariantAttrs, + }, /// A record, mapping identifiers to terms. #[serde(serialize_with = "crate::serialize::serialize_record")] @@ -474,6 +478,36 @@ impl std::convert::TryFrom for RuntimeContract { } } +/// The attributes of a enum variant. +#[derive(Debug, Default, Eq, PartialEq, Clone)] +pub struct EnumVariantAttrs { + /// An enum variant is closurized if its argument is a [crate::term::Term::Closure] or a + /// constant. + /// + /// When initially produced by the parser, data structures such as enum variants or arrays + /// aren't closurized. 
At the first evaluation, they will be turned into closurized versions, + /// by allocating cache nodes (think thunks) for non constant elements. Once done, this flag is + /// set to `true`. + /// + /// Ideally, we would have a different AST representation for evaluation, where enum variants + /// would always be closurized. In the meantime, while we need to cope with a unique AST across + /// the whole pipeline, we use this flag to remember closurization. + pub closurized: bool, +} + +impl EnumVariantAttrs { + /// Create new enum variant attributes. By default, the `closurized` flag is set to `false`. + pub fn new() -> Self { + Default::default() + } + + /// Set the `closurized` flag to `true`. + pub fn closurized(mut self) -> Self { + self.closurized = true; + self + } +} + /// The attributes of a let binding. #[derive(Debug, Default, Eq, PartialEq, Clone)] pub struct LetAttrs { @@ -817,7 +851,7 @@ impl Term { Term::Match { .. } => Some("MatchExpression".to_owned()), Term::Lbl(_) => Some("Label".to_owned()), Term::Enum(_) => Some("Enum".to_owned()), - Term::EnumVariant(_, _) => Some("Enum".to_owned()), + Term::EnumVariant { .. } => Some("Enum".to_owned()), Term::Record(..) | Term::RecRecord(..) => Some("Record".to_owned()), Term::Array(..) => Some("Array".to_owned()), Term::SealingKey(_) => Some("SealingKey".to_owned()), @@ -840,7 +874,21 @@ impl Term { } } - /// Determine if a term is in evaluated form, called weak head normal form (WHNF). + /// Determine if a term is in evaluated form, called weak head normal form (WHNF). This test is + /// purely syntactic, which has the non-obvious consequence that some terms might be in WHNF + /// according to [Self::is_whnf] but might still be evaluated further. + /// + /// This is due to implementation details of the evaluation around closurization. The first + /// time an array or a record is evaluated, it will be closurized - thunks will be allocated to + /// store its elements and make them shareable. 
Thus, if `self` is `Term::Array(data, attrs)` + /// with `attrs.closurized` set to `false`, evaluation will rewrite it to a different array, + /// although in the surface language of Nickel, arrays are weak head normal forms. + /// + /// For everything happening pre-evaluation, you probably shouldn't care about this subtlety + /// and you can use `is_whnf` directly. + /// + /// However, at run-time, in particular if the property you care about is "is this term going + /// to be evaluated further", then you should use [Self::is_eff_whnf] instead. pub fn is_whnf(&self) -> bool { match self { Term::Null @@ -852,7 +900,7 @@ | Term::Match {..} | Term::Lbl(_) | Term::Enum(_) - | Term::EnumVariant(_, _) + | Term::EnumVariant {..} | Term::Record(..) | Term::Array(..) | Term::SealingKey(_) => true, @@ -877,6 +925,24 @@ } } + /// Helper used by [Self::is_eff_whnf] to determine if a term is a data structure that hasn't + /// been closurized yet. + fn is_unclosurized_datastructure(&self) -> bool { + match self { + Term::Array(_, attrs) => !attrs.closurized, + Term::Record(data) | Term::RecRecord(data, ..) => !data.attrs.closurized, + Term::EnumVariant { attrs, .. } => !attrs.closurized, + _ => false, + } + } + + /// Determine if an expression is an effective weak head normal form, that is a value that + /// won't be evaluated further by the virtual machine. Being an effective WHNF implies being a + /// WHNF, but the converse isn't true. See [Self::is_whnf] for more details. + pub fn is_eff_whnf(&self) -> bool { + self.is_whnf() && !self.is_unclosurized_datastructure() + } + /// Determine if a term is annotated. pub fn is_annotated(&self) -> bool { matches!(self, Term::Annotated(..)) @@ -916,7 +982,7 @@ | Term::RecRecord(..) | Term::Type(_) | Term::ParseError(_) - | Term::EnumVariant(_, _) + | Term::EnumVariant { .. } | Term::RuntimeError(_) => false, } } @@ -931,7 +997,7 @@ | Term::StrChunks(..) | Term::Lbl(..) | Term::Enum(..)
- | Term::EnumVariant(_, _) + | Term::EnumVariant {..} | Term::Record(..) | Term::RecRecord(..) | Term::Array(..) @@ -2051,7 +2117,7 @@ impl Traverse for RichTerm { }), Term::Fun(_, t) | Term::FunPattern(_, _, t) - | Term::EnumVariant(_, t) + | Term::EnumVariant { arg: t, .. } | Term::Op1(_, t) | Term::Sealed(_, t, _) => t.traverse_ref(f, state), Term::Let(_, t1, t2, _) diff --git a/core/src/transform/free_vars.rs b/core/src/transform/free_vars.rs index 8e035a8bcc..8b633c9a04 100644 --- a/core/src/transform/free_vars.rs +++ b/core/src/transform/free_vars.rs @@ -97,7 +97,7 @@ impl CollectFreeVars for RichTerm { t.collect_free_vars(free_vars); } } - Term::Op1(_, t) | Term::Sealed(_, t, _) | Term::EnumVariant(_, t) => { + Term::Op1(_, t) | Term::Sealed(_, t, _) | Term::EnumVariant { arg: t, .. } => { t.collect_free_vars(free_vars) } Term::OpN(_, ts) => { diff --git a/core/src/typecheck/mod.rs b/core/src/typecheck/mod.rs index 2cf0519be4..bd9db64d80 100644 --- a/core/src/typecheck/mod.rs +++ b/core/src/typecheck/mod.rs @@ -1569,7 +1569,7 @@ fn walk( walk(state, ctxt.clone(), visitor, t) }) } - Term::EnumVariant(_, t) => walk(state, ctxt, visitor, t), + Term::EnumVariant { arg: t, ..} => walk(state, ctxt, visitor, t), Term::Op1(_, t) => walk(state, ctxt.clone(), visitor, t), Term::Op2(_, t1, t2) => { walk(state, ctxt.clone(), visitor, t1)?; @@ -1967,18 +1967,18 @@ fn check( ty.unify(mk_uty_enum!(*id; row), state, &ctxt) .map_err(|err| err.into_typecheck_err(state, rt.pos)) } - Term::EnumVariant(id, t) => { + Term::EnumVariant { tag, arg, .. } => { let row_tail = state.table.fresh_erows_uvar(ctxt.var_level); let ty_arg = state.table.fresh_type_uvar(ctxt.var_level); // We match the expected type against `[| 'id ty_arg; row_tail |]`, where `row_tail` is // a free unification variable, to ensure it has the right shape and extract the // components. 
- ty.unify(mk_uty_enum!((*id, ty_arg.clone()); row_tail), state, &ctxt) + ty.unify(mk_uty_enum!((*tag, ty_arg.clone()); row_tail), state, &ctxt) .map_err(|err| err.into_typecheck_err(state, rt.pos))?; // Once we have a type for the argument, we check the variant's data against it. - check(state, ctxt, visitor, t, ty_arg) + check(state, ctxt, visitor, arg, ty_arg) } // If some fields are defined dynamically, the only potential type that works is `{_ : a}` // for some `a`. In other words, the checking rule is not the same depending on the target