From b6f946f660c8d05f0310cad04a248396dbbc481c Mon Sep 17 00:00:00 2001 From: Arthur Paulino Date: Tue, 16 Aug 2022 20:35:04 -0300 Subject: [PATCH] Documentation for the compiler (#166) * getting rid of CompileState.union * document projections * more documentation * fix mdata change; drop Expr.lty * more docs, some renamings * more WIP documentation * more docstrings * finished Compiler docstrings * small adjustments * add `cmpDef` docs Co-authored-by: Hanting Zhang --- TestsUtils/CompileAndExtractTests.lean | 3 +- Yatima/Cli/CompileCmd.lean | 17 +- Yatima/Cli/PipeCmd.lean | 24 +- Yatima/Compiler/CompileError.lean | 7 +- Yatima/Compiler/CompileM.lean | 119 +++-- Yatima/Compiler/Compiler.lean | 584 +++++++++++++++---------- Yatima/Compiler/Printing.lean | 7 +- Yatima/Converter/Converter.lean | 1 - Yatima/Datatypes/Expr.lean | 14 +- Yatima/Ipld/ToIpld.lean | 7 +- Yatima/Transpiler/Transpiler.lean | 35 +- Yatima/Transpiler/Utils.lean | 5 +- Yatima/Typechecker/Equal.lean | 1 - Yatima/Typechecker/Eval.lean | 1 - Yatima/Typechecker/Infer.lean | 1 - Yatima/Typechecker/Printing.lean | 4 - Yatima/Typechecker/Value.lean | 6 + 17 files changed, 473 insertions(+), 363 deletions(-) diff --git a/TestsUtils/CompileAndExtractTests.lean b/TestsUtils/CompileAndExtractTests.lean index f5058fa8..7ce74728 100644 --- a/TestsUtils/CompileAndExtractTests.lean +++ b/TestsUtils/CompileAndExtractTests.lean @@ -87,8 +87,7 @@ def pairConstants (x y : Array Const) : def reindexExpr (map : NatNatMap) : Expr → Expr | e@(.var ..) | e@(.sort _) - | e@(.lit ..) - | e@(.lty ..) => e + | e@(.lit ..) => e | .const n i ls => .const n (map.find! i) ls | .app e₁ e₂ => .app (reindexExpr map e₁) (reindexExpr map e₂) | .lam n bi e₁ e₂ => .lam n bi (reindexExpr map e₁) (reindexExpr map e₂) diff --git a/Yatima/Cli/CompileCmd.lean b/Yatima/Cli/CompileCmd.lean index f8b55dd9..35cdc675 100644 --- a/Yatima/Cli/CompileCmd.lean +++ b/Yatima/Cli/CompileCmd.lean @@ -18,25 +18,16 @@ def compileRun (p : Cli.Parsed) : IO UInt32 := do if !(p.hasFlag "prelude") then setLibsPaths let mut stt : CompileState := default let log := p.hasFlag "log" - let mut errMsg : Option String := none let mut cronos := Cronos.new for arg in args do for filePath in ← getLeanFilePathsList ⟨arg⟩ do let filePathStr := filePath.toString cronos ← cronos.clock filePathStr match ← compile filePath log stt with - | .ok stt' => match stt.union stt' with - | .ok stt' => - stt := stt' - cronos ← cronos.clock filePathStr - | .error msg => errMsg := some msg; break - | .error msg => errMsg := some (toString msg); break - if errMsg.isSome then break - match errMsg with - | some msg => - IO.eprintln msg - return 1 - | none => pure () + | .ok stt' => + stt := stt' + cronos ← cronos.clock filePathStr + | .error msg => IO.eprintln msg; return 1 if p.hasFlag "summary" then IO.println s!"{stt.summary}" IO.println s!"\n{cronos.summary}" diff --git a/Yatima/Cli/PipeCmd.lean b/Yatima/Cli/PipeCmd.lean index 4da727ca..5534508a 100644 --- a/Yatima/Cli/PipeCmd.lean +++ b/Yatima/Cli/PipeCmd.lean @@ -23,7 +23,6 @@ def pipeRun (p : Cli.Parsed) : IO UInt32 := do cronos ← cronos.clock "Compilation" if !(p.hasFlag "prelude") then setLibsPaths let mut stt : CompileState := default - let mut errMsg : Option String := none let log := p.hasFlag "log" let mut cronos' := Cronos.new for arg in args do @@ -31,21 +30,14 @@ def pipeRun (p : Cli.Parsed) : IO UInt32 := do let filePathStr := filePath.toString cronos' ← cronos'.clock filePathStr match ← compile filePath log stt with - | .ok stt' => match stt.union stt' with - | .ok stt' => - stt := stt' - cronos' ← cronos'.clock filePathStr - | .error msg => errMsg := some msg; break - | .error msg => errMsg := some (toString msg); break - if errMsg.isSome then break - match errMsg with - | some msg => - IO.eprintln msg - return 1 - | none => - if p.hasFlag "summary" then - IO.println s!"{stt.summary}" - IO.println s!"\n{cronos'.summary}" + | .ok stt' => + stt := stt' + cronos' ← cronos'.clock filePathStr + | .error msg => IO.eprintln msg; return 1 + if p.hasFlag "summary" then + IO.println s!"{stt.summary}" + IO.println s!"\n{cronos'.summary}" + cronos ← cronos.clock "Compilation" if p.hasFlag "typecheck" then cronos ← cronos.clock "Typechecking" diff --git a/Yatima/Compiler/CompileError.lean b/Yatima/Compiler/CompileError.lean index adf0bcd4..8cf4757e 100644 --- a/Yatima/Compiler/CompileError.lean +++ b/Yatima/Compiler/CompileError.lean @@ -4,9 +4,11 @@ import Yatima.Datatypes.Const namespace Yatima.Compiler +/-- Errors that can be thrown in `Yatima.Compiler.CompileM` -/ inductive CompileError | notFoundInCache : Name → CompileError - | invalidDereferringIndex : Nat → Nat → CompileError + | notFoundInRecrCtx : Name → CompileError + | invalidConstantIndex : Nat → Nat → CompileError | unknownConstant : Name → CompileError | unfilledLevelMetavariable : Lean.Level → CompileError | unfilledExprMetavariable : Lean.Expr → CompileError @@ -26,7 +28,8 @@ inductive CompileError instance : ToString CompileError where toString | .notFoundInCache n => s!"Could not find cid of '{n}' in cache" - | .invalidDereferringIndex idx size => + | .notFoundInRecrCtx n => s!"Could not find '{n}' in recrCtx" + | .invalidConstantIndex idx size => s!"Invalid index {idx} for dereferring a constant. Must be < {size}." | .unknownConstant n => s!"Unknown constant '{n}'" | .unfilledLevelMetavariable l => s!"Unfilled level metavariable on universe '{l}'" diff --git a/Yatima/Compiler/CompileM.lean b/Yatima/Compiler/CompileM.lean index 365e9072..7b4e0bca 100644 --- a/Yatima/Compiler/CompileM.lean +++ b/Yatima/Compiler/CompileM.lean @@ -10,30 +10,18 @@ open Std (RBMap) /-- The state for the `Yatima.Compiler.CompileM` monad. -IMPORTANT: `consts` is unreliable when compiling multiple files! +* `store` contains the resulting set of objects in the IPLD format +* `consts` is the "pure" array of constants, without CIDs +* `cache` is just for optimization purposes -/ structure CompileState where store : Ipld.Store - cache : RBMap Name (ConstCid × ConstIdx) compare consts : Array Const + cache : RBMap Name (ConstCid × ConstIdx) compare deriving Inhabited -namespace CompileState - -def union (s s' : CompileState) : Except String CompileState := Id.run do - let mut cache := s.cache - for (n, c') in s'.cache do - match s.cache.find? n with - | some c₁ => - if c₁.1 != c'.1 then return throw s!"Conflicting declarations for '{n}'" - | none => cache := cache.insert n c' - return .ok ⟨ - s.store.union s'.store, - cache, - s'.consts - ⟩ - -def summary (s : CompileState) : String := +/-- Creates a summary off of a `Yatima.Compiler.CompileState` as a `String` -/ +def CompileState.summary (s : CompileState) : String := let consts := ", ".intercalate $ s.consts.toList.map fun c => s!"{c.name} : {c.ctorName}" "Compilation summary:\n" ++ @@ -48,77 +36,86 @@ def summary (s : CompileState) : String := s!" const_meta size: {s.store.const_meta.size}\n" ++ s!" cache size: {s.cache.size}" -end CompileState +/-- +The type of entries for the `recrCtx`. It contains: +1. The index of the constant in the mutual block +2. The index in the list of weakly equal mutual definitions (N/A inductives) +3. The constant index in array of constants +-/ +abbrev RecrCtxEntry := (Nat × Option Nat × ConstIdx) -abbrev RecrCtxEntry := (Nat × Option Nat × Nat) +/-- +The read-only environment for the `Yatima.Compiler.CompileM` monad. +* `constMap` is the original set of constants provided by Lean +* `univCtx` is the current list of universes +* `bindCtx` is the current list of binders +* `recrCtx` is keeps the information for names that represent recursive calls +* `log` tells whether the user wants to log the compilation +-/ structure CompileEnv where constMap : Lean.ConstMap - univCtx : List Lean.Name + univCtx : List Name bindCtx : List Name - -- (mutual index in recrCtx, weakly equal index (N/A inductives), constant index in array of constants) recrCtx : Std.RBMap Lean.Name RecrCtxEntry compare log : Bool deriving Inhabited +/-- Instantiates a `Yatima.Compiler.CompileEnv` from a map of constants -/ def CompileEnv.init (map : Lean.ConstMap) (log : Bool) : CompileEnv := ⟨map, [], [], .empty, log⟩ +/-- +The monad in which compilation takes place is a stack of `ReaderT`, `ExceptT` +and `StateT` on top of IO +-/ abbrev CompileM := ReaderT CompileEnv $ ExceptT CompileError $ StateT CompileState IO +/-- Basic runner function for `Yatima.Compiler.CompileEnv` -/ def CompileM.run (env : CompileEnv) (ste : CompileState) (m : CompileM α) : IO $ Except CompileError CompileState := do match ← StateT.run (ReaderT.run m env) ste with | (.ok _, ste) => return .ok ste | (.error e, _) => return .error e -def withName (name : Name) : CompileM α → CompileM α := +/-- Computes with a new binder in the monad environment -/ +def withBinder (name : Name) : CompileM α → CompileM α := withReader $ fun e => ⟨e.constMap, e.univCtx, name :: e.bindCtx, e.recrCtx, e.log⟩ -def withResetCompileEnv (levels : List Lean.Name) : +/-- Computes with a given list of levels and reset binders and `recrCtx` -/ +def withLevelsAndReset (levels : List Name) : CompileM α → CompileM α := withReader $ fun e => ⟨e.constMap, levels, [], .empty, e.log⟩ -def withRecrs (recrCtx : RBMap Lean.Name RecrCtxEntry compare) : +/-- Computes with a given `recrCtx` -/ +def withRecrs (recrCtx : RBMap Name RecrCtxEntry compare) : CompileM α → CompileM α := withReader $ fun e => ⟨e.constMap, e.univCtx, e.bindCtx, recrCtx, e.log⟩ -def withLevels (lvls : List Lean.Name) : CompileM α → CompileM α := +/-- Computes with a given list of levels-/ +def withLevels (lvls : List Name) : CompileM α → CompileM α := withReader $ fun e => ⟨e.constMap, lvls, e.bindCtx, e.recrCtx, e.log⟩ -inductive StoreKey : Type → Type - | univ : Ipld.Both Ipld.UnivCid → StoreKey (Ipld.Both Ipld.Univ) - | expr : Ipld.Both Ipld.ExprCid → StoreKey (Ipld.Both Ipld.Expr) - | const : Ipld.Both Ipld.ConstCid → StoreKey (Ipld.Both Ipld.Const) - -def StoreKey.find? (key : StoreKey A) : CompileM (Option A) := do - let store := (← get).store - match key with - | .univ univCid => - match store.univ_anon.find? univCid.anon, store.univ_meta.find? univCid.meta with - | some univAnon, some univMeta => pure $ some ⟨ univAnon, univMeta ⟩ - | _, _ => pure none - | .expr exprCid => - match store.expr_anon.find? exprCid.anon, store.expr_meta.find? exprCid.meta with - | some exprAnon, some exprMeta => pure $ some ⟨ exprAnon, exprMeta ⟩ - | _, _ => pure none - | .const constCid => - match store.const_anon.find? constCid.anon, store.const_meta.find? constCid.meta with - | some constAnon, some constMeta => pure $ some ⟨ constAnon, constMeta ⟩ - | _, _ => pure none - -def StoreKey.find! (key : StoreKey A) : CompileM A := do - let some value ← StoreKey.find? key | throw $ .custom "Cannot find key in store" - return value - -inductive StoreValue : Type → Type - | univ : Ipld.Both Ipld.Univ → StoreValue (Ipld.Both Ipld.UnivCid) - | expr : Ipld.Both Ipld.Expr → StoreValue (Ipld.Both Ipld.ExprCid) - | const : Ipld.Both Ipld.Const → StoreValue (Ipld.Both Ipld.ConstCid) - -def StoreValue.insert : StoreValue A → CompileM A +/-- Possibly gets a `Yatima.Compiler.RecrCtxEntry` from the `recrCtx` by name -/ +def getFromRecrCtx (name : Name) : CompileM $ Option RecrCtxEntry := + return (← read).recrCtx.find? name + +/-- Forcibly gets a `Yatima.Compiler.RecrCtxEntry` from the `recrCtx` by name -/ +def getFromRecrCtx! (name : Name) : CompileM $ RecrCtxEntry := do + match ← getFromRecrCtx name with + | some entry => pure entry + | none => throw $ .notFoundInRecrCtx name + +/-- Auxiliary type to standardize additions of CIDs to the store -/ +inductive StoreEntry : Type → Type + | univ : Ipld.Both Ipld.Univ → StoreEntry (Ipld.Both Ipld.UnivCid) + | expr : Ipld.Both Ipld.Expr → StoreEntry (Ipld.Both Ipld.ExprCid) + | const : Ipld.Both Ipld.Const → StoreEntry (Ipld.Both Ipld.ConstCid) + +/-- Adds CID data to the store, but also returns it for practical reasons -/ +def addToStore : StoreEntry A → CompileM A | .univ obj => let cid := ⟨ ToIpld.univToCid obj.anon, ToIpld.univToCid obj.meta ⟩ modifyGet (fun stt => (cid, { stt with store := @@ -132,7 +129,7 @@ def StoreValue.insert : StoreValue A → CompileM A | .const obj => let cid := ⟨ ToIpld.constToCid obj.anon, ToIpld.constToCid obj.meta ⟩ match obj.anon, obj.meta with - -- Mutual definition/inductive blocks do not get added to the set of definitions + -- Mutual definition/inductive blocks do not get added to the set of constants | .mutDefBlock .., .mutDefBlock .. | .mutIndBlock .., .mutIndBlock .. => modifyGet fun stt => (cid, { stt with store := @@ -144,14 +141,16 @@ def StoreValue.insert : StoreValue A → CompileM A const_meta := stt.store.const_meta.insert cid.meta obj.meta, consts := stt.store.consts.insert cid } }) +/-- Adds data associated with a name to the cache -/ def addToCache (name : Name) (c : ConstCid × ConstIdx) : CompileM Unit := do modify fun stt => { stt with cache := stt.cache.insert name c } -def addToConsts (idx : Nat) (c : Const): CompileM Unit := do +/-- Adds a constant to the array of constants at a given index -/ +def addToConsts (idx : ConstIdx) (c : Const) : CompileM Unit := do let consts := (← get).consts if h : idx < consts.size then modify fun stt => { stt with consts := consts.set ⟨idx, h⟩ c } else - throw $ .invalidDereferringIndex idx consts.size + throw $ .invalidConstantIndex idx consts.size end Yatima.Compiler diff --git a/Yatima/Compiler/Compiler.lean b/Yatima/Compiler/Compiler.lean index b2519a0b..36073bd2 100644 --- a/Yatima/Compiler/Compiler.lean +++ b/Yatima/Compiler/Compiler.lean @@ -32,36 +32,39 @@ instance : Coe Lean.QuotKind QuotKind where coe open ToIpld +/-- Gets a constant from the array of constants -/ def derefConst (idx : ConstIdx) : CompileM Const := do let consts := (← get).consts let size := consts.size if h : idx < size then return consts[idx]'h else - throw $ .invalidDereferringIndex idx size + throw $ .invalidConstantIndex idx size -def findConstant (name : Lean.Name) : CompileM Lean.ConstantInfo := do +/-- Retrieves a Lean constant from the environment by its name -/ +def getLeanConstant (name : Lean.Name) : CompileM Lean.ConstantInfo := do match (← read).constMap.find? name with | some const => pure const | none => throw $ .unknownConstant name -def toYatimaUniv (l : Lean.Level) : CompileM (UnivCid × Univ) := do +/-- Compiles a Lean universe level and adds it to the store -/ +def compileUniv (l : Lean.Level) : CompileM (UnivCid × Univ) := do let (value, univ) ← match l with | .zero => do let value : Ipld.Both Ipld.Univ := ⟨ .zero, .zero ⟩ pure (value, .zero) | .succ n => do - let (univCid, univ) ← toYatimaUniv n + let (univCid, univ) ← compileUniv n let value : Ipld.Both Ipld.Univ := ⟨ .succ univCid.anon, .succ univCid.meta ⟩ pure (value, .succ univ) | .max a b => do - let (univACid, univA) ← toYatimaUniv a - let (univBCid, univB) ← toYatimaUniv b + let (univACid, univA) ← compileUniv a + let (univBCid, univB) ← compileUniv b let value : Ipld.Both Ipld.Univ := ⟨ .max univACid.anon univBCid.anon, .max univACid.meta univBCid.meta ⟩ pure (value, .max univA univB) | .imax a b => do - let (univACid, univA) ← toYatimaUniv a - let (univBCid, univB) ← toYatimaUniv b + let (univACid, univA) ← compileUniv a + let (univBCid, univB) ← compileUniv b let value : Ipld.Both Ipld.Univ := ⟨ .imax univACid.anon univBCid.anon, .imax univACid.meta univBCid.meta ⟩ pure (value, .imax univA univB) | .param name => do @@ -72,9 +75,10 @@ def toYatimaUniv (l : Lean.Level) : CompileM (UnivCid × Univ) := do pure (value, .var name n) | none => throw $ .levelNotFound name lvls | .mvar .. => throw $ .unfilledLevelMetavariable l - let cid ← StoreValue.insert $ .univ value + let cid ← addToStore $ .univ value pure (cid, univ) +/-- Defines an ordering for Lean universes -/ def cmpLevel (x : Lean.Level) (y : Lean.Level) : (CompileM Ordering) := do match x, y with | .mvar .., _ => throw $ .unfilledLevelMetavariable x @@ -98,20 +102,25 @@ def cmpLevel (x : Lean.Level) (y : Lean.Level) : (CompileM Ordering) := do | none, _ => throw $ .levelNotFound x lvls | _, none => throw $ .levelNotFound y lvls +def isInternalRec (expr : Lean.Expr) (name : Lean.Name) : Bool := + match expr with + | .forallE _ t e _ => match e with + | .forallE .. => isInternalRec e name + -- t is the major premise + | _ => isInternalRec t name + | .app e .. => isInternalRec e name + | .const n .. => n == name + | _ => false + mutual /-- - Process a Lean constant into a Yatima constant, returning both the Yatima - constant and its cid. - - Different behavior is taken if the input `leanConst` is in a mutual block, - since `toYatimaConst` returns the constant of the entire block (see - `toYatimaConst`). We avoid returning the entire block and return the `mutDef` - corresponding the input. + Gets the Yatima constant references off of a Lean constant, returning its CID + and its index in the array of constants. - Side effects: caches any new processed values in `cache`, `expr_cache`, and - `const_cache`. + If the result is already cached, returns it right away. Otherwise, calls + `compileConstant` to do the actual compilation. -/ - partial def processYatimaConst (const : Lean.ConstantInfo) : + partial def getCompiledConst (const : Lean.ConstantInfo) : CompileM $ ConstCid × ConstIdx := do let name := const.name let log := (← read).log @@ -120,33 +129,43 @@ mutual | none => if log then IO.println s!"↡ Stacking {name}{const.formatAll}" - let c ← toYatimaConst const + let c ← compileConstant const if log then IO.println s!"↟ Popping {name}" return c - partial def toYatimaConst : Lean.ConstantInfo → CompileM (ConstCid × ConstIdx) + /-- + Performs the compilation of Lean constants. + + For the `.defnInfo`, `.inductInfo`, `.ctorInfo` and `.recInfo` cases, the + side-effects are responsability of the auxiliary functions. + + For other cases, adds them to the cache, the store and the array of constants. + -/ + partial def compileConstant : Lean.ConstantInfo → CompileM (ConstCid × ConstIdx) -- These cases add multiple constants at the same time - | .inductInfo struct => withResetCompileEnv struct.levelParams $ toYatimaInductive struct - | .defnInfo struct => withResetCompileEnv struct.levelParams $ toYatimaDef struct + | .defnInfo struct => withLevelsAndReset struct.levelParams $ compileDefinition struct + | .inductInfo struct => withLevelsAndReset struct.levelParams $ compileInductive struct -- These cases are subsumed by the inductive case | .ctorInfo struct => do - match ← findConstant struct.induct with - | .inductInfo ind => processYatimaConst (.inductInfo ind) - | const => throw $ .invalidConstantKind const "inductive" - processYatimaConst (.ctorInfo struct) + discard $ match ← getLeanConstant struct.induct with + | .inductInfo ind => getCompiledConst (.inductInfo ind) + | const => throw $ .invalidConstantKind const "inductive" + getCompiledConst (.ctorInfo struct) | .recInfo struct => do - match ← findConstant struct.getInduct with - | .inductInfo ind => processYatimaConst (.inductInfo ind) - | const => throw $ .invalidConstantKind const "inductive" - processYatimaConst (.recInfo struct) + discard $ match ← getLeanConstant struct.getInduct with + | .inductInfo ind => getCompiledConst (.inductInfo ind) + | const => throw $ .invalidConstantKind const "inductive" + getCompiledConst (.recInfo struct) -- The rest adds the constants to the cache one by one - | const => withResetCompileEnv const.levelParams do - -- It is important to push first with some value so we don't lose the position of the constant in a recursive call - let constIdx ← modifyGet (fun stt => (stt.consts.size, { stt with consts := stt.consts.push default })) + | const => withLevelsAndReset const.levelParams do + -- It is important to push first with some value so we don't lose the + -- position of the constant in a recursive call + let constIdx ← modifyGet fun stt => + (stt.consts.size, { stt with consts := stt.consts.push default }) let values : Ipld.Both Ipld.Const × Const ← match const with | .axiomInfo struct => - let (typeCid, type) ← toYatimaExpr struct.type + let (typeCid, type) ← compileExpr struct.type let ax := { name := struct.name lvls := struct.levelParams @@ -155,9 +174,9 @@ mutual let value := ⟨ .axiom $ ax.toIpld typeCid, .axiom $ ax.toIpld typeCid ⟩ pure (value, .axiom ax) | .thmInfo struct => - let (typeCid, type) ← toYatimaExpr struct.type + let (typeCid, type) ← compileExpr struct.type -- Theorems are never truly recursive, though they can use recursive schemes - let (valueCid, value) ← toYatimaExpr struct.value + let (valueCid, value) ← compileExpr struct.value let thm := { name := struct.name lvls := struct.levelParams @@ -166,9 +185,8 @@ mutual let value := ⟨.theorem $ thm.toIpld typeCid valueCid, .theorem $ thm.toIpld typeCid valueCid⟩ pure (value, Const.theorem thm) | .opaqueInfo struct => - let (typeCid, type) ← toYatimaExpr struct.type - -- TODO: Is `RBMap.single` correct? Shouldn't we add a new entry to the underlying `recrCtx`? - let (valueCid, value) ← withRecrs (RBMap.single struct.name (0, some 0, constIdx)) $ toYatimaExpr struct.value + let (typeCid, type) ← compileExpr struct.type + let (valueCid, value) ← withRecrs (RBMap.single struct.name (0, some 0, constIdx)) $ compileExpr struct.value let opaq := { name := struct.name lvls := struct.levelParams @@ -178,7 +196,7 @@ mutual let value := ⟨.opaque $ opaq.toIpld typeCid valueCid, .opaque $ opaq.toIpld typeCid valueCid⟩ pure (value, .opaque opaq) | .quotInfo struct => - let (typeCid, type) ← toYatimaExpr struct.type + let (typeCid, type) ← compileExpr struct.type let quot := { name := struct.name lvls := struct.levelParams @@ -186,138 +204,168 @@ mutual kind := struct.kind } let value := ⟨.quotient $ quot.toIpld typeCid, .quotient $ quot.toIpld typeCid⟩ pure (value, .quotient quot) - | _ => unreachable! - let cid ← StoreValue.insert $ .const values.fst + | _ => unreachable! -- the other cases were already dealt with + let cid ← addToStore $ .const values.fst addToConsts constIdx values.snd addToCache const.name (cid, constIdx) pure (cid, constIdx) - partial def toYatimaExpr : Lean.Expr → CompileM (ExprCid × Expr) - | .mdata _ e => toYatimaExpr e + /-- + Compiles a Lean expression and adds it to the store. + + Constants are the tricky case, for which there are two possibilities: + * The constant belongs to `recrCtx`, representing a recursive call. Those are + encoded as variables with indexes that go beyond the bind indexes + * The constant doesn't belong to `recrCtx`, meaning that it's not a recursion + and thus we can compile the actual constant right away + -/ + partial def compileExpr : Lean.Expr → CompileM (ExprCid × Expr) + | .mdata _ e => compileExpr e | expr => do let (value, expr) ← match expr with | .bvar idx => match (← read).bindCtx.get? idx with + -- Bound variables must be in the bind context | some name => let value : Ipld.Both Ipld.Expr := ⟨ .var idx () [], .var name (Split.injᵣ none) [] ⟩ pure (value, .var name idx) | none => throw $ .invalidBVarIndex idx | .sort lvl => - let (univCid, univ) ← toYatimaUniv lvl + let (univCid, univ) ← compileUniv lvl let value : Ipld.Both Ipld.Expr := ⟨ .sort univCid.anon, .sort univCid.meta ⟩ pure (value, .sort univ) | .const name lvls => - let pairs ← lvls.mapM $ toYatimaUniv - let (univCids, univs) ← pairs.foldrM (fun pair pairs => pure (pair.fst :: pairs.fst, pair.snd :: pairs.snd)) ([], []) + let pairs ← lvls.mapM $ compileUniv + let (univCids, univs) ← pairs.foldrM (init := ([], [])) + fun pair pairs => pure (pair.fst :: pairs.fst, pair.snd :: pairs.snd) match (← read).recrCtx.find? name with - | some (i, i?, ref) => + | some (i, i?, ref) => -- recursing! let idx := (← read).bindCtx.length + i let value : Ipld.Both Ipld.Expr := ⟨ .var idx () (univCids.map (·.anon)), .var name i? (univCids.map (·.meta)) ⟩ pure (value, .const name ref univs) | none => - let const ← findConstant name - let (constCid, const) ← processYatimaConst const + let const ← getLeanConstant name + let (constCid, const) ← getCompiledConst const let value : Ipld.Both Ipld.Expr := ⟨ .const () constCid.anon $ univCids.map (·.anon), .const name constCid.meta $ univCids.map (·.meta) ⟩ pure (value, .const name const univs) | .app fnc arg => - let (fncCid, fnc) ← toYatimaExpr fnc - let (argCid, arg) ← toYatimaExpr arg - let value : Ipld.Both Ipld.Expr := ⟨ .app fncCid.anon argCid.anon, .app fncCid.meta argCid.meta ⟩ + let (fncCid, fnc) ← compileExpr fnc + let (argCid, arg) ← compileExpr arg + let value : Ipld.Both Ipld.Expr := + ⟨ .app fncCid.anon argCid.anon, .app fncCid.meta argCid.meta ⟩ pure (value, .app fnc arg) | .lam name typ bod bnd => - let (typCid, typ) ← toYatimaExpr typ - let (bodCid, bod) ← withName name $ toYatimaExpr bod - let value : Ipld.Both Ipld.Expr := ⟨ .lam () bnd typCid.anon bodCid.anon, .lam name () typCid.meta bodCid.meta ⟩ + let (typCid, typ) ← compileExpr typ + let (bodCid, bod) ← withBinder name $ compileExpr bod + let value : Ipld.Both Ipld.Expr := + ⟨ .lam () bnd typCid.anon bodCid.anon, .lam name () typCid.meta bodCid.meta ⟩ pure (value, .lam name bnd typ bod) | .forallE name dom img bnd => - let (domCid, dom) ← toYatimaExpr dom - let (imgCid, img) ← withName name $ toYatimaExpr img - let value : Ipld.Both Ipld.Expr := ⟨ .pi () bnd domCid.anon imgCid.anon, .pi name () domCid.meta imgCid.meta ⟩ + let (domCid, dom) ← compileExpr dom + let (imgCid, img) ← withBinder name $ compileExpr img + let value : Ipld.Both Ipld.Expr := + ⟨ .pi () bnd domCid.anon imgCid.anon, .pi name () domCid.meta imgCid.meta ⟩ pure (value, .pi name bnd dom img) | .letE name typ exp bod _ => - let (typCid, typ) ← toYatimaExpr typ - let (expCid, exp) ← toYatimaExpr exp - let (bodCid, bod) ← withName name $ toYatimaExpr bod - let value : Ipld.Both Ipld.Expr := ⟨ .letE () typCid.anon expCid.anon bodCid.anon, .letE name typCid.meta expCid.meta bodCid.meta ⟩ + let (typCid, typ) ← compileExpr typ + let (expCid, exp) ← compileExpr exp + let (bodCid, bod) ← withBinder name $ compileExpr bod + let value : Ipld.Both Ipld.Expr := + ⟨ .letE () typCid.anon expCid.anon bodCid.anon, .letE name typCid.meta expCid.meta bodCid.meta ⟩ pure (value, .letE name typ exp bod) - | .lit lit=> + | .lit lit => let value : Ipld.Both Ipld.Expr := ⟨ .lit lit, .lit () ⟩ pure (value, .lit lit) - | .proj _ idx exp=> do - let (expCid, exp) ← toYatimaExpr exp + | .proj _ idx exp => + let (expCid, exp) ← compileExpr exp let value : Ipld.Both Ipld.Expr := ⟨ .proj idx expCid.anon, .proj () expCid.meta ⟩ pure (value, .proj idx exp) - | .fvar .. => throw $ .freeVariableExpr expr - | .mvar .. => throw $ .metaVariableExpr expr + | .fvar .. => throw $ .freeVariableExpr expr + | .mvar .. => throw $ .metaVariableExpr expr | .mdata .. => throw $ .metaDataExpr expr - let cid ← StoreValue.insert $ .expr value + let cid ← addToStore $ .expr value pure (cid, expr) - partial def toYatimaInductive (initInd : Lean.InductiveVal) : CompileM (ConstCid × ConstIdx) := do - -- `constList` is the list of the names of all constants associated with an inductive block: the inductives themselves, - -- the constructors and the recursors. - let mut constList : List Lean.Name := [] + /-- + Compiles an inductive and all inductives in the mutual block as a mutual + block, even if the inductive itself is not in a mutual block. + + The compilation of an inductive involves the compilation of its associated + constructors and recursors, hence the lenght of this function. + -/ + partial def compileInductive (initInd : Lean.InductiveVal) : + CompileM (ConstCid × ConstIdx) := do + -- `mutualConsts` is the list of the names of all constants associated with an + -- inductive block: the inductives themselves, the constructors and the recursors + let mut mutualConsts : List Lean.Name := [] for indName in initInd.all do - match ← findConstant indName with + match ← getLeanConstant indName with | .inductInfo ind => - let leanRecs := (← read).constMap.childrenOfWith ind.name -- reverses once - fun c => match c with | .recInfo _ => true | _ => false - let leanRecs := leanRecs.map (·.name) - let addList := (indName :: ind.ctors).append leanRecs - constList := constList.append addList - | const => throw $ .invalidConstantKind const "inductive" - - -- All inductives, constructors and recursors are done in one go, so we must append an array of `constList.length` to `consts` - -- and save the mapping of all names in `constList` to their respective indices - let mut firstIdx ← modifyGet (fun stt => (stt.consts.size, { stt with consts := stt.consts.append (mkArray constList.length default) })) - let mut mutualIdxs : RBMap Lean.Name RecrCtxEntry compare := RBMap.empty - for (i, n) in constList.enum do - mutualIdxs := mutualIdxs.insert n (i, none, firstIdx + i) - - -- This part will build the inductive block and add all inductives, constructors and recursors to `consts` - let indInfos : List (Ipld.Both Ipld.Inductive) ← initInd.all.foldrM (init := []) fun name acc => do - match ← findConstant name with - | .inductInfo ind => do - withRecrs mutualIdxs do - let ipldInd ← toYatimaIpldInductive ind - pure $ ipldInd :: acc + let leanRecs := ((← read).constMap.childrenOfWith ind.name + fun c => match c with | .recInfo _ => true | _ => false).map (·.name) + mutualConsts := mutualConsts ++ (indName :: ind.ctors) ++ leanRecs | const => throw $ .invalidConstantKind const "inductive" - let indBlock := ⟨.mutIndBlock $ indInfos.map (·.anon), .mutIndBlock $ indInfos.map (·.meta)⟩ - let indBlockCid ← StoreValue.insert $ .const indBlock + -- All inductives, constructors and recursors are done in one go, so we must + -- append an array of `mutualConsts.length` to `consts` and save the mapping + -- of all names in `mutualConsts` to their respective indices + let mut firstIdx : ConstIdx ← modifyGet fun stt => + (stt.consts.size, + { stt with consts := stt.consts ++ mkArray mutualConsts.length default }) + + let recrCtx := mutualConsts.enum.foldl (init := default) + fun acc (i, n) => acc.insert n (i, none, firstIdx + i) + + -- This part will build the inductive block and add all inductives, + -- constructors and recursors to `consts` + let ipldInds : List (Ipld.Both Ipld.Inductive) ← initInd.all.foldrM (init := []) + fun name acc => do + match ← getLeanConstant name with + | .inductInfo ind => do + withRecrs recrCtx do + let ipldInd ← toYatimaIpldInductive ind + pure $ ipldInd :: acc + | const => throw $ .invalidConstantKind const "inductive" + let indBlockCid ← addToStore $ .const + ⟨.mutIndBlock $ ipldInds.map (·.anon), .mutIndBlock $ ipldInds.map (·.meta)⟩ + + -- While iterating on the inductives from the mutual block, we need to track + -- the correct objects to return let mut ret? : Option (ConstCid × ConstIdx) := none + + -- `constIdx` keeps track of the constant index for the next addition to cache let mut constIdx := firstIdx - for (indIdx, ⟨indAnon, indMeta⟩) in indInfos.enum do - -- Add the IPLD inductive projections and inductives to the cache + for (indIdx, ⟨indAnon, indMeta⟩) in ipldInds.enum do + -- Store and cache inductive projections let name := indMeta.name.projᵣ let indProj := ⟨ .inductiveProj ⟨ (), indAnon.lvls, indAnon.type, indBlockCid.anon, indIdx ⟩ , .inductiveProj ⟨ indMeta.name, indMeta.lvls, indMeta.type, indBlockCid.meta, () ⟩ ⟩ - let cid ← StoreValue.insert $ .const indProj - addToCache name (cid, constIdx) + let cid ← addToStore $ .const indProj if name == initInd.name then ret? := some (cid, constIdx) + addToCache name (cid, constIdx) constIdx := constIdx + 1 for (ctorIdx, (ctorAnon, ctorMeta)) in (indAnon.ctors.zip indMeta.ctors).enum do - -- Add the IPLD constructor projections and constructors to the cache + -- Store and cache constructor projections let name := ctorMeta.name.projᵣ let ctorProj := ⟨ .constructorProj ⟨ (), ctorAnon.lvls, ctorAnon.type, indBlockCid.anon, indIdx, ctorIdx ⟩ , .constructorProj ⟨ ctorMeta.name, ctorMeta.lvls, ctorMeta.type, indBlockCid.meta, (), () ⟩ ⟩ - let cid ← StoreValue.insert $ .const ctorProj + let cid ← addToStore $ .const ctorProj addToCache name (cid, constIdx) constIdx := constIdx + 1 for (recrIdx, (recrAnon, recrMeta)) in (indAnon.recrs.zip indMeta.recrs).enum do - -- Add the IPLD recursor projections and recursors to the cache + -- Store and cache recursor projections let name := recrMeta.2.name.projᵣ let recrProj := ⟨ .recursorProj ⟨ (), recrAnon.2.lvls, recrAnon.2.type, indBlockCid.anon, indIdx, recrIdx ⟩ , .recursorProj ⟨ recrMeta.2.name, recrMeta.2.lvls, recrMeta.2.type, indBlockCid.meta, (), () ⟩ ⟩ - let cid ← StoreValue.insert $ .const recrProj + let cid ← addToStore $ .const recrProj addToCache name (cid, constIdx) constIdx := constIdx + 1 @@ -325,28 +373,32 @@ mutual | some ret => return ret | none => throw $ .constantNotCompiled initInd.name + /-- Encodes a Lean inductive to IPLD -/ partial def toYatimaIpldInductive (ind : Lean.InductiveVal) : CompileM $ Ipld.Both Ipld.Inductive := do let leanRecs := (← read).constMap.childrenOfWith ind.name fun c => match c with | .recInfo _ => true | _ => false - let (recs, ctors) : ((List $ Ipld.Both (Sigma fun x => Ipld.Recursor x ·)) × (List $ Ipld.Both Ipld.Constructor)) := - ← leanRecs.foldrM (init := ([], [])) fun r (recs, ctors) => + let (recs, ctors) : (List $ Ipld.Both (Sigma fun x => Ipld.Recursor x ·)) × + (List $ Ipld.Both Ipld.Constructor) := + ← leanRecs.foldrM (init := ([], [])) fun r (recs, ctors) => do match r with - | .recInfo rv => do - if ← isInternalRec rv.type ind.name then do + | .recInfo rv => + if isInternalRec rv.type ind.name then let (thisRec, thisCtors) := ← toYatimaIpldInternalRec ind.ctors r - let recs := (⟨Sigma.mk .intr thisRec.anon, Sigma.mk .intr thisRec.meta⟩) :: recs + let recs := ⟨Sigma.mk .intr thisRec.anon, Sigma.mk .intr thisRec.meta⟩ :: recs return (recs, thisCtors) - else do - let thisRec := ← toYatimaIpldExternalRec r - let recs := (⟨Sigma.mk .extr thisRec.anon, Sigma.mk .extr thisRec.meta⟩) :: recs + else + let thisRec ← toYatimaIpldExternalRec r + let recs := ⟨Sigma.mk .extr thisRec.anon, Sigma.mk .extr thisRec.meta⟩ :: recs return (recs, ctors) | _ => throw $ .nonRecursorExtractedFromChildren r.name - let (typeCid, type) ← toYatimaExpr ind.type + let (typeCid, type) ← compileExpr ind.type + -- Structures can't be recursive nor have indices let struct ← if ind.isRec || ind.numIndices != 0 then pure none else match ind.ctors with - | [ctor] => do - let some (_, _, ctorIdx) := (← read).recrCtx.find? ctor | throw $ .unknownConstant ctor + -- Structures can only have one constructor + | [ctorName] => do + let (_, _, ctorIdx) ← getFromRecrCtx! ctorName match ← derefConst ctorIdx with | .constructor ctor => pure $ some ctor | const => throw $ .invalidConstantKind' const "constructor" @@ -366,7 +418,7 @@ mutual unit := unit struct := struct } - let some (_, _, constIdx) := (← read).recrCtx.find? ind.name | throw $ .unknownConstant ind.name + let (_, _, constIdx) ← getFromRecrCtx! ind.name addToConsts constIdx tcInd return { anon := ⟨ () @@ -388,34 +440,24 @@ mutual , () , () , () ⟩ } - partial def isInternalRec (expr : Lean.Expr) (name : Lean.Name) : CompileM Bool := - match expr with - | .forallE _ t e _ => match e with - | .forallE .. => isInternalRec e name - -- t is the major premise - | _ => isInternalRec t name - | .app e .. => isInternalRec e name - | .const n .. => return n == name - | _ => return false - + /-- Encodes an internal recursor to to IPLD -/ partial def toYatimaIpldInternalRec (ctors : List Lean.Name) : - Lean.ConstantInfo → CompileM - (Ipld.Both (Ipld.Recursor .intr) × (List $ Ipld.Both Ipld.Constructor)) + Lean.ConstantInfo → CompileM + (Ipld.Both (Ipld.Recursor .intr) × (List $ Ipld.Both Ipld.Constructor)) | .recInfo rec => do withLevels rec.levelParams do - let (typeCid, type) ← toYatimaExpr rec.type + let (typeCid, type) ← compileExpr rec.type let ctorMap : RBMap Name (Ipld.Both Ipld.Constructor) compare ← rec.rules.foldlM (init := .empty) fun ctorMap r => do - match ctors.indexOf? r.ctor with - | some _ => - let ctor ← toYatimaConstructor r + if ctors.contains r.ctor then + let ctor ← toYatimaIpldConstructor r return ctorMap.insert ctor.meta.name.projᵣ ctor -- this is an external recursor rule - | none => return ctorMap - let retCtors ← ctors.mapM fun ctor => do - match ctorMap.find? ctor with + else return ctorMap + let retCtors ← ctors.mapM fun ctorName => do + match ctorMap.find? ctorName with | some thisCtor => pure thisCtor - | none => throw $ .unknownConstant ctor + | none => throw $ .custom s!"Couldn't find constructor {ctorName}" let tcRecr : Const := .intRecursor { name := rec.name lvls := rec.levelParams @@ -425,7 +467,7 @@ mutual motives := rec.numMotives minors := rec.numMinors k := rec.k } - let some (_, _, constIdx) := (← read).recrCtx.find? rec.name | throw $ .unknownConstant rec.name + let (_, _, constIdx) ← getFromRecrCtx! rec.name addToConsts constIdx tcRecr let recr := ⟨ { name := () @@ -449,11 +491,13 @@ mutual return (recr, retCtors) | const => throw $ .invalidConstantKind const "recursor" - partial def toYatimaConstructor (rule : Lean.RecursorRule) : CompileM $ Ipld.Both Ipld.Constructor := do - let (rhsCid, rhs) ← toYatimaExpr rule.rhs - match ← findConstant rule.ctor with + /-- Encodes a Lean constructor to IPLD -/ + partial def toYatimaIpldConstructor (rule : Lean.RecursorRule) : + CompileM $ Ipld.Both Ipld.Constructor := do + let (rhsCid, rhs) ← compileExpr rule.rhs + match ← getLeanConstant rule.ctor with | .ctorInfo ctor => - let (typeCid, type) ← toYatimaExpr ctor.type + let (typeCid, type) ← compileExpr ctor.type let tcCtor : Const := .constructor { name := ctor.name lvls := ctor.levelParams @@ -464,7 +508,7 @@ mutual rhs := rhs safe := not ctor.isUnsafe } - let some (_, _, constIdx) := (← read).recrCtx.find? ctor.name | throw $ .unknownConstant ctor.name + let (_, _, constIdx) ← getFromRecrCtx! ctor.name addToConsts constIdx tcCtor return ⟨ { rhs := rhsCid.anon @@ -485,54 +529,55 @@ mutual safe := () } ⟩ | const => throw $ .invalidConstantKind const "constructor" + /-- Encodes an external recursor to IPLD -/ partial def toYatimaIpldExternalRec : Lean.ConstantInfo → CompileM (Ipld.Both (Ipld.Recursor .extr)) - | .recInfo rec => do - withLevels rec.levelParams do - let (typeCid, type) ← toYatimaExpr rec.type - let (rules, tcRules) : Ipld.Both (fun k => List $ Ipld.RecursorRule k) × List RecursorRule := ← rec.rules.foldlM - (init := (⟨[], []⟩, [])) fun rules r => do - let (recrRule, tcRecrRule) ← toYatimaExternalRecRule r - return (⟨recrRule.anon::rules.1.anon, recrRule.meta::rules.1.meta⟩, tcRecrRule::rules.2) - let tcRecr : Const := .extRecursor { - name := rec.name - lvls := rec.levelParams - type := type + | .recInfo rec => withLevels rec.levelParams do + let (typeCid, type) ← compileExpr rec.type + let (rules, tcRules) : Ipld.Both (fun k => List $ Ipld.RecursorRule k) × List RecursorRule := ← rec.rules.foldlM + (init := (⟨[], []⟩, [])) fun rules r => do + let (recrRule, tcRecrRule) ← toYatimaIpldExternalRecRule r + return (⟨recrRule.anon::rules.1.anon, recrRule.meta::rules.1.meta⟩, tcRecrRule::rules.2) + let tcRecr : Const := .extRecursor { + name := rec.name + lvls := rec.levelParams + type := type + params := rec.numParams + indices := rec.numIndices + motives := rec.numMotives + minors := rec.numMinors + rules := tcRules + k := rec.k + } + let (_, _, constIdx) ← getFromRecrCtx! rec.name + addToConsts constIdx tcRecr + return ⟨ + { name := () + lvls := rec.levelParams.length + type := typeCid.anon params := rec.numParams indices := rec.numIndices motives := rec.numMotives minors := rec.numMinors - rules := tcRules - k := rec.k - } - let some (_, _, constIdx) := (← read).recrCtx.find? rec.name | throw $ .unknownConstant rec.name - addToConsts constIdx tcRecr - return ⟨ - { name := () - lvls := rec.levelParams.length - type := typeCid.anon - params := rec.numParams - indices := rec.numIndices - motives := rec.numMotives - minors := rec.numMinors - rules := rules.anon - k := rec.k }, - { name := rec.name - lvls := rec.levelParams - type := typeCid.meta - params := () - indices := () - motives := () - minors := () - rules := rules.meta - k := () } ⟩ + rules := rules.anon + k := rec.k }, + { name := rec.name + lvls := rec.levelParams + type := typeCid.meta + params := () + indices := () + motives := () + minors := () + rules := rules.meta + k := () } ⟩ | const => throw $ .invalidConstantKind const "recursor" - - partial def toYatimaExternalRecRule (rule : Lean.RecursorRule) : + + /-- Encodes an external recursor rule to IPLD -/ + partial def toYatimaIpldExternalRecRule (rule : Lean.RecursorRule) : CompileM (Ipld.Both Ipld.RecursorRule × RecursorRule) := do - let (rhsCid, rhs) ← toYatimaExpr rule.rhs - let const ← findConstant rule.ctor - let (ctorCid, ctor?) ← processYatimaConst const + let (rhsCid, rhs) ← compileExpr rule.rhs + let const ← getLeanConstant rule.ctor + let (ctorCid, ctor?) ← getCompiledConst const let ctor ← match ← derefConst ctor? with | .constructor ctor => pure ctor | const => throw $ .invalidConstantKind' const "constructor" @@ -542,41 +587,62 @@ mutual let tcRecrRule := { ctor := ctor, fields := rule.nfields, rhs := rhs } return (recrRule, tcRecrRule) - partial def toYatimaDef (struct : Lean.DefinitionVal) : CompileM (ConstCid × ConstIdx) := do - let all := if struct.all.length == 1 then [struct.name] else struct.all + /-- + Compiles adefinition and all definitions in the mutual block as a mutual + block, even if the definition itself is not in a mutual block. + + This function is rather similar to `Yatima.Compiler.compileInductive`. + -/ + partial def compileDefinition (struct : Lean.DefinitionVal) : + CompileM (ConstCid × ConstIdx) := do + let mutualSize := struct.all.length + + -- This solves an issue in which the constant name comes different in the + -- `all` attribute + let all := if mutualSize == 1 then [struct.name] else struct.all + -- Collecting and sorting all definitions in the mutual block let mutualDefs ← all.mapM fun name => do - match ← findConstant name with + match ← getLeanConstant name with | .defnInfo defn => pure defn | const => throw $ .invalidConstantKind const "definition" let mutualDefs ← sortDefs [mutualDefs] - let mutualSize := struct.all.length + + -- Reserving the slots in the array of constants let mut firstIdx ← modifyGet fun stt => - (stt.consts.size, { stt with consts := stt.consts.append (mkArray mutualSize default) }) - let mut mutualIdxs : RBMap Lean.Name RecrCtxEntry compare := RBMap.empty + (stt.consts.size, { stt with consts := stt.consts ++ mkArray mutualSize default }) + + -- Building the `recrCtx` + let mut recrCtx : RBMap Name RecrCtxEntry compare := RBMap.empty let mut mutIdx := 0 for (i, ds) in mutualDefs.enum do for (j, d) in ds.enum do - mutualIdxs := mutualIdxs.insert d.name (i, some j, firstIdx + mutIdx) + recrCtx := recrCtx.insert d.name (i, some j, firstIdx + mutIdx) mutIdx := mutIdx + 1 - let definitions ← withRecrs mutualIdxs $ - mutualDefs.mapM fun ds => ds.mapM $ toYatimaDefIpld + + let definitions ← withRecrs recrCtx $ mutualDefs.mapM (·.mapM toYatimaIpldDefinition) + + -- Building and storing the block let definitionsAnon := (definitions.map fun ds => match ds.head? with | some d => [d.1.anon] | none => []).join let definitionsMeta := definitions.map fun ds => ds.map $ (·.meta) ∘ Prod.fst let block : Ipld.Both Ipld.Const := ⟨ .mutDefBlock definitionsAnon, .mutDefBlock definitionsMeta ⟩ - let blockCid ← StoreValue.insert $ .const block + let blockCid ← addToStore $ .const block + -- While iterating on the definitions from the mutual block, we need to track + -- the correct objects to return let mut ret? : Option (ConstCid × ConstIdx) := none let mut i : Nat := 0 for (⟨defnAnon, defnMeta⟩, defn) in definitions.join do - let some (idx, _) := mutualIdxs.find? defn.name | throw $ .cantFindMutDefIndex defn.name + -- Storing and caching the definition projection + -- Also adds the constant to the array of constants + let some (idx, _) := recrCtx.find? defn.name | throw $ .cantFindMutDefIndex defn.name let value := ⟨ .definitionProj $ ⟨(), defn.lvls.length, defnAnon.type, blockCid.anon, idx⟩ , .definitionProj $ ⟨defn.name, defn.lvls, defnMeta.type, blockCid.meta, i⟩ ⟩ - let cid ← StoreValue.insert $ .const value + let cid ← addToStore $ .const value let constIdx := i + firstIdx - addToConsts constIdx $ .definition defn addToCache defn.name (cid, constIdx) + addToConsts constIdx $ .definition defn if defn.name == struct.name then ret? := some (cid, constIdx) i := i + 1 @@ -584,10 +650,11 @@ mutual | some ret => return ret | none => throw $ .constantNotCompiled struct.name - partial def toYatimaDefIpld (defn : Lean.DefinitionVal) : + /-- Encodes a definition to IPLD -/ + partial def toYatimaIpldDefinition (defn : Lean.DefinitionVal) : CompileM (Ipld.Both Ipld.Definition × Definition) := do - let (typeCid, type) ← toYatimaExpr defn.type - let (valueCid, value) ← toYatimaExpr defn.value + let (typeCid, type) ← compileExpr defn.type + let (valueCid, value) ← compileExpr defn.value let defn := { name := defn.name lvls := defn.levelParams @@ -596,15 +663,19 @@ mutual safety := defn.safety } return (⟨defn.toIpld typeCid valueCid, defn.toIpld typeCid valueCid⟩, defn) - partial def cmpExpr (names : Std.RBMap Lean.Name Nat compare) : + /-- + A name-irrelevant ordering of Lean expressions. + `weakOrd` contains the best known current mutual ordering + -/ + partial def cmpExpr (weakOrd : Std.RBMap Name Nat compare) : Lean.Expr → Lean.Expr → CompileM Ordering | e@(.mvar ..), _ => throw $ .unfilledExprMetavariable e | _, e@(.mvar ..) => throw $ .unfilledExprMetavariable e | e@(.fvar ..), _ => throw $ .freeVariableExpr e | _, e@(.fvar ..) => throw $ .freeVariableExpr e - | .mdata _ x, .mdata _ y => cmpExpr names x y - | .mdata _ x, y => cmpExpr names x y - | x, .mdata _ y => cmpExpr names x y + | .mdata _ x, .mdata _ y => cmpExpr weakOrd x y + | .mdata _ x, y => cmpExpr weakOrd x y + | x, .mdata _ y => cmpExpr weakOrd x y | .bvar x, .bvar y => return (compare x y) | .bvar .., _ => return .lt | _, .bvar .. => return .gt @@ -612,28 +683,32 @@ mutual | .sort .., _ => return .lt | _, .sort .. => return .gt | .const x xls, .const y yls => do - let univs ← concatOrds <$> (List.zip xls yls).mapM (fun (x,y) => cmpLevel x y) + let univs ← concatOrds <$> (xls.zip yls).mapM fun (x,y) => cmpLevel x y if univs != .eq then return univs - match names.find? x, names.find? y with + match weakOrd.find? x, weakOrd.find? y with | some nx, some ny => return compare nx ny | none, some _ => return .gt | some _, none => return .lt | none, none => do - let xCid := (← processYatimaConst (← findConstant x)).fst - let yCid := (← processYatimaConst (← findConstant y)).fst - return (compare xCid.anon yCid.anon) + let xCid := (← getCompiledConst (← getLeanConstant x)).fst + let yCid := (← getCompiledConst (← getLeanConstant y)).fst + return compare xCid.anon yCid.anon | .const .., _ => return .lt | _, .const .. => return .gt - | .app xf xa, .app yf ya => (· * ·) <$> cmpExpr names xf yf <*> cmpExpr names xa ya + | .app xf xa, .app yf ya => + (· * ·) <$> cmpExpr weakOrd xf yf <*> cmpExpr weakOrd xa ya | .app .., _ => return .lt | _, .app .. => return .gt - | .lam _ xt xb _, .lam _ yt yb _ => (· * ·) <$> cmpExpr names xt yt <*> cmpExpr names xb yb + | .lam _ xt xb _, .lam _ yt yb _ => + (· * ·) <$> cmpExpr weakOrd xt yt <*> cmpExpr weakOrd xb yb | .lam .., _ => return .lt | _, .lam .. => return .gt - | .forallE _ xt xb _, .forallE _ yt yb _ => (· * ·) <$> cmpExpr names xt yt <*> cmpExpr names xb yb + | .forallE _ xt xb _, .forallE _ yt yb _ => + (· * ·) <$> cmpExpr weakOrd xt yt <*> cmpExpr weakOrd xb yb | .forallE .., _ => return .lt | _, .forallE .. => return .gt - | .letE _ xt xv xb _, .letE _ yt yv yb _ => (· * · * ·) <$> cmpExpr names xt yt <*> cmpExpr names xv yv <*> cmpExpr names xb yb + | .letE _ xt xv xb _, .letE _ yt yv yb _ => + (· * · * ·) <$> cmpExpr weakOrd xt yt <*> cmpExpr weakOrd xv yv <*> cmpExpr weakOrd xb yb | .letE .., _ => return .lt | _, .letE .. => return .gt | .lit x, .lit y => @@ -641,36 +716,72 @@ mutual | .lit .., _ => return .lt | _, .lit .. => return .gt | .proj _ nx tx, .proj _ ny ty => do - let ts ← cmpExpr names tx ty + let ts ← cmpExpr weakOrd tx ty return concatOrds [compare nx ny, ts] - partial def cmpDef (names : Std.RBMap Lean.Name Nat compare) + /-- AST comparison of two Lean definitions. + `weakOrd` contains the best known current mutual ordering -/ + partial def cmpDef (weakOrd : Std.RBMap Name Nat compare) (x : Lean.DefinitionVal) (y : Lean.DefinitionVal) : CompileM Ordering := do let ls := compare x.levelParams.length y.levelParams.length - let ts ← cmpExpr names x.type y.type - let vs ← cmpExpr names x.value y.value + let ts ← cmpExpr weakOrd x.type y.type + let vs ← cmpExpr weakOrd x.value y.value return concatOrds [ls, ts, vs] - partial def eqDef (names : Std.RBMap Lean.Name Nat compare) - (x : Lean.DefinitionVal) (y : Lean.DefinitionVal) : - CompileM Bool := do - match (← cmpDef names x y) with - | .eq => pure true - | _ => pure false - - /-- todo -/ + /-- AST equality between two Lean definitions. + `weakOrd` contains the best known current mutual ordering -/ + partial def eqDef (weakOrd : Std.RBMap Name Nat compare) + (x : Lean.DefinitionVal) (y : Lean.DefinitionVal) : CompileM Bool := do + match (← cmpDef weakOrd x y) with + | .eq => pure true + | _ => pure false + + /-- `sortDefs` recursively sorts a list of mutual definitions into weakly equal blocks. + At each stage, we take as input the current best approximation of known weakly equal + blocks as a List of blocks, hence the `List (List DefinitionVal)` as the argument type. + We recursively take the input blocks and resort to improve the approximate known + weakly equal blocks, obtaining a sequence of list of blocks: + ``` + dss₀ := [startDefs] + dss₁ := sortDefs dss₀ + dss₂ := sortDefs dss₁ + dss₍ᵢ₊₁₎ := sortDefs dssᵢ ... + ``` + Initially, `startDefs` is simply the list of definitions we receive from `DefinitionVal.all`; + since there is no order yet, we treat it as one block all weakly equal. On the other hand, + at the end, there is some point where `dss₍ᵢ₊₁₎ := dssᵢ`, then we have hit a fixed point + and we may end the sorting process. (We claim that such a fixed point exists, although + technically we don't really have a proof.) + + On each iteration, we hope to improve our knowledge of weakly equal blocks and use that + knowledge in the next iteration. e.g. We start with just one block with everything in it, + but the first sort may differentiate the one block into 3 blocks. Then in the second + iteration, we have more information than than first, since the relationship of the 3 blocks + gives us more information; this information may then be used to sort again, turning 3 blocks + into 4 blocks, and again 4 blocks into 6 blocks, etc, until we have hit a fixed point. + This step is done in the computation of `newDss` and then comparing it to the original `dss`. + + Two optimizations: + + 1. `names := enum dss` records the ordering information in a map for faster access. + Directly using `List.findIdx?` on dss is slow and introduces `Option` everywhere. + `names` is used as a custom comparison in `ds.sortByM (cmpDef names)`. + 2. `normDss/normNewDss`. We want to compare if two lists of blocks are equal. + Technically blocks are sets and their order doesn't matter, but we have encoded + them as lists. To fix this, we sort the list by name before comparing. Note we + could maybe also use `List (RBTree ..)` everywhere, but it seemed like a hassle. -/ partial def sortDefs (dss : List (List Lean.DefinitionVal)) : CompileM (List (List Lean.DefinitionVal)) := do let enum (ll : List (List Lean.DefinitionVal)) := Std.RBMap.ofList (ll.enum.map fun (n, xs) => xs.map (·.name, n)).join - let names := enum dss + let weakOrd := enum dss let newDss ← (← dss.mapM fun ds => match ds with | [] => unreachable! | [d] => return [[d]] - | ds => return (← List.groupByM (eqDef names) $ - ← ds.sortByM (cmpDef names))).joinM + | ds => return (← List.groupByM (eqDef weakOrd) $ + ← ds.sortByM (cmpDef weakOrd))).joinM -- must normalize, see comments let normDss := dss.map fun ds => ds.map (·.name) |>.sort @@ -682,19 +793,28 @@ mutual end +/-- Iterates over the constants of a `Lean.ConstMap` triggering their compilation -/ def compileM (constMap : Lean.ConstMap) : CompileM Unit := do let log := (← read).log constMap.forM fun _ const => do - let (_, c) ← processYatimaConst const + let (_, c) ← getCompiledConst const if log then IO.println "\n=========================================" IO.println const.name IO.println "=========================================" - IO.println s!"{PrintLean.printLeanConst const}" + IO.println $ PrintLean.printLeanConst const IO.println "=========================================" - IO.println s!"{← PrintYatima.printYatimaConst (← derefConst c)}" + IO.println $ ← PrintYatima.printYatimaConst (← derefConst c) IO.println "=========================================\n" +/-- +Compiles the "delta" of a file, that is, the content that is added on top of +what is imported by it. + +Important: constants with open references in their expressions are filtered out. +Open references are variables that point to names which aren't present in the +`Lean.ConstMap`. +-/ def compile (filePath : System.FilePath) (log : Bool := false) (stt : CompileState := default) : IO $ Except CompileError CompileState := do let filePathStr := filePath.toString @@ -722,6 +842,8 @@ def compile (filePath : System.FilePath) (log : Bool := false) CompileM.run (.init map log) stt (compileM delta) /-- +Sets the directories where `olean` files can be found. + This function must be called before `compile` if the file to be compiled has imports (the automatic imports from `Init` also count). -/ diff --git a/Yatima/Compiler/Printing.lean b/Yatima/Compiler/Printing.lean index 9cbc7e45..f9892555 100644 --- a/Yatima/Compiler/Printing.lean +++ b/Yatima/Compiler/Printing.lean @@ -50,11 +50,11 @@ def isProp : Expr → Bool | _ => false def isAtomAux : Expr → Bool - | .const .. | .var .. | .lit .. | .lty .. => true + | .const .. | .var .. | .lit .. => true | _ => false def isAtom : Expr → Bool - | .const .. | .var .. | .lit .. | .lty .. => true + | .const .. | .var .. | .lit .. => true | .proj _ e => isAtom e | e => isProp e @@ -117,9 +117,6 @@ mutual | .lit lit => return match lit with | .nat num => s!"{num}" | .str str => s!"\"{str}\"" - | .lty lty => return match lty with - | .nat => "Nat" - | .str => "String" | .proj idx expr => return s!"{← paren expr}.{idx})" end diff --git a/Yatima/Converter/Converter.lean b/Yatima/Converter/Converter.lean index 7f17e6aa..73de6bbc 100644 --- a/Yatima/Converter/Converter.lean +++ b/Yatima/Converter/Converter.lean @@ -200,7 +200,6 @@ mutual let bod ← exprFromIpld ⟨bodAnon, bodMeta⟩ pure $ .letE name typ val bod | .lit lit, .lit () => pure $ .lit lit - | .lty lty, .lty () => pure $ .lty lty | .proj idx bodAnon, .proj () bodMeta => let bod ← exprFromIpld ⟨bodAnon, bodMeta⟩ pure $ .proj idx bod diff --git a/Yatima/Datatypes/Expr.lean b/Yatima/Datatypes/Expr.lean index c9ca6d14..a94392cd 100644 --- a/Yatima/Datatypes/Expr.lean +++ b/Yatima/Datatypes/Expr.lean @@ -11,12 +11,6 @@ inductive BinderInfo | auxDecl deriving BEq, Inhabited -/-- There are only two types of literals -/ -inductive LitType - | nat : LitType - | str : LitType - deriving BEq, Inhabited - /-- The literal values: numbers or words -/ inductive Literal | nat : Nat → Literal @@ -51,7 +45,6 @@ inductive Expr (k : Kind) | pi : Nameₘ k → BinderInfoₐ k → ExprCid k → ExprCid k → Expr k | letE : Nameₘ k → ExprCid k → ExprCid k → ExprCid k → Expr k | lit : Split Literal Unit k → Expr k - | lty : Split LitType Unit k → Expr k | proj : Natₐ k → ExprCid k → Expr k deriving BEq, Inhabited @@ -64,13 +57,12 @@ def Expr.ctorName : Expr k → String | .pi .. => "pi" | .letE .. => "let" | .lit .. => "lit" - | .lty .. => "lty" | .proj .. => "proj" end Ipld --- Points to a constant in an array of constants -scoped notation "ConstIdx" => Nat +/-- Points to a constant in an array of constants -/ +abbrev ConstIdx := Nat /-- Representation of expressions for typechecking and transpilation -/ inductive Expr @@ -82,7 +74,6 @@ inductive Expr | pi : Name → BinderInfo → Expr → Expr → Expr | letE : Name → Expr → Expr → Expr → Expr | lit : Literal → Expr - | lty : LitType → Expr | proj : Nat → Expr → Expr deriving Inhabited, BEq @@ -155,7 +146,6 @@ def ctorName : Expr → String | pi .. => "pi" | letE .. => "let" | lit .. => "lit" - | lty .. => "lty" | proj .. => "proj" -- Gets the depth of a Yatima Expr (helpful for debugging later) diff --git a/Yatima/Ipld/ToIpld.lean b/Yatima/Ipld/ToIpld.lean index 428a4baa..c7508ce5 100644 --- a/Yatima/Ipld/ToIpld.lean +++ b/Yatima/Ipld/ToIpld.lean @@ -134,10 +134,6 @@ instance : Coe BinderInfo Ipld where coe | .instImplicit => .number 3 | .auxDecl => .number 4 -instance : Coe LitType Ipld where coe - | .nat => .number 0 - | .str => .number 1 - instance : Coe Literal Ipld where coe | .nat n => n | .str s => .string s @@ -204,8 +200,7 @@ def exprToIpld : (Ipld.Expr k) → Ipld | .pi n i d c => .array #[.number $ Ipld.EXPR k, .number 5, n, i, d, c] | .letE n t v b => .array #[.number $ Ipld.EXPR k, .number 6, n, t, v, b] | .lit l => .array #[.number $ Ipld.EXPR k, .number 7, l] - | .lty l => .array #[.number $ Ipld.EXPR k, .number 8, l] - | .proj n e => .array #[.number $ Ipld.EXPR k, .number 10, n, e] + | .proj n e => .array #[.number $ Ipld.EXPR k, .number 8, n, e] def constToIpld : (Ipld.Const k) → Ipld | .axiom ⟨n, l, t, s⟩ => .array #[.number $ Ipld.CONST k, .number 0, n, l, t, s] diff --git a/Yatima/Transpiler/Transpiler.lean b/Yatima/Transpiler/Transpiler.lean index e6d041e2..1595e01b 100644 --- a/Yatima/Transpiler/Transpiler.lean +++ b/Yatima/Transpiler/Transpiler.lean @@ -56,9 +56,9 @@ This clearly duplicates `Nat` a lot, so we should try to find an more compact representation for constructor/inductive data. 2. `Prod` - a. `Prod.mk` is `(lambda (:1 :2 fst snd) ((Prod :1 :2) 0 fst snd))` + a. `Prod.mk` is `(lambda (α β fst snd) ((Prod α β) 0 α β fst snd))` -Note that `(Prod :1 :2)` reduces to `("Prod" 2 0)`, allowing us to access +Note that `(Prod α β)` reduces to `("Prod" 2 0)`, allowing us to access the inductive data. ## Recursors @@ -70,8 +70,28 @@ TODO ## Projections -TODO +Yatima projections are encoded by `.proj idx e`, where `e` is the expression +and `idx` is the index of the data we want to extract out of `e`. + +Recall that we encode inductive constructors with a list: `ctorᵢ a₁ a₂ ..`. +In particular, structures only have one constructor: `struct.mk`, and their +data is simply recorded in a list. Hence, one may think that we can simply +write `getelem e (idx + 2)` (the `+2` is to skip the name and constructor index). + +However! This fails because structures may have parameters. For example, +the tuple `(1, 2)` is encoded in Lurk as `e := ((Prod Nat Nat) 0 Nat Nat 1 2)`. +If we want `e.1`, then `getelem e (1 + 2)` is `Nat`. In general, constructor +arguments will always first contain the parameters and indices, before the +actual fields of the constructor. So we modify our first solution to include +these. `let args := cdr (cdr e) in getelem args (idx + params + indices)`. + +Great! Now how do we find `params` and `indices`? We can simply look at the +head of `e`: since `e` is an inductive, we know that the head holds some +inductive type `(( ) )`! This is the reason we +include the two `params` and `indices` in the inductive data. +Note that because we don't know what the head of `e` is until we reduce it, +this logic occurs *at run time*! -/ namespace Yatima.Transpiler @@ -121,7 +141,7 @@ mutual let lurkBinds := binds.foldr ( fun (n : Name) (acc : Lurk.Expr) => ⟦(cons $n $acc)⟧ ) ⟦nil⟧ - -- TODO(Winston): Explain + -- if the inductive has arguments, then apply them from `ctor`'s bindings let ind : Lurk.Expr := match (indices + params) with | 0 => ⟦$(name.getPrefix)⟧ | _ => .app ⟦$(name.getPrefix)⟧ $ binds.take (params + indices) |>.map fun (n : Name) => ⟦$n⟧ @@ -161,7 +181,9 @@ mutual if (← get).visited.contains ind.name then break let (_, ⟨binds⟩) := descendPi ind.type #[] - -- TODO(Winston): Explain + -- when the inductive type is a function, i.e. + -- we have params or indices, then `lurkInd` is + -- encoded as a lambda let lurkInd := if binds.length == 0 then ⟦,($(toString ind.name) $(ind.params) $(ind.indices))⟧ else @@ -173,8 +195,7 @@ mutual partial def exprToLurkExpr (e : Expr) : TranspileM Lurk.Expr := do IO.print ">> exprToLurkExpr: " match e with - | .sort .. - | .lty .. => return ⟦nil⟧ + | .sort .. => return ⟦nil⟧ | .var name _ => IO.println s!"var {name}" return ⟦$name⟧ diff --git a/Yatima/Transpiler/Utils.lean b/Yatima/Transpiler/Utils.lean index 65fd1233..6ea88d22 100644 --- a/Yatima/Transpiler/Utils.lean +++ b/Yatima/Transpiler/Utils.lean @@ -2,7 +2,10 @@ import Yatima.Transpiler.TranspileM namespace Yatima.Transpiler -open Yatima.Compiler +inductive StoreKey : Type → Type + | univ : Ipld.Both Ipld.UnivCid → StoreKey (Ipld.Both Ipld.Univ) + | expr : Ipld.Both Ipld.ExprCid → StoreKey (Ipld.Both Ipld.Expr) + | const : Ipld.Both Ipld.ConstCid → StoreKey (Ipld.Both Ipld.Const) def StoreKey.find? : (key : StoreKey A) → TranspileM (Option A) | .univ univCid => do diff --git a/Yatima/Typechecker/Equal.lean b/Yatima/Typechecker/Equal.lean index 8b6ba828..c1499260 100644 --- a/Yatima/Typechecker/Equal.lean +++ b/Yatima/Typechecker/Equal.lean @@ -57,7 +57,6 @@ mutual if isU || isP then pure true else match term, term' with | .lit lit, .lit lit' => pure $ lit == lit' - | .lty lty, .lty lty' => pure $ lty == lty' | .sort u, .sort u' => pure $ equalUniv u u' | .pi name _ dom img env, .pi name' _ dom' img' env' => do -- For equality we don't need to know the universe levels, only the "shape" of the type. diff --git a/Yatima/Typechecker/Eval.lean b/Yatima/Typechecker/Eval.lean index c0308971..dfce387e 100644 --- a/Yatima/Typechecker/Eval.lean +++ b/Yatima/Typechecker/Eval.lean @@ -148,7 +148,6 @@ mutual let env := (← read).env pure $ Value.sort (instBulkReduce env.univs univ) | .lit lit => pure $ Value.lit lit - | .lty lty => pure $ Value.lty lty | .proj idx expr => do match (← eval expr) with | .app neu@(.const _ ctor _) args => match (← read).store.get! ctor with diff --git a/Yatima/Typechecker/Infer.lean b/Yatima/Typechecker/Infer.lean index bd79557c..8e6f6e89 100644 --- a/Yatima/Typechecker/Infer.lean +++ b/Yatima/Typechecker/Infer.lean @@ -80,7 +80,6 @@ mutual | _ => throw CheckError.notTyp | .lit (Literal.nat _) => pure $ Value.lty LitType.nat | .lit (Literal.str _) => pure $ Value.lty LitType.str - | .lty .. => pure $ Value.sort (Univ.succ Univ.zero) | .const _ k const_univs => let univs := (← read).env.univs let const := (← read).store.get! k diff --git a/Yatima/Typechecker/Printing.lean b/Yatima/Typechecker/Printing.lean index c3463036..1d79eb57 100644 --- a/Yatima/Typechecker/Printing.lean +++ b/Yatima/Typechecker/Printing.lean @@ -23,8 +23,6 @@ def printExpr (expr : Expr) : String := | .letE nam typ val bod => s!"let {nam} : {printExpr typ} := {printExpr val} in {printExpr bod}" | .lit (.nat x) => s!"{x}" | .lit (.str x) => s!"\"{x}\"" - | .lty .nat => s!"Nat" - | .lty .str => s!"String" | .proj idx val => s!"{printExpr val}.{idx}" mutual @@ -79,8 +77,6 @@ partial def printLamBod (expr : Expr) (env : Env Value) : String := | .letE nam typ val bod => s!"let {nam} : {printLamBod typ env} := {printLamBod val env} in {printLamBod bod env}" | .lit (.nat x) => s!"{x}" | .lit (.str x) => s!"\"{x}\"" - | .lty .nat => s!"Nat" - | .lty .str => s!"String" | .proj idx val => s!"{printLamBod val env}.{idx}" partial def printSpine (neu : Neutral) (args : Args) : String := diff --git a/Yatima/Typechecker/Value.lean b/Yatima/Typechecker/Value.lean index 3fada824..79858618 100644 --- a/Yatima/Typechecker/Value.lean +++ b/Yatima/Typechecker/Value.lean @@ -50,6 +50,12 @@ inductive CheckError where | impossible : CheckError deriving Inhabited +/-- There are only two types of literals -/ +inductive LitType + | nat : LitType + | str : LitType + deriving BEq, Inhabited + mutual -- A neutral term is either a variable or a constant with not enough arguments to reduce. -- They appear as the head of a stuck application.