diff --git a/packages/SwingSet/test/gc/bootstrap.js b/packages/SwingSet/test/gc/bootstrap.js index 3c08bb01b4e..8cdf7ce4dfe 100644 --- a/packages/SwingSet/test/gc/bootstrap.js +++ b/packages/SwingSet/test/gc/bootstrap.js @@ -23,5 +23,10 @@ export function buildRootObject() { async makeInvitation0() { await E(target).makeInvitationTarget(zoe); }, + + // for #9939 + async storePresenceInWeakSet() { + await E(target).store(A); + }, }); } diff --git a/packages/SwingSet/test/gc/gc-vat.test.js b/packages/SwingSet/test/gc/gc-vat.test.js index a593ff6cec7..617e4b07a7e 100644 --- a/packages/SwingSet/test/gc/gc-vat.test.js +++ b/packages/SwingSet/test/gc/gc-vat.test.js @@ -158,3 +158,84 @@ test('forward to fake zoe', async t => { // 'makeInvitationTarget' result promise with it, then dropped it t.is(findClist(c, targetID, invitation), undefined); }); + +// see #9939 +test('drop without retire', async t => { + let targetID; + let didBOYD = false; + // const msgs = ['deliver', 'deliver-result', 'syscall', 'syscall-result']; + + function slogSender(slogObj) { + const { + time: _1, + replay: _2, + crankNum: _3, + deliveryNum: _4, + monotime: _5, + ...o + } = slogObj; + if (o.vatID !== targetID) return; + if (o.type === 'deliver' && o.kd[0] === 'bringOutYourDead') { + didBOYD = true; + } + // if (msgs.includes(o.type)) console.log(JSON.stringify(o)); + } + const config = { + bootstrap: 'bootstrap', // v6 + vats: { + bootstrap: { + // v6 + sourceSpec: new URL('bootstrap.js', import.meta.url).pathname, + }, + target: { + // v1 + sourceSpec: new URL('vat-target.js', import.meta.url).pathname, + // avoid V8's GC nondeterminism, only needed on the target vat + creationOptions: { managerType: 'xs-worker' }, + }, + }, + }; + const kernelStorage = initSwingStore().kernelStorage; + await initializeSwingset(config, [], kernelStorage); + const c = await makeSwingsetController(kernelStorage, {}, { slogSender }); + t.teardown(c.shutdown); + + c.pinVatRoot('bootstrap'); + targetID = c.vatNameToID('target'); + c.pinVatRoot('target'); + + await c.run(); + + c.queueToVatRoot('bootstrap', 'storePresenceInWeakSet', []); + await c.run(); + + // now do enough dummy messages to trigger a new BOYD + didBOYD = false; + while (!didBOYD) { + c.queueToVatRoot('target', 'dummy', []); + await c.run(); + } + + // now tell vat-target to drop its WeakSet + c.queueToVatRoot('target', 'drop', []); + await c.run(); + + // and trigger a second BOYD + didBOYD = false; + while (!didBOYD) { + // this will fail once the vat is terminated + try { + c.queueToVatRoot('target', 'dummy', []); + } catch (e) { + if (/vat name "target" must exist/.test(e.message)) { + t.fail('vat terminated, bug is present'); + break; + } + } + await c.run(); + } + t.true(didBOYD); + + // the test passes if the vat survived + return t.pass(); +}); diff --git a/packages/SwingSet/test/gc/vat-target.js b/packages/SwingSet/test/gc/vat-target.js index dd4c1d5d0a6..53900b66055 100644 --- a/packages/SwingSet/test/gc/vat-target.js +++ b/packages/SwingSet/test/gc/vat-target.js @@ -1,6 +1,9 @@ import { Far, E } from '@endo/far'; -export function buildRootObject() { +export function buildRootObject(_vatPowers, _vatParameters, baggage) { + /** @type { WeakSet | undefined } */ + let ws = new WeakSet(); + return Far('root', { async two(A, B) { // A=ko26 B=ko27 @@ -10,5 +13,14 @@ export function buildRootObject() { makeInvitationTarget(zoe) { return E(zoe).makeInvitationZoe(); }, + + // these three are for testing #9939 + store: x => { + baggage.init('x', x); + assert(ws); + ws.add(x); + }, + dummy: () => 0, + drop: () => (ws = undefined), }); } diff --git a/packages/boot/test/orchestration/restart-contracts.test.ts b/packages/boot/test/orchestration/restart-contracts.test.ts index fe6104b6b12..dcf79a6db83 100644 --- a/packages/boot/test/orchestration/restart-contracts.test.ts +++ b/packages/boot/test/orchestration/restart-contracts.test.ts @@ -183,9 +183,7 @@ test.serial('stakeAtom', async t => { // restarting that one. For them to share bootstrap they'll each need a unique // instance name, which will require paramatizing the the two builders scripts // and the two core-eval functions. -// -// TODO(#9939): Flaky under Node.js until liveslots problem exposed by vows is fixed. -test.serial.skip('basicFlows', async t => { +test.serial('basicFlows', async t => { const { walletFactoryDriver, buildProposal, diff --git a/packages/swingset-liveslots/src/boyd-gc.js b/packages/swingset-liveslots/src/boyd-gc.js new file mode 100644 index 00000000000..8f6c341039c --- /dev/null +++ b/packages/swingset-liveslots/src/boyd-gc.js @@ -0,0 +1,470 @@ +import { parseVatSlot } from './parseVatSlots.js'; + +/* + Theory of Operation (imports/Presences) + + Imports are in one of 5 states: UNKNOWN, REACHABLE, UNREACHABLE, + COLLECTED, FINALIZED. Note that there's no actual state machine with those + values, and we can't observe all of the transitions from JavaScript, but + we can describe what operations could cause a transition, and what our + observations allow us to deduce about the state: + + * UNKNOWN moves to REACHABLE when a crank introduces a new import + * userspace holds a reference only in REACHABLE + * REACHABLE moves to UNREACHABLE only during a userspace crank + * UNREACHABLE moves to COLLECTED when GC runs, which queues the finalizer + * COLLECTED moves to FINALIZED when a new turn runs the finalizer + * liveslots moves from FINALIZED to UNKNOWN by syscalling dropImports + + convertSlotToVal either imports a vref for the first time, or + re-introduces a previously-seen vref. It transitions from: + + * UNKNOWN to REACHABLE by creating a new Presence + * UNREACHABLE to REACHABLE by re-using the old Presence that userspace + forgot about + * COLLECTED/FINALIZED to REACHABLE by creating a new Presence + + Our tracking tables hold data that depends on the current state: + + * slotToVal holds a WeakRef in [REACHABLE, UNREACHABLE, COLLECTED] + * that WeakRef .deref()s into something in [REACHABLE, UNREACHABLE] + * possiblyDeadSet holds the vref only in FINALIZED + * (TODO) re-introduction could remove the vref from possiblyDeadSet + + Each state thus has a set of perhaps-measurable properties: + + * UNKNOWN: slotToVal[baseRef] is missing, baseRef not in possiblyDeadSet + * REACHABLE: slotToVal has live weakref, userspace can reach + * UNREACHABLE: slotToVal has live weakref, userspace cannot reach + * COLLECTED: slotToVal[baseRef] has dead weakref + * FINALIZED: slotToVal[baseRef] is missing, baseRef is in possiblyDeadSet + + Our finalizer callback is queued by the engine's transition from + UNREACHABLE to COLLECTED, but the baseRef might be re-introduced before the + callback has a chance to run. There might even be multiple copies of the + finalizer callback queued. So the callback must deduce the current state + and only perform cleanup (i.e. delete the slotToVal entry and add the + baseRef to possiblyDeadSet) in the COLLECTED state. + + Our general rule is "trust the finalizer". The GC code below + considers a Presence to be reachable (the vref's "RAM pillar" + remains "up") until it moves to the FINALIZED state. We do this to + avoid race conditions between some other pillar dropping (and a + BOYD sampling the WeakRef) while it is in the COLLECTED state. If + we treated COLLECTED as unreachable,, then the subsequent + finalizer callback would examine the vref a second time, + potentially causing a vat-fatal double syscall.dropImports. This + rule may change if/when we use FinalizationRegistry better, by + explicitly de-registering the vref when we drop it, which JS + guarantees will remove and pending callback from the queue. This + may help us avoid probing the WeakRef during BOYD (instead relying + upon the fired/not-fired state of the FR), since that probing can + cause engines to retain objects longer than necessary. +*/ + +/* + Additional notes: + + There are three categories of vrefs: + * Presence-style (o-NN, imports, durable) + * Remotable-style (o+NN, exportable, ephemeral) + * Representative-style (o+vKK/II or o+dKK/II, exportable, virtual/durable) + + We don't necessarily have a Presence for every o-NN vref that the + vat can reach, because the vref might be held in virtual/durable + data ("vdata") while the in-RAM `Presence` object was + dropped. Likewise the in-RAM `Representative` can be dropped while + the o+dKK/II vref is kept REACHABLE by either vdata or an export to + the kernel. We *do* always have a Remotable for every o+NN vref that + the vat knows about, because Remotables are ephemeral. + + We aren't recording any information about the kernel-facing import + status (c-list state) for Presence-style vrefs (o-NN), so we rely + upon the invariant that you only add a vref to possiblyDeadSet if it + was REACHABLE first. That way, possiblyDeadSet.has(vref) means that + the import status was REACHABLE. Likewise, code should only add to + possiblyRetiredSet if the vref was at least RECOGNIZABLE + beforehand. This helps us avoid a vat-fatal duplicate dropImports or + retireImports syscall. + + For imports, the lifetime is controlled by the upstream vat: we + might drop REACHABLE today and maintain RECOGNIZABLE for days until + the object is finally retired. For exports *we* control the + lifetime, so when we determine an export is no longer REACHABLE, we + delete it and retire the vref immediately, and it does not + observably linger in the RECOGNIZABLE state. This simplifies our + tracking, and allows the deletion of Remotables and + Representative-type vrefs to be idempotent. + + Each vref's reachability status is determined by a set of + "pillars". For Presence-style vrefs, there are two: the RAM pillar + (the `Presence` object), and the vdata pillar. The vdata pillar is + tracked in a vatstore key named `vom.rc.${vref}`, which stores the + reachable/recognizable refcounts. + + For Representative-style vrefs, we add the export-status pillar, + because anything that we've exported to the kernel must be kept + alive until the kernel issues a dispatch.dropExports. That gives us + three pillars: + * the RAM pillar is the `Representative` object + * the vdata pillar is stored in `vom.rc.${vref}` + * the export-status pillar is stored in `vom.es.${vref}` + + Remotables have only the RAM pillar. When a Remotable-style vref is + exported to the kernel, the Remotable is added to the + `exportedRemotables` set. And when it is stored in vdata, it appears + as a key of the `remotableRefCounts` map. That keeps the Remotable + itself alive until the other reachability pathways have gone + away. We don't do this for Representatives because it would violate + our "don't use RAM for inactive virtual objects" rule. + + When an imported Presence becomes UNREACHABLE, it might still be + RECOGNIZABLE, by virtue of being the key of one or more weak + collections. If not, it might transit from REACHABLE directly to + NONE. The code that reacts to the UNREACHABLE transition must check + for recognizers, and do a retireImports right away if there are + none. Otherwise, recognizer will remain until either the kernel + retires the object (dispatch.retireImports), or the weak collection + is deleted, in which case possiblyRetiredSet will be updated with + the vref that might no longer be recognized. There will be a race + between us ceasing to recognize the vref (which should trigger a + syscall.retireImports), and the kernel telling us the object has + been deleted (via dispatch.retireImports). Either one must inhibit + the other. + + possiblyRetiredSet only cares about Presence-style vrefs, because + they represent imports, whose lifetime is not under our control. The + collection-deletion code will add Remotable- and Representative- + style vrefs in possiblyRetiredSet, but we can remove and ignore + them. + + We use slotToVal.has(vref) everywhere for our "is it still + reachable" check, which returns true for the REACHABLE / UNREACHABLE + / COLLECTED states, and false for the FINALIZED state. In contrast, + getValForSlot(vref) returns false for both COLLECTED and + FINALIZED. We want COLLECTED to qualify as "still reachable" because + it means there's still a finalizer callback queued, which will be + run eventually, and we need that callback to not trigger a duplicate + drop. We use slotToVal.has() in the possiblyRetiredSet loop (to + inhibit retirement of imported vrefs whose Presences are in the + COLLECTED state, and which just lost a recognizer), because + getValForSlot would allow such COLLECTED vrefs to be retired, even + before the finalizer had fired and could do a dropImports. + + When we decide to delete a virtual object, we will delete its + `state`, decrementing the refcounts of any objects therein, which + might shake loose more data. So we keep looping until we've stopped + adding things to possiblyDeadSet. The same can happen when we use + vrm.ceaseRecognition() to delete any weak collection values keyed by + it. We also call ceaseRecognition when we realize that a Remotable + has been deleted. But the possiblyDeadSet processing loop cannot + make the decision to retire a Presence-style vref: those are deleted + outside our vat, and the kernel notifies us of the vref's retirement + with dispatch.retireImports (which also calls ceaseRecognition). The + only thing possiblyDeadSet can tell us about Presences is that our + vat can no longer *reach* the vref, which means we need to do a + syscall.dropImports, which cannot immediately release more data. + + When the kernel sends us a dispatch.bringOutYourDead (or "BOYD" for + short), this scanForDeadObjects() will be called. This is the only + appropriate time for the syscall behavior to depend upon engine GC + behavior: during all other deliveries, we want the syscalls to be a + deterministic function of delivery contents, userspace behavior, and + vatstore data. + + During BOYD, we still try to minimize the variation of behavior as + much as possible. The first step is to ask the engine to perform a + full GC sweep, to collect any remaining UNREACHABLE objects, and + allow the finalizer callbacks to run before looking at the + results. We also sort the vrefs before processing them, to remove + sensitivity to the exact order of finalizer invocation. + + That makes BOYD a safe time to look inside WeakRefs and make + syscalls based on the contents, or to read the sets that are + modified during FinalizationRegistry callbacks and make syscalls to + query their state further. This this is the only time we examine and + clear possiblyDeadSet and possiblyRetiredSet, or probe + slotToVal. Outside of BOYD, in convertSlotToVal, we must probe the + WeakRefs to see whether we must build a new Presence or + Representative, or not, but we have carefully designed that code to + avoid making syscalls during the unmarshalling process, so the only + consequence of GC differences should be differences in metering and + memory allocation patterns. + + Our general strategy is to look at the baseRefs/vrefs whose state + might have changed, determine their new reachability / + recognizability status, and then resolve any discrepancies between + that status and that of other parties who need to match. + + The kernel is one such party. If the kernel thinks we can reach an + imported o-NN vref, but we've now determined that we cannot, we must + send a syscall.dropImports to resolve the difference. Once sent, the + kernel will update our c-list entry to reflect the unreachable (but + still recognizable) status. Likewise, if the kernel thinks *it* can + recognize an exported o+NN vref, but we've just retired it, we need + to update the kernel with a syscall.retireExports, so it can notify + downstream vats that have weak collections with our vref as a key. + + The DB-backed `state` of a virtual object is another such party. If + the object is unreachable, but still has state data, we must delete + that state, and decrement refcounts it might have held. + + Our plan is summarized as: + * outer loop + * gcAndFinalize + * sort possiblyDeadSet, examine each by type + * presence (vref): + * if unreachable: + * dropImports + * add to possiblyRetiredSet + * remotable (vref): + * if unreachable: + * retireExports if kernelRecognizableRemotables + * vrm.ceaseRecognition + * VOM (baseref) + * if unreachable: + * deleteVirtualObject (and retireExports retirees) + * all: remove from possiblyDeadSet + * repeat loop if gcAgain or possiblyDeadSet.size > 0 + * now sort and process possiblyRetiredSet. for each: + * ignore unless presence + * if unreachable and unrecognizable: retireImport + (that's a duplicate reachability check, but note the answer might + be different now) +*/ + +/* + BaseRef vs vref + + For multi-faceted virtual/durable objects (eg `defineKind()` with + faceted `behavior` argument), each time userspace create a new + instance, we create a full "cohort" of facets, passing a record of + Representative objects (one per facet) back to the caller. Each + facet gets is own vref, but they all share a common prefix, known as + the "baseRef". For example, `o+d44/2` is a BaseRef for a cohort, the + second instance created of the `o+d44` Kind, whose individual facets + would have vrefs of `o+d44/2:0` and `o+d44/2:1`. + + We use a WeakMap to ensure that holding any facet will keep all the + others alive, so the cohort lives and dies as a group. The GC + tracking code needs to track the whole cohort at once, not the + individual facets, and any data structure which refers to cohorts + will use the BaseRef instead of a single vref. So `slotToVal` is + keyed by a BaseRef, and its values are a cohort of facets. But + `valToSlot` is keyed by the facets, and its values are the + individual facet's vref. + + Most of the GC-related APIs that appear here take vrefs, but the + exceptions are: + * slotToVal is keyed by baseRef + * possiblyDeadSet holds baseRefs, that's what our WeakRefs track + * vrm.isVirtualObjectReachable takes baseRef + * vrm.deleteVirtualObject takes baseRef, returns [bool, retireees=vrefs] + * vrm.ceaseRecognition takes either baseRef or vref + (if given a baseRef, it will process all the facets) + + For Presence- and Remotable- style objects, the vref and baseref are + the same. +*/ + +export const makeBOYDKit = ({ + gcTools, + slotToVal, + vrm, + kernelRecognizableRemotables, + syscall, + possiblyDeadSet, + possiblyRetiredSet, +}) => { + // Presence (o-NN) lifetimes are controlled by the upstream vat, or + // the kernel. If the vref was in possiblyDeadSet, then it *was* + // reachable before, so we can safely presume the kernel to think we + // can reach it. + + const checkImportPresence = vref => { + // RAM pillar || vdata pillar + const isReachable = slotToVal.has(vref) || vrm.isPresenceReachable(vref); + // use slotToVal.has, not getSlotForVal(), to avoid duplicate drop + let dropImport; + if (!isReachable) { + dropImport = vref; + } + return { dropImport }; + }; + + // Remotable (o+NN) lifetimes are controlled by us: we delete/retire + // the object as soon as it becomes unreachable. We only track the + // Remotable/Far object (the RAM pillar) directly: exports retain + // the Remotable in the exportedRemotables set, and vdata retains it + // as keys of the remotableRefCounts map. So when we decide the vref + // is unreachable, the Remotable is already gone, and it had no + // other data we need to delete, so our task is to remove any local + // recognition records, and to inform the kernel with a + // retireExports if kernelRecognizableRemotables says that the + // kernel still cares. + + // note: we track export status for remotables in the + // kernelRecognizableRemotables set, not vom.es.VREF records. We + // don't currently track recognition records with + // vom.ir.VREF|COLLECTION, but we should, see #9956 + + const checkExportRemotable = vref => { + let gcAgain = false; + let exportsToRetire = []; + + // Remotables have only the RAM pillar + const isReachable = slotToVal.has(vref); + if (!isReachable) { + // We own the object, so retire it immediately. If the kernel + // was recognizing it, we tell them it is now retired + if (kernelRecognizableRemotables.has(vref)) { + kernelRecognizableRemotables.delete(vref); + exportsToRetire = [vref]; + // the kernel must not have been able to reach the object, + // else it would still be pinned by exportedRemotables + } + // and remove it from any local weak collections + gcAgain = vrm.ceaseRecognition(vref); + } + return { gcAgain, exportsToRetire }; + }; + + // Representative (o+dNN/II or o+vNN/II) lifetimes are also + // controlled by us. We allow the Representative object to go away + // without deleting the vref, so we must track all three pillars: + // Representative (RAM), export, and vdata. When we decide the vref + // is unreachable, we must delete the virtual object's state, as + // well as retiring the object (by telling the kernel it has been + // retired, if the kernel cares, and removing any local recognition + // records). + + const checkExportRepresentative = baseRef => { + // RAM pillar || (vdata pillar || export pillar) + const isReachable = + slotToVal.has(baseRef) || vrm.isVirtualObjectReachable(baseRef); + let gcAgain = false; + let exportsToRetire = []; + if (!isReachable) { + // again, we own the object, so we retire it immediately + [gcAgain, exportsToRetire] = vrm.deleteVirtualObject(baseRef); + } + return { gcAgain, exportsToRetire }; + }; + + const scanForDeadObjects = async () => { + await null; + + // `possiblyDeadSet` holds vrefs which have lost a supporting + // pillar (in-memory, export, or virtualized data refcount) since + // the last call to scanForDeadObjects. The vref might still be + // supported by a remaining pillar, or the pillar which was + // dropped might have been restored (e.g., re-exported after a + // drop, or given a new in-memory manifestation). + + const importsToDrop = new Set(); + const importsToRetire = new Set(); + const exportsToRetire = new Set(); + let gcAgain = false; + + do { + gcAgain = false; + await gcTools.gcAndFinalize(); + + // process a sorted list of vref/baseRefs we need to check for + // reachability, one at a time + + for (const vrefOrBaseRef of [...possiblyDeadSet].sort()) { + // remove the vref now, but some deleteVirtualObject might + // shake it loose again for a future pass to investigate + possiblyDeadSet.delete(vrefOrBaseRef); + + const parsed = parseVatSlot(vrefOrBaseRef); + assert.equal(parsed.type, 'object', vrefOrBaseRef); + + let res = {}; + if (parsed.virtual || parsed.durable) { + const baseRef = vrefOrBaseRef; + res = checkExportRepresentative(baseRef); + } else if (parsed.allocatedByVat) { + const vref = vrefOrBaseRef; + res = checkExportRemotable(vref); + } else { + const vref = vrefOrBaseRef; + res = checkImportPresence(vref); + } + + // prepare our end-of-crank syscalls + if (res.dropImport) { + importsToDrop.add(res.dropImport); + possiblyRetiredSet.add(res.dropImport); + } + for (const facetVref of res.exportsToRetire || []) { + exportsToRetire.add(facetVref); + } + gcAgain ||= !!res.gcAgain; + } + + // Deleting virtual object state, or freeing weak-keyed + // collection entries, might shake loose more + // objects. possiblyDeadSet and possiblyRetiredSet are added + // when vdata vrefs are decreffed to zero, and gcAgain means that + // something in RAM might now be free. In both cases we should + // do another pass, including gcAndFinalize(), until we've + // cleared everything we can. + } while (possiblyDeadSet.size > 0 || gcAgain); + + // Now we process potential retirements, by which we really mean + // de-recognitions, where this vat has ceased to even recognize a + // previously unreachable-yet-recognizable + // vref. addToPossiblyRetiredSet() is called from + // ceaseRecognition() when a recognizer goes away, such when a + // weak collection being deleted and it no longer recognizes all + // its former keys. ceaseRecognition() can be called from the loop + // above (when a Remotable-style object is deleted, or from within + // deleteVirtualObject), or in response to a retireImport() + // delivery. We assume possiblyRetiredSet is given vrefs of all + // sorts, but we only care about Presence-type, because we must do + // retireImports for them: the kernel doesn't care if/when we stop + // recognizing our own (maybe-exported) Remotable- and + // Representative- type vrefs. + + for (const vref of [...possiblyRetiredSet].sort()) { + possiblyRetiredSet.delete(vref); + const parsed = parseVatSlot(vref); + assert.equal(parsed.type, 'object', vref); + // not allocated by us? that makes it a Presence-type + if (!parsed.allocatedByVat) { + // if we're dropping the vref, checkImportPresence() already + // did our isReachable check, so we can safely skip it (and + // save a vatstore syscall) + const isReachable = + !importsToDrop.has(vref) && + (slotToVal.has(vref) || vrm.isPresenceReachable(vref)); + // note: the old version used getValForSlot, which treats + // COLLECTED as unreachable, which would allow a retirement at + // the wrong time (before the finalizer fires and does the + // dropImport) + const isRecognizable = isReachable || vrm.isVrefRecognizable(vref); + if (!isRecognizable) { + importsToRetire.add(vref); + } + } + } + + // note that retiring Presence-type vrefs cannot shake loose any + // local data, so we don't need to loop back around + + if (importsToDrop.size) { + syscall.dropImports([...importsToDrop].sort()); + } + if (importsToRetire.size) { + syscall.retireImports([...importsToRetire].sort()); + } + if (exportsToRetire.size) { + syscall.retireExports([...exportsToRetire].sort()); + } + }; + + return { scanForDeadObjects }; +}; +harden(makeBOYDKit); diff --git a/packages/swingset-liveslots/src/liveslots.js b/packages/swingset-liveslots/src/liveslots.js index 4490679c4d5..10747156c2d 100644 --- a/packages/swingset-liveslots/src/liveslots.js +++ b/packages/swingset-liveslots/src/liveslots.js @@ -12,6 +12,7 @@ import { makeVirtualReferenceManager } from './virtualReferences.js'; import { makeVirtualObjectManager } from './virtualObjectManager.js'; import { makeCollectionManager } from './collectionManager.js'; import { makeWatchedPromiseManager } from './watchedPromises.js'; +import { makeBOYDKit } from './boyd-gc.js'; const SYSCALL_CAPDATA_BODY_SIZE_LIMIT = 10_000_000; const SYSCALL_CAPDATA_SLOTS_LENGTH_LIMIT = 10_000; @@ -165,52 +166,6 @@ function build( } } - /* - Imports are in one of 5 states: UNKNOWN, REACHABLE, UNREACHABLE, - COLLECTED, FINALIZED. Note that there's no actual state machine with those - values, and we can't observe all of the transitions from JavaScript, but - we can describe what operations could cause a transition, and what our - observations allow us to deduce about the state: - - * UNKNOWN moves to REACHABLE when a crank introduces a new import - * userspace holds a reference only in REACHABLE - * REACHABLE moves to UNREACHABLE only during a userspace crank - * UNREACHABLE moves to COLLECTED when GC runs, which queues the finalizer - * COLLECTED moves to FINALIZED when a new turn runs the finalizer - * liveslots moves from FINALIZED to UNKNOWN by syscalling dropImports - - convertSlotToVal either imports a vref for the first time, or - re-introduces a previously-seen vref. It transitions from: - - * UNKNOWN to REACHABLE by creating a new Presence - * UNREACHABLE to REACHABLE by re-using the old Presence that userspace - forgot about - * COLLECTED/FINALIZED to REACHABLE by creating a new Presence - - Our tracking tables hold data that depends on the current state: - - * slotToVal holds a WeakRef in [REACHABLE, UNREACHABLE, COLLECTED] - * that WeakRef .deref()s into something in [REACHABLE, UNREACHABLE] - * deadSet holds the vref only in FINALIZED - * re-introduction must ensure the vref is not in the deadSet - - Each state thus has a set of perhaps-measurable properties: - - * UNKNOWN: slotToVal[baseRef] is missing, baseRef not in deadSet - * REACHABLE: slotToVal has live weakref, userspace can reach - * UNREACHABLE: slotToVal has live weakref, userspace cannot reach - * COLLECTED: slotToVal[baseRef] has dead weakref - * FINALIZED: slotToVal[baseRef] is missing, baseRef is in deadSet - - Our finalizer callback is queued by the engine's transition from - UNREACHABLE to COLLECTED, but the baseRef might be re-introduced before the - callback has a chance to run. There might even be multiple copies of the - finalizer callback queued. So the callback must deduce the current state - and only perform cleanup (i.e. delete the slotToVal entry and add the - baseRef to the deadSet) in the COLLECTED state. - - */ - function finalizeDroppedObject(baseRef) { // TODO: Ideally this function should assert that it is not metered. This // appears to be fine in practice, but it breaks a number of unit tests in @@ -235,113 +190,6 @@ function build( } const vreffedObjectRegistry = new FinalizationRegistry(finalizeDroppedObject); - async function scanForDeadObjects() { - // `possiblyDeadSet` accumulates vrefs which have lost a supporting - // pillar (in-memory, export, or virtualized data refcount) since the - // last call to scanForDeadObjects. The vref might still be supported - // by a remaining pillar, or the pillar which was dropped might be back - // (e.g., given a new in-memory manifestation). - - const importsToDrop = new Set(); - const importsToRetire = new Set(); - const exportsToRetire = new Set(); - let doMore; - await null; - do { - doMore = false; - - await gcTools.gcAndFinalize(); - - // possiblyDeadSet contains a baseref for everything (Presences, - // Remotables, Representatives) that might have lost a - // pillar. The object might still be supported by other pillars, - // and the lost pillar might have been reinstantiated by the - // time we get here. The first step is to filter this down to a - // list of definitely dead baserefs. - - const deadSet = new Set(); - - for (const baseRef of possiblyDeadSet) { - if (slotToVal.has(baseRef)) { - continue; // RAM pillar remains - } - const { virtual, durable, type } = parseVatSlot(baseRef); - assert(type === 'object', `unprepared to track ${type}`); - if (virtual || durable) { - // eslint-disable-next-line no-use-before-define - if (vrm.isVirtualObjectReachable(baseRef)) { - continue; // vdata or export pillar remains - } - } - deadSet.add(baseRef); - } - possiblyDeadSet.clear(); - - // deadSet now contains objects which are certainly dead - - // possiblyRetiredSet holds (a subset of??) baserefs which have - // lost a recognizer recently. TODO recheck this - - for (const vref of possiblyRetiredSet) { - // eslint-disable-next-line no-use-before-define - if (!getValForSlot(vref) && !deadSet.has(vref)) { - // Don't retire things that haven't yet made the transition to dead, - // i.e., always drop before retiring - // eslint-disable-next-line no-use-before-define - if (!vrm.isVrefRecognizable(vref)) { - importsToRetire.add(vref); - } - } - } - possiblyRetiredSet.clear(); - - const deadBaseRefs = Array.from(deadSet); - deadBaseRefs.sort(); - for (const baseRef of deadBaseRefs) { - const { virtual, durable, allocatedByVat, type } = - parseVatSlot(baseRef); - type === 'object' || Fail`unprepared to track ${type}`; - if (virtual || durable) { - // Representative: send nothing, but perform refcount checking - // eslint-disable-next-line no-use-before-define - const [gcAgain, retirees] = vrm.deleteVirtualObject(baseRef); - if (retirees) { - retirees.map(retiree => exportsToRetire.add(retiree)); - } - doMore = doMore || gcAgain; - } else if (allocatedByVat) { - // Remotable: send retireExport - // for remotables, vref === baseRef - if (kernelRecognizableRemotables.has(baseRef)) { - kernelRecognizableRemotables.delete(baseRef); - exportsToRetire.add(baseRef); - } - } else { - // Presence: send dropImport unless reachable by VOM - // eslint-disable-next-line no-lonely-if, no-use-before-define - if (!vrm.isPresenceReachable(baseRef)) { - importsToDrop.add(baseRef); - // eslint-disable-next-line no-use-before-define - if (!vrm.isVrefRecognizable(baseRef)) { - // for presences, baseRef === vref - importsToRetire.add(baseRef); - } - } - } - } - } while (possiblyDeadSet.size > 0 || possiblyRetiredSet.size > 0 || doMore); - - if (importsToDrop.size) { - syscall.dropImports(Array.from(importsToDrop).sort()); - } - if (importsToRetire.size) { - syscall.retireImports(Array.from(importsToRetire).sort()); - } - if (exportsToRetire.size) { - syscall.retireExports(Array.from(exportsToRetire).sort()); - } - } - /** * Remember disavowed Presences which will kill the vat if you try to talk * to them @@ -1545,16 +1393,15 @@ function build( // metered const unmeteredDispatch = meterControl.unmetered(dispatchToUserspace); - async function bringOutYourDead() { - await scanForDeadObjects(); - // Now flush all the vatstore changes (deletions and refcounts) we - // made. dispatch() calls afterDispatchActions() automatically for - // most methods, but not bringOutYourDead(). - // eslint-disable-next-line no-use-before-define - afterDispatchActions(); - // XXX TODO: make this conditional on a config setting - return getRetentionStats(); - } + const { scanForDeadObjects } = makeBOYDKit({ + gcTools, + slotToVal, + vrm, + kernelRecognizableRemotables, + syscall, + possiblyDeadSet, + possiblyRetiredSet, + }); /** * @param { import('./types.js').SwingSetCapData } _disconnectObjectCapData @@ -1574,6 +1421,16 @@ function build( vom.flushStateCache(); } + const bringOutYourDead = async () => { + await scanForDeadObjects(); + // Now flush all the vatstore changes (deletions and refcounts) we + // made. dispatch() calls afterDispatchActions() automatically for + // most methods, but not bringOutYourDead(). + afterDispatchActions(); + // XXX TODO: make this conditional on a config setting + return getRetentionStats(); + }; + /** * This 'dispatch' function is the entry point for the vat as a whole: the * vat-worker supervisor gives us VatDeliveryObjects (like diff --git a/packages/swingset-liveslots/test/dropped-weakset-9939.test.js b/packages/swingset-liveslots/test/dropped-weakset-9939.test.js new file mode 100644 index 00000000000..251bfcaff4e --- /dev/null +++ b/packages/swingset-liveslots/test/dropped-weakset-9939.test.js @@ -0,0 +1,80 @@ +import test from 'ava'; +import { Far } from '@endo/marshal'; +import { kser, kslot } from '@agoric/kmarshal'; +import { makeLiveSlots } from '../src/liveslots.js'; +import { buildSyscall } from './liveslots-helpers.js'; +import { makeStartVat, makeMessage, makeBringOutYourDead } from './util.js'; +import { makeMockGC } from './mock-gc.js'; + +// Test for https://github.com/Agoric/agoric-sdk/issues/9939 + +test('weakset deletion vs retire', async t => { + const { syscall, log } = buildSyscall(); + const gcTools = makeMockGC(); + + // #9939 was a bug in liveslots that caused a vat to emit + // syscall.retireImports despite not having done dropImports + // first. The setup is: + // + // * import a Presence (raising the RAM pillar) + // * store it in a virtual object (raising the vdata pillar) + // * use it as a key of a voAwareWeakMap or voAwareWeakSet + // * drop the Presence (dropping the RAM pillar) + // * do a BOYD + // * delete the voAwareWeakSet + // * do a BOYD + // + // When the voAwareWeakSet is collected, a finalizer callback named + // finalizeDroppedCollection is called, which clears the entries, + // and adds all its vref keys to possiblyRetiredSet. Later, during + // BOYD, a loop examines possiblyRetiredSet and adds qualified vrefs + // to importsToRetire, for the syscall.retireImports at the end. + // + // That qualification check was sufficient to prevent the retirement + // of vrefs that still have a RAM pillar, and also vrefs that were + // being dropped in this particular BOYD, but it was not sufficient + // to protect vrefs that still have a vdata pillar. + + let myVOAwareWeakSet; + let myPresence; + function buildRootObject(vatPowers, _vatParameters, baggage) { + const { WeakSet } = vatPowers; + myVOAwareWeakSet = new WeakSet(); + return Far('root', { + store: p => { + myPresence = p; + myVOAwareWeakSet.add(p); + baggage.init('presence', p); + }, + }); + } + + const makeNS = () => ({ buildRootObject }); + const ls = makeLiveSlots(syscall, 'vatA', {}, {}, gcTools, undefined, makeNS); + const { dispatch } = ls; + await dispatch(makeStartVat(kser())); + t.truthy(myVOAwareWeakSet); + + await dispatch(makeMessage('o+0', 'store', [kslot('o-1')])); + t.truthy(myPresence); + + log.length = 0; + gcTools.kill(myPresence); + gcTools.flushAllFRs(); + await dispatch(makeBringOutYourDead()); + + log.length = 0; + gcTools.kill(myVOAwareWeakSet); + gcTools.flushAllFRs(); + await dispatch(makeBringOutYourDead()); + + // The imported vref is still reachable by the 'baggage' durable + // store, so it must not be dropped or retired yet. The bug caused + // the vref to be retired without first doing a drop, which is a + // vat-fatal syscall error + const gcCalls = log.filter( + l => l.type === 'dropImports' || l.type === 'retireImports', + ); + t.deepEqual(gcCalls, []); + log.length = 0; +}); diff --git a/packages/swingset-liveslots/test/gc-before-finalizer.test.js b/packages/swingset-liveslots/test/gc-before-finalizer.test.js new file mode 100644 index 00000000000..b0b5f7c7c19 --- /dev/null +++ b/packages/swingset-liveslots/test/gc-before-finalizer.test.js @@ -0,0 +1,234 @@ +import test from 'ava'; +import { Far } from '@endo/marshal'; +import { kser, kslot } from '@agoric/kmarshal'; +import { makeLiveSlots } from '../src/liveslots.js'; +import { buildSyscall } from './liveslots-helpers.js'; +import { makeStartVat, makeMessage, makeBringOutYourDead } from './util.js'; +import { makeMockGC } from './mock-gc.js'; + +const justGC = log => + log.filter( + l => + l.type === 'dropImports' || + l.type === 'retireImports' || + l.type === 'retireExports', + ); + +test('presence in COLLECTED state is not dropped yet', async t => { + const { syscall, log } = buildSyscall(); + const gcTools = makeMockGC(); + + // our GC terminology (see boyd-gc.js for notes): + // * REACHABLE means userspace can reach a Presence + // * UNREACHABLE means it cannot + // * COLLECTED means the engine GC has noticed, so the + // slotToVal.get(vref) WeakRef is empty, even though + // slotToVal.has(vref) is still true. A finalizer + // callback has been queued. + // * FINALIZED means the callback has been run. Our callback does + // slotToVal.delete(vref) before adding the vref to + // possiblyDeadSet + + // slotToVal.has() returns true for REACHABLE / UNREACHABLE / + // COLLECTED, and false for FINALIZED. getValForSlot() is another + // way to probe for reachability, but it also looks to see if the + // WeakRef is full, so it returns false for both COLLECTED and + // FINALIZED. + + // We use slotToVal.has(vref) as the "is it still reachable" probe + // in scanForDeadObjects(), because if we have a Presence in the + // COLLECTED state, we must not dropImports it during this BOYD. The + // finalizer is still enqueued, so some future turn will run it, and + // the vref will be added to possiblyDeadSet again. We want the drop + // to happen exactly once, and we don't remove the finalizer from + // the queue, which means the drop must happen in the future BOYD. + + // We can get into this situation with the following sequence: + // 1: vref is held by both Presence and vdata + // 2: Presence is dropped by userspace (->UNREACHABLE) + // 3: userspace store.delete(), drops vdata refcount, addToPossiblyDeadSet + // 4: organic GC happens (->COLLECTED, WeakRef is empty) + // 5: BOYD is called (finalizer has not run yet) + + // The order of steps 3 and 4 is not important. What matters is that + // the WeakRef is empty, but the finalizer has not yet run, at the + // time that BOYD happens. And that the vref is in possiblyDeadSet + // (because of the vdata drop, not the finalizer), so this BOYD will + // examine the vref. + + // This test simulates this case with our mockGC tools. It would + // fail if boyd-gc.js used getValForSlot instead of slotToVal.has. + + // The GC code used to call getValForSlot() instead of + // slotToVal.has(), in the possiblyRetiredSet. The second test + // exercises this case, and would fail if it still used + // getValForSlot + + let myPresence; + function buildRootObject(vatPowers) { + const { VatData } = vatPowers; + const { makeScalarBigMapStore } = VatData; + const store = makeScalarBigMapStore(); + return Far('root', { + store: p => { + myPresence = p; + store.init('presence', p); + }, + drop: () => store.delete('presence'), + }); + } + + const makeNS = () => ({ buildRootObject }); + const ls = makeLiveSlots(syscall, 'vatA', {}, {}, gcTools, undefined, makeNS); + const { dispatch } = ls; + await dispatch(makeStartVat(kser())); + + await dispatch(makeMessage('o+0', 'store', [kslot('o-1')])); + t.truthy(myPresence); + + // clear out everything before our check + await dispatch(makeBringOutYourDead()); + log.length = 0; + + // drop the vdata reference + await dispatch(makeMessage('o+0', 'drop', [])); + log.length = 0; + + // and, before BOYD can happen, collect (but do not finalize) the Presence + gcTools.kill(myPresence); + + // now BOYD + await dispatch(makeBringOutYourDead()); + + // the BOYD must not have done dropImports or retireImports + t.deepEqual(log, []); + + // eventually, the finalizer runs + gcTools.flushAllFRs(); + + // *now* a BOYD will drop+retire + await dispatch(makeBringOutYourDead()); + t.deepEqual(justGC(log), [ + { type: 'dropImports', slots: ['o-1'] }, + { type: 'retireImports', slots: ['o-1'] }, + ]); +}); + +// disabled until #9956 and #9959 are fixed, which interfere with this +// test + +/* + +test.failing('presence in COLLECTED state is not retired early', async t => { + const { syscall, log } = buildSyscall(); + const gcTools = makeMockGC(); + + // The GC code used to call getValForSlot() instead of + // slotToVal.has(), in the possiblyRetiredSet. This test would fail + // if it still used getValForSlot. + + // The setup is a Presence in the COLLECTED state as the only + // pillar, but not in possiblyDeadSet, whose vref also appears in + // possiblyRetiredSet (because a recognizer was just deleted). On + // the BOYD that handles the possiblyRetiredSet, the "reachability + // inhibits retirement" check should treat the COLLECTED state as + // reachable, so the retirement is deferred for a later BOYD (which + // would drop the vref first) + // + // To build this, we have an anchored (virtual) MapStore msA holding + // the only reference (vdata) to a (virtual) WeakSetStore wssB. wssB + // has one (weak) key, o-1, for which there is a Presence P. + // + // 1: Construct everything, kill the wssB Representative, BOYD. That + // will process wssB, but leave it alive because of the vdata in + // msA. + + // 2: Use msA.delete(key) to drop its vdata ref to wssB (adding wssB + // to possiblyDeadSet), and use gcTools.kill(P) to mock-mark it + // as COLLECTED (but not finalized) + + // 3: Do a BOYD. The first pass will see wssB is unreachable, and + // delete it. The collection deleter will put o-1 in + // possiblyRetiredSet. There might be a second pass (doMore=1), + // but it won't have anything in possiblyDeadSet and will do + // nothing. Then the post-deadSet loop will process + // possiblyRetiredSet, which will contain our o-1. That + // processing step contains the valToSlot.has (or the + // would-be-incorrect getValForSlot) that we want to exercise. + + let myPresence; + let myWeakStore; + + function buildRootObject(vatPowers) { + const { VatData, WeakSet } = vatPowers; + const { makeScalarBigMapStore, makeScalarBigWeakSetStore } = VatData; + const store = makeScalarBigMapStore(); + myWeakStore = makeScalarBigWeakSetStore(); + return Far('root', { + store: p => { + myPresence = p; + myWeakStore.add(p); + t.truthy(myWeakStore.has(p)); + store.init('weakstore', myWeakStore); + }, + dropWeakStore: () => store.delete('weakstore'), + }); + } + + const makeNS = () => ({ buildRootObject }); + const ls = makeLiveSlots(syscall, 'vatA', {}, {}, gcTools, undefined, makeNS); + const { dispatch, testHooks } = ls; + const { possiblyDeadSet, possiblyRetiredSet, slotToVal } = testHooks; + await dispatch(makeStartVat(kser())); + + // step 1 (setup): store, kill WeakSetStore representative, BOYD + await dispatch(makeMessage('o+0', 'store', [kslot('o-1')])); + t.truthy(myPresence); + gcTools.kill(myWeakStore); + gcTools.flushAllFRs(); + await dispatch(makeBringOutYourDead()); + log.length = 0; + + // myPresence vref is held by the Presence, and recognized by myWeakStore + t.is(possiblyDeadSet.size, 0); + t.is(possiblyRetiredSet.size, 0); + + console.log(`-- starting`); + // step 2: delete vdata ref to weakstore, make myPresence COLLECTED + await dispatch(makeMessage('o+0', 'dropWeakStore', [])); + gcTools.kill(myPresence) + log.length = 0; + // weakstore is possiblyDead (NARRATORS VOICE: it's dead). Presence + // is not, because the finalizer hasn't run. + t.is(possiblyDeadSet.size, 1); + t.is(possiblyRetiredSet.size, 0); + t.not([...possiblyDeadSet][0], 'o-1'); + // the empty weakref is still there + t.true(slotToVal.has('o-1')); + + + // step 3: BOYD. It will collect myWeakStore on the first pass, + // whose deleter should clear all entries, which will add its + // recognized vrefs to possiblyRetiredSet. The post-pass will check + // o-1 for reachability with slotToVal.has, and because that says it + // is reachable, it will not be retired (even though it has no + // recognizer by now) + console.log(`-- doing first BOYD`); + await dispatch(makeBringOutYourDead()); + t.deepEqual(justGC(log), []); + log.length = 0; + + // eventually, the finalizer runs + gcTools.flushAllFRs(); + + console.log(`-- doing second BOYD`); + // *now* a BOYD will drop+retire + await dispatch(makeBringOutYourDead()); + console.log(log); + t.deepEqual(justGC(log), [ + { type: 'dropImports', slots: ['o-1'] }, + { type: 'retireImports', slots: ['o-1'] }, + ]); +}); + +*/ diff --git a/packages/swingset-liveslots/test/liveslots-real-gc.test.js b/packages/swingset-liveslots/test/liveslots-real-gc.test.js index 8832eb04546..80641d82a44 100644 --- a/packages/swingset-liveslots/test/liveslots-real-gc.test.js +++ b/packages/swingset-liveslots/test/liveslots-real-gc.test.js @@ -308,6 +308,14 @@ test.serial('GC dispatch.dropExports', async t => { // that should allow ex1 to be collected t.true(collected.result); + // upon collection, the vat should scan for local recognizers (weak + // collection keys) in case any need to be dropped, and find none + t.deepEqual(log.shift(), { + type: 'vatstoreGetNextKey', + priorKey: `vom.ir.${ex1}|`, + result: 'vom.rc.o+d6/1', + }); + // and once it's collected, the vat should emit `syscall.retireExport` // because nobody else will be able to recognize it again const l2 = log.shift(); @@ -394,8 +402,16 @@ test.serial( // which should let the export be collected t.true(collected.result); - // the vat should *not* emit `syscall.retireExport`, because it already - // received a dispatch.retireExport + // the vat should scan for local recognizers (weak collection + // keys) in case any need to be dropped, and find none + t.deepEqual(log.shift(), { + type: 'vatstoreGetNextKey', + priorKey: 'vom.ir.o+10|', + result: 'vom.rc.o+d6/1', + }); + + // the vat should *not* emit `syscall.retireExport`, because it + // already received a dispatch.retireExport t.deepEqual(log, []); }, );