Skip to content

Commit

Permalink
fix(swingset): retire unreachable orphans
Browse files Browse the repository at this point in the history
If a kernel object ("koid", the object subset of krefs) is
unreachable, and then becomes orphaned (either because the owning vat
was terminated, or called `syscall.abandonExports`, or was upgraded
and the koid was ephemeral), then retire it immediately.

The argument is that the previously-owning vat can never again talk
about the object, so it can never become reachable again, which is
normally the point at which the owning vat would retire it. But
because the owning vat can't retire it by itself, the kernel needs to
do the retirement on its behalf.

We now consolidate retirement responsibilities into
processRefcounts(): when terminateVat or syscall.abandonExports use
abandonKernelObjects() to mark a kref as orphaned, it also adds the
kref to maybeFreeKrefs, and then processRefcounts() is responsible for
noticing the kref is both orphaned and unreachable, and then notifying
any importers of its retirement.

I double-checked that cleanupAfterTerminatedVat will always be
followed by a processRefcounts(), by virtue of either being called
from processDeliveryMessage (in the crankResults.terminate clause), or
from within a device invocation syscall (which only happens during a
delivery, so follows the same path). We need this to ensure that any
maybeFreeKrefs created by the cleanup's abandonKernelObjects() will
get processed promptly.

Changes getObjectRefCount to tolerate deleted krefs (missing
`koNN.refCount`) by just returning 0,0. This fixes a potential kernel
panic in the new approach, when a kref is recognizable by one vat but
only reachable by a send-message on the run-queue, then becomes
unreachable as that message is delivered (the run-queue held the last
strong reference), if the target vat does syscall.exit during the
delivery. The decref pushes the kref onto maybeFreeKrefs, the
terminateVat retires the merely-recognizable now-orphaned kref, then
processRefcounts used getObjectRefCount() to grab the refcount for the
now-retired (and deleted) kref, which asserted that the koNN.refCount
key still existed, which didn't.

This occured in zoe - secondPriceAuction -- valid input , where the
contract did syscall.exit in response to a timer wake() message sent
to a single-use wakeObj.

closes #7212
  • Loading branch information
warner committed Jun 27, 2024
1 parent 12f9068 commit 5e34f0f
Show file tree
Hide file tree
Showing 2 changed files with 172 additions and 9 deletions.
58 changes: 50 additions & 8 deletions packages/SwingSet/src/kernel/state/kernelKeeper.js
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,9 @@ export default function makeKernelKeeper(kernelStorage, kernelSlog) {

function getObjectRefCount(kernelSlot) {
const data = kvStore.get(`${kernelSlot}.refCount`);
if (!data) {
return { reachable: 0, recognizable: 0 };
}
data || Fail`getObjectRefCount(${kernelSlot}) was missing`;
const [reachable, recognizable] = commaSplit(data).map(Number);
reachable <= recognizable ||
Expand Down Expand Up @@ -920,6 +923,14 @@ export default function makeKernelKeeper(kernelStorage, kernelSlog) {
*
*/
function cleanupAfterTerminatedVat(vatID, budget = undefined) {
// this is called from terminateVat, which is called from either:
// * end of processDeliveryMessage, if crankResults.terminate
// * device-vat-admin (when vat-v-a does adminNode.terminateVat)
// (which always happens inside processDeliveryMessage)
// so we're always followed by a call to processRefcounts, at
// end-of-delivery in processDeliveryMessage, after checking
// crankResults.terminate

insistVatID(vatID);
let cleanups = 0;
const work = {
Expand Down Expand Up @@ -1485,27 +1496,58 @@ export default function makeKernelKeeper(kernelStorage, kernelSlog) {
deleteKernelPromise(kpid);
}
}

if (type === 'object') {
const { reachable, recognizable } = getObjectRefCount(kref);
if (reachable === 0) {
const ownerVatID = ownerOfKernelObject(kref);
if (ownerVatID) {
// We avoid ownerOfKernelObject(), which will report
// 'undefined' if the owner is dead (and being slowly
// deleted). Message delivery should use that, but not us.
const ownerKey = `${kref}.owner`;
let ownerVatID = kvStore.get(ownerKey);
const terminated = terminatedVats.includes(ownerVatID);

if (ownerVatID && !terminated) {
const vatKeeper = provideVatKeeper(ownerVatID);
const isReachable = vatKeeper.getReachableFlag(kref);
if (isReachable) {
// the reachable count is zero, but the vat doesn't realize it
actions.add(`${ownerVatID} dropExport ${kref}`);
}
if (recognizable === 0) {
// TODO: rethink this
// TODO: rethink this assert
// assert.equal(isReachable, false, `${kref} is reachable but not recognizable`);
actions.add(`${ownerVatID} retireExport ${kref}`);
}
} else if (recognizable === 0) {
// unreachable, unrecognizable, orphaned: delete the
// empty refcount here, since we can't send a GC
// action without an ownerVatID
deleteKernelObject(kref);
}
if (ownerVatID && terminated) {
// When we're slowly deleting a vat, and one of its
// exports becomes unreferenced, we obviously must not
// send dropExports or retireExports into the dead vat.
// We fast-forward the abandonment that slow-deletion
// would have done, then treat the object as orphaned.

const { vatSlot } = getReachableAndVatSlot(ownerVatID, kref);
// delete directly, not abandonKernelObjects(), which
// would re-submit to maybeFreeKrefs
kvStore.delete(ownerKey);
kvStore.delete(`${ownerVatID}.c.${kref}`);
kvStore.delete(`${ownerVatID}.c.${vatSlot}`);
// now fall through to the orphaned case
ownerVatID = undefined;
}
if (!ownerVatID) {
// orphaned and unreachable, so retire it
if (recognizable) {
// there are importers, tell them about the
// retirement. retireKernelObjects will also
// deleteKernelObject
retireKernelObjects([kref]);
} else {
// just delete, without scanning for importers (since
// we know there are none)
deleteKernelObject(kref);
}
}
}
}
Expand Down
123 changes: 122 additions & 1 deletion packages/SwingSet/test/gc-kernel-orphan.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ for (const cause of ['abandon', 'terminate']) {
// the fix was to change kernelKeeper.getRefCounts to handle a missing
// koNN.refCounts by just returning 0,0

test('termination plus maybeFreeKrefs', async t => {
test('termination plus maybeFreeKrefs - dropped', async t => {
const { kernel, kvStore } = await makeKernel();
await kernel.start();

Expand All @@ -267,6 +267,23 @@ test('termination plus maybeFreeKrefs', async t => {
// it (but doesn't retire it), vatB will self-terminate upon
// receiving that message, creating two places that try to retire it

// The order of events will be:
// * 'terminate' translated, drops reachable to zero, adds to maybeFreeKrefs
// * 'terminate' delivered, delivery marks vat for termination
// * post-delivery crankResults.terminate check marks vat as terminated
// (but slow-deletion means nothing is deleted on that crank)
// * post-delivery does processRefCounts()
// * that processes ko22/billy, sees 0,1, owner=v2, v2 is terminated
// so it orphans ko22 (removes from vatB c-list and clears .owner)
// and falls through to (owner=undefined) case
// which sees recognizable=1 and retires the object
// (i.e. pushes retireImport gcAction and deletes .owner and .refCounts)
// * next crank starts cleanup, walks c-list, orphans ko21/bob
// which adds ko21 to maybeFreeKrefs
// * post-cleanup processRefCounts() does ko21, sees 1,1, owner=undefined
// does nothing, since vatA still holds an (orphaned) reference
// * cleanup finishes

// Our two root objects (alice and bob) are pinned so they don't
// disappear while the test is talking to them, so vatB exports
// "billy".
Expand Down Expand Up @@ -339,4 +356,108 @@ test('termination plus maybeFreeKrefs', async t => {

t.is(kvStore.get(`${billyKref}.owner`), undefined);
t.is(kvStore.get(`${billyKref}.refCounts`), undefined);
t.is(kvStore.get(`${vatA}.c.${billyKref}`), undefined);
t.is(kvStore.get(`${vatA}.c.${vrefs.billyForA}`), undefined);
t.is(kvStore.get(`${vatB}.c.${billyKref}`), undefined);
t.is(kvStore.get(`${vatB}.c.${vrefs.billyForB}`), undefined);
});

// like above, but the object doesn't remain recognizable
test('termination plus maybeFreeKrefs - retired', async t => {
const { kernel, kvStore } = await makeKernel();
await kernel.start();

const vrefs = {}; // track vrefs within vats

// vatB exports an object to vatA, vatA sends it a message and drops
// and retires it, vatB will self-terminate upon receiving that
// message. The order of events will be:
// * 'terminate' translated, drops refcount to zero, adds to maybeFreeKrefs
// * 'terminate' delivered, delivery marks vat for termination
// * post-delivery crankResults.terminate check marks vat as terminated
// (but slow-deletion means nothing is deleted on that crank)
// * post-delivery does processRefCounts()
// * that processes ko22/billy, sees 0,0, owner=v2, v2 is terminated
// so it orphans ko22 (removes from vatB c-list and clears .owner)
// and falls through to (owner=undefined) case
// which sees recognizable=0 and deletes the object (just .refCount now)
// * next crank starts cleanup, walks c-list, orphans ko21/bob
// which adds ko21 to maybeFreeKrefs
// * post-cleanup processRefCounts() does ko21, sees 1,1, owner=undefined
// does nothing, since vatA still holds an (orphaned) reference
// * cleanup finishes

vrefs.aliceForA = 'o+100';
vrefs.bobForB = 'o+200';
vrefs.billyForB = 'o+201';
let billyKref;

let vatA;
function setupA(syscall, _state, _helpers, _vatPowers) {
function dispatch(vd) {
if (vd[0] === 'startVat') {
return; // skip startVat
}
console.log(`deliverA:`, JSON.stringify(vd));
if (vd[0] === 'message') {
const methargs = kunser(vd[2].methargs);
const [method] = methargs;
if (method === 'call-billy') {
t.is(vd[2].methargs.slots.length, 1);
vrefs.billyForA = vd[2].methargs.slots[0];
t.is(vrefs.billyForA, 'o-50'); // probably
billyKref = kvStore.get(`${vatA}.c.${vrefs.billyForA}`);
syscall.send(
vrefs.billyForA,
kser(['terminate', [kslot(vrefs.billyForA, 'billy-A')]]),
);
syscall.dropImports([vrefs.billyForA]);
syscall.retireImports([vrefs.billyForA]);
}
}
}
return dispatch;
}
await kernel.createTestVat('vatA', setupA);
vatA = kernel.vatNameToID('vatA');
const alice = kernel.addExport(vatA, vrefs.aliceForA);

function setupB(syscall, _state, _helpers, _vatPowers) {
function dispatch(vd) {
if (vd[0] === 'startVat') {
return; // skip startVat
}
console.log(`deliverB:`, JSON.stringify(vd));
if (vd[0] === 'message') {
const [method] = kunser(vd[2].methargs);
if (method === 'init') {
vrefs.aliceForB = vd[2].methargs.slots[0];
syscall.send(
vrefs.aliceForB,
kser(['call-billy', [kslot(vrefs.billyForB, 'billy-B')]]),
);
}
if (method === 'terminate') {
t.is(vd[2].methargs.slots.length, 1);
assert.equal(vd[2].methargs.slots[0], vrefs.billyForB);
syscall.exit(false, kser('reason'));
}
}
}
return dispatch;
}
await kernel.createTestVat('vatB', setupB);
const vatB = kernel.vatNameToID('vatB');
const bob = kernel.addExport(vatB, vrefs.bobForB);

// this triggers everything, the bug was a kernel crash
kernel.queueToKref(bob, 'init', [kslot(alice, 'alice')], 'none');
await kernel.run();

t.is(kvStore.get(`${billyKref}.owner`), undefined);
t.is(kvStore.get(`${billyKref}.refCounts`), undefined);
t.is(kvStore.get(`${vatA}.c.${billyKref}`), undefined);
t.is(kvStore.get(`${vatA}.c.${vrefs.billyForA}`), undefined);
t.is(kvStore.get(`${vatB}.c.${billyKref}`), undefined);
t.is(kvStore.get(`${vatB}.c.${vrefs.billyForB}`), undefined);
});

0 comments on commit 5e34f0f

Please sign in to comment.