Skip to content

Commit

Permalink
fix(liveslots): collection deletion and retirement bugs (#9961)
Browse files Browse the repository at this point in the history
Liveslots was suffering from the following bugs:

* vrefs weren't properly encoded into DB keys and "encodePassable"
  keys when deleting collections (such that `isEncodedRemotable()`
  always answered false, which skipped the refcount processing of
  keys, and removal of the "ordinal-assignment" record)

* recognition records (vom.ir. keys) weren't created or removed for
  Remotable-style keys in weak collections

* Presence-style vrefs were not submitted for retirement processing
  when deleting a weak collection

As a result:

* `collection.clear()` left the "ordinal-assignment"` records in the
  vatstore, causing subsequent .has(oldkey) to incorrectly return
  true, and .init(oldkey, newvalue) to throw a "already registered"
  error, as well as consuming DB space longer than necessary.

* Invoking the `.clear()` method on a virtual/durable collection, or
  dereferencing the collection itself (and allowing it to be garbage
  collected), did not free any Remotables, virtual/durable
  objects (Representatives), or imported Presences used as keys. This
  could cause data in other weak collections (or other vats) to be
  retained longer than necessary.

* retiring (deleting) a Remotable used as a key in a weak
  virtual/durable collection did not free the corresponding value,
  causing data (in local and/or remote vats) to be retained longer
  than necessary

* Allowing a weak virtual/durable collection to be garbage collected
  did not inform the kernel that the vat can no longer recognize the
  Presence-style keys, consuming c-list entries longer than necessary.

This commit fixes those bugs, which fixes the immediate cause of
issues #8756 , #7355 , and #9956 . As a change to liveslots, full
deployment requires both restarting the kernel with this new
code, *and* triggering a vat upgrade of all vats. Once that is done,
no new collection entries will suffer the problems listed above.

However, this commit makes no attempt to remediate any existing data
corruption. See #8759 for plans to build a tool that can audit the
virtual-data reference graph and detect the consequences of these bugs
in pre-existing kernel databases, and see the issues listed above for
notes on efforts to build remediation tools.
  • Loading branch information
mergify[bot] authored Aug 31, 2024
2 parents 661af66 + 685e629 commit 813208a
Show file tree
Hide file tree
Showing 8 changed files with 1,206 additions and 210 deletions.
438 changes: 283 additions & 155 deletions packages/swingset-liveslots/src/boyd-gc.js

Large diffs are not rendered by default.

71 changes: 55 additions & 16 deletions packages/swingset-liveslots/src/collectionManager.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import {
zeroPad,
makeEncodePassable,
makeDecodePassable,
isEncodedRemotable,
compareRank,
} from '@endo/marshal';
import {
Expand Down Expand Up @@ -66,6 +65,12 @@ function prefixc(collectionID, dbEntryKey) {
return `vc.${collectionID}.${dbEntryKey}`;
}

export const collectionMetaKeys = new Set([
'|entryCount',
'|nextOrdinal',
'|schemata',
]);

/**
* @typedef {object} SchemaCacheValue
* @property {Pattern} keyShape
Expand Down Expand Up @@ -293,6 +298,20 @@ export function makeCollectionManager(
return `${dbKeyPrefix}${dbEntryKey}`;
}

// A "vref" is a string like "o-4" or "o+d44/2:0"
// An "EncodedKey" is the output of encode-passable:
// * strings become `s${string}`, like "foo" -> "sfoo"
// * small positive BigInts become `p${len}:${digits}`, like 47n -> "p2:47"
// * refs are assigned an "ordinal" and use `r${fixedLengthOrdinal}:${vref}`
// * e.g. vref(o-4) becomes "r0000000001:o-4"
// A "DBKey" is used to index the vatstore. DBKeys for collection
// entries join a collection prefix and an EncodedKey. Some
// possible DBKeys for entries of collection "5", using collection
// prefix "vc.5.", are:
// * "foo" -> "vc.5.sfoo"
// * 47n -> "vc.5.p2:47"
// * vref(o-4) -> "vc.5.r0000000001:o-4"

const encodeRemotable = remotable => {
// eslint-disable-next-line no-use-before-define
const ordinal = getOrdinal(remotable);
Expand All @@ -308,10 +327,11 @@ export function makeCollectionManager(
// the resulting function will encode only `Key` arguments.
const encodeKey = makeEncodePassable({ encodeRemotable });

const vrefFromDBKey = dbKey => dbKey.substring(BIGINT_TAG_LEN + 2);
const vrefFromEncodedKey = encodedKey =>
encodedKey.substring(1 + BIGINT_TAG_LEN + 1);

const decodeRemotable = encodedKey =>
convertSlotToVal(vrefFromDBKey(encodedKey));
convertSlotToVal(vrefFromEncodedKey(encodedKey));

// `makeDecodePassable` has three named options:
// `decodeRemotable`, `decodeError`, and `decodePromise`.
Expand Down Expand Up @@ -347,10 +367,16 @@ export function makeCollectionManager(
}

function dbKeyToKey(dbKey) {
// convert e.g. vc.5.r0000000001:o+v10/1 to r0000000001:o+v10/1
const dbEntryKey = dbKey.substring(dbKeyPrefix.length);
return decodeKey(dbEntryKey);
}

function dbKeyToEncodedKey(dbKey) {
assert(dbKey.startsWith(dbKeyPrefix), dbKey);
return dbKey.substring(dbKeyPrefix.length);
}

function has(key) {
const { keyShape } = getSchema();
if (!matches(key, keyShape)) {
Expand Down Expand Up @@ -553,26 +579,38 @@ export function makeCollectionManager(
*/
function clearInternalFull() {
let doMoreGC = false;
const [coverStart, coverEnd] = getRankCover(M.any(), encodeKey);
const start = prefix(coverStart);
const end = prefix(coverEnd);

// this yields all keys for which (start <= key < end)
for (const dbKey of enumerateKeysStartEnd(syscall, start, end)) {
const value = JSON.parse(syscall.vatstoreGet(dbKey));
doMoreGC =
value.slots.map(vrm.removeReachableVref).some(b => b) || doMoreGC;
syscall.vatstoreDelete(dbKey);
if (isEncodedRemotable(dbKey)) {
const keyVref = vrefFromDBKey(dbKey);

// visit every DB entry associated with the collection, which
// (due to sorting) will be collection entries first, and then a
// mixture of ordinal-assignment mappings and size-independent
// metadata (both of which start with "|").
for (const dbKey of enumerateKeysWithPrefix(syscall, dbKeyPrefix)) {
const encodedKey = dbKeyToEncodedKey(dbKey);

// preserve general metadata ("|entryCount" and friends are
// cleared by our caller)
if (collectionMetaKeys.has(encodedKey)) continue;

if (encodedKey.startsWith('|')) {
// ordinal assignment; decref or de-recognize its vref
const keyVref = encodedKey.substring(1);
parseVatSlot(keyVref);
if (hasWeakKeys) {
vrm.removeRecognizableVref(keyVref, `${collectionID}`, true);
} else {
doMoreGC = vrm.removeReachableVref(keyVref) || doMoreGC;
}
syscall.vatstoreDelete(prefix(`|${keyVref}`));
} else {
// a collection entry; decref slots from its value
const value = JSON.parse(syscall.vatstoreGet(dbKey));
doMoreGC =
value.slots.map(vrm.removeReachableVref).some(b => b) || doMoreGC;
}

// in either case, delete the DB entry
syscall.vatstoreDelete(dbKey);
}

return doMoreGC;
}

Expand All @@ -586,6 +624,7 @@ export function makeCollectionManager(
}
}
if (!hasWeakKeys && !isDeleting) {
// TODO: broken, see #10007
syscall.vatstoreSet(prefix('|entryCount'), '0');
}
return doMoreGC;
Expand Down
101 changes: 78 additions & 23 deletions packages/swingset-liveslots/src/virtualReferences.js
Original file line number Diff line number Diff line change
Expand Up @@ -500,26 +500,51 @@ export function makeVirtualReferenceManager(
}

/**
* A vref is "recognizable" when it is used as the key of a weak Map
* or Set: that Map/Set can be used to query whether a future
* specimen matches the original or not, without holding onto the
* original.
* A vref is "recognizable" when it is used as the key of a weak
* collection, like a virtual/durable WeakMapStore or WeakSetStore,
* or the ephemeral voAwareWeakMap/Set that we impose upon userspace
* as "WeakMap/WeakSet". The collection can be used to query whether
* a future specimen matches the original or not, without holding
* onto the original.
*
* This 'vrefRecognizers' is a Map from those vrefs to the set of
* recognizing weak collections, for virtual keys and non-virtual
* collections. Specifically, the vrefs correspond to imported
* Presences or virtual-object Representatives (Remotables do not
* participate: they are keyed by the actual Remotable object, not
* its vref). The collections are either a VirtualObjectAwareWeakMap
* or a VirtualObjectAwareWeakSet. We remove the entry when the key
* is removed from the collection, and when the entire collection is
* deleted.
* We need "recognition records" to map from the vref to the
* collection that can recognize it. When the vref is retired, we
* use the record to find all the collections from which we need to
* delete entries, so we can release the matching values. This might
* happen because the vref was for a Presence and the kernel just
* told us the upstream vat has deleted it (dispatch.retireImports),
* or because it was for a locally-managed object (an ephemeral
* Remotable or a virtual/durable Representative) and we decided to
* delete it.
*
* It is critical that each collection have exactly one recognizer that is
* unique to that collection, because the recognizers themselves will be
* tracked by their object identities, but the recognizer cannot be the
* collection itself else it would prevent the collection from being garbage
* collected.
* The virtual/durable collections track their "recognition records"
* in the vatstore, in keys like "vom.ir.${vref}|${collectionID}".
* These records do not contribute to our RAM usage.
*
* voAwareWeakMap and voAwareWeakSet store their recognition records
* in RAM, using this Map named 'vrefRecognizers'. Each key is a
* vref, and the value is a Set of recognizers. Each recognizer is
* the internal 'virtualObjectMap' in which the collection maps from
* vref to value. These in-RAM collections only use virtualObjectMap
* to track Presence-style (imports) and Representative-style
* (virtual/durable) vrefs: any Remotable-style keys are stored in
* the collection's internal (real) WeakMap under the Remotable
* object itself (because the engine handles the bookkeeping, and
* there is no virtual data in the value that we need to clean up at
* deletion time).
*
* Each voAwareWeakMap/Set must have a distinct recognizer, so we
* can remove the key from the right ones. The recognizer is held
* strongly by the recognition record, so it must not be the
* voAwareWeakMap/Set itself (which would inhibit GC).
*
* When an individual entry is deleted from the weak collection, we
* must also delete the recognition record. When the collection
* itself is deleted (i.e. because nothing was referencing it), we
* must both delete all recognition records and also notify the
* kernel about any Presence-style vrefs that we can no longer
* recognize (syscall.retireImports). The kernel doesn't care about
* Remotable- or Representative- style vrefs, only the imports.
*
* TODO: all the "recognizers" in principle could be, and probably should be,
* reduced to deleter functions. However, since the VirtualObjectAware
Expand All @@ -535,14 +560,24 @@ export function makeVirtualReferenceManager(
/** @type {Map<string, Set<Recognizer>>} */
const vrefRecognizers = new Map();

/**
* @param {*} value The vref-bearing object used as the collection key
* @param {string|Recognizer} recognizer The collectionID or virtualObjectMap for the collection
* @param {boolean} [recognizerIsVirtual] true for virtual/durable Stores, false for voAwareWeakMap/Set
*/
function addRecognizableValue(value, recognizer, recognizerIsVirtual) {
const vref = getSlotForVal(value);
if (vref) {
const { type, allocatedByVat, virtual, durable } = parseVatSlot(vref);
if (type === 'object' && (!allocatedByVat || virtual || durable)) {
if (type === 'object') {
// recognizerSet (voAwareWeakMap/Set) doesn't track Remotables
const notRemotable = !allocatedByVat || virtual || durable;

if (recognizerIsVirtual) {
assert.typeof(recognizer, 'string');
syscall.vatstoreSet(`vom.ir.${vref}|${recognizer}`, '1');
} else {
} else if (notRemotable) {
assert.typeof(recognizer, 'object');
let recognizerSet = vrefRecognizers.get(vref);
if (!recognizerSet) {
recognizerSet = new Set();
Expand All @@ -554,25 +589,45 @@ export function makeVirtualReferenceManager(
}
}

/**
* @param {string} vref The vref or the object used as the collection key
* @param {string|Recognizer} recognizer The collectionID or virtualObjectMap for the collection
* @param {boolean} [recognizerIsVirtual] true for virtual/durable Stores, false for voAwareWeakMap/Set
*/
function removeRecognizableVref(vref, recognizer, recognizerIsVirtual) {
const { type, allocatedByVat, virtual, durable } = parseVatSlot(vref);
if (type === 'object' && (!allocatedByVat || virtual || durable)) {
if (type === 'object') {
// addToPossiblyDeadSet only needs Presence-style vrefs
const isPresence = !allocatedByVat;
// recognizerSet (voAwareWeakMap/Set) doesn't track Remotables
const notRemotable = !allocatedByVat || virtual || durable;

if (recognizerIsVirtual) {
assert.typeof(recognizer, 'string');
syscall.vatstoreDelete(`vom.ir.${vref}|${recognizer}`);
} else {
if (isPresence) {
addToPossiblyRetiredSet(vref);
}
} else if (notRemotable) {
assert.typeof(recognizer, 'object');
const recognizerSet = vrefRecognizers.get(vref);
assert(recognizerSet && recognizerSet.has(recognizer));
recognizerSet.delete(recognizer);
if (recognizerSet.size === 0) {
vrefRecognizers.delete(vref);
if (!allocatedByVat) {
if (isPresence) {
addToPossiblyRetiredSet(vref);
}
}
}
}
}

/**
* @param {*} value The vref-bearing object used as the collection key
* @param {string|Recognizer} recognizer The collectionID or virtualObjectMap for the collection
* @param {boolean} [recognizerIsVirtual] true for virtual/durable Stores, false for voAwareWeakMap/Set
*/
function removeRecognizableValue(value, recognizer, recognizerIsVirtual) {
const vref = getSlotForVal(value);
if (vref) {
Expand Down
Loading

0 comments on commit 813208a

Please sign in to comment.