diff --git a/src/Neo.VM/ReferenceCounter.cs b/src/Neo.VM/ReferenceCounter.cs index be7844dac8..f9ea08a2e2 100644 --- a/src/Neo.VM/ReferenceCounter.cs +++ b/src/Neo.VM/ReferenceCounter.cs @@ -22,86 +22,180 @@ namespace Neo.VM /// public sealed class ReferenceCounter { + // If set to true, all items will be tracked regardless of their type. private const bool TrackAllItems = false; - private readonly HashSet tracked_items = new(ReferenceEqualityComparer.Instance); - private readonly HashSet zero_referred = new(ReferenceEqualityComparer.Instance); - private LinkedList>? cached_components; - private int references_count = 0; + // Stores items that are being tracked for references. + // Only CompoundType and Buffer items are tracked. + private readonly HashSet _trackedItems = new(ReferenceEqualityComparer.Instance); + + // Stores items that have zero references. + private readonly HashSet _zeroReferred = new(ReferenceEqualityComparer.Instance); + + // Caches strongly connected components for optimization. + private LinkedList>? _cachedComponents; + + // Keeps the total count of references. + private int _referencesCount = 0; /// - /// Indicates the number of this counter. + /// Gets the count of references. /// - public int Count => references_count; + public int Count => _referencesCount; + /// + /// Determines if an item needs to be tracked based on its type. + /// + /// The item to check. + /// True if the item needs to be tracked, otherwise false. [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool NeedTrack(StackItem item) { + // Track all items if TrackAllItems is true. #pragma warning disable CS0162 if (TrackAllItems) return true; #pragma warning restore CS0162 + + // Track the item if it is a CompoundType or Buffer. if (item is CompoundType or Buffer) return true; return false; } + /// + /// Adds a reference to a specified item with a parent compound type. 
+ /// + /// This method is used when an item gains a new reference through a parent compound type. + /// It increments the reference count and updates the tracking structures if necessary. + /// + /// Use this method when you need to add a reference from a compound type to a stack item. + /// + /// The item to add a reference to. + /// The parent compound type. internal void AddReference(StackItem item, CompoundType parent) { - references_count++; + // Increment the reference count. + _referencesCount++; + + // If the item doesn't need to be tracked, return early. + // Only track CompoundType and Buffer items. if (!NeedTrack(item)) return; - cached_components = null; - tracked_items.Add(item); - item.ObjectReferences ??= new(ReferenceEqualityComparer.Instance); + + // Invalidate the cached components since the tracked items are changing. + _cachedComponents = null; + + // Add the item to the set of tracked items. + _trackedItems.Add(item); + + // Initialize the ObjectReferences dictionary if it is null. + item.ObjectReferences ??= new Dictionary(ReferenceEqualityComparer.Instance); + + // Add the parent to the item's ObjectReferences dictionary and increment its reference count. if (!item.ObjectReferences.TryGetValue(parent, out var pEntry)) { - pEntry = new(parent); + pEntry = new StackItem.ObjectReferenceEntry(parent); item.ObjectReferences.Add(parent, pEntry); } pEntry.References++; } + /// + /// Adds a stack reference to a specified item with a count. + /// + /// This method is used when an item gains a new stack reference, usually due to being pushed onto the evaluation stack. + /// It increments the reference count and updates the tracking structures if necessary. + /// + /// Use this method when you need to add one or more stack references to a stack item. + /// + /// The item to add a stack reference to. + /// The number of references to add. 
internal void AddStackReference(StackItem item, int count = 1) { - references_count += count; + // Increment the reference count by the specified count. + _referencesCount += count; + + // If the item doesn't need to be tracked, return early. if (!NeedTrack(item)) return; - if (tracked_items.Add(item)) - cached_components?.AddLast(new HashSet(ReferenceEqualityComparer.Instance) { item }); + + // Add the item to the set of tracked items and to the cached components if needed. + if (_trackedItems.Add(item)) + _cachedComponents?.AddLast(new HashSet(ReferenceEqualityComparer.Instance) { item }); + + // Increment the item's stack references by the specified count. item.StackReferences += count; - zero_referred.Remove(item); + + // Remove the item from the _zeroReferred set since it now has references. + _zeroReferred.Remove(item); } + /// + /// Adds an item to the zero-referred list. + /// + /// This method is used when an item has no remaining references. + /// It adds the item to the zero-referred list to be checked for cleanup later. + /// + /// Use this method when you detect that an item has zero references and may need to be cleaned up. + /// + /// The item to add. internal void AddZeroReferred(StackItem item) { - zero_referred.Add(item); + // Add the item to the _zeroReferred set. + _zeroReferred.Add(item); + + // If the item doesn't need to be tracked, return early. if (!NeedTrack(item)) return; - cached_components?.AddLast(new HashSet(ReferenceEqualityComparer.Instance) { item }); - tracked_items.Add(item); + + // Add the item to the cached components and the set of tracked items. + _cachedComponents?.AddLast(new HashSet(ReferenceEqualityComparer.Instance) { item }); + _trackedItems.Add(item); } + /// + /// Checks and processes items that have zero references. + /// + /// This method is used to check items in the zero-referred list and clean up those that are no longer needed. 
+ /// It uses Tarjan's algorithm to find strongly connected components and remove those with no references. + /// + /// Use this method periodically to clean up items with zero references and free up memory. + /// + /// The current reference count. internal int CheckZeroReferred() { - if (zero_referred.Count > 0) + // If there are items with zero references, process them. + if (_zeroReferred.Count > 0) { - zero_referred.Clear(); - if (cached_components is null) + // Clear the zero_referred set since we are going to process all of them. + _zeroReferred.Clear(); + + // If cached components are null, we need to recompute the strongly connected components (SCCs). + if (_cachedComponents is null) { - //Tarjan tarjan = new(tracked_items.Where(p => p.StackReferences == 0)); - Tarjan tarjan = new(tracked_items); - cached_components = tarjan.Invoke(); + // Create a new Tarjan object and invoke it to find all SCCs in the tracked_items graph. + Tarjan tarjan = new(_trackedItems); + _cachedComponents = tarjan.Invoke(); } - foreach (StackItem item in tracked_items) + + // Reset all tracked items' Tarjan algorithm-related fields (DFN, LowLink, and OnStack). + foreach (StackItem item in _trackedItems) item.Reset(); - for (var node = cached_components.First; node != null;) + + // Process each SCC in the cached_components list. + for (var node = _cachedComponents.First; node != null;) { var component = node.Value; bool on_stack = false; + + // Check if any item in the SCC is still on the stack. foreach (StackItem item in component) { + // An item is considered 'on stack' if it has stack references or if its parent items are still on stack. if (item.StackReferences > 0 || item.ObjectReferences?.Values.Any(p => p.References > 0 && p.Item.OnStack) == true) { on_stack = true; break; } } + + // If any item in the component is on stack, mark all items in the component as on stack. 
if (on_stack) { foreach (StackItem item in component) @@ -110,46 +204,93 @@ internal int CheckZeroReferred() } else { + // Otherwise, remove the component and clean up the items. foreach (StackItem item in component) { - tracked_items.Remove(item); + _trackedItems.Remove(item); + + // If the item is a CompoundType, adjust the reference count and clean up its sub-items. if (item is CompoundType compound) { - references_count -= compound.SubItemsCount; + // Decrease the reference count by the number of sub-items. + _referencesCount -= compound.SubItemsCount; foreach (StackItem subitem in compound.SubItems) { + // Skip sub-items that are in the same component or don't need tracking. if (component.Contains(subitem)) continue; if (!NeedTrack(subitem)) continue; + + // Remove the parent reference from the sub-item. subitem.ObjectReferences!.Remove(compound); } } + + // Perform cleanup for the item. item.Cleanup(); } + + // Move to the next component and remove the current one from the cached_components list. var nodeToRemove = node; node = node.Next; - cached_components.Remove(nodeToRemove); + _cachedComponents.Remove(nodeToRemove); } } } - return references_count; + + // Return the current total reference count. + return _referencesCount; } + + /// + /// Removes a reference from a specified item with a parent compound type. + /// + /// This method is used when an item loses a reference from a parent compound type. + /// It decrements the reference count and updates the tracking structures if necessary. + /// + /// Use this method when you need to remove a reference from a compound type to a stack item. + /// + /// The item to remove a reference from. + /// The parent compound type. internal void RemoveReference(StackItem item, CompoundType parent) { - references_count--; + // Decrement the reference count. + _referencesCount--; + + // If the item doesn't need to be tracked, return early. 
if (!NeedTrack(item)) return; - cached_components = null; + + // Invalidate the cached components since the reference graph is changing. + _cachedComponents = null; + + // Decrement the reference count for the parent in the item's ObjectReferences dictionary. item.ObjectReferences![parent].References--; + + // If the item has no stack references, add it to the _zeroReferred set. if (item.StackReferences == 0) - zero_referred.Add(item); + _zeroReferred.Add(item); } + /// + /// Removes a stack reference from a specified item. + /// + /// This method is used when an item loses a stack reference, usually due to being popped off the evaluation stack. + /// It decrements the reference count and updates the tracking structures if necessary. + /// + /// Use this method when you need to remove a single stack reference from a stack item. + /// + /// The item to remove a stack reference from. internal void RemoveStackReference(StackItem item) { - references_count--; + // Decrement the reference count. + _referencesCount--; + + // If the item doesn't need to be tracked, return early. if (!NeedTrack(item)) return; + + // Decrement the item's stack references and add it to the _zeroReferred set if no stack references remain. if (--item.StackReferences == 0) - zero_referred.Add(item); + _zeroReferred.Add(item); } } } diff --git a/src/Neo.VM/ReferenceCounter.md b/src/Neo.VM/ReferenceCounter.md new file mode 100644 index 0000000000..87c9bd489e --- /dev/null +++ b/src/Neo.VM/ReferenceCounter.md @@ -0,0 +1,241 @@ +# ReferenceCounter + +`ReferenceCounter` is a reference counting manager for use in a virtual machine (VM). It is designed to track and manage the reference counts of objects to ensure they are correctly cleaned up when no longer referenced, thereby preventing memory leaks. + +## Purpose + +In a virtual machine, managing object memory is crucial. The main purposes of `ReferenceCounter` are: + +1. 
**Tracking Object References**: Manage and track the reference relationships between objects. +2. **Memory Management**: Ensure that objects are correctly released when they are no longer referenced. +3. **Preventing Memory Leaks**: Avoid memory leaks by using reference counting and detecting circular references. + +## Technical Principles + +### Reference Counting + +Reference counting is a memory management technique used to track the number of references to each object. When an object's reference count drops to zero, it indicates that the object is no longer in use and can be safely cleaned up. `ReferenceCounter` uses the principles of reference counting to manage the lifecycle of objects: + +- **Increment Reference Count**: Increase the reference count when an object is referenced. +- **Decrement Reference Count**: Decrease the reference count when a reference is removed. +- **Cleanup Object**: Clean up the object when its reference count drops to zero. + +### What is Tracked + +In the Neo VM, the `ReferenceCounter` class counts references in order to track and manage `StackItem` objects. Its `_referencesCount` field (exposed through the `Count` property) holds the total number of current references, including stack references and object references. 
Specifically, `_referencesCount` increases or decreases in the following situations: + +#### Increment Reference + +Use the `AddReference` method to increment the reference count of an object: + +```csharp +internal void AddReference(StackItem item, CompoundType parent) +{ + _referencesCount++; + if (!NeedTrack(item)) return; + _cachedComponents = null; + _trackedItems.Add(item); + item.ObjectReferences ??= new(ReferenceEqualityComparer.Instance); + if (!item.ObjectReferences.TryGetValue(parent, out var pEntry)) + { + pEntry = new(parent); + item.ObjectReferences.Add(parent, pEntry); + } + pEntry.References++; +} +``` + +#### Decrement Reference + +Use the `RemoveReference` method to decrement the reference count of an object: + +```csharp +internal void RemoveReference(StackItem item, CompoundType parent) +{ + _referencesCount--; + if (!NeedTrack(item)) return; + _cachedComponents = null; + item.ObjectReferences![parent].References--; + if (item.StackReferences == 0) + _zeroReferred.Add(item); +} +``` + +#### Increment Stack Reference + +Use the `AddStackReference` method to increment the stack reference count of an object: + +```csharp +internal void AddStackReference(StackItem item, int count = 1) +{ + _referencesCount += count; + if (!NeedTrack(item)) return; + if (_trackedItems.Add(item)) + _cachedComponents?.AddLast(new HashSet<StackItem>(ReferenceEqualityComparer.Instance) { item }); + item.StackReferences += count; + _zeroReferred.Remove(item); +} +``` + +#### Decrement Stack Reference + +Use the `RemoveStackReference` method to decrement the stack reference count of an object: + +```csharp +internal void RemoveStackReference(StackItem item) +{ + _referencesCount--; + if (!NeedTrack(item)) return; + if (--item.StackReferences == 0) + _zeroReferred.Add(item); +} +``` + +### Circular References + +Circular references occur when objects reference each other, preventing their reference counts from dropping to zero, which can lead to memory leaks. 
`ReferenceCounter` addresses circular references using the following methods: + +1. **Mark and Sweep**: Detect and clean up strongly connected components when circular references are identified using algorithms like Tarjan's algorithm. +2. **Recursive Reference Management**: Recursively manage the reference counts of nested objects to ensure all reference relationships are correctly handled. + +### Tarjan's Algorithm + +Tarjan's algorithm is a graph theory algorithm for finding strongly connected components (SCCs) in a directed graph. An SCC is a maximal subgraph where every vertex is reachable from every other vertex in the subgraph. In the context of `ReferenceCounter`, Tarjan's algorithm is used to detect circular references, allowing for efficient memory management and cleanup of objects that are no longer reachable. + +#### How Tarjan's Algorithm Works + +1. **Initialization**: + - Each node (object) in the graph is initially unvisited. The algorithm uses a stack to keep track of the current path and arrays (or lists) to store the discovery time (`DFN`) and the lowest point reachable (`LowLink`) for each node. + +2. **Depth-First Search (DFS)**: + - Starting from an unvisited node, the algorithm performs a DFS. Each node visited is assigned a discovery time and a `LowLink` value, both initially set to the node's discovery time. + +3. **Update LowLink**: + - For each node, the algorithm updates the `LowLink` value based on the nodes reachable from its descendants. If a descendant node points back to an ancestor in the current path (stack), the `LowLink` value of the current node is updated to the minimum of its own `LowLink` and the descendant's `LowLink`. + +4. **Identify SCCs**: + - When a node's `LowLink` value is equal to its discovery time, it indicates the root of an SCC. The algorithm then pops nodes from the stack until it reaches the current node, forming an SCC. + +5. 
**Cleanup**: + - Once SCCs are identified, nodes that have no remaining references are cleaned up, preventing memory leaks caused by circular references. + +### Tarjan's Algorithm in `ReferenceCounter` + +The `CheckZeroReferred` method in `ReferenceCounter` uses Tarjan's algorithm to detect and handle circular references. Here’s a detailed breakdown of the algorithm as used in `CheckZeroReferred`: + +```csharp +internal int CheckZeroReferred() +{ + // If there are items with zero references, process them. + if (_zeroReferred.Count > 0) + { + // Clear the _zeroReferred set since we are going to process all of them. + _zeroReferred.Clear(); + + // If cached components are null, we need to recompute the strongly connected components (SCCs). + if (_cachedComponents is null) + { + // Create a new Tarjan object and invoke it to find all SCCs in the _trackedItems graph. + Tarjan tarjan = new(_trackedItems); + _cachedComponents = tarjan.Invoke(); + } + + // Reset all tracked items' Tarjan algorithm-related fields (DFN, LowLink, and OnStack). + foreach (StackItem item in _trackedItems) + item.Reset(); + + // Process each SCC in the _cachedComponents list. + for (var node = _cachedComponents.First; node != null;) + { + var component = node.Value; + bool on_stack = false; + + // Check if any item in the SCC is still on the stack. + foreach (StackItem item in component) + { + // An item is considered 'on stack' if it has stack references or if its parent items are still on stack. + if (item.StackReferences > 0 || item.ObjectReferences?.Values.Any(p => p.References > 0 && p.Item.OnStack) == true) + { + on_stack = true; + break; + } + } + + // If any item in the component is on stack, mark all items in the component as on stack. + if (on_stack) + { + foreach (StackItem item in component) + item.OnStack = true; + node = node.Next; + } + else + { + // Otherwise, remove the component and clean up the items. 
+ foreach (StackItem item in component) + { + _trackedItems.Remove(item); + + // If the item is a CompoundType, adjust the reference count and clean up its sub-items. + if (item is CompoundType compound) + { + // Decrease the reference count by the number of sub-items. + _referencesCount -= compound.SubItemsCount; + foreach (StackItem subitem in compound.SubItems) + { + // Skip sub-items that are in the same component or don't need tracking. + if (component.Contains(subitem)) continue; + if (!NeedTrack(subitem)) continue; + + // Remove the parent reference from the sub-item. + subitem.ObjectReferences!.Remove(compound); + } + } + + // Perform cleanup for the item. + item.Cleanup(); + } + + // Move to the next component and remove the current one from the _cachedComponents list. + var nodeToRemove = node; + node = node.Next; + _cachedComponents.Remove(nodeToRemove); + } + } + } + + // Return the current total reference count. + return _referencesCount; +} +``` + +### Detailed Explanation + +1. **Initialization and Check for Zero References**: + - The method starts by checking if there are any items in `_zeroReferred`. If there are, it clears the `_zeroReferred` set. + +2. **Compute Strongly Connected Components (SCCs)**: + - If there are no cached SCCs, it recomputes them using Tarjan's algorithm. This involves creating a `Tarjan` object, passing the `_trackedItems` to it, and invoking the algorithm to get the SCCs. + +3. **Reset Tarjan-related Fields**: + - It resets the Tarjan-related fields (`DFN`, `LowLink`, `OnStack`) of all tracked items to prepare for processing SCCs. + +4. **Process Each SCC**: + - It iterates through each SCC (component) in the cached components list. For each component, it checks if any item is still on the stack by looking at its `StackReferences` or if any of its parent items are on the stack. + +5. 
**Mark Items as On Stack**: + - If any item in the component is still on the stack, it marks all items in the component as on the stack and moves to the next component. + +6. **Remove and Clean Up Items**: + - If no items in the component are on the stack, it removes the component and cleans up all items within it. For `CompoundType` items, it adjusts the reference count and removes parent references from their sub-items. It then performs cleanup on each item and removes the component from the cached components list. + +7. **Return Reference Count**: + - Finally, it returns the current total reference count. + +## Features + +`ReferenceCounter` provides the following features: + +1. **Increment Reference Count**: Increment the reference count of objects. +2. **Decrement Reference Count**: Decrement the reference count of objects. +3. **Check Zero-Referenced Objects**: Detect and clean up objects with a reference count of zero. +4. **Manage Nested References**: Recursively manage the reference counts of nested objects, supporting nested arrays of arbitrary depth. diff --git a/src/Neo.VM/Types/StackItem.Vertex.cs b/src/Neo.VM/Types/StackItem.Vertex.cs index a988d5db32..d3e9ed4dbd 100644 --- a/src/Neo.VM/Types/StackItem.Vertex.cs +++ b/src/Neo.VM/Types/StackItem.Vertex.cs @@ -17,23 +17,109 @@ namespace Neo.VM.Types { partial class StackItem { + /// + /// Represents an entry for an object reference. + /// + /// This class is used to keep track of references from compound types to other CompoundType or Buffer items. + /// It contains the referencing parent item and the number of references that parent holds to the child. + /// + /// Use this class to manage references from compound types to their child items. + /// + /// This is used to track the number of references from the same parent to the same child. + /// This is used for the purpose of determining strongly connected components. + /// + /// internal class ObjectReferenceEntry { + /// + /// The parent StackItem that holds the reference(s). 
+ /// public StackItem Item; + + /// + /// The number of references that Item holds to the StackItem owning this entry. + /// public int References; + + /// + /// Initializes a new instance of the ObjectReferenceEntry class with the specified parent StackItem. + /// + /// The parent StackItem that holds the reference. public ObjectReferenceEntry(StackItem item) => Item = item; } + /// + /// The number of references to this StackItem from the evaluation stack. + /// + /// This field tracks how many times this item is referenced by the evaluation stack. + /// It is incremented when the item is pushed onto the stack and decremented when it is popped off. + /// + /// Use this field to manage stack references and determine when an item is no longer needed. + /// internal int StackReferences = 0; + + /// + /// A dictionary mapping each parent compound type that references this item to its object reference entry. + /// + /// This dictionary is used to track references to this item from its parent compound types. + /// It allows efficient lookup and management of references. + /// + /// Use this dictionary to manage references from compound types to their children. + /// Only CompoundType and Buffer items will be assigned an ObjectReferences dictionary, + /// for other types it stays null. + /// internal Dictionary? ObjectReferences; + + /// + /// Depth-First Number for Tarjan's algorithm. + /// internal int DFN = -1; + + /// + /// Low-link value for Tarjan's algorithm. + /// internal int LowLink = 0; + + /// + /// Indicates whether the item is currently on the stack for Tarjan's algorithm. + /// + /// + /// This should only be used for Tarjan's algorithm. It cannot be used to indicate + /// whether an item is on the stack or not since it can still be false if a value is + /// on the stack but the algorithm is not yet running. + /// + /// internal bool OnStack = false; + /// + /// Returns the successors of the current item based on object references. + /// + /// This property provides an enumerable of the parent StackItems that hold at least one reference to this item. + /// It is used by Tarjan's algorithm to find strongly connected components. 
+ /// + /// Use this property when you need to iterate over the successors of a StackItem. + /// internal IEnumerable Successors => ObjectReferences?.Values.Where(p => p.References > 0).Select(p => p.Item) ?? System.Array.Empty(); + /// + /// Resets the strongly connected components-related fields. + /// + /// This method resets the DFN, LowLink, and OnStack fields to their default values. + /// ReferenceCounter.CheckZeroReferred calls it on every tracked item before scanning the components, so OnStack starts out false for every item. + /// + /// Use this method to reset the state of a StackItem for strongly connected components analysis. + /// internal void Reset() => (DFN, LowLink, OnStack) = (-1, 0, false); + /// + /// Generates a hash code based on the item's span. + /// + /// This method provides a hash code for the StackItem based on its byte span. + /// It is used for efficient storage and retrieval in hash-based collections. + /// NOTE(review): the span is copied into a new array and the array object itself is passed to HashCode.Combine; verify that the resulting hash reflects the bytes rather than the array instance. + /// + /// Use this method when you need a hash code for a StackItem. + /// + /// The hash code for the StackItem. public override int GetHashCode() => HashCode.Combine(GetSpan().ToArray()); }