From e653848a2cdbd6571a325bededd3535a5dc09de2 Mon Sep 17 00:00:00 2001 From: LotP1 <68976644+LotP1@users.noreply.github.com> Date: Fri, 22 Nov 2024 22:33:44 +0100 Subject: [PATCH] JIT Sparse Function Table (#250) More up to date build of the JIT Sparse PR for continued development. JIT Sparse Function Table was originally developed by riperiperi for the original Ryujinx project, and decreased the amount of layers in the Function Table structure, to decrease lookup times at the cost of slightly higher RAM usage. This PR rebalances the JIT Sparse Function Table to be a bit more RAM intensive, but faster in workloads where the JIT Function Table is a bottleneck. Faster RAM will see a bigger impact and slower RAM (DDR3 and potentially slow DDR4) will see a slight performance decrease. This PR also implements a base for a PPTC profile system that could allow for PPTC with ExeFS mods enabled in the future. This PR also potentially fixes a strange issue where Avalonia would time out in some rare instances, e.g. when running ExeFS mods with TotK and a strange controller configuration. --------- Co-authored-by: Evan Husted --- src/ARMeilleure/Common/AddressTable.cs | 252 --------- src/ARMeilleure/Common/AddressTableLevel.cs | 44 ++ src/ARMeilleure/Common/AddressTablePresets.cs | 75 +++ src/ARMeilleure/Common/Allocator.cs | 2 +- src/ARMeilleure/Common/IAddressTable.cs | 51 ++ src/ARMeilleure/Common/NativeAllocator.cs | 2 +- .../Instructions/InstEmitFlowHelper.cs | 26 + .../Signal/NativeSignalHandlerGenerator.cs | 2 +- .../Translation/ArmEmitterContext.cs | 4 +- src/ARMeilleure/Translation/PTC/Ptc.cs | 15 +- src/ARMeilleure/Translation/Translator.cs | 30 +- .../Translation/TranslatorStubs.cs | 4 +- src/Ryujinx.Cpu/AddressTable.cs | 482 ++++++++++++++++++ src/Ryujinx.Cpu/AppleHv/HvCpuContext.cs | 2 +- src/Ryujinx.Cpu/ICpuContext.cs | 2 +- src/Ryujinx.Cpu/Jit/JitCpuContext.cs | 10 +- .../Arm32/Target/Arm64/InstEmitFlow.cs | 62 ++- .../Arm64/Target/Arm64/InstEmitSystem.cs | 62 ++- .../LightningJit/LightningJitCpuContext.cs | 11 +- src/Ryujinx.Cpu/LightningJit/Translator.cs | 23 +- .../LightningJit/TranslatorStubs.cs | 4 +- src/Ryujinx.HLE/HOS/ArmProcessContext.cs | 8 +- .../HOS/ArmProcessContextFactory.cs | 2 +- src/Ryujinx.Memory/SparseMemoryBlock.cs | 125 +++++ src/Ryujinx.Tests/Cpu/CpuContext.cs | 3 +- src/Ryujinx.Tests/Cpu/EnvironmentTests.cs | 7 +- src/Ryujinx.Tests/Memory/PartialUnmaps.cs | 7 +- 27 files changed, 990 insertions(+), 327 deletions(-) delete mode 100644 src/ARMeilleure/Common/AddressTable.cs create mode 100644 src/ARMeilleure/Common/AddressTableLevel.cs create mode 100644 src/ARMeilleure/Common/AddressTablePresets.cs create mode 100644 src/ARMeilleure/Common/IAddressTable.cs create mode 100644 src/Ryujinx.Cpu/AddressTable.cs create mode 100644 src/Ryujinx.Memory/SparseMemoryBlock.cs diff --git a/src/ARMeilleure/Common/AddressTable.cs b/src/ARMeilleure/Common/AddressTable.cs deleted file mode 100644 index a3ffaf470e..0000000000 --- a/src/ARMeilleure/Common/AddressTable.cs +++ /dev/null @@ -1,252 +0,0 @@ -using ARMeilleure.Diagnostics; -using System; -using System.Collections.Generic; -using System.Runtime.InteropServices; - -namespace ARMeilleure.Common -{ - /// - /// Represents a table of guest address to a value. - /// - /// Type of the value - public unsafe class AddressTable : IDisposable where TEntry : unmanaged - { - /// - /// Represents a level in an . - /// - public readonly struct Level - { - /// - /// Gets the index of the in the guest address. - /// - public int Index { get; } - - /// - /// Gets the length of the in the guest address. - /// - public int Length { get; } - - /// - /// Gets the mask which masks the bits used by the . - /// - public ulong Mask => ((1ul << Length) - 1) << Index; - - /// - /// Initializes a new instance of the structure with the specified - /// and . - /// - /// Index of the - /// Length of the - public Level(int index, int length) - { - (Index, Length) = (index, length); - } - - /// - /// Gets the value of the from the specified guest . - /// - /// Guest address - /// Value of the from the specified guest - public int GetValue(ulong address) - { - return (int)((address & Mask) >> Index); - } - } - - private bool _disposed; - private TEntry** _table; - private readonly List _pages; - - /// - /// Gets the bits used by the of the instance. - /// - public ulong Mask { get; } - - /// - /// Gets the s used by the instance. - /// - public Level[] Levels { get; } - - /// - /// Gets or sets the default fill value of newly created leaf pages. - /// - public TEntry Fill { get; set; } - - /// - /// Gets the base address of the . - /// - /// instance was disposed - public nint Base - { - get - { - ObjectDisposedException.ThrowIf(_disposed, this); - - lock (_pages) - { - return (nint)GetRootPage(); - } - } - } - - /// - /// Constructs a new instance of the class with the specified list of - /// . - /// - /// is null - /// Length of is less than 2 - public AddressTable(Level[] levels) - { - ArgumentNullException.ThrowIfNull(levels); - - if (levels.Length < 2) - { - throw new ArgumentException("Table must be at least 2 levels deep.", nameof(levels)); - } - - _pages = new List(capacity: 16); - - Levels = levels; - Mask = 0; - - foreach (var level in Levels) - { - Mask |= level.Mask; - } - } - - /// - /// Determines if the specified is in the range of the - /// . - /// - /// Guest address - /// if is valid; otherwise - public bool IsValid(ulong address) - { - return (address & ~Mask) == 0; - } - - /// - /// Gets a reference to the value at the specified guest . - /// - /// Guest address - /// Reference to the value at the specified guest - /// instance was disposed - /// is not mapped - public ref TEntry GetValue(ulong address) - { - ObjectDisposedException.ThrowIf(_disposed, this); - - if (!IsValid(address)) - { - throw new ArgumentException($"Address 0x{address:X} is not mapped onto the table.", nameof(address)); - } - - lock (_pages) - { - return ref GetPage(address)[Levels[^1].GetValue(address)]; - } - } - - /// - /// Gets the leaf page for the specified guest . - /// - /// Guest address - /// Leaf page for the specified guest - private TEntry* GetPage(ulong address) - { - TEntry** page = GetRootPage(); - - for (int i = 0; i < Levels.Length - 1; i++) - { - ref Level level = ref Levels[i]; - ref TEntry* nextPage = ref page[level.GetValue(address)]; - - if (nextPage == null) - { - ref Level nextLevel = ref Levels[i + 1]; - - nextPage = i == Levels.Length - 2 ? - (TEntry*)Allocate(1 << nextLevel.Length, Fill, leaf: true) : - (TEntry*)Allocate(1 << nextLevel.Length, nint.Zero, leaf: false); - } - - page = (TEntry**)nextPage; - } - - return (TEntry*)page; - } - - /// - /// Lazily initialize and get the root page of the . - /// - /// Root page of the - private TEntry** GetRootPage() - { - if (_table == null) - { - _table = (TEntry**)Allocate(1 << Levels[0].Length, fill: nint.Zero, leaf: false); - } - - return _table; - } - - /// - /// Allocates a block of memory of the specified type and length. - /// - /// Type of elements - /// Number of elements - /// Fill value - /// if leaf; otherwise - /// Allocated block - private nint Allocate(int length, T fill, bool leaf) where T : unmanaged - { - var size = sizeof(T) * length; - var page = (nint)NativeAllocator.Instance.Allocate((uint)size); - var span = new Span((void*)page, length); - - span.Fill(fill); - - _pages.Add(page); - - TranslatorEventSource.Log.AddressTableAllocated(size, leaf); - - return page; - } - - /// - /// Releases all resources used by the instance. - /// - public void Dispose() - { - Dispose(true); - GC.SuppressFinalize(this); - } - - /// - /// Releases all unmanaged and optionally managed resources used by the - /// instance. - /// - /// to dispose managed resources also; otherwise just unmanaged resouces - protected virtual void Dispose(bool disposing) - { - if (!_disposed) - { - foreach (var page in _pages) - { - Marshal.FreeHGlobal(page); - } - - _disposed = true; - } - } - - /// - /// Frees resources used by the instance. - /// - ~AddressTable() - { - Dispose(false); - } - } -} diff --git a/src/ARMeilleure/Common/AddressTableLevel.cs b/src/ARMeilleure/Common/AddressTableLevel.cs new file mode 100644 index 0000000000..6107726eef --- /dev/null +++ b/src/ARMeilleure/Common/AddressTableLevel.cs @@ -0,0 +1,44 @@ +namespace ARMeilleure.Common +{ + /// + /// Represents a level in an . + /// + public readonly struct AddressTableLevel + { + /// + /// Gets the index of the in the guest address. + /// + public int Index { get; } + + /// + /// Gets the length of the in the guest address. + /// + public int Length { get; } + + /// + /// Gets the mask which masks the bits used by the . + /// + public ulong Mask => ((1ul << Length) - 1) << Index; + + /// + /// Initializes a new instance of the structure with the specified + /// and . + /// + /// Index of the + /// Length of the + public AddressTableLevel(int index, int length) + { + (Index, Length) = (index, length); + } + + /// + /// Gets the value of the from the specified guest . + /// + /// Guest address + /// Value of the from the specified guest + public int GetValue(ulong address) + { + return (int)((address & Mask) >> Index); + } + } +} diff --git a/src/ARMeilleure/Common/AddressTablePresets.cs b/src/ARMeilleure/Common/AddressTablePresets.cs new file mode 100644 index 0000000000..977e84a363 --- /dev/null +++ b/src/ARMeilleure/Common/AddressTablePresets.cs @@ -0,0 +1,75 @@ +namespace ARMeilleure.Common +{ + public static class AddressTablePresets + { + private static readonly AddressTableLevel[] _levels64Bit = + new AddressTableLevel[] + { + new(31, 17), + new(23, 8), + new(15, 8), + new( 7, 8), + new( 2, 5), + }; + + private static readonly AddressTableLevel[] _levels32Bit = + new AddressTableLevel[] + { + new(31, 17), + new(23, 8), + new(15, 8), + new( 7, 8), + new( 1, 6), + }; + + private static readonly AddressTableLevel[] _levels64BitSparseTiny = + new AddressTableLevel[] + { + new( 11, 28), + new( 2, 9), + }; + + private static readonly AddressTableLevel[] _levels32BitSparseTiny = + new AddressTableLevel[] + { + new( 10, 22), + new( 1, 9), + }; + + private static readonly AddressTableLevel[] _levels64BitSparseGiant = + new AddressTableLevel[] + { + new( 38, 1), + new( 2, 36), + }; + + private static readonly AddressTableLevel[] _levels32BitSparseGiant = + new AddressTableLevel[] + { + new( 31, 1), + new( 1, 30), + }; + + //high power will run worse on DDR3 systems and some DDR4 systems due to the higher ram utilization + //low power will never run worse than non-sparse, but for most systems it won't be necessary + //high power is always used, but I've left low power in here for future reference + public static AddressTableLevel[] GetArmPreset(bool for64Bits, bool sparse, bool lowPower = false) + { + if (sparse) + { + if (lowPower) + { + return for64Bits ? _levels64BitSparseTiny : _levels32BitSparseTiny; + } + else + { + return for64Bits ? _levels64BitSparseGiant : _levels32BitSparseGiant; + } + } + else + { + return for64Bits ? _levels64Bit : _levels32Bit; + } + } + } +} diff --git a/src/ARMeilleure/Common/Allocator.cs b/src/ARMeilleure/Common/Allocator.cs index 6905a614f0..de6a77ebef 100644 --- a/src/ARMeilleure/Common/Allocator.cs +++ b/src/ARMeilleure/Common/Allocator.cs @@ -2,7 +2,7 @@ namespace ARMeilleure.Common { - unsafe abstract class Allocator : IDisposable + public unsafe abstract class Allocator : IDisposable { public T* Allocate(ulong count = 1) where T : unmanaged { diff --git a/src/ARMeilleure/Common/IAddressTable.cs b/src/ARMeilleure/Common/IAddressTable.cs new file mode 100644 index 0000000000..65077ec436 --- /dev/null +++ b/src/ARMeilleure/Common/IAddressTable.cs @@ -0,0 +1,51 @@ +using System; + +namespace ARMeilleure.Common +{ + public interface IAddressTable : IDisposable where TEntry : unmanaged + { + /// + /// True if the address table's bottom level is sparsely mapped. + /// This also ensures the second bottom level is filled with a dummy page rather than 0. + /// + bool Sparse { get; } + + /// + /// Gets the bits used by the of the instance. + /// + ulong Mask { get; } + + /// + /// Gets the s used by the instance. + /// + AddressTableLevel[] Levels { get; } + + /// + /// Gets or sets the default fill value of newly created leaf pages. + /// + TEntry Fill { get; set; } + + /// + /// Gets the base address of the . + /// + /// instance was disposed + nint Base { get; } + + /// + /// Determines if the specified is in the range of the + /// . + /// + /// Guest address + /// if is valid; otherwise + bool IsValid(ulong address); + + /// + /// Gets a reference to the value at the specified guest . + /// + /// Guest address + /// Reference to the value at the specified guest + /// instance was disposed + /// is not mapped + ref TEntry GetValue(ulong address); + } +} diff --git a/src/ARMeilleure/Common/NativeAllocator.cs b/src/ARMeilleure/Common/NativeAllocator.cs index ca5d3a850f..ffcffa4bc6 100644 --- a/src/ARMeilleure/Common/NativeAllocator.cs +++ b/src/ARMeilleure/Common/NativeAllocator.cs @@ -3,7 +3,7 @@ namespace ARMeilleure.Common { - unsafe sealed class NativeAllocator : Allocator + public unsafe sealed class NativeAllocator : Allocator { public static NativeAllocator Instance { get; } = new(); diff --git a/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs b/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs index 2009bafdac..a602ea49ed 100644 --- a/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs +++ b/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs @@ -193,6 +193,8 @@ private static void EmitTableBranch(ArmEmitterContext context, Operand guestAddr Operand hostAddress; + var table = context.FunctionTable; + // If address is mapped onto the function table, we can skip the table walk. Otherwise we fallback // onto the dispatch stub. if (guestAddress.Kind == OperandKind.Constant && context.FunctionTable.IsValid(guestAddress.Value)) @@ -203,6 +205,30 @@ private static void EmitTableBranch(ArmEmitterContext context, Operand guestAddr hostAddress = context.Load(OperandType.I64, hostAddressAddr); } + else if (table.Sparse) + { + // Inline table lookup. Only enabled when the sparse function table is enabled with 2 levels. + // Deliberately attempts to avoid branches. + + Operand tableBase = !context.HasPtc ? + Const(table.Base) : + Const(table.Base, Ptc.FunctionTableSymbol); + + hostAddress = tableBase; + + for (int i = 0; i < table.Levels.Length; i++) + { + var level = table.Levels[i]; + int clearBits = 64 - (level.Index + level.Length); + + Operand index = context.ShiftLeft( + context.ShiftRightUI(context.ShiftLeft(guestAddress, Const(clearBits)), Const(clearBits + level.Index)), + Const(3) + ); + + hostAddress = context.Load(OperandType.I64, context.Add(hostAddress, index)); + } + } else { hostAddress = !context.HasPtc ? diff --git a/src/ARMeilleure/Signal/NativeSignalHandlerGenerator.cs b/src/ARMeilleure/Signal/NativeSignalHandlerGenerator.cs index 1b3689e3f4..35747d7a4d 100644 --- a/src/ARMeilleure/Signal/NativeSignalHandlerGenerator.cs +++ b/src/ARMeilleure/Signal/NativeSignalHandlerGenerator.cs @@ -8,7 +8,7 @@ namespace ARMeilleure.Signal { public static class NativeSignalHandlerGenerator { - public const int MaxTrackedRanges = 8; + public const int MaxTrackedRanges = 16; private const int StructAddressOffset = 0; private const int StructWriteOffset = 4; diff --git a/src/ARMeilleure/Translation/ArmEmitterContext.cs b/src/ARMeilleure/Translation/ArmEmitterContext.cs index 5d79171a27..82f12bb027 100644 --- a/src/ARMeilleure/Translation/ArmEmitterContext.cs +++ b/src/ARMeilleure/Translation/ArmEmitterContext.cs @@ -46,7 +46,7 @@ public Block CurrBlock public IMemoryManager Memory { get; } public EntryTable CountTable { get; } - public AddressTable FunctionTable { get; } + public IAddressTable FunctionTable { get; } public TranslatorStubs Stubs { get; } public ulong EntryAddress { get; } @@ -62,7 +62,7 @@ public Block CurrBlock public ArmEmitterContext( IMemoryManager memory, EntryTable countTable, - AddressTable funcTable, + IAddressTable funcTable, TranslatorStubs stubs, ulong entryAddress, bool highCq, diff --git a/src/ARMeilleure/Translation/PTC/Ptc.cs b/src/ARMeilleure/Translation/PTC/Ptc.cs index 8236150fe9..c722ce6be2 100644 --- a/src/ARMeilleure/Translation/PTC/Ptc.cs +++ b/src/ARMeilleure/Translation/PTC/Ptc.cs @@ -30,7 +30,7 @@ class Ptc : IPtcLoadState private const string OuterHeaderMagicString = "PTCohd\0\0"; private const string InnerHeaderMagicString = "PTCihd\0\0"; - private const uint InternalVersion = 6950; //! To be incremented manually for each change to the ARMeilleure project. + private const uint InternalVersion = 6992; //! To be incremented manually for each change to the ARMeilleure project. private const string ActualDir = "0"; private const string BackupDir = "1"; @@ -41,6 +41,7 @@ class Ptc : IPtcLoadState public static readonly Symbol PageTableSymbol = new(SymbolType.Special, 1); public static readonly Symbol CountTableSymbol = new(SymbolType.Special, 2); public static readonly Symbol DispatchStubSymbol = new(SymbolType.Special, 3); + public static readonly Symbol FunctionTableSymbol = new(SymbolType.Special, 4); private const byte FillingByte = 0x00; private const CompressionLevel SaveCompressionLevel = CompressionLevel.Fastest; @@ -101,7 +102,7 @@ public Ptc() Disable(); } - public void Initialize(string titleIdText, string displayVersion, bool enabled, MemoryManagerType memoryMode) + public void Initialize(string titleIdText, string displayVersion, bool enabled, MemoryManagerType memoryMode, string cacheSelector) { Wait(); @@ -127,6 +128,8 @@ public void Initialize(string titleIdText, string displayVersion, bool enabled, DisplayVersion = !string.IsNullOrEmpty(displayVersion) ? displayVersion : DisplayVersionDefault; _memoryMode = memoryMode; + Logger.Info?.Print(LogClass.Ptc, $"PPTC (v{InternalVersion}) Profile: {DisplayVersion}-{cacheSelector}"); + string workPathActual = Path.Combine(AppDataManager.GamesDirPath, TitleIdText, "cache", "cpu", ActualDir); string workPathBackup = Path.Combine(AppDataManager.GamesDirPath, TitleIdText, "cache", "cpu", BackupDir); @@ -140,8 +143,8 @@ public void Initialize(string titleIdText, string displayVersion, bool enabled, Directory.CreateDirectory(workPathBackup); } - CachePathActual = Path.Combine(workPathActual, DisplayVersion); - CachePathBackup = Path.Combine(workPathBackup, DisplayVersion); + CachePathActual = Path.Combine(workPathActual, DisplayVersion) + "-" + cacheSelector; + CachePathBackup = Path.Combine(workPathBackup, DisplayVersion) + "-" + cacheSelector; PreLoad(); Profiler.PreLoad(); @@ -706,6 +709,10 @@ private static void PatchCode(Translator translator, Span code, RelocEntry { imm = translator.Stubs.DispatchStub; } + else if (symbol == FunctionTableSymbol) + { + imm = translator.FunctionTable.Base; + } if (imm == null) { diff --git a/src/ARMeilleure/Translation/Translator.cs b/src/ARMeilleure/Translation/Translator.cs index 24fbd76219..162368782f 100644 --- a/src/ARMeilleure/Translation/Translator.cs +++ b/src/ARMeilleure/Translation/Translator.cs @@ -22,33 +22,13 @@ namespace ARMeilleure.Translation { public class Translator { - private static readonly AddressTable.Level[] _levels64Bit = - new AddressTable.Level[] - { - new(31, 17), - new(23, 8), - new(15, 8), - new( 7, 8), - new( 2, 5), - }; - - private static readonly AddressTable.Level[] _levels32Bit = - new AddressTable.Level[] - { - new(31, 17), - new(23, 8), - new(15, 8), - new( 7, 8), - new( 1, 6), - }; - private readonly IJitMemoryAllocator _allocator; private readonly ConcurrentQueue> _oldFuncs; private readonly Ptc _ptc; internal TranslatorCache Functions { get; } - internal AddressTable FunctionTable { get; } + internal IAddressTable FunctionTable { get; } internal EntryTable CountTable { get; } internal TranslatorStubs Stubs { get; } internal TranslatorQueue Queue { get; } @@ -57,7 +37,7 @@ public class Translator private Thread[] _backgroundTranslationThreads; private volatile int _threadCount; - public Translator(IJitMemoryAllocator allocator, IMemoryManager memory, bool for64Bits) + public Translator(IJitMemoryAllocator allocator, IMemoryManager memory, IAddressTable functionTable) { _allocator = allocator; Memory = memory; @@ -72,15 +52,15 @@ public Translator(IJitMemoryAllocator allocator, IMemoryManager memory, bool for CountTable = new EntryTable(); Functions = new TranslatorCache(); - FunctionTable = new AddressTable(for64Bits ? _levels64Bit : _levels32Bit); + FunctionTable = functionTable; Stubs = new TranslatorStubs(FunctionTable); FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub; } - public IPtcLoadState LoadDiskCache(string titleIdText, string displayVersion, bool enabled) + public IPtcLoadState LoadDiskCache(string titleIdText, string displayVersion, bool enabled, string cacheSelector) { - _ptc.Initialize(titleIdText, displayVersion, enabled, Memory.Type); + _ptc.Initialize(titleIdText, displayVersion, enabled, Memory.Type, cacheSelector); return _ptc; } diff --git a/src/ARMeilleure/Translation/TranslatorStubs.cs b/src/ARMeilleure/Translation/TranslatorStubs.cs index 364cca13c5..bd9aed8d45 100644 --- a/src/ARMeilleure/Translation/TranslatorStubs.cs +++ b/src/ARMeilleure/Translation/TranslatorStubs.cs @@ -19,7 +19,7 @@ class TranslatorStubs : IDisposable private bool _disposed; - private readonly AddressTable _functionTable; + private readonly IAddressTable _functionTable; private readonly Lazy _dispatchStub; private readonly Lazy _dispatchLoop; private readonly Lazy _contextWrapper; @@ -86,7 +86,7 @@ public WrapperFunction ContextWrapper /// /// Function table used to store pointers to the functions that the guest code will call /// is null - public TranslatorStubs(AddressTable functionTable) + public TranslatorStubs(IAddressTable functionTable) { ArgumentNullException.ThrowIfNull(functionTable); diff --git a/src/Ryujinx.Cpu/AddressTable.cs b/src/Ryujinx.Cpu/AddressTable.cs new file mode 100644 index 0000000000..d87b12ab01 --- /dev/null +++ b/src/Ryujinx.Cpu/AddressTable.cs @@ -0,0 +1,482 @@ +using ARMeilleure.Memory; +using Ryujinx.Common; +using Ryujinx.Cpu.Signal; +using Ryujinx.Memory; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.InteropServices; +using System.Threading; +using static Ryujinx.Cpu.MemoryEhMeilleure; + +namespace ARMeilleure.Common +{ + /// + /// Represents a table of guest address to a value. + /// + /// Type of the value + public unsafe class AddressTable : IAddressTable where TEntry : unmanaged + { + /// + /// Represents a page of the address table. + /// + private readonly struct AddressTablePage + { + /// + /// True if the allocation belongs to a sparse block, false otherwise. + /// + public readonly bool IsSparse; + + /// + /// Base address for the page. + /// + public readonly IntPtr Address; + + public AddressTablePage(bool isSparse, IntPtr address) + { + IsSparse = isSparse; + Address = address; + } + } + + /// + /// A sparsely mapped block of memory with a signal handler to map pages as they're accessed. + /// + private readonly struct TableSparseBlock : IDisposable + { + public readonly SparseMemoryBlock Block; + private readonly TrackingEventDelegate _trackingEvent; + + public TableSparseBlock(ulong size, Action ensureMapped, PageInitDelegate pageInit) + { + var block = new SparseMemoryBlock(size, pageInit, null); + + _trackingEvent = (ulong address, ulong size, bool write) => + { + ulong pointer = (ulong)block.Block.Pointer + address; + ensureMapped((IntPtr)pointer); + return pointer; + }; + + bool added = NativeSignalHandler.AddTrackedRegion( + (nuint)block.Block.Pointer, + (nuint)(block.Block.Pointer + (IntPtr)block.Block.Size), + Marshal.GetFunctionPointerForDelegate(_trackingEvent)); + + if (!added) + { + throw new InvalidOperationException("Number of allowed tracked regions exceeded."); + } + + Block = block; + } + + public void Dispose() + { + NativeSignalHandler.RemoveTrackedRegion((nuint)Block.Block.Pointer); + + Block.Dispose(); + } + } + + private bool _disposed; + private TEntry** _table; + private readonly List _pages; + private TEntry _fill; + + private readonly MemoryBlock _sparseFill; + private readonly SparseMemoryBlock _fillBottomLevel; + private readonly TEntry* _fillBottomLevelPtr; + + private readonly List _sparseReserved; + private readonly ReaderWriterLockSlim _sparseLock; + + private ulong _sparseBlockSize; + private ulong _sparseReservedOffset; + + public bool Sparse { get; } + + /// + public ulong Mask { get; } + + /// + public AddressTableLevel[] Levels { get; } + + /// + public TEntry Fill + { + get + { + return _fill; + } + set + { + UpdateFill(value); + } + } + + /// + public IntPtr Base + { + get + { + ObjectDisposedException.ThrowIf(_disposed, this); + + lock (_pages) + { + return (IntPtr)GetRootPage(); + } + } + } + + /// + /// Constructs a new instance of the class with the specified list of + /// . + /// + /// Levels for the address table + /// True if the bottom page should be sparsely mapped + /// is null + /// Length of is less than 2 + public AddressTable(AddressTableLevel[] levels, bool sparse) + { + ArgumentNullException.ThrowIfNull(levels); + + _pages = new List(capacity: 16); + + Levels = levels; + Mask = 0; + + foreach (var level in Levels) + { + Mask |= level.Mask; + } + + Sparse = sparse; + + if (sparse) + { + // If the address table is sparse, allocate a fill block + + _sparseFill = new MemoryBlock(268435456ul, MemoryAllocationFlags.Mirrorable); //low Power TC uses size: 65536ul + + ulong bottomLevelSize = (1ul << levels.Last().Length) * (ulong)sizeof(TEntry); + + _fillBottomLevel = new SparseMemoryBlock(bottomLevelSize, null, _sparseFill); + _fillBottomLevelPtr = (TEntry*)_fillBottomLevel.Block.Pointer; + + _sparseReserved = new List(); + _sparseLock = new ReaderWriterLockSlim(); + + _sparseBlockSize = bottomLevelSize; + } + } + + /// + /// Create an instance for an ARM function table. + /// Selects the best table structure for A32/A64, taking into account the selected memory manager type. + /// + /// True if the guest is A64, false otherwise + /// Memory manager type + /// An for ARM function lookup + public static AddressTable CreateForArm(bool for64Bits, MemoryManagerType type) + { + // Assume software memory means that we don't want to use any signal handlers. + bool sparse = type != MemoryManagerType.SoftwareMmu && type != MemoryManagerType.SoftwarePageTable; + + return new AddressTable(AddressTablePresets.GetArmPreset(for64Bits, sparse), sparse); + } + + /// + /// Update the fill value for the bottom level of the table. + /// + /// New fill value + private void UpdateFill(TEntry fillValue) + { + if (_sparseFill != null) + { + Span span = _sparseFill.GetSpan(0, (int)_sparseFill.Size); + MemoryMarshal.Cast(span).Fill(fillValue); + } + + _fill = fillValue; + } + + /// + /// Signal that the given code range exists. + /// + /// + /// + public void SignalCodeRange(ulong address, ulong size) + { + AddressTableLevel bottom = Levels.Last(); + ulong bottomLevelEntries = 1ul << bottom.Length; + + ulong entryIndex = address >> bottom.Index; + ulong entries = size >> bottom.Index; + entries += entryIndex - BitUtils.AlignDown(entryIndex, bottomLevelEntries); + + _sparseBlockSize = Math.Max(_sparseBlockSize, BitUtils.AlignUp(entries, bottomLevelEntries) * (ulong)sizeof(TEntry)); + } + + /// + public bool IsValid(ulong address) + { + return (address & ~Mask) == 0; + } + + /// + public ref TEntry GetValue(ulong address) + { + ObjectDisposedException.ThrowIf(_disposed, this); + + if (!IsValid(address)) + { + throw new ArgumentException($"Address 0x{address:X} is not mapped onto the table.", nameof(address)); + } + + lock (_pages) + { + TEntry* page = GetPage(address); + + int index = Levels[^1].GetValue(address); + + EnsureMapped((IntPtr)(page + index)); + + return ref page[index]; + } + } + + /// + /// Gets the leaf page for the specified guest . + /// + /// Guest address + /// Leaf page for the specified guest + private TEntry* GetPage(ulong address) + { + TEntry** page = GetRootPage(); + + for (int i = 0; i < Levels.Length - 1; i++) + { + ref AddressTableLevel level = ref Levels[i]; + ref TEntry* nextPage = ref page[level.GetValue(address)]; + + if (nextPage == null || nextPage == _fillBottomLevelPtr) + { + ref AddressTableLevel nextLevel = ref Levels[i + 1]; + + if (i == Levels.Length - 2) + { + nextPage = (TEntry*)Allocate(1 << nextLevel.Length, Fill, leaf: true); + } + else + { + nextPage = (TEntry*)Allocate(1 << nextLevel.Length, GetFillValue(i), leaf: false); + } + } + + page = (TEntry**)nextPage; + } + + return (TEntry*)page; + } + + /// + /// Ensure the given pointer is mapped in any overlapping sparse reservations. + /// + /// Pointer to be mapped + private void EnsureMapped(IntPtr ptr) + { + if (Sparse) + { + // Check sparse allocations to see if the pointer is in any of them. + // Ensure the page is committed if there's a match. + + _sparseLock.EnterReadLock(); + + try + { + foreach (TableSparseBlock reserved in _sparseReserved) + { + SparseMemoryBlock sparse = reserved.Block; + + if (ptr >= sparse.Block.Pointer && ptr < sparse.Block.Pointer + (IntPtr)sparse.Block.Size) + { + sparse.EnsureMapped((ulong)(ptr - sparse.Block.Pointer)); + + break; + } + } + } + finally + { + _sparseLock.ExitReadLock(); + } + } + } + + /// + /// Get the fill value for a non-leaf level of the table. + /// + /// Level to get the fill value for + /// The fill value + private IntPtr GetFillValue(int level) + { + if (_fillBottomLevel != null && level == Levels.Length - 2) + { + return (IntPtr)_fillBottomLevelPtr; + } + else + { + return IntPtr.Zero; + } + } + + /// + /// Lazily initialize and get the root page of the . + /// + /// Root page of the + private TEntry** GetRootPage() + { + if (_table == null) + { + if (Levels.Length == 1) + _table = (TEntry**)Allocate(1 << Levels[0].Length, Fill, leaf: true); + else + _table = (TEntry**)Allocate(1 << Levels[0].Length, GetFillValue(0), leaf: false); + } + + return _table; + } + + /// + /// Initialize a leaf page with the fill value. + /// + /// Page to initialize + private void InitLeafPage(Span page) + { + MemoryMarshal.Cast(page).Fill(_fill); + } + + /// + /// Reserve a new sparse block, and add it to the list. + /// + /// The new sparse block that was added + private TableSparseBlock ReserveNewSparseBlock() + { + var block = new TableSparseBlock(_sparseBlockSize, EnsureMapped, InitLeafPage); + + _sparseReserved.Add(block); + _sparseReservedOffset = 0; + + return block; + } + + /// + /// Allocates a block of memory of the specified type and length. + /// + /// Type of elements + /// Number of elements + /// Fill value + /// if leaf; otherwise + /// Allocated block + private IntPtr Allocate(int length, T fill, bool leaf) where T : unmanaged + { + var size = sizeof(T) * length; + + AddressTablePage page; + + if (Sparse && leaf) + { + _sparseLock.EnterWriteLock(); + + SparseMemoryBlock block; + + if (_sparseReserved.Count == 0) + { + block = ReserveNewSparseBlock().Block; + } + else + { + block = _sparseReserved.Last().Block; + + if (_sparseReservedOffset == block.Block.Size) + { + block = ReserveNewSparseBlock().Block; + } + } + + page = new AddressTablePage(true, block.Block.Pointer + (IntPtr)_sparseReservedOffset); + + _sparseReservedOffset += (ulong)size; + + _sparseLock.ExitWriteLock(); + } + else + { + var address = (IntPtr)NativeAllocator.Instance.Allocate((uint)size); + page = new AddressTablePage(false, address); + + var span = new Span((void*)page.Address, length); + span.Fill(fill); + } + + _pages.Add(page); + + //TranslatorEventSource.Log.AddressTableAllocated(size, leaf); + + return page.Address; + } + + /// + /// Releases all resources used by the instance. + /// + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + + /// + /// Releases all unmanaged and optionally managed resources used by the + /// instance. + /// + /// to dispose managed resources also; otherwise just unmanaged resouces + protected virtual void Dispose(bool disposing) + { + if (!_disposed) + { + foreach (var page in _pages) + { + if (!page.IsSparse) + { + Marshal.FreeHGlobal(page.Address); + } + } + + if (Sparse) + { + foreach (TableSparseBlock block in _sparseReserved) + { + block.Dispose(); + } + + _sparseReserved.Clear(); + + _fillBottomLevel.Dispose(); + _sparseFill.Dispose(); + _sparseLock.Dispose(); + } + + _disposed = true; + } + } + + /// + /// Frees resources used by the instance. + /// + ~AddressTable() + { + Dispose(false); + } + } +} diff --git a/src/Ryujinx.Cpu/AppleHv/HvCpuContext.cs b/src/Ryujinx.Cpu/AppleHv/HvCpuContext.cs index 99e4c0479d..784949441c 100644 --- a/src/Ryujinx.Cpu/AppleHv/HvCpuContext.cs +++ b/src/Ryujinx.Cpu/AppleHv/HvCpuContext.cs @@ -32,7 +32,7 @@ public void InvalidateCacheRegion(ulong address, ulong size) { } - public IDiskCacheLoadState LoadDiskCache(string titleIdText, string displayVersion, bool enabled) + public IDiskCacheLoadState LoadDiskCache(string titleIdText, string displayVersion, bool enabled, string cacheSelector) { return new DummyDiskCacheLoadState(); } diff --git a/src/Ryujinx.Cpu/ICpuContext.cs b/src/Ryujinx.Cpu/ICpuContext.cs index edcebdfc4a..1fb3b674db 100644 --- a/src/Ryujinx.Cpu/ICpuContext.cs +++ b/src/Ryujinx.Cpu/ICpuContext.cs @@ -48,7 +48,7 @@ public interface ICpuContext : IDisposable /// Version of the application /// True if the cache should be loaded from disk if it exists, false otherwise /// Disk cache load progress reporter and manager - IDiskCacheLoadState LoadDiskCache(string titleIdText, string displayVersion, bool enabled); + IDiskCacheLoadState LoadDiskCache(string titleIdText, string displayVersion, bool enabled, string cacheSelector); /// /// Indicates that code has been loaded into guest memory, and that it might be executed in the future. diff --git a/src/Ryujinx.Cpu/Jit/JitCpuContext.cs b/src/Ryujinx.Cpu/Jit/JitCpuContext.cs index 9893c59b29..0793f382d2 100644 --- a/src/Ryujinx.Cpu/Jit/JitCpuContext.cs +++ b/src/Ryujinx.Cpu/Jit/JitCpuContext.cs @@ -1,3 +1,4 @@ +using ARMeilleure.Common; using ARMeilleure.Memory; using ARMeilleure.Translation; using Ryujinx.Cpu.Signal; @@ -9,11 +10,13 @@ class JitCpuContext : ICpuContext { private readonly ITickSource _tickSource; private readonly Translator _translator; + private readonly AddressTable _functionTable; public JitCpuContext(ITickSource tickSource, IMemoryManager memory, bool for64Bit) { _tickSource = tickSource; - _translator = new Translator(new JitMemoryAllocator(forJit: true), memory, for64Bit); + _functionTable = AddressTable.CreateForArm(for64Bit, memory.Type); + _translator = new Translator(new JitMemoryAllocator(forJit: true), memory, _functionTable); if (memory.Type.IsHostMappedOrTracked()) { @@ -47,14 +50,15 @@ public void InvalidateCacheRegion(ulong address, ulong size) } /// - public IDiskCacheLoadState LoadDiskCache(string titleIdText, string displayVersion, bool enabled) + public IDiskCacheLoadState LoadDiskCache(string titleIdText, string displayVersion, bool enabled, string cacheSelector) { - return new JitDiskCacheLoadState(_translator.LoadDiskCache(titleIdText, displayVersion, enabled)); + return new JitDiskCacheLoadState(_translator.LoadDiskCache(titleIdText, displayVersion, enabled, cacheSelector)); } /// public void PrepareCodeRange(ulong address, ulong size) { + _functionTable.SignalCodeRange(address, size); _translator.PrepareCodeRange(address, size); } diff --git a/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitFlow.cs b/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitFlow.cs index 7f5e4835c8..48bdbb573f 100644 --- a/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitFlow.cs +++ b/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitFlow.cs @@ -140,6 +140,10 @@ public unsafe static void WriteCallWithGuestAddress( bool isTail = false) { int tempRegister; + int tempGuestAddress = -1; + + bool inlineLookup = guestAddress.Kind != OperandKind.Constant && + funcTable is { Sparse: true }; if (guestAddress.Kind == OperandKind.Constant) { @@ -153,9 +157,16 @@ public unsafe static void WriteCallWithGuestAddress( else { asm.StrRiUn(guestAddress, Register(regAlloc.FixedContextRegister), NativeContextOffsets.DispatchAddressOffset); + + if (inlineLookup && guestAddress.Value == 0) + { + // X0 will be overwritten. Move the address to a temp register. + tempGuestAddress = regAlloc.AllocateTempGprRegister(); + asm.Mov(Register(tempGuestAddress), guestAddress); + } } - tempRegister = regAlloc.FixedContextRegister == 1 ? 2 : 1; + tempRegister = NextFreeRegister(1, tempGuestAddress); if (!isTail) { @@ -176,6 +187,40 @@ public unsafe static void WriteCallWithGuestAddress( asm.Mov(rn, funcPtrLoc & ~0xfffUL); asm.LdrRiUn(rn, rn, (int)(funcPtrLoc & 0xfffUL)); } + else if (inlineLookup) + { + // Inline table lookup. Only enabled when the sparse function table is enabled with 2 levels. + + Operand indexReg = Register(NextFreeRegister(tempRegister + 1, tempGuestAddress)); + + if (tempGuestAddress != -1) + { + guestAddress = Register(tempGuestAddress); + } + + ulong tableBase = (ulong)funcTable.Base; + + // Index into the table. + asm.Mov(rn, tableBase); + + for (int i = 0; i < funcTable.Levels.Length; i++) + { + var level = funcTable.Levels[i]; + asm.Ubfx(indexReg, guestAddress, level.Index, level.Length); + asm.Lsl(indexReg, indexReg, Const(3)); + + // Index into the page. + asm.Add(rn, rn, indexReg); + + // Load the page address. + asm.LdrRiUn(rn, rn, 0); + } + + if (tempGuestAddress != -1) + { + regAlloc.FreeTempGprRegister(tempGuestAddress); + } + } else { asm.Mov(rn, (ulong)funcPtr); @@ -252,5 +297,20 @@ private static Operand Register(int register, OperandType type = OperandType.I64 { return new Operand(register, RegisterType.Integer, type); } + + private static Operand Const(long value, OperandType type = OperandType.I64) + { + return new Operand(type, (ulong)value); + } + + private static int NextFreeRegister(int start, int avoid) + { + if (start == avoid) + { + start++; + } + + return start; + } } } diff --git a/src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/InstEmitSystem.cs b/src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/InstEmitSystem.cs index 1eeeb746e1..f534e8b6e7 100644 --- a/src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/InstEmitSystem.cs +++ b/src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/InstEmitSystem.cs @@ -305,6 +305,10 @@ public unsafe static void WriteCallWithGuestAddress( bool isTail = false) { int tempRegister; + int tempGuestAddress = -1; + + bool inlineLookup = guestAddress.Kind != OperandKind.Constant && + funcTable is { Sparse: true }; if (guestAddress.Kind == OperandKind.Constant) { @@ -318,9 +322,16 @@ public unsafe static void WriteCallWithGuestAddress( else { asm.StrRiUn(guestAddress, Register(regAlloc.FixedContextRegister), NativeContextOffsets.DispatchAddressOffset); + + if (inlineLookup && guestAddress.Value == 0) + { + // X0 will be overwritten. Move the address to a temp register. + tempGuestAddress = regAlloc.AllocateTempGprRegister(); + asm.Mov(Register(tempGuestAddress), guestAddress); + } } - tempRegister = regAlloc.FixedContextRegister == 1 ? 2 : 1; + tempRegister = NextFreeRegister(1, tempGuestAddress); if (!isTail) { @@ -341,6 +352,40 @@ public unsafe static void WriteCallWithGuestAddress( asm.Mov(rn, funcPtrLoc & ~0xfffUL); asm.LdrRiUn(rn, rn, (int)(funcPtrLoc & 0xfffUL)); } + else if (inlineLookup) + { + // Inline table lookup. Only enabled when the sparse function table is enabled with 2 levels. + + Operand indexReg = Register(NextFreeRegister(tempRegister + 1, tempGuestAddress)); + + if (tempGuestAddress != -1) + { + guestAddress = Register(tempGuestAddress); + } + + ulong tableBase = (ulong)funcTable.Base; + + // Index into the table. + asm.Mov(rn, tableBase); + + for (int i = 0; i < funcTable.Levels.Length; i++) + { + var level = funcTable.Levels[i]; + asm.Ubfx(indexReg, guestAddress, level.Index, level.Length); + asm.Lsl(indexReg, indexReg, Const(3)); + + // Index into the page. + asm.Add(rn, rn, indexReg); + + // Load the page address. + asm.LdrRiUn(rn, rn, 0); + } + + if (tempGuestAddress != -1) + { + regAlloc.FreeTempGprRegister(tempGuestAddress); + } + } else { asm.Mov(rn, (ulong)funcPtr); @@ -613,5 +658,20 @@ private static Operand Register(int register, OperandType type = OperandType.I64 { return new Operand(register, RegisterType.Integer, type); } + + private static Operand Const(long value, OperandType type = OperandType.I64) + { + return new Operand(type, (ulong)value); + } + + private static int NextFreeRegister(int start, int avoid) + { + if (start == avoid) + { + start++; + } + + return start; + } } } diff --git a/src/Ryujinx.Cpu/LightningJit/LightningJitCpuContext.cs b/src/Ryujinx.Cpu/LightningJit/LightningJitCpuContext.cs index b63636e39a..0f47ffb154 100644 --- a/src/Ryujinx.Cpu/LightningJit/LightningJitCpuContext.cs +++ b/src/Ryujinx.Cpu/LightningJit/LightningJitCpuContext.cs @@ -1,3 +1,4 @@ +using ARMeilleure.Common; using ARMeilleure.Memory; using Ryujinx.Cpu.Jit; using Ryujinx.Cpu.LightningJit.State; @@ -8,11 +9,16 @@ class LightningJitCpuContext : ICpuContext { private readonly ITickSource _tickSource; private readonly Translator _translator; + private readonly AddressTable _functionTable; public LightningJitCpuContext(ITickSource tickSource, IMemoryManager memory, bool for64Bit) { _tickSource = tickSource; - _translator = new Translator(memory, for64Bit); + + _functionTable = AddressTable.CreateForArm(for64Bit, memory.Type); + + _translator = new Translator(memory, _functionTable); + memory.UnmapEvent += UnmapHandler; } @@ -40,7 +46,7 @@ public void InvalidateCacheRegion(ulong address, ulong size) } /// - public IDiskCacheLoadState LoadDiskCache(string titleIdText, string displayVersion, bool enabled) + public IDiskCacheLoadState LoadDiskCache(string titleIdText, string displayVersion, bool enabled, string cacheSelector) { return new DummyDiskCacheLoadState(); } @@ -48,6 +54,7 @@ public IDiskCacheLoadState LoadDiskCache(string titleIdText, string displayVersi /// public void PrepareCodeRange(ulong address, ulong size) { + _functionTable.SignalCodeRange(address, size); } public void Dispose() diff --git a/src/Ryujinx.Cpu/LightningJit/Translator.cs b/src/Ryujinx.Cpu/LightningJit/Translator.cs index b4710e34e1..4c4011f113 100644 --- a/src/Ryujinx.Cpu/LightningJit/Translator.cs +++ b/src/Ryujinx.Cpu/LightningJit/Translator.cs @@ -19,25 +19,6 @@ class Translator : IDisposable // Should be enabled on platforms that enforce W^X. private static bool IsNoWxPlatform => false; - private static readonly AddressTable.Level[] _levels64Bit = - new AddressTable.Level[] - { - new(31, 17), - new(23, 8), - new(15, 8), - new( 7, 8), - new( 2, 5), - }; - - private static readonly AddressTable.Level[] _levels32Bit = - new AddressTable.Level[] - { - new(23, 9), - new(15, 8), - new( 7, 8), - new( 1, 6), - }; - private readonly ConcurrentQueue> _oldFuncs; private readonly NoWxCache _noWxCache; private bool _disposed; @@ -47,7 +28,7 @@ class Translator : IDisposable internal TranslatorStubs Stubs { get; } internal IMemoryManager Memory { get; } - public Translator(IMemoryManager memory, bool for64Bits) + public Translator(IMemoryManager memory, AddressTable functionTable) { Memory = memory; @@ -63,7 +44,7 @@ public Translator(IMemoryManager memory, bool for64Bits) } Functions = new TranslatorCache(); - FunctionTable = new AddressTable(for64Bits ? _levels64Bit : _levels32Bit); + FunctionTable = functionTable; Stubs = new TranslatorStubs(FunctionTable, _noWxCache); FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub; diff --git a/src/Ryujinx.Cpu/LightningJit/TranslatorStubs.cs b/src/Ryujinx.Cpu/LightningJit/TranslatorStubs.cs index e88414d5e4..c5231e506b 100644 --- a/src/Ryujinx.Cpu/LightningJit/TranslatorStubs.cs +++ b/src/Ryujinx.Cpu/LightningJit/TranslatorStubs.cs @@ -23,7 +23,7 @@ class TranslatorStubs : IDisposable private bool _disposed; - private readonly AddressTable _functionTable; + private readonly IAddressTable _functionTable; private readonly NoWxCache _noWxCache; private readonly GetFunctionAddressDelegate _getFunctionAddressRef; private readonly nint _getFunctionAddress; @@ -79,7 +79,7 @@ public DispatcherFunction DispatchLoop /// Function table used to store pointers to the functions that the guest code will call /// Cache used on platforms that enforce W^X, otherwise should be null /// is null - public TranslatorStubs(AddressTable functionTable, NoWxCache noWxCache) + public TranslatorStubs(IAddressTable functionTable, NoWxCache noWxCache) { ArgumentNullException.ThrowIfNull(functionTable); diff --git a/src/Ryujinx.HLE/HOS/ArmProcessContext.cs b/src/Ryujinx.HLE/HOS/ArmProcessContext.cs index fde489ab7d..09a7216442 100644 --- a/src/Ryujinx.HLE/HOS/ArmProcessContext.cs +++ b/src/Ryujinx.HLE/HOS/ArmProcessContext.cs @@ -13,7 +13,8 @@ IDiskCacheLoadState Initialize( string displayVersion, bool diskCacheEnabled, ulong codeAddress, - ulong codeSize); + ulong codeSize, + string cacheSelector); } class ArmProcessContext : IArmProcessContext where T : class, IVirtualMemoryManagerTracked, IMemoryManager @@ -67,10 +68,11 @@ public IDiskCacheLoadState Initialize( string displayVersion, bool diskCacheEnabled, ulong codeAddress, - ulong codeSize) + ulong codeSize, + string cacheSelector) { _cpuContext.PrepareCodeRange(codeAddress, codeSize); - return _cpuContext.LoadDiskCache(titleIdText, displayVersion, diskCacheEnabled); + return _cpuContext.LoadDiskCache(titleIdText, displayVersion, diskCacheEnabled, cacheSelector); } public void InvalidateCacheRegion(ulong address, ulong size) diff --git a/src/Ryujinx.HLE/HOS/ArmProcessContextFactory.cs b/src/Ryujinx.HLE/HOS/ArmProcessContextFactory.cs index 6646826cb0..14775fb1d6 100644 --- a/src/Ryujinx.HLE/HOS/ArmProcessContextFactory.cs +++ b/src/Ryujinx.HLE/HOS/ArmProcessContextFactory.cs @@ -114,7 +114,7 @@ public IProcessContext Create(KernelContext context, ulong pid, ulong addressSpa } } - DiskCacheLoadState = processContext.Initialize(_titleIdText, _displayVersion, _diskCacheEnabled, _codeAddress, _codeSize); + DiskCacheLoadState = processContext.Initialize(_titleIdText, _displayVersion, _diskCacheEnabled, _codeAddress, _codeSize, "default"); //Ready for exefs profiles return processContext; } diff --git a/src/Ryujinx.Memory/SparseMemoryBlock.cs b/src/Ryujinx.Memory/SparseMemoryBlock.cs new file mode 100644 index 0000000000..523685de13 --- /dev/null +++ b/src/Ryujinx.Memory/SparseMemoryBlock.cs @@ -0,0 +1,125 @@ +using Ryujinx.Common; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading; + +namespace Ryujinx.Memory +{ + public delegate void PageInitDelegate(Span page); + + public class SparseMemoryBlock : IDisposable + { + private const ulong MapGranularity = 1UL << 17; + + private readonly PageInitDelegate _pageInit; + + private readonly object _lock = new object(); + private readonly ulong _pageSize; + private readonly MemoryBlock _reservedBlock; + private readonly List _mappedBlocks; + private ulong _mappedBlockUsage; + + private readonly ulong[] _mappedPageBitmap; + + public MemoryBlock Block => _reservedBlock; + + public SparseMemoryBlock(ulong size, PageInitDelegate pageInit, MemoryBlock fill) + { + _pageSize = MemoryBlock.GetPageSize(); + _reservedBlock = new MemoryBlock(size, MemoryAllocationFlags.Reserve | MemoryAllocationFlags.ViewCompatible); + _mappedBlocks = new List(); + _pageInit = pageInit; + + int pages = (int)BitUtils.DivRoundUp(size, _pageSize); + int bitmapEntries = BitUtils.DivRoundUp(pages, 64); + _mappedPageBitmap = new ulong[bitmapEntries]; + + if (fill != null) + { + // Fill the block with mappings from the fill block. + + if (fill.Size % _pageSize != 0) + { + throw new ArgumentException("Fill memory block should be page aligned.", nameof(fill)); + } + + int repeats = (int)BitUtils.DivRoundUp(size, fill.Size); + + ulong offset = 0; + for (int i = 0; i < repeats; i++) + { + _reservedBlock.MapView(fill, 0, offset, Math.Min(fill.Size, size - offset)); + offset += fill.Size; + } + } + + // If a fill block isn't provided, the pages that aren't EnsureMapped are unmapped. + // The caller can rely on signal handler to fill empty pages instead. + } + + private void MapPage(ulong pageOffset) + { + // Take a page from the latest mapped block. + MemoryBlock block = _mappedBlocks.LastOrDefault(); + + if (block == null || _mappedBlockUsage == MapGranularity) + { + // Need to map some more memory. + + block = new MemoryBlock(MapGranularity, MemoryAllocationFlags.Mirrorable); + + _mappedBlocks.Add(block); + + _mappedBlockUsage = 0; + } + + _pageInit(block.GetSpan(_mappedBlockUsage, (int)_pageSize)); + _reservedBlock.MapView(block, _mappedBlockUsage, pageOffset, _pageSize); + + _mappedBlockUsage += _pageSize; + } + + public void EnsureMapped(ulong offset) + { + int pageIndex = (int)(offset / _pageSize); + int bitmapIndex = pageIndex >> 6; + + ref ulong entry = ref _mappedPageBitmap[bitmapIndex]; + ulong bit = 1UL << (pageIndex & 63); + + if ((Volatile.Read(ref entry) & bit) == 0) + { + // Not mapped. + + lock (_lock) + { + // Check the bit while locked to make sure that this only happens once. + + ulong lockedEntry = Volatile.Read(ref entry); + + if ((lockedEntry & bit) == 0) + { + MapPage(offset & ~(_pageSize - 1)); + + lockedEntry |= bit; + + Interlocked.Exchange(ref entry, lockedEntry); + } + } + } + } + + public void Dispose() + { + _reservedBlock.Dispose(); + + foreach (MemoryBlock block in _mappedBlocks) + { + block.Dispose(); + } + + GC.SuppressFinalize(this); + } + } +} diff --git a/src/Ryujinx.Tests/Cpu/CpuContext.cs b/src/Ryujinx.Tests/Cpu/CpuContext.cs index 96b4965a21..81e8ba8c91 100644 --- a/src/Ryujinx.Tests/Cpu/CpuContext.cs +++ b/src/Ryujinx.Tests/Cpu/CpuContext.cs @@ -1,3 +1,4 @@ +using ARMeilleure.Common; using ARMeilleure.Memory; using ARMeilleure.State; using ARMeilleure.Translation; @@ -12,7 +13,7 @@ public class CpuContext public CpuContext(IMemoryManager memory, bool for64Bit) { - _translator = new Translator(new JitMemoryAllocator(), memory, for64Bit); + _translator = new Translator(new JitMemoryAllocator(), memory, AddressTable.CreateForArm(for64Bit, memory.Type)); memory.UnmapEvent += UnmapHandler; } diff --git a/src/Ryujinx.Tests/Cpu/EnvironmentTests.cs b/src/Ryujinx.Tests/Cpu/EnvironmentTests.cs index 2a4775a319..43c84c1935 100644 --- a/src/Ryujinx.Tests/Cpu/EnvironmentTests.cs +++ b/src/Ryujinx.Tests/Cpu/EnvironmentTests.cs @@ -1,3 +1,5 @@ +using ARMeilleure.Common; +using ARMeilleure.Memory; using ARMeilleure.Translation; using NUnit.Framework; using Ryujinx.Cpu.Jit; @@ -17,7 +19,10 @@ internal class EnvironmentTests private static void EnsureTranslator() { // Create a translator, as one is needed to register the signal handler or emit methods. - _translator ??= new Translator(new JitMemoryAllocator(), new MockMemoryManager(), true); + _translator ??= new Translator( + new JitMemoryAllocator(), + new MockMemoryManager(), + AddressTable.CreateForArm(true, MemoryManagerType.SoftwarePageTable)); } [MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.NoOptimization)] diff --git a/src/Ryujinx.Tests/Memory/PartialUnmaps.cs b/src/Ryujinx.Tests/Memory/PartialUnmaps.cs index 6d2ad8fb01..3e5b474238 100644 --- a/src/Ryujinx.Tests/Memory/PartialUnmaps.cs +++ b/src/Ryujinx.Tests/Memory/PartialUnmaps.cs @@ -1,3 +1,5 @@ +using ARMeilleure.Common; +using ARMeilleure.Memory; using ARMeilleure.Signal; using ARMeilleure.Translation; using NUnit.Framework; @@ -53,7 +55,10 @@ private static int CountThreads(ref PartialUnmapState state) private static void EnsureTranslator() { // Create a translator, as one is needed to register the signal handler or emit methods. - _translator ??= new Translator(new JitMemoryAllocator(), new MockMemoryManager(), true); + _translator ??= new Translator( + new JitMemoryAllocator(), + new MockMemoryManager(), + AddressTable.CreateForArm(true, MemoryManagerType.SoftwarePageTable)); } [Test]