From f9067e999f09e27c766b20a8baf39bc2d00a71a2 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Sat, 23 Mar 2024 11:21:54 -0700 Subject: [PATCH] Initial spec for the meaning and relationships between data contracts (#99936) * Initial spec for the meaning and relationships between data contracts * Adjust to feedback * actually inclde the new spec in the new location * Merge in Jan's preamble * Add total type size to the layout info. * Add example algorithmic contracts - To start, I've added the contracts for iterating the thread list and gathering information about individual threads - Note that this algorithmic description is paired with a simplification of the SList data structure - See PR #100107 * Tweaks to some of the algorithms as suggested by Jan --- docs/design/datacontracts/GCHandle.md | 28 ++ docs/design/datacontracts/SList.md | 78 ++++ docs/design/datacontracts/Thread.md | 195 +++++++++ .../contract_csharp_api_design.cs | 386 ++++++++++++++++++ .../datacontracts/datacontracts_design.md | 329 +++++++++++++++ 5 files changed, 1016 insertions(+) create mode 100644 docs/design/datacontracts/GCHandle.md create mode 100644 docs/design/datacontracts/SList.md create mode 100644 docs/design/datacontracts/Thread.md create mode 100644 docs/design/datacontracts/contract_csharp_api_design.cs create mode 100644 docs/design/datacontracts/datacontracts_design.md diff --git a/docs/design/datacontracts/GCHandle.md b/docs/design/datacontracts/GCHandle.md new file mode 100644 index 0000000000000..250b87f1e37ab --- /dev/null +++ b/docs/design/datacontracts/GCHandle.md @@ -0,0 +1,28 @@ +# Contract GCHandle + +This contract allows decoding and reading of GCHandles. 
This will also include handle enumeration in the future + +## Data structures defined by contract +``` csharp +struct DacGCHandle +{ + DacGCHandle(TargetPointer value) { Value = value; } + TargetPointer Value; +} +``` + +## Apis of contract +``` csharp +TargetPointer GetObject(DacGCHandle gcHandle); +``` + +## Version 1 + +``` csharp +TargetPointer GetObject(DacGCHandle gcHandle) +{ + if (gcHandle.Value == TargetPointer.Null) + return TargetPointer.Null; + return Target.ReadTargetPointer(gcHandle.Value); +} +``` diff --git a/docs/design/datacontracts/SList.md b/docs/design/datacontracts/SList.md new file mode 100644 index 0000000000000..ee1e9c66e06be --- /dev/null +++ b/docs/design/datacontracts/SList.md @@ -0,0 +1,78 @@ +# Contract SList + +This contract allows reading and iterating over an SList data structure. + +## Data structures defined by contract +``` csharp +class SListReader +{ + public abstract TargetPointer GetHead(TargetPointer slistPointer); + public abstract TargetPointer GetNext(TargetPointer entryInSList); + public IEnumerator EnumerateList(TargetPointer slistPointer) + { + TargetPointer current = GetHead(slistPointer); + + while (current != TargetPointer.Null) + { + yield return current; + current = GetNext(current); + } + } + public IEnumerator EnumerateListFromEntry(TargetPointer entryInSList) + { + TargetPointer current = entryInSList; + + while (current != TargetPointer.Null) + { + yield return current; + current = GetNext(current); + } + } +} +``` + +## Apis of contract +``` csharp +SListReader GetReader(string typeOfDataStructure); +``` + +## Version 1 + +``` csharp +private class SListReaderV1 : SListReader +{ + uint _offsetToSLinkField; + Target Target; + + SListReaderV1(Target target, string typeToEnumerate) + { + Target = target; + _offsetToSLinkField = Target.Contracts.GetFieldLayout(typeToEnumerate, "m_Link").Offset; + } + public override TargetPointer GetHead(TargetPointer slistPointer) + { + TargetPointer headPointer = new 
SListBase(Target, slistPointer).m_pHead; + TargetPointer slinkInHeadObject = new SLink(Target, headPointer).m_pNext; + if (slinkInHeadObject == TargetPointer.Null) + return TargetPointer.Null; + return slinkInHeadObject - _offsetToSLinkField; + } + + public override TargetPointer GetNext(TargetPointer entryInSList) + { + if (entryInSList == TargetPointer.Null) + throw new ArgumentException(); + + TargetPointer slinkPointer = entryInSList + _offsetToSLinkField; + TargetPointer slinkInObject = new SLink(Target, slinkPointer).m_pNext; + if (slinkInObject == TargetPointer.Null) + return TargetPointer.Null; + return slinkInObject - _offsetToSLinkField; + } +} + +SListReader GetReader(string typeOfDataStructure) +{ + return new SListReaderV1(Target, typeOfDataStructure); +} +``` diff --git a/docs/design/datacontracts/Thread.md b/docs/design/datacontracts/Thread.md new file mode 100644 index 0000000000000..7bee0fe79fdc7 --- /dev/null +++ b/docs/design/datacontracts/Thread.md @@ -0,0 +1,195 @@ +# Contract Thread + +This contract is for reading and iterating the threads of the process. + +## Data structures defined by contract +``` csharp +record struct DacThreadStoreData ( + int ThreadCount, + TargetPointer FirstThread, + TargetPointer FinalizerThread, + TargetPointer GcThread); + +record struct DacThreadStoreCounts ( + int UnstartedThreadCount, + int BackgroundThreadCount, + int PendingThreadCount, + int DeadThreadCount); + +enum ThreadState +{ + TS_Unknown = 0x00000000, // threads are initialized this way + + TS_AbortRequested = 0x00000001, // Abort the thread + + TS_GCSuspendPending = 0x00000002, // ThreadSuspend::SuspendRuntime watches this thread to leave coop mode. + TS_GCSuspendRedirected = 0x00000004, // ThreadSuspend::SuspendRuntime has redirected the thread to suspension routine. + TS_GCSuspendFlags = TS_GCSuspendPending | TS_GCSuspendRedirected, // used to track suspension progress. Only SuspendRuntime writes/resets these. 
+ + TS_DebugSuspendPending = 0x00000008, // Is the debugger suspending threads? + TS_GCOnTransitions = 0x00000010, // Force a GC on stub transitions (GCStress only) + + TS_LegalToJoin = 0x00000020, // Is it now legal to attempt a Join() + + TS_ExecutingOnAltStack = 0x00000040, // Runtime is executing on an alternate stack located anywhere in the memory + + TS_Hijacked = 0x00000080, // Return address has been hijacked + + // unused = 0x00000100, + TS_Background = 0x00000200, // Thread is a background thread + TS_Unstarted = 0x00000400, // Thread has never been started + TS_Dead = 0x00000800, // Thread is dead + + TS_WeOwn = 0x00001000, // Exposed object initiated this thread + TS_CoInitialized = 0x00002000, // CoInitialize has been called for this thread + + TS_InSTA = 0x00004000, // Thread hosts an STA + TS_InMTA = 0x00008000, // Thread is part of the MTA + + // Some bits that only have meaning for reporting the state to clients. + TS_ReportDead = 0x00010000, // in WaitForOtherThreads() + TS_FullyInitialized = 0x00020000, // Thread is fully initialized and we are ready to broadcast its existence to external clients + + TS_TaskReset = 0x00040000, // The task is reset + + TS_SyncSuspended = 0x00080000, // Suspended via WaitSuspendEvent + TS_DebugWillSync = 0x00100000, // Debugger will wait for this thread to sync + + TS_StackCrawlNeeded = 0x00200000, // A stackcrawl is needed on this thread, such as for thread abort + // See comment for s_pWaitForStackCrawlEvent for reason. + + // unused = 0x00400000, + + // unused = 0x00800000, + TS_TPWorkerThread = 0x01000000, // is this a threadpool worker thread? + + TS_Interruptible = 0x02000000, // sitting in a Sleep(), Wait(), Join() + TS_Interrupted = 0x04000000, // was awakened by an interrupt APC. !!! 
This can be moved to TSNC + + TS_CompletionPortThread = 0x08000000, // Completion port thread + + TS_AbortInitiated = 0x10000000, // set when abort is begun + + TS_Finalized = 0x20000000, // The associated managed Thread object has been finalized. + // We can clean up the unmanaged part now. + + TS_FailStarted = 0x40000000, // The thread fails during startup. + TS_Detached = 0x80000000, // Thread was detached by DllMain +} + +record struct DacThreadData ( + uint ThreadId; + TargetNUint OsThreadId; + ThreadState State; + bool PreemptiveGCDisabled + TargetPointer AllocContextPointer; + TargetPointer AllocContextLimit; + TargetPointer Frame; + TargetPointer FirstNestedException; + TargetPointer TEB; + DacGCHandle LastThrownObjectHandle; + TargetPointer NextThread; +); +``` + +## Apis of contract +``` csharp +DacThreadStoreData GetThreadStoreData(); +DacThreadStoreCounts GetThreadCounts(); +DacThreadData GetThreadData(TargetPointer threadPointer); +TargetPointer GetNestedExceptionInfo(TargetPointer nestedExceptionPointer, out TargetPointer nextNestedException); +TargetPointer GetManagedThreadObject(TargetPointer threadPointer); +``` + +## Version 1 + + + +``` csharp +SListReader ThreadListReader = Contracts.SList.GetReader("Thread"); + +DacThreadStoreData GetThreadStoreData() +{ + TargetPointer threadStore = Target.ReadGlobalTargetPointer("s_pThreadStore"); + var runtimeThreadStore = new ThreadStore(Target, threadStore); + + TargetPointer firstThread = ThreadListReader.GetHead(runtimeThreadStore.SList.Pointer); + + return new DacThreadStoreData( + ThreadCount : runtimeThreadStore.m_ThreadCount, + FirstThread: firstThread, + FinalizerThread: Target.ReadGlobalTargetPointer("g_pFinalizerThread"), + GcThread: Target.ReadGlobalTargetPointer("g_pSuspensionThread")); +} + +DacThreadStoreCounts GetThreadCounts() +{ + TargetPointer threadStore = Target.ReadGlobalTargetPointer("s_pThreadStore"); + var runtimeThreadStore = new ThreadStore(Target, threadStore); + + return new 
DacThreadStoreCounts( + ThreadCount : runtimeThreadStore.m_ThreadCount, + UnstartedThreadCount : runtimeThreadStore.m_UnstartedThreadCount, + BackgroundThreadCount : runtimeThreadStore.m_BackgroundThreadCount, + PendingThreadCount : runtimeThreadStore.m_PendingThreadCount, + DeadThreadCount: runtimeThreadStore.m_DeadThreadCount); +} + +DacThreadData GetThreadData(TargetPointer threadPointer) +{ + var runtimeThread = new Thread(Target, threadPointer); + + TargetPointer firstNestedException = TargetPointer.Null; + if (Target.ReadGlobalInt32("FEATURE_EH_FUNCLETS")) + { + if (runtimeThread.m_ExceptionState.m_pCurrentTracker != TargetPointer.Null) + { + firstNestedException = new ExceptionTrackerBase(Target, runtimeThread.m_ExceptionState.m_pCurrentTracker).m_pPrevNestedInfo; + } + } + else + { + firstNestedException = runtimeThread.m_ExceptionState.m_currentExInfo.m_pPrevNestedInfo; + } + + return new DacThreadData( + ThreadId : runtimeThread.m_ThreadId, + OsThreadId : (OsThreadId)runtimeThread.m_OSThreadId, + State : (ThreadState)runtimeThread.m_State, + PreemptiveGCDisabled : runtimeThread.m_fPreemptiveGCDisabled != 0, + AllocContextPointer : runtimeThread.m_alloc_context.alloc_ptr, + AllocContextLimit : runtimeThread.m_alloc_context.alloc_limit, + Frame : runtimeThread.m_pFrame, + TEB : runtimeThread.Has_m_pTEB ? 
runtimeThread.m_pTEB : TargetPointer.Null, + LastThrownObjectHandle : new DacGCHandle(runtimeThread.m_LastThrownObjectHandle), + FirstNestedException : firstNestedException, + NextThread : ThreadListReader.GetNext(threadPointer) + ); +} + +TargetPointer GetNestedExceptionInfo(TargetPointer nestedExceptionPointer, out TargetPointer nextNestedException) +{ + if (nestedExceptionPointer == TargetPointer.Null) + { + throw new ArgumentException(); + } + if (Target.ReadGlobalInt32("FEATURE_EH_FUNCLETS")) + { + var exData = new ExceptionTrackerBase(Target, nestedExceptionPointer); + nextNestedException = exData.m_pPrevNestedInfo; + return Contracts.GCHandle.GetObject(exData.m_hThrowable); + } + else + { + var exData = new ExInfo(Target, nestedExceptionPointer); + nextNestedException = exData.m_pPrevNestedInfo; + return Contracts.GCHandle.GetObject(exData.m_hThrowable); + } +} + +TargetPointer GetManagedThreadObject(TargetPointer threadPointer) +{ + var runtimeThread = new Thread(Target, threadPointer); + return Contracts.GCHandle.GetObject(new DacGCHandle(runtimeThread.m_ExposedObject)); +} +``` diff --git a/docs/design/datacontracts/contract_csharp_api_design.cs b/docs/design/datacontracts/contract_csharp_api_design.cs new file mode 100644 index 0000000000000..062c04806003c --- /dev/null +++ b/docs/design/datacontracts/contract_csharp_api_design.cs @@ -0,0 +1,386 @@ +namespace DataContracts +{ + + // Indicate that this type is a DataContractType which should have the DataContractTypeSourceGenerator applied to it + // Also that any types nested in this type with the DataContractLayout define particular versioned layouts for data structures + class DataContractTypeAttribute : System.Attribute {} + + + // Defined on each specific data layout, the fields of the type are defined by the fields of the class + class DataContractLayoutAttribute : System.Attribute + { + public DataContractLayoutAttribute(uint version, uint typeSize) { Version = version; TypeSize = typeSize; } + 
public uint Version; + public uint TypeSize; + } + + // Defined on the class that contains global fields for a contract. The name and version are used to identify the contract + class DataContractGlobalsAttribute : System.Attribute + { + public DataContractGlobalsAttribute(string name, uint version) { Name = name; Version = version; } + public string Name; + public uint Version; + } + + // Defined on the class that contains an algorithmic contract. The version, and base type of the associated type are used to identify the contract, + // there must exist a constructor of the type with the following signature (DataContracts.Target target, uint contractVersion) + class DataContractAlgorithmAttribute : System.Attribute + { + public DataContractAlgorithmAttribute(params uint []version) { Name = name; Version = version; } + public uint[] Version; + } + + struct TargetPointer + { + public ulong Value; + public static TargetPointer Null = new TargetPointer(0); + // Add a full set of operators to support pointer arithmetic + } + + struct TargetNInt + { + public long Value; + // Add a full set of operators to support arithmetic as well as casting to/from TargetPointer + } + + struct TargetNUInt + { + public ulong Value; + // Add a full set of operators to support arithmetic as well as casting to/from TargetPointer + } + + enum FieldType + { + Int8Type, + UInt8Type, + Int16Type, + UInt16Type, + Int32Type, + UInt32Type, + Int64Type, + UInt64Type, + NIntType, + NUIntType, + PointerType, + + // Other values are dynamically assigned by the type definition rules + } + + struct FieldLayout + { + public int Offset; + public FieldType Type; + } + + interface IAlgorithmContract + { + void Init(); + } + + interface IContract + { + string Name { get; } + uint Version { get; } + } + class Target + { + // Users of the data contract may adjust this number to force re-reading of all data + public int CurrentEpoch = 0; + + sbyte ReadInt8(TargetPointer pointer); + byte ReadUInt8(TargetPointer 
pointer); + short ReadInt16(TargetPointer pointer); + ushort ReadUInt16(TargetPointer pointer); + int ReadInt32(TargetPointer pointer); + uint ReadUInt32(TargetPointer pointer); + long ReadInt64(TargetPointer pointer); + ulong ReadUInt64(TargetPointer pointer); + TargetPointer ReadTargetPointer(TargetPointer pointer); + TargetNInt ReadNInt(TargetPointer pointer); + TargetNUInt ReadNUint(TargetPointer pointer); + byte[] ReadByteArray(TargetPointer pointer, ulong size); + void FillByteArray(TargetPointer pointer, byte[] array, ulong size); + + bool TryReadInt8(TargetPointer pointer, out sbyte value); + bool TryReadUInt8(TargetPointer pointer, out byte value); + bool TryReadInt16(TargetPointer pointer, out short value); + bool TryReadUInt16(TargetPointer pointer, out ushort value); + bool TryReadInt32(TargetPointer pointer, out int value); + bool TryReadUInt32(TargetPointer pointer, out uint value); + bool TryReadInt64(TargetPointer pointer, out long value); + bool TryReadUInt64(TargetPointer pointer, out ulong value); + bool TryReadTargetPointer(TargetPointer pointer, out TargetPointer value); + bool TryReadNInt(TargetPointer pointer, out TargetNInt value); + bool TryReadNUInt(TargetPointer pointer, out TargetNUInt value); + bool TryReadByteArray(TargetPointer pointer, ulong size, out byte[] value); + bool TryFillByteArray(TargetPointer pointer, byte[] array, ulong size); + + // If pointer is 0, then the return value will be 0 + TargetPointer GetTargetPointerForField(TargetPointer pointer, FieldLayout fieldLayout); + + sbyte ReadGlobalInt8(string globalName); + byte ReadGlobalUInt8(string globalName); + short ReadGlobalInt16(string globalName); + ushort ReadGlobalUInt16(string globalName); + int ReadGlobalInt32(string globalName); + uint ReadGlobalUInt32(string globalName); + long ReadGlobalInt64(string globalName); + ulong ReadGlobalUInt64(string globalName); + TargetPointer ReadGlobalTargetPointer(string globalName); + + bool TryReadGlobalInt8(string globalName, 
out sbyte value); + bool TryReadGlobalUInt8(string globalName, out byte value); + bool TryReadGlobalInt16(string globalName, out short value); + bool TryReadGlobalUInt16(string globalName, out ushort value); + bool TryReadGlobalInt32(string globalName, out int value); + bool TryReadGlobalUInt32(string globalName, out uint value); + bool TryReadGlobalInt64(string globalName, out long value); + bool TryReadGlobalUInt64(string globalName, out ulong value); + bool TryReadGlobalTargetPointer(string globalName, out TargetPointer value); + + Contracts Contract { get; } + + partial class Contracts + { + FieldLayout GetFieldLayout(string typeName, string fieldName); + bool TryGetFieldLayout(string typeName, string fieldName, out FieldLayout layout); + int GetTypeSize(string typeName); + bool TryGetTypeSize(string typeName, out int size); + + object GetContract(string contractName); + bool TryGetContract(string contractName, out object contract); + + // Every contract that is defined has a field here. As an example this document defines a MethodTableContract + // If the contract is not supported by the runtime in use, then the implementation of the contract will be the base type which + // is defined to throw if it is ever used. 
+ + // List of contracts will be inserted here by source generator + MethodTableContract MethodTableContract; + } + } + + // Types defined by contracts live here + namespace ContractDefinitions + { + class CompositeContract + { + List> Subcontracts; + } + + class DataStructureContract + { + string MethodTableName {get;} + List> FieldData; + } + + // Insert Algorithmic Contract definitions here + class MethodTableContract + { + public virtual int DynamicTypeID(TargetPointer methodTablePointer) { throw new NotImplementedException(); } + public virtual int BaseSize(TargetPointer methodTablePointer) { throw new NotImplementedException(); } + } + } + + namespace ContractImplementation + { + // Get contract from the predefined contract database + static class PredefinedContracts + { + public static IContract GetContract(string name, uint version, Target target) + { + // Do some lookup and allocate an instance of the contract requested + // + // This lookup can either be reflection based, or we can do it based on a source generator. 
+ } + } + + [DataContractGlobals("FeatureFlags", 1)] + public class FeatureFlags_1 + { + public const int FeatureComInterop = 0; + } + + [DataContractGlobals("FeatureFlags", 2)] + public class FeatureFlags_2 + { + public const int FeatureComInterop = 1; + } + + [DataContractAlgorithm(1)] + class MethodTableContract_1 : ContractDefinitions.MethodTableContract, IAlgorithmContract + { + DataContracts.Target Target; + readonly uint ContractVersion; + public MethodTableContract_1(DataContracts.Target target, uint contractVersion) { Target = target; ContractVersion = contractVersion; } + + public virtual int DynamicTypeID(TargetPointer methodTablePointer) { return new MethodTable(_target, methodTablePointer).dynamicTypeId; } + public virtual int BaseSize(TargetPointer methodTablePointer) { return new MethodTable(_target, methodTablePointer).baseSizeAndFlags & 0x3FFFFFFF; } + } + + // This is used for version 2 and 3 of the contract, where the dynamic type id is no longer present, and baseSize has a new limitation in that it can only be a value up to 0x1FFFFFFF in v3 + [DataContractAlgorithm(2, 3)] + class MethodTableContract_2 : ContractDefinitions.MethodTableContract, IAlgorithmContract + { + DataContracts.Target Target; + readonly uint ContractVersion; + public MethodTableContract_2(DataContracts.Target target, uint contractVersion) { Target = target; } + + public virtual int DynamicTypeID(TargetPointer methodTablePointer) + { + throw new NotImplementedException(); + } + public virtual int BaseSize(TargetPointer methodTablePointer) + { + return new MethodTable(_target, methodTablePointer).baseSizeAndFlags & ((ContractVersion == 3) ? 0x1FFFFFFF : 0x3FFFFFFF); + } + } + + // We use a source generator to generate the actual runtime properties, and api for working with the fields on this type. 
+ // + // The source generator would fill in most of the apis, and provide a bunch of properties that give a granular failure model where if a particular field isn't defined, it fails at the access point + // This example shows access to a type. + [DataContractType] + partial struct MethodTable + { + partial void Get_dynamicTypeId_optional(ref int value); + partial void Get_baseSizeAndFlags(ref int value); + + [DataContractLayout(1, 8)] + public class DataLayout1 + { + [FieldOffset(0)] + public int dynamicTypeId; + [FieldOffset(4)] + public int baseSize; + } + [DataContractLayout(2, 4)] + public class DataLayout2 + { + [FieldOffset(0)] + public int baseSize; + } + + // The rest of this is generated by a source generator + public uint TypeSize => _layout.TypeSize; + void Get_dynamicTypeId_optional(ref int value) + { + value = dynamicTypeId; + } + void Get_baseSizeAndFlags(ref int value) + { + value = baseSizeAndFlags; + } + + private static int LayoutIndex = DataContracts.Target.RegisterLayout(MethodTableLayout.GetLayoutByTarget); + + public readonly TargetPointer Pointer; + private int _epoch; + private readonly MethodTableLayout _layout; + + public MethodTable(DataContracts.Target target, TargetPointer pointer) + { + Pointer = pointer; + _epoch = Int32.MinInt; + _layout = target.GetLayoutByIndex(LayoutIndex); + } + class MethodTableLayout + { + public static object GetLayoutByTarget(DataContracts.Target target) + { + return new MethodTableLayout(target); + } + + public readonly uint TypeSize; + + private MethodTableLayout(DataContracts.Target target) + { + Target = target; + TypeSize = target.Contract.GetTypeSize("MethodTable"); + if (!_target.Contract.TryGetFieldLayout("MethodTable", "dynamicTypeId", out var dynamicTypeIdField)) + { + dynamicTypeId_Offset = -1; + } + else + { + if (dynamicTypeIdField.Type != FieldType.Int32Type) + dynamicTypeId_Offset = -2; + else + dynamicTypeId_Offset = dynamicTypeIdField.Offset; + } + if 
(!_target.Contract.TryGetFieldLayout("MethodTable", "baseSizeAndFlags", out var baseSizeAndFlagsField)) + { + baseSizeAndFlags_Offset = -1; + } + else + { + if (baseSizeAndFlagsField.Type != FieldType.Int32Type) + baseSizeAndFlags_Offset = -2; + else + baseSizeAndFlags_Offset = baseSizeAndFlagsField.Offset; + } + } + public readonly DataContracts.Target Target; + + int dynamicTypeId_Offset; + public int dynamicTypeId(TargetPointer pointer) + { + if (dynamicTypeId_Offset == -1) + { + throw new Exception("MethodTable has no field dynamicTypeId"); + } + if (dynamicTypeId_Offset == -2) + { + throw new Exception("MethodTable field dynamicTypeId does not have type int32"); + } + return _target.ReadInt32(pointer + dynamicTypeId_Offset); + } + public bool Has_dynamicTypeId => dynamicTypeId_Offset >= 0; + + int baseSizeAndFlags_Offset; + public int baseSizeAndFlags(TargetPointer pointer) + { + if (baseSizeAndFlags_Offset == -1) + { + throw new Exception("MethodTable has no field baseSizeAndFlags"); + } + if (baseSizeAndFlags_Offset == -2) + { + throw new Exception("MethodTable field baseSizeAndFlags does not have type int32"); + } + return _target.ReadInt32(pointer + baseSizeAndFlags_Offset); + } + } + + private int _dynamicTypeId; + public int dynamicTypeId + { + get + { + int currentEpoch = _layout.Target.CurrentEpoch; + if (_epoch != currentEpoch) + { + _dynamicTypeId = _layout.dynamicTypeId(Pointer); + _epoch = currentEpoch; + } + return _dynamicTypeId; + } + } + public bool Has_dynamicTypeId => layout.Has_dynamicTypeId; + + private int _baseSizeAndFlags; + public int baseSizeAndFlags + { + get + { + int currentEpoch = _layout.Target.CurrentEpoch; + if (_epoch != currentEpoch) + { + _baseSizeAndFlags = _layout.baseSizeAndFlags(Pointer); + _epoch = currentEpoch; + } + return _baseSizeAndFlags; + } + } + } + } +} diff --git a/docs/design/datacontracts/datacontracts_design.md b/docs/design/datacontracts/datacontracts_design.md new file mode 100644 index 
0000000000000..8a52243fcdcb2 --- /dev/null +++ b/docs/design/datacontracts/datacontracts_design.md @@ -0,0 +1,329 @@ +# Data Contracts + +The diagnostic data contract documents a subset of internal .NET runtime in-memory data structures. It enables diagnostic tools to inspect the state of a .NET runtime process by directly reading and interpreting process memory. It is meant to be used by debuggers - for both live and post-mortem debugging, profilers, and other diagnostic tools. We expect it to enable innovative solutions like [unwinding through JITed code using eBPF filters](https://github.com/dotnet/runtime/issues/93550). + +The diagnostic data contract addresses multiple problems of the established .NET runtime debugger architecture. The established CoreCLR debugger architecture requires the debugger to acquire and load DAC and DBI libraries that exactly match the version of the .NET runtime being debugged. It comes with multiple challenges: +- *Security*: The DBI and DAC libraries that match the exact .NET runtime may be untrusted (e.g. custom or 3rd party build of .NET runtime). https://github.com/dotnet/runtime/blob/main/docs/workflow/debugging/coreclr/debugging-runtime.md#resolving-signature-validation-errors-in-visual-studio has some additional context. +- *Servicing*: It is difficult to ship a debugger-only fix in DBI and DAC libraries without shipping a new runtime build. Instead, we create a new runtime build and debugger behavior only improves once the new runtime build is targeted. +- *Acquisition*: Where to acquire the DBI and DAC libraries that match the exact .NET runtime version from. +- *Cross-architecture*: The host/target of DBI and DAC libraries may not be available. https://github.com/dotnet/runtime/blob/main/docs/design/features/cross-dac.md has some additional context. + +The diagnostic data contract addresses these challenges by eliminating the need for exactly matching DAC and DBI libraries. 
+Data contracts represent the manner in which a tool which is not the runtime can reliably understand and observe the behavior of the runtime. Contracts are defined by their documentation, and the runtime describes what contracts are applicable to understanding that runtime. + +## Data Contract Descriptor +The physical layout of this data is not defined in this document, but its practical effects are. + +The Data Contract Descriptor has a set of records of the following forms. + +### Global Values +Global values can be of the following types: (int8, uint8, int16, uint16, int32, uint32, int64, uint64, pointer, nint, nuint, string). +All global values have a string describing their name, and a value of one of the above types. + +### Compatible Contract +Each compatible contract is described by a string naming the contract, and a uint32 version. It is an ERROR if multiple versions of a contract are specified in the contract descriptor. + +### Data Structure Layout +Each data structure layout has a name for the type, followed by a list of fields. These fields can be of primitive types (int8, uint8, int16, uint16, int32, uint32, int64, uint64, nint, nuint, pointer) or of another named data structure type. Each field descriptor provides the offset of the field, the name of the field, and the type of the field. + +## Versioning of contracts +Contracts are described by an integer version number. A higher version number does not mean more recent; it just means different. In order to avoid conflicts, all contracts should be documented in the main branch of the dotnet repository with a version number which does not conflict with any other. It is expected that every version of every contract describes the same functionality/data layout/set of global values. + +## Contract data model +Logically a contract may refer to another contract. If it does so, it will typically refer to other contracts by names which do not include the contract version. This is to allow for version flexibility. 
Logically once the Data Contract Descriptor is fully processed, there is a single list of contracts that represents the set of contracts usable with whatever runtime instance is being processed. + +## Types of contracts + +There are 3 different types of contracts each representing a different phase of execution of the data contract system. + +### Composition contracts +These contracts indicate the version numbers of other contracts. This is done to reduce the size of the contract list needed in the Data Contract Descriptor. In general it is intended that as a runtime nears shipping, the product team can gather up all of the current versions of the contracts into a single magic value, which can be used to initialize most of the contract versions of the data contract system. A specific version number in the Data Contract Descriptor for a given contract will override any composition contracts specified in the Data Contract Descriptor. If there are multiple composition contracts in a Data Contract Descriptor which specify the same contract to have a different version, the first composition contract linearly in the Data Contract Descriptor wins. This is intended to allow for a composite contract for the architecture/OS independent work, and a separate composite contract for the non-independent work. If a contract is specified explicitly in the Data Contract Descriptor and a different version is specified via the composition contract mechanism, the explicitly specified contract takes precedence. + +### Fixed value contracts +These contracts represent data which is entirely determined by the contract version + contract name. There are 2 subtypes of this form of contract. + +#### Global Value Contract +A global value contract specifies numbers which can be referred to by other contracts. If a global value is specified directly in the Data Contract Descriptor, then the global value definition in the Data Contract Descriptor takes precedence. 
The intention is that these global variable contracts represent magic numbers and values which are useful for the operation of algorithmic contracts. For instance, we will likely have a `TargetPointerSize` global value represented via a contract, and things like `FEATURE_SUPPORTS_COM` can also be a global value contract, with a value of 1. + +#### Data Structure Definition Contract +A data structure definition contract defines a single type's physical layout. It MUST be named "MyDataStructureType_layout". If a data structure layout is specified directly in the Data Contract Descriptor, then the data structure definition in the Data Contract Descriptor takes precedence. These contracts are responsible for declaring the field layout of individual fields. While not all versions of a data structure are required to have the same fields/type of fields, algorithms may be built targeting the union of the set of field types defined in the versions of a given data structure definition contract. Access to a field which isn't defined on the current runtime will produce an error. + +### Algorithmic contracts +Algorithmic contracts define how to process a given set of data structures to produce useful results. These are effectively code snippets which utilize the abstracted data structures provided by Data Structure Definition Contracts and Global Value Contracts to produce useful output about a given program. Descriptions of these contracts may refer to functionality provided by other contracts to do their work. The algorithms provided in these contracts are designed to operate given the ability to read various primitive types and defined data structures from the process memory space, as well as perform general purpose computation. + +It is entirely reasonable for an algorithmic contract to have multiple entrypoints which take different inputs. For example, imagine a contract which provides information about a `MethodTable`. 
It may provide an api to get the `BaseSize` of a `MethodTable`, and an api to get the `DynamicTypeID` of a `MethodTable`. However, while the set of contracts which describe an older version of .NET may provide a means by which the `DynamicTypeID` may be acquired for a `MethodTable`, a newer runtime may not have that concept. In such a case, it is very reasonable to define the `GetDynamicTypeID` api portion of that contract to simply `throw new NotSupportedException();` + +For simplicity, as it can be expected that all developers who work on the .NET runtime understand C# to a fair degree, it is preferred that the algorithms be defined in C#, or at least pseudocode that looks like C#. It is also considered entirely permissible to refer to other specifications if the algorithm is a general-purpose one which is well defined by the OS or some other body. (For example, it is expected that the unwinding algorithms will be defined by references into either the DWARF spec, or various Windows Unwind specifications.) + +For working with data from the target process/other contracts, the following C# interface is intended to be used within the algorithmic descriptions: + +Best practice is to either write the algorithm in C#-like pseudocode working on top of the [C# style api](contract_csharp_api_design.cs) or by reference to specifications which are not co-developed with the runtime, such as OS/architecture specifications. Within the contract algorithm specification, the intention is that all interesting api work is done by using an instance of the `Target` class. + +## Arrangement of contract specifications in the repo + +Specs shall be stored in the repo in a set of directories. `docs/design/datacontracts` Each one of them shall be a separate markdown file named with the name of the contract.
`docs/design/datacontracts/datalayout/<contract_name>.md` Every version of each contract shall be located in the same file to facilitate understanding how variations between different contracts work. + +### Global Value Contracts +The format of each contract spec shall be: + + +``` +# Contract <contract_name> + +Insert description of contract, and what it's for here. + +## Version <version_number> + +Insert description (if possible) about what is interesting about this particular version of the contract + +### Values +| Global Name | Type | Value | +| --- | --- | --- | +| SomeGlobal | Int32 | 1 | +| SomeOtherGlobal | Int8 | 0 | + +## Version <version_number> + +Insert description (if possible) about what is interesting about this particular version of the contract + +### Values +| Global Name | Type | Value | +| --- | --- | --- | +| SomeGlobal | Int32 | 1 | +| SomeOtherGlobal | Int8 | 1 | +``` + +Which should format like: +# Contract <contract_name> + +Insert description of contract, and what it's for here. + +## Version <version_number> + +Insert description (if possible) about what is interesting about this particular version of the contract + +### Values +| Global Name | Type | Value | +| --- | --- | --- | +| SomeGlobal | Int32 | 1 | +| SomeOtherGlobal | Int8 | 0 | + +## Version <version_number> + +Insert description (if possible) about what is interesting about this particular version of the contract + +### Values +| Global Name | Type | Value | +| --- | --- | --- | +| SomeGlobal | Int32 | 1 | +| SomeOtherGlobal | Int8 | 1 | + + +### Data Structure Contracts +Data structure contracts describe the field layout of individual types in the runtime that are referred to by algorithmic contracts. If one of the versions is marked as DEFAULT, then that version is used if no specific version is specified in the Data Contract Descriptor. + +``` +# Contract <type_name>_layout + +Insert description of type, and what it's for here.
+ +## Version <version_number>, DEFAULT + +Insert description (if possible) about what is interesting about this particular version of the contract + +### Structure Size +12 bytes + +### Fields +| Field Name | Type | Offset | +| --- | --- | --- | +| FirstField | Int32 | 0 | +| SecondField | Int64 | 4 | + +## Version <version_number> + +Insert description (if possible) about what is interesting about this particular version of the contract + +### Structure Size +16 bytes + +### Fields +| Field Name | Type | Offset | +| --- | --- | --- | +| FirstField | Int32 | 0 | +| SecondField | Int64 | 8 | +``` + +Which should format like: +# Contract <type_name>_layout + +Insert description of type, and what it's for here. + +## Version <version_number>, DEFAULT + +Insert description (if possible) about what is interesting about this particular version of the contract + +### Structure Size +12 bytes + +### Fields +| Field Name | Type | Offset | +| --- | --- | --- | +| FirstField | Int32 | 0 | +| SecondField | Int64 | 4 | + +## Version <version_number> + +Insert description (if possible) about what is interesting about this particular version of the contract + +### Structure Size +16 bytes + +### Fields +| Field Name | Type | Offset | +| --- | --- | --- | +| FirstField | Int32 | 0 | +| SecondField | Int64 | 8 | + +### Algorithmic Contract + +Algorithmic contracts describe how an algorithm that processes data layouts works. Unlike all other contract forms, every version of an algorithmic contract presents a consistent api to consumers of the contract. + +There are several sections: +1. The header, where a description of what the contract can do is placed. +2. The exposed data structures of the contract. +3. The api surface of the contract. +4. The set of versions of the contract. + +For each version of the contract, there shall be the set of versions that are associated with a particular implementation as well as some form of description of how the algorithm works for that version.
Best practice is to either write the algorithm in C#-like pseudocode working on top of the [C# style api](contract_csharp_api_design.cs) or by reference to specifications which are not co-developed with the runtime, such as OS/architecture specifications. + +`````` +# Contract `<contract_name>` + +Insert description of contract, and what it can do here. + +## Data structures defined by contract +``` csharp +record struct SomeStructUsedAsPartOfContractApi (int Value, int Value2); +``` + +## Apis of contract +``` csharp +SomeStructUsedAsPartOfContractApi GetStruct(TargetPointer pointerName); +int ComputeInterestingValue(TargetPointer pointerName); +int ComputeInterestingValue2(SomeStructUsedAsPartOfContractApi struct); +``` + +## Version 1 + +Version 1 is what we started with + +``` csharp +SomeStructUsedAsPartOfContractApi GetStruct(TargetPointer pointerName) +{ + var runtimeDataStruct = new SomeRuntimeDataStructure(Target, pointerName); + return new SomeStructUSedAsPartOfContractApi(runtimeDataStruct.Field1, runtimeDataStruct.Field2); +} +int ComputeInterestingValue(TargetPointer pointerName) +{ + var runtimeDataStruct = new SomeRuntimeDataStructure(Target, pointerName); + return runtimeDataStruct.Field1 + runtimeDataStruct.Field2; +} +int ComputeInterestingValue2(SomeStructUsedAsPartOfContractApi struct) +{ + return struct.Value2; +} +``` + +## Version 2-5 + +Versions 2 to 5 are similar in most ways, but differ based on their ContractVersion in others.
+ +``` csharp +SomeStructUsedAsPartOfContractApi GetStruct(TargetPointer pointerName) +{ + var runtimeDataStruct = new SomeRuntimeDataStructure(Target, pointerName); + return new SomeStructUSedAsPartOfContractApi(runtimeDataStruct.Field1, runtimeDataStruct.Field2); +} +int ComputeInterestingValue(TargetPointer pointerName) +{ + var runtimeDataStruct = new SomeRuntimeDataStructure(Target, pointerName); + if (ContractVersion > 3) + return runtimeDataStruct.Field3 + runtimeDataStruct.Field2; + else + return runtimeDataStruct.Field3 ^ runtimeDataStruct.Field2; +} +int ComputeInterestingValue2(SomeStructUsedAsPartOfContractApi struct) +{ + if (ContractVersion > 4) + return struct.Value2; + else + return struct.Value1; +} +``` +`````` + +Which should format like: +# Contract `<contract_name>` + +Insert description of contract, and what it can do here. + +## Data structures defined by contract +``` csharp +record struct SomeStructUsedAsPartOfContractApi (int Value, int Value2); +``` + +## Apis of contract +``` csharp +SomeStructUsedAsPartOfContractApi GetStruct(TargetPointer pointerName); +int ComputeInterestingValue(TargetPointer pointerName); +int ComputeInterestingValue2(SomeStructUsedAsPartOfContractApi struct); +``` + +## Version 1 + +Version 1 is what we started with + +``` csharp +SomeStructUsedAsPartOfContractApi GetStruct(TargetPointer pointerName) +{ + var runtimeDataStruct = new SomeRuntimeDataStructure(Target, pointerName); + return new SomeStructUSedAsPartOfContractApi(runtimeDataStruct.Field1, runtimeDataStruct.Field2); +} +int ComputeInterestingValue(TargetPointer pointerName) +{ + var runtimeDataStruct = new SomeRuntimeDataStructure(Target, pointerName); + return runtimeDataStruct.Field1 + runtimeDataStruct.Field2; +} +int ComputeInterestingValue2(SomeStructUsedAsPartOfContractApi struct) +{ + return struct.Value2; +} +``` + +## Version 2-5 + +Versions 2 to 5 are similar in most ways, but differ based on their ContractVersion in others.
+ +``` csharp +SomeStructUsedAsPartOfContractApi GetStruct(TargetPointer pointerName) +{ + var runtimeDataStruct = new SomeRuntimeDataStructure(Target, pointerName); + return new SomeStructUSedAsPartOfContractApi(runtimeDataStruct.Field1, runtimeDataStruct.Field2); +} +int ComputeInterestingValue(TargetPointer pointerName) +{ + var runtimeDataStruct = new SomeRuntimeDataStructure(Target, pointerName); + if (ContractVersion > 3) + return runtimeDataStruct.Field3 + runtimeDataStruct.Field2; + else + return runtimeDataStruct.Field3 ^ runtimeDataStruct.Field2; +} +int ComputeInterestingValue2(SomeStructUsedAsPartOfContractApi struct) +{ + if (ContractVersion > 4) + return struct.Value2; + else + return struct.Value1; +} +``` \ No newline at end of file