From 4b38bb728e07fd6f21119b028253e69c369e9e61 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Sun, 14 Jan 2024 23:04:57 -0500 Subject: [PATCH 01/37] metal gpu matrix3D addition test --- llm/tests/metal/MetalAdder.h | 17 ++++ llm/tests/metal/MetalAdder.m | 165 +++++++++++++++++++++++++++++++++++ llm/tests/metal/add.metal | 21 +++++ llm/tests/metal/main.m | 23 +++++ 4 files changed, 226 insertions(+) create mode 100644 llm/tests/metal/MetalAdder.h create mode 100644 llm/tests/metal/MetalAdder.m create mode 100644 llm/tests/metal/add.metal create mode 100644 llm/tests/metal/main.m diff --git a/llm/tests/metal/MetalAdder.h b/llm/tests/metal/MetalAdder.h new file mode 100644 index 00000000..b1db6bb0 --- /dev/null +++ b/llm/tests/metal/MetalAdder.h @@ -0,0 +1,17 @@ +/* +A class to manage all of the Metal objects this app creates. +*/ + +#import +#import +#include "common.h" + +NS_ASSUME_NONNULL_BEGIN + +@interface MetalAdder : NSObject +- (instancetype) initWithDevice: (id) device; +- (void) prepareData; +- (void) sendComputeCommand; +@end + +NS_ASSUME_NONNULL_END \ No newline at end of file diff --git a/llm/tests/metal/MetalAdder.m b/llm/tests/metal/MetalAdder.m new file mode 100644 index 00000000..06fd9825 --- /dev/null +++ b/llm/tests/metal/MetalAdder.m @@ -0,0 +1,165 @@ +/* +A class to manage all of the Metal objects. +*/ + +#import "MetalAdder.h" + +// The number of floats in each array, and the size of the arrays in bytes. +const unsigned int x_dim = 64; +const unsigned int y_dim = 64; +const unsigned int z_dim = 64; +const unsigned int arrayLength = x_dim*y_dim*z_dim; +const unsigned int bufferSize = arrayLength * sizeof(float); + +@implementation MetalAdder +{ + id _mDevice; + + // The compute pipeline generated from the compute kernel in the .metal shader file. + id _mAddFunctionPSO; + + // The command queue used to pass commands to the device. + id _mCommandQueue; + + // Buffers to hold data. 
+ id _mBufferA; + id _mBufferB; + id _mBufferResult; + +} + +- (instancetype) initWithDevice: (id) device +{ + self = [super init]; + if (self) + { + _mDevice = device; + + NSError* error = nil; + + // Load the shader files with a .metal file extension in the project + + id defaultLibrary = [_mDevice newDefaultLibrary]; + if (defaultLibrary == nil) + { + NSLog(@"Failed to find the default library."); + return nil; + } + + id addFunction = [defaultLibrary newFunctionWithName:@"elementwise_add"]; + if (addFunction == nil) + { + NSLog(@"Failed to find the adder function."); + return nil; + } + + // Create a compute pipeline state object. + _mAddFunctionPSO = [_mDevice newComputePipelineStateWithFunction: addFunction error:&error]; + if (_mAddFunctionPSO == nil) + { + NSLog(@"Failed to created pipeline state object, error %@.", error); + return nil; + } + + _mCommandQueue = [_mDevice newCommandQueue]; + if (_mCommandQueue == nil) + { + NSLog(@"Failed to find the command queue."); + return nil; + } + } + + return self; +} + +- (void) prepareData +{ + // Allocate three buffers to hold our initial data and the result. + _mBufferA = [_mDevice newBufferWithLength:bufferSize options:MTLResourceStorageModeShared]; + _mBufferB = [_mDevice newBufferWithLength:bufferSize options:MTLResourceStorageModeShared]; + _mBufferResult = [_mDevice newBufferWithLength:bufferSize options:MTLResourceStorageModeShared]; + + [self generateRandomFloatData:_mBufferA]; + [self generateRandomFloatData:_mBufferB]; +} + +- (void) sendComputeCommand +{ + // Create a command buffer to hold commands. + id commandBuffer = [_mCommandQueue commandBuffer]; + assert(commandBuffer != nil); + + // Start a compute pass. + id computeEncoder = [commandBuffer computeCommandEncoder]; + assert(computeEncoder != nil); + + [self encodeAddCommand:computeEncoder]; + + // End the compute pass. + [computeEncoder endEncoding]; + + // Execute the command. 
+ [commandBuffer commit]; + + [commandBuffer waitUntilCompleted]; + + [self verifyResults]; +} + +- (void)encodeAddCommand:(id)computeEncoder { + + // Encode the pipeline state object and its parameters. + [computeEncoder setComputePipelineState:_mAddFunctionPSO]; + [computeEncoder setBuffer:_mBufferA offset:0 atIndex:0]; + [computeEncoder setBuffer:_mBufferB offset:0 atIndex:1]; + [computeEncoder setBuffer:_mBufferResult offset:0 atIndex:2]; + + MTLSize gridSize = MTLSizeMake(arrayLength, 1, 1); + + // Calculate a threadgroup size. + NSUInteger threadGroupSize = _mAddFunctionPSO.maxTotalThreadsPerThreadgroup; + if (threadGroupSize > arrayLength) + { + threadGroupSize = arrayLength; + } + MTLSize threadgroupSize = MTLSizeMake(threadGroupSize, 1, 1); + + // Encode the compute command. + [computeEncoder dispatchThreads:gridSize + threadsPerThreadgroup:threadgroupSize]; +} + +- (void) generateRandomFloatData: (id) buffer +{ + Matrix3D dataPtr = buffer.contents; + dataPtr.m_dim_x = x_dim + dataPtr.m_dim_y = y_dim + dataPtr.m_dim_z = z_dim + for (unsigned long x = 0; x < x_dim; x++) { + for (unsigned long y = 0; y < y_dim; y++) { + for (unsigned long z = 0; z < z_dim; z++) { + dataPtr(x,y,z) = (float)rand()/(float)(RAND_MAX); + } + } + } +} +- (void) verifyResults +{ + float* a = _mBufferA.contents; + float* b = _mBufferB.contents; + float* result = _mBufferResult.contents; + + for (unsigned long x = 0; x < x_dim; x++) { + for (unsigned long y = 0; y < y_dim; y++) { + for (unsigned long z = 0; z < z_dim; z++) { + if (result(x, y, z) != (a(x, y, z) + b(x, y, z))) { + printf("Compute ERROR: result=%g vs %g=a+b\n", + result(x, y, z), a(x, y, z) + b(x, y, z)); + assert(result(x, y, z) == (a(x, y, z) + b(x, y, z))); + } + } + } + } + printf("Compute results as expected\n"); +} +@end diff --git a/llm/tests/metal/add.metal b/llm/tests/metal/add.metal new file mode 100644 index 00000000..9dae4b9b --- /dev/null +++ b/llm/tests/metal/add.metal @@ -0,0 +1,21 @@ +/* +A kernel that 
adds two arrays of floats. +*/ + +#include +#include "common.h" +using namespace metal; +// Assume that inputA, inputB, and output have the same dimension +kernel void elementwise_add( + device const Matrix3D &inputA, + device const Matrix3D &inputB, + device const Matrix3D &output, + uint3 gid [[ thread_position_in_grid ]] +) +{ + // // Calculate the global linear index for Matrix3D + // uint index = gid.x + gid.y * inputA.m_dim_y + gid.z * inputA.m_dim_y * inputA.m_dim_z; + + // Perform element-wise addition + output(gid.x, gid.y, gid.z) = inputA(gid.x, gid.y, gid.z) + inputB(gid.x, gid.y, gid.z); +} diff --git a/llm/tests/metal/main.m b/llm/tests/metal/main.m new file mode 100644 index 00000000..8965926f --- /dev/null +++ b/llm/tests/metal/main.m @@ -0,0 +1,23 @@ +#import +#import +#import "MetalAdder.h" + +int main(int argc, const char * argv[]) { + @autoreleasepool { + + id device = MTLCreateSystemDefaultDevice(); + + // Create the custom object used to encapsulate the Metal code. + // Initializes objects to communicate with the GPU. + MetalAdder* adder = [[MetalAdder alloc] initWithDevice:device]; + + // Create buffers to hold data + [adder prepareData]; + + // Send a command to the GPU to perform the calculation. + [adder sendComputeCommand]; + + NSLog(@"Execution finished"); + } + return 0; +} From cb5419ae3b74573679a2a5e21054d82e9610a4c1 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Sun, 14 Jan 2024 23:11:14 -0500 Subject: [PATCH 02/37] metal gpu matrix3D addition test In the process of adding metal gpu support, created a folder named metal for element-wise addition of Matrix3D test. 
--- metal-cpp/Foundation/Foundation.hpp | 47 + metal-cpp/Foundation/NSArray.hpp | 115 + metal-cpp/Foundation/NSAutoreleasePool.hpp | 83 + metal-cpp/Foundation/NSBundle.hpp | 374 +++ metal-cpp/Foundation/NSData.hpp | 54 + metal-cpp/Foundation/NSDate.hpp | 53 + metal-cpp/Foundation/NSDefines.hpp | 45 + metal-cpp/Foundation/NSDictionary.hpp | 128 + metal-cpp/Foundation/NSEnumerator.hpp | 78 + metal-cpp/Foundation/NSError.hpp | 173 ++ metal-cpp/Foundation/NSLock.hpp | 118 + metal-cpp/Foundation/NSNotification.hpp | 110 + metal-cpp/Foundation/NSNumber.hpp | 501 ++++ metal-cpp/Foundation/NSObjCRuntime.hpp | 43 + metal-cpp/Foundation/NSObject.hpp | 302 +++ metal-cpp/Foundation/NSPrivate.hpp | 505 ++++ metal-cpp/Foundation/NSProcessInfo.hpp | 354 +++ metal-cpp/Foundation/NSRange.hpp | 83 + metal-cpp/Foundation/NSSet.hpp | 87 + metal-cpp/Foundation/NSSharedPtr.hpp | 311 +++ metal-cpp/Foundation/NSString.hpp | 248 ++ metal-cpp/Foundation/NSTypes.hpp | 51 + metal-cpp/Foundation/NSURL.hpp | 90 + metal-cpp/Metal/MTLAccelerationStructure.hpp | 1081 ++++++++ ...MTLAccelerationStructureCommandEncoder.hpp | 290 +++ .../Metal/MTLAccelerationStructureTypes.hpp | 169 ++ metal-cpp/Metal/MTLArgument.hpp | 841 ++++++ metal-cpp/Metal/MTLArgumentEncoder.hpp | 241 ++ metal-cpp/Metal/MTLBinaryArchive.hpp | 139 + metal-cpp/Metal/MTLBlitCommandEncoder.hpp | 246 ++ metal-cpp/Metal/MTLBlitPass.hpp | 165 ++ metal-cpp/Metal/MTLBuffer.hpp | 109 + metal-cpp/Metal/MTLCaptureManager.hpp | 220 ++ metal-cpp/Metal/MTLCaptureScope.hpp | 92 + metal-cpp/Metal/MTLCommandBuffer.hpp | 474 ++++ metal-cpp/Metal/MTLCommandEncoder.hpp | 101 + metal-cpp/Metal/MTLCommandQueue.hpp | 89 + metal-cpp/Metal/MTLComputeCommandEncoder.hpp | 330 +++ metal-cpp/Metal/MTLComputePass.hpp | 181 ++ metal-cpp/Metal/MTLComputePipeline.hpp | 373 +++ metal-cpp/Metal/MTLCounters.hpp | 258 ++ metal-cpp/Metal/MTLDefines.hpp | 41 + metal-cpp/Metal/MTLDepthStencil.hpp | 269 ++ metal-cpp/Metal/MTLDevice.hpp | 1352 ++++++++++ 
metal-cpp/Metal/MTLDrawable.hpp | 99 + metal-cpp/Metal/MTLDynamicLibrary.hpp | 82 + metal-cpp/Metal/MTLEvent.hpp | 159 ++ metal-cpp/Metal/MTLFence.hpp | 57 + metal-cpp/Metal/MTLFunctionConstantValues.hpp | 85 + metal-cpp/Metal/MTLFunctionDescriptor.hpp | 156 ++ metal-cpp/Metal/MTLFunctionHandle.hpp | 61 + metal-cpp/Metal/MTLFunctionLog.hpp | 114 + metal-cpp/Metal/MTLFunctionStitching.hpp | 305 +++ metal-cpp/Metal/MTLHeaderBridge.hpp | 2286 +++++++++++++++++ metal-cpp/Metal/MTLHeap.hpp | 329 +++ metal-cpp/Metal/MTLIOCommandBuffer.hpp | 200 ++ metal-cpp/Metal/MTLIOCommandQueue.hpp | 225 ++ metal-cpp/Metal/MTLIOCompressor.hpp | 92 + metal-cpp/Metal/MTLIndirectCommandBuffer.hpp | 212 ++ metal-cpp/Metal/MTLIndirectCommandEncoder.hpp | 187 ++ .../Metal/MTLIntersectionFunctionTable.hpp | 163 ++ metal-cpp/Metal/MTLLibrary.hpp | 644 +++++ metal-cpp/Metal/MTLLinkedFunctions.hpp | 115 + .../Metal/MTLParallelRenderCommandEncoder.hpp | 94 + metal-cpp/Metal/MTLPipeline.hpp | 109 + metal-cpp/Metal/MTLPixelFormat.hpp | 173 ++ metal-cpp/Metal/MTLPrivate.hpp | 156 ++ metal-cpp/Metal/MTLRasterizationRate.hpp | 386 +++ metal-cpp/Metal/MTLRenderCommandEncoder.hpp | 1145 +++++++++ metal-cpp/Metal/MTLRenderPass.hpp | 786 ++++++ metal-cpp/Metal/MTLRenderPipeline.hpp | 1598 ++++++++++++ metal-cpp/Metal/MTLResource.hpp | 178 ++ .../Metal/MTLResourceStateCommandEncoder.hpp | 103 + metal-cpp/Metal/MTLResourceStatePass.hpp | 165 ++ metal-cpp/Metal/MTLSampler.hpp | 319 +++ .../Metal/MTLStageInputOutputDescriptor.hpp | 381 +++ metal-cpp/Metal/MTLTexture.hpp | 684 +++++ metal-cpp/Metal/MTLTypes.hpp | 168 ++ metal-cpp/Metal/MTLVersion.hpp | 32 + metal-cpp/Metal/MTLVertexDescriptor.hpp | 344 +++ metal-cpp/Metal/MTLVisibleFunctionTable.hpp | 104 + metal-cpp/Metal/Metal.hpp | 84 + metal-cpp/QuartzCore/CADefines.hpp | 41 + metal-cpp/QuartzCore/CAMetalDrawable.hpp | 57 + metal-cpp/QuartzCore/CAMetalLayer.hpp | 131 + metal-cpp/QuartzCore/CAPrivate.hpp | 132 + metal-cpp/QuartzCore/QuartzCore.hpp | 28 + 
metal-cpp/README.md | 309 +++ metal-cpp/SingleHeader/MakeSingleHeader.py | 271 ++ 89 files changed, 24563 insertions(+) create mode 100644 metal-cpp/Foundation/Foundation.hpp create mode 100644 metal-cpp/Foundation/NSArray.hpp create mode 100644 metal-cpp/Foundation/NSAutoreleasePool.hpp create mode 100644 metal-cpp/Foundation/NSBundle.hpp create mode 100644 metal-cpp/Foundation/NSData.hpp create mode 100644 metal-cpp/Foundation/NSDate.hpp create mode 100644 metal-cpp/Foundation/NSDefines.hpp create mode 100644 metal-cpp/Foundation/NSDictionary.hpp create mode 100644 metal-cpp/Foundation/NSEnumerator.hpp create mode 100644 metal-cpp/Foundation/NSError.hpp create mode 100644 metal-cpp/Foundation/NSLock.hpp create mode 100644 metal-cpp/Foundation/NSNotification.hpp create mode 100644 metal-cpp/Foundation/NSNumber.hpp create mode 100644 metal-cpp/Foundation/NSObjCRuntime.hpp create mode 100644 metal-cpp/Foundation/NSObject.hpp create mode 100644 metal-cpp/Foundation/NSPrivate.hpp create mode 100644 metal-cpp/Foundation/NSProcessInfo.hpp create mode 100644 metal-cpp/Foundation/NSRange.hpp create mode 100644 metal-cpp/Foundation/NSSet.hpp create mode 100644 metal-cpp/Foundation/NSSharedPtr.hpp create mode 100644 metal-cpp/Foundation/NSString.hpp create mode 100644 metal-cpp/Foundation/NSTypes.hpp create mode 100644 metal-cpp/Foundation/NSURL.hpp create mode 100644 metal-cpp/Metal/MTLAccelerationStructure.hpp create mode 100644 metal-cpp/Metal/MTLAccelerationStructureCommandEncoder.hpp create mode 100644 metal-cpp/Metal/MTLAccelerationStructureTypes.hpp create mode 100644 metal-cpp/Metal/MTLArgument.hpp create mode 100644 metal-cpp/Metal/MTLArgumentEncoder.hpp create mode 100644 metal-cpp/Metal/MTLBinaryArchive.hpp create mode 100644 metal-cpp/Metal/MTLBlitCommandEncoder.hpp create mode 100644 metal-cpp/Metal/MTLBlitPass.hpp create mode 100644 metal-cpp/Metal/MTLBuffer.hpp create mode 100644 metal-cpp/Metal/MTLCaptureManager.hpp create mode 100644 
metal-cpp/Metal/MTLCaptureScope.hpp create mode 100644 metal-cpp/Metal/MTLCommandBuffer.hpp create mode 100644 metal-cpp/Metal/MTLCommandEncoder.hpp create mode 100644 metal-cpp/Metal/MTLCommandQueue.hpp create mode 100644 metal-cpp/Metal/MTLComputeCommandEncoder.hpp create mode 100644 metal-cpp/Metal/MTLComputePass.hpp create mode 100644 metal-cpp/Metal/MTLComputePipeline.hpp create mode 100644 metal-cpp/Metal/MTLCounters.hpp create mode 100644 metal-cpp/Metal/MTLDefines.hpp create mode 100644 metal-cpp/Metal/MTLDepthStencil.hpp create mode 100644 metal-cpp/Metal/MTLDevice.hpp create mode 100644 metal-cpp/Metal/MTLDrawable.hpp create mode 100644 metal-cpp/Metal/MTLDynamicLibrary.hpp create mode 100644 metal-cpp/Metal/MTLEvent.hpp create mode 100644 metal-cpp/Metal/MTLFence.hpp create mode 100644 metal-cpp/Metal/MTLFunctionConstantValues.hpp create mode 100644 metal-cpp/Metal/MTLFunctionDescriptor.hpp create mode 100644 metal-cpp/Metal/MTLFunctionHandle.hpp create mode 100644 metal-cpp/Metal/MTLFunctionLog.hpp create mode 100644 metal-cpp/Metal/MTLFunctionStitching.hpp create mode 100644 metal-cpp/Metal/MTLHeaderBridge.hpp create mode 100644 metal-cpp/Metal/MTLHeap.hpp create mode 100644 metal-cpp/Metal/MTLIOCommandBuffer.hpp create mode 100644 metal-cpp/Metal/MTLIOCommandQueue.hpp create mode 100644 metal-cpp/Metal/MTLIOCompressor.hpp create mode 100644 metal-cpp/Metal/MTLIndirectCommandBuffer.hpp create mode 100644 metal-cpp/Metal/MTLIndirectCommandEncoder.hpp create mode 100644 metal-cpp/Metal/MTLIntersectionFunctionTable.hpp create mode 100644 metal-cpp/Metal/MTLLibrary.hpp create mode 100644 metal-cpp/Metal/MTLLinkedFunctions.hpp create mode 100644 metal-cpp/Metal/MTLParallelRenderCommandEncoder.hpp create mode 100644 metal-cpp/Metal/MTLPipeline.hpp create mode 100644 metal-cpp/Metal/MTLPixelFormat.hpp create mode 100644 metal-cpp/Metal/MTLPrivate.hpp create mode 100644 metal-cpp/Metal/MTLRasterizationRate.hpp create mode 100644 
metal-cpp/Metal/MTLRenderCommandEncoder.hpp create mode 100644 metal-cpp/Metal/MTLRenderPass.hpp create mode 100644 metal-cpp/Metal/MTLRenderPipeline.hpp create mode 100644 metal-cpp/Metal/MTLResource.hpp create mode 100644 metal-cpp/Metal/MTLResourceStateCommandEncoder.hpp create mode 100644 metal-cpp/Metal/MTLResourceStatePass.hpp create mode 100644 metal-cpp/Metal/MTLSampler.hpp create mode 100644 metal-cpp/Metal/MTLStageInputOutputDescriptor.hpp create mode 100644 metal-cpp/Metal/MTLTexture.hpp create mode 100644 metal-cpp/Metal/MTLTypes.hpp create mode 100644 metal-cpp/Metal/MTLVersion.hpp create mode 100644 metal-cpp/Metal/MTLVertexDescriptor.hpp create mode 100644 metal-cpp/Metal/MTLVisibleFunctionTable.hpp create mode 100644 metal-cpp/Metal/Metal.hpp create mode 100644 metal-cpp/QuartzCore/CADefines.hpp create mode 100644 metal-cpp/QuartzCore/CAMetalDrawable.hpp create mode 100644 metal-cpp/QuartzCore/CAMetalLayer.hpp create mode 100644 metal-cpp/QuartzCore/CAPrivate.hpp create mode 100644 metal-cpp/QuartzCore/QuartzCore.hpp create mode 100644 metal-cpp/README.md create mode 100755 metal-cpp/SingleHeader/MakeSingleHeader.py diff --git a/metal-cpp/Foundation/Foundation.hpp b/metal-cpp/Foundation/Foundation.hpp new file mode 100644 index 00000000..4191d06d --- /dev/null +++ b/metal-cpp/Foundation/Foundation.hpp @@ -0,0 +1,47 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Foundation/Foundation.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "NSArray.hpp" +#include "NSAutoreleasePool.hpp" +#include "NSBundle.hpp" +#include "NSData.hpp" +#include "NSDate.hpp" +#include "NSDefines.hpp" +#include "NSDictionary.hpp" +#include "NSEnumerator.hpp" +#include "NSError.hpp" +#include "NSLock.hpp" +#include "NSNotification.hpp" +#include "NSNumber.hpp" +#include "NSObject.hpp" +#include "NSPrivate.hpp" +#include "NSProcessInfo.hpp" +#include "NSRange.hpp" +#include "NSSet.hpp" +#include "NSSharedPtr.hpp" +#include "NSString.hpp" +#include "NSTypes.hpp" +#include "NSURL.hpp" + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/Foundation/NSArray.hpp b/metal-cpp/Foundation/NSArray.hpp new file mode 100644 index 00000000..d5b2e370 --- /dev/null +++ b/metal-cpp/Foundation/NSArray.hpp @@ -0,0 +1,115 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Foundation/NSArray.hpp +// +// Copyright 2020-2022 Apple Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "NSObject.hpp" +#include "NSTypes.hpp" + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace NS +{ +class Array : public Copying +{ +public: + static Array* array(); + static Array* array(const Object* pObject); + static Array* array(const Object* const* pObjects, UInteger count); + + static Array* alloc(); + + Array* init(); + Array* init(const Object* const* pObjects, UInteger count); + Array* init(const class Coder* pCoder); + + template + _Object* object(UInteger index) const; + UInteger count() const; +}; +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Array* NS::Array::array() +{ + return Object::sendMessage(_NS_PRIVATE_CLS(NSArray), _NS_PRIVATE_SEL(array)); +} + 
+//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Array* NS::Array::array(const Object* pObject) +{ + return Object::sendMessage(_NS_PRIVATE_CLS(NSArray), _NS_PRIVATE_SEL(arrayWithObject_), pObject); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Array* NS::Array::array(const Object* const* pObjects, UInteger count) +{ + return Object::sendMessage(_NS_PRIVATE_CLS(NSArray), _NS_PRIVATE_SEL(arrayWithObjects_count_), pObjects, count); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Array* NS::Array::alloc() +{ + return NS::Object::alloc(_NS_PRIVATE_CLS(NSArray)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Array* NS::Array::init() +{ + return NS::Object::init(); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Array* NS::Array::init(const Object* const* pObjects, UInteger count) +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(initWithObjects_count_), pObjects, count); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Array* NS::Array::init(const class Coder* pCoder) +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(initWithCoder_), pCoder); +} + 
+//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::UInteger NS::Array::count() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(count)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +_NS_INLINE _Object* NS::Array::object(UInteger index) const +{ + return Object::sendMessage<_Object*>(this, _NS_PRIVATE_SEL(objectAtIndex_), index); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/Foundation/NSAutoreleasePool.hpp b/metal-cpp/Foundation/NSAutoreleasePool.hpp new file mode 100644 index 00000000..4fc2594d --- /dev/null +++ b/metal-cpp/Foundation/NSAutoreleasePool.hpp @@ -0,0 +1,83 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Foundation/NSAutoreleasePool.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "NSDefines.hpp" +#include "NSObject.hpp" +#include "NSPrivate.hpp" +#include "NSTypes.hpp" + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace NS +{ +class AutoreleasePool : public Object +{ +public: + static AutoreleasePool* alloc(); + AutoreleasePool* init(); + + void drain(); + + void addObject(Object* pObject); + + static void showPools(); +}; +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::AutoreleasePool* NS::AutoreleasePool::alloc() +{ + return NS::Object::alloc(_NS_PRIVATE_CLS(NSAutoreleasePool)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::AutoreleasePool* NS::AutoreleasePool::init() +{ + return NS::Object::init(); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE void NS::AutoreleasePool::drain() +{ + Object::sendMessage(this, _NS_PRIVATE_SEL(drain)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE void NS::AutoreleasePool::addObject(Object* pObject) +{ + Object::sendMessage(this, _NS_PRIVATE_SEL(addObject_), pObject); +} + 
+//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE void NS::AutoreleasePool::showPools() +{ + Object::sendMessage(_NS_PRIVATE_CLS(NSAutoreleasePool), _NS_PRIVATE_SEL(showPools)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/Foundation/NSBundle.hpp b/metal-cpp/Foundation/NSBundle.hpp new file mode 100644 index 00000000..2a983c42 --- /dev/null +++ b/metal-cpp/Foundation/NSBundle.hpp @@ -0,0 +1,374 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Foundation/NSBundle.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "NSDefines.hpp" +#include "NSNotification.hpp" +#include "NSObject.hpp" +#include "NSTypes.hpp" + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace NS +{ +_NS_CONST(NotificationName, BundleDidLoadNotification); +_NS_CONST(NotificationName, BundleResourceRequestLowDiskSpaceNotification); + +class String* LocalizedString(const String* pKey, const String*); +class String* LocalizedStringFromTable(const String* pKey, const String* pTbl, const String*); +class String* LocalizedStringFromTableInBundle(const String* pKey, const String* pTbl, const class Bundle* pBdle, const String*); +class String* LocalizedStringWithDefaultValue(const String* pKey, const String* pTbl, const class Bundle* pBdle, const String* pVal, const String*); + +class Bundle : public Referencing +{ +public: + static Bundle* mainBundle(); + + static Bundle* bundle(const class String* pPath); + static Bundle* bundle(const class URL* pURL); + + static Bundle* alloc(); + + Bundle* init(const class String* pPath); + Bundle* init(const class URL* pURL); + + class Array* allBundles() const; + class Array* allFrameworks() const; + + bool load(); + bool unload(); + + bool isLoaded() const; + + bool preflightAndReturnError(class Error** pError) const; + bool loadAndReturnError(class Error** pError); + + class URL* bundleURL() const; + class URL* resourceURL() const; + class URL* executableURL() const; + class URL* URLForAuxiliaryExecutable(const class String* pExecutableName) const; + + class URL* privateFrameworksURL() const; + class URL* 
sharedFrameworksURL() const; + class URL* sharedSupportURL() const; + class URL* builtInPlugInsURL() const; + class URL* appStoreReceiptURL() const; + + class String* bundlePath() const; + class String* resourcePath() const; + class String* executablePath() const; + class String* pathForAuxiliaryExecutable(const class String* pExecutableName) const; + + class String* privateFrameworksPath() const; + class String* sharedFrameworksPath() const; + class String* sharedSupportPath() const; + class String* builtInPlugInsPath() const; + + class String* bundleIdentifier() const; + class Dictionary* infoDictionary() const; + class Dictionary* localizedInfoDictionary() const; + class Object* objectForInfoDictionaryKey(const class String* pKey); + + class String* localizedString(const class String* pKey, const class String* pValue = nullptr, const class String* pTableName = nullptr) const; +}; +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_PRIVATE_DEF_CONST(NS::NotificationName, BundleDidLoadNotification); +_NS_PRIVATE_DEF_CONST(NS::NotificationName, BundleResourceRequestLowDiskSpaceNotification); + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::LocalizedString(const String* pKey, const String*) +{ + return Bundle::mainBundle()->localizedString(pKey, nullptr, nullptr); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::LocalizedStringFromTable(const String* pKey, const String* pTbl, const String*) +{ + return Bundle::mainBundle()->localizedString(pKey, nullptr, pTbl); +} + 
+//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::LocalizedStringFromTableInBundle(const String* pKey, const String* pTbl, const Bundle* pBdl, const String*) +{ + return pBdl->localizedString(pKey, nullptr, pTbl); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::LocalizedStringWithDefaultValue(const String* pKey, const String* pTbl, const Bundle* pBdl, const String* pVal, const String*) +{ + return pBdl->localizedString(pKey, pVal, pTbl); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Bundle* NS::Bundle::mainBundle() +{ + return Object::sendMessage(_NS_PRIVATE_CLS(NSBundle), _NS_PRIVATE_SEL(mainBundle)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Bundle* NS::Bundle::bundle(const class String* pPath) +{ + return Object::sendMessage(_NS_PRIVATE_CLS(NSBundle), _NS_PRIVATE_SEL(bundleWithPath_), pPath); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Bundle* NS::Bundle::bundle(const class URL* pURL) +{ + return Object::sendMessage(_NS_PRIVATE_CLS(NSBundle), _NS_PRIVATE_SEL(bundleWithURL_), pURL); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Bundle* NS::Bundle::alloc() +{ + return Object::sendMessage(_NS_PRIVATE_CLS(NSBundle), 
_NS_PRIVATE_SEL(alloc)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Bundle* NS::Bundle::init(const String* pPath) +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(initWithPath_), pPath); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Bundle* NS::Bundle::init(const URL* pURL) +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(initWithURL_), pURL); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Array* NS::Bundle::allBundles() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(allBundles)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Array* NS::Bundle::allFrameworks() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(allFrameworks)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE bool NS::Bundle::load() +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(load)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE bool NS::Bundle::unload() +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(unload)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE bool NS::Bundle::isLoaded() const +{ + return 
Object::sendMessage(this, _NS_PRIVATE_SEL(isLoaded)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE bool NS::Bundle::preflightAndReturnError(Error** pError) const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(preflightAndReturnError_), pError); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE bool NS::Bundle::loadAndReturnError(Error** pError) +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(loadAndReturnError_), pError); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::URL* NS::Bundle::bundleURL() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(bundleURL)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::URL* NS::Bundle::resourceURL() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(resourceURL)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::URL* NS::Bundle::executableURL() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(executableURL)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::URL* NS::Bundle::URLForAuxiliaryExecutable(const String* pExecutableName) const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(URLForAuxiliaryExecutable_), pExecutableName); +} + 
+//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::URL* NS::Bundle::privateFrameworksURL() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(privateFrameworksURL)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::URL* NS::Bundle::sharedFrameworksURL() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(sharedFrameworksURL)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::URL* NS::Bundle::sharedSupportURL() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(sharedSupportURL)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::URL* NS::Bundle::builtInPlugInsURL() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(builtInPlugInsURL)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::URL* NS::Bundle::appStoreReceiptURL() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(appStoreReceiptURL)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::Bundle::bundlePath() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(bundlePath)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* 
NS::Bundle::resourcePath() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(resourcePath)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::Bundle::executablePath() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(executablePath)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::Bundle::pathForAuxiliaryExecutable(const String* pExecutableName) const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(pathForAuxiliaryExecutable_), pExecutableName); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::Bundle::privateFrameworksPath() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(privateFrameworksPath)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::Bundle::sharedFrameworksPath() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(sharedFrameworksPath)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::Bundle::sharedSupportPath() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(sharedSupportPath)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::Bundle::builtInPlugInsPath() const +{ + return Object::sendMessage(this, 
_NS_PRIVATE_SEL(builtInPlugInsPath)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::Bundle::bundleIdentifier() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(bundleIdentifier)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Dictionary* NS::Bundle::infoDictionary() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(infoDictionary)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Dictionary* NS::Bundle::localizedInfoDictionary() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(localizedInfoDictionary)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Object* NS::Bundle::objectForInfoDictionaryKey(const String* pKey) +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(objectForInfoDictionaryKey_), pKey); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::Bundle::localizedString(const String* pKey, const String* pValue /* = nullptr */, const String* pTableName /* = nullptr */) const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(localizedStringForKey_value_table_), pKey, pValue, pTableName); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/Foundation/NSData.hpp b/metal-cpp/Foundation/NSData.hpp new 
file mode 100644 index 00000000..d518f5c6 --- /dev/null +++ b/metal-cpp/Foundation/NSData.hpp @@ -0,0 +1,54 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Foundation/NSData.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "NSObject.hpp" +#include "NSTypes.hpp" + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace NS +{ +class Data : public Copying +{ +public: + void* mutableBytes() const; + UInteger length() const; +}; +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE void* NS::Data::mutableBytes() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(mutableBytes)); +} + 
+//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::UInteger NS::Data::length() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(length)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/Foundation/NSDate.hpp b/metal-cpp/Foundation/NSDate.hpp new file mode 100644 index 00000000..f016e617 --- /dev/null +++ b/metal-cpp/Foundation/NSDate.hpp @@ -0,0 +1,53 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Foundation/NSDate.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "NSDefines.hpp" +#include "NSObject.hpp" +#include "NSPrivate.hpp" +#include "NSTypes.hpp" + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace NS +{ + +using TimeInterval = double; + +class Date : public Copying +{ +public: + static Date* dateWithTimeIntervalSinceNow(TimeInterval secs); +}; + +} // NS + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Date* NS::Date::dateWithTimeIntervalSinceNow(NS::TimeInterval secs) +{ + return NS::Object::sendMessage(_NS_PRIVATE_CLS(NSDate), _NS_PRIVATE_SEL(dateWithTimeIntervalSinceNow_), secs); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- \ No newline at end of file diff --git a/metal-cpp/Foundation/NSDefines.hpp b/metal-cpp/Foundation/NSDefines.hpp new file mode 100644 index 00000000..c1217e8a --- /dev/null +++ b/metal-cpp/Foundation/NSDefines.hpp @@ -0,0 +1,45 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Foundation/NSDefines.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#define _NS_WEAK_IMPORT __attribute__((weak_import)) +#ifdef METALCPP_SYMBOL_VISIBILITY_HIDDEN +#define _NS_EXPORT __attribute__((visibility("hidden"))) +#else +#define _NS_EXPORT __attribute__((visibility("default"))) +#endif // METALCPP_SYMBOL_VISIBILITY_HIDDEN +#define _NS_EXTERN extern "C" _NS_EXPORT +#define _NS_INLINE inline __attribute__((always_inline)) +#define _NS_PACKED __attribute__((packed)) + +#define _NS_CONST(type, name) _NS_EXTERN type const name +#define _NS_ENUM(type, name) enum name : type +#define _NS_OPTIONS(type, name) \ + using name = type; \ + enum : name + +#define _NS_CAST_TO_UINT(value) static_cast(value) +#define _NS_VALIDATE_SIZE(ns, name) static_assert(sizeof(ns::name) == sizeof(ns##name), "size mismatch " #ns "::" #name) +#define _NS_VALIDATE_ENUM(ns, name) static_assert(_NS_CAST_TO_UINT(ns::name) == _NS_CAST_TO_UINT(ns##name), "value mismatch " #ns "::" #name) + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/Foundation/NSDictionary.hpp b/metal-cpp/Foundation/NSDictionary.hpp new file mode 100644 index 00000000..017bf44e --- /dev/null +++ 
b/metal-cpp/Foundation/NSDictionary.hpp @@ -0,0 +1,128 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Foundation/NSDictionary.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "NSEnumerator.hpp" +#include "NSObject.hpp" +#include "NSTypes.hpp" + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace NS +{ +class Dictionary : public NS::Copying +{ +public: + static Dictionary* dictionary(); + static Dictionary* dictionary(const Object* pObject, const Object* pKey); + static Dictionary* dictionary(const Object* const* pObjects, const Object* const* pKeys, UInteger count); + + static Dictionary* alloc(); + + Dictionary* init(); + Dictionary* init(const Object* const* pObjects, const Object* const* pKeys, UInteger count); + Dictionary* init(const class Coder* pCoder); + + template + Enumerator<_KeyType>* keyEnumerator() const; + + 
template + _Object* object(const Object* pKey) const; + UInteger count() const; +}; +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Dictionary* NS::Dictionary::dictionary() +{ + return Object::sendMessage(_NS_PRIVATE_CLS(NSDictionary), _NS_PRIVATE_SEL(dictionary)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Dictionary* NS::Dictionary::dictionary(const Object* pObject, const Object* pKey) +{ + return Object::sendMessage(_NS_PRIVATE_CLS(NSDictionary), _NS_PRIVATE_SEL(dictionaryWithObject_forKey_), pObject, pKey); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Dictionary* NS::Dictionary::dictionary(const Object* const* pObjects, const Object* const* pKeys, UInteger count) +{ + return Object::sendMessage(_NS_PRIVATE_CLS(NSDictionary), _NS_PRIVATE_SEL(dictionaryWithObjects_forKeys_count_), + pObjects, pKeys, count); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Dictionary* NS::Dictionary::alloc() +{ + return NS::Object::alloc(_NS_PRIVATE_CLS(NSDictionary)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Dictionary* NS::Dictionary::init() +{ + return NS::Object::init(); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Dictionary* NS::Dictionary::init(const 
Object* const* pObjects, const Object* const* pKeys, UInteger count) +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(initWithObjects_forKeys_count_), pObjects, pKeys, count); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Dictionary* NS::Dictionary::init(const class Coder* pCoder) +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(initWithCoder_), pCoder); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +_NS_INLINE NS::Enumerator<_KeyType>* NS::Dictionary::keyEnumerator() const +{ + return Object::sendMessage*>(this, _NS_PRIVATE_SEL(keyEnumerator)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +_NS_INLINE _Object* NS::Dictionary::object(const Object* pKey) const +{ + return Object::sendMessage<_Object*>(this, _NS_PRIVATE_SEL(objectForKey_), pKey); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::UInteger NS::Dictionary::count() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(count)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/Foundation/NSEnumerator.hpp b/metal-cpp/Foundation/NSEnumerator.hpp new file mode 100644 index 00000000..60343086 --- /dev/null +++ b/metal-cpp/Foundation/NSEnumerator.hpp @@ -0,0 +1,78 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// 
Foundation/NSEnumerator.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "NSObject.hpp" +#include "NSTypes.hpp" + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace NS +{ +struct FastEnumerationState +{ + unsigned long state; + Object** itemsPtr; + unsigned long* mutationsPtr; + unsigned long extra[5]; +} _NS_PACKED; + +class FastEnumeration : public Referencing +{ +public: + NS::UInteger countByEnumerating(FastEnumerationState* pState, Object** pBuffer, NS::UInteger len); +}; + +template +class Enumerator : public Referencing, FastEnumeration> +{ +public: + _ObjectType* nextObject(); + class Array* allObjects(); +}; +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::UInteger NS::FastEnumeration::countByEnumerating(FastEnumerationState* pState, Object** pBuffer, NS::UInteger len) +{ + return 
Object::sendMessage(this, _NS_PRIVATE_SEL(countByEnumeratingWithState_objects_count_), pState, pBuffer, len); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +_NS_INLINE _ObjectType* NS::Enumerator<_ObjectType>::nextObject() +{ + return Object::sendMessage<_ObjectType*>(this, _NS_PRIVATE_SEL(nextObject)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +_NS_INLINE NS::Array* NS::Enumerator<_ObjectType>::allObjects() +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(allObjects)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/Foundation/NSError.hpp b/metal-cpp/Foundation/NSError.hpp new file mode 100644 index 00000000..1bc39de1 --- /dev/null +++ b/metal-cpp/Foundation/NSError.hpp @@ -0,0 +1,173 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Foundation/NSError.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSDefines.hpp"
+#include "NSObject.hpp"
+#include "NSPrivate.hpp"
+#include "NSTypes.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+using ErrorDomain = class String*; // an error domain is handled as a String pointer
+
+_NS_CONST(ErrorDomain, CocoaErrorDomain);
+_NS_CONST(ErrorDomain, POSIXErrorDomain);
+_NS_CONST(ErrorDomain, OSStatusErrorDomain);
+_NS_CONST(ErrorDomain, MachErrorDomain);
+
+using ErrorUserInfoKey = class String*; // userInfo keys are String pointers as well
+
+_NS_CONST(ErrorUserInfoKey, UnderlyingErrorKey);
+_NS_CONST(ErrorUserInfoKey, LocalizedDescriptionKey);
+_NS_CONST(ErrorUserInfoKey, LocalizedFailureReasonErrorKey);
+_NS_CONST(ErrorUserInfoKey, LocalizedRecoverySuggestionErrorKey);
+_NS_CONST(ErrorUserInfoKey, LocalizedRecoveryOptionsErrorKey);
+_NS_CONST(ErrorUserInfoKey, RecoveryAttempterErrorKey);
+_NS_CONST(ErrorUserInfoKey, HelpAnchorErrorKey);
+_NS_CONST(ErrorUserInfoKey, DebugDescriptionErrorKey);
+_NS_CONST(ErrorUserInfoKey, LocalizedFailureErrorKey);
+_NS_CONST(ErrorUserInfoKey, StringEncodingErrorKey);
+_NS_CONST(ErrorUserInfoKey, URLErrorKey);
+_NS_CONST(ErrorUserInfoKey, FilePathErrorKey);
+
+class Error : public Copying<Error> // every member below forwards to NSError through NS::Object helpers
+{
+public:
+    static Error* error(ErrorDomain domain, Integer code, class Dictionary* pDictionary);
+
+    static Error* alloc();
+    Error* init();
+    Error* init(ErrorDomain domain, Integer code, class Dictionary* pDictionary);
+
+    Integer code() const;
+    ErrorDomain domain() const;
+    class Dictionary* userInfo() const;
+
+    class String* localizedDescription() const;
+    class Array* localizedRecoveryOptions() const;
+    class String* localizedRecoverySuggestion() const;
+    class String* localizedFailureReason() const;
+};
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_PRIVATE_DEF_CONST(NS::ErrorDomain, CocoaErrorDomain);
+_NS_PRIVATE_DEF_CONST(NS::ErrorDomain, POSIXErrorDomain);
+_NS_PRIVATE_DEF_CONST(NS::ErrorDomain, OSStatusErrorDomain);
+_NS_PRIVATE_DEF_CONST(NS::ErrorDomain, MachErrorDomain);
+
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, UnderlyingErrorKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, LocalizedDescriptionKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, LocalizedFailureReasonErrorKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, LocalizedRecoverySuggestionErrorKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, LocalizedRecoveryOptionsErrorKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, RecoveryAttempterErrorKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, HelpAnchorErrorKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, DebugDescriptionErrorKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, LocalizedFailureErrorKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, StringEncodingErrorKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, URLErrorKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, FilePathErrorKey);
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class factory: sends errorWithDomain_code_userInfo_ to the NSError class with the three arguments.
+_NS_INLINE NS::Error* NS::Error::error(ErrorDomain domain, Integer code, class Dictionary* pDictionary)
+{
+    return Object::sendMessage<Error*>(_NS_PRIVATE_CLS(NSError), _NS_PRIVATE_SEL(errorWithDomain_code_userInfo_), domain, code, pDictionary);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Allocation: delegates to Object::alloc for the NSError class.
+_NS_INLINE NS::Error* NS::Error::alloc()
+{
+    return Object::alloc<Error>(_NS_PRIVATE_CLS(NSError));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Plain initializer: delegates to Object::init.
+_NS_INLINE NS::Error* NS::Error::init()
+{
+    return Object::init<Error>();
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Initializer: sends initWithDomain_code_userInfo_ to this instance with the three arguments.
+_NS_INLINE NS::Error* NS::Error::init(ErrorDomain domain, Integer code, class Dictionary* pDictionary)
+{
+    return Object::sendMessage<Error*>(this, _NS_PRIVATE_SEL(initWithDomain_code_userInfo_), domain, code, pDictionary);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends code to this instance; the result is returned as Integer.
+_NS_INLINE NS::Integer NS::Error::code() const
+{
+    return Object::sendMessage<Integer>(this, _NS_PRIVATE_SEL(code));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends domain to this instance; the result is returned as ErrorDomain.
+_NS_INLINE NS::ErrorDomain NS::Error::domain() const
+{
+    return Object::sendMessage<ErrorDomain>(this, _NS_PRIVATE_SEL(domain));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends userInfo to this instance; the result is returned as Dictionary*.
+_NS_INLINE NS::Dictionary* NS::Error::userInfo() const
+{
+    return Object::sendMessage<Dictionary*>(this, _NS_PRIVATE_SEL(userInfo));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends localizedDescription to this instance; the result is returned as String*.
+_NS_INLINE NS::String* NS::Error::localizedDescription() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(localizedDescription));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends localizedRecoveryOptions to this instance; the result is returned as Array*.
+_NS_INLINE NS::Array* NS::Error::localizedRecoveryOptions() const
+{
+    return Object::sendMessage<Array*>(this, _NS_PRIVATE_SEL(localizedRecoveryOptions));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends localizedRecoverySuggestion to this instance; the result is returned as String*.
+_NS_INLINE NS::String* NS::Error::localizedRecoverySuggestion() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(localizedRecoverySuggestion));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends localizedFailureReason to this instance; the result is returned as String*.
+_NS_INLINE NS::String* NS::Error::localizedFailureReason() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(localizedFailureReason));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSLock.hpp b/metal-cpp/Foundation/NSLock.hpp
new file mode 100644
index 00000000..7fee9435
--- /dev/null
+++ b/metal-cpp/Foundation/NSLock.hpp
@@ -0,0 +1,118 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSLock.hpp
+//
+// Copyright 2020-2022 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSDefines.hpp"
+#include "NSObject.hpp"
+#include "NSPrivate.hpp"
+#include "NSTypes.hpp"
+#include "NSDate.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+
+template <class _Class, class _Base = class Object>
+class Locking : public _Base // mixin that layers lock()/unlock() on top of _Base
+{
+public:
+    void lock();
+    void unlock();
+};
+
+class Condition : public Locking<Condition> // members below forward to NSCondition through NS::Object helpers
+{
+public:
+    static Condition* alloc();
+
+    Condition* init();
+
+    void wait();
+    bool waitUntilDate(Date* pLimit);
+    void signal();
+    void broadcast();
+};
+
+} // NS
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends lock to this instance; nothing is returned.
+template <class _Class, class _Base>
+_NS_INLINE void NS::Locking<_Class, _Base>::lock()
+{
+    NS::Object::sendMessage<void>(this, _NS_PRIVATE_SEL(lock));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends unlock to this instance; nothing is returned.
+template <class _Class, class _Base>
+_NS_INLINE void NS::Locking<_Class, _Base>::unlock()
+{
+    NS::Object::sendMessage<void>(this, _NS_PRIVATE_SEL(unlock));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Allocation: delegates to Object::alloc for the NSCondition class.
+_NS_INLINE NS::Condition* NS::Condition::alloc()
+{
+    return NS::Object::alloc<NS::Condition>(_NS_PRIVATE_CLS(NSCondition));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Plain initializer: delegates to Object::init.
+_NS_INLINE NS::Condition* NS::Condition::init()
+{
+    return NS::Object::init<NS::Condition>();
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends wait to this instance; nothing is returned.
+_NS_INLINE void NS::Condition::wait()
+{
+    NS::Object::sendMessage<void>(this, _NS_PRIVATE_SEL(wait));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends waitUntilDate_ to this instance with pLimit; the bool result is passed back to the caller.
+_NS_INLINE bool NS::Condition::waitUntilDate(NS::Date* pLimit)
+{
+    return NS::Object::sendMessage<bool>(this, _NS_PRIVATE_SEL(waitUntilDate_), pLimit);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends signal to this instance; nothing is returned.
+_NS_INLINE void NS::Condition::signal()
+{
+    NS::Object::sendMessage<void>(this, _NS_PRIVATE_SEL(signal));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends broadcast to this instance; nothing is returned.
+_NS_INLINE void NS::Condition::broadcast()
+{
+    NS::Object::sendMessage<void>(this, _NS_PRIVATE_SEL(broadcast));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
\ No newline at end of file
diff --git a/metal-cpp/Foundation/NSNotification.hpp b/metal-cpp/Foundation/NSNotification.hpp
new file mode 100644
index 00000000..8eb5f804
--- /dev/null
+++ b/metal-cpp/Foundation/NSNotification.hpp
@@ -0,0 +1,110 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSNotification.hpp
+//
+// Copyright 2020-2022 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSDefines.hpp"
+#include "NSDictionary.hpp"
+#include "NSObject.hpp"
+#include "NSString.hpp"
+#include "NSTypes.hpp"
+#include <functional>
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+using NotificationName = class String*; // a notification name is handled as a String pointer
+
+class Notification : public NS::Referencing<Notification> // read-only accessors forwarded through Object::sendMessage
+{
+public:
+    NS::String* name() const;
+    NS::Object* object() const;
+    NS::Dictionary* userInfo() const;
+};
+
+using ObserverBlock = void(^)(Notification*); // block form of an observer callback
+using ObserverFunction = std::function<void(Notification*)>; // std::function form of the same callback
+
+class NotificationCenter : public NS::Referencing<NotificationCenter> // forwards to NSNotificationCenter
+{
+    public:
+        static class NotificationCenter* defaultCenter();
+        Object* addObserver(NotificationName name, Object* pObj, void* pQueue, ObserverBlock block);
+        Object* addObserver(NotificationName name, Object* pObj, void* pQueue, ObserverFunction &handler);
+        void removeObserver(Object* pObserver);
+
+};
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends name to this instance; the result is returned as NS::String*.
+_NS_INLINE NS::String* NS::Notification::name() const
+{
+    return Object::sendMessage<NS::String*>(this, _NS_PRIVATE_SEL(name));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends object to this instance; the result is returned as NS::Object*.
+_NS_INLINE NS::Object* NS::Notification::object() const
+{
+    return Object::sendMessage<NS::Object*>(this, _NS_PRIVATE_SEL(object));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends userInfo to this instance; the result is returned as NS::Dictionary*.
+_NS_INLINE NS::Dictionary* NS::Notification::userInfo() const
+{
+    return Object::sendMessage<NS::Dictionary*>(this, _NS_PRIVATE_SEL(userInfo));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class accessor: sends defaultCenter to the NSNotificationCenter class.
+_NS_INLINE NS::NotificationCenter* NS::NotificationCenter::defaultCenter()
+{
+    return NS::Object::sendMessage<NS::NotificationCenter*>(_NS_PRIVATE_CLS(NSNotificationCenter), _NS_PRIVATE_SEL(defaultCenter));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Block overload: sends addObserverName_object_queue_block_ to this instance with all four arguments.
+_NS_INLINE NS::Object* NS::NotificationCenter::addObserver(NS::NotificationName name, Object* pObj, void* pQueue, NS::ObserverBlock block)
+{
+    return NS::Object::sendMessage<Object*>(this, _NS_PRIVATE_SEL(addObserverName_object_queue_block_), name, pObj, pQueue, block);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// std::function overload: copies handler into a __block variable and calls the block overload with a block that invokes it.
+_NS_INLINE NS::Object* NS::NotificationCenter::addObserver(NS::NotificationName name, Object* pObj, void* pQueue, NS::ObserverFunction &handler)
+{
+    __block ObserverFunction blockFunction = handler;
+
+    return addObserver(name, pObj, pQueue, ^(NS::Notification* pNotif) {blockFunction(pNotif);});
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends removeObserver_ to this instance with pObserver; the void result of the send is returned as-is.
+_NS_INLINE void NS::NotificationCenter::removeObserver(Object* pObserver)
+{
+    return NS::Object::sendMessage<void>(this, _NS_PRIVATE_SEL(removeObserver_), pObserver);
+}
+
diff --git a/metal-cpp/Foundation/NSNumber.hpp b/metal-cpp/Foundation/NSNumber.hpp
new file mode 100644
index 00000000..4eaaf193
--- /dev/null
+++ b/metal-cpp/Foundation/NSNumber.hpp
@@ -0,0 +1,501 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSNumber.hpp
+//
+// Copyright 2020-2022 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSObjCRuntime.hpp"
+#include "NSObject.hpp"
+#include "NSTypes.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+class Value : public Copying<Value> // members below forward to NSValue through NS::Object helpers
+{
+public:
+    static Value* value(const void* pValue, const char* pType);
+    static Value* value(const void* pPointer);
+
+    static Value* alloc();
+
+    Value* init(const void* pValue, const char* pType);
+    Value* init(const class Coder* pCoder);
+
+    void getValue(void* pValue, UInteger size) const;
+    const char* objCType() const;
+
+    bool isEqualToValue(Value* pValue) const;
+    void* pointerValue() const;
+};
+
+class Number : public Copying<Number, Value> // one factory, one initializer and one accessor per scalar type
+{
+public:
+    static Number* number(char value);
+    static Number* number(unsigned char value);
+    static Number* number(short value);
+    static Number* number(unsigned short value);
+    static Number* number(int value);
+    static Number* number(unsigned int value);
+    static Number* number(long value);
+    static Number* number(unsigned long value);
+    static Number* number(long long value);
+    static Number* number(unsigned long long value);
+    static Number* number(float value);
+    static Number* number(double value);
+    static Number* number(bool value);
+
+    static Number* alloc();
+
+    Number* init(const class Coder* pCoder);
+    Number* init(char value);
+    Number* init(unsigned char value);
+    Number* init(short value);
+    Number* init(unsigned short value);
+    Number* init(int value);
+    Number* init(unsigned int value);
+    Number* init(long value);
+    Number* init(unsigned long value);
+    Number* init(long long value);
+    Number* init(unsigned long long value);
+    Number* init(float value);
+    Number* init(double value);
+    Number* init(bool value);
+
+    char charValue() const;
+    unsigned char unsignedCharValue() const;
+    short shortValue() const;
+    unsigned short unsignedShortValue() const;
+    int intValue() const;
+    unsigned int unsignedIntValue() const;
+    long longValue() const;
+    unsigned long unsignedLongValue() const;
+    long long longLongValue() const;
+    unsigned long long unsignedLongLongValue() const;
+    float floatValue() const;
+    double doubleValue() const;
+    bool boolValue() const;
+    Integer integerValue() const;
+    UInteger unsignedIntegerValue() const;
+    class String* stringValue() const;
+
+    ComparisonResult compare(const Number* pOtherNumber) const;
+    bool isEqualToNumber(const Number* pNumber) const;
+
+    class String* descriptionWithLocale(const Object* pLocale) const;
+};
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class factory: sends valueWithBytes_objCType_ to the NSValue class with pValue and pType.
+_NS_INLINE NS::Value* NS::Value::value(const void* pValue, const char* pType)
+{
+    return Object::sendMessage<Value*>(_NS_PRIVATE_CLS(NSValue), _NS_PRIVATE_SEL(valueWithBytes_objCType_), pValue, pType);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class factory: sends valueWithPointer_ to the NSValue class with pPointer.
+_NS_INLINE NS::Value* NS::Value::value(const void* pPointer)
+{
+    return Object::sendMessage<Value*>(_NS_PRIVATE_CLS(NSValue), _NS_PRIVATE_SEL(valueWithPointer_), pPointer);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Allocation: delegates to Object::alloc for the NSValue class.
+_NS_INLINE NS::Value* NS::Value::alloc()
+{
+    return NS::Object::alloc<Value>(_NS_PRIVATE_CLS(NSValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Initializer: sends initWithBytes_objCType_ to this instance with pValue and pType.
+_NS_INLINE NS::Value* NS::Value::init(const void* pValue, const char* pType)
+{
+    return Object::sendMessage<Value*>(this, _NS_PRIVATE_SEL(initWithBytes_objCType_), pValue, pType);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Initializer: sends initWithCoder_ to this instance with pCoder.
+_NS_INLINE NS::Value* NS::Value::init(const class Coder* pCoder)
+{
+    return Object::sendMessage<Value*>(this, _NS_PRIVATE_SEL(initWithCoder_), pCoder);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends getValue_size_ to this instance with the destination pointer and size; nothing is returned.
+_NS_INLINE void NS::Value::getValue(void* pValue, UInteger size) const
+{
+    Object::sendMessage<void>(this, _NS_PRIVATE_SEL(getValue_size_), pValue, size);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends objCType to this instance; the result is returned as const char*.
+_NS_INLINE const char* NS::Value::objCType() const
+{
+    return Object::sendMessage<const char*>(this, _NS_PRIVATE_SEL(objCType));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Comparison: sends isEqualToValue_ to this instance with pValue; the result is returned as bool.
+_NS_INLINE bool NS::Value::isEqualToValue(Value* pValue) const
+{
+    return Object::sendMessage<bool>(this, _NS_PRIVATE_SEL(isEqualToValue_), pValue);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends pointerValue to this instance; the result is returned as void*.
+_NS_INLINE void* NS::Value::pointerValue() const
+{
+    return Object::sendMessage<void*>(this, _NS_PRIVATE_SEL(pointerValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class factory: sends numberWithChar_ to the NSNumber class with value.
+_NS_INLINE NS::Number* NS::Number::number(char value)
+{
+    return Object::sendMessage<Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithChar_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class factory: sends numberWithUnsignedChar_ to the NSNumber class with value.
+_NS_INLINE NS::Number* NS::Number::number(unsigned char value)
+{
+    return Object::sendMessage<Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithUnsignedChar_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class factory: sends numberWithShort_ to the NSNumber class with value.
+_NS_INLINE NS::Number* NS::Number::number(short value)
+{
+    return Object::sendMessage<Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithShort_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class factory: sends numberWithUnsignedShort_ to the NSNumber class with value.
+_NS_INLINE NS::Number* NS::Number::number(unsigned short value)
+{
+    return Object::sendMessage<Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithUnsignedShort_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class factory: sends numberWithInt_ to the NSNumber class with value.
+_NS_INLINE NS::Number* NS::Number::number(int value)
+{
+    return Object::sendMessage<Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithInt_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class factory: sends numberWithUnsignedInt_ to the NSNumber class with value.
+_NS_INLINE NS::Number* NS::Number::number(unsigned int value)
+{
+    return Object::sendMessage<Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithUnsignedInt_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class factory: sends numberWithLong_ to the NSNumber class with value.
+_NS_INLINE NS::Number* NS::Number::number(long value)
+{
+    return Object::sendMessage<Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithLong_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class factory: sends numberWithUnsignedLong_ to the NSNumber class with value.
+_NS_INLINE NS::Number* NS::Number::number(unsigned long value)
+{
+    return Object::sendMessage<Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithUnsignedLong_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class factory: sends numberWithLongLong_ to the NSNumber class with value.
+_NS_INLINE NS::Number* NS::Number::number(long long value)
+{
+    return Object::sendMessage<Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithLongLong_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class factory: sends numberWithUnsignedLongLong_ to the NSNumber class with value.
+_NS_INLINE NS::Number* NS::Number::number(unsigned long long value)
+{
+    return Object::sendMessage<Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithUnsignedLongLong_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class factory: sends numberWithFloat_ to the NSNumber class with value.
+_NS_INLINE NS::Number* NS::Number::number(float value)
+{
+    return Object::sendMessage<Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithFloat_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class factory: sends numberWithDouble_ to the NSNumber class with value.
+_NS_INLINE NS::Number* NS::Number::number(double value)
+{
+    return Object::sendMessage<Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithDouble_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class factory: sends numberWithBool_ to the NSNumber class with value.
+_NS_INLINE NS::Number* NS::Number::number(bool value)
+{
+    return Object::sendMessage<Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithBool_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Allocation: delegates to Object::alloc for the NSNumber class.
+_NS_INLINE NS::Number* NS::Number::alloc()
+{
+    return NS::Object::alloc<Number>(_NS_PRIVATE_CLS(NSNumber));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Initializer: sends initWithCoder_ to this instance with pCoder.
+_NS_INLINE NS::Number* NS::Number::init(const Coder* pCoder)
+{
+    return Object::sendMessage<Number*>(this, _NS_PRIVATE_SEL(initWithCoder_), pCoder);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Initializer: sends initWithChar_ to this instance with value.
+_NS_INLINE NS::Number* NS::Number::init(char value)
+{
+    return Object::sendMessage<Number*>(this, _NS_PRIVATE_SEL(initWithChar_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Initializer: sends initWithUnsignedChar_ to this instance with value.
+_NS_INLINE NS::Number* NS::Number::init(unsigned char value)
+{
+    return Object::sendMessage<Number*>(this, _NS_PRIVATE_SEL(initWithUnsignedChar_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Initializer: sends initWithShort_ to this instance with value.
+_NS_INLINE NS::Number* NS::Number::init(short value)
+{
+    return Object::sendMessage<Number*>(this, _NS_PRIVATE_SEL(initWithShort_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Initializer: sends initWithUnsignedShort_ to this instance with value.
+_NS_INLINE NS::Number* NS::Number::init(unsigned short value)
+{
+    return Object::sendMessage<Number*>(this, _NS_PRIVATE_SEL(initWithUnsignedShort_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Initializer: sends initWithInt_ to this instance with value.
+_NS_INLINE NS::Number* NS::Number::init(int value)
+{
+    return Object::sendMessage<Number*>(this, _NS_PRIVATE_SEL(initWithInt_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Initializer: sends initWithUnsignedInt_ to this instance with value.
+_NS_INLINE NS::Number* NS::Number::init(unsigned int value)
+{
+    return Object::sendMessage<Number*>(this, _NS_PRIVATE_SEL(initWithUnsignedInt_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Initializer: sends initWithLong_ to this instance with value.
+_NS_INLINE NS::Number* NS::Number::init(long value)
+{
+    return Object::sendMessage<Number*>(this, _NS_PRIVATE_SEL(initWithLong_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Initializer: sends initWithUnsignedLong_ to this instance with value.
+_NS_INLINE NS::Number* NS::Number::init(unsigned long value)
+{
+    return Object::sendMessage<Number*>(this, _NS_PRIVATE_SEL(initWithUnsignedLong_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Initializer: sends initWithLongLong_ to this instance with value.
+_NS_INLINE NS::Number* NS::Number::init(long long value)
+{
+    return Object::sendMessage<Number*>(this, _NS_PRIVATE_SEL(initWithLongLong_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Initializer: sends initWithUnsignedLongLong_ to this instance with value.
+_NS_INLINE NS::Number* NS::Number::init(unsigned long long value)
+{
+    return Object::sendMessage<Number*>(this, _NS_PRIVATE_SEL(initWithUnsignedLongLong_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Initializer: sends initWithFloat_ to this instance with value.
+_NS_INLINE NS::Number* NS::Number::init(float value)
+{
+    return Object::sendMessage<Number*>(this, _NS_PRIVATE_SEL(initWithFloat_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Initializer: sends initWithDouble_ to this instance with value.
+_NS_INLINE NS::Number* NS::Number::init(double value)
+{
+    return Object::sendMessage<Number*>(this, _NS_PRIVATE_SEL(initWithDouble_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Initializer: sends initWithBool_ to this instance with value.
+_NS_INLINE NS::Number* NS::Number::init(bool value)
+{
+    return Object::sendMessage<Number*>(this, _NS_PRIVATE_SEL(initWithBool_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends charValue to this instance; the result is returned as char.
+_NS_INLINE char NS::Number::charValue() const
+{
+    return Object::sendMessage<char>(this, _NS_PRIVATE_SEL(charValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends unsignedCharValue to this instance; the result is returned as unsigned char.
+_NS_INLINE unsigned char NS::Number::unsignedCharValue() const
+{
+    return Object::sendMessage<unsigned char>(this, _NS_PRIVATE_SEL(unsignedCharValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends shortValue to this instance; the result is returned as short.
+_NS_INLINE short NS::Number::shortValue() const
+{
+    return Object::sendMessage<short>(this, _NS_PRIVATE_SEL(shortValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends unsignedShortValue to this instance; the result is returned as unsigned short.
+_NS_INLINE unsigned short NS::Number::unsignedShortValue() const
+{
+    return Object::sendMessage<unsigned short>(this, _NS_PRIVATE_SEL(unsignedShortValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends intValue to this instance; the result is returned as int.
+_NS_INLINE int NS::Number::intValue() const
+{
+    return Object::sendMessage<int>(this, _NS_PRIVATE_SEL(intValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends unsignedIntValue to this instance; the result is returned as unsigned int.
+_NS_INLINE unsigned int NS::Number::unsignedIntValue() const
+{
+    return Object::sendMessage<unsigned int>(this, _NS_PRIVATE_SEL(unsignedIntValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends longValue to this instance; the result is returned as long.
+_NS_INLINE long NS::Number::longValue() const
+{
+    return Object::sendMessage<long>(this, _NS_PRIVATE_SEL(longValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends unsignedLongValue to this instance; the result is returned as unsigned long.
+_NS_INLINE unsigned long NS::Number::unsignedLongValue() const
+{
+    return Object::sendMessage<unsigned long>(this, _NS_PRIVATE_SEL(unsignedLongValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends longLongValue to this instance; the result is returned as long long.
+_NS_INLINE long long NS::Number::longLongValue() const
+{
+    return Object::sendMessage<long long>(this, _NS_PRIVATE_SEL(longLongValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends unsignedLongLongValue to this instance; the result is returned as unsigned long long.
+_NS_INLINE unsigned long long NS::Number::unsignedLongLongValue() const
+{
+    return Object::sendMessage<unsigned long long>(this, _NS_PRIVATE_SEL(unsignedLongLongValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends floatValue to this instance; the result is returned as float.
+_NS_INLINE float NS::Number::floatValue() const
+{
+    return Object::sendMessage<float>(this, _NS_PRIVATE_SEL(floatValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends doubleValue to this instance; the result is returned as double.
+_NS_INLINE double NS::Number::doubleValue() const
+{
+    return Object::sendMessage<double>(this, _NS_PRIVATE_SEL(doubleValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends boolValue to this instance; the result is returned as bool.
+_NS_INLINE bool NS::Number::boolValue() const
+{
+    return Object::sendMessage<bool>(this, _NS_PRIVATE_SEL(boolValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends integerValue to this instance; the result is returned as Integer.
+_NS_INLINE NS::Integer NS::Number::integerValue() const
+{
+    return Object::sendMessage<Integer>(this, _NS_PRIVATE_SEL(integerValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends unsignedIntegerValue to this instance; the result is returned as UInteger.
+_NS_INLINE NS::UInteger NS::Number::unsignedIntegerValue() const
+{
+    return Object::sendMessage<UInteger>(this, _NS_PRIVATE_SEL(unsignedIntegerValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Accessor: sends stringValue to this instance; the result is returned as String*.
+_NS_INLINE NS::String* NS::Number::stringValue() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(stringValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Comparison: sends compare_ to this instance with pOtherNumber; the result is returned as ComparisonResult.
+_NS_INLINE NS::ComparisonResult NS::Number::compare(const Number* pOtherNumber) const
+{
+    return Object::sendMessage<ComparisonResult>(this, _NS_PRIVATE_SEL(compare_), pOtherNumber);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Comparison: sends isEqualToNumber_ to this instance with pNumber; the result is returned as bool.
+_NS_INLINE bool NS::Number::isEqualToNumber(const Number* pNumber) const
+{
+    return Object::sendMessage<bool>(this, _NS_PRIVATE_SEL(isEqualToNumber_), pNumber);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends descriptionWithLocale_ to this instance with pLocale; the result is returned as String*.
+_NS_INLINE NS::String* NS::Number::descriptionWithLocale(const Object* pLocale) const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(descriptionWithLocale_), pLocale);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSObjCRuntime.hpp b/metal-cpp/Foundation/NSObjCRuntime.hpp
new file mode 100644
index 00000000..e97592b1
--- /dev/null
+++ b/metal-cpp/Foundation/NSObjCRuntime.hpp
@@ -0,0 +1,43 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSObjCRuntime.hpp
+//
+// Copyright 2020-2022 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "NSDefines.hpp" +#include "NSTypes.hpp" + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace NS +{ + +_NS_ENUM(Integer, ComparisonResult) { + OrderedAscending = -1, + OrderedSame = 0, + OrderedDescending = 1, +}; + +const Integer NotFound = IntegerMax; + +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/Foundation/NSObject.hpp b/metal-cpp/Foundation/NSObject.hpp new file mode 100644 index 00000000..7ece1fdb --- /dev/null +++ b/metal-cpp/Foundation/NSObject.hpp @@ -0,0 +1,302 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Foundation/NSObject.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "NSDefines.hpp" +#include "NSPrivate.hpp" +#include "NSTypes.hpp" + +#include +#include + +#include + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace NS +{ +template +class _NS_EXPORT Referencing : public _Base +{ +public: + _Class* retain(); + void release(); + + _Class* autorelease(); + + UInteger retainCount() const; +}; + +template +class Copying : public Referencing<_Class, _Base> +{ +public: + _Class* copy() const; +}; + +template +class SecureCoding : public Referencing<_Class, _Base> +{ +}; + +class Object : public Referencing +{ +public: + UInteger hash() const; + bool isEqual(const Object* pObject) const; + + class String* description() const; + class String* debugDescription() const; + +protected: + friend class Referencing; + + template + static _Class* alloc(const char* pClassName); + template + static _Class* alloc(const void* pClass); + template + _Class* init(); + + template + static _Dst bridgingCast(const void* pObj); + static class MethodSignature* methodSignatureForSelector(const void* pObj, SEL selector); + static bool respondsToSelector(const void* pObj, SEL selector); + template + static constexpr bool doesRequireMsgSendStret(); + template + static _Ret sendMessage(const void* pObj, SEL selector, _Args... args); + template + static _Ret sendMessageSafe(const void* pObj, SEL selector, _Args... 
args); + +private: + Object() = delete; + Object(const Object&) = delete; + ~Object() = delete; + + Object& operator=(const Object&) = delete; +}; +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +_NS_INLINE _Class* NS::Referencing<_Class, _Base>::retain() +{ + return Object::sendMessage<_Class*>(this, _NS_PRIVATE_SEL(retain)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +_NS_INLINE void NS::Referencing<_Class, _Base>::release() +{ + Object::sendMessage(this, _NS_PRIVATE_SEL(release)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +_NS_INLINE _Class* NS::Referencing<_Class, _Base>::autorelease() +{ + return Object::sendMessage<_Class*>(this, _NS_PRIVATE_SEL(autorelease)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +_NS_INLINE NS::UInteger NS::Referencing<_Class, _Base>::retainCount() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(retainCount)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +_NS_INLINE _Class* NS::Copying<_Class, _Base>::copy() const +{ + return Object::sendMessage<_Class*>(this, _NS_PRIVATE_SEL(copy)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +_NS_INLINE _Dst NS::Object::bridgingCast(const void* pObj) +{ +#ifdef __OBJC__ + return (__bridge _Dst)pObj; 
+#else + return (_Dst)pObj; +#endif // __OBJC__ +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +_NS_INLINE constexpr bool NS::Object::doesRequireMsgSendStret() +{ +#if (defined(__i386__) || defined(__x86_64__)) + constexpr size_t kStructLimit = (sizeof(std::uintptr_t) << 1); + + return sizeof(_Type) > kStructLimit; +#elif defined(__arm64__) + return false; +#elif defined(__arm__) + constexpr size_t kStructLimit = sizeof(std::uintptr_t); + + return std::is_class(_Type) && (sizeof(_Type) > kStructLimit); +#else +#error "Unsupported architecture!" +#endif +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template <> +_NS_INLINE constexpr bool NS::Object::doesRequireMsgSendStret() +{ + return false; +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +_NS_INLINE _Ret NS::Object::sendMessage(const void* pObj, SEL selector, _Args... 
args) +{ +#if (defined(__i386__) || defined(__x86_64__)) + if constexpr (std::is_floating_point<_Ret>()) + { + using SendMessageProcFpret = _Ret (*)(const void*, SEL, _Args...); + + const SendMessageProcFpret pProc = reinterpret_cast(&objc_msgSend_fpret); + + return (*pProc)(pObj, selector, args...); + } + else +#endif // ( defined( __i386__ ) || defined( __x86_64__ ) ) +#if !defined(__arm64__) + if constexpr (doesRequireMsgSendStret<_Ret>()) + { + using SendMessageProcStret = void (*)(_Ret*, const void*, SEL, _Args...); + + const SendMessageProcStret pProc = reinterpret_cast(&objc_msgSend_stret); + _Ret ret; + + (*pProc)(&ret, pObj, selector, args...); + + return ret; + } + else +#endif // !defined( __arm64__ ) + { + using SendMessageProc = _Ret (*)(const void*, SEL, _Args...); + + const SendMessageProc pProc = reinterpret_cast(&objc_msgSend); + + return (*pProc)(pObj, selector, args...); + } +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::MethodSignature* NS::Object::methodSignatureForSelector(const void* pObj, SEL selector) +{ + return sendMessage(pObj, _NS_PRIVATE_SEL(methodSignatureForSelector_), selector); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE bool NS::Object::respondsToSelector(const void* pObj, SEL selector) +{ + return sendMessage(pObj, _NS_PRIVATE_SEL(respondsToSelector_), selector); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +_NS_INLINE _Ret NS::Object::sendMessageSafe(const void* pObj, SEL selector, _Args... 
args) +{ + if ((respondsToSelector(pObj, selector)) || (nullptr != methodSignatureForSelector(pObj, selector))) + { + return sendMessage<_Ret>(pObj, selector, args...); + } + + if constexpr (!std::is_void<_Ret>::value) + { + return 0; + } +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +_NS_INLINE _Class* NS::Object::alloc(const char* pClassName) +{ + return sendMessage<_Class*>(objc_lookUpClass(pClassName), _NS_PRIVATE_SEL(alloc)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +_NS_INLINE _Class* NS::Object::alloc(const void* pClass) +{ + return sendMessage<_Class*>(pClass, _NS_PRIVATE_SEL(alloc)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +_NS_INLINE _Class* NS::Object::init() +{ + return sendMessage<_Class*>(this, _NS_PRIVATE_SEL(init)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::UInteger NS::Object::hash() const +{ + return sendMessage(this, _NS_PRIVATE_SEL(hash)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE bool NS::Object::isEqual(const Object* pObject) const +{ + return sendMessage(this, _NS_PRIVATE_SEL(isEqual_), pObject); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::Object::description() const +{ + return sendMessage(this, 
_NS_PRIVATE_SEL(description)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::Object::debugDescription() const +{ + return sendMessageSafe(this, _NS_PRIVATE_SEL(debugDescription)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/Foundation/NSPrivate.hpp b/metal-cpp/Foundation/NSPrivate.hpp new file mode 100644 index 00000000..371e8feb --- /dev/null +++ b/metal-cpp/Foundation/NSPrivate.hpp @@ -0,0 +1,505 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Foundation/NSPrivate.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#define _NS_PRIVATE_CLS(symbol) (Private::Class::s_k##symbol) +#define _NS_PRIVATE_SEL(accessor) (Private::Selector::s_k##accessor) + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#if defined(NS_PRIVATE_IMPLEMENTATION) + +#ifdef METALCPP_SYMBOL_VISIBILITY_HIDDEN +#define _NS_PRIVATE_VISIBILITY __attribute__((visibility("hidden"))) +#else +#define _NS_PRIVATE_VISIBILITY __attribute__((visibility("default"))) +#endif //METALCPP_SYMBOL_VISIBILITY_HIDDEN + +#define _NS_PRIVATE_IMPORT __attribute__((weak_import)) + +#ifdef __OBJC__ +#define _NS_PRIVATE_OBJC_LOOKUP_CLASS(symbol) ((__bridge void*)objc_lookUpClass(#symbol)) +#define _NS_PRIVATE_OBJC_GET_PROTOCOL(symbol) ((__bridge void*)objc_getProtocol(#symbol)) +#else +#define _NS_PRIVATE_OBJC_LOOKUP_CLASS(symbol) objc_lookUpClass(#symbol) +#define _NS_PRIVATE_OBJC_GET_PROTOCOL(symbol) objc_getProtocol(#symbol) +#endif // __OBJC__ + +#define _NS_PRIVATE_DEF_CLS(symbol) void* s_k##symbol _NS_PRIVATE_VISIBILITY = _NS_PRIVATE_OBJC_LOOKUP_CLASS(symbol) +#define _NS_PRIVATE_DEF_PRO(symbol) void* s_k##symbol _NS_PRIVATE_VISIBILITY = _NS_PRIVATE_OBJC_GET_PROTOCOL(symbol) +#define _NS_PRIVATE_DEF_SEL(accessor, symbol) SEL s_k##accessor _NS_PRIVATE_VISIBILITY = sel_registerName(symbol) +#define _NS_PRIVATE_DEF_CONST(type, symbol) \ + _NS_EXTERN type const NS##symbol _NS_PRIVATE_IMPORT; \ + type const 
NS::symbol = (nullptr != &NS##symbol) ? NS##symbol : nullptr + +#else + +#define _NS_PRIVATE_DEF_CLS(symbol) extern void* s_k##symbol +#define _NS_PRIVATE_DEF_PRO(symbol) extern void* s_k##symbol +#define _NS_PRIVATE_DEF_SEL(accessor, symbol) extern SEL s_k##accessor +#define _NS_PRIVATE_DEF_CONST(type, symbol) extern type const NS::symbol + +#endif // NS_PRIVATE_IMPLEMENTATION + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace NS +{ +namespace Private +{ + namespace Class // class handles: one s_k-prefixed void* per _NS_PRIVATE_DEF_CLS entry + { + + _NS_PRIVATE_DEF_CLS(NSArray); + _NS_PRIVATE_DEF_CLS(NSAutoreleasePool); + _NS_PRIVATE_DEF_CLS(NSBundle); + _NS_PRIVATE_DEF_CLS(NSCondition); + _NS_PRIVATE_DEF_CLS(NSDate); + _NS_PRIVATE_DEF_CLS(NSDictionary); + _NS_PRIVATE_DEF_CLS(NSError); + _NS_PRIVATE_DEF_CLS(NSNotificationCenter); + _NS_PRIVATE_DEF_CLS(NSNumber); + _NS_PRIVATE_DEF_CLS(NSObject); + _NS_PRIVATE_DEF_CLS(NSProcessInfo); + _NS_PRIVATE_DEF_CLS(NSSet); + _NS_PRIVATE_DEF_CLS(NSString); + _NS_PRIVATE_DEF_CLS(NSURL); + _NS_PRIVATE_DEF_CLS(NSValue); + + } // Class +} // Private +} // NS + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace NS +{ +namespace Private +{ + namespace Protocol // no protocol handles are declared in this header + { + + } // Protocol +} // Private +} // NS + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace NS +{ +namespace Private +{ + namespace Selector // selector handles: one s_k-prefixed SEL per _NS_PRIVATE_DEF_SEL(accessor, selector-string) entry + { + + _NS_PRIVATE_DEF_SEL(addObject_, + "addObject:"); + _NS_PRIVATE_DEF_SEL(addObserverName_object_queue_block_, + "addObserverForName:object:queue:usingBlock:"); + _NS_PRIVATE_DEF_SEL(activeProcessorCount, + "activeProcessorCount"); + _NS_PRIVATE_DEF_SEL(allBundles, + "allBundles"); + _NS_PRIVATE_DEF_SEL(allFrameworks, + 
"allFrameworks"); + _NS_PRIVATE_DEF_SEL(allObjects, + "allObjects"); + _NS_PRIVATE_DEF_SEL(alloc, + "alloc"); + _NS_PRIVATE_DEF_SEL(appStoreReceiptURL, + "appStoreReceiptURL"); + _NS_PRIVATE_DEF_SEL(arguments, + "arguments"); + _NS_PRIVATE_DEF_SEL(array, + "array"); + _NS_PRIVATE_DEF_SEL(arrayWithObject_, + "arrayWithObject:"); + _NS_PRIVATE_DEF_SEL(arrayWithObjects_count_, + "arrayWithObjects:count:"); + _NS_PRIVATE_DEF_SEL(automaticTerminationSupportEnabled, + "automaticTerminationSupportEnabled"); + _NS_PRIVATE_DEF_SEL(autorelease, + "autorelease"); + _NS_PRIVATE_DEF_SEL(beginActivityWithOptions_reason_, + "beginActivityWithOptions:reason:"); + _NS_PRIVATE_DEF_SEL(boolValue, + "boolValue"); + _NS_PRIVATE_DEF_SEL(broadcast, + "broadcast"); + _NS_PRIVATE_DEF_SEL(builtInPlugInsPath, + "builtInPlugInsPath"); + _NS_PRIVATE_DEF_SEL(builtInPlugInsURL, + "builtInPlugInsURL"); + _NS_PRIVATE_DEF_SEL(bundleIdentifier, + "bundleIdentifier"); + _NS_PRIVATE_DEF_SEL(bundlePath, + "bundlePath"); + _NS_PRIVATE_DEF_SEL(bundleURL, + "bundleURL"); + _NS_PRIVATE_DEF_SEL(bundleWithPath_, + "bundleWithPath:"); + _NS_PRIVATE_DEF_SEL(bundleWithURL_, + "bundleWithURL:"); + _NS_PRIVATE_DEF_SEL(characterAtIndex_, + "characterAtIndex:"); + _NS_PRIVATE_DEF_SEL(charValue, + "charValue"); + _NS_PRIVATE_DEF_SEL(countByEnumeratingWithState_objects_count_, + "countByEnumeratingWithState:objects:count:"); + _NS_PRIVATE_DEF_SEL(cStringUsingEncoding_, + "cStringUsingEncoding:"); + _NS_PRIVATE_DEF_SEL(code, + "code"); + _NS_PRIVATE_DEF_SEL(compare_, + "compare:"); + _NS_PRIVATE_DEF_SEL(copy, + "copy"); + _NS_PRIVATE_DEF_SEL(count, + "count"); + _NS_PRIVATE_DEF_SEL(dateWithTimeIntervalSinceNow_, + "dateWithTimeIntervalSinceNow:"); + _NS_PRIVATE_DEF_SEL(defaultCenter, + "defaultCenter"); + _NS_PRIVATE_DEF_SEL(descriptionWithLocale_, + "descriptionWithLocale:"); + _NS_PRIVATE_DEF_SEL(disableAutomaticTermination_, + "disableAutomaticTermination:"); + _NS_PRIVATE_DEF_SEL(disableSuddenTermination, + 
"disableSuddenTermination"); + _NS_PRIVATE_DEF_SEL(debugDescription, + "debugDescription"); + _NS_PRIVATE_DEF_SEL(description, + "description"); + _NS_PRIVATE_DEF_SEL(dictionary, + "dictionary"); + _NS_PRIVATE_DEF_SEL(dictionaryWithObject_forKey_, + "dictionaryWithObject:forKey:"); + _NS_PRIVATE_DEF_SEL(dictionaryWithObjects_forKeys_count_, + "dictionaryWithObjects:forKeys:count:"); + _NS_PRIVATE_DEF_SEL(domain, + "domain"); + _NS_PRIVATE_DEF_SEL(doubleValue, + "doubleValue"); + _NS_PRIVATE_DEF_SEL(drain, + "drain"); + _NS_PRIVATE_DEF_SEL(enableAutomaticTermination_, + "enableAutomaticTermination:"); + _NS_PRIVATE_DEF_SEL(enableSuddenTermination, + "enableSuddenTermination"); + _NS_PRIVATE_DEF_SEL(endActivity_, + "endActivity:"); + _NS_PRIVATE_DEF_SEL(environment, + "environment"); + _NS_PRIVATE_DEF_SEL(errorWithDomain_code_userInfo_, + "errorWithDomain:code:userInfo:"); + _NS_PRIVATE_DEF_SEL(executablePath, + "executablePath"); + _NS_PRIVATE_DEF_SEL(executableURL, + "executableURL"); + _NS_PRIVATE_DEF_SEL(fileSystemRepresentation, + "fileSystemRepresentation"); + _NS_PRIVATE_DEF_SEL(fileURLWithPath_, + "fileURLWithPath:"); + _NS_PRIVATE_DEF_SEL(floatValue, + "floatValue"); + _NS_PRIVATE_DEF_SEL(fullUserName, + "fullUserName"); + _NS_PRIVATE_DEF_SEL(getValue_size_, + "getValue:size:"); + _NS_PRIVATE_DEF_SEL(globallyUniqueString, + "globallyUniqueString"); + _NS_PRIVATE_DEF_SEL(hash, + "hash"); + _NS_PRIVATE_DEF_SEL(hostName, + "hostName"); + _NS_PRIVATE_DEF_SEL(infoDictionary, + "infoDictionary"); + _NS_PRIVATE_DEF_SEL(init, + "init"); + _NS_PRIVATE_DEF_SEL(initFileURLWithPath_, + "initFileURLWithPath:"); + _NS_PRIVATE_DEF_SEL(initWithBool_, + "initWithBool:"); + _NS_PRIVATE_DEF_SEL(initWithBytes_objCType_, + "initWithBytes:objCType:"); + _NS_PRIVATE_DEF_SEL(initWithBytesNoCopy_length_encoding_freeWhenDone_, + "initWithBytesNoCopy:length:encoding:freeWhenDone:"); + _NS_PRIVATE_DEF_SEL(initWithChar_, + "initWithChar:"); + _NS_PRIVATE_DEF_SEL(initWithCoder_, + 
"initWithCoder:"); + _NS_PRIVATE_DEF_SEL(initWithCString_encoding_, + "initWithCString:encoding:"); + _NS_PRIVATE_DEF_SEL(initWithDomain_code_userInfo_, + "initWithDomain:code:userInfo:"); + _NS_PRIVATE_DEF_SEL(initWithDouble_, + "initWithDouble:"); + _NS_PRIVATE_DEF_SEL(initWithFloat_, + "initWithFloat:"); + _NS_PRIVATE_DEF_SEL(initWithInt_, + "initWithInt:"); + _NS_PRIVATE_DEF_SEL(initWithLong_, + "initWithLong:"); + _NS_PRIVATE_DEF_SEL(initWithLongLong_, + "initWithLongLong:"); + _NS_PRIVATE_DEF_SEL(initWithObjects_count_, + "initWithObjects:count:"); + _NS_PRIVATE_DEF_SEL(initWithObjects_forKeys_count_, + "initWithObjects:forKeys:count:"); + _NS_PRIVATE_DEF_SEL(initWithPath_, + "initWithPath:"); + _NS_PRIVATE_DEF_SEL(initWithShort_, + "initWithShort:"); + _NS_PRIVATE_DEF_SEL(initWithString_, + "initWithString:"); + _NS_PRIVATE_DEF_SEL(initWithUnsignedChar_, + "initWithUnsignedChar:"); + _NS_PRIVATE_DEF_SEL(initWithUnsignedInt_, + "initWithUnsignedInt:"); + _NS_PRIVATE_DEF_SEL(initWithUnsignedLong_, + "initWithUnsignedLong:"); + _NS_PRIVATE_DEF_SEL(initWithUnsignedLongLong_, + "initWithUnsignedLongLong:"); + _NS_PRIVATE_DEF_SEL(initWithUnsignedShort_, + "initWithUnsignedShort:"); + _NS_PRIVATE_DEF_SEL(initWithURL_, + "initWithURL:"); + _NS_PRIVATE_DEF_SEL(integerValue, + "integerValue"); + _NS_PRIVATE_DEF_SEL(intValue, + "intValue"); + _NS_PRIVATE_DEF_SEL(isEqual_, + "isEqual:"); + _NS_PRIVATE_DEF_SEL(isEqualToNumber_, + "isEqualToNumber:"); + _NS_PRIVATE_DEF_SEL(isEqualToString_, + "isEqualToString:"); + _NS_PRIVATE_DEF_SEL(isEqualToValue_, + "isEqualToValue:"); + _NS_PRIVATE_DEF_SEL(isiOSAppOnMac, + "isiOSAppOnMac"); + _NS_PRIVATE_DEF_SEL(isLoaded, + "isLoaded"); + _NS_PRIVATE_DEF_SEL(isLowPowerModeEnabled, + "isLowPowerModeEnabled"); + _NS_PRIVATE_DEF_SEL(isMacCatalystApp, + "isMacCatalystApp"); + _NS_PRIVATE_DEF_SEL(isOperatingSystemAtLeastVersion_, + "isOperatingSystemAtLeastVersion:"); + _NS_PRIVATE_DEF_SEL(keyEnumerator, + "keyEnumerator"); + 
_NS_PRIVATE_DEF_SEL(length, + "length"); + _NS_PRIVATE_DEF_SEL(lengthOfBytesUsingEncoding_, + "lengthOfBytesUsingEncoding:"); + _NS_PRIVATE_DEF_SEL(load, + "load"); + _NS_PRIVATE_DEF_SEL(loadAndReturnError_, + "loadAndReturnError:"); + _NS_PRIVATE_DEF_SEL(localizedDescription, + "localizedDescription"); + _NS_PRIVATE_DEF_SEL(localizedFailureReason, + "localizedFailureReason"); + _NS_PRIVATE_DEF_SEL(localizedInfoDictionary, + "localizedInfoDictionary"); + _NS_PRIVATE_DEF_SEL(localizedRecoveryOptions, + "localizedRecoveryOptions"); + _NS_PRIVATE_DEF_SEL(localizedRecoverySuggestion, + "localizedRecoverySuggestion"); + _NS_PRIVATE_DEF_SEL(localizedStringForKey_value_table_, + "localizedStringForKey:value:table:"); + _NS_PRIVATE_DEF_SEL(lock, + "lock"); + _NS_PRIVATE_DEF_SEL(longValue, + "longValue"); + _NS_PRIVATE_DEF_SEL(longLongValue, + "longLongValue"); + _NS_PRIVATE_DEF_SEL(mainBundle, + "mainBundle"); + _NS_PRIVATE_DEF_SEL(maximumLengthOfBytesUsingEncoding_, + "maximumLengthOfBytesUsingEncoding:"); + _NS_PRIVATE_DEF_SEL(methodSignatureForSelector_, + "methodSignatureForSelector:"); + _NS_PRIVATE_DEF_SEL(mutableBytes, + "mutableBytes"); + _NS_PRIVATE_DEF_SEL(name, + "name"); + _NS_PRIVATE_DEF_SEL(nextObject, + "nextObject"); + _NS_PRIVATE_DEF_SEL(numberWithBool_, + "numberWithBool:"); + _NS_PRIVATE_DEF_SEL(numberWithChar_, + "numberWithChar:"); + _NS_PRIVATE_DEF_SEL(numberWithDouble_, + "numberWithDouble:"); + _NS_PRIVATE_DEF_SEL(numberWithFloat_, + "numberWithFloat:"); + _NS_PRIVATE_DEF_SEL(numberWithInt_, + "numberWithInt:"); + _NS_PRIVATE_DEF_SEL(numberWithLong_, + "numberWithLong:"); + _NS_PRIVATE_DEF_SEL(numberWithLongLong_, + "numberWithLongLong:"); + _NS_PRIVATE_DEF_SEL(numberWithShort_, + "numberWithShort:"); + _NS_PRIVATE_DEF_SEL(numberWithUnsignedChar_, + "numberWithUnsignedChar:"); + _NS_PRIVATE_DEF_SEL(numberWithUnsignedInt_, + "numberWithUnsignedInt:"); + _NS_PRIVATE_DEF_SEL(numberWithUnsignedLong_, + "numberWithUnsignedLong:"); + 
_NS_PRIVATE_DEF_SEL(numberWithUnsignedLongLong_, + "numberWithUnsignedLongLong:"); + _NS_PRIVATE_DEF_SEL(numberWithUnsignedShort_, + "numberWithUnsignedShort:"); + _NS_PRIVATE_DEF_SEL(objCType, + "objCType"); + _NS_PRIVATE_DEF_SEL(object, + "object"); + _NS_PRIVATE_DEF_SEL(objectAtIndex_, + "objectAtIndex:"); + _NS_PRIVATE_DEF_SEL(objectEnumerator, + "objectEnumerator"); + _NS_PRIVATE_DEF_SEL(objectForInfoDictionaryKey_, + "objectForInfoDictionaryKey:"); + _NS_PRIVATE_DEF_SEL(objectForKey_, + "objectForKey:"); + _NS_PRIVATE_DEF_SEL(operatingSystem, + "operatingSystem"); + _NS_PRIVATE_DEF_SEL(operatingSystemVersion, + "operatingSystemVersion"); + _NS_PRIVATE_DEF_SEL(operatingSystemVersionString, + "operatingSystemVersionString"); + _NS_PRIVATE_DEF_SEL(pathForAuxiliaryExecutable_, + "pathForAuxiliaryExecutable:"); + _NS_PRIVATE_DEF_SEL(performActivityWithOptions_reason_usingBlock_, + "performActivityWithOptions:reason:usingBlock:"); + _NS_PRIVATE_DEF_SEL(performExpiringActivityWithReason_usingBlock_, + "performExpiringActivityWithReason:usingBlock:"); + _NS_PRIVATE_DEF_SEL(physicalMemory, + "physicalMemory"); + _NS_PRIVATE_DEF_SEL(pointerValue, + "pointerValue"); + _NS_PRIVATE_DEF_SEL(preflightAndReturnError_, + "preflightAndReturnError:"); + _NS_PRIVATE_DEF_SEL(privateFrameworksPath, + "privateFrameworksPath"); + _NS_PRIVATE_DEF_SEL(privateFrameworksURL, + "privateFrameworksURL"); + _NS_PRIVATE_DEF_SEL(processIdentifier, + "processIdentifier"); + _NS_PRIVATE_DEF_SEL(processInfo, + "processInfo"); + _NS_PRIVATE_DEF_SEL(processName, + "processName"); + _NS_PRIVATE_DEF_SEL(processorCount, + "processorCount"); + _NS_PRIVATE_DEF_SEL(rangeOfString_options_, + "rangeOfString:options:"); + _NS_PRIVATE_DEF_SEL(release, + "release"); + _NS_PRIVATE_DEF_SEL(removeObserver_, + "removeObserver:"); + _NS_PRIVATE_DEF_SEL(resourcePath, + "resourcePath"); + _NS_PRIVATE_DEF_SEL(resourceURL, + "resourceURL"); + _NS_PRIVATE_DEF_SEL(respondsToSelector_, + "respondsToSelector:"); + 
_NS_PRIVATE_DEF_SEL(retain, + "retain"); + _NS_PRIVATE_DEF_SEL(retainCount, + "retainCount"); + _NS_PRIVATE_DEF_SEL(setAutomaticTerminationSupportEnabled_, + "setAutomaticTerminationSupportEnabled:"); + _NS_PRIVATE_DEF_SEL(setProcessName_, + "setProcessName:"); + _NS_PRIVATE_DEF_SEL(sharedFrameworksPath, + "sharedFrameworksPath"); + _NS_PRIVATE_DEF_SEL(sharedFrameworksURL, + "sharedFrameworksURL"); + _NS_PRIVATE_DEF_SEL(sharedSupportPath, + "sharedSupportPath"); + _NS_PRIVATE_DEF_SEL(sharedSupportURL, + "sharedSupportURL"); + _NS_PRIVATE_DEF_SEL(shortValue, + "shortValue"); + _NS_PRIVATE_DEF_SEL(showPools, + "showPools"); + _NS_PRIVATE_DEF_SEL(signal, + "signal"); + _NS_PRIVATE_DEF_SEL(string, + "string"); + _NS_PRIVATE_DEF_SEL(stringValue, + "stringValue"); + _NS_PRIVATE_DEF_SEL(stringWithString_, + "stringWithString:"); + _NS_PRIVATE_DEF_SEL(stringWithCString_encoding_, + "stringWithCString:encoding:"); + _NS_PRIVATE_DEF_SEL(stringByAppendingString_, + "stringByAppendingString:"); + _NS_PRIVATE_DEF_SEL(systemUptime, + "systemUptime"); + _NS_PRIVATE_DEF_SEL(thermalState, + "thermalState"); + _NS_PRIVATE_DEF_SEL(unload, + "unload"); + _NS_PRIVATE_DEF_SEL(unlock, + "unlock"); + _NS_PRIVATE_DEF_SEL(unsignedCharValue, + "unsignedCharValue"); + _NS_PRIVATE_DEF_SEL(unsignedIntegerValue, + "unsignedIntegerValue"); + _NS_PRIVATE_DEF_SEL(unsignedIntValue, + "unsignedIntValue"); + _NS_PRIVATE_DEF_SEL(unsignedLongValue, + "unsignedLongValue"); + _NS_PRIVATE_DEF_SEL(unsignedLongLongValue, + "unsignedLongLongValue"); + _NS_PRIVATE_DEF_SEL(unsignedShortValue, + "unsignedShortValue"); + _NS_PRIVATE_DEF_SEL(URLForAuxiliaryExecutable_, + "URLForAuxiliaryExecutable:"); + _NS_PRIVATE_DEF_SEL(userInfo, + "userInfo"); + _NS_PRIVATE_DEF_SEL(userName, + "userName"); + _NS_PRIVATE_DEF_SEL(UTF8String, + "UTF8String"); + _NS_PRIVATE_DEF_SEL(valueWithBytes_objCType_, + "valueWithBytes:objCType:"); + _NS_PRIVATE_DEF_SEL(valueWithPointer_, + "valueWithPointer:"); + _NS_PRIVATE_DEF_SEL(wait, + 
"wait"); + _NS_PRIVATE_DEF_SEL(waitUntilDate_, + "waitUntilDate:"); + } // Selector +} // Private +} // NS + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/Foundation/NSProcessInfo.hpp b/metal-cpp/Foundation/NSProcessInfo.hpp new file mode 100644 index 00000000..935122fe --- /dev/null +++ b/metal-cpp/Foundation/NSProcessInfo.hpp @@ -0,0 +1,354 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Foundation/NSProcessInfo.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSDefines.hpp"
+#include "NSNotification.hpp"
+#include "NSObject.hpp"
+#include "NSPrivate.hpp"
+#include "NSTypes.hpp"
+
+#include <functional> // std::function, taken by the performActivity / performExpiringActivity overloads
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+_NS_CONST(NotificationName, ProcessInfoThermalStateDidChangeNotification);
+_NS_CONST(NotificationName, ProcessInfoPowerStateDidChangeNotification);
+
+_NS_ENUM(NS::Integer, ProcessInfoThermalState) {
+    ProcessInfoThermalStateNominal = 0,
+    ProcessInfoThermalStateFair = 1,
+    ProcessInfoThermalStateSerious = 2,
+    ProcessInfoThermalStateCritical = 3
+};
+
+_NS_OPTIONS(std::uint64_t, ActivityOptions) {
+    ActivityIdleDisplaySleepDisabled = (1ULL << 40),
+    ActivityIdleSystemSleepDisabled = (1ULL << 20),
+    ActivitySuddenTerminationDisabled = (1ULL << 14),
+    ActivityAutomaticTerminationDisabled = (1ULL << 15),
+    ActivityUserInitiated = (0x00FFFFFFULL | ActivityIdleSystemSleepDisabled), // low 24 bits plus the idle-system-sleep bit
+    ActivityUserInitiatedAllowingIdleSystemSleep = (ActivityUserInitiated & ~ActivityIdleSystemSleepDisabled), // same, with that bit cleared again
+    ActivityBackground = 0x000000FFULL,
+    ActivityLatencyCritical = 0xFF00000000ULL,
+};
+
+class ProcessInfo : public Referencing<ProcessInfo> // each member below forwards one message to the wrapped object
+{
+public:
+    static ProcessInfo* processInfo(); // sent to the NSProcessInfo class itself, not to an instance
+
+    class Array* arguments() const;
+    class Dictionary* environment() const;
+    class String* hostName() const;
+    class String* processName() const;
+    void setProcessName(const String* pString);
+    int processIdentifier() const;
+    class String* globallyUniqueString() const;
+
+    class String* userName() const;
+    class String* fullUserName() const;
+
+    UInteger operatingSystem() const;
+    OperatingSystemVersion operatingSystemVersion() const;
+    class String* operatingSystemVersionString() const;
+    bool isOperatingSystemAtLeastVersion(OperatingSystemVersion version) const;
+
+    UInteger processorCount() const;
+    UInteger activeProcessorCount() const;
+    unsigned long long physicalMemory() const;
+    TimeInterval systemUptime() const;
+
+    void disableSuddenTermination();
+    void enableSuddenTermination();
+
+    void disableAutomaticTermination(const class String* pReason);
+    void enableAutomaticTermination(const class String* pReason);
+    bool automaticTerminationSupportEnabled() const;
+    void setAutomaticTerminationSupportEnabled(bool enabled);
+
+    class Object* beginActivity(ActivityOptions options, const class String* pReason);
+    void endActivity(class Object* pActivity);
+    void performActivity(ActivityOptions options, const class String* pReason, void (^block)(void));
+    void performActivity(ActivityOptions options, const class String* pReason, const std::function<void()>& func); // wraps func in a block and calls the overload above
+    void performExpiringActivity(const class String* pReason, void (^block)(bool expired));
+    void performExpiringActivity(const class String* pReason, const std::function<void(bool expired)>& func); // wraps func in a block and calls the overload above
+
+    ProcessInfoThermalState thermalState() const;
+    bool isLowPowerModeEnabled() const;
+
+    bool isiOSAppOnMac() const;
+    bool isMacCatalystApp() const;
+};
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_PRIVATE_DEF_CONST(NS::NotificationName, ProcessInfoThermalStateDidChangeNotification);
+_NS_PRIVATE_DEF_CONST(NS::NotificationName, ProcessInfoPowerStateDidChangeNotification);
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::ProcessInfo* NS::ProcessInfo::processInfo()
+{
+    return Object::sendMessage<ProcessInfo*>(_NS_PRIVATE_CLS(NSProcessInfo), _NS_PRIVATE_SEL(processInfo));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Array* NS::ProcessInfo::arguments() const
+{
+    return Object::sendMessage<Array*>(this, _NS_PRIVATE_SEL(arguments));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Dictionary* NS::ProcessInfo::environment() const
+{
+    return Object::sendMessage<Dictionary*>(this, _NS_PRIVATE_SEL(environment));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::String* NS::ProcessInfo::hostName() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(hostName));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::String* NS::ProcessInfo::processName() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(processName));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE void NS::ProcessInfo::setProcessName(const String* pString)
+{
+    Object::sendMessage<void>(this, _NS_PRIVATE_SEL(setProcessName_), pString);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE int NS::ProcessInfo::processIdentifier() const
+{
+    return Object::sendMessage<int>(this, _NS_PRIVATE_SEL(processIdentifier));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::String* NS::ProcessInfo::globallyUniqueString() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(globallyUniqueString));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::String* NS::ProcessInfo::userName() const
+{
+    return Object::sendMessageSafe<String*>(this, _NS_PRIVATE_SEL(userName)); // note: the "Safe" send variant, unlike the accessors around it
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::String* NS::ProcessInfo::fullUserName() const
+{
+    return Object::sendMessageSafe<String*>(this, _NS_PRIVATE_SEL(fullUserName));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::UInteger NS::ProcessInfo::operatingSystem() const
+{
+    return Object::sendMessage<UInteger>(this, _NS_PRIVATE_SEL(operatingSystem));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::OperatingSystemVersion NS::ProcessInfo::operatingSystemVersion() const
+{
+    return Object::sendMessage<OperatingSystemVersion>(this, _NS_PRIVATE_SEL(operatingSystemVersion)); // returned by value
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::String* NS::ProcessInfo::operatingSystemVersionString() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(operatingSystemVersionString));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE bool NS::ProcessInfo::isOperatingSystemAtLeastVersion(OperatingSystemVersion version) const
+{
+    return Object::sendMessage<bool>(this, _NS_PRIVATE_SEL(isOperatingSystemAtLeastVersion_), version);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::UInteger NS::ProcessInfo::processorCount() const
+{
+    return Object::sendMessage<UInteger>(this, _NS_PRIVATE_SEL(processorCount));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::UInteger NS::ProcessInfo::activeProcessorCount() const
+{
+    return Object::sendMessage<UInteger>(this, _NS_PRIVATE_SEL(activeProcessorCount));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE unsigned long long NS::ProcessInfo::physicalMemory() const
+{
+    return Object::sendMessage<unsigned long long>(this, _NS_PRIVATE_SEL(physicalMemory));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::TimeInterval NS::ProcessInfo::systemUptime() const
+{
+    return Object::sendMessage<TimeInterval>(this, _NS_PRIVATE_SEL(systemUptime));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE void NS::ProcessInfo::disableSuddenTermination()
+{
+    Object::sendMessageSafe<void>(this, _NS_PRIVATE_SEL(disableSuddenTermination)); // "Safe" send variant, as in enableSuddenTermination()
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE void NS::ProcessInfo::enableSuddenTermination()
+{
+    Object::sendMessageSafe<void>(this, _NS_PRIVATE_SEL(enableSuddenTermination));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE void NS::ProcessInfo::disableAutomaticTermination(const String* pReason)
+{
+    Object::sendMessageSafe<void>(this, _NS_PRIVATE_SEL(disableAutomaticTermination_), pReason);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE void NS::ProcessInfo::enableAutomaticTermination(const String* pReason)
+{
+    Object::sendMessageSafe<void>(this, _NS_PRIVATE_SEL(enableAutomaticTermination_), pReason);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE bool NS::ProcessInfo::automaticTerminationSupportEnabled() const
+{
+    return Object::sendMessageSafe<bool>(this, _NS_PRIVATE_SEL(automaticTerminationSupportEnabled));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE void NS::ProcessInfo::setAutomaticTerminationSupportEnabled(bool enabled)
+{
+    Object::sendMessageSafe<void>(this, _NS_PRIVATE_SEL(setAutomaticTerminationSupportEnabled_), enabled);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Object* NS::ProcessInfo::beginActivity(ActivityOptions options, const String* pReason)
+{
+    return Object::sendMessage<Object*>(this, _NS_PRIVATE_SEL(beginActivityWithOptions_reason_), options, pReason); // result is the token endActivity() takes
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE void NS::ProcessInfo::endActivity(Object* pActivity)
+{
+    Object::sendMessage<void>(this, _NS_PRIVATE_SEL(endActivity_), pActivity);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE void NS::ProcessInfo::performActivity(ActivityOptions options, const String* pReason, void (^block)(void))
+{
+    Object::sendMessage<void>(this, _NS_PRIVATE_SEL(performActivityWithOptions_reason_usingBlock_), options, pReason, block);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE void NS::ProcessInfo::performActivity(ActivityOptions options, const String* pReason, const std::function<void()>& function)
+{
+    __block std::function<void()> blockFunction = function; // copy of the callable, held in a __block variable for the block below
+
+    performActivity(options, pReason, ^() { blockFunction(); }); // resolves to the block overload above
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE void NS::ProcessInfo::performExpiringActivity(const String* pReason, void (^block)(bool expired))
+{
+    Object::sendMessageSafe<void>(this, _NS_PRIVATE_SEL(performExpiringActivityWithReason_usingBlock_), pReason, block);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE void NS::ProcessInfo::performExpiringActivity(const String* pReason, const std::function<void(bool expired)>& function)
+{
+    __block std::function<void(bool expired)> blockFunction = function; // copy of the callable, held in a __block variable for the block below
+
+    performExpiringActivity(pReason, ^(bool expired) { blockFunction(expired); }); // resolves to the block overload above
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::ProcessInfoThermalState NS::ProcessInfo::thermalState() const
+{
+    return Object::sendMessage<ProcessInfoThermalState>(this, _NS_PRIVATE_SEL(thermalState));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE bool NS::ProcessInfo::isLowPowerModeEnabled() const
+{
+    return Object::sendMessageSafe<bool>(this, _NS_PRIVATE_SEL(isLowPowerModeEnabled));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE bool NS::ProcessInfo::isiOSAppOnMac() const
+{
+    return Object::sendMessageSafe<bool>(this, _NS_PRIVATE_SEL(isiOSAppOnMac));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE bool NS::ProcessInfo::isMacCatalystApp() const
+{
+    return Object::sendMessageSafe<bool>(this, _NS_PRIVATE_SEL(isMacCatalystApp));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSRange.hpp b/metal-cpp/Foundation/NSRange.hpp
new file mode 100644
index 00000000..09b0eb50
--- /dev/null
+++ b/metal-cpp/Foundation/NSRange.hpp
@@ -0,0 +1,83 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSRange.hpp
+//
+// Copyright 2020-2022 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSDefines.hpp"
+#include "NSTypes.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+struct Range // a (location, length) pair of unsigned integers
+{
+    static Range Make(UInteger loc, UInteger len); // same as calling the constructor
+
+    Range(UInteger loc, UInteger len);
+
+    bool Equal(const Range& range) const; // true when both location and length match
+    bool LocationInRange(UInteger loc) const; // true when location <= loc and loc - location < length
+    UInteger Max() const; // location + length
+
+    UInteger location;
+    UInteger length;
+} _NS_PACKED;
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Range::Range(UInteger loc, UInteger len)
+    : location(loc)
+    , length(len)
+{
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Range NS::Range::Make(UInteger loc, UInteger len)
+{
+    return Range(loc, len);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE bool NS::Range::Equal(const Range& range) const
+{
+    return (location == range.location) && (length == range.length);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE bool NS::Range::LocationInRange(UInteger loc) const
+{
+    return (!(loc < location)) && ((loc - location) < length); // lower bound is tested first, so the unsigned subtraction cannot wrap
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::UInteger NS::Range::Max() const
+{
+    return location + length; // plain unsigned addition; no overflow check is made here
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSSet.hpp b/metal-cpp/Foundation/NSSet.hpp
new file mode 100644
index 00000000..aecca09b
--- /dev/null
+++ b/metal-cpp/Foundation/NSSet.hpp
@@ -0,0 +1,87 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSSet.hpp
+//
+// Copyright 2020-2022 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSObject.hpp"
+#include "NSEnumerator.hpp"
+
+/*****Immutable Set*******/
+
+namespace NS
+{
+    class Set : public NS::Copying <Set>
+    {
+        public:
+            UInteger count() const;
+            Enumerator<Object>* objectEnumerator() const;
+
+            static Set* alloc();
+
+            Set* init();
+            Set* init(const Object* const* pObjects, UInteger count); // pObjects: array of `count` object pointers
+            Set* init(const class Coder* pCoder);
+
+    };
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::UInteger NS::Set::count() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _NS_PRIVATE_SEL(count));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Enumerator<NS::Object>* NS::Set::objectEnumerator() const
+{
+    return NS::Object::sendMessage<Enumerator<NS::Object>*>(this, _NS_PRIVATE_SEL(objectEnumerator));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Set* NS::Set::alloc()
+{
+    return NS::Object::alloc<Set>(_NS_PRIVATE_CLS(NSSet));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Set* NS::Set::init()
+{
+    return NS::Object::init<Set>();
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Set* NS::Set::init(const Object* const* pObjects, NS::UInteger count)
+{
+    return NS::Object::sendMessage<Set*>(this, _NS_PRIVATE_SEL(initWithObjects_count_), pObjects, count);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Set* NS::Set::init(const class Coder* pCoder)
+{
+    return Object::sendMessage<Set*>(this, _NS_PRIVATE_SEL(initWithCoder_), pCoder);
+}
diff --git a/metal-cpp/Foundation/NSSharedPtr.hpp b/metal-cpp/Foundation/NSSharedPtr.hpp
new file mode 100644
index 00000000..565ead9e
--- /dev/null
+++ b/metal-cpp/Foundation/NSSharedPtr.hpp
@@ -0,0 +1,312 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSSharedPtr.hpp
+//
+// Copyright 2020-2022 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "NSDefines.hpp"
+#include <type_traits> // std::enable_if_t / std::is_convertible_v used by the converting overloads
+
+namespace NS
+{
+template <class _Class>
+class SharedPtr
+{
+public:
+    /**
+     * Create a new null pointer.
+     */
+    SharedPtr();
+
+    /**
+     * Destroy this SharedPtr, decreasing the reference count.
+     */
+    ~SharedPtr();
+
+    /**
+     * SharedPtr copy constructor. Retains the pointee; copying a null SharedPtr yields a null SharedPtr.
+     */
+    SharedPtr(const SharedPtr<_Class>& other) noexcept;
+
+    /**
+     * Construction from another pointee type. Only enabled when _OtherClass* converts to _Class*.
+     */
+    template <class _OtherClass>
+    SharedPtr(const SharedPtr<_OtherClass>& other, typename std::enable_if_t<std::is_convertible_v<_OtherClass *, _Class *>> * = nullptr) noexcept;
+
+    /**
+     * SharedPtr move constructor.
+     */
+    SharedPtr(SharedPtr<_Class>&& other) noexcept;
+
+    /**
+     * Move from another pointee type. Only enabled when _OtherClass* converts to _Class*.
+     */
+    template <class _OtherClass>
+    SharedPtr(SharedPtr<_OtherClass>&& other, typename std::enable_if_t<std::is_convertible_v<_OtherClass *, _Class *>> * = nullptr) noexcept;
+
+    /**
+     * Copy assignment operator.
+     * Copying increases reference count. Only releases previous pointee if objects are different.
+     */
+    SharedPtr& operator=(const SharedPtr<_Class>& other);
+
+    /**
+     * Copy-assignment from different pointee.
+     * Copying increases reference count. Only releases previous pointee if objects are different.
+     */
+    template <class _OtherClass>
+    typename std::enable_if_t<std::is_convertible_v<_OtherClass *, _Class *>, SharedPtr &>
+    operator=(const SharedPtr<_OtherClass>& other);
+
+    /**
+     * Move assignment operator.
+     * Move without affecting reference counts, unless pointees are equal. Moved-from object is reset to nullptr.
+     */
+    SharedPtr& operator=(SharedPtr<_Class>&& other);
+
+    /**
+     * Move-assignment from different pointee.
+     * Move without affecting reference counts, unless pointees are equal. Moved-from object is reset to nullptr.
+     */
+    template <class _OtherClass>
+    typename std::enable_if_t<std::is_convertible_v<_OtherClass *, _Class *>, SharedPtr &>
+    operator=(SharedPtr<_OtherClass>&& other);
+
+    /**
+     * Access raw pointee.
+     * @warning Avoid wrapping the returned value again, as it may lead to double frees unless this object becomes detached.
+     */
+    _Class* get() const;
+
+    /**
+     * Call operations directly on the pointee.
+     */
+    _Class* operator->() const;
+
+    /**
+     * Explicit conversion to bool: true when the pointee is not null.
+     */
+    explicit operator bool() const;
+
+    /**
+     * Reset this SharedPtr to null, decreasing the reference count.
+     */
+    void reset();
+
+    /**
+     * Detach the SharedPtr from the pointee, without decreasing the reference count.
+     */
+    void detach();
+
+    template <class _OtherClass>
+    friend SharedPtr<_OtherClass> RetainPtr(_OtherClass* ptr);
+
+    template <class _OtherClass>
+    friend SharedPtr<_OtherClass> TransferPtr(_OtherClass* ptr);
+
+private:
+    _Class* m_pObject;
+};
+
+/**
+ * Create a SharedPtr by retaining an existing raw pointer.
+ * Increases the reference count of the passed-in object.
+ * If the passed-in object was in an AutoreleasePool, it will be removed from it.
+ */
+template <class _Class>
+_NS_INLINE NS::SharedPtr<_Class> RetainPtr(_Class* pObject)
+{
+    NS::SharedPtr<_Class> ret;
+    ret.m_pObject = pObject ? pObject->retain() : nullptr; // a null input yields a null SharedPtr without a call through null
+    return ret;
+}
+
+/*
+ * Create a SharedPtr by transferring the ownership of an existing raw pointer to SharedPtr.
+ * Does not increase the reference count of the passed-in pointer, it is assumed to be >= 1.
+ * This method does not remove objects from an AutoreleasePool.
+*/
+template <class _Class>
+_NS_INLINE NS::SharedPtr<_Class> TransferPtr(_Class* pObject)
+{
+    NS::SharedPtr<_Class> ret;
+    ret.m_pObject = pObject;
+    return ret;
+}
+
+}
+
+template <class _Class>
+_NS_INLINE NS::SharedPtr<_Class>::SharedPtr()
+    : m_pObject(nullptr)
+{
+}
+
+template <class _Class>
+_NS_INLINE NS::SharedPtr<_Class>::~SharedPtr()
+{
+    if (m_pObject)
+    {
+        m_pObject->release();
+    }
+}
+
+template <class _Class>
+_NS_INLINE NS::SharedPtr<_Class>::SharedPtr(const NS::SharedPtr<_Class>& other) noexcept
+    : m_pObject(other.m_pObject ? other.m_pObject->retain() : nullptr) // null-guarded like the destructor
+{
+}
+
+template <class _Class>
+template <class _OtherClass>
+_NS_INLINE NS::SharedPtr<_Class>::SharedPtr(const NS::SharedPtr<_OtherClass>& other, typename std::enable_if_t<std::is_convertible_v<_OtherClass *, _Class *>> *) noexcept
+    : m_pObject(other.get() ? reinterpret_cast<_Class*>(other.get()->retain()) : nullptr) // null-guarded like the destructor
+{
+}
+
+template <class _Class>
+_NS_INLINE NS::SharedPtr<_Class>::SharedPtr(NS::SharedPtr<_Class>&& other) noexcept
+    : m_pObject(other.m_pObject)
+{
+    other.m_pObject = nullptr; // ownership moved; the source must not release it
+}
+
+template <class _Class>
+template <class _OtherClass>
+_NS_INLINE NS::SharedPtr<_Class>::SharedPtr(NS::SharedPtr<_OtherClass>&& other, typename std::enable_if_t<std::is_convertible_v<_OtherClass *, _Class *>> *) noexcept
+    : m_pObject(reinterpret_cast<_Class*>(other.get()))
+{
+    other.detach(); // other's m_pObject is private to a different specialization, so use the public detach()
+}
+
+template <class _Class>
+_NS_INLINE _Class* NS::SharedPtr<_Class>::get() const
+{
+    return m_pObject;
+}
+
+template <class _Class>
+_NS_INLINE _Class* NS::SharedPtr<_Class>::operator->() const
+{
+    return m_pObject;
+}
+
+template <class _Class>
+_NS_INLINE NS::SharedPtr<_Class>::operator bool() const
+{
+    return nullptr != m_pObject;
+}
+
+template <class _Class>
+_NS_INLINE void NS::SharedPtr<_Class>::reset()
+{
+    if (m_pObject) m_pObject->release(); // guard matches ~SharedPtr(): resetting a null SharedPtr is a no-op
+    m_pObject = nullptr;
+}
+
+template <class _Class>
+_NS_INLINE void NS::SharedPtr<_Class>::detach()
+{
+    m_pObject = nullptr;
+}
+
+template <class _Class>
+_NS_INLINE NS::SharedPtr<_Class>& NS::SharedPtr<_Class>::operator=(const SharedPtr<_Class>& other)
+{
+    if (m_pObject != other.m_pObject) // same pointee (including self-assignment): counts already balanced
+    {
+        if (m_pObject)
+        {
+            m_pObject->release();
+        }
+        m_pObject = other.m_pObject ? other.m_pObject->retain() : nullptr; // assigning from a null SharedPtr just clears this one
+    }
+    return *this;
+}
+
+template <class _Class>
+template <class _OtherClass>
+typename std::enable_if_t<std::is_convertible_v<_OtherClass *, _Class *>, NS::SharedPtr<_Class> &>
+_NS_INLINE NS::SharedPtr<_Class>::operator=(const SharedPtr<_OtherClass>& other)
+{
+    if (m_pObject != other.get())
+    {
+        if (m_pObject)
+        {
+            m_pObject->release();
+        }
+        m_pObject = other.get() ? reinterpret_cast<_Class*>(other.get()->retain()) : nullptr; // assigning from a null SharedPtr just clears this one
+    }
+    return *this;
+}
+
+template <class _Class>
+_NS_INLINE NS::SharedPtr<_Class>& NS::SharedPtr<_Class>::operator=(SharedPtr<_Class>&& other)
+{
+    if (m_pObject != other.m_pObject)
+    {
+        if (m_pObject)
+        {
+            m_pObject->release();
+        }
+        m_pObject = other.m_pObject;
+    }
+    else
+    {
+        m_pObject = other.m_pObject;
+        if (other.m_pObject) other.m_pObject->release(); // same pointee: drop the reference `other` held (skipped when both are null)
+    }
+    other.m_pObject = nullptr;
+    return *this;
+}
+
+template <class _Class>
+template <class _OtherClass>
+typename std::enable_if_t<std::is_convertible_v<_OtherClass *, _Class *>, NS::SharedPtr<_Class> &>
+_NS_INLINE NS::SharedPtr<_Class>::operator=(SharedPtr<_OtherClass>&& other)
+{
+    if (m_pObject != other.get())
+    {
+        if (m_pObject)
+        {
+            m_pObject->release();
+        }
+        m_pObject = reinterpret_cast<_Class*>(other.get());
+        other.detach();
+    }
+    else
+    {
+        m_pObject = other.get();
+        other.reset(); // same pointee: drop the reference `other` held
+    }
+    return *this;
+}
+
+template <class _ClassLhs, class _ClassRhs>
+_NS_INLINE bool operator==(const NS::SharedPtr<_ClassLhs>& lhs, const NS::SharedPtr<_ClassRhs>& rhs)
+{
+    return lhs.get() == rhs.get();
+}
+
+template <class _ClassLhs, class _ClassRhs>
+_NS_INLINE bool operator!=(const NS::SharedPtr<_ClassLhs>& lhs, const NS::SharedPtr<_ClassRhs>& rhs)
+{
+    return lhs.get() != rhs.get();
+}
diff --git a/metal-cpp/Foundation/NSString.hpp b/metal-cpp/Foundation/NSString.hpp
new file mode 100644
index 00000000..51b1cd18
--- /dev/null
+++ b/metal-cpp/Foundation/NSString.hpp
@@ -0,0 +1,248 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSString.hpp
+//
+// Copyright 2020-2022 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "NSDefines.hpp" +#include "NSObject.hpp" +#include "NSPrivate.hpp" +#include "NSRange.hpp" +#include "NSTypes.hpp" + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace NS +{ +_NS_ENUM(NS::UInteger, StringEncoding) { + ASCIIStringEncoding = 1, + NEXTSTEPStringEncoding = 2, + JapaneseEUCStringEncoding = 3, + UTF8StringEncoding = 4, + ISOLatin1StringEncoding = 5, + SymbolStringEncoding = 6, + NonLossyASCIIStringEncoding = 7, + ShiftJISStringEncoding = 8, + ISOLatin2StringEncoding = 9, + UnicodeStringEncoding = 10, + WindowsCP1251StringEncoding = 11, + WindowsCP1252StringEncoding = 12, + WindowsCP1253StringEncoding = 13, + WindowsCP1254StringEncoding = 14, + WindowsCP1250StringEncoding = 15, + ISO2022JPStringEncoding = 21, + MacOSRomanStringEncoding = 30, + + UTF16StringEncoding = UnicodeStringEncoding, + + UTF16BigEndianStringEncoding = 0x90000100, + UTF16LittleEndianStringEncoding = 0x94000100, + + UTF32StringEncoding = 0x8c000100, + UTF32BigEndianStringEncoding = 0x98000100, + UTF32LittleEndianStringEncoding = 0x9c000100 +}; + +_NS_OPTIONS(NS::UInteger, StringCompareOptions) { + CaseInsensitiveSearch = 1, + LiteralSearch = 2, + BackwardsSearch = 4, + AnchoredSearch = 8, + NumericSearch = 64, + DiacriticInsensitiveSearch = 128, + WidthInsensitiveSearch = 256, + ForcedOrderingSearch = 512, + RegularExpressionSearch = 1024 +}; + +using unichar = unsigned short; + +class String : public Copying +{ +public: + static String* string(); + static String* string(const String* pString); + static String* 
string(const char* pString, StringEncoding encoding); + + static String* alloc(); + String* init(); + String* init(const String* pString); + String* init(const char* pString, StringEncoding encoding); + String* init(void* pBytes, UInteger len, StringEncoding encoding, bool freeBuffer); + + unichar character(UInteger index) const; + UInteger length() const; + + const char* cString(StringEncoding encoding) const; + const char* utf8String() const; + UInteger maximumLengthOfBytes(StringEncoding encoding) const; + UInteger lengthOfBytes(StringEncoding encoding) const; + + bool isEqualToString(const String* pString) const; + Range rangeOfString(const String* pString, StringCompareOptions options) const; + + const char* fileSystemRepresentation() const; + + String* stringByAppendingString(const String* pString) const; +}; + +/// Create an NS::String* from a string literal. +#define MTLSTR( literal ) (NS::String *)__builtin___CFStringMakeConstantString( "" literal "" ) + +template< std::size_t _StringLen > +[[deprecated("please use MTLSTR(str)")]] +constexpr const String* MakeConstantString( const char ( &str )[_StringLen] ) +{ + return reinterpret_cast< const String* >( __CFStringMakeConstantString( str ) ); +} + +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::String::string() +{ + return Object::sendMessage(_NS_PRIVATE_CLS(NSString), _NS_PRIVATE_SEL(string)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::String::string(const String* pString) +{ + return Object::sendMessage(_NS_PRIVATE_CLS(NSString), _NS_PRIVATE_SEL(stringWithString_), pString); +} + 
+//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::String::string(const char* pString, StringEncoding encoding) +{ + return Object::sendMessage(_NS_PRIVATE_CLS(NSString), _NS_PRIVATE_SEL(stringWithCString_encoding_), pString, encoding); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::String::alloc() +{ + return Object::alloc(_NS_PRIVATE_CLS(NSString)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::String::init() +{ + return Object::init(); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::String::init(const String* pString) +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(initWithString_), pString); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::String::init(const char* pString, StringEncoding encoding) +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(initWithCString_encoding_), pString, encoding); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::String::init(void* pBytes, UInteger len, StringEncoding encoding, bool freeBuffer) +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(initWithBytesNoCopy_length_encoding_freeWhenDone_), pBytes, len, encoding, freeBuffer); +} + 
+//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::unichar NS::String::character(UInteger index) const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(characterAtIndex_), index); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::UInteger NS::String::length() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(length)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE const char* NS::String::cString(StringEncoding encoding) const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(cStringUsingEncoding_), encoding); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE const char* NS::String::utf8String() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(UTF8String)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::UInteger NS::String::maximumLengthOfBytes(StringEncoding encoding) const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(maximumLengthOfBytesUsingEncoding_), encoding); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::UInteger NS::String::lengthOfBytes(StringEncoding encoding) const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(lengthOfBytesUsingEncoding_), encoding); +} + 
+//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE bool NS::String::isEqualToString(const NS::String* pString) const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(isEqualToString_), pString); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::Range NS::String::rangeOfString(const NS::String* pString, NS::StringCompareOptions options) const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(rangeOfString_options_), pString, options); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE const char* NS::String::fileSystemRepresentation() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(fileSystemRepresentation)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::String* NS::String::stringByAppendingString(const String* pString) const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(stringByAppendingString_), pString); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/Foundation/NSTypes.hpp b/metal-cpp/Foundation/NSTypes.hpp new file mode 100644 index 00000000..c2fef527 --- /dev/null +++ b/metal-cpp/Foundation/NSTypes.hpp @@ -0,0 +1,51 @@ 
+//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Foundation/NSTypes.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "NSDefines.hpp" + +#include +#include + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace NS +{ +using TimeInterval = double; + +using Integer = std::intptr_t; +using UInteger = std::uintptr_t; + +const Integer IntegerMax = INTPTR_MAX; +const Integer IntegerMin = INTPTR_MIN; +const UInteger UIntegerMax = UINTPTR_MAX; + +struct OperatingSystemVersion +{ + Integer majorVersion; + Integer minorVersion; + Integer patchVersion; +} _NS_PACKED; +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/Foundation/NSURL.hpp b/metal-cpp/Foundation/NSURL.hpp new file mode 100644 index 
00000000..e904a8d1 --- /dev/null +++ b/metal-cpp/Foundation/NSURL.hpp @@ -0,0 +1,90 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Foundation/NSURL.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "NSDefines.hpp" +#include "NSObject.hpp" +#include "NSPrivate.hpp" +#include "NSTypes.hpp" + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace NS +{ +class URL : public Copying +{ +public: + static URL* fileURLWithPath(const class String* pPath); + + static URL* alloc(); + URL* init(); + URL* init(const class String* pString); + URL* initFileURLWithPath(const class String* pPath); + + const char* fileSystemRepresentation() const; +}; +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::URL* 
NS::URL::fileURLWithPath(const String* pPath) +{ + return Object::sendMessage(_NS_PRIVATE_CLS(NSURL), _NS_PRIVATE_SEL(fileURLWithPath_), pPath); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::URL* NS::URL::alloc() +{ + return Object::alloc(_NS_PRIVATE_CLS(NSURL)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::URL* NS::URL::init() +{ + return Object::init(); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::URL* NS::URL::init(const String* pString) +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(initWithString_), pString); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::URL* NS::URL::initFileURLWithPath(const String* pPath) +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(initFileURLWithPath_), pPath); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE const char* NS::URL::fileSystemRepresentation() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(fileSystemRepresentation)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/Metal/MTLAccelerationStructure.hpp b/metal-cpp/Metal/MTLAccelerationStructure.hpp new file mode 100644 index 00000000..f2d05e0f --- /dev/null +++ b/metal-cpp/Metal/MTLAccelerationStructure.hpp @@ -0,0 +1,1081 @@ 
+//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLAccelerationStructure.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLAccelerationStructure.hpp" +#include "MTLAccelerationStructureTypes.hpp" +#include "MTLResource.hpp" +#include "MTLStageInputOutputDescriptor.hpp" +#include "MTLTypes.hpp" + +namespace MTL +{ +_MTL_OPTIONS(NS::UInteger, AccelerationStructureUsage) { + AccelerationStructureUsageNone = 0, + AccelerationStructureUsageRefit = 1, + AccelerationStructureUsagePreferFastBuild = 2, + AccelerationStructureUsageExtendedLimits = 4, +}; + +_MTL_OPTIONS(uint32_t, AccelerationStructureInstanceOptions) { + AccelerationStructureInstanceOptionNone = 0, + AccelerationStructureInstanceOptionDisableTriangleCulling = 1, + AccelerationStructureInstanceOptionTriangleFrontFacingWindingCounterClockwise = 2, + AccelerationStructureInstanceOptionOpaque = 4, + AccelerationStructureInstanceOptionNonOpaque = 8, +}; + +class AccelerationStructureDescriptor : public NS::Copying +{ +public: + static class 
AccelerationStructureDescriptor* alloc(); + + class AccelerationStructureDescriptor* init(); + + MTL::AccelerationStructureUsage usage() const; + void setUsage(MTL::AccelerationStructureUsage usage); +}; + +class AccelerationStructureGeometryDescriptor : public NS::Copying +{ +public: + static class AccelerationStructureGeometryDescriptor* alloc(); + + class AccelerationStructureGeometryDescriptor* init(); + + NS::UInteger intersectionFunctionTableOffset() const; + void setIntersectionFunctionTableOffset(NS::UInteger intersectionFunctionTableOffset); + + bool opaque() const; + void setOpaque(bool opaque); + + bool allowDuplicateIntersectionFunctionInvocation() const; + void setAllowDuplicateIntersectionFunctionInvocation(bool allowDuplicateIntersectionFunctionInvocation); + + NS::String* label() const; + void setLabel(const NS::String* label); + + class Buffer* primitiveDataBuffer() const; + void setPrimitiveDataBuffer(const class Buffer* primitiveDataBuffer); + + NS::UInteger primitiveDataBufferOffset() const; + void setPrimitiveDataBufferOffset(NS::UInteger primitiveDataBufferOffset); + + NS::UInteger primitiveDataStride() const; + void setPrimitiveDataStride(NS::UInteger primitiveDataStride); + + NS::UInteger primitiveDataElementSize() const; + void setPrimitiveDataElementSize(NS::UInteger primitiveDataElementSize); +}; + +_MTL_ENUM(uint32_t, MotionBorderMode) { + MotionBorderModeClamp = 0, + MotionBorderModeVanish = 1, +}; + +class PrimitiveAccelerationStructureDescriptor : public NS::Copying +{ +public: + static class PrimitiveAccelerationStructureDescriptor* alloc(); + + class PrimitiveAccelerationStructureDescriptor* init(); + + NS::Array* geometryDescriptors() const; + void setGeometryDescriptors(const NS::Array* geometryDescriptors); + + MTL::MotionBorderMode motionStartBorderMode() const; + void setMotionStartBorderMode(MTL::MotionBorderMode motionStartBorderMode); + + MTL::MotionBorderMode motionEndBorderMode() const; + void 
setMotionEndBorderMode(MTL::MotionBorderMode motionEndBorderMode); + + float motionStartTime() const; + void setMotionStartTime(float motionStartTime); + + float motionEndTime() const; + void setMotionEndTime(float motionEndTime); + + NS::UInteger motionKeyframeCount() const; + void setMotionKeyframeCount(NS::UInteger motionKeyframeCount); + + static MTL::PrimitiveAccelerationStructureDescriptor* descriptor(); +}; + +class AccelerationStructureTriangleGeometryDescriptor : public NS::Copying +{ +public: + static class AccelerationStructureTriangleGeometryDescriptor* alloc(); + + class AccelerationStructureTriangleGeometryDescriptor* init(); + + class Buffer* vertexBuffer() const; + void setVertexBuffer(const class Buffer* vertexBuffer); + + NS::UInteger vertexBufferOffset() const; + void setVertexBufferOffset(NS::UInteger vertexBufferOffset); + + MTL::AttributeFormat vertexFormat() const; + void setVertexFormat(MTL::AttributeFormat vertexFormat); + + NS::UInteger vertexStride() const; + void setVertexStride(NS::UInteger vertexStride); + + class Buffer* indexBuffer() const; + void setIndexBuffer(const class Buffer* indexBuffer); + + NS::UInteger indexBufferOffset() const; + void setIndexBufferOffset(NS::UInteger indexBufferOffset); + + MTL::IndexType indexType() const; + void setIndexType(MTL::IndexType indexType); + + NS::UInteger triangleCount() const; + void setTriangleCount(NS::UInteger triangleCount); + + class Buffer* transformationMatrixBuffer() const; + void setTransformationMatrixBuffer(const class Buffer* transformationMatrixBuffer); + + NS::UInteger transformationMatrixBufferOffset() const; + void setTransformationMatrixBufferOffset(NS::UInteger transformationMatrixBufferOffset); + + static MTL::AccelerationStructureTriangleGeometryDescriptor* descriptor(); +}; + +class AccelerationStructureBoundingBoxGeometryDescriptor : public NS::Copying +{ +public: + static class AccelerationStructureBoundingBoxGeometryDescriptor* alloc(); + + class 
AccelerationStructureBoundingBoxGeometryDescriptor* init(); + + class Buffer* boundingBoxBuffer() const; + void setBoundingBoxBuffer(const class Buffer* boundingBoxBuffer); + + NS::UInteger boundingBoxBufferOffset() const; + void setBoundingBoxBufferOffset(NS::UInteger boundingBoxBufferOffset); + + NS::UInteger boundingBoxStride() const; + void setBoundingBoxStride(NS::UInteger boundingBoxStride); + + NS::UInteger boundingBoxCount() const; + void setBoundingBoxCount(NS::UInteger boundingBoxCount); + + static MTL::AccelerationStructureBoundingBoxGeometryDescriptor* descriptor(); +}; + +class MotionKeyframeData : public NS::Referencing +{ +public: + static class MotionKeyframeData* alloc(); + + class MotionKeyframeData* init(); + + class Buffer* buffer() const; + void setBuffer(const class Buffer* buffer); + + NS::UInteger offset() const; + void setOffset(NS::UInteger offset); + + static MTL::MotionKeyframeData* data(); +}; + +class AccelerationStructureMotionTriangleGeometryDescriptor : public NS::Copying +{ +public: + static class AccelerationStructureMotionTriangleGeometryDescriptor* alloc(); + + class AccelerationStructureMotionTriangleGeometryDescriptor* init(); + + NS::Array* vertexBuffers() const; + void setVertexBuffers(const NS::Array* vertexBuffers); + + MTL::AttributeFormat vertexFormat() const; + void setVertexFormat(MTL::AttributeFormat vertexFormat); + + NS::UInteger vertexStride() const; + void setVertexStride(NS::UInteger vertexStride); + + class Buffer* indexBuffer() const; + void setIndexBuffer(const class Buffer* indexBuffer); + + NS::UInteger indexBufferOffset() const; + void setIndexBufferOffset(NS::UInteger indexBufferOffset); + + MTL::IndexType indexType() const; + void setIndexType(MTL::IndexType indexType); + + NS::UInteger triangleCount() const; + void setTriangleCount(NS::UInteger triangleCount); + + class Buffer* transformationMatrixBuffer() const; + void setTransformationMatrixBuffer(const class Buffer* transformationMatrixBuffer); + + 
NS::UInteger transformationMatrixBufferOffset() const; + void setTransformationMatrixBufferOffset(NS::UInteger transformationMatrixBufferOffset); + + static MTL::AccelerationStructureMotionTriangleGeometryDescriptor* descriptor(); +}; + +class AccelerationStructureMotionBoundingBoxGeometryDescriptor : public NS::Copying +{ +public: + static class AccelerationStructureMotionBoundingBoxGeometryDescriptor* alloc(); + + class AccelerationStructureMotionBoundingBoxGeometryDescriptor* init(); + + NS::Array* boundingBoxBuffers() const; + void setBoundingBoxBuffers(const NS::Array* boundingBoxBuffers); + + NS::UInteger boundingBoxStride() const; + void setBoundingBoxStride(NS::UInteger boundingBoxStride); + + NS::UInteger boundingBoxCount() const; + void setBoundingBoxCount(NS::UInteger boundingBoxCount); + + static MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor* descriptor(); +}; + +struct AccelerationStructureInstanceDescriptor +{ + MTL::PackedFloat4x3 transformationMatrix; + MTL::AccelerationStructureInstanceOptions options; + uint32_t mask; + uint32_t intersectionFunctionTableOffset; + uint32_t accelerationStructureIndex; +} _MTL_PACKED; + +struct AccelerationStructureUserIDInstanceDescriptor +{ + MTL::PackedFloat4x3 transformationMatrix; + MTL::AccelerationStructureInstanceOptions options; + uint32_t mask; + uint32_t intersectionFunctionTableOffset; + uint32_t accelerationStructureIndex; + uint32_t userID; +} _MTL_PACKED; + +_MTL_ENUM(NS::UInteger, AccelerationStructureInstanceDescriptorType) { + AccelerationStructureInstanceDescriptorTypeDefault = 0, + AccelerationStructureInstanceDescriptorTypeUserID = 1, + AccelerationStructureInstanceDescriptorTypeMotion = 2, +}; + +struct AccelerationStructureMotionInstanceDescriptor +{ + MTL::AccelerationStructureInstanceOptions options; + uint32_t mask; + uint32_t intersectionFunctionTableOffset; + uint32_t accelerationStructureIndex; + uint32_t userID; + uint32_t motionTransformsStartIndex; + uint32_t 
motionTransformsCount; + MTL::MotionBorderMode motionStartBorderMode; + MTL::MotionBorderMode motionEndBorderMode; + float motionStartTime; + float motionEndTime; +} _MTL_PACKED; + +class InstanceAccelerationStructureDescriptor : public NS::Copying +{ +public: + static class InstanceAccelerationStructureDescriptor* alloc(); + + class InstanceAccelerationStructureDescriptor* init(); + + class Buffer* instanceDescriptorBuffer() const; + void setInstanceDescriptorBuffer(const class Buffer* instanceDescriptorBuffer); + + NS::UInteger instanceDescriptorBufferOffset() const; + void setInstanceDescriptorBufferOffset(NS::UInteger instanceDescriptorBufferOffset); + + NS::UInteger instanceDescriptorStride() const; + void setInstanceDescriptorStride(NS::UInteger instanceDescriptorStride); + + NS::UInteger instanceCount() const; + void setInstanceCount(NS::UInteger instanceCount); + + NS::Array* instancedAccelerationStructures() const; + void setInstancedAccelerationStructures(const NS::Array* instancedAccelerationStructures); + + MTL::AccelerationStructureInstanceDescriptorType instanceDescriptorType() const; + void setInstanceDescriptorType(MTL::AccelerationStructureInstanceDescriptorType instanceDescriptorType); + + class Buffer* motionTransformBuffer() const; + void setMotionTransformBuffer(const class Buffer* motionTransformBuffer); + + NS::UInteger motionTransformBufferOffset() const; + void setMotionTransformBufferOffset(NS::UInteger motionTransformBufferOffset); + + NS::UInteger motionTransformCount() const; + void setMotionTransformCount(NS::UInteger motionTransformCount); + + static MTL::InstanceAccelerationStructureDescriptor* descriptor(); +}; + +class AccelerationStructure : public NS::Referencing +{ +public: + NS::UInteger size() const; + + MTL::ResourceID gpuResourceID() const; +}; + +} + +// static method: alloc +_MTL_INLINE MTL::AccelerationStructureDescriptor* MTL::AccelerationStructureDescriptor::alloc() +{ + return 
NS::Object::alloc(_MTL_PRIVATE_CLS(MTLAccelerationStructureDescriptor)); +} + +// method: init +_MTL_INLINE MTL::AccelerationStructureDescriptor* MTL::AccelerationStructureDescriptor::init() +{ + return NS::Object::init(); +} + +// property: usage +_MTL_INLINE MTL::AccelerationStructureUsage MTL::AccelerationStructureDescriptor::usage() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(usage)); +} + +_MTL_INLINE void MTL::AccelerationStructureDescriptor::setUsage(MTL::AccelerationStructureUsage usage) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setUsage_), usage); +} + +// static method: alloc +_MTL_INLINE MTL::AccelerationStructureGeometryDescriptor* MTL::AccelerationStructureGeometryDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLAccelerationStructureGeometryDescriptor)); +} + +// method: init +_MTL_INLINE MTL::AccelerationStructureGeometryDescriptor* MTL::AccelerationStructureGeometryDescriptor::init() +{ + return NS::Object::init(); +} + +// property: intersectionFunctionTableOffset +_MTL_INLINE NS::UInteger MTL::AccelerationStructureGeometryDescriptor::intersectionFunctionTableOffset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(intersectionFunctionTableOffset)); +} + +_MTL_INLINE void MTL::AccelerationStructureGeometryDescriptor::setIntersectionFunctionTableOffset(NS::UInteger intersectionFunctionTableOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIntersectionFunctionTableOffset_), intersectionFunctionTableOffset); +} + +// property: opaque +_MTL_INLINE bool MTL::AccelerationStructureGeometryDescriptor::opaque() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(opaque)); +} + +_MTL_INLINE void MTL::AccelerationStructureGeometryDescriptor::setOpaque(bool opaque) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setOpaque_), opaque); +} + +// property: allowDuplicateIntersectionFunctionInvocation +_MTL_INLINE bool 
MTL::AccelerationStructureGeometryDescriptor::allowDuplicateIntersectionFunctionInvocation() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(allowDuplicateIntersectionFunctionInvocation)); +} + +_MTL_INLINE void MTL::AccelerationStructureGeometryDescriptor::setAllowDuplicateIntersectionFunctionInvocation(bool allowDuplicateIntersectionFunctionInvocation) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setAllowDuplicateIntersectionFunctionInvocation_), allowDuplicateIntersectionFunctionInvocation); +} + +// property: label +_MTL_INLINE NS::String* MTL::AccelerationStructureGeometryDescriptor::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::AccelerationStructureGeometryDescriptor::setLabel(const NS::String* label) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} + +// property: primitiveDataBuffer +_MTL_INLINE MTL::Buffer* MTL::AccelerationStructureGeometryDescriptor::primitiveDataBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(primitiveDataBuffer)); +} + +_MTL_INLINE void MTL::AccelerationStructureGeometryDescriptor::setPrimitiveDataBuffer(const MTL::Buffer* primitiveDataBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setPrimitiveDataBuffer_), primitiveDataBuffer); +} + +// property: primitiveDataBufferOffset +_MTL_INLINE NS::UInteger MTL::AccelerationStructureGeometryDescriptor::primitiveDataBufferOffset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(primitiveDataBufferOffset)); +} + +_MTL_INLINE void MTL::AccelerationStructureGeometryDescriptor::setPrimitiveDataBufferOffset(NS::UInteger primitiveDataBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setPrimitiveDataBufferOffset_), primitiveDataBufferOffset); +} + +// property: primitiveDataStride +_MTL_INLINE NS::UInteger MTL::AccelerationStructureGeometryDescriptor::primitiveDataStride() const +{ + return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(primitiveDataStride)); +} + +_MTL_INLINE void MTL::AccelerationStructureGeometryDescriptor::setPrimitiveDataStride(NS::UInteger primitiveDataStride) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setPrimitiveDataStride_), primitiveDataStride); +} + +// property: primitiveDataElementSize +_MTL_INLINE NS::UInteger MTL::AccelerationStructureGeometryDescriptor::primitiveDataElementSize() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(primitiveDataElementSize)); +} + +_MTL_INLINE void MTL::AccelerationStructureGeometryDescriptor::setPrimitiveDataElementSize(NS::UInteger primitiveDataElementSize) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setPrimitiveDataElementSize_), primitiveDataElementSize); +} + +// static method: alloc +_MTL_INLINE MTL::PrimitiveAccelerationStructureDescriptor* MTL::PrimitiveAccelerationStructureDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLPrimitiveAccelerationStructureDescriptor)); +} + +// method: init +_MTL_INLINE MTL::PrimitiveAccelerationStructureDescriptor* MTL::PrimitiveAccelerationStructureDescriptor::init() +{ + return NS::Object::init(); +} + +// property: geometryDescriptors +_MTL_INLINE NS::Array* MTL::PrimitiveAccelerationStructureDescriptor::geometryDescriptors() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(geometryDescriptors)); +} + +_MTL_INLINE void MTL::PrimitiveAccelerationStructureDescriptor::setGeometryDescriptors(const NS::Array* geometryDescriptors) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setGeometryDescriptors_), geometryDescriptors); +} + +// property: motionStartBorderMode +_MTL_INLINE MTL::MotionBorderMode MTL::PrimitiveAccelerationStructureDescriptor::motionStartBorderMode() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(motionStartBorderMode)); +} + +_MTL_INLINE void MTL::PrimitiveAccelerationStructureDescriptor::setMotionStartBorderMode(MTL::MotionBorderMode motionStartBorderMode) +{ + Object::sendMessage(this, 
_MTL_PRIVATE_SEL(setMotionStartBorderMode_), motionStartBorderMode); +} + +// property: motionEndBorderMode +_MTL_INLINE MTL::MotionBorderMode MTL::PrimitiveAccelerationStructureDescriptor::motionEndBorderMode() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(motionEndBorderMode)); +} + +_MTL_INLINE void MTL::PrimitiveAccelerationStructureDescriptor::setMotionEndBorderMode(MTL::MotionBorderMode motionEndBorderMode) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMotionEndBorderMode_), motionEndBorderMode); +} + +// property: motionStartTime +_MTL_INLINE float MTL::PrimitiveAccelerationStructureDescriptor::motionStartTime() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(motionStartTime)); +} + +_MTL_INLINE void MTL::PrimitiveAccelerationStructureDescriptor::setMotionStartTime(float motionStartTime) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMotionStartTime_), motionStartTime); +} + +// property: motionEndTime +_MTL_INLINE float MTL::PrimitiveAccelerationStructureDescriptor::motionEndTime() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(motionEndTime)); +} + +_MTL_INLINE void MTL::PrimitiveAccelerationStructureDescriptor::setMotionEndTime(float motionEndTime) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMotionEndTime_), motionEndTime); +} + +// property: motionKeyframeCount +_MTL_INLINE NS::UInteger MTL::PrimitiveAccelerationStructureDescriptor::motionKeyframeCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(motionKeyframeCount)); +} + +_MTL_INLINE void MTL::PrimitiveAccelerationStructureDescriptor::setMotionKeyframeCount(NS::UInteger motionKeyframeCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMotionKeyframeCount_), motionKeyframeCount); +} + +// static method: descriptor +_MTL_INLINE MTL::PrimitiveAccelerationStructureDescriptor* MTL::PrimitiveAccelerationStructureDescriptor::descriptor() +{ + return 
Object::sendMessage(_MTL_PRIVATE_CLS(MTLPrimitiveAccelerationStructureDescriptor), _MTL_PRIVATE_SEL(descriptor)); +} + +// static method: alloc +_MTL_INLINE MTL::AccelerationStructureTriangleGeometryDescriptor* MTL::AccelerationStructureTriangleGeometryDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLAccelerationStructureTriangleGeometryDescriptor)); +} + +// method: init +_MTL_INLINE MTL::AccelerationStructureTriangleGeometryDescriptor* MTL::AccelerationStructureTriangleGeometryDescriptor::init() +{ + return NS::Object::init(); +} + +// property: vertexBuffer +_MTL_INLINE MTL::Buffer* MTL::AccelerationStructureTriangleGeometryDescriptor::vertexBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(vertexBuffer)); +} + +_MTL_INLINE void MTL::AccelerationStructureTriangleGeometryDescriptor::setVertexBuffer(const MTL::Buffer* vertexBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexBuffer_), vertexBuffer); +} + +// property: vertexBufferOffset +_MTL_INLINE NS::UInteger MTL::AccelerationStructureTriangleGeometryDescriptor::vertexBufferOffset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(vertexBufferOffset)); +} + +_MTL_INLINE void MTL::AccelerationStructureTriangleGeometryDescriptor::setVertexBufferOffset(NS::UInteger vertexBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexBufferOffset_), vertexBufferOffset); +} + +// property: vertexFormat +_MTL_INLINE MTL::AttributeFormat MTL::AccelerationStructureTriangleGeometryDescriptor::vertexFormat() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(vertexFormat)); +} + +_MTL_INLINE void MTL::AccelerationStructureTriangleGeometryDescriptor::setVertexFormat(MTL::AttributeFormat vertexFormat) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexFormat_), vertexFormat); +} + +// property: vertexStride +_MTL_INLINE NS::UInteger MTL::AccelerationStructureTriangleGeometryDescriptor::vertexStride() const +{ + return 
Object::sendMessage(this, _MTL_PRIVATE_SEL(vertexStride)); +} + +_MTL_INLINE void MTL::AccelerationStructureTriangleGeometryDescriptor::setVertexStride(NS::UInteger vertexStride) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexStride_), vertexStride); +} + +// property: indexBuffer +_MTL_INLINE MTL::Buffer* MTL::AccelerationStructureTriangleGeometryDescriptor::indexBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(indexBuffer)); +} + +_MTL_INLINE void MTL::AccelerationStructureTriangleGeometryDescriptor::setIndexBuffer(const MTL::Buffer* indexBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIndexBuffer_), indexBuffer); +} + +// property: indexBufferOffset +_MTL_INLINE NS::UInteger MTL::AccelerationStructureTriangleGeometryDescriptor::indexBufferOffset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(indexBufferOffset)); +} + +_MTL_INLINE void MTL::AccelerationStructureTriangleGeometryDescriptor::setIndexBufferOffset(NS::UInteger indexBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIndexBufferOffset_), indexBufferOffset); +} + +// property: indexType +_MTL_INLINE MTL::IndexType MTL::AccelerationStructureTriangleGeometryDescriptor::indexType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(indexType)); +} + +_MTL_INLINE void MTL::AccelerationStructureTriangleGeometryDescriptor::setIndexType(MTL::IndexType indexType) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIndexType_), indexType); +} + +// property: triangleCount +_MTL_INLINE NS::UInteger MTL::AccelerationStructureTriangleGeometryDescriptor::triangleCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(triangleCount)); +} + +_MTL_INLINE void MTL::AccelerationStructureTriangleGeometryDescriptor::setTriangleCount(NS::UInteger triangleCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTriangleCount_), triangleCount); +} + +// property: transformationMatrixBuffer +_MTL_INLINE MTL::Buffer* 
MTL::AccelerationStructureTriangleGeometryDescriptor::transformationMatrixBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(transformationMatrixBuffer)); +} + +_MTL_INLINE void MTL::AccelerationStructureTriangleGeometryDescriptor::setTransformationMatrixBuffer(const MTL::Buffer* transformationMatrixBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTransformationMatrixBuffer_), transformationMatrixBuffer); +} + +// property: transformationMatrixBufferOffset +_MTL_INLINE NS::UInteger MTL::AccelerationStructureTriangleGeometryDescriptor::transformationMatrixBufferOffset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(transformationMatrixBufferOffset)); +} + +_MTL_INLINE void MTL::AccelerationStructureTriangleGeometryDescriptor::setTransformationMatrixBufferOffset(NS::UInteger transformationMatrixBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTransformationMatrixBufferOffset_), transformationMatrixBufferOffset); +} + +// static method: descriptor +_MTL_INLINE MTL::AccelerationStructureTriangleGeometryDescriptor* MTL::AccelerationStructureTriangleGeometryDescriptor::descriptor() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLAccelerationStructureTriangleGeometryDescriptor), _MTL_PRIVATE_SEL(descriptor)); +} + +// static method: alloc +_MTL_INLINE MTL::AccelerationStructureBoundingBoxGeometryDescriptor* MTL::AccelerationStructureBoundingBoxGeometryDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLAccelerationStructureBoundingBoxGeometryDescriptor)); +} + +// method: init +_MTL_INLINE MTL::AccelerationStructureBoundingBoxGeometryDescriptor* MTL::AccelerationStructureBoundingBoxGeometryDescriptor::init() +{ + return NS::Object::init(); +} + +// property: boundingBoxBuffer +_MTL_INLINE MTL::Buffer* MTL::AccelerationStructureBoundingBoxGeometryDescriptor::boundingBoxBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(boundingBoxBuffer)); +} + +_MTL_INLINE void 
MTL::AccelerationStructureBoundingBoxGeometryDescriptor::setBoundingBoxBuffer(const MTL::Buffer* boundingBoxBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBoundingBoxBuffer_), boundingBoxBuffer); +} + +// property: boundingBoxBufferOffset +_MTL_INLINE NS::UInteger MTL::AccelerationStructureBoundingBoxGeometryDescriptor::boundingBoxBufferOffset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(boundingBoxBufferOffset)); +} + +_MTL_INLINE void MTL::AccelerationStructureBoundingBoxGeometryDescriptor::setBoundingBoxBufferOffset(NS::UInteger boundingBoxBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBoundingBoxBufferOffset_), boundingBoxBufferOffset); +} + +// property: boundingBoxStride +_MTL_INLINE NS::UInteger MTL::AccelerationStructureBoundingBoxGeometryDescriptor::boundingBoxStride() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(boundingBoxStride)); +} + +_MTL_INLINE void MTL::AccelerationStructureBoundingBoxGeometryDescriptor::setBoundingBoxStride(NS::UInteger boundingBoxStride) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBoundingBoxStride_), boundingBoxStride); +} + +// property: boundingBoxCount +_MTL_INLINE NS::UInteger MTL::AccelerationStructureBoundingBoxGeometryDescriptor::boundingBoxCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(boundingBoxCount)); +} + +_MTL_INLINE void MTL::AccelerationStructureBoundingBoxGeometryDescriptor::setBoundingBoxCount(NS::UInteger boundingBoxCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBoundingBoxCount_), boundingBoxCount); +} + +// static method: descriptor +_MTL_INLINE MTL::AccelerationStructureBoundingBoxGeometryDescriptor* MTL::AccelerationStructureBoundingBoxGeometryDescriptor::descriptor() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLAccelerationStructureBoundingBoxGeometryDescriptor), _MTL_PRIVATE_SEL(descriptor)); +} + +// static method: alloc +_MTL_INLINE MTL::MotionKeyframeData* MTL::MotionKeyframeData::alloc() +{ 
+ return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLMotionKeyframeData)); +} + +// method: init +_MTL_INLINE MTL::MotionKeyframeData* MTL::MotionKeyframeData::init() +{ + return NS::Object::init(); +} + +// property: buffer +_MTL_INLINE MTL::Buffer* MTL::MotionKeyframeData::buffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(buffer)); +} + +_MTL_INLINE void MTL::MotionKeyframeData::setBuffer(const MTL::Buffer* buffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBuffer_), buffer); +} + +// property: offset +_MTL_INLINE NS::UInteger MTL::MotionKeyframeData::offset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(offset)); +} + +_MTL_INLINE void MTL::MotionKeyframeData::setOffset(NS::UInteger offset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setOffset_), offset); +} + +// static method: data +_MTL_INLINE MTL::MotionKeyframeData* MTL::MotionKeyframeData::data() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLMotionKeyframeData), _MTL_PRIVATE_SEL(data)); +} + +// static method: alloc +_MTL_INLINE MTL::AccelerationStructureMotionTriangleGeometryDescriptor* MTL::AccelerationStructureMotionTriangleGeometryDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLAccelerationStructureMotionTriangleGeometryDescriptor)); +} + +// method: init +_MTL_INLINE MTL::AccelerationStructureMotionTriangleGeometryDescriptor* MTL::AccelerationStructureMotionTriangleGeometryDescriptor::init() +{ + return NS::Object::init(); +} + +// property: vertexBuffers +_MTL_INLINE NS::Array* MTL::AccelerationStructureMotionTriangleGeometryDescriptor::vertexBuffers() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(vertexBuffers)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionTriangleGeometryDescriptor::setVertexBuffers(const NS::Array* vertexBuffers) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexBuffers_), vertexBuffers); +} + +// property: vertexFormat +_MTL_INLINE MTL::AttributeFormat 
MTL::AccelerationStructureMotionTriangleGeometryDescriptor::vertexFormat() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(vertexFormat)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionTriangleGeometryDescriptor::setVertexFormat(MTL::AttributeFormat vertexFormat) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexFormat_), vertexFormat); +} + +// property: vertexStride +_MTL_INLINE NS::UInteger MTL::AccelerationStructureMotionTriangleGeometryDescriptor::vertexStride() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(vertexStride)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionTriangleGeometryDescriptor::setVertexStride(NS::UInteger vertexStride) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexStride_), vertexStride); +} + +// property: indexBuffer +_MTL_INLINE MTL::Buffer* MTL::AccelerationStructureMotionTriangleGeometryDescriptor::indexBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(indexBuffer)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionTriangleGeometryDescriptor::setIndexBuffer(const MTL::Buffer* indexBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIndexBuffer_), indexBuffer); +} + +// property: indexBufferOffset +_MTL_INLINE NS::UInteger MTL::AccelerationStructureMotionTriangleGeometryDescriptor::indexBufferOffset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(indexBufferOffset)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionTriangleGeometryDescriptor::setIndexBufferOffset(NS::UInteger indexBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIndexBufferOffset_), indexBufferOffset); +} + +// property: indexType +_MTL_INLINE MTL::IndexType MTL::AccelerationStructureMotionTriangleGeometryDescriptor::indexType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(indexType)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionTriangleGeometryDescriptor::setIndexType(MTL::IndexType indexType) +{ + 
Object::sendMessage(this, _MTL_PRIVATE_SEL(setIndexType_), indexType); +} + +// property: triangleCount +_MTL_INLINE NS::UInteger MTL::AccelerationStructureMotionTriangleGeometryDescriptor::triangleCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(triangleCount)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionTriangleGeometryDescriptor::setTriangleCount(NS::UInteger triangleCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTriangleCount_), triangleCount); +} + +// property: transformationMatrixBuffer +_MTL_INLINE MTL::Buffer* MTL::AccelerationStructureMotionTriangleGeometryDescriptor::transformationMatrixBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(transformationMatrixBuffer)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionTriangleGeometryDescriptor::setTransformationMatrixBuffer(const MTL::Buffer* transformationMatrixBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTransformationMatrixBuffer_), transformationMatrixBuffer); +} + +// property: transformationMatrixBufferOffset +_MTL_INLINE NS::UInteger MTL::AccelerationStructureMotionTriangleGeometryDescriptor::transformationMatrixBufferOffset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(transformationMatrixBufferOffset)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionTriangleGeometryDescriptor::setTransformationMatrixBufferOffset(NS::UInteger transformationMatrixBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTransformationMatrixBufferOffset_), transformationMatrixBufferOffset); +} + +// static method: descriptor +_MTL_INLINE MTL::AccelerationStructureMotionTriangleGeometryDescriptor* MTL::AccelerationStructureMotionTriangleGeometryDescriptor::descriptor() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLAccelerationStructureMotionTriangleGeometryDescriptor), _MTL_PRIVATE_SEL(descriptor)); +} + +// static method: alloc +_MTL_INLINE 
MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor* MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor)); +} + +// method: init +_MTL_INLINE MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor* MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor::init() +{ + return NS::Object::init(); +} + +// property: boundingBoxBuffers +_MTL_INLINE NS::Array* MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor::boundingBoxBuffers() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(boundingBoxBuffers)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor::setBoundingBoxBuffers(const NS::Array* boundingBoxBuffers) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBoundingBoxBuffers_), boundingBoxBuffers); +} + +// property: boundingBoxStride +_MTL_INLINE NS::UInteger MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor::boundingBoxStride() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(boundingBoxStride)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor::setBoundingBoxStride(NS::UInteger boundingBoxStride) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBoundingBoxStride_), boundingBoxStride); +} + +// property: boundingBoxCount +_MTL_INLINE NS::UInteger MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor::boundingBoxCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(boundingBoxCount)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor::setBoundingBoxCount(NS::UInteger boundingBoxCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBoundingBoxCount_), boundingBoxCount); +} + +// static method: descriptor +_MTL_INLINE MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor* 
MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor::descriptor() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor), _MTL_PRIVATE_SEL(descriptor)); +} + +// static method: alloc +_MTL_INLINE MTL::InstanceAccelerationStructureDescriptor* MTL::InstanceAccelerationStructureDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLInstanceAccelerationStructureDescriptor)); +} + +// method: init +_MTL_INLINE MTL::InstanceAccelerationStructureDescriptor* MTL::InstanceAccelerationStructureDescriptor::init() +{ + return NS::Object::init(); +} + +// property: instanceDescriptorBuffer +_MTL_INLINE MTL::Buffer* MTL::InstanceAccelerationStructureDescriptor::instanceDescriptorBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(instanceDescriptorBuffer)); +} + +_MTL_INLINE void MTL::InstanceAccelerationStructureDescriptor::setInstanceDescriptorBuffer(const MTL::Buffer* instanceDescriptorBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setInstanceDescriptorBuffer_), instanceDescriptorBuffer); +} + +// property: instanceDescriptorBufferOffset +_MTL_INLINE NS::UInteger MTL::InstanceAccelerationStructureDescriptor::instanceDescriptorBufferOffset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(instanceDescriptorBufferOffset)); +} + +_MTL_INLINE void MTL::InstanceAccelerationStructureDescriptor::setInstanceDescriptorBufferOffset(NS::UInteger instanceDescriptorBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setInstanceDescriptorBufferOffset_), instanceDescriptorBufferOffset); +} + +// property: instanceDescriptorStride +_MTL_INLINE NS::UInteger MTL::InstanceAccelerationStructureDescriptor::instanceDescriptorStride() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(instanceDescriptorStride)); +} + +_MTL_INLINE void MTL::InstanceAccelerationStructureDescriptor::setInstanceDescriptorStride(NS::UInteger instanceDescriptorStride) +{ + 
Object::sendMessage(this, _MTL_PRIVATE_SEL(setInstanceDescriptorStride_), instanceDescriptorStride); +} + +// property: instanceCount +_MTL_INLINE NS::UInteger MTL::InstanceAccelerationStructureDescriptor::instanceCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(instanceCount)); +} + +_MTL_INLINE void MTL::InstanceAccelerationStructureDescriptor::setInstanceCount(NS::UInteger instanceCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setInstanceCount_), instanceCount); +} + +// property: instancedAccelerationStructures +_MTL_INLINE NS::Array* MTL::InstanceAccelerationStructureDescriptor::instancedAccelerationStructures() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(instancedAccelerationStructures)); +} + +_MTL_INLINE void MTL::InstanceAccelerationStructureDescriptor::setInstancedAccelerationStructures(const NS::Array* instancedAccelerationStructures) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setInstancedAccelerationStructures_), instancedAccelerationStructures); +} + +// property: instanceDescriptorType +_MTL_INLINE MTL::AccelerationStructureInstanceDescriptorType MTL::InstanceAccelerationStructureDescriptor::instanceDescriptorType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(instanceDescriptorType)); +} + +_MTL_INLINE void MTL::InstanceAccelerationStructureDescriptor::setInstanceDescriptorType(MTL::AccelerationStructureInstanceDescriptorType instanceDescriptorType) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setInstanceDescriptorType_), instanceDescriptorType); +} + +// property: motionTransformBuffer +_MTL_INLINE MTL::Buffer* MTL::InstanceAccelerationStructureDescriptor::motionTransformBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(motionTransformBuffer)); +} + +_MTL_INLINE void MTL::InstanceAccelerationStructureDescriptor::setMotionTransformBuffer(const MTL::Buffer* motionTransformBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMotionTransformBuffer_), 
motionTransformBuffer); +} + +// property: motionTransformBufferOffset +_MTL_INLINE NS::UInteger MTL::InstanceAccelerationStructureDescriptor::motionTransformBufferOffset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(motionTransformBufferOffset)); +} + +_MTL_INLINE void MTL::InstanceAccelerationStructureDescriptor::setMotionTransformBufferOffset(NS::UInteger motionTransformBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMotionTransformBufferOffset_), motionTransformBufferOffset); +} + +// property: motionTransformCount +_MTL_INLINE NS::UInteger MTL::InstanceAccelerationStructureDescriptor::motionTransformCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(motionTransformCount)); +} + +_MTL_INLINE void MTL::InstanceAccelerationStructureDescriptor::setMotionTransformCount(NS::UInteger motionTransformCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMotionTransformCount_), motionTransformCount); +} + +// static method: descriptor +_MTL_INLINE MTL::InstanceAccelerationStructureDescriptor* MTL::InstanceAccelerationStructureDescriptor::descriptor() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLInstanceAccelerationStructureDescriptor), _MTL_PRIVATE_SEL(descriptor)); +} + +// property: size +_MTL_INLINE NS::UInteger MTL::AccelerationStructure::size() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(size)); +} + +// property: gpuResourceID +_MTL_INLINE MTL::ResourceID MTL::AccelerationStructure::gpuResourceID() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(gpuResourceID)); +} diff --git a/metal-cpp/Metal/MTLAccelerationStructureCommandEncoder.hpp b/metal-cpp/Metal/MTLAccelerationStructureCommandEncoder.hpp new file mode 100644 index 00000000..8a07cec0 --- /dev/null +++ b/metal-cpp/Metal/MTLAccelerationStructureCommandEncoder.hpp @@ -0,0 +1,290 @@ 
+//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLAccelerationStructureCommandEncoder.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLAccelerationStructureCommandEncoder.hpp" +#include "MTLArgument.hpp" +#include "MTLCommandEncoder.hpp" + +namespace MTL +{ +_MTL_OPTIONS(NS::UInteger, AccelerationStructureRefitOptions) { + AccelerationStructureRefitOptionVertexData = 1, + AccelerationStructureRefitOptionPerPrimitiveData = 2, +}; + +class AccelerationStructureCommandEncoder : public NS::Referencing +{ +public: + void buildAccelerationStructure(const class AccelerationStructure* accelerationStructure, const class AccelerationStructureDescriptor* descriptor, const class Buffer* scratchBuffer, NS::UInteger scratchBufferOffset); + + void refitAccelerationStructure(const class AccelerationStructure* sourceAccelerationStructure, const class AccelerationStructureDescriptor* descriptor, const class AccelerationStructure* destinationAccelerationStructure, const class Buffer* scratchBuffer, NS::UInteger 
scratchBufferOffset); + + void refitAccelerationStructure(const class AccelerationStructure* sourceAccelerationStructure, const class AccelerationStructureDescriptor* descriptor, const class AccelerationStructure* destinationAccelerationStructure, const class Buffer* scratchBuffer, NS::UInteger scratchBufferOffset, MTL::AccelerationStructureRefitOptions options); + + void copyAccelerationStructure(const class AccelerationStructure* sourceAccelerationStructure, const class AccelerationStructure* destinationAccelerationStructure); + + void writeCompactedAccelerationStructureSize(const class AccelerationStructure* accelerationStructure, const class Buffer* buffer, NS::UInteger offset); + + void writeCompactedAccelerationStructureSize(const class AccelerationStructure* accelerationStructure, const class Buffer* buffer, NS::UInteger offset, MTL::DataType sizeDataType); + + void copyAndCompactAccelerationStructure(const class AccelerationStructure* sourceAccelerationStructure, const class AccelerationStructure* destinationAccelerationStructure); + + void updateFence(const class Fence* fence); + + void waitForFence(const class Fence* fence); + + void useResource(const class Resource* resource, MTL::ResourceUsage usage); + + void useResources(const class Resource* const resources[], NS::UInteger count, MTL::ResourceUsage usage); + + void useHeap(const class Heap* heap); + + void useHeaps(const class Heap* const heaps[], NS::UInteger count); + + void sampleCountersInBuffer(const class CounterSampleBuffer* sampleBuffer, NS::UInteger sampleIndex, bool barrier); +}; + +class AccelerationStructurePassSampleBufferAttachmentDescriptor : public NS::Copying +{ +public: + static class AccelerationStructurePassSampleBufferAttachmentDescriptor* alloc(); + + class AccelerationStructurePassSampleBufferAttachmentDescriptor* init(); + + class CounterSampleBuffer* sampleBuffer() const; + void setSampleBuffer(const class CounterSampleBuffer* sampleBuffer); + + NS::UInteger 
startOfEncoderSampleIndex() const; + void setStartOfEncoderSampleIndex(NS::UInteger startOfEncoderSampleIndex); + + NS::UInteger endOfEncoderSampleIndex() const; + void setEndOfEncoderSampleIndex(NS::UInteger endOfEncoderSampleIndex); +}; + +class AccelerationStructurePassSampleBufferAttachmentDescriptorArray : public NS::Referencing +{ +public: + static class AccelerationStructurePassSampleBufferAttachmentDescriptorArray* alloc(); + + class AccelerationStructurePassSampleBufferAttachmentDescriptorArray* init(); + + class AccelerationStructurePassSampleBufferAttachmentDescriptor* object(NS::UInteger attachmentIndex); + + void setObject(const class AccelerationStructurePassSampleBufferAttachmentDescriptor* attachment, NS::UInteger attachmentIndex); +}; + +class AccelerationStructurePassDescriptor : public NS::Copying +{ +public: + static class AccelerationStructurePassDescriptor* alloc(); + + class AccelerationStructurePassDescriptor* init(); + + static class AccelerationStructurePassDescriptor* accelerationStructurePassDescriptor(); + + class AccelerationStructurePassSampleBufferAttachmentDescriptorArray* sampleBufferAttachments() const; +}; + +} + +// method: buildAccelerationStructure:descriptor:scratchBuffer:scratchBufferOffset: +_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::buildAccelerationStructure(const MTL::AccelerationStructure* accelerationStructure, const MTL::AccelerationStructureDescriptor* descriptor, const MTL::Buffer* scratchBuffer, NS::UInteger scratchBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(buildAccelerationStructure_descriptor_scratchBuffer_scratchBufferOffset_), accelerationStructure, descriptor, scratchBuffer, scratchBufferOffset); +} + +// method: refitAccelerationStructure:descriptor:destination:scratchBuffer:scratchBufferOffset: +_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::refitAccelerationStructure(const MTL::AccelerationStructure* sourceAccelerationStructure, const 
MTL::AccelerationStructureDescriptor* descriptor, const MTL::AccelerationStructure* destinationAccelerationStructure, const MTL::Buffer* scratchBuffer, NS::UInteger scratchBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(refitAccelerationStructure_descriptor_destination_scratchBuffer_scratchBufferOffset_), sourceAccelerationStructure, descriptor, destinationAccelerationStructure, scratchBuffer, scratchBufferOffset); +} + +// method: refitAccelerationStructure:descriptor:destination:scratchBuffer:scratchBufferOffset:options: +_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::refitAccelerationStructure(const MTL::AccelerationStructure* sourceAccelerationStructure, const MTL::AccelerationStructureDescriptor* descriptor, const MTL::AccelerationStructure* destinationAccelerationStructure, const MTL::Buffer* scratchBuffer, NS::UInteger scratchBufferOffset, MTL::AccelerationStructureRefitOptions options) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(refitAccelerationStructure_descriptor_destination_scratchBuffer_scratchBufferOffset_options_), sourceAccelerationStructure, descriptor, destinationAccelerationStructure, scratchBuffer, scratchBufferOffset, options); +} + +// method: copyAccelerationStructure:toAccelerationStructure: +_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::copyAccelerationStructure(const MTL::AccelerationStructure* sourceAccelerationStructure, const MTL::AccelerationStructure* destinationAccelerationStructure) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(copyAccelerationStructure_toAccelerationStructure_), sourceAccelerationStructure, destinationAccelerationStructure); +} + +// method: writeCompactedAccelerationStructureSize:toBuffer:offset: +_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::writeCompactedAccelerationStructureSize(const MTL::AccelerationStructure* accelerationStructure, const MTL::Buffer* buffer, NS::UInteger offset) +{ + Object::sendMessage(this, 
_MTL_PRIVATE_SEL(writeCompactedAccelerationStructureSize_toBuffer_offset_), accelerationStructure, buffer, offset); +} + +// method: writeCompactedAccelerationStructureSize:toBuffer:offset:sizeDataType: +_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::writeCompactedAccelerationStructureSize(const MTL::AccelerationStructure* accelerationStructure, const MTL::Buffer* buffer, NS::UInteger offset, MTL::DataType sizeDataType) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(writeCompactedAccelerationStructureSize_toBuffer_offset_sizeDataType_), accelerationStructure, buffer, offset, sizeDataType); +} + +// method: copyAndCompactAccelerationStructure:toAccelerationStructure: +_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::copyAndCompactAccelerationStructure(const MTL::AccelerationStructure* sourceAccelerationStructure, const MTL::AccelerationStructure* destinationAccelerationStructure) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(copyAndCompactAccelerationStructure_toAccelerationStructure_), sourceAccelerationStructure, destinationAccelerationStructure); +} + +// method: updateFence: +_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::updateFence(const MTL::Fence* fence) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(updateFence_), fence); +} + +// method: waitForFence: +_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::waitForFence(const MTL::Fence* fence) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(waitForFence_), fence); +} + +// method: useResource:usage: +_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::useResource(const MTL::Resource* resource, MTL::ResourceUsage usage) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(useResource_usage_), resource, usage); +} + +// method: useResources:count:usage: +_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::useResources(const MTL::Resource* const resources[], NS::UInteger count, MTL::ResourceUsage usage) +{ + Object::sendMessage(this, 
_MTL_PRIVATE_SEL(useResources_count_usage_), resources, count, usage); +} + +// method: useHeap: +_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::useHeap(const MTL::Heap* heap) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(useHeap_), heap); +} + +// method: useHeaps:count: +_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::useHeaps(const MTL::Heap* const heaps[], NS::UInteger count) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(useHeaps_count_), heaps, count); +} + +// method: sampleCountersInBuffer:atSampleIndex:withBarrier: +_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::sampleCountersInBuffer(const MTL::CounterSampleBuffer* sampleBuffer, NS::UInteger sampleIndex, bool barrier) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(sampleCountersInBuffer_atSampleIndex_withBarrier_), sampleBuffer, sampleIndex, barrier); +} + +// static method: alloc +_MTL_INLINE MTL::AccelerationStructurePassSampleBufferAttachmentDescriptor* MTL::AccelerationStructurePassSampleBufferAttachmentDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLAccelerationStructurePassSampleBufferAttachmentDescriptor)); +} + +// method: init +_MTL_INLINE MTL::AccelerationStructurePassSampleBufferAttachmentDescriptor* MTL::AccelerationStructurePassSampleBufferAttachmentDescriptor::init() +{ + return NS::Object::init(); +} + +// property: sampleBuffer +_MTL_INLINE MTL::CounterSampleBuffer* MTL::AccelerationStructurePassSampleBufferAttachmentDescriptor::sampleBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sampleBuffer)); +} + +_MTL_INLINE void MTL::AccelerationStructurePassSampleBufferAttachmentDescriptor::setSampleBuffer(const MTL::CounterSampleBuffer* sampleBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSampleBuffer_), sampleBuffer); +} + +// property: startOfEncoderSampleIndex +_MTL_INLINE NS::UInteger MTL::AccelerationStructurePassSampleBufferAttachmentDescriptor::startOfEncoderSampleIndex() const +{ + return 
Object::sendMessage(this, _MTL_PRIVATE_SEL(startOfEncoderSampleIndex)); +} + +_MTL_INLINE void MTL::AccelerationStructurePassSampleBufferAttachmentDescriptor::setStartOfEncoderSampleIndex(NS::UInteger startOfEncoderSampleIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStartOfEncoderSampleIndex_), startOfEncoderSampleIndex); +} + +// property: endOfEncoderSampleIndex +_MTL_INLINE NS::UInteger MTL::AccelerationStructurePassSampleBufferAttachmentDescriptor::endOfEncoderSampleIndex() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(endOfEncoderSampleIndex)); +} + +_MTL_INLINE void MTL::AccelerationStructurePassSampleBufferAttachmentDescriptor::setEndOfEncoderSampleIndex(NS::UInteger endOfEncoderSampleIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setEndOfEncoderSampleIndex_), endOfEncoderSampleIndex); +} + +// static method: alloc +_MTL_INLINE MTL::AccelerationStructurePassSampleBufferAttachmentDescriptorArray* MTL::AccelerationStructurePassSampleBufferAttachmentDescriptorArray::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLAccelerationStructurePassSampleBufferAttachmentDescriptorArray)); +} + +// method: init +_MTL_INLINE MTL::AccelerationStructurePassSampleBufferAttachmentDescriptorArray* MTL::AccelerationStructurePassSampleBufferAttachmentDescriptorArray::init() +{ + return NS::Object::init(); +} + +// method: objectAtIndexedSubscript: +_MTL_INLINE MTL::AccelerationStructurePassSampleBufferAttachmentDescriptor* MTL::AccelerationStructurePassSampleBufferAttachmentDescriptorArray::object(NS::UInteger attachmentIndex) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), attachmentIndex); +} + +// method: setObject:atIndexedSubscript: +_MTL_INLINE void MTL::AccelerationStructurePassSampleBufferAttachmentDescriptorArray::setObject(const MTL::AccelerationStructurePassSampleBufferAttachmentDescriptor* attachment, NS::UInteger attachmentIndex) +{ + Object::sendMessage(this, 
_MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), attachment, attachmentIndex); +} + +// static method: alloc +_MTL_INLINE MTL::AccelerationStructurePassDescriptor* MTL::AccelerationStructurePassDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLAccelerationStructurePassDescriptor)); +} + +// method: init +_MTL_INLINE MTL::AccelerationStructurePassDescriptor* MTL::AccelerationStructurePassDescriptor::init() +{ + return NS::Object::init(); +} + +// static method: accelerationStructurePassDescriptor +_MTL_INLINE MTL::AccelerationStructurePassDescriptor* MTL::AccelerationStructurePassDescriptor::accelerationStructurePassDescriptor() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLAccelerationStructurePassDescriptor), _MTL_PRIVATE_SEL(accelerationStructurePassDescriptor)); +} + +// property: sampleBufferAttachments +_MTL_INLINE MTL::AccelerationStructurePassSampleBufferAttachmentDescriptorArray* MTL::AccelerationStructurePassDescriptor::sampleBufferAttachments() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sampleBufferAttachments)); +} diff --git a/metal-cpp/Metal/MTLAccelerationStructureTypes.hpp b/metal-cpp/Metal/MTLAccelerationStructureTypes.hpp new file mode 100644 index 00000000..8a4b95f0 --- /dev/null +++ b/metal-cpp/Metal/MTLAccelerationStructureTypes.hpp @@ -0,0 +1,169 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLAccelerationStructureTypes.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "MTLDefines.hpp"
+#include "MTLPrivate.hpp"
+#include "MTLResource.hpp"
+#include "MTLStageInputOutputDescriptor.hpp"
+
+#include "../Foundation/NSRange.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace MTL
+{
+struct PackedFloat3 // Three floats reachable by name (x, y, z) or by index (elements[0..2]).
+{
+    PackedFloat3(); // Sets x, y and z to 0.0f.
+    PackedFloat3(float x, float y, float z); // Component-wise initialization.
+
+    float& operator[](int idx); // Mutable reference to elements[idx]; idx is not range-checked.
+    float operator[](int idx) const; // Copy of elements[idx]; idx is not range-checked.
+
+    union // The named components and the array overlay the same storage.
+    {
+        struct
+        {
+            float x;
+            float y;
+            float z;
+        };
+
+        float elements[3];
+    };
+} _MTL_PACKED; // NOTE(review): _MTL_PACKED is defined outside this header; presumably a packing attribute - confirm.
+
+struct PackedFloat4x3 // Four PackedFloat3 columns.
+{
+    PackedFloat4x3(); // Sets every column to (0, 0, 0).
+    PackedFloat4x3(const PackedFloat3& col0, const PackedFloat3& col1, const PackedFloat3& col2, const PackedFloat3& col3); // Stores col0..col3 in columns[0..3].
+
+    PackedFloat3& operator[](int idx); // Mutable reference to columns[idx]; idx is not range-checked.
+    const PackedFloat3& operator[](int idx) const; // Read-only reference to columns[idx]; idx is not range-checked.
+
+    PackedFloat3 columns[4];
+} _MTL_PACKED;
+
+struct AxisAlignedBoundingBox // A box described by its min and max corners.
+{
+    AxisAlignedBoundingBox(); // min = +INFINITY and max = -INFINITY on every component.
+    AxisAlignedBoundingBox(PackedFloat3 p); // min and max both set to p.
+    AxisAlignedBoundingBox(PackedFloat3 min, PackedFloat3 max); // Stores the two corners as given; no ordering check.
+
+    PackedFloat3 min;
+    PackedFloat3 max;
+} _MTL_PACKED;
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE MTL::PackedFloat3::PackedFloat3() // Default constructor: all three components start at 0.0f.
+    : x(0.0f)
+    , y(0.0f)
+    , z(0.0f)
+{
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE MTL::PackedFloat3::PackedFloat3(float _x, float _y, float _z) // Component-wise constructor.
+    : x(_x)
+    , y(_y)
+    , z(_z)
+{
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE float& MTL::PackedFloat3::operator[](int idx) // Mutable element access; idx is used unchecked.
+{
+    return elements[idx];
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE float MTL::PackedFloat3::operator[](int idx) const // Element access by value; idx is used unchecked.
+{
+    return elements[idx];
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE MTL::PackedFloat4x3::PackedFloat4x3() // Default constructor: every column becomes (0, 0, 0).
+{
+    columns[0] = PackedFloat3(0.0f, 0.0f, 0.0f);
+    columns[1] = PackedFloat3(0.0f, 0.0f, 0.0f);
+    columns[2] = PackedFloat3(0.0f, 0.0f, 0.0f);
+    columns[3] = PackedFloat3(0.0f, 0.0f, 0.0f);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE MTL::PackedFloat4x3::PackedFloat4x3(const PackedFloat3& col0, const PackedFloat3& col1, const PackedFloat3& col2, const PackedFloat3& col3) // Copies the four arguments into columns[0..3], in order.
+{
+    columns[0] = col0;
+    columns[1] = col1;
+    columns[2] = col2;
+    columns[3] = col3;
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE MTL::PackedFloat3& MTL::PackedFloat4x3::operator[](int idx) // Mutable column access; idx is used unchecked.
+{
+    return columns[idx];
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE const MTL::PackedFloat3& MTL::PackedFloat4x3::operator[](int idx) const // Read-only column access; idx is used unchecked.
+{
+    return columns[idx];
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE MTL::AxisAlignedBoundingBox::AxisAlignedBoundingBox() // Default box: min is +INFINITY and max is -INFINITY on every component.
+    : min(INFINITY, INFINITY, INFINITY)
+    , max(-INFINITY, -INFINITY, -INFINITY)
+{
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE MTL::AxisAlignedBoundingBox::AxisAlignedBoundingBox(PackedFloat3 p) // Box whose min and max are both the point p.
+    : min(p)
+    , max(p)
+{
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE MTL::AxisAlignedBoundingBox::AxisAlignedBoundingBox(PackedFloat3 _min, PackedFloat3 _max) // Stores both corners exactly as passed; no validation that _min <= _max.
+    : min(_min)
+    , max(_max)
+{
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Metal/MTLArgument.hpp b/metal-cpp/Metal/MTLArgument.hpp
new file mode 100644
index 00000000..d92ce579
--- /dev/null
+++ b/metal-cpp/Metal/MTLArgument.hpp
@@ -0,0 +1,841 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLArgument.hpp
+//
+// 
Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLArgument.hpp" +#include "MTLTexture.hpp" + +namespace MTL +{ +_MTL_ENUM(NS::UInteger, DataType) { + DataTypeNone = 0, + DataTypeStruct = 1, + DataTypeArray = 2, + DataTypeFloat = 3, + DataTypeFloat2 = 4, + DataTypeFloat3 = 5, + DataTypeFloat4 = 6, + DataTypeFloat2x2 = 7, + DataTypeFloat2x3 = 8, + DataTypeFloat2x4 = 9, + DataTypeFloat3x2 = 10, + DataTypeFloat3x3 = 11, + DataTypeFloat3x4 = 12, + DataTypeFloat4x2 = 13, + DataTypeFloat4x3 = 14, + DataTypeFloat4x4 = 15, + DataTypeHalf = 16, + DataTypeHalf2 = 17, + DataTypeHalf3 = 18, + DataTypeHalf4 = 19, + DataTypeHalf2x2 = 20, + DataTypeHalf2x3 = 21, + DataTypeHalf2x4 = 22, + DataTypeHalf3x2 = 23, + DataTypeHalf3x3 = 24, + DataTypeHalf3x4 = 25, + DataTypeHalf4x2 = 26, + DataTypeHalf4x3 = 27, + DataTypeHalf4x4 = 28, + DataTypeInt = 29, + DataTypeInt2 = 30, + DataTypeInt3 = 31, + DataTypeInt4 = 32, + DataTypeUInt = 33, + DataTypeUInt2 = 34, + DataTypeUInt3 = 35, + DataTypeUInt4 = 36, + DataTypeShort = 37, + DataTypeShort2 = 38, + DataTypeShort3 = 39, + DataTypeShort4 = 40, + DataTypeUShort = 41, + DataTypeUShort2 = 42, + DataTypeUShort3 = 43, 
+ DataTypeUShort4 = 44, + DataTypeChar = 45, + DataTypeChar2 = 46, + DataTypeChar3 = 47, + DataTypeChar4 = 48, + DataTypeUChar = 49, + DataTypeUChar2 = 50, + DataTypeUChar3 = 51, + DataTypeUChar4 = 52, + DataTypeBool = 53, + DataTypeBool2 = 54, + DataTypeBool3 = 55, + DataTypeBool4 = 56, + DataTypeTexture = 58, + DataTypeSampler = 59, + DataTypePointer = 60, + DataTypeR8Unorm = 62, + DataTypeR8Snorm = 63, + DataTypeR16Unorm = 64, + DataTypeR16Snorm = 65, + DataTypeRG8Unorm = 66, + DataTypeRG8Snorm = 67, + DataTypeRG16Unorm = 68, + DataTypeRG16Snorm = 69, + DataTypeRGBA8Unorm = 70, + DataTypeRGBA8Unorm_sRGB = 71, + DataTypeRGBA8Snorm = 72, + DataTypeRGBA16Unorm = 73, + DataTypeRGBA16Snorm = 74, + DataTypeRGB10A2Unorm = 75, + DataTypeRG11B10Float = 76, + DataTypeRGB9E5Float = 77, + DataTypeRenderPipeline = 78, + DataTypeComputePipeline = 79, + DataTypeIndirectCommandBuffer = 80, + DataTypeLong = 81, + DataTypeLong2 = 82, + DataTypeLong3 = 83, + DataTypeLong4 = 84, + DataTypeULong = 85, + DataTypeULong2 = 86, + DataTypeULong3 = 87, + DataTypeULong4 = 88, + DataTypeVisibleFunctionTable = 115, + DataTypeIntersectionFunctionTable = 116, + DataTypePrimitiveAccelerationStructure = 117, + DataTypeInstanceAccelerationStructure = 118, +}; + +_MTL_ENUM(NS::Integer, BindingType) { + BindingTypeBuffer = 0, + BindingTypeThreadgroupMemory = 1, + BindingTypeTexture = 2, + BindingTypeSampler = 3, + BindingTypeImageblockData = 16, + BindingTypeImageblock = 17, + BindingTypeVisibleFunctionTable = 24, + BindingTypePrimitiveAccelerationStructure = 25, + BindingTypeInstanceAccelerationStructure = 26, + BindingTypeIntersectionFunctionTable = 27, + BindingTypeObjectPayload = 34, +}; + +_MTL_ENUM(NS::UInteger, ArgumentType) { + ArgumentTypeBuffer = 0, + ArgumentTypeThreadgroupMemory = 1, + ArgumentTypeTexture = 2, + ArgumentTypeSampler = 3, + ArgumentTypeImageblockData = 16, + ArgumentTypeImageblock = 17, + ArgumentTypeVisibleFunctionTable = 24, + ArgumentTypePrimitiveAccelerationStructure 
= 25, + ArgumentTypeInstanceAccelerationStructure = 26, + ArgumentTypeIntersectionFunctionTable = 27, +}; + +_MTL_ENUM(NS::UInteger, ArgumentAccess) { + ArgumentAccessReadOnly = 0, + ArgumentAccessReadWrite = 1, + ArgumentAccessWriteOnly = 2, +}; + +class Type : public NS::Referencing +{ +public: + static class Type* alloc(); + + class Type* init(); + + MTL::DataType dataType() const; +}; + +class StructMember : public NS::Referencing +{ +public: + static class StructMember* alloc(); + + class StructMember* init(); + + NS::String* name() const; + + NS::UInteger offset() const; + + MTL::DataType dataType() const; + + class StructType* structType(); + + class ArrayType* arrayType(); + + class TextureReferenceType* textureReferenceType(); + + class PointerType* pointerType(); + + NS::UInteger argumentIndex() const; +}; + +class StructType : public NS::Referencing +{ +public: + static class StructType* alloc(); + + class StructType* init(); + + NS::Array* members() const; + + class StructMember* memberByName(const NS::String* name); +}; + +class ArrayType : public NS::Referencing +{ +public: + static class ArrayType* alloc(); + + class ArrayType* init(); + + MTL::DataType elementType() const; + + NS::UInteger arrayLength() const; + + NS::UInteger stride() const; + + NS::UInteger argumentIndexStride() const; + + class StructType* elementStructType(); + + class ArrayType* elementArrayType(); + + class TextureReferenceType* elementTextureReferenceType(); + + class PointerType* elementPointerType(); +}; + +class PointerType : public NS::Referencing +{ +public: + static class PointerType* alloc(); + + class PointerType* init(); + + MTL::DataType elementType() const; + + MTL::ArgumentAccess access() const; + + NS::UInteger alignment() const; + + NS::UInteger dataSize() const; + + bool elementIsArgumentBuffer() const; + + class StructType* elementStructType(); + + class ArrayType* elementArrayType(); +}; + +class TextureReferenceType : public NS::Referencing +{ +public: + 
static class TextureReferenceType* alloc(); + + class TextureReferenceType* init(); + + MTL::DataType textureDataType() const; + + MTL::TextureType textureType() const; + + MTL::ArgumentAccess access() const; + + bool isDepthTexture() const; +}; + +class Argument : public NS::Referencing +{ +public: + static class Argument* alloc(); + + class Argument* init(); + + NS::String* name() const; + + MTL::ArgumentType type() const; + + MTL::ArgumentAccess access() const; + + NS::UInteger index() const; + + bool active() const; + + NS::UInteger bufferAlignment() const; + + NS::UInteger bufferDataSize() const; + + MTL::DataType bufferDataType() const; + + class StructType* bufferStructType() const; + + class PointerType* bufferPointerType() const; + + NS::UInteger threadgroupMemoryAlignment() const; + + NS::UInteger threadgroupMemoryDataSize() const; + + MTL::TextureType textureType() const; + + MTL::DataType textureDataType() const; + + bool isDepthTexture() const; + + NS::UInteger arrayLength() const; +}; + +class Binding : public NS::Referencing +{ +public: + NS::String* name() const; + + MTL::BindingType type() const; + + MTL::ArgumentAccess access() const; + + NS::UInteger index() const; + + bool used() const; + + bool argument() const; +}; + +class BufferBinding : public NS::Referencing +{ +public: + NS::UInteger bufferAlignment() const; + + NS::UInteger bufferDataSize() const; + + MTL::DataType bufferDataType() const; + + class StructType* bufferStructType() const; + + class PointerType* bufferPointerType() const; +}; + +class ThreadgroupBinding : public NS::Referencing +{ +public: + NS::UInteger threadgroupMemoryAlignment() const; + + NS::UInteger threadgroupMemoryDataSize() const; +}; + +class TextureBinding : public NS::Referencing +{ +public: + MTL::TextureType textureType() const; + + MTL::DataType textureDataType() const; + + bool depthTexture() const; + + NS::UInteger arrayLength() const; +}; + +class ObjectPayloadBinding : public NS::Referencing +{ +public: + 
NS::UInteger objectPayloadAlignment() const; + + NS::UInteger objectPayloadDataSize() const; +}; + +} + +// static method: alloc +_MTL_INLINE MTL::Type* MTL::Type::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLType)); +} + +// method: init +_MTL_INLINE MTL::Type* MTL::Type::init() +{ + return NS::Object::init(); +} + +// property: dataType +_MTL_INLINE MTL::DataType MTL::Type::dataType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(dataType)); +} + +// static method: alloc +_MTL_INLINE MTL::StructMember* MTL::StructMember::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLStructMember)); +} + +// method: init +_MTL_INLINE MTL::StructMember* MTL::StructMember::init() +{ + return NS::Object::init(); +} + +// property: name +_MTL_INLINE NS::String* MTL::StructMember::name() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(name)); +} + +// property: offset +_MTL_INLINE NS::UInteger MTL::StructMember::offset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(offset)); +} + +// property: dataType +_MTL_INLINE MTL::DataType MTL::StructMember::dataType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(dataType)); +} + +// method: structType +_MTL_INLINE MTL::StructType* MTL::StructMember::structType() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(structType)); +} + +// method: arrayType +_MTL_INLINE MTL::ArrayType* MTL::StructMember::arrayType() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(arrayType)); +} + +// method: textureReferenceType +_MTL_INLINE MTL::TextureReferenceType* MTL::StructMember::textureReferenceType() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(textureReferenceType)); +} + +// method: pointerType +_MTL_INLINE MTL::PointerType* MTL::StructMember::pointerType() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(pointerType)); +} + +// property: argumentIndex +_MTL_INLINE NS::UInteger MTL::StructMember::argumentIndex() const +{ + return 
Object::sendMessage(this, _MTL_PRIVATE_SEL(argumentIndex)); +} + +// static method: alloc +_MTL_INLINE MTL::StructType* MTL::StructType::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLStructType)); +} + +// method: init +_MTL_INLINE MTL::StructType* MTL::StructType::init() +{ + return NS::Object::init(); +} + +// property: members +_MTL_INLINE NS::Array* MTL::StructType::members() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(members)); +} + +// method: memberByName: +_MTL_INLINE MTL::StructMember* MTL::StructType::memberByName(const NS::String* name) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(memberByName_), name); +} + +// static method: alloc +_MTL_INLINE MTL::ArrayType* MTL::ArrayType::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLArrayType)); +} + +// method: init +_MTL_INLINE MTL::ArrayType* MTL::ArrayType::init() +{ + return NS::Object::init(); +} + +// property: elementType +_MTL_INLINE MTL::DataType MTL::ArrayType::elementType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(elementType)); +} + +// property: arrayLength +_MTL_INLINE NS::UInteger MTL::ArrayType::arrayLength() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(arrayLength)); +} + +// property: stride +_MTL_INLINE NS::UInteger MTL::ArrayType::stride() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(stride)); +} + +// property: argumentIndexStride +_MTL_INLINE NS::UInteger MTL::ArrayType::argumentIndexStride() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(argumentIndexStride)); +} + +// method: elementStructType +_MTL_INLINE MTL::StructType* MTL::ArrayType::elementStructType() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(elementStructType)); +} + +// method: elementArrayType +_MTL_INLINE MTL::ArrayType* MTL::ArrayType::elementArrayType() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(elementArrayType)); +} + +// method: elementTextureReferenceType +_MTL_INLINE 
MTL::TextureReferenceType* MTL::ArrayType::elementTextureReferenceType() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(elementTextureReferenceType)); +} + +// method: elementPointerType +_MTL_INLINE MTL::PointerType* MTL::ArrayType::elementPointerType() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(elementPointerType)); +} + +// static method: alloc +_MTL_INLINE MTL::PointerType* MTL::PointerType::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLPointerType)); +} + +// method: init +_MTL_INLINE MTL::PointerType* MTL::PointerType::init() +{ + return NS::Object::init(); +} + +// property: elementType +_MTL_INLINE MTL::DataType MTL::PointerType::elementType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(elementType)); +} + +// property: access +_MTL_INLINE MTL::ArgumentAccess MTL::PointerType::access() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(access)); +} + +// property: alignment +_MTL_INLINE NS::UInteger MTL::PointerType::alignment() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(alignment)); +} + +// property: dataSize +_MTL_INLINE NS::UInteger MTL::PointerType::dataSize() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(dataSize)); +} + +// property: elementIsArgumentBuffer +_MTL_INLINE bool MTL::PointerType::elementIsArgumentBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(elementIsArgumentBuffer)); +} + +// method: elementStructType +_MTL_INLINE MTL::StructType* MTL::PointerType::elementStructType() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(elementStructType)); +} + +// method: elementArrayType +_MTL_INLINE MTL::ArrayType* MTL::PointerType::elementArrayType() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(elementArrayType)); +} + +// static method: alloc +_MTL_INLINE MTL::TextureReferenceType* MTL::TextureReferenceType::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLTextureReferenceType)); +} + +// method: init 
+_MTL_INLINE MTL::TextureReferenceType* MTL::TextureReferenceType::init() // Explicit template argument names the wrapper type that init() returns.
+{
+    return NS::Object::init<MTL::TextureReferenceType>();
+}
+
+// property: textureDataType (sendMessage's return type cannot be deduced, so it is spelled out)
+_MTL_INLINE MTL::DataType MTL::TextureReferenceType::textureDataType() const
+{
+    return Object::sendMessage<MTL::DataType>(this, _MTL_PRIVATE_SEL(textureDataType));
+}
+
+// property: textureType
+_MTL_INLINE MTL::TextureType MTL::TextureReferenceType::textureType() const
+{
+    return Object::sendMessage<MTL::TextureType>(this, _MTL_PRIVATE_SEL(textureType));
+}
+
+// property: access
+_MTL_INLINE MTL::ArgumentAccess MTL::TextureReferenceType::access() const
+{
+    return Object::sendMessage<MTL::ArgumentAccess>(this, _MTL_PRIVATE_SEL(access));
+}
+
+// property: isDepthTexture
+_MTL_INLINE bool MTL::TextureReferenceType::isDepthTexture() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isDepthTexture));
+}
+
+// static method: alloc
+_MTL_INLINE MTL::Argument* MTL::Argument::alloc()
+{
+    return NS::Object::alloc<MTL::Argument>(_MTL_PRIVATE_CLS(MTLArgument));
+}
+
+// method: init
+_MTL_INLINE MTL::Argument* MTL::Argument::init()
+{
+    return NS::Object::init<MTL::Argument>();
+}
+
+// property: name
+_MTL_INLINE NS::String* MTL::Argument::name() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(name));
+}
+
+// property: type
+_MTL_INLINE MTL::ArgumentType MTL::Argument::type() const
+{
+    return Object::sendMessage<MTL::ArgumentType>(this, _MTL_PRIVATE_SEL(type));
+}
+
+// property: access
+_MTL_INLINE MTL::ArgumentAccess MTL::Argument::access() const
+{
+    return Object::sendMessage<MTL::ArgumentAccess>(this, _MTL_PRIVATE_SEL(access));
+}
+
+// property: index
+_MTL_INLINE NS::UInteger MTL::Argument::index() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(index));
+}
+
+// property: active (the getter selector sent is isActive, not active)
+_MTL_INLINE bool MTL::Argument::active() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isActive));
+}
+
+// property: bufferAlignment
+_MTL_INLINE NS::UInteger MTL::Argument::bufferAlignment() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(bufferAlignment));
+}
+
+// property: bufferDataSize
+_MTL_INLINE NS::UInteger MTL::Argument::bufferDataSize() const // sendMessage's return type cannot be deduced, so it is spelled out.
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(bufferDataSize));
+}
+
+// property: bufferDataType
+_MTL_INLINE MTL::DataType MTL::Argument::bufferDataType() const
+{
+    return Object::sendMessage<MTL::DataType>(this, _MTL_PRIVATE_SEL(bufferDataType));
+}
+
+// property: bufferStructType
+_MTL_INLINE MTL::StructType* MTL::Argument::bufferStructType() const
+{
+    return Object::sendMessage<MTL::StructType*>(this, _MTL_PRIVATE_SEL(bufferStructType));
+}
+
+// property: bufferPointerType
+_MTL_INLINE MTL::PointerType* MTL::Argument::bufferPointerType() const
+{
+    return Object::sendMessage<MTL::PointerType*>(this, _MTL_PRIVATE_SEL(bufferPointerType));
+}
+
+// property: threadgroupMemoryAlignment
+_MTL_INLINE NS::UInteger MTL::Argument::threadgroupMemoryAlignment() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(threadgroupMemoryAlignment));
+}
+
+// property: threadgroupMemoryDataSize
+_MTL_INLINE NS::UInteger MTL::Argument::threadgroupMemoryDataSize() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(threadgroupMemoryDataSize));
+}
+
+// property: textureType
+_MTL_INLINE MTL::TextureType MTL::Argument::textureType() const
+{
+    return Object::sendMessage<MTL::TextureType>(this, _MTL_PRIVATE_SEL(textureType));
+}
+
+// property: textureDataType
+_MTL_INLINE MTL::DataType MTL::Argument::textureDataType() const
+{
+    return Object::sendMessage<MTL::DataType>(this, _MTL_PRIVATE_SEL(textureDataType));
+}
+
+// property: isDepthTexture
+_MTL_INLINE bool MTL::Argument::isDepthTexture() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isDepthTexture));
+}
+
+// property: arrayLength
+_MTL_INLINE NS::UInteger MTL::Argument::arrayLength() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(arrayLength));
+}
+
+// property: name
+_MTL_INLINE NS::String* MTL::Binding::name() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(name));
+}
+
+// property: type
+_MTL_INLINE MTL::BindingType MTL::Binding::type() const
+{
+    return Object::sendMessage<MTL::BindingType>(this, _MTL_PRIVATE_SEL(type));
+}
+
+// property: access
+_MTL_INLINE MTL::ArgumentAccess MTL::Binding::access() const
+{
+    return Object::sendMessage<MTL::ArgumentAccess>(this, _MTL_PRIVATE_SEL(access));
+}
+
+// property: index
+_MTL_INLINE NS::UInteger MTL::Binding::index() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(index));
+}
+
+// property: used (the getter selector sent is isUsed)
+_MTL_INLINE bool MTL::Binding::used() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isUsed));
+}
+
+// property: argument (the getter selector sent is isArgument)
+_MTL_INLINE bool MTL::Binding::argument() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isArgument));
+}
+
+// property: bufferAlignment
+_MTL_INLINE NS::UInteger MTL::BufferBinding::bufferAlignment() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(bufferAlignment));
+}
+
+// property: bufferDataSize
+_MTL_INLINE NS::UInteger MTL::BufferBinding::bufferDataSize() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(bufferDataSize));
+}
+
+// property: bufferDataType
+_MTL_INLINE MTL::DataType MTL::BufferBinding::bufferDataType() const
+{
+    return Object::sendMessage<MTL::DataType>(this, _MTL_PRIVATE_SEL(bufferDataType));
+}
+
+// property: bufferStructType
+_MTL_INLINE MTL::StructType* MTL::BufferBinding::bufferStructType() const
+{
+    return Object::sendMessage<MTL::StructType*>(this, _MTL_PRIVATE_SEL(bufferStructType));
+}
+
+// property: bufferPointerType
+_MTL_INLINE MTL::PointerType* MTL::BufferBinding::bufferPointerType() const
+{
+    return Object::sendMessage<MTL::PointerType*>(this, _MTL_PRIVATE_SEL(bufferPointerType));
+}
+
+// property: threadgroupMemoryAlignment
+_MTL_INLINE NS::UInteger MTL::ThreadgroupBinding::threadgroupMemoryAlignment() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(threadgroupMemoryAlignment));
+}
+
+// property: threadgroupMemoryDataSize
+_MTL_INLINE NS::UInteger MTL::ThreadgroupBinding::threadgroupMemoryDataSize() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(threadgroupMemoryDataSize));
+}
+
+// property: textureType
+_MTL_INLINE MTL::TextureType MTL::TextureBinding::textureType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(textureType)); +} + +// property: textureDataType +_MTL_INLINE MTL::DataType MTL::TextureBinding::textureDataType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(textureDataType)); +} + +// property: depthTexture +_MTL_INLINE bool MTL::TextureBinding::depthTexture() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(isDepthTexture)); +} + +// property: arrayLength +_MTL_INLINE NS::UInteger MTL::TextureBinding::arrayLength() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(arrayLength)); +} + +// property: objectPayloadAlignment +_MTL_INLINE NS::UInteger MTL::ObjectPayloadBinding::objectPayloadAlignment() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectPayloadAlignment)); +} + +// property: objectPayloadDataSize +_MTL_INLINE NS::UInteger MTL::ObjectPayloadBinding::objectPayloadDataSize() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectPayloadDataSize)); +} diff --git a/metal-cpp/Metal/MTLArgumentEncoder.hpp b/metal-cpp/Metal/MTLArgumentEncoder.hpp new file mode 100644 index 00000000..b4fab77f --- /dev/null +++ b/metal-cpp/Metal/MTLArgumentEncoder.hpp @@ -0,0 +1,241 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLArgumentEncoder.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +namespace MTL +{ +class ArgumentEncoder : public NS::Referencing +{ +public: + class Device* device() const; + + NS::String* label() const; + void setLabel(const NS::String* label); + + NS::UInteger encodedLength() const; + + NS::UInteger alignment() const; + + void setArgumentBuffer(const class Buffer* argumentBuffer, NS::UInteger offset); + + void setArgumentBuffer(const class Buffer* argumentBuffer, NS::UInteger startOffset, NS::UInteger arrayElement); + + void setBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index); + + void setBuffers(const class Buffer* const buffers[], const NS::UInteger offsets[], NS::Range range); + + void setTexture(const class Texture* texture, NS::UInteger index); + + void setTextures(const class Texture* const textures[], NS::Range range); + + void setSamplerState(const class SamplerState* sampler, NS::UInteger index); + + void setSamplerStates(const class SamplerState* const samplers[], NS::Range range); + + void* constantData(NS::UInteger index); + + void setRenderPipelineState(const class RenderPipelineState* pipeline, NS::UInteger index); + + void setRenderPipelineStates(const class RenderPipelineState* const pipelines[], NS::Range range); + + void setComputePipelineState(const class ComputePipelineState* pipeline, NS::UInteger index); + + void setComputePipelineStates(const class ComputePipelineState* const pipelines[], NS::Range range); + + void setIndirectCommandBuffer(const class IndirectCommandBuffer* indirectCommandBuffer, NS::UInteger index); + + void setIndirectCommandBuffers(const class IndirectCommandBuffer* const 
buffers[], NS::Range range); + + void setAccelerationStructure(const class AccelerationStructure* accelerationStructure, NS::UInteger index); + + class ArgumentEncoder* newArgumentEncoder(NS::UInteger index); + + void setVisibleFunctionTable(const class VisibleFunctionTable* visibleFunctionTable, NS::UInteger index); + + void setVisibleFunctionTables(const class VisibleFunctionTable* const visibleFunctionTables[], NS::Range range); + + void setIntersectionFunctionTable(const class IntersectionFunctionTable* intersectionFunctionTable, NS::UInteger index); + + void setIntersectionFunctionTables(const class IntersectionFunctionTable* const intersectionFunctionTables[], NS::Range range); +}; + +} + +// property: device +_MTL_INLINE MTL::Device* MTL::ArgumentEncoder::device() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); +} + +// property: label +_MTL_INLINE NS::String* MTL::ArgumentEncoder::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::ArgumentEncoder::setLabel(const NS::String* label) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} + +// property: encodedLength +_MTL_INLINE NS::UInteger MTL::ArgumentEncoder::encodedLength() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(encodedLength)); +} + +// property: alignment +_MTL_INLINE NS::UInteger MTL::ArgumentEncoder::alignment() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(alignment)); +} + +// method: setArgumentBuffer:offset: +_MTL_INLINE void MTL::ArgumentEncoder::setArgumentBuffer(const MTL::Buffer* argumentBuffer, NS::UInteger offset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setArgumentBuffer_offset_), argumentBuffer, offset); +} + +// method: setArgumentBuffer:startOffset:arrayElement: +_MTL_INLINE void MTL::ArgumentEncoder::setArgumentBuffer(const MTL::Buffer* argumentBuffer, NS::UInteger startOffset, NS::UInteger arrayElement) +{ + Object::sendMessage(this, 
_MTL_PRIVATE_SEL(setArgumentBuffer_startOffset_arrayElement_), argumentBuffer, startOffset, arrayElement); +} + +// method: setBuffer:offset:atIndex: +_MTL_INLINE void MTL::ArgumentEncoder::setBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBuffer_offset_atIndex_), buffer, offset, index); +} + +// method: setBuffers:offsets:withRange: +_MTL_INLINE void MTL::ArgumentEncoder::setBuffers(const MTL::Buffer* const buffers[], const NS::UInteger offsets[], NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBuffers_offsets_withRange_), buffers, offsets, range); +} + +// method: setTexture:atIndex: +_MTL_INLINE void MTL::ArgumentEncoder::setTexture(const MTL::Texture* texture, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTexture_atIndex_), texture, index); +} + +// method: setTextures:withRange: +_MTL_INLINE void MTL::ArgumentEncoder::setTextures(const MTL::Texture* const textures[], NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTextures_withRange_), textures, range); +} + +// method: setSamplerState:atIndex: +_MTL_INLINE void MTL::ArgumentEncoder::setSamplerState(const MTL::SamplerState* sampler, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSamplerState_atIndex_), sampler, index); +} + +// method: setSamplerStates:withRange: +_MTL_INLINE void MTL::ArgumentEncoder::setSamplerStates(const MTL::SamplerState* const samplers[], NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSamplerStates_withRange_), samplers, range); +} + +// method: constantDataAtIndex: +_MTL_INLINE void* MTL::ArgumentEncoder::constantData(NS::UInteger index) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(constantDataAtIndex_), index); +} + +// method: setRenderPipelineState:atIndex: +_MTL_INLINE void MTL::ArgumentEncoder::setRenderPipelineState(const MTL::RenderPipelineState* pipeline, NS::UInteger index) +{ + 
Object::sendMessage(this, _MTL_PRIVATE_SEL(setRenderPipelineState_atIndex_), pipeline, index); +} + +// method: setRenderPipelineStates:withRange: +_MTL_INLINE void MTL::ArgumentEncoder::setRenderPipelineStates(const MTL::RenderPipelineState* const pipelines[], NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRenderPipelineStates_withRange_), pipelines, range); +} + +// method: setComputePipelineState:atIndex: +_MTL_INLINE void MTL::ArgumentEncoder::setComputePipelineState(const MTL::ComputePipelineState* pipeline, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setComputePipelineState_atIndex_), pipeline, index); +} + +// method: setComputePipelineStates:withRange: +_MTL_INLINE void MTL::ArgumentEncoder::setComputePipelineStates(const MTL::ComputePipelineState* const pipelines[], NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setComputePipelineStates_withRange_), pipelines, range); +} + +// method: setIndirectCommandBuffer:atIndex: +_MTL_INLINE void MTL::ArgumentEncoder::setIndirectCommandBuffer(const MTL::IndirectCommandBuffer* indirectCommandBuffer, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIndirectCommandBuffer_atIndex_), indirectCommandBuffer, index); +} + +// method: setIndirectCommandBuffers:withRange: +_MTL_INLINE void MTL::ArgumentEncoder::setIndirectCommandBuffers(const MTL::IndirectCommandBuffer* const buffers[], NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIndirectCommandBuffers_withRange_), buffers, range); +} + +// method: setAccelerationStructure:atIndex: +_MTL_INLINE void MTL::ArgumentEncoder::setAccelerationStructure(const MTL::AccelerationStructure* accelerationStructure, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setAccelerationStructure_atIndex_), accelerationStructure, index); +} + +// method: newArgumentEncoderForBufferAtIndex: +_MTL_INLINE MTL::ArgumentEncoder* 
MTL::ArgumentEncoder::newArgumentEncoder(NS::UInteger index) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newArgumentEncoderForBufferAtIndex_), index); +} + +// method: setVisibleFunctionTable:atIndex: +_MTL_INLINE void MTL::ArgumentEncoder::setVisibleFunctionTable(const MTL::VisibleFunctionTable* visibleFunctionTable, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVisibleFunctionTable_atIndex_), visibleFunctionTable, index); +} + +// method: setVisibleFunctionTables:withRange: +_MTL_INLINE void MTL::ArgumentEncoder::setVisibleFunctionTables(const MTL::VisibleFunctionTable* const visibleFunctionTables[], NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVisibleFunctionTables_withRange_), visibleFunctionTables, range); +} + +// method: setIntersectionFunctionTable:atIndex: +_MTL_INLINE void MTL::ArgumentEncoder::setIntersectionFunctionTable(const MTL::IntersectionFunctionTable* intersectionFunctionTable, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIntersectionFunctionTable_atIndex_), intersectionFunctionTable, index); +} + +// method: setIntersectionFunctionTables:withRange: +_MTL_INLINE void MTL::ArgumentEncoder::setIntersectionFunctionTables(const MTL::IntersectionFunctionTable* const intersectionFunctionTables[], NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIntersectionFunctionTables_withRange_), intersectionFunctionTables, range); +} diff --git a/metal-cpp/Metal/MTLBinaryArchive.hpp b/metal-cpp/Metal/MTLBinaryArchive.hpp new file mode 100644 index 00000000..7d123625 --- /dev/null +++ b/metal-cpp/Metal/MTLBinaryArchive.hpp @@ -0,0 +1,139 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLBinaryArchive.hpp +// +// Copyright 2020-2022 Apple Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +namespace MTL +{ +_MTL_ENUM(NS::UInteger, BinaryArchiveError) { + BinaryArchiveErrorNone = 0, + BinaryArchiveErrorInvalidFile = 1, + BinaryArchiveErrorUnexpectedElement = 2, + BinaryArchiveErrorCompilationFailure = 3, + BinaryArchiveErrorInternalError = 4, +}; + +class BinaryArchiveDescriptor : public NS::Copying +{ +public: + static class BinaryArchiveDescriptor* alloc(); + + class BinaryArchiveDescriptor* init(); + + NS::URL* url() const; + void setUrl(const NS::URL* url); +}; + +class BinaryArchive : public NS::Referencing +{ +public: + NS::String* label() const; + void setLabel(const NS::String* label); + + class Device* device() const; + + bool addComputePipelineFunctions(const class ComputePipelineDescriptor* descriptor, NS::Error** error); + + bool addRenderPipelineFunctions(const class RenderPipelineDescriptor* descriptor, NS::Error** error); + + bool addTileRenderPipelineFunctions(const class TileRenderPipelineDescriptor* descriptor, NS::Error** error); + + bool serializeToURL(const NS::URL* url, NS::Error** error); + + bool addFunction(const class FunctionDescriptor* descriptor, const class Library* library, NS::Error** 
error); +}; + +} + +// static method: alloc +_MTL_INLINE MTL::BinaryArchiveDescriptor* MTL::BinaryArchiveDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLBinaryArchiveDescriptor)); +} + +// method: init +_MTL_INLINE MTL::BinaryArchiveDescriptor* MTL::BinaryArchiveDescriptor::init() +{ + return NS::Object::init(); +} + +// property: url +_MTL_INLINE NS::URL* MTL::BinaryArchiveDescriptor::url() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(url)); +} + +_MTL_INLINE void MTL::BinaryArchiveDescriptor::setUrl(const NS::URL* url) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setUrl_), url); +} + +// property: label +_MTL_INLINE NS::String* MTL::BinaryArchive::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::BinaryArchive::setLabel(const NS::String* label) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} + +// property: device +_MTL_INLINE MTL::Device* MTL::BinaryArchive::device() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); +} + +// method: addComputePipelineFunctionsWithDescriptor:error: +_MTL_INLINE bool MTL::BinaryArchive::addComputePipelineFunctions(const MTL::ComputePipelineDescriptor* descriptor, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(addComputePipelineFunctionsWithDescriptor_error_), descriptor, error); +} + +// method: addRenderPipelineFunctionsWithDescriptor:error: +_MTL_INLINE bool MTL::BinaryArchive::addRenderPipelineFunctions(const MTL::RenderPipelineDescriptor* descriptor, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(addRenderPipelineFunctionsWithDescriptor_error_), descriptor, error); +} + +// method: addTileRenderPipelineFunctionsWithDescriptor:error: +_MTL_INLINE bool MTL::BinaryArchive::addTileRenderPipelineFunctions(const MTL::TileRenderPipelineDescriptor* descriptor, NS::Error** error) +{ + return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(addTileRenderPipelineFunctionsWithDescriptor_error_), descriptor, error); +} + +// method: serializeToURL:error: +_MTL_INLINE bool MTL::BinaryArchive::serializeToURL(const NS::URL* url, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(serializeToURL_error_), url, error); +} + +// method: addFunctionWithDescriptor:library:error: +_MTL_INLINE bool MTL::BinaryArchive::addFunction(const MTL::FunctionDescriptor* descriptor, const MTL::Library* library, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(addFunctionWithDescriptor_library_error_), descriptor, library, error); +} diff --git a/metal-cpp/Metal/MTLBlitCommandEncoder.hpp b/metal-cpp/Metal/MTLBlitCommandEncoder.hpp new file mode 100644 index 00000000..fe64f822 --- /dev/null +++ b/metal-cpp/Metal/MTLBlitCommandEncoder.hpp @@ -0,0 +1,246 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLBlitCommandEncoder.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLBlitCommandEncoder.hpp" +#include "MTLCommandEncoder.hpp" +#include "MTLTypes.hpp" + +namespace MTL +{ +_MTL_OPTIONS(NS::UInteger, BlitOption) { + BlitOptionNone = 0, + BlitOptionDepthFromDepthStencil = 1, + BlitOptionStencilFromDepthStencil = 2, + BlitOptionRowLinearPVRTC = 4, +}; + +class BlitCommandEncoder : public NS::Referencing +{ +public: + void synchronizeResource(const class Resource* resource); + + void synchronizeTexture(const class Texture* texture, NS::UInteger slice, NS::UInteger level); + + void copyFromTexture(const class Texture* sourceTexture, NS::UInteger sourceSlice, NS::UInteger sourceLevel, MTL::Origin sourceOrigin, MTL::Size sourceSize, const class Texture* destinationTexture, NS::UInteger destinationSlice, NS::UInteger destinationLevel, MTL::Origin destinationOrigin); + + void copyFromBuffer(const class Buffer* sourceBuffer, NS::UInteger sourceOffset, NS::UInteger sourceBytesPerRow, NS::UInteger sourceBytesPerImage, MTL::Size sourceSize, const class Texture* destinationTexture, NS::UInteger destinationSlice, NS::UInteger destinationLevel, MTL::Origin destinationOrigin); + + void copyFromBuffer(const class Buffer* sourceBuffer, NS::UInteger sourceOffset, NS::UInteger sourceBytesPerRow, NS::UInteger sourceBytesPerImage, MTL::Size sourceSize, const class Texture* destinationTexture, NS::UInteger destinationSlice, NS::UInteger destinationLevel, MTL::Origin destinationOrigin, MTL::BlitOption options); + + void copyFromTexture(const class Texture* sourceTexture, NS::UInteger sourceSlice, NS::UInteger sourceLevel, MTL::Origin sourceOrigin, MTL::Size sourceSize, const class Buffer* destinationBuffer, NS::UInteger destinationOffset, NS::UInteger 
destinationBytesPerRow, NS::UInteger destinationBytesPerImage); + + void copyFromTexture(const class Texture* sourceTexture, NS::UInteger sourceSlice, NS::UInteger sourceLevel, MTL::Origin sourceOrigin, MTL::Size sourceSize, const class Buffer* destinationBuffer, NS::UInteger destinationOffset, NS::UInteger destinationBytesPerRow, NS::UInteger destinationBytesPerImage, MTL::BlitOption options); + + void generateMipmaps(const class Texture* texture); + + void fillBuffer(const class Buffer* buffer, NS::Range range, uint8_t value); + + void copyFromTexture(const class Texture* sourceTexture, NS::UInteger sourceSlice, NS::UInteger sourceLevel, const class Texture* destinationTexture, NS::UInteger destinationSlice, NS::UInteger destinationLevel, NS::UInteger sliceCount, NS::UInteger levelCount); + + void copyFromTexture(const class Texture* sourceTexture, const class Texture* destinationTexture); + + void copyFromBuffer(const class Buffer* sourceBuffer, NS::UInteger sourceOffset, const class Buffer* destinationBuffer, NS::UInteger destinationOffset, NS::UInteger size); + + void updateFence(const class Fence* fence); + + void waitForFence(const class Fence* fence); + + void getTextureAccessCounters(const class Texture* texture, MTL::Region region, NS::UInteger mipLevel, NS::UInteger slice, bool resetCounters, const class Buffer* countersBuffer, NS::UInteger countersBufferOffset); + + void resetTextureAccessCounters(const class Texture* texture, MTL::Region region, NS::UInteger mipLevel, NS::UInteger slice); + + void optimizeContentsForGPUAccess(const class Texture* texture); + + void optimizeContentsForGPUAccess(const class Texture* texture, NS::UInteger slice, NS::UInteger level); + + void optimizeContentsForCPUAccess(const class Texture* texture); + + void optimizeContentsForCPUAccess(const class Texture* texture, NS::UInteger slice, NS::UInteger level); + + void resetCommandsInBuffer(const class IndirectCommandBuffer* buffer, NS::Range range); + + void 
copyIndirectCommandBuffer(const class IndirectCommandBuffer* source, NS::Range sourceRange, const class IndirectCommandBuffer* destination, NS::UInteger destinationIndex); + + void optimizeIndirectCommandBuffer(const class IndirectCommandBuffer* indirectCommandBuffer, NS::Range range); + + void sampleCountersInBuffer(const class CounterSampleBuffer* sampleBuffer, NS::UInteger sampleIndex, bool barrier); + + void resolveCounters(const class CounterSampleBuffer* sampleBuffer, NS::Range range, const class Buffer* destinationBuffer, NS::UInteger destinationOffset); +}; + +} + +// method: synchronizeResource: +_MTL_INLINE void MTL::BlitCommandEncoder::synchronizeResource(const MTL::Resource* resource) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(synchronizeResource_), resource); +} + +// method: synchronizeTexture:slice:level: +_MTL_INLINE void MTL::BlitCommandEncoder::synchronizeTexture(const MTL::Texture* texture, NS::UInteger slice, NS::UInteger level) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(synchronizeTexture_slice_level_), texture, slice, level); +} + +// method: copyFromTexture:sourceSlice:sourceLevel:sourceOrigin:sourceSize:toTexture:destinationSlice:destinationLevel:destinationOrigin: +_MTL_INLINE void MTL::BlitCommandEncoder::copyFromTexture(const MTL::Texture* sourceTexture, NS::UInteger sourceSlice, NS::UInteger sourceLevel, MTL::Origin sourceOrigin, MTL::Size sourceSize, const MTL::Texture* destinationTexture, NS::UInteger destinationSlice, NS::UInteger destinationLevel, MTL::Origin destinationOrigin) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(copyFromTexture_sourceSlice_sourceLevel_sourceOrigin_sourceSize_toTexture_destinationSlice_destinationLevel_destinationOrigin_), sourceTexture, sourceSlice, sourceLevel, sourceOrigin, sourceSize, destinationTexture, destinationSlice, destinationLevel, destinationOrigin); +} + +// method: 
copyFromBuffer:sourceOffset:sourceBytesPerRow:sourceBytesPerImage:sourceSize:toTexture:destinationSlice:destinationLevel:destinationOrigin: +_MTL_INLINE void MTL::BlitCommandEncoder::copyFromBuffer(const MTL::Buffer* sourceBuffer, NS::UInteger sourceOffset, NS::UInteger sourceBytesPerRow, NS::UInteger sourceBytesPerImage, MTL::Size sourceSize, const MTL::Texture* destinationTexture, NS::UInteger destinationSlice, NS::UInteger destinationLevel, MTL::Origin destinationOrigin) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(copyFromBuffer_sourceOffset_sourceBytesPerRow_sourceBytesPerImage_sourceSize_toTexture_destinationSlice_destinationLevel_destinationOrigin_), sourceBuffer, sourceOffset, sourceBytesPerRow, sourceBytesPerImage, sourceSize, destinationTexture, destinationSlice, destinationLevel, destinationOrigin); +} + +// method: copyFromBuffer:sourceOffset:sourceBytesPerRow:sourceBytesPerImage:sourceSize:toTexture:destinationSlice:destinationLevel:destinationOrigin:options: +_MTL_INLINE void MTL::BlitCommandEncoder::copyFromBuffer(const MTL::Buffer* sourceBuffer, NS::UInteger sourceOffset, NS::UInteger sourceBytesPerRow, NS::UInteger sourceBytesPerImage, MTL::Size sourceSize, const MTL::Texture* destinationTexture, NS::UInteger destinationSlice, NS::UInteger destinationLevel, MTL::Origin destinationOrigin, MTL::BlitOption options) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(copyFromBuffer_sourceOffset_sourceBytesPerRow_sourceBytesPerImage_sourceSize_toTexture_destinationSlice_destinationLevel_destinationOrigin_options_), sourceBuffer, sourceOffset, sourceBytesPerRow, sourceBytesPerImage, sourceSize, destinationTexture, destinationSlice, destinationLevel, destinationOrigin, options); +} + +// method: copyFromTexture:sourceSlice:sourceLevel:sourceOrigin:sourceSize:toBuffer:destinationOffset:destinationBytesPerRow:destinationBytesPerImage: +_MTL_INLINE void MTL::BlitCommandEncoder::copyFromTexture(const MTL::Texture* sourceTexture, NS::UInteger sourceSlice, 
NS::UInteger sourceLevel, MTL::Origin sourceOrigin, MTL::Size sourceSize, const MTL::Buffer* destinationBuffer, NS::UInteger destinationOffset, NS::UInteger destinationBytesPerRow, NS::UInteger destinationBytesPerImage) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(copyFromTexture_sourceSlice_sourceLevel_sourceOrigin_sourceSize_toBuffer_destinationOffset_destinationBytesPerRow_destinationBytesPerImage_), sourceTexture, sourceSlice, sourceLevel, sourceOrigin, sourceSize, destinationBuffer, destinationOffset, destinationBytesPerRow, destinationBytesPerImage); +} + +// method: copyFromTexture:sourceSlice:sourceLevel:sourceOrigin:sourceSize:toBuffer:destinationOffset:destinationBytesPerRow:destinationBytesPerImage:options: +_MTL_INLINE void MTL::BlitCommandEncoder::copyFromTexture(const MTL::Texture* sourceTexture, NS::UInteger sourceSlice, NS::UInteger sourceLevel, MTL::Origin sourceOrigin, MTL::Size sourceSize, const MTL::Buffer* destinationBuffer, NS::UInteger destinationOffset, NS::UInteger destinationBytesPerRow, NS::UInteger destinationBytesPerImage, MTL::BlitOption options) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(copyFromTexture_sourceSlice_sourceLevel_sourceOrigin_sourceSize_toBuffer_destinationOffset_destinationBytesPerRow_destinationBytesPerImage_options_), sourceTexture, sourceSlice, sourceLevel, sourceOrigin, sourceSize, destinationBuffer, destinationOffset, destinationBytesPerRow, destinationBytesPerImage, options); +} + +// method: generateMipmapsForTexture: +_MTL_INLINE void MTL::BlitCommandEncoder::generateMipmaps(const MTL::Texture* texture) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(generateMipmapsForTexture_), texture); +} + +// method: fillBuffer:range:value: +_MTL_INLINE void MTL::BlitCommandEncoder::fillBuffer(const MTL::Buffer* buffer, NS::Range range, uint8_t value) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(fillBuffer_range_value_), buffer, range, value); +} + +// method: 
copyFromTexture:sourceSlice:sourceLevel:toTexture:destinationSlice:destinationLevel:sliceCount:levelCount: +_MTL_INLINE void MTL::BlitCommandEncoder::copyFromTexture(const MTL::Texture* sourceTexture, NS::UInteger sourceSlice, NS::UInteger sourceLevel, const MTL::Texture* destinationTexture, NS::UInteger destinationSlice, NS::UInteger destinationLevel, NS::UInteger sliceCount, NS::UInteger levelCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(copyFromTexture_sourceSlice_sourceLevel_toTexture_destinationSlice_destinationLevel_sliceCount_levelCount_), sourceTexture, sourceSlice, sourceLevel, destinationTexture, destinationSlice, destinationLevel, sliceCount, levelCount); +} + +// method: copyFromTexture:toTexture: +_MTL_INLINE void MTL::BlitCommandEncoder::copyFromTexture(const MTL::Texture* sourceTexture, const MTL::Texture* destinationTexture) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(copyFromTexture_toTexture_), sourceTexture, destinationTexture); +} + +// method: copyFromBuffer:sourceOffset:toBuffer:destinationOffset:size: +_MTL_INLINE void MTL::BlitCommandEncoder::copyFromBuffer(const MTL::Buffer* sourceBuffer, NS::UInteger sourceOffset, const MTL::Buffer* destinationBuffer, NS::UInteger destinationOffset, NS::UInteger size) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(copyFromBuffer_sourceOffset_toBuffer_destinationOffset_size_), sourceBuffer, sourceOffset, destinationBuffer, destinationOffset, size); +} + +// method: updateFence: +_MTL_INLINE void MTL::BlitCommandEncoder::updateFence(const MTL::Fence* fence) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(updateFence_), fence); +} + +// method: waitForFence: +_MTL_INLINE void MTL::BlitCommandEncoder::waitForFence(const MTL::Fence* fence) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(waitForFence_), fence); +} + +// method: getTextureAccessCounters:region:mipLevel:slice:resetCounters:countersBuffer:countersBufferOffset: +_MTL_INLINE void MTL::BlitCommandEncoder::getTextureAccessCounters(const 
MTL::Texture* texture, MTL::Region region, NS::UInteger mipLevel, NS::UInteger slice, bool resetCounters, const MTL::Buffer* countersBuffer, NS::UInteger countersBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(getTextureAccessCounters_region_mipLevel_slice_resetCounters_countersBuffer_countersBufferOffset_), texture, region, mipLevel, slice, resetCounters, countersBuffer, countersBufferOffset); +} + +// method: resetTextureAccessCounters:region:mipLevel:slice: +_MTL_INLINE void MTL::BlitCommandEncoder::resetTextureAccessCounters(const MTL::Texture* texture, MTL::Region region, NS::UInteger mipLevel, NS::UInteger slice) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(resetTextureAccessCounters_region_mipLevel_slice_), texture, region, mipLevel, slice); +} + +// method: optimizeContentsForGPUAccess: +_MTL_INLINE void MTL::BlitCommandEncoder::optimizeContentsForGPUAccess(const MTL::Texture* texture) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(optimizeContentsForGPUAccess_), texture); +} + +// method: optimizeContentsForGPUAccess:slice:level: +_MTL_INLINE void MTL::BlitCommandEncoder::optimizeContentsForGPUAccess(const MTL::Texture* texture, NS::UInteger slice, NS::UInteger level) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(optimizeContentsForGPUAccess_slice_level_), texture, slice, level); +} + +// method: optimizeContentsForCPUAccess: +_MTL_INLINE void MTL::BlitCommandEncoder::optimizeContentsForCPUAccess(const MTL::Texture* texture) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(optimizeContentsForCPUAccess_), texture); +} + +// method: optimizeContentsForCPUAccess:slice:level: +_MTL_INLINE void MTL::BlitCommandEncoder::optimizeContentsForCPUAccess(const MTL::Texture* texture, NS::UInteger slice, NS::UInteger level) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(optimizeContentsForCPUAccess_slice_level_), texture, slice, level); +} + +// method: resetCommandsInBuffer:withRange: +_MTL_INLINE void 
MTL::BlitCommandEncoder::resetCommandsInBuffer(const MTL::IndirectCommandBuffer* buffer, NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(resetCommandsInBuffer_withRange_), buffer, range); +} + +// method: copyIndirectCommandBuffer:sourceRange:destination:destinationIndex: +_MTL_INLINE void MTL::BlitCommandEncoder::copyIndirectCommandBuffer(const MTL::IndirectCommandBuffer* source, NS::Range sourceRange, const MTL::IndirectCommandBuffer* destination, NS::UInteger destinationIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(copyIndirectCommandBuffer_sourceRange_destination_destinationIndex_), source, sourceRange, destination, destinationIndex); +} + +// method: optimizeIndirectCommandBuffer:withRange: +_MTL_INLINE void MTL::BlitCommandEncoder::optimizeIndirectCommandBuffer(const MTL::IndirectCommandBuffer* indirectCommandBuffer, NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(optimizeIndirectCommandBuffer_withRange_), indirectCommandBuffer, range); +} + +// method: sampleCountersInBuffer:atSampleIndex:withBarrier: +_MTL_INLINE void MTL::BlitCommandEncoder::sampleCountersInBuffer(const MTL::CounterSampleBuffer* sampleBuffer, NS::UInteger sampleIndex, bool barrier) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(sampleCountersInBuffer_atSampleIndex_withBarrier_), sampleBuffer, sampleIndex, barrier); +} + +// method: resolveCounters:inRange:destinationBuffer:destinationOffset: +_MTL_INLINE void MTL::BlitCommandEncoder::resolveCounters(const MTL::CounterSampleBuffer* sampleBuffer, NS::Range range, const MTL::Buffer* destinationBuffer, NS::UInteger destinationOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(resolveCounters_inRange_destinationBuffer_destinationOffset_), sampleBuffer, range, destinationBuffer, destinationOffset); +} diff --git a/metal-cpp/Metal/MTLBlitPass.hpp b/metal-cpp/Metal/MTLBlitPass.hpp new file mode 100644 index 00000000..f16e8fdb --- /dev/null +++ b/metal-cpp/Metal/MTLBlitPass.hpp @@ -0,0 +1,165 @@ 
+//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLBlitPass.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +namespace MTL +{ +class BlitPassSampleBufferAttachmentDescriptor : public NS::Copying +{ +public: + static class BlitPassSampleBufferAttachmentDescriptor* alloc(); + + class BlitPassSampleBufferAttachmentDescriptor* init(); + + class CounterSampleBuffer* sampleBuffer() const; + void setSampleBuffer(const class CounterSampleBuffer* sampleBuffer); + + NS::UInteger startOfEncoderSampleIndex() const; + void setStartOfEncoderSampleIndex(NS::UInteger startOfEncoderSampleIndex); + + NS::UInteger endOfEncoderSampleIndex() const; + void setEndOfEncoderSampleIndex(NS::UInteger endOfEncoderSampleIndex); +}; + +class BlitPassSampleBufferAttachmentDescriptorArray : public NS::Referencing +{ +public: + static class BlitPassSampleBufferAttachmentDescriptorArray* alloc(); + + class BlitPassSampleBufferAttachmentDescriptorArray* init(); + + class BlitPassSampleBufferAttachmentDescriptor* object(NS::UInteger 
attachmentIndex); + + void setObject(const class BlitPassSampleBufferAttachmentDescriptor* attachment, NS::UInteger attachmentIndex); +}; + +class BlitPassDescriptor : public NS::Copying +{ +public: + static class BlitPassDescriptor* alloc(); + + class BlitPassDescriptor* init(); + + static class BlitPassDescriptor* blitPassDescriptor(); + + class BlitPassSampleBufferAttachmentDescriptorArray* sampleBufferAttachments() const; +}; + +} + +// static method: alloc +_MTL_INLINE MTL::BlitPassSampleBufferAttachmentDescriptor* MTL::BlitPassSampleBufferAttachmentDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLBlitPassSampleBufferAttachmentDescriptor)); +} + +// method: init +_MTL_INLINE MTL::BlitPassSampleBufferAttachmentDescriptor* MTL::BlitPassSampleBufferAttachmentDescriptor::init() +{ + return NS::Object::init(); +} + +// property: sampleBuffer +_MTL_INLINE MTL::CounterSampleBuffer* MTL::BlitPassSampleBufferAttachmentDescriptor::sampleBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sampleBuffer)); +} + +_MTL_INLINE void MTL::BlitPassSampleBufferAttachmentDescriptor::setSampleBuffer(const MTL::CounterSampleBuffer* sampleBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSampleBuffer_), sampleBuffer); +} + +// property: startOfEncoderSampleIndex +_MTL_INLINE NS::UInteger MTL::BlitPassSampleBufferAttachmentDescriptor::startOfEncoderSampleIndex() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(startOfEncoderSampleIndex)); +} + +_MTL_INLINE void MTL::BlitPassSampleBufferAttachmentDescriptor::setStartOfEncoderSampleIndex(NS::UInteger startOfEncoderSampleIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStartOfEncoderSampleIndex_), startOfEncoderSampleIndex); +} + +// property: endOfEncoderSampleIndex +_MTL_INLINE NS::UInteger MTL::BlitPassSampleBufferAttachmentDescriptor::endOfEncoderSampleIndex() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(endOfEncoderSampleIndex)); +} + +_MTL_INLINE 
void MTL::BlitPassSampleBufferAttachmentDescriptor::setEndOfEncoderSampleIndex(NS::UInteger endOfEncoderSampleIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setEndOfEncoderSampleIndex_), endOfEncoderSampleIndex); +} + +// static method: alloc +_MTL_INLINE MTL::BlitPassSampleBufferAttachmentDescriptorArray* MTL::BlitPassSampleBufferAttachmentDescriptorArray::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLBlitPassSampleBufferAttachmentDescriptorArray)); +} + +// method: init +_MTL_INLINE MTL::BlitPassSampleBufferAttachmentDescriptorArray* MTL::BlitPassSampleBufferAttachmentDescriptorArray::init() +{ + return NS::Object::init(); +} + +// method: objectAtIndexedSubscript: +_MTL_INLINE MTL::BlitPassSampleBufferAttachmentDescriptor* MTL::BlitPassSampleBufferAttachmentDescriptorArray::object(NS::UInteger attachmentIndex) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), attachmentIndex); +} + +// method: setObject:atIndexedSubscript: +_MTL_INLINE void MTL::BlitPassSampleBufferAttachmentDescriptorArray::setObject(const MTL::BlitPassSampleBufferAttachmentDescriptor* attachment, NS::UInteger attachmentIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), attachment, attachmentIndex); +} + +// static method: alloc +_MTL_INLINE MTL::BlitPassDescriptor* MTL::BlitPassDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLBlitPassDescriptor)); +} + +// method: init +_MTL_INLINE MTL::BlitPassDescriptor* MTL::BlitPassDescriptor::init() +{ + return NS::Object::init(); +} + +// static method: blitPassDescriptor +_MTL_INLINE MTL::BlitPassDescriptor* MTL::BlitPassDescriptor::blitPassDescriptor() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLBlitPassDescriptor), _MTL_PRIVATE_SEL(blitPassDescriptor)); +} + +// property: sampleBufferAttachments +_MTL_INLINE MTL::BlitPassSampleBufferAttachmentDescriptorArray* MTL::BlitPassDescriptor::sampleBufferAttachments() const +{ + return 
Object::sendMessage(this, _MTL_PRIVATE_SEL(sampleBufferAttachments)); +} diff --git a/metal-cpp/Metal/MTLBuffer.hpp b/metal-cpp/Metal/MTLBuffer.hpp new file mode 100644 index 00000000..f936f139 --- /dev/null +++ b/metal-cpp/Metal/MTLBuffer.hpp @@ -0,0 +1,109 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLBuffer.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLResource.hpp" + +namespace MTL +{ +class Buffer : public NS::Referencing +{ +public: + NS::UInteger length() const; + + void* contents(); + + void didModifyRange(NS::Range range); + + class Texture* newTexture(const class TextureDescriptor* descriptor, NS::UInteger offset, NS::UInteger bytesPerRow); + + void addDebugMarker(const NS::String* marker, NS::Range range); + + void removeAllDebugMarkers(); + + class Buffer* remoteStorageBuffer() const; + + class Buffer* newRemoteBufferViewForDevice(const class Device* device); + + uint64_t gpuAddress() const; +}; + +} + +// property: length +_MTL_INLINE NS::UInteger MTL::Buffer::length() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(length)); +} + +// method: contents +_MTL_INLINE void* MTL::Buffer::contents() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(contents)); +} + +// method: didModifyRange: +_MTL_INLINE void MTL::Buffer::didModifyRange(NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(didModifyRange_), range); +} + +// method: newTextureWithDescriptor:offset:bytesPerRow: +_MTL_INLINE MTL::Texture* MTL::Buffer::newTexture(const MTL::TextureDescriptor* descriptor, NS::UInteger offset, NS::UInteger bytesPerRow) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newTextureWithDescriptor_offset_bytesPerRow_), descriptor, offset, bytesPerRow); +} + +// method: addDebugMarker:range: +_MTL_INLINE void MTL::Buffer::addDebugMarker(const NS::String* marker, NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(addDebugMarker_range_), marker, range); +} + +// method: removeAllDebugMarkers +_MTL_INLINE void MTL::Buffer::removeAllDebugMarkers() +{ + Object::sendMessage(this, 
_MTL_PRIVATE_SEL(removeAllDebugMarkers)); +} + +// property: remoteStorageBuffer +_MTL_INLINE MTL::Buffer* MTL::Buffer::remoteStorageBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(remoteStorageBuffer)); +} + +// method: newRemoteBufferViewForDevice: +_MTL_INLINE MTL::Buffer* MTL::Buffer::newRemoteBufferViewForDevice(const MTL::Device* device) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newRemoteBufferViewForDevice_), device); +} + +// property: gpuAddress +_MTL_INLINE uint64_t MTL::Buffer::gpuAddress() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(gpuAddress)); +} diff --git a/metal-cpp/Metal/MTLCaptureManager.hpp b/metal-cpp/Metal/MTLCaptureManager.hpp new file mode 100644 index 00000000..0cdf53b8 --- /dev/null +++ b/metal-cpp/Metal/MTLCaptureManager.hpp @@ -0,0 +1,220 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLCaptureManager.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLCaptureManager.hpp" + +namespace MTL +{ +_MTL_ENUM(NS::Integer, CaptureError) { + CaptureErrorNotSupported = 1, + CaptureErrorAlreadyCapturing = 2, + CaptureErrorInvalidDescriptor = 3, +}; + +_MTL_ENUM(NS::Integer, CaptureDestination) { + CaptureDestinationDeveloperTools = 1, + CaptureDestinationGPUTraceDocument = 2, +}; + +class CaptureDescriptor : public NS::Copying +{ +public: + static class CaptureDescriptor* alloc(); + + class CaptureDescriptor* init(); + + id captureObject() const; + void setCaptureObject(id captureObject); + + MTL::CaptureDestination destination() const; + void setDestination(MTL::CaptureDestination destination); + + NS::URL* outputURL() const; + void setOutputURL(const NS::URL* outputURL); +}; + +class CaptureManager : public NS::Referencing +{ +public: + static class CaptureManager* alloc(); + + static class CaptureManager* sharedCaptureManager(); + + MTL::CaptureManager* init(); + + class CaptureScope* newCaptureScope(const class Device* device); + + class CaptureScope* newCaptureScope(const class CommandQueue* commandQueue); + + bool supportsDestination(MTL::CaptureDestination destination); + + bool startCapture(const class CaptureDescriptor* descriptor, NS::Error** error); + + void startCapture(const class Device* device); + + void startCapture(const class CommandQueue* commandQueue); + + void startCapture(const class CaptureScope* captureScope); + + void stopCapture(); + + class CaptureScope* defaultCaptureScope() const; + void setDefaultCaptureScope(const class CaptureScope* defaultCaptureScope); + + bool isCapturing() const; +}; + +} + +// static method: alloc +_MTL_INLINE MTL::CaptureDescriptor* MTL::CaptureDescriptor::alloc() +{ + return 
NS::Object::alloc(_MTL_PRIVATE_CLS(MTLCaptureDescriptor)); +} + +// method: init +_MTL_INLINE MTL::CaptureDescriptor* MTL::CaptureDescriptor::init() +{ + return NS::Object::init(); +} + +// property: captureObject +_MTL_INLINE id MTL::CaptureDescriptor::captureObject() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(captureObject)); +} + +_MTL_INLINE void MTL::CaptureDescriptor::setCaptureObject(id captureObject) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setCaptureObject_), captureObject); +} + +// property: destination +_MTL_INLINE MTL::CaptureDestination MTL::CaptureDescriptor::destination() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(destination)); +} + +_MTL_INLINE void MTL::CaptureDescriptor::setDestination(MTL::CaptureDestination destination) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setDestination_), destination); +} + +// property: outputURL +_MTL_INLINE NS::URL* MTL::CaptureDescriptor::outputURL() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(outputURL)); +} + +_MTL_INLINE void MTL::CaptureDescriptor::setOutputURL(const NS::URL* outputURL) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setOutputURL_), outputURL); +} + +// static method: alloc +_MTL_INLINE MTL::CaptureManager* MTL::CaptureManager::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLCaptureManager)); +} + +// static method: sharedCaptureManager +_MTL_INLINE MTL::CaptureManager* MTL::CaptureManager::sharedCaptureManager() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLCaptureManager), _MTL_PRIVATE_SEL(sharedCaptureManager)); +} + +// method: init +_MTL_INLINE MTL::CaptureManager* MTL::CaptureManager::init() +{ + return NS::Object::init(); +} + +// method: newCaptureScopeWithDevice: +_MTL_INLINE MTL::CaptureScope* MTL::CaptureManager::newCaptureScope(const MTL::Device* device) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newCaptureScopeWithDevice_), device); +} + +// method: newCaptureScopeWithCommandQueue: 
+_MTL_INLINE MTL::CaptureScope* MTL::CaptureManager::newCaptureScope(const MTL::CommandQueue* commandQueue) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newCaptureScopeWithCommandQueue_), commandQueue); +} + +// method: supportsDestination: +_MTL_INLINE bool MTL::CaptureManager::supportsDestination(MTL::CaptureDestination destination) +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportsDestination_), destination); +} + +// method: startCaptureWithDescriptor:error: +_MTL_INLINE bool MTL::CaptureManager::startCapture(const MTL::CaptureDescriptor* descriptor, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(startCaptureWithDescriptor_error_), descriptor, error); +} + +// method: startCaptureWithDevice: +_MTL_INLINE void MTL::CaptureManager::startCapture(const MTL::Device* device) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(startCaptureWithDevice_), device); +} + +// method: startCaptureWithCommandQueue: +_MTL_INLINE void MTL::CaptureManager::startCapture(const MTL::CommandQueue* commandQueue) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(startCaptureWithCommandQueue_), commandQueue); +} + +// method: startCaptureWithScope: +_MTL_INLINE void MTL::CaptureManager::startCapture(const MTL::CaptureScope* captureScope) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(startCaptureWithScope_), captureScope); +} + +// method: stopCapture +_MTL_INLINE void MTL::CaptureManager::stopCapture() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(stopCapture)); +} + +// property: defaultCaptureScope +_MTL_INLINE MTL::CaptureScope* MTL::CaptureManager::defaultCaptureScope() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(defaultCaptureScope)); +} + +_MTL_INLINE void MTL::CaptureManager::setDefaultCaptureScope(const MTL::CaptureScope* defaultCaptureScope) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setDefaultCaptureScope_), defaultCaptureScope); +} + +// property: isCapturing +_MTL_INLINE bool 
MTL::CaptureManager::isCapturing() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(isCapturing)); +} diff --git a/metal-cpp/Metal/MTLCaptureScope.hpp b/metal-cpp/Metal/MTLCaptureScope.hpp new file mode 100644 index 00000000..1ad42469 --- /dev/null +++ b/metal-cpp/Metal/MTLCaptureScope.hpp @@ -0,0 +1,92 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLCaptureScope.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "MTLDefines.hpp" +#include "MTLPrivate.hpp" + +#include "../Foundation/NSObject.hpp" +#include "../Foundation/NSString.hpp" + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace MTL +{ +class CaptureScope : public NS::Referencing +{ +public: + class Device* device() const; + + NS::String* label() const; + void setLabel(const NS::String* pLabel); + + class CommandQueue* commandQueue() const; + + void beginScope(); + void endScope(); +}; +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_MTL_INLINE MTL::Device* MTL::CaptureScope::device() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_MTL_INLINE NS::String* MTL::CaptureScope::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_MTL_INLINE void MTL::CaptureScope::setLabel(const NS::String* pLabel) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), pLabel); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_MTL_INLINE 
MTL::CommandQueue* MTL::CaptureScope::commandQueue() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(commandQueue)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_MTL_INLINE void MTL::CaptureScope::beginScope() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(beginScope)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_MTL_INLINE void MTL::CaptureScope::endScope() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(endScope)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/Metal/MTLCommandBuffer.hpp b/metal-cpp/Metal/MTLCommandBuffer.hpp new file mode 100644 index 00000000..6bc12d02 --- /dev/null +++ b/metal-cpp/Metal/MTLCommandBuffer.hpp @@ -0,0 +1,474 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLCommandBuffer.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLCommandBuffer.hpp" +#include + +namespace MTL +{ +_MTL_ENUM(NS::UInteger, CommandBufferStatus) { + CommandBufferStatusNotEnqueued = 0, + CommandBufferStatusEnqueued = 1, + CommandBufferStatusCommitted = 2, + CommandBufferStatusScheduled = 3, + CommandBufferStatusCompleted = 4, + CommandBufferStatusError = 5, +}; + +_MTL_ENUM(NS::UInteger, CommandBufferError) { + CommandBufferErrorNone = 0, + CommandBufferErrorInternal = 1, + CommandBufferErrorTimeout = 2, + CommandBufferErrorPageFault = 3, + CommandBufferErrorAccessRevoked = 4, + CommandBufferErrorBlacklisted = 4, + CommandBufferErrorNotPermitted = 7, + CommandBufferErrorOutOfMemory = 8, + CommandBufferErrorInvalidResource = 9, + CommandBufferErrorMemoryless = 10, + CommandBufferErrorDeviceRemoved = 11, + CommandBufferErrorStackOverflow = 12, +}; + +_MTL_OPTIONS(NS::UInteger, CommandBufferErrorOption) { + CommandBufferErrorOptionNone = 0, + CommandBufferErrorOptionEncoderExecutionStatus = 1, +}; + +_MTL_ENUM(NS::Integer, CommandEncoderErrorState) { + CommandEncoderErrorStateUnknown = 0, + CommandEncoderErrorStateCompleted = 1, + CommandEncoderErrorStateAffected = 2, + CommandEncoderErrorStatePending = 3, + CommandEncoderErrorStateFaulted = 4, +}; + +class CommandBufferDescriptor : public NS::Copying +{ +public: + static class CommandBufferDescriptor* alloc(); + + class CommandBufferDescriptor* init(); + + bool retainedReferences() const; + void setRetainedReferences(bool retainedReferences); + + MTL::CommandBufferErrorOption errorOptions() const; + void setErrorOptions(MTL::CommandBufferErrorOption errorOptions); +}; + +class CommandBufferEncoderInfo : public NS::Referencing +{ +public: + NS::String* label() const; + + 
NS::Array* debugSignposts() const; + + MTL::CommandEncoderErrorState errorState() const; +}; + +_MTL_ENUM(NS::UInteger, DispatchType) { + DispatchTypeSerial = 0, + DispatchTypeConcurrent = 1, +}; + +class CommandBuffer; + +using CommandBufferHandler = void (^)(CommandBuffer*); + +using HandlerFunction = std::function; + +class CommandBuffer : public NS::Referencing +{ +public: + void addScheduledHandler(const HandlerFunction& function); + + void addCompletedHandler(const HandlerFunction& function); + + class Device* device() const; + + class CommandQueue* commandQueue() const; + + bool retainedReferences() const; + + MTL::CommandBufferErrorOption errorOptions() const; + + NS::String* label() const; + void setLabel(const NS::String* label); + + CFTimeInterval kernelStartTime() const; + + CFTimeInterval kernelEndTime() const; + + class LogContainer* logs() const; + + CFTimeInterval GPUStartTime() const; + + CFTimeInterval GPUEndTime() const; + + void enqueue(); + + void commit(); + + void addScheduledHandler(const MTL::CommandBufferHandler block); + + void presentDrawable(const class Drawable* drawable); + + void presentDrawableAtTime(const class Drawable* drawable, CFTimeInterval presentationTime); + + void presentDrawableAfterMinimumDuration(const class Drawable* drawable, CFTimeInterval duration); + + void waitUntilScheduled(); + + void addCompletedHandler(const MTL::CommandBufferHandler block); + + void waitUntilCompleted(); + + MTL::CommandBufferStatus status() const; + + NS::Error* error() const; + + class BlitCommandEncoder* blitCommandEncoder(); + + class RenderCommandEncoder* renderCommandEncoder(const class RenderPassDescriptor* renderPassDescriptor); + + class ComputeCommandEncoder* computeCommandEncoder(const class ComputePassDescriptor* computePassDescriptor); + + class BlitCommandEncoder* blitCommandEncoder(const class BlitPassDescriptor* blitPassDescriptor); + + class ComputeCommandEncoder* computeCommandEncoder(); + + class ComputeCommandEncoder* 
computeCommandEncoder(MTL::DispatchType dispatchType); + + void encodeWait(const class Event* event, uint64_t value); + + void encodeSignalEvent(const class Event* event, uint64_t value); + + class ParallelRenderCommandEncoder* parallelRenderCommandEncoder(const class RenderPassDescriptor* renderPassDescriptor); + + class ResourceStateCommandEncoder* resourceStateCommandEncoder(); + + class ResourceStateCommandEncoder* resourceStateCommandEncoder(const class ResourceStatePassDescriptor* resourceStatePassDescriptor); + + class AccelerationStructureCommandEncoder* accelerationStructureCommandEncoder(); + + class AccelerationStructureCommandEncoder* accelerationStructureCommandEncoder(const class AccelerationStructurePassDescriptor* descriptor); + + void pushDebugGroup(const NS::String* string); + + void popDebugGroup(); +}; + +} + +// static method: alloc +_MTL_INLINE MTL::CommandBufferDescriptor* MTL::CommandBufferDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLCommandBufferDescriptor)); +} + +// method: init +_MTL_INLINE MTL::CommandBufferDescriptor* MTL::CommandBufferDescriptor::init() +{ + return NS::Object::init(); +} + +// property: retainedReferences +_MTL_INLINE bool MTL::CommandBufferDescriptor::retainedReferences() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(retainedReferences)); +} + +_MTL_INLINE void MTL::CommandBufferDescriptor::setRetainedReferences(bool retainedReferences) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRetainedReferences_), retainedReferences); +} + +// property: errorOptions +_MTL_INLINE MTL::CommandBufferErrorOption MTL::CommandBufferDescriptor::errorOptions() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(errorOptions)); +} + +_MTL_INLINE void MTL::CommandBufferDescriptor::setErrorOptions(MTL::CommandBufferErrorOption errorOptions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setErrorOptions_), errorOptions); +} + +// property: label +_MTL_INLINE NS::String* 
MTL::CommandBufferEncoderInfo::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +// property: debugSignposts +_MTL_INLINE NS::Array* MTL::CommandBufferEncoderInfo::debugSignposts() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(debugSignposts)); +} + +// property: errorState +_MTL_INLINE MTL::CommandEncoderErrorState MTL::CommandBufferEncoderInfo::errorState() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(errorState)); +} + +_MTL_INLINE void MTL::CommandBuffer::addScheduledHandler(const HandlerFunction& function) +{ + __block HandlerFunction blockFunction = function; + + addScheduledHandler(^(MTL::CommandBuffer* pCommandBuffer) { blockFunction(pCommandBuffer); }); +} + +_MTL_INLINE void MTL::CommandBuffer::addCompletedHandler(const HandlerFunction& function) +{ + __block HandlerFunction blockFunction = function; + + addCompletedHandler(^(MTL::CommandBuffer* pCommandBuffer) { blockFunction(pCommandBuffer); }); +} + +// property: device +_MTL_INLINE MTL::Device* MTL::CommandBuffer::device() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); +} + +// property: commandQueue +_MTL_INLINE MTL::CommandQueue* MTL::CommandBuffer::commandQueue() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(commandQueue)); +} + +// property: retainedReferences +_MTL_INLINE bool MTL::CommandBuffer::retainedReferences() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(retainedReferences)); +} + +// property: errorOptions +_MTL_INLINE MTL::CommandBufferErrorOption MTL::CommandBuffer::errorOptions() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(errorOptions)); +} + +// property: label +_MTL_INLINE NS::String* MTL::CommandBuffer::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::CommandBuffer::setLabel(const NS::String* label) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} + +// property: 
kernelStartTime +_MTL_INLINE CFTimeInterval MTL::CommandBuffer::kernelStartTime() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(kernelStartTime)); +} + +// property: kernelEndTime +_MTL_INLINE CFTimeInterval MTL::CommandBuffer::kernelEndTime() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(kernelEndTime)); +} + +// property: logs +_MTL_INLINE MTL::LogContainer* MTL::CommandBuffer::logs() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(logs)); +} + +// property: GPUStartTime +_MTL_INLINE CFTimeInterval MTL::CommandBuffer::GPUStartTime() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(GPUStartTime)); +} + +// property: GPUEndTime +_MTL_INLINE CFTimeInterval MTL::CommandBuffer::GPUEndTime() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(GPUEndTime)); +} + +// method: enqueue +_MTL_INLINE void MTL::CommandBuffer::enqueue() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(enqueue)); +} + +// method: commit +_MTL_INLINE void MTL::CommandBuffer::commit() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(commit)); +} + +// method: addScheduledHandler: +_MTL_INLINE void MTL::CommandBuffer::addScheduledHandler(const MTL::CommandBufferHandler block) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(addScheduledHandler_), block); +} + +// method: presentDrawable: +_MTL_INLINE void MTL::CommandBuffer::presentDrawable(const MTL::Drawable* drawable) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(presentDrawable_), drawable); +} + +// method: presentDrawable:atTime: +_MTL_INLINE void MTL::CommandBuffer::presentDrawableAtTime(const MTL::Drawable* drawable, CFTimeInterval presentationTime) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(presentDrawable_atTime_), drawable, presentationTime); +} + +// method: presentDrawable:afterMinimumDuration: +_MTL_INLINE void MTL::CommandBuffer::presentDrawableAfterMinimumDuration(const MTL::Drawable* drawable, CFTimeInterval duration) +{ + Object::sendMessage(this, 
_MTL_PRIVATE_SEL(presentDrawable_afterMinimumDuration_), drawable, duration); +} + +// method: waitUntilScheduled +_MTL_INLINE void MTL::CommandBuffer::waitUntilScheduled() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(waitUntilScheduled)); +} + +// method: addCompletedHandler: +_MTL_INLINE void MTL::CommandBuffer::addCompletedHandler(const MTL::CommandBufferHandler block) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(addCompletedHandler_), block); +} + +// method: waitUntilCompleted +_MTL_INLINE void MTL::CommandBuffer::waitUntilCompleted() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(waitUntilCompleted)); +} + +// property: status +_MTL_INLINE MTL::CommandBufferStatus MTL::CommandBuffer::status() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(status)); +} + +// property: error +_MTL_INLINE NS::Error* MTL::CommandBuffer::error() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(error)); +} + +// method: blitCommandEncoder +_MTL_INLINE MTL::BlitCommandEncoder* MTL::CommandBuffer::blitCommandEncoder() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(blitCommandEncoder)); +} + +// method: renderCommandEncoderWithDescriptor: +_MTL_INLINE MTL::RenderCommandEncoder* MTL::CommandBuffer::renderCommandEncoder(const MTL::RenderPassDescriptor* renderPassDescriptor) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(renderCommandEncoderWithDescriptor_), renderPassDescriptor); +} + +// method: computeCommandEncoderWithDescriptor: +_MTL_INLINE MTL::ComputeCommandEncoder* MTL::CommandBuffer::computeCommandEncoder(const MTL::ComputePassDescriptor* computePassDescriptor) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(computeCommandEncoderWithDescriptor_), computePassDescriptor); +} + +// method: blitCommandEncoderWithDescriptor: +_MTL_INLINE MTL::BlitCommandEncoder* MTL::CommandBuffer::blitCommandEncoder(const MTL::BlitPassDescriptor* blitPassDescriptor) +{ + return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(blitCommandEncoderWithDescriptor_), blitPassDescriptor); +} + +// method: computeCommandEncoder +_MTL_INLINE MTL::ComputeCommandEncoder* MTL::CommandBuffer::computeCommandEncoder() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(computeCommandEncoder)); +} + +// method: computeCommandEncoderWithDispatchType: +_MTL_INLINE MTL::ComputeCommandEncoder* MTL::CommandBuffer::computeCommandEncoder(MTL::DispatchType dispatchType) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(computeCommandEncoderWithDispatchType_), dispatchType); +} + +// method: encodeWaitForEvent:value: +_MTL_INLINE void MTL::CommandBuffer::encodeWait(const MTL::Event* event, uint64_t value) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(encodeWaitForEvent_value_), event, value); +} + +// method: encodeSignalEvent:value: +_MTL_INLINE void MTL::CommandBuffer::encodeSignalEvent(const MTL::Event* event, uint64_t value) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(encodeSignalEvent_value_), event, value); +} + +// method: parallelRenderCommandEncoderWithDescriptor: +_MTL_INLINE MTL::ParallelRenderCommandEncoder* MTL::CommandBuffer::parallelRenderCommandEncoder(const MTL::RenderPassDescriptor* renderPassDescriptor) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(parallelRenderCommandEncoderWithDescriptor_), renderPassDescriptor); +} + +// method: resourceStateCommandEncoder +_MTL_INLINE MTL::ResourceStateCommandEncoder* MTL::CommandBuffer::resourceStateCommandEncoder() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(resourceStateCommandEncoder)); +} + +// method: resourceStateCommandEncoderWithDescriptor: +_MTL_INLINE MTL::ResourceStateCommandEncoder* MTL::CommandBuffer::resourceStateCommandEncoder(const MTL::ResourceStatePassDescriptor* resourceStatePassDescriptor) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(resourceStateCommandEncoderWithDescriptor_), resourceStatePassDescriptor); +} + +// method: accelerationStructureCommandEncoder +_MTL_INLINE 
MTL::AccelerationStructureCommandEncoder* MTL::CommandBuffer::accelerationStructureCommandEncoder() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(accelerationStructureCommandEncoder)); +} + +// method: accelerationStructureCommandEncoderWithDescriptor: +_MTL_INLINE MTL::AccelerationStructureCommandEncoder* MTL::CommandBuffer::accelerationStructureCommandEncoder(const MTL::AccelerationStructurePassDescriptor* descriptor) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(accelerationStructureCommandEncoderWithDescriptor_), descriptor); +} + +// method: pushDebugGroup: +_MTL_INLINE void MTL::CommandBuffer::pushDebugGroup(const NS::String* string) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(pushDebugGroup_), string); +} + +// method: popDebugGroup +_MTL_INLINE void MTL::CommandBuffer::popDebugGroup() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(popDebugGroup)); +} diff --git a/metal-cpp/Metal/MTLCommandEncoder.hpp b/metal-cpp/Metal/MTLCommandEncoder.hpp new file mode 100644 index 00000000..8b5e6651 --- /dev/null +++ b/metal-cpp/Metal/MTLCommandEncoder.hpp @@ -0,0 +1,101 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLCommandEncoder.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +namespace MTL +{ +_MTL_OPTIONS(NS::UInteger, ResourceUsage) { + ResourceUsageRead = 1, + ResourceUsageWrite = 2, + ResourceUsageSample = 4, +}; + +_MTL_OPTIONS(NS::UInteger, BarrierScope) { + BarrierScopeBuffers = 1, + BarrierScopeTextures = 2, + BarrierScopeRenderTargets = 4, +}; + +class CommandEncoder : public NS::Referencing +{ +public: + class Device* device() const; + + NS::String* label() const; + void setLabel(const NS::String* label); + + void endEncoding(); + + void insertDebugSignpost(const NS::String* string); + + void pushDebugGroup(const NS::String* string); + + void popDebugGroup(); +}; + +} + +// property: device +_MTL_INLINE MTL::Device* MTL::CommandEncoder::device() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); +} + +// property: label +_MTL_INLINE NS::String* MTL::CommandEncoder::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::CommandEncoder::setLabel(const NS::String* label) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} + +// method: endEncoding +_MTL_INLINE void MTL::CommandEncoder::endEncoding() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(endEncoding)); +} + +// method: insertDebugSignpost: +_MTL_INLINE void MTL::CommandEncoder::insertDebugSignpost(const NS::String* string) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(insertDebugSignpost_), string); +} + +// method: pushDebugGroup: +_MTL_INLINE void MTL::CommandEncoder::pushDebugGroup(const NS::String* string) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(pushDebugGroup_), string); +} + +// method: popDebugGroup +_MTL_INLINE void MTL::CommandEncoder::popDebugGroup() +{ + Object::sendMessage(this, 
_MTL_PRIVATE_SEL(popDebugGroup)); +} diff --git a/metal-cpp/Metal/MTLCommandQueue.hpp b/metal-cpp/Metal/MTLCommandQueue.hpp new file mode 100644 index 00000000..42678af9 --- /dev/null +++ b/metal-cpp/Metal/MTLCommandQueue.hpp @@ -0,0 +1,89 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLCommandQueue.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +namespace MTL +{ +class CommandQueue : public NS::Referencing +{ +public: + NS::String* label() const; + void setLabel(const NS::String* label); + + class Device* device() const; + + class CommandBuffer* commandBuffer(); + + class CommandBuffer* commandBuffer(const class CommandBufferDescriptor* descriptor); + + class CommandBuffer* commandBufferWithUnretainedReferences(); + + void insertDebugCaptureBoundary(); +}; + +} + +// property: label +_MTL_INLINE NS::String* MTL::CommandQueue::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::CommandQueue::setLabel(const NS::String* label) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} + +// property: device +_MTL_INLINE MTL::Device* MTL::CommandQueue::device() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); +} + +// method: commandBuffer +_MTL_INLINE MTL::CommandBuffer* MTL::CommandQueue::commandBuffer() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(commandBuffer)); +} + +// method: commandBufferWithDescriptor: +_MTL_INLINE MTL::CommandBuffer* MTL::CommandQueue::commandBuffer(const MTL::CommandBufferDescriptor* descriptor) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(commandBufferWithDescriptor_), descriptor); +} + +// method: commandBufferWithUnretainedReferences +_MTL_INLINE MTL::CommandBuffer* MTL::CommandQueue::commandBufferWithUnretainedReferences() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(commandBufferWithUnretainedReferences)); +} + +// method: insertDebugCaptureBoundary +_MTL_INLINE void MTL::CommandQueue::insertDebugCaptureBoundary() +{ + Object::sendMessage(this, 
_MTL_PRIVATE_SEL(insertDebugCaptureBoundary)); +} diff --git a/metal-cpp/Metal/MTLComputeCommandEncoder.hpp b/metal-cpp/Metal/MTLComputeCommandEncoder.hpp new file mode 100644 index 00000000..f3afd00b --- /dev/null +++ b/metal-cpp/Metal/MTLComputeCommandEncoder.hpp @@ -0,0 +1,330 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLComputeCommandEncoder.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLCommandBuffer.hpp" +#include "MTLCommandEncoder.hpp" +#include "MTLTypes.hpp" + +namespace MTL +{ +struct DispatchThreadgroupsIndirectArguments +{ + uint32_t threadgroupsPerGrid[3]; +} _MTL_PACKED; + +struct StageInRegionIndirectArguments +{ + uint32_t stageInOrigin[3]; + uint32_t stageInSize[3]; +} _MTL_PACKED; + +class ComputeCommandEncoder : public NS::Referencing +{ +public: + MTL::DispatchType dispatchType() const; + + void setComputePipelineState(const class ComputePipelineState* state); + + void setBytes(const void* bytes, NS::UInteger length, NS::UInteger index); + + void setBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index); + + void setBufferOffset(NS::UInteger offset, NS::UInteger index); + + void setBuffers(const class Buffer* const buffers[], const NS::UInteger offsets[], NS::Range range); + + void setVisibleFunctionTable(const class VisibleFunctionTable* visibleFunctionTable, NS::UInteger bufferIndex); + + void setVisibleFunctionTables(const class VisibleFunctionTable* const visibleFunctionTables[], NS::Range range); + + void setIntersectionFunctionTable(const class IntersectionFunctionTable* intersectionFunctionTable, NS::UInteger bufferIndex); + + void setIntersectionFunctionTables(const class IntersectionFunctionTable* const intersectionFunctionTables[], NS::Range range); + + void setAccelerationStructure(const class AccelerationStructure* accelerationStructure, NS::UInteger bufferIndex); + + void setTexture(const class Texture* texture, NS::UInteger index); + + void setTextures(const class Texture* const textures[], NS::Range range); + + void setSamplerState(const class SamplerState* sampler, NS::UInteger index); + + void 
setSamplerStates(const class SamplerState* const samplers[], NS::Range range); + + void setSamplerState(const class SamplerState* sampler, float lodMinClamp, float lodMaxClamp, NS::UInteger index); + + void setSamplerStates(const class SamplerState* const samplers[], const float lodMinClamps[], const float lodMaxClamps[], NS::Range range); + + void setThreadgroupMemoryLength(NS::UInteger length, NS::UInteger index); + + void setImageblockWidth(NS::UInteger width, NS::UInteger height); + + void setStageInRegion(MTL::Region region); + + void setStageInRegion(const class Buffer* indirectBuffer, NS::UInteger indirectBufferOffset); + + void dispatchThreadgroups(MTL::Size threadgroupsPerGrid, MTL::Size threadsPerThreadgroup); + + void dispatchThreadgroups(const class Buffer* indirectBuffer, NS::UInteger indirectBufferOffset, MTL::Size threadsPerThreadgroup); + + void dispatchThreads(MTL::Size threadsPerGrid, MTL::Size threadsPerThreadgroup); + + void updateFence(const class Fence* fence); + + void waitForFence(const class Fence* fence); + + void useResource(const class Resource* resource, MTL::ResourceUsage usage); + + void useResources(const class Resource* const resources[], NS::UInteger count, MTL::ResourceUsage usage); + + void useHeap(const class Heap* heap); + + void useHeaps(const class Heap* const heaps[], NS::UInteger count); + + void executeCommandsInBuffer(const class IndirectCommandBuffer* indirectCommandBuffer, NS::Range executionRange); + + void executeCommandsInBuffer(const class IndirectCommandBuffer* indirectCommandbuffer, const class Buffer* indirectRangeBuffer, NS::UInteger indirectBufferOffset); + + void memoryBarrier(MTL::BarrierScope scope); + + void memoryBarrier(const class Resource* const resources[], NS::UInteger count); + + void sampleCountersInBuffer(const class CounterSampleBuffer* sampleBuffer, NS::UInteger sampleIndex, bool barrier); +}; + +} + +// property: dispatchType +_MTL_INLINE MTL::DispatchType 
MTL::ComputeCommandEncoder::dispatchType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(dispatchType)); +} + +// method: setComputePipelineState: +_MTL_INLINE void MTL::ComputeCommandEncoder::setComputePipelineState(const MTL::ComputePipelineState* state) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setComputePipelineState_), state); +} + +// method: setBytes:length:atIndex: +_MTL_INLINE void MTL::ComputeCommandEncoder::setBytes(const void* bytes, NS::UInteger length, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBytes_length_atIndex_), bytes, length, index); +} + +// method: setBuffer:offset:atIndex: +_MTL_INLINE void MTL::ComputeCommandEncoder::setBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBuffer_offset_atIndex_), buffer, offset, index); +} + +// method: setBufferOffset:atIndex: +_MTL_INLINE void MTL::ComputeCommandEncoder::setBufferOffset(NS::UInteger offset, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBufferOffset_atIndex_), offset, index); +} + +// method: setBuffers:offsets:withRange: +_MTL_INLINE void MTL::ComputeCommandEncoder::setBuffers(const MTL::Buffer* const buffers[], const NS::UInteger offsets[], NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBuffers_offsets_withRange_), buffers, offsets, range); +} + +// method: setVisibleFunctionTable:atBufferIndex: +_MTL_INLINE void MTL::ComputeCommandEncoder::setVisibleFunctionTable(const MTL::VisibleFunctionTable* visibleFunctionTable, NS::UInteger bufferIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVisibleFunctionTable_atBufferIndex_), visibleFunctionTable, bufferIndex); +} + +// method: setVisibleFunctionTables:withBufferRange: +_MTL_INLINE void MTL::ComputeCommandEncoder::setVisibleFunctionTables(const MTL::VisibleFunctionTable* const visibleFunctionTables[], NS::Range range) +{ + Object::sendMessage(this, 
_MTL_PRIVATE_SEL(setVisibleFunctionTables_withBufferRange_), visibleFunctionTables, range); +} + +// method: setIntersectionFunctionTable:atBufferIndex: +_MTL_INLINE void MTL::ComputeCommandEncoder::setIntersectionFunctionTable(const MTL::IntersectionFunctionTable* intersectionFunctionTable, NS::UInteger bufferIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIntersectionFunctionTable_atBufferIndex_), intersectionFunctionTable, bufferIndex); +} + +// method: setIntersectionFunctionTables:withBufferRange: +_MTL_INLINE void MTL::ComputeCommandEncoder::setIntersectionFunctionTables(const MTL::IntersectionFunctionTable* const intersectionFunctionTables[], NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIntersectionFunctionTables_withBufferRange_), intersectionFunctionTables, range); +} + +// method: setAccelerationStructure:atBufferIndex: +_MTL_INLINE void MTL::ComputeCommandEncoder::setAccelerationStructure(const MTL::AccelerationStructure* accelerationStructure, NS::UInteger bufferIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setAccelerationStructure_atBufferIndex_), accelerationStructure, bufferIndex); +} + +// method: setTexture:atIndex: +_MTL_INLINE void MTL::ComputeCommandEncoder::setTexture(const MTL::Texture* texture, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTexture_atIndex_), texture, index); +} + +// method: setTextures:withRange: +_MTL_INLINE void MTL::ComputeCommandEncoder::setTextures(const MTL::Texture* const textures[], NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTextures_withRange_), textures, range); +} + +// method: setSamplerState:atIndex: +_MTL_INLINE void MTL::ComputeCommandEncoder::setSamplerState(const MTL::SamplerState* sampler, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSamplerState_atIndex_), sampler, index); +} + +// method: setSamplerStates:withRange: +_MTL_INLINE void MTL::ComputeCommandEncoder::setSamplerStates(const 
MTL::SamplerState* const samplers[], NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSamplerStates_withRange_), samplers, range); +} + +// method: setSamplerState:lodMinClamp:lodMaxClamp:atIndex: +_MTL_INLINE void MTL::ComputeCommandEncoder::setSamplerState(const MTL::SamplerState* sampler, float lodMinClamp, float lodMaxClamp, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSamplerState_lodMinClamp_lodMaxClamp_atIndex_), sampler, lodMinClamp, lodMaxClamp, index); +} + +// method: setSamplerStates:lodMinClamps:lodMaxClamps:withRange: +_MTL_INLINE void MTL::ComputeCommandEncoder::setSamplerStates(const MTL::SamplerState* const samplers[], const float lodMinClamps[], const float lodMaxClamps[], NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSamplerStates_lodMinClamps_lodMaxClamps_withRange_), samplers, lodMinClamps, lodMaxClamps, range); +} + +// method: setThreadgroupMemoryLength:atIndex: +_MTL_INLINE void MTL::ComputeCommandEncoder::setThreadgroupMemoryLength(NS::UInteger length, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setThreadgroupMemoryLength_atIndex_), length, index); +} + +// method: setImageblockWidth:height: +_MTL_INLINE void MTL::ComputeCommandEncoder::setImageblockWidth(NS::UInteger width, NS::UInteger height) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setImageblockWidth_height_), width, height); +} + +// method: setStageInRegion: +_MTL_INLINE void MTL::ComputeCommandEncoder::setStageInRegion(MTL::Region region) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStageInRegion_), region); +} + +// method: setStageInRegionWithIndirectBuffer:indirectBufferOffset: +_MTL_INLINE void MTL::ComputeCommandEncoder::setStageInRegion(const MTL::Buffer* indirectBuffer, NS::UInteger indirectBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStageInRegionWithIndirectBuffer_indirectBufferOffset_), indirectBuffer, indirectBufferOffset); +} + +// method: 
dispatchThreadgroups:threadsPerThreadgroup: +_MTL_INLINE void MTL::ComputeCommandEncoder::dispatchThreadgroups(MTL::Size threadgroupsPerGrid, MTL::Size threadsPerThreadgroup) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(dispatchThreadgroups_threadsPerThreadgroup_), threadgroupsPerGrid, threadsPerThreadgroup); +} + +// method: dispatchThreadgroupsWithIndirectBuffer:indirectBufferOffset:threadsPerThreadgroup: +_MTL_INLINE void MTL::ComputeCommandEncoder::dispatchThreadgroups(const MTL::Buffer* indirectBuffer, NS::UInteger indirectBufferOffset, MTL::Size threadsPerThreadgroup) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(dispatchThreadgroupsWithIndirectBuffer_indirectBufferOffset_threadsPerThreadgroup_), indirectBuffer, indirectBufferOffset, threadsPerThreadgroup); +} + +// method: dispatchThreads:threadsPerThreadgroup: +_MTL_INLINE void MTL::ComputeCommandEncoder::dispatchThreads(MTL::Size threadsPerGrid, MTL::Size threadsPerThreadgroup) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(dispatchThreads_threadsPerThreadgroup_), threadsPerGrid, threadsPerThreadgroup); +} + +// method: updateFence: +_MTL_INLINE void MTL::ComputeCommandEncoder::updateFence(const MTL::Fence* fence) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(updateFence_), fence); +} + +// method: waitForFence: +_MTL_INLINE void MTL::ComputeCommandEncoder::waitForFence(const MTL::Fence* fence) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(waitForFence_), fence); +} + +// method: useResource:usage: +_MTL_INLINE void MTL::ComputeCommandEncoder::useResource(const MTL::Resource* resource, MTL::ResourceUsage usage) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(useResource_usage_), resource, usage); +} + +// method: useResources:count:usage: +_MTL_INLINE void MTL::ComputeCommandEncoder::useResources(const MTL::Resource* const resources[], NS::UInteger count, MTL::ResourceUsage usage) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(useResources_count_usage_), resources, count, usage); +} + +// 
method: useHeap: +_MTL_INLINE void MTL::ComputeCommandEncoder::useHeap(const MTL::Heap* heap) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(useHeap_), heap); +} + +// method: useHeaps:count: +_MTL_INLINE void MTL::ComputeCommandEncoder::useHeaps(const MTL::Heap* const heaps[], NS::UInteger count) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(useHeaps_count_), heaps, count); +} + +// method: executeCommandsInBuffer:withRange: +_MTL_INLINE void MTL::ComputeCommandEncoder::executeCommandsInBuffer(const MTL::IndirectCommandBuffer* indirectCommandBuffer, NS::Range executionRange) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(executeCommandsInBuffer_withRange_), indirectCommandBuffer, executionRange); +} + +// method: executeCommandsInBuffer:indirectBuffer:indirectBufferOffset: +_MTL_INLINE void MTL::ComputeCommandEncoder::executeCommandsInBuffer(const MTL::IndirectCommandBuffer* indirectCommandbuffer, const MTL::Buffer* indirectRangeBuffer, NS::UInteger indirectBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(executeCommandsInBuffer_indirectBuffer_indirectBufferOffset_), indirectCommandbuffer, indirectRangeBuffer, indirectBufferOffset); +} + +// method: memoryBarrierWithScope: +_MTL_INLINE void MTL::ComputeCommandEncoder::memoryBarrier(MTL::BarrierScope scope) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(memoryBarrierWithScope_), scope); +} + +// method: memoryBarrierWithResources:count: +_MTL_INLINE void MTL::ComputeCommandEncoder::memoryBarrier(const MTL::Resource* const resources[], NS::UInteger count) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(memoryBarrierWithResources_count_), resources, count); +} + +// method: sampleCountersInBuffer:atSampleIndex:withBarrier: +_MTL_INLINE void MTL::ComputeCommandEncoder::sampleCountersInBuffer(const MTL::CounterSampleBuffer* sampleBuffer, NS::UInteger sampleIndex, bool barrier) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(sampleCountersInBuffer_atSampleIndex_withBarrier_), sampleBuffer, sampleIndex, 
barrier); +} diff --git a/metal-cpp/Metal/MTLComputePass.hpp b/metal-cpp/Metal/MTLComputePass.hpp new file mode 100644 index 00000000..6cc79f8e --- /dev/null +++ b/metal-cpp/Metal/MTLComputePass.hpp @@ -0,0 +1,181 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLComputePass.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLCommandBuffer.hpp" + +namespace MTL +{ +class ComputePassSampleBufferAttachmentDescriptor : public NS::Copying +{ +public: + static class ComputePassSampleBufferAttachmentDescriptor* alloc(); + + class ComputePassSampleBufferAttachmentDescriptor* init(); + + class CounterSampleBuffer* sampleBuffer() const; + void setSampleBuffer(const class CounterSampleBuffer* sampleBuffer); + + NS::UInteger startOfEncoderSampleIndex() const; + void setStartOfEncoderSampleIndex(NS::UInteger startOfEncoderSampleIndex); + + NS::UInteger endOfEncoderSampleIndex() const; + void setEndOfEncoderSampleIndex(NS::UInteger endOfEncoderSampleIndex); +}; + +class ComputePassSampleBufferAttachmentDescriptorArray : public NS::Referencing +{ +public: + static class ComputePassSampleBufferAttachmentDescriptorArray* alloc(); + + class ComputePassSampleBufferAttachmentDescriptorArray* init(); + + class ComputePassSampleBufferAttachmentDescriptor* object(NS::UInteger attachmentIndex); + + void setObject(const class ComputePassSampleBufferAttachmentDescriptor* attachment, NS::UInteger attachmentIndex); +}; + +class ComputePassDescriptor : public NS::Copying +{ +public: + static class ComputePassDescriptor* alloc(); + + class ComputePassDescriptor* init(); + + static class ComputePassDescriptor* computePassDescriptor(); + + MTL::DispatchType dispatchType() const; + void setDispatchType(MTL::DispatchType dispatchType); + + class ComputePassSampleBufferAttachmentDescriptorArray* sampleBufferAttachments() const; +}; + +} + +// static method: alloc +_MTL_INLINE MTL::ComputePassSampleBufferAttachmentDescriptor* MTL::ComputePassSampleBufferAttachmentDescriptor::alloc() +{ + return 
NS::Object::alloc(_MTL_PRIVATE_CLS(MTLComputePassSampleBufferAttachmentDescriptor)); +} + +// method: init +_MTL_INLINE MTL::ComputePassSampleBufferAttachmentDescriptor* MTL::ComputePassSampleBufferAttachmentDescriptor::init() +{ + return NS::Object::init(); +} + +// property: sampleBuffer +_MTL_INLINE MTL::CounterSampleBuffer* MTL::ComputePassSampleBufferAttachmentDescriptor::sampleBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sampleBuffer)); +} + +_MTL_INLINE void MTL::ComputePassSampleBufferAttachmentDescriptor::setSampleBuffer(const MTL::CounterSampleBuffer* sampleBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSampleBuffer_), sampleBuffer); +} + +// property: startOfEncoderSampleIndex +_MTL_INLINE NS::UInteger MTL::ComputePassSampleBufferAttachmentDescriptor::startOfEncoderSampleIndex() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(startOfEncoderSampleIndex)); +} + +_MTL_INLINE void MTL::ComputePassSampleBufferAttachmentDescriptor::setStartOfEncoderSampleIndex(NS::UInteger startOfEncoderSampleIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStartOfEncoderSampleIndex_), startOfEncoderSampleIndex); +} + +// property: endOfEncoderSampleIndex +_MTL_INLINE NS::UInteger MTL::ComputePassSampleBufferAttachmentDescriptor::endOfEncoderSampleIndex() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(endOfEncoderSampleIndex)); +} + +_MTL_INLINE void MTL::ComputePassSampleBufferAttachmentDescriptor::setEndOfEncoderSampleIndex(NS::UInteger endOfEncoderSampleIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setEndOfEncoderSampleIndex_), endOfEncoderSampleIndex); +} + +// static method: alloc +_MTL_INLINE MTL::ComputePassSampleBufferAttachmentDescriptorArray* MTL::ComputePassSampleBufferAttachmentDescriptorArray::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLComputePassSampleBufferAttachmentDescriptorArray)); +} + +// method: init +_MTL_INLINE 
MTL::ComputePassSampleBufferAttachmentDescriptorArray* MTL::ComputePassSampleBufferAttachmentDescriptorArray::init() +{ + return NS::Object::init(); +} + +// method: objectAtIndexedSubscript: +_MTL_INLINE MTL::ComputePassSampleBufferAttachmentDescriptor* MTL::ComputePassSampleBufferAttachmentDescriptorArray::object(NS::UInteger attachmentIndex) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), attachmentIndex); +} + +// method: setObject:atIndexedSubscript: +_MTL_INLINE void MTL::ComputePassSampleBufferAttachmentDescriptorArray::setObject(const MTL::ComputePassSampleBufferAttachmentDescriptor* attachment, NS::UInteger attachmentIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), attachment, attachmentIndex); +} + +// static method: alloc +_MTL_INLINE MTL::ComputePassDescriptor* MTL::ComputePassDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLComputePassDescriptor)); +} + +// method: init +_MTL_INLINE MTL::ComputePassDescriptor* MTL::ComputePassDescriptor::init() +{ + return NS::Object::init(); +} + +// static method: computePassDescriptor +_MTL_INLINE MTL::ComputePassDescriptor* MTL::ComputePassDescriptor::computePassDescriptor() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLComputePassDescriptor), _MTL_PRIVATE_SEL(computePassDescriptor)); +} + +// property: dispatchType +_MTL_INLINE MTL::DispatchType MTL::ComputePassDescriptor::dispatchType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(dispatchType)); +} + +_MTL_INLINE void MTL::ComputePassDescriptor::setDispatchType(MTL::DispatchType dispatchType) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setDispatchType_), dispatchType); +} + +// property: sampleBufferAttachments +_MTL_INLINE MTL::ComputePassSampleBufferAttachmentDescriptorArray* MTL::ComputePassDescriptor::sampleBufferAttachments() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sampleBufferAttachments)); +} diff --git 
a/metal-cpp/Metal/MTLComputePipeline.hpp b/metal-cpp/Metal/MTLComputePipeline.hpp new file mode 100644 index 00000000..35275a39 --- /dev/null +++ b/metal-cpp/Metal/MTLComputePipeline.hpp @@ -0,0 +1,373 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLComputePipeline.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLTypes.hpp"
+// Wrapper declarations for the compute-pipeline classes; the inline definitions after the namespace forward each member to the Objective-C selector named in its comment.
+namespace MTL
+{
+class ComputePipelineReflection : public NS::Referencing<ComputePipelineReflection>
+{
+public:
+    static class ComputePipelineReflection* alloc();
+
+    class ComputePipelineReflection* init();
+
+    NS::Array* bindings() const;
+
+    NS::Array* arguments() const;
+};
+// Descriptor: the getter/setter pairs below map one-to-one onto the selectors used in the definitions (label / setLabel_, computeFunction / setComputeFunction_, ...).
+class ComputePipelineDescriptor : public NS::Copying<ComputePipelineDescriptor>
+{
+public:
+    static class ComputePipelineDescriptor* alloc();
+
+    class ComputePipelineDescriptor* init();
+
+    NS::String* label() const;
+    void setLabel(const NS::String* label);
+
+    class Function* computeFunction() const;
+    void setComputeFunction(const class Function* computeFunction);
+
+    bool threadGroupSizeIsMultipleOfThreadExecutionWidth() const;
+    void setThreadGroupSizeIsMultipleOfThreadExecutionWidth(bool threadGroupSizeIsMultipleOfThreadExecutionWidth);
+
+    NS::UInteger maxTotalThreadsPerThreadgroup() const;
+    void setMaxTotalThreadsPerThreadgroup(NS::UInteger maxTotalThreadsPerThreadgroup);
+
+    class StageInputOutputDescriptor* stageInputDescriptor() const;
+    void setStageInputDescriptor(const class StageInputOutputDescriptor* stageInputDescriptor);
+
+    class PipelineBufferDescriptorArray* buffers() const;
+
+    bool supportIndirectCommandBuffers() const;
+    void setSupportIndirectCommandBuffers(bool supportIndirectCommandBuffers);
+
+    NS::Array* insertLibraries() const;
+    void setInsertLibraries(const NS::Array* insertLibraries);
+
+    NS::Array* preloadedLibraries() const;
+    void setPreloadedLibraries(const NS::Array* preloadedLibraries);
+
+    NS::Array* binaryArchives() const;
+    void setBinaryArchives(const NS::Array* binaryArchives);
+
+    void reset();
+
+    class LinkedFunctions* linkedFunctions() const;
+    void setLinkedFunctions(const class LinkedFunctions* linkedFunctions);
+
+    bool supportAddingBinaryFunctions() const;
+    void setSupportAddingBinaryFunctions(bool supportAddingBinaryFunctions);
+
+    NS::UInteger maxCallStackDepth() const;
+    void setMaxCallStackDepth(NS::UInteger maxCallStackDepth);
+};
+// Pipeline state: only getters are declared (no setters), plus the function-handle lookup and the new* creation methods.
+class ComputePipelineState : public NS::Referencing<ComputePipelineState>
+{
+public:
+    NS::String* label() const;
+
+    class Device* device() const;
+
+    NS::UInteger maxTotalThreadsPerThreadgroup() const;
+
+    NS::UInteger threadExecutionWidth() const;
+
+    NS::UInteger staticThreadgroupMemoryLength() const;
+
+    NS::UInteger imageblockMemoryLength(MTL::Size imageblockDimensions);
+
+    bool supportIndirectCommandBuffers() const;
+
+    MTL::ResourceID gpuResourceID() const;
+
+    class FunctionHandle* functionHandle(const class Function* function);
+
+    class ComputePipelineState* newComputePipelineState(const NS::Array* functions, NS::Error** error);
+
+    class VisibleFunctionTable* newVisibleFunctionTable(const class VisibleFunctionTableDescriptor* descriptor);
+
+    class IntersectionFunctionTable* newIntersectionFunctionTable(const class IntersectionFunctionTableDescriptor* descriptor);
+};
+
+}
+
+// static method: alloc
+_MTL_INLINE MTL::ComputePipelineReflection* MTL::ComputePipelineReflection::alloc()
+{
+    return NS::Object::alloc<MTL::ComputePipelineReflection>(_MTL_PRIVATE_CLS(MTLComputePipelineReflection));
+}
+
+// method: init
+_MTL_INLINE MTL::ComputePipelineReflection* MTL::ComputePipelineReflection::init()
+{
+    return NS::Object::init<MTL::ComputePipelineReflection>();
+}
+
+// property: bindings
+_MTL_INLINE NS::Array* MTL::ComputePipelineReflection::bindings() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(bindings));
+}
+
+// property: arguments
+_MTL_INLINE NS::Array* MTL::ComputePipelineReflection::arguments() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(arguments));
+}
+
+// static method: alloc
+_MTL_INLINE MTL::ComputePipelineDescriptor* MTL::ComputePipelineDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::ComputePipelineDescriptor>(_MTL_PRIVATE_CLS(MTLComputePipelineDescriptor));
+}
+
+// method: init
+_MTL_INLINE MTL::ComputePipelineDescriptor* MTL::ComputePipelineDescriptor::init()
+{
+    return NS::Object::init<MTL::ComputePipelineDescriptor>();
+}
+
+// property: label
+_MTL_INLINE NS::String* MTL::ComputePipelineDescriptor::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setLabel(const NS::String* label)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// property: computeFunction
+_MTL_INLINE MTL::Function* MTL::ComputePipelineDescriptor::computeFunction() const
+{
+    return Object::sendMessage<MTL::Function*>(this, _MTL_PRIVATE_SEL(computeFunction));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setComputeFunction(const MTL::Function* computeFunction)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setComputeFunction_), computeFunction);
+}
+
+// property: threadGroupSizeIsMultipleOfThreadExecutionWidth
+_MTL_INLINE bool MTL::ComputePipelineDescriptor::threadGroupSizeIsMultipleOfThreadExecutionWidth() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(threadGroupSizeIsMultipleOfThreadExecutionWidth));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setThreadGroupSizeIsMultipleOfThreadExecutionWidth(bool threadGroupSizeIsMultipleOfThreadExecutionWidth)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setThreadGroupSizeIsMultipleOfThreadExecutionWidth_), threadGroupSizeIsMultipleOfThreadExecutionWidth);
+}
+
+// property: maxTotalThreadsPerThreadgroup
+_MTL_INLINE NS::UInteger MTL::ComputePipelineDescriptor::maxTotalThreadsPerThreadgroup() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxTotalThreadsPerThreadgroup));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setMaxTotalThreadsPerThreadgroup(NS::UInteger maxTotalThreadsPerThreadgroup)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMaxTotalThreadsPerThreadgroup_), maxTotalThreadsPerThreadgroup);
+}
+
+// property: stageInputDescriptor
+_MTL_INLINE MTL::StageInputOutputDescriptor* MTL::ComputePipelineDescriptor::stageInputDescriptor() const
+{
+    return Object::sendMessage<MTL::StageInputOutputDescriptor*>(this, _MTL_PRIVATE_SEL(stageInputDescriptor));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setStageInputDescriptor(const MTL::StageInputOutputDescriptor* stageInputDescriptor)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStageInputDescriptor_), stageInputDescriptor);
+}
+
+// property: buffers
+_MTL_INLINE MTL::PipelineBufferDescriptorArray* MTL::ComputePipelineDescriptor::buffers() const
+{
+    return Object::sendMessage<MTL::PipelineBufferDescriptorArray*>(this, _MTL_PRIVATE_SEL(buffers));
+}
+
+// property: supportIndirectCommandBuffers (sent with sendMessageSafe, unlike most getters here - presumably tolerates a receiver lacking the selector; confirm in Foundation/NSObject.hpp)
+_MTL_INLINE bool MTL::ComputePipelineDescriptor::supportIndirectCommandBuffers() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportIndirectCommandBuffers));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setSupportIndirectCommandBuffers(bool supportIndirectCommandBuffers)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSupportIndirectCommandBuffers_), supportIndirectCommandBuffers);
+}
+
+// property: insertLibraries
+_MTL_INLINE NS::Array* MTL::ComputePipelineDescriptor::insertLibraries() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(insertLibraries));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setInsertLibraries(const NS::Array* insertLibraries)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setInsertLibraries_), insertLibraries);
+}
+
+// property: preloadedLibraries
+_MTL_INLINE NS::Array* MTL::ComputePipelineDescriptor::preloadedLibraries() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(preloadedLibraries));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setPreloadedLibraries(const NS::Array* preloadedLibraries)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setPreloadedLibraries_), preloadedLibraries);
+}
+
+// property: binaryArchives
+_MTL_INLINE NS::Array* MTL::ComputePipelineDescriptor::binaryArchives() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(binaryArchives));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setBinaryArchives(const NS::Array* binaryArchives)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBinaryArchives_), binaryArchives);
+}
+
+// method: reset
+_MTL_INLINE void MTL::ComputePipelineDescriptor::reset()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(reset));
+}
+
+// property: linkedFunctions
+_MTL_INLINE MTL::LinkedFunctions* MTL::ComputePipelineDescriptor::linkedFunctions() const
+{
+    return Object::sendMessage<MTL::LinkedFunctions*>(this, _MTL_PRIVATE_SEL(linkedFunctions));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setLinkedFunctions(const MTL::LinkedFunctions* linkedFunctions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLinkedFunctions_), linkedFunctions);
+}
+
+// property: supportAddingBinaryFunctions (also sent with sendMessageSafe)
+_MTL_INLINE bool MTL::ComputePipelineDescriptor::supportAddingBinaryFunctions() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportAddingBinaryFunctions));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setSupportAddingBinaryFunctions(bool supportAddingBinaryFunctions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSupportAddingBinaryFunctions_), supportAddingBinaryFunctions);
+}
+
+// property: maxCallStackDepth
+_MTL_INLINE NS::UInteger MTL::ComputePipelineDescriptor::maxCallStackDepth() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxCallStackDepth));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setMaxCallStackDepth(NS::UInteger maxCallStackDepth)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMaxCallStackDepth_), maxCallStackDepth);
+}
+
+// property: label
+_MTL_INLINE NS::String* MTL::ComputePipelineState::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+// property: device
+_MTL_INLINE MTL::Device* MTL::ComputePipelineState::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
+
+// property: maxTotalThreadsPerThreadgroup
+_MTL_INLINE NS::UInteger MTL::ComputePipelineState::maxTotalThreadsPerThreadgroup() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxTotalThreadsPerThreadgroup));
+}
+
+// property: threadExecutionWidth
+_MTL_INLINE NS::UInteger MTL::ComputePipelineState::threadExecutionWidth() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(threadExecutionWidth));
+}
+
+// property: staticThreadgroupMemoryLength
+_MTL_INLINE NS::UInteger MTL::ComputePipelineState::staticThreadgroupMemoryLength() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(staticThreadgroupMemoryLength));
+}
+
+// method: imageblockMemoryLengthForDimensions:
+_MTL_INLINE NS::UInteger MTL::ComputePipelineState::imageblockMemoryLength(MTL::Size imageblockDimensions)
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(imageblockMemoryLengthForDimensions_), imageblockDimensions);
+}
+
+// property: supportIndirectCommandBuffers (also sent with sendMessageSafe)
+_MTL_INLINE bool MTL::ComputePipelineState::supportIndirectCommandBuffers() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportIndirectCommandBuffers));
+}
+
+// property: gpuResourceID
+_MTL_INLINE MTL::ResourceID MTL::ComputePipelineState::gpuResourceID() const
+{
+    return Object::sendMessage<MTL::ResourceID>(this, _MTL_PRIVATE_SEL(gpuResourceID));
+}
+
+// method: functionHandleWithFunction:
+_MTL_INLINE MTL::FunctionHandle* MTL::ComputePipelineState::functionHandle(const MTL::Function* function)
+{
+    return Object::sendMessage<MTL::FunctionHandle*>(this, _MTL_PRIVATE_SEL(functionHandleWithFunction_), function);
+}
+
+// method: newComputePipelineStateWithAdditionalBinaryFunctions:error:
+_MTL_INLINE MTL::ComputePipelineState* MTL::ComputePipelineState::newComputePipelineState(const NS::Array* functions, NS::Error** error)
+{
+    return Object::sendMessage<MTL::ComputePipelineState*>(this, _MTL_PRIVATE_SEL(newComputePipelineStateWithAdditionalBinaryFunctions_error_), functions, error);
+}
+
+// method: newVisibleFunctionTableWithDescriptor: (forwards `descriptor` unchanged to that selector)
+_MTL_INLINE MTL::VisibleFunctionTable* MTL::ComputePipelineState::newVisibleFunctionTable(const MTL::VisibleFunctionTableDescriptor* descriptor)
+{
+    return Object::sendMessage<MTL::VisibleFunctionTable*>(this, _MTL_PRIVATE_SEL(newVisibleFunctionTableWithDescriptor_), descriptor);
+}
+
+// method: newIntersectionFunctionTableWithDescriptor: (forwards `descriptor` unchanged to that selector)
+_MTL_INLINE MTL::IntersectionFunctionTable* MTL::ComputePipelineState::newIntersectionFunctionTable(const MTL::IntersectionFunctionTableDescriptor* descriptor)
+{
+    return Object::sendMessage<MTL::IntersectionFunctionTable*>(this, _MTL_PRIVATE_SEL(newIntersectionFunctionTableWithDescriptor_), descriptor);
+}
diff --git a/metal-cpp/Metal/MTLCounters.hpp b/metal-cpp/Metal/MTLCounters.hpp
new file mode 100644
index 00000000..7bff72c6
--- /dev/null
+++ b/metal-cpp/Metal/MTLCounters.hpp
@@ -0,0 +1,258 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLCounters.hpp
+//
+// Copyright 2020-2022 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLResource.hpp"
+// Counter declarations: three packed result structs made only of uint64_t fields, string-typed counter/counter-set names, and the wrapper classes for counter objects.
+namespace MTL
+{
+struct CounterResultTimestamp
+{
+    uint64_t timestamp;
+} _MTL_PACKED;
+
+struct CounterResultStageUtilization
+{
+    uint64_t totalCycles;
+    uint64_t vertexCycles;
+    uint64_t tessellationCycles;
+    uint64_t postTessellationVertexCycles;
+    uint64_t fragmentCycles;
+    uint64_t renderTargetCycles;
+} _MTL_PACKED;
+
+struct CounterResultStatistic
+{
+    uint64_t tessellationInputPatches;
+    uint64_t vertexInvocations;
+    uint64_t postTessellationVertexInvocations;
+    uint64_t clipperInvocations;
+    uint64_t clipperPrimitivesOut;
+    uint64_t fragmentInvocations;
+    uint64_t fragmentsPassed;
+    uint64_t computeKernelInvocations;
+} _MTL_PACKED;
+
+_MTL_CONST(NS::ErrorDomain, CounterErrorDomain);
+// CommonCounter and CommonCounterSet are both aliases of NS::String*; each _MTL_CONST below has a matching _MTL_PRIVATE_DEF_STR after the namespace.
+using CommonCounter = NS::String*;
+
+_MTL_CONST(CommonCounter, CommonCounterTimestamp);
+_MTL_CONST(CommonCounter, CommonCounterTessellationInputPatches);
+_MTL_CONST(CommonCounter, CommonCounterVertexInvocations);
+_MTL_CONST(CommonCounter, CommonCounterPostTessellationVertexInvocations);
+_MTL_CONST(CommonCounter, CommonCounterClipperInvocations);
+_MTL_CONST(CommonCounter, CommonCounterClipperPrimitivesOut);
+_MTL_CONST(CommonCounter, CommonCounterFragmentInvocations);
+_MTL_CONST(CommonCounter, CommonCounterFragmentsPassed);
+_MTL_CONST(CommonCounter, CommonCounterComputeKernelInvocations);
+_MTL_CONST(CommonCounter, CommonCounterTotalCycles);
+_MTL_CONST(CommonCounter, CommonCounterVertexCycles);
+_MTL_CONST(CommonCounter, CommonCounterTessellationCycles);
+_MTL_CONST(CommonCounter, CommonCounterPostTessellationVertexCycles);
+_MTL_CONST(CommonCounter, CommonCounterFragmentCycles);
+_MTL_CONST(CommonCounter, CommonCounterRenderTargetWriteCycles);
+
+using CommonCounterSet = NS::String*;
+
+_MTL_CONST(CommonCounterSet, CommonCounterSetTimestamp);
+_MTL_CONST(CommonCounterSet, CommonCounterSetStageUtilization);
+_MTL_CONST(CommonCounterSet, CommonCounterSetStatistic);
+// Read-only wrapper exposing just the counter's name.
+class Counter : public NS::Referencing<Counter>
+{
+public:
+    NS::String* name() const;
+};
+// Read-only wrapper exposing a name and an NS::Array returned by counters().
+class CounterSet : public NS::Referencing<CounterSet>
+{
+public:
+    NS::String* name() const;
+
+    NS::Array* counters() const;
+};
+// Descriptor with getter/setter pairs for counterSet, label, storageMode and sampleCount.
+class CounterSampleBufferDescriptor : public NS::Copying<CounterSampleBufferDescriptor>
+{
+public:
+    static class CounterSampleBufferDescriptor* alloc();
+
+    class CounterSampleBufferDescriptor* init();
+
+    class CounterSet* counterSet() const;
+    void setCounterSet(const class CounterSet* counterSet);
+
+    NS::String* label() const;
+    void setLabel(const NS::String* label);
+
+    MTL::StorageMode storageMode() const;
+    void setStorageMode(MTL::StorageMode storageMode);
+
+    NS::UInteger sampleCount() const;
+    void setSampleCount(NS::UInteger sampleCount);
+};
+// Sample buffer: getters only, plus resolveCounterRange(range) which returns an NS::Data*.
+class CounterSampleBuffer : public NS::Referencing<CounterSampleBuffer>
+{
+public:
+    class Device* device() const;
+
+    NS::String* label() const;
+
+    NS::UInteger sampleCount() const;
+
+    NS::Data* resolveCounterRange(NS::Range range);
+};
+
+_MTL_ENUM(NS::Integer, CounterSampleBufferError) {
+    CounterSampleBufferErrorOutOfMemory = 0,
+    CounterSampleBufferErrorInvalid = 1,
+    CounterSampleBufferErrorInternal = 2,
+};
+
+}
+
+_MTL_PRIVATE_DEF_STR(NS::ErrorDomain, CounterErrorDomain);
+
+_MTL_PRIVATE_DEF_STR(MTL::CommonCounter, CommonCounterTimestamp);
+_MTL_PRIVATE_DEF_STR(MTL::CommonCounter, CommonCounterTessellationInputPatches);
+_MTL_PRIVATE_DEF_STR(MTL::CommonCounter, CommonCounterVertexInvocations);
+_MTL_PRIVATE_DEF_STR(MTL::CommonCounter, CommonCounterPostTessellationVertexInvocations);
+_MTL_PRIVATE_DEF_STR(MTL::CommonCounter, CommonCounterClipperInvocations);
+_MTL_PRIVATE_DEF_STR(MTL::CommonCounter, CommonCounterClipperPrimitivesOut);
+_MTL_PRIVATE_DEF_STR(MTL::CommonCounter, CommonCounterFragmentInvocations);
+_MTL_PRIVATE_DEF_STR(MTL::CommonCounter, CommonCounterFragmentsPassed);
+_MTL_PRIVATE_DEF_STR(MTL::CommonCounter, CommonCounterComputeKernelInvocations);
+_MTL_PRIVATE_DEF_STR(MTL::CommonCounter, CommonCounterTotalCycles);
+_MTL_PRIVATE_DEF_STR(MTL::CommonCounter, CommonCounterVertexCycles);
+_MTL_PRIVATE_DEF_STR(MTL::CommonCounter, CommonCounterTessellationCycles);
+_MTL_PRIVATE_DEF_STR(MTL::CommonCounter, CommonCounterPostTessellationVertexCycles);
+_MTL_PRIVATE_DEF_STR(MTL::CommonCounter, CommonCounterFragmentCycles);
+_MTL_PRIVATE_DEF_STR(MTL::CommonCounter, CommonCounterRenderTargetWriteCycles);
+
+_MTL_PRIVATE_DEF_STR(MTL::CommonCounterSet, CommonCounterSetTimestamp);
+_MTL_PRIVATE_DEF_STR(MTL::CommonCounterSet, CommonCounterSetStageUtilization);
+_MTL_PRIVATE_DEF_STR(MTL::CommonCounterSet, CommonCounterSetStatistic);
+// Inline members: each one sends the selector named in its comment to `this`; the explicit template argument is the member's own return type (void for setters).
+// property: name
+_MTL_INLINE NS::String* MTL::Counter::name() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(name));
+}
+
+// property: name
+_MTL_INLINE NS::String* MTL::CounterSet::name() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(name));
+}
+
+// property: counters
+_MTL_INLINE NS::Array* MTL::CounterSet::counters() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(counters));
+}
+
+// static method: alloc
+_MTL_INLINE MTL::CounterSampleBufferDescriptor* MTL::CounterSampleBufferDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::CounterSampleBufferDescriptor>(_MTL_PRIVATE_CLS(MTLCounterSampleBufferDescriptor));
+}
+
+// method: init
+_MTL_INLINE MTL::CounterSampleBufferDescriptor* MTL::CounterSampleBufferDescriptor::init()
+{
+    return NS::Object::init<MTL::CounterSampleBufferDescriptor>();
+}
+
+// property: counterSet
+_MTL_INLINE MTL::CounterSet* MTL::CounterSampleBufferDescriptor::counterSet() const
+{
+    return Object::sendMessage<MTL::CounterSet*>(this, _MTL_PRIVATE_SEL(counterSet));
+}
+
+_MTL_INLINE void MTL::CounterSampleBufferDescriptor::setCounterSet(const MTL::CounterSet* counterSet)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setCounterSet_), counterSet);
+}
+
+// property: label
+_MTL_INLINE NS::String* MTL::CounterSampleBufferDescriptor::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::CounterSampleBufferDescriptor::setLabel(const NS::String* label)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// property: storageMode
+_MTL_INLINE MTL::StorageMode MTL::CounterSampleBufferDescriptor::storageMode() const
+{
+    return Object::sendMessage<MTL::StorageMode>(this, _MTL_PRIVATE_SEL(storageMode));
+}
+
+_MTL_INLINE void MTL::CounterSampleBufferDescriptor::setStorageMode(MTL::StorageMode storageMode)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStorageMode_), storageMode);
+}
+
+// property: sampleCount
+_MTL_INLINE NS::UInteger MTL::CounterSampleBufferDescriptor::sampleCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(sampleCount));
+}
+
+_MTL_INLINE void MTL::CounterSampleBufferDescriptor::setSampleCount(NS::UInteger sampleCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSampleCount_), sampleCount);
+}
+
+// property: device
+_MTL_INLINE MTL::Device* MTL::CounterSampleBuffer::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
+
+// property: label
+_MTL_INLINE NS::String* MTL::CounterSampleBuffer::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+// property: sampleCount
+_MTL_INLINE NS::UInteger MTL::CounterSampleBuffer::sampleCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(sampleCount));
+}
+
+// method: resolveCounterRange:
+_MTL_INLINE NS::Data* MTL::CounterSampleBuffer::resolveCounterRange(NS::Range range)
+{
+    return Object::sendMessage<NS::Data*>(this, _MTL_PRIVATE_SEL(resolveCounterRange_), range);
+}
diff --git a/metal-cpp/Metal/MTLDefines.hpp b/metal-cpp/Metal/MTLDefines.hpp
new file mode 100644
index 00000000..b2aba9d0
--- /dev/null
+++ b/metal-cpp/Metal/MTLDefines.hpp
@@ -0,0 +1,41 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLDefines.hpp
+//
+// Copyright 2020-2022 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "../Foundation/NSDefines.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Object-like aliases: each _MTL_* name expands to the _NS_* macro of the same suffix (expected to come from NSDefines.hpp, included above).
+#define _MTL_EXPORT                  _NS_EXPORT
+#define _MTL_EXTERN                  _NS_EXTERN
+#define _MTL_INLINE                  _NS_INLINE
+#define _MTL_PACKED                  _NS_PACKED
+// Declaration helpers: arguments are forwarded unchanged to the matching _NS_* macro.
+#define _MTL_CONST(type, name)       _NS_CONST(type, name)
+#define _MTL_ENUM(type, name)        _NS_ENUM(type, name)
+#define _MTL_OPTIONS(type, name)     _NS_OPTIONS(type, name)
+// Validation helpers: likewise pure pass-throughs to the _NS_VALIDATE_* macros.
+#define _MTL_VALIDATE_SIZE(ns, name) _NS_VALIDATE_SIZE(ns, name)
+#define _MTL_VALIDATE_ENUM(ns, name) _NS_VALIDATE_ENUM(ns, name)
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Metal/MTLDepthStencil.hpp
b/metal-cpp/Metal/MTLDepthStencil.hpp
new file mode 100644
index 00000000..1cb24764
--- /dev/null
+++ b/metal-cpp/Metal/MTLDepthStencil.hpp
@@ -0,0 +1,269 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLDepthStencil.hpp
+//
+// Copyright 2020-2022 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLDepthStencil.hpp"
+// Depth/stencil declarations: two enums with explicit values 0-7, two descriptor classes and a read-only state class. (The include just above names this same header; #pragma once makes it a no-op.)
+namespace MTL
+{
+_MTL_ENUM(NS::UInteger, CompareFunction) {
+    CompareFunctionNever = 0,
+    CompareFunctionLess = 1,
+    CompareFunctionEqual = 2,
+    CompareFunctionLessEqual = 3,
+    CompareFunctionGreater = 4,
+    CompareFunctionNotEqual = 5,
+    CompareFunctionGreaterEqual = 6,
+    CompareFunctionAlways = 7,
+};
+
+_MTL_ENUM(NS::UInteger, StencilOperation) {
+    StencilOperationKeep = 0,
+    StencilOperationZero = 1,
+    StencilOperationReplace = 2,
+    StencilOperationIncrementClamp = 3,
+    StencilOperationDecrementClamp = 4,
+    StencilOperationInvert = 5,
+    StencilOperationIncrementWrap = 6,
+    StencilOperationDecrementWrap = 7,
+};
+// Stencil descriptor: one compare function, three StencilOperation slots and 32-bit read/write masks, each as a getter/setter pair.
+class StencilDescriptor : public NS::Copying<StencilDescriptor>
+{
+public:
+    static class StencilDescriptor* alloc();
+
+    class StencilDescriptor* init();
+
+    MTL::CompareFunction stencilCompareFunction() const;
+    void setStencilCompareFunction(MTL::CompareFunction stencilCompareFunction);
+
+    MTL::StencilOperation stencilFailureOperation() const;
+    void setStencilFailureOperation(MTL::StencilOperation stencilFailureOperation);
+
+    MTL::StencilOperation depthFailureOperation() const;
+    void setDepthFailureOperation(MTL::StencilOperation depthFailureOperation);
+
+    MTL::StencilOperation depthStencilPassOperation() const;
+    void setDepthStencilPassOperation(MTL::StencilOperation depthStencilPassOperation);
+
+    uint32_t readMask() const;
+    void setReadMask(uint32_t readMask);
+
+    uint32_t writeMask() const;
+    void setWriteMask(uint32_t writeMask);
+};
+// Depth/stencil descriptor: depth compare function, depth-write flag, front/back StencilDescriptor pointers and a label.
+class DepthStencilDescriptor : public NS::Copying<DepthStencilDescriptor>
+{
+public:
+    static class DepthStencilDescriptor* alloc();
+
+    class DepthStencilDescriptor* init();
+
+    MTL::CompareFunction depthCompareFunction() const;
+    void setDepthCompareFunction(MTL::CompareFunction depthCompareFunction);
+
+    bool depthWriteEnabled() const;
+    void setDepthWriteEnabled(bool depthWriteEnabled);
+
+    class StencilDescriptor* frontFaceStencil() const;
+    void setFrontFaceStencil(const class StencilDescriptor* frontFaceStencil);
+
+    class StencilDescriptor* backFaceStencil() const;
+    void setBackFaceStencil(const class StencilDescriptor* backFaceStencil);
+
+    NS::String* label() const;
+    void setLabel(const NS::String* label);
+};
+// Depth/stencil state: exposes only label() and device().
+class DepthStencilState : public NS::Referencing<DepthStencilState>
+{
+public:
+    NS::String* label() const;
+
+    class Device* device() const;
+};
+
+}
+
+// static method: alloc
+_MTL_INLINE MTL::StencilDescriptor* MTL::StencilDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::StencilDescriptor>(_MTL_PRIVATE_CLS(MTLStencilDescriptor));
+}
+
+// method: init
+_MTL_INLINE MTL::StencilDescriptor* MTL::StencilDescriptor::init()
+{
+    return NS::Object::init<MTL::StencilDescriptor>();
+}
+
+// property: stencilCompareFunction
+_MTL_INLINE MTL::CompareFunction MTL::StencilDescriptor::stencilCompareFunction() const
+{
+    return Object::sendMessage<MTL::CompareFunction>(this, _MTL_PRIVATE_SEL(stencilCompareFunction));
+}
+
+_MTL_INLINE void MTL::StencilDescriptor::setStencilCompareFunction(MTL::CompareFunction stencilCompareFunction)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStencilCompareFunction_), stencilCompareFunction);
+}
+
+// property: stencilFailureOperation
+_MTL_INLINE MTL::StencilOperation MTL::StencilDescriptor::stencilFailureOperation() const
+{
+    return Object::sendMessage<MTL::StencilOperation>(this, _MTL_PRIVATE_SEL(stencilFailureOperation));
+}
+
+_MTL_INLINE void MTL::StencilDescriptor::setStencilFailureOperation(MTL::StencilOperation stencilFailureOperation)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStencilFailureOperation_), stencilFailureOperation);
+}
+
+// property: depthFailureOperation
+_MTL_INLINE MTL::StencilOperation MTL::StencilDescriptor::depthFailureOperation() const
+{
+    return Object::sendMessage<MTL::StencilOperation>(this, _MTL_PRIVATE_SEL(depthFailureOperation));
+}
+
+_MTL_INLINE void MTL::StencilDescriptor::setDepthFailureOperation(MTL::StencilOperation depthFailureOperation)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthFailureOperation_), depthFailureOperation);
+}
+
+// property: depthStencilPassOperation
+_MTL_INLINE MTL::StencilOperation MTL::StencilDescriptor::depthStencilPassOperation() const
+{
+    return Object::sendMessage<MTL::StencilOperation>(this, _MTL_PRIVATE_SEL(depthStencilPassOperation));
+}
+
+_MTL_INLINE void MTL::StencilDescriptor::setDepthStencilPassOperation(MTL::StencilOperation depthStencilPassOperation)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthStencilPassOperation_), depthStencilPassOperation);
+}
+
+// property: readMask
+_MTL_INLINE uint32_t MTL::StencilDescriptor::readMask() const
+{
+    return Object::sendMessage<uint32_t>(this, _MTL_PRIVATE_SEL(readMask));
+}
+
+_MTL_INLINE void MTL::StencilDescriptor::setReadMask(uint32_t readMask)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setReadMask_), readMask);
+}
+
+// property: writeMask
+_MTL_INLINE uint32_t MTL::StencilDescriptor::writeMask() const
+{
+    return Object::sendMessage<uint32_t>(this, _MTL_PRIVATE_SEL(writeMask));
+}
+
+_MTL_INLINE void MTL::StencilDescriptor::setWriteMask(uint32_t writeMask)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setWriteMask_), writeMask);
+}
+
+// static method: alloc
+_MTL_INLINE MTL::DepthStencilDescriptor* MTL::DepthStencilDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::DepthStencilDescriptor>(_MTL_PRIVATE_CLS(MTLDepthStencilDescriptor));
+}
+
+// method: init
+_MTL_INLINE MTL::DepthStencilDescriptor* MTL::DepthStencilDescriptor::init()
+{
+    return NS::Object::init<MTL::DepthStencilDescriptor>();
+}
+
+// property: depthCompareFunction
+_MTL_INLINE MTL::CompareFunction MTL::DepthStencilDescriptor::depthCompareFunction() const
+{
+    return Object::sendMessage<MTL::CompareFunction>(this, _MTL_PRIVATE_SEL(depthCompareFunction));
+}
+
+_MTL_INLINE void MTL::DepthStencilDescriptor::setDepthCompareFunction(MTL::CompareFunction depthCompareFunction)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthCompareFunction_), depthCompareFunction);
+}
+
+// property: depthWriteEnabled (the getter sends isDepthWriteEnabled, not depthWriteEnabled)
+_MTL_INLINE bool MTL::DepthStencilDescriptor::depthWriteEnabled() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isDepthWriteEnabled));
+}
+
+_MTL_INLINE void MTL::DepthStencilDescriptor::setDepthWriteEnabled(bool depthWriteEnabled)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthWriteEnabled_), depthWriteEnabled);
+}
+
+// property: frontFaceStencil
+_MTL_INLINE MTL::StencilDescriptor* MTL::DepthStencilDescriptor::frontFaceStencil() const
+{
+    return Object::sendMessage<MTL::StencilDescriptor*>(this, _MTL_PRIVATE_SEL(frontFaceStencil));
+}
+
+_MTL_INLINE void MTL::DepthStencilDescriptor::setFrontFaceStencil(const MTL::StencilDescriptor* frontFaceStencil)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFrontFaceStencil_), frontFaceStencil);
+}
+
+// property: backFaceStencil
+_MTL_INLINE MTL::StencilDescriptor* MTL::DepthStencilDescriptor::backFaceStencil() const
+{
+    return Object::sendMessage<MTL::StencilDescriptor*>(this, _MTL_PRIVATE_SEL(backFaceStencil));
+}
+
+_MTL_INLINE void MTL::DepthStencilDescriptor::setBackFaceStencil(const MTL::StencilDescriptor* backFaceStencil)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBackFaceStencil_), backFaceStencil);
+}
+
+// property: label
+_MTL_INLINE NS::String* MTL::DepthStencilDescriptor::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::DepthStencilDescriptor::setLabel(const NS::String* label)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// property: label
+_MTL_INLINE NS::String* MTL::DepthStencilState::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+// property: device
+_MTL_INLINE MTL::Device* MTL::DepthStencilState::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
diff --git a/metal-cpp/Metal/MTLDevice.hpp b/metal-cpp/Metal/MTLDevice.hpp
new file mode 100644
index 00000000..514d6387
--- /dev/null
+++ b/metal-cpp/Metal/MTLDevice.hpp
@@ -0,0 +1,1352 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLDevice.hpp
+//
+// Copyright 2020-2022 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLArgument.hpp" +#include "MTLDevice.hpp" +#include "MTLPixelFormat.hpp" +#include "MTLResource.hpp" +#include "MTLTexture.hpp" +#include "MTLTypes.hpp" +#include +#include + +namespace MTL +{ +_MTL_ENUM(NS::Integer, IOCompressionMethod) { + IOCompressionMethodZlib = 0, + IOCompressionMethodLZFSE = 1, + IOCompressionMethodLZ4 = 2, + IOCompressionMethodLZMA = 3, + IOCompressionMethodLZBitmap = 4, +}; + +_MTL_ENUM(NS::UInteger, FeatureSet) { + FeatureSet_iOS_GPUFamily1_v1 = 0, + FeatureSet_iOS_GPUFamily2_v1 = 1, + FeatureSet_iOS_GPUFamily1_v2 = 2, + FeatureSet_iOS_GPUFamily2_v2 = 3, + FeatureSet_iOS_GPUFamily3_v1 = 4, + FeatureSet_iOS_GPUFamily1_v3 = 5, + FeatureSet_iOS_GPUFamily2_v3 = 6, + FeatureSet_iOS_GPUFamily3_v2 = 7, + FeatureSet_iOS_GPUFamily1_v4 = 8, + FeatureSet_iOS_GPUFamily2_v4 = 9, + FeatureSet_iOS_GPUFamily3_v3 = 10, + FeatureSet_iOS_GPUFamily4_v1 = 11, + FeatureSet_iOS_GPUFamily1_v5 = 12, + FeatureSet_iOS_GPUFamily2_v5 = 13, + FeatureSet_iOS_GPUFamily3_v4 = 14, + FeatureSet_iOS_GPUFamily4_v2 = 15, + FeatureSet_iOS_GPUFamily5_v1 = 16, + FeatureSet_macOS_GPUFamily1_v1 = 10000, + FeatureSet_OSX_GPUFamily1_v1 = 10000, + FeatureSet_macOS_GPUFamily1_v2 = 10001, + FeatureSet_OSX_GPUFamily1_v2 = 10001, + FeatureSet_OSX_ReadWriteTextureTier2 = 10002, + FeatureSet_macOS_ReadWriteTextureTier2 = 10002, + FeatureSet_macOS_GPUFamily1_v3 = 10003, + FeatureSet_macOS_GPUFamily1_v4 = 10004, + FeatureSet_macOS_GPUFamily2_v1 = 10005, + FeatureSet_watchOS_GPUFamily1_v1 = 20000, + FeatureSet_WatchOS_GPUFamily1_v1 = 20000, + FeatureSet_watchOS_GPUFamily2_v1 = 20001, + FeatureSet_WatchOS_GPUFamily2_v1 = 20001, + FeatureSet_tvOS_GPUFamily1_v1 = 30000, + FeatureSet_TVOS_GPUFamily1_v1 = 
30000, + FeatureSet_tvOS_GPUFamily1_v2 = 30001, + FeatureSet_tvOS_GPUFamily1_v3 = 30002, + FeatureSet_tvOS_GPUFamily2_v1 = 30003, + FeatureSet_tvOS_GPUFamily1_v4 = 30004, + FeatureSet_tvOS_GPUFamily2_v2 = 30005, +}; + +_MTL_ENUM(NS::Integer, GPUFamily) { + GPUFamilyApple1 = 1001, + GPUFamilyApple2 = 1002, + GPUFamilyApple3 = 1003, + GPUFamilyApple4 = 1004, + GPUFamilyApple5 = 1005, + GPUFamilyApple6 = 1006, + GPUFamilyApple7 = 1007, + GPUFamilyApple8 = 1008, + GPUFamilyMac1 = 2001, + GPUFamilyMac2 = 2002, + GPUFamilyCommon1 = 3001, + GPUFamilyCommon2 = 3002, + GPUFamilyCommon3 = 3003, + GPUFamilyMacCatalyst1 = 4001, + GPUFamilyMacCatalyst2 = 4002, + GPUFamilyMetal3 = 5001, +}; + +_MTL_ENUM(NS::UInteger, DeviceLocation) { + DeviceLocationBuiltIn = 0, + DeviceLocationSlot = 1, + DeviceLocationExternal = 2, + DeviceLocationUnspecified = NS::UIntegerMax, +}; + +_MTL_OPTIONS(NS::UInteger, PipelineOption) { + PipelineOptionNone = 0, + PipelineOptionArgumentInfo = 1, + PipelineOptionBufferTypeInfo = 2, + PipelineOptionFailOnBinaryArchiveMiss = 4, +}; + +_MTL_ENUM(NS::UInteger, ReadWriteTextureTier) { + ReadWriteTextureTierNone = 0, + ReadWriteTextureTier1 = 1, + ReadWriteTextureTier2 = 2, +}; + +_MTL_ENUM(NS::UInteger, ArgumentBuffersTier) { + ArgumentBuffersTier1 = 0, + ArgumentBuffersTier2 = 1, +}; + +_MTL_ENUM(NS::UInteger, SparseTextureRegionAlignmentMode) { + SparseTextureRegionAlignmentModeOutward = 0, + SparseTextureRegionAlignmentModeInward = 1, +}; + +_MTL_ENUM(NS::Integer, SparsePageSize) { + SparsePageSize16 = 101, + SparsePageSize64 = 102, + SparsePageSize256 = 103, +}; + +struct AccelerationStructureSizes +{ + NS::UInteger accelerationStructureSize; + NS::UInteger buildScratchBufferSize; + NS::UInteger refitScratchBufferSize; +} _MTL_PACKED; + +_MTL_ENUM(NS::UInteger, CounterSamplingPoint) { + CounterSamplingPointAtStageBoundary = 0, + CounterSamplingPointAtDrawBoundary = 1, + CounterSamplingPointAtDispatchBoundary = 2, + 
CounterSamplingPointAtTileDispatchBoundary = 3, + CounterSamplingPointAtBlitBoundary = 4, +}; + +struct SizeAndAlign +{ + NS::UInteger size; + NS::UInteger align; +} _MTL_PACKED; + +class ArgumentDescriptor : public NS::Copying +{ +public: + static class ArgumentDescriptor* alloc(); + + class ArgumentDescriptor* init(); + + static class ArgumentDescriptor* argumentDescriptor(); + + MTL::DataType dataType() const; + void setDataType(MTL::DataType dataType); + + NS::UInteger index() const; + void setIndex(NS::UInteger index); + + NS::UInteger arrayLength() const; + void setArrayLength(NS::UInteger arrayLength); + + MTL::ArgumentAccess access() const; + void setAccess(MTL::ArgumentAccess access); + + MTL::TextureType textureType() const; + void setTextureType(MTL::TextureType textureType); + + NS::UInteger constantBlockAlignment() const; + void setConstantBlockAlignment(NS::UInteger constantBlockAlignment); +}; + +using DeviceNotificationName = NS::String*; +_MTL_CONST(DeviceNotificationName, DeviceWasAddedNotification); +_MTL_CONST(DeviceNotificationName, DeviceRemovalRequestedNotification); +_MTL_CONST(DeviceNotificationName, DeviceWasRemovedNotification); +_MTL_CONST(NS::ErrorUserInfoKey, CommandBufferEncoderInfoErrorKey); + +using DeviceNotificationHandlerBlock = void (^)(class Device* pDevice, DeviceNotificationName notifyName); + +using DeviceNotificationHandlerFunction = std::function; + +using AutoreleasedComputePipelineReflection = class ComputePipelineReflection*; + +using AutoreleasedRenderPipelineReflection = class RenderPipelineReflection*; + +using NewLibraryCompletionHandler = void (^)(class Library*, NS::Error*); + +using NewLibraryCompletionHandlerFunction = std::function; + +using NewRenderPipelineStateCompletionHandler = void (^)(class RenderPipelineState*, NS::Error*); + +using NewRenderPipelineStateCompletionHandlerFunction = std::function; + +using NewRenderPipelineStateWithReflectionCompletionHandler = void (^)(class RenderPipelineState*, class 
RenderPipelineReflection*, NS::Error*); + +using NewRenderPipelineStateWithReflectionCompletionHandlerFunction = std::function; + +using NewComputePipelineStateCompletionHandler = void (^)(class ComputePipelineState*, NS::Error*); + +using NewComputePipelineStateCompletionHandlerFunction = std::function; + +using NewComputePipelineStateWithReflectionCompletionHandler = void (^)(class ComputePipelineState*, class ComputePipelineReflection*, NS::Error*); + +using NewComputePipelineStateWithReflectionCompletionHandlerFunction = std::function; + +using Timestamp = std::uint64_t; + +MTL::Device* CreateSystemDefaultDevice(); + +NS::Array* CopyAllDevices(); + +NS::Array* CopyAllDevicesWithObserver(NS::Object** pOutObserver, DeviceNotificationHandlerBlock handler); + +NS::Array* CopyAllDevicesWithObserver(NS::Object** pOutObserver, const DeviceNotificationHandlerFunction& handler); + +void RemoveDeviceObserver(const NS::Object* pObserver); + +class Device : public NS::Referencing +{ +public: + void newLibrary(const NS::String* pSource, const class CompileOptions* pOptions, const NewLibraryCompletionHandlerFunction& completionHandler); + + void newLibrary(const class StitchedLibraryDescriptor* pDescriptor, const MTL::NewLibraryCompletionHandlerFunction& completionHandler); + + void newRenderPipelineState(const class RenderPipelineDescriptor* pDescriptor, const NewRenderPipelineStateCompletionHandlerFunction& completionHandler); + + void newRenderPipelineState(const class RenderPipelineDescriptor* pDescriptor, PipelineOption options, const NewRenderPipelineStateWithReflectionCompletionHandlerFunction& completionHandler); + + void newRenderPipelineState(const class TileRenderPipelineDescriptor* pDescriptor, PipelineOption options, const NewRenderPipelineStateWithReflectionCompletionHandlerFunction& completionHandler); + + void newComputePipelineState(const class Function* pFunction, const NewComputePipelineStateCompletionHandlerFunction& completionHandler); + + void 
newComputePipelineState(const class Function* pFunction, PipelineOption options, const NewComputePipelineStateWithReflectionCompletionHandlerFunction& completionHandler); + + void newComputePipelineState(const class ComputePipelineDescriptor* pDescriptor, PipelineOption options, const NewComputePipelineStateWithReflectionCompletionHandlerFunction& completionHandler); + + bool isHeadless() const; + + NS::String* name() const; + + uint64_t registryID() const; + + MTL::Size maxThreadsPerThreadgroup() const; + + bool lowPower() const; + + bool headless() const; + + bool removable() const; + + bool hasUnifiedMemory() const; + + uint64_t recommendedMaxWorkingSetSize() const; + + MTL::DeviceLocation location() const; + + NS::UInteger locationNumber() const; + + uint64_t maxTransferRate() const; + + bool depth24Stencil8PixelFormatSupported() const; + + MTL::ReadWriteTextureTier readWriteTextureSupport() const; + + MTL::ArgumentBuffersTier argumentBuffersSupport() const; + + bool rasterOrderGroupsSupported() const; + + bool supports32BitFloatFiltering() const; + + bool supports32BitMSAA() const; + + bool supportsQueryTextureLOD() const; + + bool supportsBCTextureCompression() const; + + bool supportsPullModelInterpolation() const; + + bool barycentricCoordsSupported() const; + + bool supportsShaderBarycentricCoordinates() const; + + NS::UInteger currentAllocatedSize() const; + + class CommandQueue* newCommandQueue(); + + class CommandQueue* newCommandQueue(NS::UInteger maxCommandBufferCount); + + MTL::SizeAndAlign heapTextureSizeAndAlign(const class TextureDescriptor* desc); + + MTL::SizeAndAlign heapBufferSizeAndAlign(NS::UInteger length, MTL::ResourceOptions options); + + class Heap* newHeap(const class HeapDescriptor* descriptor); + + class Buffer* newBuffer(NS::UInteger length, MTL::ResourceOptions options); + + class Buffer* newBuffer(const void* pointer, NS::UInteger length, MTL::ResourceOptions options); + + class Buffer* newBuffer(const void* pointer, NS::UInteger 
length, MTL::ResourceOptions options, void (^deallocator)(void*, NS::UInteger)); + + class DepthStencilState* newDepthStencilState(const class DepthStencilDescriptor* descriptor); + + class Texture* newTexture(const class TextureDescriptor* descriptor); + + class Texture* newTexture(const class TextureDescriptor* descriptor, const IOSurfaceRef iosurface, NS::UInteger plane); + + class Texture* newSharedTexture(const class TextureDescriptor* descriptor); + + class Texture* newSharedTexture(const class SharedTextureHandle* sharedHandle); + + class SamplerState* newSamplerState(const class SamplerDescriptor* descriptor); + + class Library* newDefaultLibrary(); + + class Library* newDefaultLibrary(const NS::Bundle* bundle, NS::Error** error); + + class Library* newLibrary(const NS::String* filepath, NS::Error** error); + + class Library* newLibrary(const NS::URL* url, NS::Error** error); + + class Library* newLibrary(const dispatch_data_t data, NS::Error** error); + + class Library* newLibrary(const NS::String* source, const class CompileOptions* options, NS::Error** error); + + void newLibrary(const NS::String* source, const class CompileOptions* options, const MTL::NewLibraryCompletionHandler completionHandler); + + class Library* newLibrary(const class StitchedLibraryDescriptor* descriptor, NS::Error** error); + + void newLibrary(const class StitchedLibraryDescriptor* descriptor, const MTL::NewLibraryCompletionHandler completionHandler); + + class RenderPipelineState* newRenderPipelineState(const class RenderPipelineDescriptor* descriptor, NS::Error** error); + + class RenderPipelineState* newRenderPipelineState(const class RenderPipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::AutoreleasedRenderPipelineReflection* reflection, NS::Error** error); + + void newRenderPipelineState(const class RenderPipelineDescriptor* descriptor, const MTL::NewRenderPipelineStateCompletionHandler completionHandler); + + void newRenderPipelineState(const class 
RenderPipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::NewRenderPipelineStateWithReflectionCompletionHandler completionHandler); + + class ComputePipelineState* newComputePipelineState(const class Function* computeFunction, NS::Error** error); + + class ComputePipelineState* newComputePipelineState(const class Function* computeFunction, MTL::PipelineOption options, const MTL::AutoreleasedComputePipelineReflection* reflection, NS::Error** error); + + void newComputePipelineState(const class Function* computeFunction, const MTL::NewComputePipelineStateCompletionHandler completionHandler); + + void newComputePipelineState(const class Function* computeFunction, MTL::PipelineOption options, const MTL::NewComputePipelineStateWithReflectionCompletionHandler completionHandler); + + class ComputePipelineState* newComputePipelineState(const class ComputePipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::AutoreleasedComputePipelineReflection* reflection, NS::Error** error); + + void newComputePipelineState(const class ComputePipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::NewComputePipelineStateWithReflectionCompletionHandler completionHandler); + + class Fence* newFence(); + + bool supportsFeatureSet(MTL::FeatureSet featureSet); + + bool supportsFamily(MTL::GPUFamily gpuFamily); + + bool supportsTextureSampleCount(NS::UInteger sampleCount); + + NS::UInteger minimumLinearTextureAlignmentForPixelFormat(MTL::PixelFormat format); + + NS::UInteger minimumTextureBufferAlignmentForPixelFormat(MTL::PixelFormat format); + + class RenderPipelineState* newRenderPipelineState(const class TileRenderPipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::AutoreleasedRenderPipelineReflection* reflection, NS::Error** error); + + void newRenderPipelineState(const class TileRenderPipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::NewRenderPipelineStateWithReflectionCompletionHandler 
completionHandler); + + class RenderPipelineState* newRenderPipelineState(const class MeshRenderPipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::AutoreleasedRenderPipelineReflection* reflection, NS::Error** error); + + void newRenderPipelineState(const class MeshRenderPipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::NewRenderPipelineStateWithReflectionCompletionHandler completionHandler); + + NS::UInteger maxThreadgroupMemoryLength() const; + + NS::UInteger maxArgumentBufferSamplerCount() const; + + bool programmableSamplePositionsSupported() const; + + void getDefaultSamplePositions(MTL::SamplePosition* positions, NS::UInteger count); + + class ArgumentEncoder* newArgumentEncoder(const NS::Array* arguments); + + bool supportsRasterizationRateMap(NS::UInteger layerCount); + + class RasterizationRateMap* newRasterizationRateMap(const class RasterizationRateMapDescriptor* descriptor); + + class IndirectCommandBuffer* newIndirectCommandBuffer(const class IndirectCommandBufferDescriptor* descriptor, NS::UInteger maxCount, MTL::ResourceOptions options); + + class Event* newEvent(); + + class SharedEvent* newSharedEvent(); + + class SharedEvent* newSharedEvent(const class SharedEventHandle* sharedEventHandle); + + uint64_t peerGroupID() const; + + uint32_t peerIndex() const; + + uint32_t peerCount() const; + + class IOFileHandle* newIOHandle(const NS::URL* url, NS::Error** error); + + class IOCommandQueue* newIOCommandQueue(const class IOCommandQueueDescriptor* descriptor, NS::Error** error); + + class IOFileHandle* newIOHandle(const NS::URL* url, MTL::IOCompressionMethod compressionMethod, NS::Error** error); + + MTL::Size sparseTileSize(MTL::TextureType textureType, MTL::PixelFormat pixelFormat, NS::UInteger sampleCount); + + NS::UInteger sparseTileSizeInBytes() const; + + void convertSparsePixelRegions(const MTL::Region* pixelRegions, MTL::Region* tileRegions, MTL::Size tileSize, MTL::SparseTextureRegionAlignmentMode mode, 
NS::UInteger numRegions); + + void convertSparseTileRegions(const MTL::Region* tileRegions, MTL::Region* pixelRegions, MTL::Size tileSize, NS::UInteger numRegions); + + NS::UInteger sparseTileSizeInBytes(MTL::SparsePageSize sparsePageSize); + + MTL::Size sparseTileSize(MTL::TextureType textureType, MTL::PixelFormat pixelFormat, NS::UInteger sampleCount, MTL::SparsePageSize sparsePageSize); + + NS::UInteger maxBufferLength() const; + + NS::Array* counterSets() const; + + class CounterSampleBuffer* newCounterSampleBuffer(const class CounterSampleBufferDescriptor* descriptor, NS::Error** error); + + void sampleTimestamps(MTL::Timestamp* cpuTimestamp, MTL::Timestamp* gpuTimestamp); + + class ArgumentEncoder* newArgumentEncoder(const class BufferBinding* bufferBinding); + + bool supportsCounterSampling(MTL::CounterSamplingPoint samplingPoint); + + bool supportsVertexAmplificationCount(NS::UInteger count); + + bool supportsDynamicLibraries() const; + + bool supportsRenderDynamicLibraries() const; + + class DynamicLibrary* newDynamicLibrary(const class Library* library, NS::Error** error); + + class DynamicLibrary* newDynamicLibrary(const NS::URL* url, NS::Error** error); + + class BinaryArchive* newBinaryArchive(const class BinaryArchiveDescriptor* descriptor, NS::Error** error); + + bool supportsRaytracing() const; + + MTL::AccelerationStructureSizes accelerationStructureSizes(const class AccelerationStructureDescriptor* descriptor); + + class AccelerationStructure* newAccelerationStructure(NS::UInteger size); + + class AccelerationStructure* newAccelerationStructure(const class AccelerationStructureDescriptor* descriptor); + + MTL::SizeAndAlign heapAccelerationStructureSizeAndAlign(NS::UInteger size); + + MTL::SizeAndAlign heapAccelerationStructureSizeAndAlign(const class AccelerationStructureDescriptor* descriptor); + + bool supportsFunctionPointers() const; + + bool supportsFunctionPointersFromRender() const; + + bool supportsRaytracingFromRender() const; + + bool 
supportsPrimitiveMotionBlur() const; +}; + +} + +// static method: alloc +_MTL_INLINE MTL::ArgumentDescriptor* MTL::ArgumentDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLArgumentDescriptor)); +} + +// method: init +_MTL_INLINE MTL::ArgumentDescriptor* MTL::ArgumentDescriptor::init() +{ + return NS::Object::init(); +} + +// static method: argumentDescriptor +_MTL_INLINE MTL::ArgumentDescriptor* MTL::ArgumentDescriptor::argumentDescriptor() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLArgumentDescriptor), _MTL_PRIVATE_SEL(argumentDescriptor)); +} + +// property: dataType +_MTL_INLINE MTL::DataType MTL::ArgumentDescriptor::dataType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(dataType)); +} + +_MTL_INLINE void MTL::ArgumentDescriptor::setDataType(MTL::DataType dataType) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setDataType_), dataType); +} + +// property: index +_MTL_INLINE NS::UInteger MTL::ArgumentDescriptor::index() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(index)); +} + +_MTL_INLINE void MTL::ArgumentDescriptor::setIndex(NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIndex_), index); +} + +// property: arrayLength +_MTL_INLINE NS::UInteger MTL::ArgumentDescriptor::arrayLength() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(arrayLength)); +} + +_MTL_INLINE void MTL::ArgumentDescriptor::setArrayLength(NS::UInteger arrayLength) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setArrayLength_), arrayLength); +} + +// property: access +_MTL_INLINE MTL::ArgumentAccess MTL::ArgumentDescriptor::access() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(access)); +} + +_MTL_INLINE void MTL::ArgumentDescriptor::setAccess(MTL::ArgumentAccess access) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setAccess_), access); +} + +// property: textureType +_MTL_INLINE MTL::TextureType MTL::ArgumentDescriptor::textureType() const +{ + return 
Object::sendMessage(this, _MTL_PRIVATE_SEL(textureType)); +} + +_MTL_INLINE void MTL::ArgumentDescriptor::setTextureType(MTL::TextureType textureType) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTextureType_), textureType); +} + +// property: constantBlockAlignment +_MTL_INLINE NS::UInteger MTL::ArgumentDescriptor::constantBlockAlignment() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(constantBlockAlignment)); +} + +_MTL_INLINE void MTL::ArgumentDescriptor::setConstantBlockAlignment(NS::UInteger constantBlockAlignment) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setConstantBlockAlignment_), constantBlockAlignment); +} + +_MTL_PRIVATE_DEF_WEAK_CONST(MTL::DeviceNotificationName, DeviceWasAddedNotification); +_MTL_PRIVATE_DEF_WEAK_CONST(MTL::DeviceNotificationName, DeviceRemovalRequestedNotification); +_MTL_PRIVATE_DEF_WEAK_CONST(MTL::DeviceNotificationName, DeviceWasRemovedNotification); +_MTL_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, CommandBufferEncoderInfoErrorKey); + +#if defined(MTL_PRIVATE_IMPLEMENTATION) + +extern "C" MTL::Device* MTLCreateSystemDefaultDevice(); + +extern "C" NS::Array* MTLCopyAllDevices(); + +extern "C" NS::Array* MTLCopyAllDevicesWithObserver(NS::Object**, MTL::DeviceNotificationHandlerBlock); + +extern "C" void MTLRemoveDeviceObserver(const NS::Object*); + +#include + +_NS_EXPORT MTL::Device* MTL::CreateSystemDefaultDevice() +{ + return ::MTLCreateSystemDefaultDevice(); +} + +_NS_EXPORT NS::Array* MTL::CopyAllDevices() +{ +#if TARGET_OS_OSX + return ::MTLCopyAllDevices(); +#else + return nullptr; +#endif // TARGET_OS_OSX +} + +_NS_EXPORT NS::Array* MTL::CopyAllDevicesWithObserver(NS::Object** pOutObserver, DeviceNotificationHandlerBlock handler) +{ +#if TARGET_OS_OSX + return ::MTLCopyAllDevicesWithObserver(pOutObserver, handler); +#else + (void)pOutObserver; + (void)handler; + + return nullptr; +#endif // TARGET_OS_OSX +} + +_NS_EXPORT NS::Array* MTL::CopyAllDevicesWithObserver(NS::Object** pOutObserver, const 
DeviceNotificationHandlerFunction& handler) +{ + __block DeviceNotificationHandlerFunction function = handler; + + return CopyAllDevicesWithObserver(pOutObserver, ^(Device* pDevice, DeviceNotificationName pNotificationName) { function(pDevice, pNotificationName); }); +} + +_NS_EXPORT void MTL::RemoveDeviceObserver(const NS::Object* pObserver) +{ +#if TARGET_OS_OSX + ::MTLRemoveDeviceObserver(pObserver); +#endif // TARGET_OS_OSX +} + +#endif // MTL_PRIVATE_IMPLEMENTATION + +_MTL_INLINE void MTL::Device::newLibrary(const NS::String* pSource, const CompileOptions* pOptions, const NewLibraryCompletionHandlerFunction& completionHandler) +{ + __block NewLibraryCompletionHandlerFunction blockCompletionHandler = completionHandler; + + newLibrary(pSource, pOptions, ^(Library* pLibrary, NS::Error* pError) { blockCompletionHandler(pLibrary, pError); }); +} + +_MTL_INLINE void MTL::Device::newLibrary(const class StitchedLibraryDescriptor* pDescriptor, const MTL::NewLibraryCompletionHandlerFunction& completionHandler) +{ + __block NewLibraryCompletionHandlerFunction blockCompletionHandler = completionHandler; + + newLibrary(pDescriptor, ^(Library* pLibrary, NS::Error* pError) { blockCompletionHandler(pLibrary, pError); }); +} + +_MTL_INLINE void MTL::Device::newRenderPipelineState(const RenderPipelineDescriptor* pDescriptor, const NewRenderPipelineStateCompletionHandlerFunction& completionHandler) +{ + __block NewRenderPipelineStateCompletionHandlerFunction blockCompletionHandler = completionHandler; + + newRenderPipelineState(pDescriptor, ^(RenderPipelineState* pPipelineState, NS::Error* pError) { blockCompletionHandler(pPipelineState, pError); }); +} + +_MTL_INLINE void MTL::Device::newRenderPipelineState(const RenderPipelineDescriptor* pDescriptor, PipelineOption options, const NewRenderPipelineStateWithReflectionCompletionHandlerFunction& completionHandler) +{ + __block NewRenderPipelineStateWithReflectionCompletionHandlerFunction blockCompletionHandler = completionHandler; 
+ + newRenderPipelineState(pDescriptor, options, ^(RenderPipelineState* pPipelineState, class RenderPipelineReflection* pReflection, NS::Error* pError) { blockCompletionHandler(pPipelineState, pReflection, pError); }); +} + +_MTL_INLINE void MTL::Device::newRenderPipelineState(const TileRenderPipelineDescriptor* pDescriptor, PipelineOption options, const NewRenderPipelineStateWithReflectionCompletionHandlerFunction& completionHandler) +{ + __block NewRenderPipelineStateWithReflectionCompletionHandlerFunction blockCompletionHandler = completionHandler; + + newRenderPipelineState(pDescriptor, options, ^(RenderPipelineState* pPipelineState, class RenderPipelineReflection* pReflection, NS::Error* pError) { blockCompletionHandler(pPipelineState, pReflection, pError); }); +} + +_MTL_INLINE void MTL::Device::newComputePipelineState(const class Function* pFunction, const NewComputePipelineStateCompletionHandlerFunction& completionHandler) +{ + __block NewComputePipelineStateCompletionHandlerFunction blockCompletionHandler = completionHandler; + + newComputePipelineState(pFunction, ^(ComputePipelineState* pPipelineState, NS::Error* pError) { blockCompletionHandler(pPipelineState, pError); }); +} + +_MTL_INLINE void MTL::Device::newComputePipelineState(const Function* pFunction, PipelineOption options, const NewComputePipelineStateWithReflectionCompletionHandlerFunction& completionHandler) +{ + __block NewComputePipelineStateWithReflectionCompletionHandlerFunction blockCompletionHandler = completionHandler; + + newComputePipelineState(pFunction, options, ^(ComputePipelineState* pPipelineState, ComputePipelineReflection* pReflection, NS::Error* pError) { blockCompletionHandler(pPipelineState, pReflection, pError); }); +} + +_MTL_INLINE void MTL::Device::newComputePipelineState(const ComputePipelineDescriptor* pDescriptor, PipelineOption options, const NewComputePipelineStateWithReflectionCompletionHandlerFunction& completionHandler) +{ + __block 
NewComputePipelineStateWithReflectionCompletionHandlerFunction blockCompletionHandler = completionHandler; + + newComputePipelineState(pDescriptor, options, ^(ComputePipelineState* pPipelineState, ComputePipelineReflection* pReflection, NS::Error* pError) { blockCompletionHandler(pPipelineState, pReflection, pError); }); +} + +_MTL_INLINE bool MTL::Device::isHeadless() const +{ + return headless(); +} + +// property: name +_MTL_INLINE NS::String* MTL::Device::name() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(name)); +} + +// property: registryID +_MTL_INLINE uint64_t MTL::Device::registryID() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(registryID)); +} + +// property: maxThreadsPerThreadgroup +_MTL_INLINE MTL::Size MTL::Device::maxThreadsPerThreadgroup() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxThreadsPerThreadgroup)); +} + +// property: lowPower +_MTL_INLINE bool MTL::Device::lowPower() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(isLowPower)); +} + +// property: headless +_MTL_INLINE bool MTL::Device::headless() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(isHeadless)); +} + +// property: removable +_MTL_INLINE bool MTL::Device::removable() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(isRemovable)); +} + +// property: hasUnifiedMemory +_MTL_INLINE bool MTL::Device::hasUnifiedMemory() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(hasUnifiedMemory)); +} + +// property: recommendedMaxWorkingSetSize +_MTL_INLINE uint64_t MTL::Device::recommendedMaxWorkingSetSize() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(recommendedMaxWorkingSetSize)); +} + +// property: location +_MTL_INLINE MTL::DeviceLocation MTL::Device::location() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(location)); +} + +// property: locationNumber +_MTL_INLINE NS::UInteger MTL::Device::locationNumber() const +{ + return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(locationNumber)); +} + +// property: maxTransferRate +_MTL_INLINE uint64_t MTL::Device::maxTransferRate() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxTransferRate)); +} + +// property: depth24Stencil8PixelFormatSupported +_MTL_INLINE bool MTL::Device::depth24Stencil8PixelFormatSupported() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(isDepth24Stencil8PixelFormatSupported)); +} + +// property: readWriteTextureSupport +_MTL_INLINE MTL::ReadWriteTextureTier MTL::Device::readWriteTextureSupport() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(readWriteTextureSupport)); +} + +// property: argumentBuffersSupport +_MTL_INLINE MTL::ArgumentBuffersTier MTL::Device::argumentBuffersSupport() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(argumentBuffersSupport)); +} + +// property: rasterOrderGroupsSupported +_MTL_INLINE bool MTL::Device::rasterOrderGroupsSupported() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(areRasterOrderGroupsSupported)); +} + +// property: supports32BitFloatFiltering +_MTL_INLINE bool MTL::Device::supports32BitFloatFiltering() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supports32BitFloatFiltering)); +} + +// property: supports32BitMSAA +_MTL_INLINE bool MTL::Device::supports32BitMSAA() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supports32BitMSAA)); +} + +// property: supportsQueryTextureLOD +_MTL_INLINE bool MTL::Device::supportsQueryTextureLOD() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportsQueryTextureLOD)); +} + +// property: supportsBCTextureCompression +_MTL_INLINE bool MTL::Device::supportsBCTextureCompression() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportsBCTextureCompression)); +} + +// property: supportsPullModelInterpolation +_MTL_INLINE bool MTL::Device::supportsPullModelInterpolation() const +{ + return Object::sendMessageSafe(this, 
_MTL_PRIVATE_SEL(supportsPullModelInterpolation)); +} + +// property: barycentricCoordsSupported +_MTL_INLINE bool MTL::Device::barycentricCoordsSupported() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(areBarycentricCoordsSupported)); +} + +// property: supportsShaderBarycentricCoordinates +_MTL_INLINE bool MTL::Device::supportsShaderBarycentricCoordinates() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportsShaderBarycentricCoordinates)); +} + +// property: currentAllocatedSize +_MTL_INLINE NS::UInteger MTL::Device::currentAllocatedSize() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(currentAllocatedSize)); +} + +// method: newCommandQueue +_MTL_INLINE MTL::CommandQueue* MTL::Device::newCommandQueue() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newCommandQueue)); +} + +// method: newCommandQueueWithMaxCommandBufferCount: +_MTL_INLINE MTL::CommandQueue* MTL::Device::newCommandQueue(NS::UInteger maxCommandBufferCount) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newCommandQueueWithMaxCommandBufferCount_), maxCommandBufferCount); +} + +// method: heapTextureSizeAndAlignWithDescriptor: +_MTL_INLINE MTL::SizeAndAlign MTL::Device::heapTextureSizeAndAlign(const MTL::TextureDescriptor* desc) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(heapTextureSizeAndAlignWithDescriptor_), desc); +} + +// method: heapBufferSizeAndAlignWithLength:options: +_MTL_INLINE MTL::SizeAndAlign MTL::Device::heapBufferSizeAndAlign(NS::UInteger length, MTL::ResourceOptions options) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(heapBufferSizeAndAlignWithLength_options_), length, options); +} + +// method: newHeapWithDescriptor: +_MTL_INLINE MTL::Heap* MTL::Device::newHeap(const MTL::HeapDescriptor* descriptor) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newHeapWithDescriptor_), descriptor); +} + +// method: newBufferWithLength:options: +_MTL_INLINE MTL::Buffer* 
MTL::Device::newBuffer(NS::UInteger length, MTL::ResourceOptions options) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newBufferWithLength_options_), length, options); +} + +// method: newBufferWithBytes:length:options: +_MTL_INLINE MTL::Buffer* MTL::Device::newBuffer(const void* pointer, NS::UInteger length, MTL::ResourceOptions options) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newBufferWithBytes_length_options_), pointer, length, options); +} + +// method: newBufferWithBytesNoCopy:length:options:deallocator: +_MTL_INLINE MTL::Buffer* MTL::Device::newBuffer(const void* pointer, NS::UInteger length, MTL::ResourceOptions options, void (^deallocator)(void*, NS::UInteger)) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newBufferWithBytesNoCopy_length_options_deallocator_), pointer, length, options, deallocator); +} + +// method: newDepthStencilStateWithDescriptor: +_MTL_INLINE MTL::DepthStencilState* MTL::Device::newDepthStencilState(const MTL::DepthStencilDescriptor* descriptor) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newDepthStencilStateWithDescriptor_), descriptor); +} + +// method: newTextureWithDescriptor: +_MTL_INLINE MTL::Texture* MTL::Device::newTexture(const MTL::TextureDescriptor* descriptor) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newTextureWithDescriptor_), descriptor); +} + +// method: newTextureWithDescriptor:iosurface:plane: +_MTL_INLINE MTL::Texture* MTL::Device::newTexture(const MTL::TextureDescriptor* descriptor, const IOSurfaceRef iosurface, NS::UInteger plane) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newTextureWithDescriptor_iosurface_plane_), descriptor, iosurface, plane); +} + +// method: newSharedTextureWithDescriptor: +_MTL_INLINE MTL::Texture* MTL::Device::newSharedTexture(const MTL::TextureDescriptor* descriptor) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newSharedTextureWithDescriptor_), descriptor); +} + +// method: newSharedTextureWithHandle: 
+_MTL_INLINE MTL::Texture* MTL::Device::newSharedTexture(const MTL::SharedTextureHandle* sharedHandle) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newSharedTextureWithHandle_), sharedHandle); +} + +// method: newSamplerStateWithDescriptor: +_MTL_INLINE MTL::SamplerState* MTL::Device::newSamplerState(const MTL::SamplerDescriptor* descriptor) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newSamplerStateWithDescriptor_), descriptor); +} + +// method: newDefaultLibrary +_MTL_INLINE MTL::Library* MTL::Device::newDefaultLibrary() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newDefaultLibrary)); +} + +// method: newDefaultLibraryWithBundle:error: +_MTL_INLINE MTL::Library* MTL::Device::newDefaultLibrary(const NS::Bundle* bundle, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newDefaultLibraryWithBundle_error_), bundle, error); +} + +// method: newLibraryWithFile:error: +_MTL_INLINE MTL::Library* MTL::Device::newLibrary(const NS::String* filepath, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newLibraryWithFile_error_), filepath, error); +} + +// method: newLibraryWithURL:error: +_MTL_INLINE MTL::Library* MTL::Device::newLibrary(const NS::URL* url, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newLibraryWithURL_error_), url, error); +} + +// method: newLibraryWithData:error: +_MTL_INLINE MTL::Library* MTL::Device::newLibrary(const dispatch_data_t data, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newLibraryWithData_error_), data, error); +} + +// method: newLibraryWithSource:options:error: +_MTL_INLINE MTL::Library* MTL::Device::newLibrary(const NS::String* source, const MTL::CompileOptions* options, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newLibraryWithSource_options_error_), source, options, error); +} + +// method: newLibraryWithSource:options:completionHandler: +_MTL_INLINE void 
MTL::Device::newLibrary(const NS::String* source, const MTL::CompileOptions* options, const MTL::NewLibraryCompletionHandler completionHandler) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(newLibraryWithSource_options_completionHandler_), source, options, completionHandler); +} + +// method: newLibraryWithStitchedDescriptor:error: +_MTL_INLINE MTL::Library* MTL::Device::newLibrary(const MTL::StitchedLibraryDescriptor* descriptor, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newLibraryWithStitchedDescriptor_error_), descriptor, error); +} + +// method: newLibraryWithStitchedDescriptor:completionHandler: +_MTL_INLINE void MTL::Device::newLibrary(const MTL::StitchedLibraryDescriptor* descriptor, const MTL::NewLibraryCompletionHandler completionHandler) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(newLibraryWithStitchedDescriptor_completionHandler_), descriptor, completionHandler); +} + +// method: newRenderPipelineStateWithDescriptor:error: +_MTL_INLINE MTL::RenderPipelineState* MTL::Device::newRenderPipelineState(const MTL::RenderPipelineDescriptor* descriptor, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newRenderPipelineStateWithDescriptor_error_), descriptor, error); +} + +// method: newRenderPipelineStateWithDescriptor:options:reflection:error: +_MTL_INLINE MTL::RenderPipelineState* MTL::Device::newRenderPipelineState(const MTL::RenderPipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::AutoreleasedRenderPipelineReflection* reflection, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newRenderPipelineStateWithDescriptor_options_reflection_error_), descriptor, options, reflection, error); +} + +// method: newRenderPipelineStateWithDescriptor:completionHandler: +_MTL_INLINE void MTL::Device::newRenderPipelineState(const MTL::RenderPipelineDescriptor* descriptor, const MTL::NewRenderPipelineStateCompletionHandler completionHandler) +{ + 
Object::sendMessage(this, _MTL_PRIVATE_SEL(newRenderPipelineStateWithDescriptor_completionHandler_), descriptor, completionHandler); +} + +// method: newRenderPipelineStateWithDescriptor:options:completionHandler: +_MTL_INLINE void MTL::Device::newRenderPipelineState(const MTL::RenderPipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::NewRenderPipelineStateWithReflectionCompletionHandler completionHandler) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(newRenderPipelineStateWithDescriptor_options_completionHandler_), descriptor, options, completionHandler); +} + +// method: newComputePipelineStateWithFunction:error: +_MTL_INLINE MTL::ComputePipelineState* MTL::Device::newComputePipelineState(const MTL::Function* computeFunction, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newComputePipelineStateWithFunction_error_), computeFunction, error); +} + +// method: newComputePipelineStateWithFunction:options:reflection:error: +_MTL_INLINE MTL::ComputePipelineState* MTL::Device::newComputePipelineState(const MTL::Function* computeFunction, MTL::PipelineOption options, const MTL::AutoreleasedComputePipelineReflection* reflection, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newComputePipelineStateWithFunction_options_reflection_error_), computeFunction, options, reflection, error); +} + +// method: newComputePipelineStateWithFunction:completionHandler: +_MTL_INLINE void MTL::Device::newComputePipelineState(const MTL::Function* computeFunction, const MTL::NewComputePipelineStateCompletionHandler completionHandler) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(newComputePipelineStateWithFunction_completionHandler_), computeFunction, completionHandler); +} + +// method: newComputePipelineStateWithFunction:options:completionHandler: +_MTL_INLINE void MTL::Device::newComputePipelineState(const MTL::Function* computeFunction, MTL::PipelineOption options, const 
MTL::NewComputePipelineStateWithReflectionCompletionHandler completionHandler) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(newComputePipelineStateWithFunction_options_completionHandler_), computeFunction, options, completionHandler); +} + +// method: newComputePipelineStateWithDescriptor:options:reflection:error: +_MTL_INLINE MTL::ComputePipelineState* MTL::Device::newComputePipelineState(const MTL::ComputePipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::AutoreleasedComputePipelineReflection* reflection, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newComputePipelineStateWithDescriptor_options_reflection_error_), descriptor, options, reflection, error); +} + +// method: newComputePipelineStateWithDescriptor:options:completionHandler: +_MTL_INLINE void MTL::Device::newComputePipelineState(const MTL::ComputePipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::NewComputePipelineStateWithReflectionCompletionHandler completionHandler) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(newComputePipelineStateWithDescriptor_options_completionHandler_), descriptor, options, completionHandler); +} + +// method: newFence +_MTL_INLINE MTL::Fence* MTL::Device::newFence() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newFence)); +} + +// method: supportsFeatureSet: +_MTL_INLINE bool MTL::Device::supportsFeatureSet(MTL::FeatureSet featureSet) +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportsFeatureSet_), featureSet); +} + +// method: supportsFamily: +_MTL_INLINE bool MTL::Device::supportsFamily(MTL::GPUFamily gpuFamily) +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportsFamily_), gpuFamily); +} + +// method: supportsTextureSampleCount: +_MTL_INLINE bool MTL::Device::supportsTextureSampleCount(NS::UInteger sampleCount) +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportsTextureSampleCount_), sampleCount); +} + +// method: 
minimumLinearTextureAlignmentForPixelFormat: +_MTL_INLINE NS::UInteger MTL::Device::minimumLinearTextureAlignmentForPixelFormat(MTL::PixelFormat format) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(minimumLinearTextureAlignmentForPixelFormat_), format); +} + +// method: minimumTextureBufferAlignmentForPixelFormat: +_MTL_INLINE NS::UInteger MTL::Device::minimumTextureBufferAlignmentForPixelFormat(MTL::PixelFormat format) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(minimumTextureBufferAlignmentForPixelFormat_), format); +} + +// method: newRenderPipelineStateWithTileDescriptor:options:reflection:error: +_MTL_INLINE MTL::RenderPipelineState* MTL::Device::newRenderPipelineState(const MTL::TileRenderPipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::AutoreleasedRenderPipelineReflection* reflection, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newRenderPipelineStateWithTileDescriptor_options_reflection_error_), descriptor, options, reflection, error); +} + +// method: newRenderPipelineStateWithTileDescriptor:options:completionHandler: +_MTL_INLINE void MTL::Device::newRenderPipelineState(const MTL::TileRenderPipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::NewRenderPipelineStateWithReflectionCompletionHandler completionHandler) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(newRenderPipelineStateWithTileDescriptor_options_completionHandler_), descriptor, options, completionHandler); +} + +// method: newRenderPipelineStateWithMeshDescriptor:options:reflection:error: +_MTL_INLINE MTL::RenderPipelineState* MTL::Device::newRenderPipelineState(const MTL::MeshRenderPipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::AutoreleasedRenderPipelineReflection* reflection, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newRenderPipelineStateWithMeshDescriptor_options_reflection_error_), descriptor, options, reflection, error); +} + +// method: 
newRenderPipelineStateWithMeshDescriptor:options:completionHandler: +_MTL_INLINE void MTL::Device::newRenderPipelineState(const MTL::MeshRenderPipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::NewRenderPipelineStateWithReflectionCompletionHandler completionHandler) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(newRenderPipelineStateWithMeshDescriptor_options_completionHandler_), descriptor, options, completionHandler); +} + +// property: maxThreadgroupMemoryLength +_MTL_INLINE NS::UInteger MTL::Device::maxThreadgroupMemoryLength() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxThreadgroupMemoryLength)); +} + +// property: maxArgumentBufferSamplerCount +_MTL_INLINE NS::UInteger MTL::Device::maxArgumentBufferSamplerCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxArgumentBufferSamplerCount)); +} + +// property: programmableSamplePositionsSupported +_MTL_INLINE bool MTL::Device::programmableSamplePositionsSupported() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(areProgrammableSamplePositionsSupported)); +} + +// method: getDefaultSamplePositions:count: +_MTL_INLINE void MTL::Device::getDefaultSamplePositions(MTL::SamplePosition* positions, NS::UInteger count) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(getDefaultSamplePositions_count_), positions, count); +} + +// method: newArgumentEncoderWithArguments: +_MTL_INLINE MTL::ArgumentEncoder* MTL::Device::newArgumentEncoder(const NS::Array* arguments) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newArgumentEncoderWithArguments_), arguments); +} + +// method: supportsRasterizationRateMapWithLayerCount: +_MTL_INLINE bool MTL::Device::supportsRasterizationRateMap(NS::UInteger layerCount) +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportsRasterizationRateMapWithLayerCount_), layerCount); +} + +// method: newRasterizationRateMapWithDescriptor: +_MTL_INLINE MTL::RasterizationRateMap* 
MTL::Device::newRasterizationRateMap(const MTL::RasterizationRateMapDescriptor* descriptor) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newRasterizationRateMapWithDescriptor_), descriptor); +} + +// method: newIndirectCommandBufferWithDescriptor:maxCommandCount:options: +_MTL_INLINE MTL::IndirectCommandBuffer* MTL::Device::newIndirectCommandBuffer(const MTL::IndirectCommandBufferDescriptor* descriptor, NS::UInteger maxCount, MTL::ResourceOptions options) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newIndirectCommandBufferWithDescriptor_maxCommandCount_options_), descriptor, maxCount, options); +} + +// method: newEvent +_MTL_INLINE MTL::Event* MTL::Device::newEvent() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newEvent)); +} + +// method: newSharedEvent +_MTL_INLINE MTL::SharedEvent* MTL::Device::newSharedEvent() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newSharedEvent)); +} + +// method: newSharedEventWithHandle: +_MTL_INLINE MTL::SharedEvent* MTL::Device::newSharedEvent(const MTL::SharedEventHandle* sharedEventHandle) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newSharedEventWithHandle_), sharedEventHandle); +} + +// property: peerGroupID +_MTL_INLINE uint64_t MTL::Device::peerGroupID() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(peerGroupID)); +} + +// property: peerIndex +_MTL_INLINE uint32_t MTL::Device::peerIndex() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(peerIndex)); +} + +// property: peerCount +_MTL_INLINE uint32_t MTL::Device::peerCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(peerCount)); +} + +// method: newIOHandleWithURL:error: +_MTL_INLINE MTL::IOFileHandle* MTL::Device::newIOHandle(const NS::URL* url, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newIOHandleWithURL_error_), url, error); +} + +// method: newIOCommandQueueWithDescriptor:error: +_MTL_INLINE MTL::IOCommandQueue* 
MTL::Device::newIOCommandQueue(const MTL::IOCommandQueueDescriptor* descriptor, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newIOCommandQueueWithDescriptor_error_), descriptor, error); +} + +// method: newIOHandleWithURL:compressionMethod:error: +_MTL_INLINE MTL::IOFileHandle* MTL::Device::newIOHandle(const NS::URL* url, MTL::IOCompressionMethod compressionMethod, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newIOHandleWithURL_compressionMethod_error_), url, compressionMethod, error); +} + +// method: sparseTileSizeWithTextureType:pixelFormat:sampleCount: +_MTL_INLINE MTL::Size MTL::Device::sparseTileSize(MTL::TextureType textureType, MTL::PixelFormat pixelFormat, NS::UInteger sampleCount) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sparseTileSizeWithTextureType_pixelFormat_sampleCount_), textureType, pixelFormat, sampleCount); +} + +// property: sparseTileSizeInBytes +_MTL_INLINE NS::UInteger MTL::Device::sparseTileSizeInBytes() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sparseTileSizeInBytes)); +} + +// method: convertSparsePixelRegions:toTileRegions:withTileSize:alignmentMode:numRegions: +_MTL_INLINE void MTL::Device::convertSparsePixelRegions(const MTL::Region* pixelRegions, MTL::Region* tileRegions, MTL::Size tileSize, MTL::SparseTextureRegionAlignmentMode mode, NS::UInteger numRegions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(convertSparsePixelRegions_toTileRegions_withTileSize_alignmentMode_numRegions_), pixelRegions, tileRegions, tileSize, mode, numRegions); +} + +// method: convertSparseTileRegions:toPixelRegions:withTileSize:numRegions: +_MTL_INLINE void MTL::Device::convertSparseTileRegions(const MTL::Region* tileRegions, MTL::Region* pixelRegions, MTL::Size tileSize, NS::UInteger numRegions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(convertSparseTileRegions_toPixelRegions_withTileSize_numRegions_), tileRegions, pixelRegions, tileSize, numRegions); +} + 
+// method: sparseTileSizeInBytesForSparsePageSize: +_MTL_INLINE NS::UInteger MTL::Device::sparseTileSizeInBytes(MTL::SparsePageSize sparsePageSize) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sparseTileSizeInBytesForSparsePageSize_), sparsePageSize); +} + +// method: sparseTileSizeWithTextureType:pixelFormat:sampleCount:sparsePageSize: +_MTL_INLINE MTL::Size MTL::Device::sparseTileSize(MTL::TextureType textureType, MTL::PixelFormat pixelFormat, NS::UInteger sampleCount, MTL::SparsePageSize sparsePageSize) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sparseTileSizeWithTextureType_pixelFormat_sampleCount_sparsePageSize_), textureType, pixelFormat, sampleCount, sparsePageSize); +} + +// property: maxBufferLength +_MTL_INLINE NS::UInteger MTL::Device::maxBufferLength() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxBufferLength)); +} + +// property: counterSets +_MTL_INLINE NS::Array* MTL::Device::counterSets() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(counterSets)); +} + +// method: newCounterSampleBufferWithDescriptor:error: +_MTL_INLINE MTL::CounterSampleBuffer* MTL::Device::newCounterSampleBuffer(const MTL::CounterSampleBufferDescriptor* descriptor, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newCounterSampleBufferWithDescriptor_error_), descriptor, error); +} + +// method: sampleTimestamps:gpuTimestamp: +_MTL_INLINE void MTL::Device::sampleTimestamps(MTL::Timestamp* cpuTimestamp, MTL::Timestamp* gpuTimestamp) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(sampleTimestamps_gpuTimestamp_), cpuTimestamp, gpuTimestamp); +} + +// method: newArgumentEncoderWithBufferBinding: +_MTL_INLINE MTL::ArgumentEncoder* MTL::Device::newArgumentEncoder(const MTL::BufferBinding* bufferBinding) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newArgumentEncoderWithBufferBinding_), bufferBinding); +} + +// method: supportsCounterSampling: +_MTL_INLINE bool 
MTL::Device::supportsCounterSampling(MTL::CounterSamplingPoint samplingPoint) +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportsCounterSampling_), samplingPoint); +} + +// method: supportsVertexAmplificationCount: +_MTL_INLINE bool MTL::Device::supportsVertexAmplificationCount(NS::UInteger count) +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportsVertexAmplificationCount_), count); +} + +// property: supportsDynamicLibraries +_MTL_INLINE bool MTL::Device::supportsDynamicLibraries() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportsDynamicLibraries)); +} + +// property: supportsRenderDynamicLibraries +_MTL_INLINE bool MTL::Device::supportsRenderDynamicLibraries() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportsRenderDynamicLibraries)); +} + +// method: newDynamicLibrary:error: +_MTL_INLINE MTL::DynamicLibrary* MTL::Device::newDynamicLibrary(const MTL::Library* library, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newDynamicLibrary_error_), library, error); +} + +// method: newDynamicLibraryWithURL:error: +_MTL_INLINE MTL::DynamicLibrary* MTL::Device::newDynamicLibrary(const NS::URL* url, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newDynamicLibraryWithURL_error_), url, error); +} + +// method: newBinaryArchiveWithDescriptor:error: +_MTL_INLINE MTL::BinaryArchive* MTL::Device::newBinaryArchive(const MTL::BinaryArchiveDescriptor* descriptor, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newBinaryArchiveWithDescriptor_error_), descriptor, error); +} + +// property: supportsRaytracing +_MTL_INLINE bool MTL::Device::supportsRaytracing() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportsRaytracing)); +} + +// method: accelerationStructureSizesWithDescriptor: +_MTL_INLINE MTL::AccelerationStructureSizes MTL::Device::accelerationStructureSizes(const MTL::AccelerationStructureDescriptor* 
descriptor) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(accelerationStructureSizesWithDescriptor_), descriptor); +} + +// method: newAccelerationStructureWithSize: +_MTL_INLINE MTL::AccelerationStructure* MTL::Device::newAccelerationStructure(NS::UInteger size) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newAccelerationStructureWithSize_), size); +} + +// method: newAccelerationStructureWithDescriptor: +_MTL_INLINE MTL::AccelerationStructure* MTL::Device::newAccelerationStructure(const MTL::AccelerationStructureDescriptor* descriptor) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newAccelerationStructureWithDescriptor_), descriptor); +} + +// method: heapAccelerationStructureSizeAndAlignWithSize: +_MTL_INLINE MTL::SizeAndAlign MTL::Device::heapAccelerationStructureSizeAndAlign(NS::UInteger size) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(heapAccelerationStructureSizeAndAlignWithSize_), size); +} + +// method: heapAccelerationStructureSizeAndAlignWithDescriptor: +_MTL_INLINE MTL::SizeAndAlign MTL::Device::heapAccelerationStructureSizeAndAlign(const MTL::AccelerationStructureDescriptor* descriptor) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(heapAccelerationStructureSizeAndAlignWithDescriptor_), descriptor); +} + +// property: supportsFunctionPointers +_MTL_INLINE bool MTL::Device::supportsFunctionPointers() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportsFunctionPointers)); +} + +// property: supportsFunctionPointersFromRender +_MTL_INLINE bool MTL::Device::supportsFunctionPointersFromRender() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportsFunctionPointersFromRender)); +} + +// property: supportsRaytracingFromRender +_MTL_INLINE bool MTL::Device::supportsRaytracingFromRender() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportsRaytracingFromRender)); +} + +// property: supportsPrimitiveMotionBlur +_MTL_INLINE bool 
MTL::Device::supportsPrimitiveMotionBlur() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportsPrimitiveMotionBlur)); +} diff --git a/metal-cpp/Metal/MTLDrawable.hpp b/metal-cpp/Metal/MTLDrawable.hpp new file mode 100644 index 00000000..b23232b3 --- /dev/null +++ b/metal-cpp/Metal/MTLDrawable.hpp @@ -0,0 +1,99 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLDrawable.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include +#include + +namespace MTL +{ +using DrawablePresentedHandler = void (^)(class Drawable*); + +using DrawablePresentedHandlerFunction = std::function; + +class Drawable : public NS::Referencing +{ +public: + void addPresentedHandler(const MTL::DrawablePresentedHandlerFunction& function); + + void present(); + + void presentAtTime(CFTimeInterval presentationTime); + + void presentAfterMinimumDuration(CFTimeInterval duration); + + void addPresentedHandler(const MTL::DrawablePresentedHandler block); + + CFTimeInterval presentedTime() const; + + NS::UInteger drawableID() const; +}; + +} + +_MTL_INLINE void MTL::Drawable::addPresentedHandler(const MTL::DrawablePresentedHandlerFunction& function) +{ + __block DrawablePresentedHandlerFunction blockFunction = function; + + addPresentedHandler(^(Drawable* pDrawable) { blockFunction(pDrawable); }); +} + +// method: present +_MTL_INLINE void MTL::Drawable::present() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(present)); +} + +// method: presentAtTime: +_MTL_INLINE void MTL::Drawable::presentAtTime(CFTimeInterval presentationTime) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(presentAtTime_), presentationTime); +} + +// method: presentAfterMinimumDuration: +_MTL_INLINE void MTL::Drawable::presentAfterMinimumDuration(CFTimeInterval duration) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(presentAfterMinimumDuration_), duration); +} + +// method: addPresentedHandler: +_MTL_INLINE void MTL::Drawable::addPresentedHandler(const MTL::DrawablePresentedHandler block) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(addPresentedHandler_), block); +} + +// property: presentedTime +_MTL_INLINE CFTimeInterval MTL::Drawable::presentedTime() const 
+{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(presentedTime)); +} + +// property: drawableID +_MTL_INLINE NS::UInteger MTL::Drawable::drawableID() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(drawableID)); +} diff --git a/metal-cpp/Metal/MTLDynamicLibrary.hpp b/metal-cpp/Metal/MTLDynamicLibrary.hpp new file mode 100644 index 00000000..3c6c2eb6 --- /dev/null +++ b/metal-cpp/Metal/MTLDynamicLibrary.hpp @@ -0,0 +1,82 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLDynamicLibrary.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +namespace MTL +{ +_MTL_ENUM(NS::UInteger, DynamicLibraryError) { + DynamicLibraryErrorNone = 0, + DynamicLibraryErrorInvalidFile = 1, + DynamicLibraryErrorCompilationFailure = 2, + DynamicLibraryErrorUnresolvedInstallName = 3, + DynamicLibraryErrorDependencyLoadFailure = 4, + DynamicLibraryErrorUnsupported = 5, +}; + +class DynamicLibrary : public NS::Referencing +{ +public: + NS::String* label() const; + void setLabel(const NS::String* label); + + class Device* device() const; + + NS::String* installName() const; + + bool serializeToURL(const NS::URL* url, NS::Error** error); +}; + +} + +// property: label +_MTL_INLINE NS::String* MTL::DynamicLibrary::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::DynamicLibrary::setLabel(const NS::String* label) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} + +// property: device +_MTL_INLINE MTL::Device* MTL::DynamicLibrary::device() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); +} + +// property: installName +_MTL_INLINE NS::String* MTL::DynamicLibrary::installName() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(installName)); +} + +// method: serializeToURL:error: +_MTL_INLINE bool MTL::DynamicLibrary::serializeToURL(const NS::URL* url, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(serializeToURL_error_), url, error); +} diff --git a/metal-cpp/Metal/MTLEvent.hpp b/metal-cpp/Metal/MTLEvent.hpp new file mode 100644 index 00000000..abb4e06d --- /dev/null +++ b/metal-cpp/Metal/MTLEvent.hpp @@ -0,0 +1,159 @@ 
+//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLEvent.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLEvent.hpp" + +namespace MTL +{ +class Event : public NS::Referencing +{ +public: + class Device* device() const; + + NS::String* label() const; + void setLabel(const NS::String* label); +}; + +class SharedEventListener : public NS::Referencing +{ +public: + static class SharedEventListener* alloc(); + + MTL::SharedEventListener* init(); + + MTL::SharedEventListener* init(const dispatch_queue_t dispatchQueue); + + dispatch_queue_t dispatchQueue() const; +}; + +using SharedEventNotificationBlock = void (^)(SharedEvent* pEvent, std::uint64_t value); + +class SharedEvent : public NS::Referencing +{ +public: + void notifyListener(const class SharedEventListener* listener, uint64_t value, const MTL::SharedEventNotificationBlock block); + + class SharedEventHandle* newSharedEventHandle(); + + uint64_t signaledValue() const; + void setSignaledValue(uint64_t signaledValue); +}; + +class SharedEventHandle 
: public NS::SecureCoding +{ +public: + static class SharedEventHandle* alloc(); + + class SharedEventHandle* init(); + + NS::String* label() const; +}; + +} + +// property: device +_MTL_INLINE MTL::Device* MTL::Event::device() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); +} + +// property: label +_MTL_INLINE NS::String* MTL::Event::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::Event::setLabel(const NS::String* label) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} + +// static method: alloc +_MTL_INLINE MTL::SharedEventListener* MTL::SharedEventListener::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLSharedEventListener)); +} + +// method: init +_MTL_INLINE MTL::SharedEventListener* MTL::SharedEventListener::init() +{ + return NS::Object::init(); +} + +// method: initWithDispatchQueue: +_MTL_INLINE MTL::SharedEventListener* MTL::SharedEventListener::init(const dispatch_queue_t dispatchQueue) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(initWithDispatchQueue_), dispatchQueue); +} + +// property: dispatchQueue +_MTL_INLINE dispatch_queue_t MTL::SharedEventListener::dispatchQueue() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(dispatchQueue)); +} + +// method: notifyListener:atValue:block: +_MTL_INLINE void MTL::SharedEvent::notifyListener(const MTL::SharedEventListener* listener, uint64_t value, const MTL::SharedEventNotificationBlock block) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(notifyListener_atValue_block_), listener, value, block); +} + +// method: newSharedEventHandle +_MTL_INLINE MTL::SharedEventHandle* MTL::SharedEvent::newSharedEventHandle() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newSharedEventHandle)); +} + +// property: signaledValue +_MTL_INLINE uint64_t MTL::SharedEvent::signaledValue() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(signaledValue)); +} + +_MTL_INLINE void 
MTL::SharedEvent::setSignaledValue(uint64_t signaledValue) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSignaledValue_), signaledValue); +} + +// static method: alloc +_MTL_INLINE MTL::SharedEventHandle* MTL::SharedEventHandle::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLSharedEventHandle)); +} + +// method: init +_MTL_INLINE MTL::SharedEventHandle* MTL::SharedEventHandle::init() +{ + return NS::Object::init(); +} + +// property: label +_MTL_INLINE NS::String* MTL::SharedEventHandle::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} diff --git a/metal-cpp/Metal/MTLFence.hpp b/metal-cpp/Metal/MTLFence.hpp new file mode 100644 index 00000000..6337ac17 --- /dev/null +++ b/metal-cpp/Metal/MTLFence.hpp @@ -0,0 +1,57 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLFence.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +namespace MTL +{ +class Fence : public NS::Referencing +{ +public: + class Device* device() const; + + NS::String* label() const; + void setLabel(const NS::String* label); +}; + +} + +// property: device +_MTL_INLINE MTL::Device* MTL::Fence::device() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); +} + +// property: label +_MTL_INLINE NS::String* MTL::Fence::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::Fence::setLabel(const NS::String* label) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} diff --git a/metal-cpp/Metal/MTLFunctionConstantValues.hpp b/metal-cpp/Metal/MTLFunctionConstantValues.hpp new file mode 100644 index 00000000..93157c6a --- /dev/null +++ b/metal-cpp/Metal/MTLFunctionConstantValues.hpp @@ -0,0 +1,85 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLFunctionConstantValues.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLArgument.hpp" + +namespace MTL +{ +class FunctionConstantValues : public NS::Copying +{ +public: + static class FunctionConstantValues* alloc(); + + class FunctionConstantValues* init(); + + void setConstantValue(const void* value, MTL::DataType type, NS::UInteger index); + + void setConstantValues(const void* values, MTL::DataType type, NS::Range range); + + void setConstantValue(const void* value, MTL::DataType type, const NS::String* name); + + void reset(); +}; + +} + +// static method: alloc +_MTL_INLINE MTL::FunctionConstantValues* MTL::FunctionConstantValues::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLFunctionConstantValues)); +} + +// method: init +_MTL_INLINE MTL::FunctionConstantValues* MTL::FunctionConstantValues::init() +{ + return NS::Object::init(); +} + +// method: setConstantValue:type:atIndex: +_MTL_INLINE void MTL::FunctionConstantValues::setConstantValue(const void* value, MTL::DataType type, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setConstantValue_type_atIndex_), value, type, index); +} + +// method: setConstantValues:type:withRange: +_MTL_INLINE void MTL::FunctionConstantValues::setConstantValues(const void* values, MTL::DataType type, NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setConstantValues_type_withRange_), values, type, range); +} + +// method: setConstantValue:type:withName: +_MTL_INLINE void MTL::FunctionConstantValues::setConstantValue(const void* value, MTL::DataType type, const NS::String* name) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setConstantValue_type_withName_), value, type, name); +} + +// method: reset +_MTL_INLINE void MTL::FunctionConstantValues::reset() +{ + 
Object::sendMessage(this, _MTL_PRIVATE_SEL(reset)); +} diff --git a/metal-cpp/Metal/MTLFunctionDescriptor.hpp b/metal-cpp/Metal/MTLFunctionDescriptor.hpp new file mode 100644 index 00000000..02188ab5 --- /dev/null +++ b/metal-cpp/Metal/MTLFunctionDescriptor.hpp @@ -0,0 +1,156 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLFunctionDescriptor.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLFunctionDescriptor.hpp" + +namespace MTL +{ +_MTL_OPTIONS(NS::UInteger, FunctionOptions) { + FunctionOptionNone = 0, + FunctionOptionCompileToBinary = 1, +}; + +class FunctionDescriptor : public NS::Copying +{ +public: + static class FunctionDescriptor* alloc(); + + class FunctionDescriptor* init(); + + static class FunctionDescriptor* functionDescriptor(); + + NS::String* name() const; + void setName(const NS::String* name); + + NS::String* specializedName() const; + void setSpecializedName(const NS::String* specializedName); + + class FunctionConstantValues* constantValues() const; + void setConstantValues(const class FunctionConstantValues* constantValues); + + MTL::FunctionOptions options() const; + void setOptions(MTL::FunctionOptions options); + + NS::Array* binaryArchives() const; + void setBinaryArchives(const NS::Array* binaryArchives); +}; + +class IntersectionFunctionDescriptor : public NS::Copying +{ +public: + static class IntersectionFunctionDescriptor* alloc(); + + class IntersectionFunctionDescriptor* init(); +}; + +} + +// static method: alloc +_MTL_INLINE MTL::FunctionDescriptor* MTL::FunctionDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLFunctionDescriptor)); +} + +// method: init +_MTL_INLINE MTL::FunctionDescriptor* MTL::FunctionDescriptor::init() +{ + return NS::Object::init(); +} + +// static method: functionDescriptor +_MTL_INLINE MTL::FunctionDescriptor* MTL::FunctionDescriptor::functionDescriptor() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLFunctionDescriptor), _MTL_PRIVATE_SEL(functionDescriptor)); +} + +// property: name +_MTL_INLINE NS::String* MTL::FunctionDescriptor::name() const +{ + return 
Object::sendMessage(this, _MTL_PRIVATE_SEL(name)); +} + +_MTL_INLINE void MTL::FunctionDescriptor::setName(const NS::String* name) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setName_), name); +} + +// property: specializedName +_MTL_INLINE NS::String* MTL::FunctionDescriptor::specializedName() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(specializedName)); +} + +_MTL_INLINE void MTL::FunctionDescriptor::setSpecializedName(const NS::String* specializedName) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSpecializedName_), specializedName); +} + +// property: constantValues +_MTL_INLINE MTL::FunctionConstantValues* MTL::FunctionDescriptor::constantValues() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(constantValues)); +} + +_MTL_INLINE void MTL::FunctionDescriptor::setConstantValues(const MTL::FunctionConstantValues* constantValues) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setConstantValues_), constantValues); +} + +// property: options +_MTL_INLINE MTL::FunctionOptions MTL::FunctionDescriptor::options() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(options)); +} + +_MTL_INLINE void MTL::FunctionDescriptor::setOptions(MTL::FunctionOptions options) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setOptions_), options); +} + +// property: binaryArchives +_MTL_INLINE NS::Array* MTL::FunctionDescriptor::binaryArchives() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(binaryArchives)); +} + +_MTL_INLINE void MTL::FunctionDescriptor::setBinaryArchives(const NS::Array* binaryArchives) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBinaryArchives_), binaryArchives); +} + +// static method: alloc +_MTL_INLINE MTL::IntersectionFunctionDescriptor* MTL::IntersectionFunctionDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLIntersectionFunctionDescriptor)); +} + +// method: init +_MTL_INLINE MTL::IntersectionFunctionDescriptor* MTL::IntersectionFunctionDescriptor::init() +{ + return 
NS::Object::init(); +} diff --git a/metal-cpp/Metal/MTLFunctionHandle.hpp b/metal-cpp/Metal/MTLFunctionHandle.hpp new file mode 100644 index 00000000..8b0785a2 --- /dev/null +++ b/metal-cpp/Metal/MTLFunctionHandle.hpp @@ -0,0 +1,61 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLFunctionHandle.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLLibrary.hpp" + +namespace MTL +{ +class FunctionHandle : public NS::Referencing +{ +public: + MTL::FunctionType functionType() const; + + NS::String* name() const; + + class Device* device() const; +}; + +} + +// property: functionType +_MTL_INLINE MTL::FunctionType MTL::FunctionHandle::functionType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(functionType)); +} + +// property: name +_MTL_INLINE NS::String* MTL::FunctionHandle::name() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(name)); +} + +// property: device +_MTL_INLINE MTL::Device* MTL::FunctionHandle::device() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); +} diff --git a/metal-cpp/Metal/MTLFunctionLog.hpp b/metal-cpp/Metal/MTLFunctionLog.hpp new file mode 100644 index 00000000..31404bc8 --- /dev/null +++ b/metal-cpp/Metal/MTLFunctionLog.hpp @@ -0,0 +1,114 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLFunctionLog.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLFunctionLog.hpp" + +namespace MTL +{ +_MTL_ENUM(NS::UInteger, FunctionLogType) { + FunctionLogTypeValidation = 0, +}; + +class LogContainer : public NS::Referencing +{ +public: +}; + +class FunctionLogDebugLocation : public NS::Referencing +{ +public: + NS::String* functionName() const; + + NS::URL* URL() const; + + NS::UInteger line() const; + + NS::UInteger column() const; +}; + +class FunctionLog : public NS::Referencing +{ +public: + MTL::FunctionLogType type() const; + + NS::String* encoderLabel() const; + + class Function* function() const; + + class FunctionLogDebugLocation* debugLocation() const; +}; + +} + +// property: functionName +_MTL_INLINE NS::String* MTL::FunctionLogDebugLocation::functionName() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(functionName)); +} + +// property: URL +_MTL_INLINE NS::URL* MTL::FunctionLogDebugLocation::URL() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(URL)); +} + +// property: line +_MTL_INLINE NS::UInteger MTL::FunctionLogDebugLocation::line() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(line)); +} + +// property: column +_MTL_INLINE NS::UInteger MTL::FunctionLogDebugLocation::column() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(column)); +} + +// property: type +_MTL_INLINE MTL::FunctionLogType MTL::FunctionLog::type() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(type)); +} + +// property: encoderLabel +_MTL_INLINE NS::String* MTL::FunctionLog::encoderLabel() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(encoderLabel)); +} + +// property: function +_MTL_INLINE MTL::Function* MTL::FunctionLog::function() const +{ + return 
Object::sendMessage(this, _MTL_PRIVATE_SEL(function)); +} + +// property: debugLocation +_MTL_INLINE MTL::FunctionLogDebugLocation* MTL::FunctionLog::debugLocation() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(debugLocation)); +} diff --git a/metal-cpp/Metal/MTLFunctionStitching.hpp b/metal-cpp/Metal/MTLFunctionStitching.hpp new file mode 100644 index 00000000..a0c2b566 --- /dev/null +++ b/metal-cpp/Metal/MTLFunctionStitching.hpp @@ -0,0 +1,305 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLFunctionStitching.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLFunctionStitching.hpp" + +namespace MTL +{ +class FunctionStitchingAttribute : public NS::Referencing +{ +}; + +class FunctionStitchingAttributeAlwaysInline : public NS::Referencing +{ +public: + static class FunctionStitchingAttributeAlwaysInline* alloc(); + + class FunctionStitchingAttributeAlwaysInline* init(); +}; + +class FunctionStitchingNode : public NS::Copying +{ +}; + +class FunctionStitchingInputNode : public NS::Referencing +{ +public: + static class FunctionStitchingInputNode* alloc(); + + class FunctionStitchingInputNode* init(); + + NS::UInteger argumentIndex() const; + void setArgumentIndex(NS::UInteger argumentIndex); + + MTL::FunctionStitchingInputNode* init(NS::UInteger argument); +}; + +class FunctionStitchingFunctionNode : public NS::Referencing +{ +public: + static class FunctionStitchingFunctionNode* alloc(); + + class FunctionStitchingFunctionNode* init(); + + NS::String* name() const; + void setName(const NS::String* name); + + NS::Array* arguments() const; + void setArguments(const NS::Array* arguments); + + NS::Array* controlDependencies() const; + void setControlDependencies(const NS::Array* controlDependencies); + + MTL::FunctionStitchingFunctionNode* init(const NS::String* name, const NS::Array* arguments, const NS::Array* controlDependencies); +}; + +class FunctionStitchingGraph : public NS::Copying +{ +public: + static class FunctionStitchingGraph* alloc(); + + class FunctionStitchingGraph* init(); + + NS::String* functionName() const; + void setFunctionName(const NS::String* functionName); + + NS::Array* nodes() const; + void setNodes(const NS::Array* nodes); + + class FunctionStitchingFunctionNode* outputNode() const; + void 
setOutputNode(const class FunctionStitchingFunctionNode* outputNode); + + NS::Array* attributes() const; + void setAttributes(const NS::Array* attributes); + + MTL::FunctionStitchingGraph* init(const NS::String* functionName, const NS::Array* nodes, const class FunctionStitchingFunctionNode* outputNode, const NS::Array* attributes); +}; + +class StitchedLibraryDescriptor : public NS::Copying +{ +public: + static class StitchedLibraryDescriptor* alloc(); + + class StitchedLibraryDescriptor* init(); + + NS::Array* functionGraphs() const; + void setFunctionGraphs(const NS::Array* functionGraphs); + + NS::Array* functions() const; + void setFunctions(const NS::Array* functions); +}; + +} + +// static method: alloc +_MTL_INLINE MTL::FunctionStitchingAttributeAlwaysInline* MTL::FunctionStitchingAttributeAlwaysInline::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLFunctionStitchingAttributeAlwaysInline)); +} + +// method: init +_MTL_INLINE MTL::FunctionStitchingAttributeAlwaysInline* MTL::FunctionStitchingAttributeAlwaysInline::init() +{ + return NS::Object::init(); +} + +// static method: alloc +_MTL_INLINE MTL::FunctionStitchingInputNode* MTL::FunctionStitchingInputNode::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLFunctionStitchingInputNode)); +} + +// method: init +_MTL_INLINE MTL::FunctionStitchingInputNode* MTL::FunctionStitchingInputNode::init() +{ + return NS::Object::init(); +} + +// property: argumentIndex +_MTL_INLINE NS::UInteger MTL::FunctionStitchingInputNode::argumentIndex() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(argumentIndex)); +} + +_MTL_INLINE void MTL::FunctionStitchingInputNode::setArgumentIndex(NS::UInteger argumentIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setArgumentIndex_), argumentIndex); +} + +// method: initWithArgumentIndex: +_MTL_INLINE MTL::FunctionStitchingInputNode* MTL::FunctionStitchingInputNode::init(NS::UInteger argument) +{ + return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(initWithArgumentIndex_), argument); +} + +// static method: alloc +_MTL_INLINE MTL::FunctionStitchingFunctionNode* MTL::FunctionStitchingFunctionNode::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLFunctionStitchingFunctionNode)); +} + +// method: init +_MTL_INLINE MTL::FunctionStitchingFunctionNode* MTL::FunctionStitchingFunctionNode::init() +{ + return NS::Object::init(); +} + +// property: name +_MTL_INLINE NS::String* MTL::FunctionStitchingFunctionNode::name() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(name)); +} + +_MTL_INLINE void MTL::FunctionStitchingFunctionNode::setName(const NS::String* name) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setName_), name); +} + +// property: arguments +_MTL_INLINE NS::Array* MTL::FunctionStitchingFunctionNode::arguments() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(arguments)); +} + +_MTL_INLINE void MTL::FunctionStitchingFunctionNode::setArguments(const NS::Array* arguments) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setArguments_), arguments); +} + +// property: controlDependencies +_MTL_INLINE NS::Array* MTL::FunctionStitchingFunctionNode::controlDependencies() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(controlDependencies)); +} + +_MTL_INLINE void MTL::FunctionStitchingFunctionNode::setControlDependencies(const NS::Array* controlDependencies) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setControlDependencies_), controlDependencies); +} + +// method: initWithName:arguments:controlDependencies: +_MTL_INLINE MTL::FunctionStitchingFunctionNode* MTL::FunctionStitchingFunctionNode::init(const NS::String* name, const NS::Array* arguments, const NS::Array* controlDependencies) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(initWithName_arguments_controlDependencies_), name, arguments, controlDependencies); +} + +// static method: alloc +_MTL_INLINE MTL::FunctionStitchingGraph* MTL::FunctionStitchingGraph::alloc() +{ + return 
NS::Object::alloc(_MTL_PRIVATE_CLS(MTLFunctionStitchingGraph)); +} + +// method: init +_MTL_INLINE MTL::FunctionStitchingGraph* MTL::FunctionStitchingGraph::init() +{ + return NS::Object::init(); +} + +// property: functionName +_MTL_INLINE NS::String* MTL::FunctionStitchingGraph::functionName() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(functionName)); +} + +_MTL_INLINE void MTL::FunctionStitchingGraph::setFunctionName(const NS::String* functionName) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setFunctionName_), functionName); +} + +// property: nodes +_MTL_INLINE NS::Array* MTL::FunctionStitchingGraph::nodes() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(nodes)); +} + +_MTL_INLINE void MTL::FunctionStitchingGraph::setNodes(const NS::Array* nodes) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setNodes_), nodes); +} + +// property: outputNode +_MTL_INLINE MTL::FunctionStitchingFunctionNode* MTL::FunctionStitchingGraph::outputNode() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(outputNode)); +} + +_MTL_INLINE void MTL::FunctionStitchingGraph::setOutputNode(const MTL::FunctionStitchingFunctionNode* outputNode) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setOutputNode_), outputNode); +} + +// property: attributes +_MTL_INLINE NS::Array* MTL::FunctionStitchingGraph::attributes() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(attributes)); +} + +_MTL_INLINE void MTL::FunctionStitchingGraph::setAttributes(const NS::Array* attributes) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setAttributes_), attributes); +} + +// method: initWithFunctionName:nodes:outputNode:attributes: +_MTL_INLINE MTL::FunctionStitchingGraph* MTL::FunctionStitchingGraph::init(const NS::String* functionName, const NS::Array* nodes, const MTL::FunctionStitchingFunctionNode* outputNode, const NS::Array* attributes) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(initWithFunctionName_nodes_outputNode_attributes_), 
functionName, nodes, outputNode, attributes); +} + +// static method: alloc +_MTL_INLINE MTL::StitchedLibraryDescriptor* MTL::StitchedLibraryDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLStitchedLibraryDescriptor)); +} + +// method: init +_MTL_INLINE MTL::StitchedLibraryDescriptor* MTL::StitchedLibraryDescriptor::init() +{ + return NS::Object::init(); +} + +// property: functionGraphs +_MTL_INLINE NS::Array* MTL::StitchedLibraryDescriptor::functionGraphs() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(functionGraphs)); +} + +_MTL_INLINE void MTL::StitchedLibraryDescriptor::setFunctionGraphs(const NS::Array* functionGraphs) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setFunctionGraphs_), functionGraphs); +} + +// property: functions +_MTL_INLINE NS::Array* MTL::StitchedLibraryDescriptor::functions() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(functions)); +} + +_MTL_INLINE void MTL::StitchedLibraryDescriptor::setFunctions(const NS::Array* functions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setFunctions_), functions); +} diff --git a/metal-cpp/Metal/MTLHeaderBridge.hpp b/metal-cpp/Metal/MTLHeaderBridge.hpp new file mode 100644 index 00000000..95da4964 --- /dev/null +++ b/metal-cpp/Metal/MTLHeaderBridge.hpp @@ -0,0 +1,2286 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLHeaderBridge.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once +#include "MTLPrivate.hpp" + +namespace MTL::Private::Class +{ + +_MTL_PRIVATE_DEF_CLS(MTLAccelerationStructureBoundingBoxGeometryDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLAccelerationStructureDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLAccelerationStructureGeometryDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLAccelerationStructureMotionTriangleGeometryDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLAccelerationStructurePassDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLAccelerationStructurePassSampleBufferAttachmentDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLAccelerationStructurePassSampleBufferAttachmentDescriptorArray); +_MTL_PRIVATE_DEF_CLS(MTLAccelerationStructureTriangleGeometryDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLArgument); +_MTL_PRIVATE_DEF_CLS(MTLArgumentDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLArrayType); +_MTL_PRIVATE_DEF_CLS(MTLAttribute); +_MTL_PRIVATE_DEF_CLS(MTLAttributeDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLAttributeDescriptorArray); +_MTL_PRIVATE_DEF_CLS(MTLBinaryArchiveDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLBlitPassDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLBlitPassSampleBufferAttachmentDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLBlitPassSampleBufferAttachmentDescriptorArray); +_MTL_PRIVATE_DEF_CLS(MTLBufferLayoutDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLBufferLayoutDescriptorArray); +_MTL_PRIVATE_DEF_CLS(MTLCaptureDescriptor); 
+_MTL_PRIVATE_DEF_CLS(MTLCaptureManager); +_MTL_PRIVATE_DEF_CLS(MTLCommandBufferDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLCompileOptions); +_MTL_PRIVATE_DEF_CLS(MTLComputePassDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLComputePassSampleBufferAttachmentDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLComputePassSampleBufferAttachmentDescriptorArray); +_MTL_PRIVATE_DEF_CLS(MTLComputePipelineDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLComputePipelineReflection); +_MTL_PRIVATE_DEF_CLS(MTLCounterSampleBufferDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLDepthStencilDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLFunctionConstant); +_MTL_PRIVATE_DEF_CLS(MTLFunctionConstantValues); +_MTL_PRIVATE_DEF_CLS(MTLFunctionDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLFunctionStitchingAttributeAlwaysInline); +_MTL_PRIVATE_DEF_CLS(MTLFunctionStitchingFunctionNode); +_MTL_PRIVATE_DEF_CLS(MTLFunctionStitchingGraph); +_MTL_PRIVATE_DEF_CLS(MTLFunctionStitchingInputNode); +_MTL_PRIVATE_DEF_CLS(MTLHeapDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLIOCommandQueueDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLIndirectCommandBufferDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLInstanceAccelerationStructureDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLIntersectionFunctionDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLIntersectionFunctionTableDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLLinkedFunctions); +_MTL_PRIVATE_DEF_CLS(MTLMeshRenderPipelineDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLMotionKeyframeData); +_MTL_PRIVATE_DEF_CLS(MTLPipelineBufferDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLPipelineBufferDescriptorArray); +_MTL_PRIVATE_DEF_CLS(MTLPointerType); +_MTL_PRIVATE_DEF_CLS(MTLPrimitiveAccelerationStructureDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLRasterizationRateLayerArray); +_MTL_PRIVATE_DEF_CLS(MTLRasterizationRateLayerDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLRasterizationRateMapDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLRasterizationRateSampleArray); +_MTL_PRIVATE_DEF_CLS(MTLRenderPassAttachmentDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLRenderPassColorAttachmentDescriptor); 
+_MTL_PRIVATE_DEF_CLS(MTLRenderPassColorAttachmentDescriptorArray); +_MTL_PRIVATE_DEF_CLS(MTLRenderPassDepthAttachmentDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLRenderPassDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLRenderPassSampleBufferAttachmentDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLRenderPassSampleBufferAttachmentDescriptorArray); +_MTL_PRIVATE_DEF_CLS(MTLRenderPassStencilAttachmentDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLRenderPipelineColorAttachmentDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLRenderPipelineColorAttachmentDescriptorArray); +_MTL_PRIVATE_DEF_CLS(MTLRenderPipelineDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLRenderPipelineFunctionsDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLRenderPipelineReflection); +_MTL_PRIVATE_DEF_CLS(MTLResourceStatePassDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLResourceStatePassSampleBufferAttachmentDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLResourceStatePassSampleBufferAttachmentDescriptorArray); +_MTL_PRIVATE_DEF_CLS(MTLSamplerDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLSharedEventHandle); +_MTL_PRIVATE_DEF_CLS(MTLSharedEventListener); +_MTL_PRIVATE_DEF_CLS(MTLSharedTextureHandle); +_MTL_PRIVATE_DEF_CLS(MTLStageInputOutputDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLStencilDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLStitchedLibraryDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLStructMember); +_MTL_PRIVATE_DEF_CLS(MTLStructType); +_MTL_PRIVATE_DEF_CLS(MTLTextureDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLTextureReferenceType); +_MTL_PRIVATE_DEF_CLS(MTLTileRenderPipelineColorAttachmentDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLTileRenderPipelineColorAttachmentDescriptorArray); +_MTL_PRIVATE_DEF_CLS(MTLTileRenderPipelineDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLType); +_MTL_PRIVATE_DEF_CLS(MTLVertexAttribute); +_MTL_PRIVATE_DEF_CLS(MTLVertexAttributeDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLVertexAttributeDescriptorArray); +_MTL_PRIVATE_DEF_CLS(MTLVertexBufferLayoutDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLVertexBufferLayoutDescriptorArray); +_MTL_PRIVATE_DEF_CLS(MTLVertexDescriptor); 
+_MTL_PRIVATE_DEF_CLS(MTLVisibleFunctionTableDescriptor); + +} + +namespace MTL::Private::Protocol +{ + +_MTL_PRIVATE_DEF_PRO(MTLAccelerationStructure); +_MTL_PRIVATE_DEF_PRO(MTLAccelerationStructureCommandEncoder); +_MTL_PRIVATE_DEF_PRO(MTLArgumentEncoder); +_MTL_PRIVATE_DEF_PRO(MTLBinaryArchive); +_MTL_PRIVATE_DEF_PRO(MTLBinding); +_MTL_PRIVATE_DEF_PRO(MTLBlitCommandEncoder); +_MTL_PRIVATE_DEF_PRO(MTLBuffer); +_MTL_PRIVATE_DEF_PRO(MTLBufferBinding); +_MTL_PRIVATE_DEF_PRO(MTLCommandBuffer); +_MTL_PRIVATE_DEF_PRO(MTLCommandBufferEncoderInfo); +_MTL_PRIVATE_DEF_PRO(MTLCommandEncoder); +_MTL_PRIVATE_DEF_PRO(MTLCommandQueue); +_MTL_PRIVATE_DEF_PRO(MTLComputeCommandEncoder); +_MTL_PRIVATE_DEF_PRO(MTLComputePipelineState); +_MTL_PRIVATE_DEF_PRO(MTLCounter); +_MTL_PRIVATE_DEF_PRO(MTLCounterSampleBuffer); +_MTL_PRIVATE_DEF_PRO(MTLCounterSet); +_MTL_PRIVATE_DEF_PRO(MTLDepthStencilState); +_MTL_PRIVATE_DEF_PRO(MTLDevice); +_MTL_PRIVATE_DEF_PRO(MTLDrawable); +_MTL_PRIVATE_DEF_PRO(MTLDynamicLibrary); +_MTL_PRIVATE_DEF_PRO(MTLEvent); +_MTL_PRIVATE_DEF_PRO(MTLFence); +_MTL_PRIVATE_DEF_PRO(MTLFunction); +_MTL_PRIVATE_DEF_PRO(MTLFunctionHandle); +_MTL_PRIVATE_DEF_PRO(MTLFunctionLog); +_MTL_PRIVATE_DEF_PRO(MTLFunctionLogDebugLocation); +_MTL_PRIVATE_DEF_PRO(MTLFunctionStitchingAttribute); +_MTL_PRIVATE_DEF_PRO(MTLFunctionStitchingNode); +_MTL_PRIVATE_DEF_PRO(MTLHeap); +_MTL_PRIVATE_DEF_PRO(MTLIOCommandBuffer); +_MTL_PRIVATE_DEF_PRO(MTLIOCommandQueue); +_MTL_PRIVATE_DEF_PRO(MTLIOFileHandle); +_MTL_PRIVATE_DEF_PRO(MTLIOScratchBuffer); +_MTL_PRIVATE_DEF_PRO(MTLIOScratchBufferAllocator); +_MTL_PRIVATE_DEF_PRO(MTLIndirectCommandBuffer); +_MTL_PRIVATE_DEF_PRO(MTLIndirectComputeCommand); +_MTL_PRIVATE_DEF_PRO(MTLIndirectRenderCommand); +_MTL_PRIVATE_DEF_PRO(MTLIntersectionFunctionTable); +_MTL_PRIVATE_DEF_PRO(MTLLibrary); +_MTL_PRIVATE_DEF_PRO(MTLLogContainer); +_MTL_PRIVATE_DEF_PRO(MTLObjectPayloadBinding); +_MTL_PRIVATE_DEF_PRO(MTLParallelRenderCommandEncoder); 
+_MTL_PRIVATE_DEF_PRO(MTLRasterizationRateMap); +_MTL_PRIVATE_DEF_PRO(MTLRenderCommandEncoder); +_MTL_PRIVATE_DEF_PRO(MTLRenderPipelineState); +_MTL_PRIVATE_DEF_PRO(MTLResource); +_MTL_PRIVATE_DEF_PRO(MTLResourceStateCommandEncoder); +_MTL_PRIVATE_DEF_PRO(MTLSamplerState); +_MTL_PRIVATE_DEF_PRO(MTLSharedEvent); +_MTL_PRIVATE_DEF_PRO(MTLTexture); +_MTL_PRIVATE_DEF_PRO(MTLTextureBinding); +_MTL_PRIVATE_DEF_PRO(MTLThreadgroupBinding); +_MTL_PRIVATE_DEF_PRO(MTLVisibleFunctionTable); + +} + +namespace MTL::Private::Selector +{ + +_MTL_PRIVATE_DEF_SEL(GPUEndTime, + "GPUEndTime"); +_MTL_PRIVATE_DEF_SEL(GPUStartTime, + "GPUStartTime"); +_MTL_PRIVATE_DEF_SEL(URL, + "URL"); +_MTL_PRIVATE_DEF_SEL(accelerationStructureCommandEncoder, + "accelerationStructureCommandEncoder"); +_MTL_PRIVATE_DEF_SEL(accelerationStructureCommandEncoderWithDescriptor_, + "accelerationStructureCommandEncoderWithDescriptor:"); +_MTL_PRIVATE_DEF_SEL(accelerationStructurePassDescriptor, + "accelerationStructurePassDescriptor"); +_MTL_PRIVATE_DEF_SEL(accelerationStructureSizesWithDescriptor_, + "accelerationStructureSizesWithDescriptor:"); +_MTL_PRIVATE_DEF_SEL(access, + "access"); +_MTL_PRIVATE_DEF_SEL(addBarrier, + "addBarrier"); +_MTL_PRIVATE_DEF_SEL(addCompletedHandler_, + "addCompletedHandler:"); +_MTL_PRIVATE_DEF_SEL(addComputePipelineFunctionsWithDescriptor_error_, + "addComputePipelineFunctionsWithDescriptor:error:"); +_MTL_PRIVATE_DEF_SEL(addDebugMarker_range_, + "addDebugMarker:range:"); +_MTL_PRIVATE_DEF_SEL(addFunctionWithDescriptor_library_error_, + "addFunctionWithDescriptor:library:error:"); +_MTL_PRIVATE_DEF_SEL(addPresentedHandler_, + "addPresentedHandler:"); +_MTL_PRIVATE_DEF_SEL(addRenderPipelineFunctionsWithDescriptor_error_, + "addRenderPipelineFunctionsWithDescriptor:error:"); +_MTL_PRIVATE_DEF_SEL(addScheduledHandler_, + "addScheduledHandler:"); +_MTL_PRIVATE_DEF_SEL(addTileRenderPipelineFunctionsWithDescriptor_error_, + "addTileRenderPipelineFunctionsWithDescriptor:error:"); 
+_MTL_PRIVATE_DEF_SEL(alignment, + "alignment"); +_MTL_PRIVATE_DEF_SEL(allocatedSize, + "allocatedSize"); +_MTL_PRIVATE_DEF_SEL(allowDuplicateIntersectionFunctionInvocation, + "allowDuplicateIntersectionFunctionInvocation"); +_MTL_PRIVATE_DEF_SEL(allowGPUOptimizedContents, + "allowGPUOptimizedContents"); +_MTL_PRIVATE_DEF_SEL(alphaBlendOperation, + "alphaBlendOperation"); +_MTL_PRIVATE_DEF_SEL(areBarycentricCoordsSupported, + "areBarycentricCoordsSupported"); +_MTL_PRIVATE_DEF_SEL(areProgrammableSamplePositionsSupported, + "areProgrammableSamplePositionsSupported"); +_MTL_PRIVATE_DEF_SEL(areRasterOrderGroupsSupported, + "areRasterOrderGroupsSupported"); +_MTL_PRIVATE_DEF_SEL(argumentBuffersSupport, + "argumentBuffersSupport"); +_MTL_PRIVATE_DEF_SEL(argumentDescriptor, + "argumentDescriptor"); +_MTL_PRIVATE_DEF_SEL(argumentIndex, + "argumentIndex"); +_MTL_PRIVATE_DEF_SEL(argumentIndexStride, + "argumentIndexStride"); +_MTL_PRIVATE_DEF_SEL(arguments, + "arguments"); +_MTL_PRIVATE_DEF_SEL(arrayLength, + "arrayLength"); +_MTL_PRIVATE_DEF_SEL(arrayType, + "arrayType"); +_MTL_PRIVATE_DEF_SEL(attributeIndex, + "attributeIndex"); +_MTL_PRIVATE_DEF_SEL(attributeType, + "attributeType"); +_MTL_PRIVATE_DEF_SEL(attributes, + "attributes"); +_MTL_PRIVATE_DEF_SEL(backFaceStencil, + "backFaceStencil"); +_MTL_PRIVATE_DEF_SEL(binaryArchives, + "binaryArchives"); +_MTL_PRIVATE_DEF_SEL(binaryFunctions, + "binaryFunctions"); +_MTL_PRIVATE_DEF_SEL(bindings, + "bindings"); +_MTL_PRIVATE_DEF_SEL(blitCommandEncoder, + "blitCommandEncoder"); +_MTL_PRIVATE_DEF_SEL(blitCommandEncoderWithDescriptor_, + "blitCommandEncoderWithDescriptor:"); +_MTL_PRIVATE_DEF_SEL(blitPassDescriptor, + "blitPassDescriptor"); +_MTL_PRIVATE_DEF_SEL(borderColor, + "borderColor"); +_MTL_PRIVATE_DEF_SEL(boundingBoxBuffer, + "boundingBoxBuffer"); +_MTL_PRIVATE_DEF_SEL(boundingBoxBufferOffset, + "boundingBoxBufferOffset"); +_MTL_PRIVATE_DEF_SEL(boundingBoxBuffers, + "boundingBoxBuffers"); 
+_MTL_PRIVATE_DEF_SEL(boundingBoxCount, + "boundingBoxCount"); +_MTL_PRIVATE_DEF_SEL(boundingBoxStride, + "boundingBoxStride"); +_MTL_PRIVATE_DEF_SEL(buffer, + "buffer"); +_MTL_PRIVATE_DEF_SEL(bufferAlignment, + "bufferAlignment"); +_MTL_PRIVATE_DEF_SEL(bufferBytesPerRow, + "bufferBytesPerRow"); +_MTL_PRIVATE_DEF_SEL(bufferDataSize, + "bufferDataSize"); +_MTL_PRIVATE_DEF_SEL(bufferDataType, + "bufferDataType"); +_MTL_PRIVATE_DEF_SEL(bufferIndex, + "bufferIndex"); +_MTL_PRIVATE_DEF_SEL(bufferOffset, + "bufferOffset"); +_MTL_PRIVATE_DEF_SEL(bufferPointerType, + "bufferPointerType"); +_MTL_PRIVATE_DEF_SEL(bufferStructType, + "bufferStructType"); +_MTL_PRIVATE_DEF_SEL(buffers, + "buffers"); +_MTL_PRIVATE_DEF_SEL(buildAccelerationStructure_descriptor_scratchBuffer_scratchBufferOffset_, + "buildAccelerationStructure:descriptor:scratchBuffer:scratchBufferOffset:"); +_MTL_PRIVATE_DEF_SEL(captureObject, + "captureObject"); +_MTL_PRIVATE_DEF_SEL(clearBarrier, + "clearBarrier"); +_MTL_PRIVATE_DEF_SEL(clearColor, + "clearColor"); +_MTL_PRIVATE_DEF_SEL(clearDepth, + "clearDepth"); +_MTL_PRIVATE_DEF_SEL(clearStencil, + "clearStencil"); +_MTL_PRIVATE_DEF_SEL(colorAttachments, + "colorAttachments"); +_MTL_PRIVATE_DEF_SEL(column, + "column"); +_MTL_PRIVATE_DEF_SEL(commandBuffer, + "commandBuffer"); +_MTL_PRIVATE_DEF_SEL(commandBufferWithDescriptor_, + "commandBufferWithDescriptor:"); +_MTL_PRIVATE_DEF_SEL(commandBufferWithUnretainedReferences, + "commandBufferWithUnretainedReferences"); +_MTL_PRIVATE_DEF_SEL(commandQueue, + "commandQueue"); +_MTL_PRIVATE_DEF_SEL(commandTypes, + "commandTypes"); +_MTL_PRIVATE_DEF_SEL(commit, + "commit"); +_MTL_PRIVATE_DEF_SEL(compareFunction, + "compareFunction"); +_MTL_PRIVATE_DEF_SEL(compressionType, + "compressionType"); +_MTL_PRIVATE_DEF_SEL(computeCommandEncoder, + "computeCommandEncoder"); +_MTL_PRIVATE_DEF_SEL(computeCommandEncoderWithDescriptor_, + "computeCommandEncoderWithDescriptor:"); 
+_MTL_PRIVATE_DEF_SEL(computeCommandEncoderWithDispatchType_, + "computeCommandEncoderWithDispatchType:"); +_MTL_PRIVATE_DEF_SEL(computeFunction, + "computeFunction"); +_MTL_PRIVATE_DEF_SEL(computePassDescriptor, + "computePassDescriptor"); +_MTL_PRIVATE_DEF_SEL(concurrentDispatchThreadgroups_threadsPerThreadgroup_, + "concurrentDispatchThreadgroups:threadsPerThreadgroup:"); +_MTL_PRIVATE_DEF_SEL(concurrentDispatchThreads_threadsPerThreadgroup_, + "concurrentDispatchThreads:threadsPerThreadgroup:"); +_MTL_PRIVATE_DEF_SEL(constantBlockAlignment, + "constantBlockAlignment"); +_MTL_PRIVATE_DEF_SEL(constantDataAtIndex_, + "constantDataAtIndex:"); +_MTL_PRIVATE_DEF_SEL(constantValues, + "constantValues"); +_MTL_PRIVATE_DEF_SEL(contents, + "contents"); +_MTL_PRIVATE_DEF_SEL(controlDependencies, + "controlDependencies"); +_MTL_PRIVATE_DEF_SEL(convertSparsePixelRegions_toTileRegions_withTileSize_alignmentMode_numRegions_, + "convertSparsePixelRegions:toTileRegions:withTileSize:alignmentMode:numRegions:"); +_MTL_PRIVATE_DEF_SEL(convertSparseTileRegions_toPixelRegions_withTileSize_numRegions_, + "convertSparseTileRegions:toPixelRegions:withTileSize:numRegions:"); +_MTL_PRIVATE_DEF_SEL(copyAccelerationStructure_toAccelerationStructure_, + "copyAccelerationStructure:toAccelerationStructure:"); +_MTL_PRIVATE_DEF_SEL(copyAndCompactAccelerationStructure_toAccelerationStructure_, + "copyAndCompactAccelerationStructure:toAccelerationStructure:"); +_MTL_PRIVATE_DEF_SEL(copyFromBuffer_sourceOffset_sourceBytesPerRow_sourceBytesPerImage_sourceSize_toTexture_destinationSlice_destinationLevel_destinationOrigin_, + "copyFromBuffer:sourceOffset:sourceBytesPerRow:sourceBytesPerImage:sourceSize:toTexture:destinationSlice:destinationLevel:destinationOrigin:"); +_MTL_PRIVATE_DEF_SEL(copyFromBuffer_sourceOffset_sourceBytesPerRow_sourceBytesPerImage_sourceSize_toTexture_destinationSlice_destinationLevel_destinationOrigin_options_, + 
"copyFromBuffer:sourceOffset:sourceBytesPerRow:sourceBytesPerImage:sourceSize:toTexture:destinationSlice:destinationLevel:destinationOrigin:options:"); +_MTL_PRIVATE_DEF_SEL(copyFromBuffer_sourceOffset_toBuffer_destinationOffset_size_, + "copyFromBuffer:sourceOffset:toBuffer:destinationOffset:size:"); +_MTL_PRIVATE_DEF_SEL(copyFromTexture_sourceSlice_sourceLevel_sourceOrigin_sourceSize_toBuffer_destinationOffset_destinationBytesPerRow_destinationBytesPerImage_, + "copyFromTexture:sourceSlice:sourceLevel:sourceOrigin:sourceSize:toBuffer:destinationOffset:destinationBytesPerRow:destinationBytesPerImage:"); +_MTL_PRIVATE_DEF_SEL(copyFromTexture_sourceSlice_sourceLevel_sourceOrigin_sourceSize_toBuffer_destinationOffset_destinationBytesPerRow_destinationBytesPerImage_options_, + "copyFromTexture:sourceSlice:sourceLevel:sourceOrigin:sourceSize:toBuffer:destinationOffset:destinationBytesPerRow:destinationBytesPerImage:options:"); +_MTL_PRIVATE_DEF_SEL(copyFromTexture_sourceSlice_sourceLevel_sourceOrigin_sourceSize_toTexture_destinationSlice_destinationLevel_destinationOrigin_, + "copyFromTexture:sourceSlice:sourceLevel:sourceOrigin:sourceSize:toTexture:destinationSlice:destinationLevel:destinationOrigin:"); +_MTL_PRIVATE_DEF_SEL(copyFromTexture_sourceSlice_sourceLevel_toTexture_destinationSlice_destinationLevel_sliceCount_levelCount_, + "copyFromTexture:sourceSlice:sourceLevel:toTexture:destinationSlice:destinationLevel:sliceCount:levelCount:"); +_MTL_PRIVATE_DEF_SEL(copyFromTexture_toTexture_, + "copyFromTexture:toTexture:"); +_MTL_PRIVATE_DEF_SEL(copyIndirectCommandBuffer_sourceRange_destination_destinationIndex_, + "copyIndirectCommandBuffer:sourceRange:destination:destinationIndex:"); +_MTL_PRIVATE_DEF_SEL(copyParameterDataToBuffer_offset_, + "copyParameterDataToBuffer:offset:"); +_MTL_PRIVATE_DEF_SEL(copyStatusToBuffer_offset_, + "copyStatusToBuffer:offset:"); +_MTL_PRIVATE_DEF_SEL(counterSet, + "counterSet"); +_MTL_PRIVATE_DEF_SEL(counterSets, + "counterSets"); 
+_MTL_PRIVATE_DEF_SEL(counters, + "counters"); +_MTL_PRIVATE_DEF_SEL(cpuCacheMode, + "cpuCacheMode"); +_MTL_PRIVATE_DEF_SEL(currentAllocatedSize, + "currentAllocatedSize"); +_MTL_PRIVATE_DEF_SEL(data, + "data"); +_MTL_PRIVATE_DEF_SEL(dataSize, + "dataSize"); +_MTL_PRIVATE_DEF_SEL(dataType, + "dataType"); +_MTL_PRIVATE_DEF_SEL(dealloc, + "dealloc"); +_MTL_PRIVATE_DEF_SEL(debugLocation, + "debugLocation"); +_MTL_PRIVATE_DEF_SEL(debugSignposts, + "debugSignposts"); +_MTL_PRIVATE_DEF_SEL(defaultCaptureScope, + "defaultCaptureScope"); +_MTL_PRIVATE_DEF_SEL(defaultRasterSampleCount, + "defaultRasterSampleCount"); +_MTL_PRIVATE_DEF_SEL(depth, + "depth"); +_MTL_PRIVATE_DEF_SEL(depthAttachment, + "depthAttachment"); +_MTL_PRIVATE_DEF_SEL(depthAttachmentPixelFormat, + "depthAttachmentPixelFormat"); +_MTL_PRIVATE_DEF_SEL(depthCompareFunction, + "depthCompareFunction"); +_MTL_PRIVATE_DEF_SEL(depthFailureOperation, + "depthFailureOperation"); +_MTL_PRIVATE_DEF_SEL(depthPlane, + "depthPlane"); +_MTL_PRIVATE_DEF_SEL(depthResolveFilter, + "depthResolveFilter"); +_MTL_PRIVATE_DEF_SEL(depthStencilPassOperation, + "depthStencilPassOperation"); +_MTL_PRIVATE_DEF_SEL(descriptor, + "descriptor"); +_MTL_PRIVATE_DEF_SEL(destination, + "destination"); +_MTL_PRIVATE_DEF_SEL(destinationAlphaBlendFactor, + "destinationAlphaBlendFactor"); +_MTL_PRIVATE_DEF_SEL(destinationRGBBlendFactor, + "destinationRGBBlendFactor"); +_MTL_PRIVATE_DEF_SEL(device, + "device"); +_MTL_PRIVATE_DEF_SEL(didModifyRange_, + "didModifyRange:"); +_MTL_PRIVATE_DEF_SEL(dispatchQueue, + "dispatchQueue"); +_MTL_PRIVATE_DEF_SEL(dispatchThreadgroups_threadsPerThreadgroup_, + "dispatchThreadgroups:threadsPerThreadgroup:"); +_MTL_PRIVATE_DEF_SEL(dispatchThreadgroupsWithIndirectBuffer_indirectBufferOffset_threadsPerThreadgroup_, + "dispatchThreadgroupsWithIndirectBuffer:indirectBufferOffset:threadsPerThreadgroup:"); +_MTL_PRIVATE_DEF_SEL(dispatchThreads_threadsPerThreadgroup_, + "dispatchThreads:threadsPerThreadgroup:"); 
+_MTL_PRIVATE_DEF_SEL(dispatchThreadsPerTile_, + "dispatchThreadsPerTile:"); +_MTL_PRIVATE_DEF_SEL(dispatchType, + "dispatchType"); +_MTL_PRIVATE_DEF_SEL(drawIndexedPatches_patchIndexBuffer_patchIndexBufferOffset_controlPointIndexBuffer_controlPointIndexBufferOffset_indirectBuffer_indirectBufferOffset_, + "drawIndexedPatches:patchIndexBuffer:patchIndexBufferOffset:controlPointIndexBuffer:controlPointIndexBufferOffset:indirectBuffer:indirectBufferOffset:"); +_MTL_PRIVATE_DEF_SEL(drawIndexedPatches_patchStart_patchCount_patchIndexBuffer_patchIndexBufferOffset_controlPointIndexBuffer_controlPointIndexBufferOffset_instanceCount_baseInstance_, + "drawIndexedPatches:patchStart:patchCount:patchIndexBuffer:patchIndexBufferOffset:controlPointIndexBuffer:controlPointIndexBufferOffset:instanceCount:baseInstance:"); +_MTL_PRIVATE_DEF_SEL(drawIndexedPatches_patchStart_patchCount_patchIndexBuffer_patchIndexBufferOffset_controlPointIndexBuffer_controlPointIndexBufferOffset_instanceCount_baseInstance_tessellationFactorBuffer_tessellationFactorBufferOffset_tessellationFactorBufferInstanceStride_, + "drawIndexedPatches:patchStart:patchCount:patchIndexBuffer:patchIndexBufferOffset:controlPointIndexBuffer:controlPointIndexBufferOffset:instanceCount:baseInstance:tessellationFactorBuffer:tessellationFactorBufferOffset:tessellationFactorBufferInstanceStride:"); +_MTL_PRIVATE_DEF_SEL(drawIndexedPrimitives_indexCount_indexType_indexBuffer_indexBufferOffset_, + "drawIndexedPrimitives:indexCount:indexType:indexBuffer:indexBufferOffset:"); +_MTL_PRIVATE_DEF_SEL(drawIndexedPrimitives_indexCount_indexType_indexBuffer_indexBufferOffset_instanceCount_, + "drawIndexedPrimitives:indexCount:indexType:indexBuffer:indexBufferOffset:instanceCount:"); +_MTL_PRIVATE_DEF_SEL(drawIndexedPrimitives_indexCount_indexType_indexBuffer_indexBufferOffset_instanceCount_baseVertex_baseInstance_, + "drawIndexedPrimitives:indexCount:indexType:indexBuffer:indexBufferOffset:instanceCount:baseVertex:baseInstance:"); 
+_MTL_PRIVATE_DEF_SEL(drawIndexedPrimitives_indexType_indexBuffer_indexBufferOffset_indirectBuffer_indirectBufferOffset_, + "drawIndexedPrimitives:indexType:indexBuffer:indexBufferOffset:indirectBuffer:indirectBufferOffset:"); +_MTL_PRIVATE_DEF_SEL(drawMeshThreadgroups_threadsPerObjectThreadgroup_threadsPerMeshThreadgroup_, + "drawMeshThreadgroups:threadsPerObjectThreadgroup:threadsPerMeshThreadgroup:"); +_MTL_PRIVATE_DEF_SEL(drawMeshThreadgroupsWithIndirectBuffer_indirectBufferOffset_threadsPerObjectThreadgroup_threadsPerMeshThreadgroup_, + "drawMeshThreadgroupsWithIndirectBuffer:indirectBufferOffset:threadsPerObjectThreadgroup:threadsPerMeshThreadgroup:"); +_MTL_PRIVATE_DEF_SEL(drawMeshThreads_threadsPerObjectThreadgroup_threadsPerMeshThreadgroup_, + "drawMeshThreads:threadsPerObjectThreadgroup:threadsPerMeshThreadgroup:"); +_MTL_PRIVATE_DEF_SEL(drawPatches_patchIndexBuffer_patchIndexBufferOffset_indirectBuffer_indirectBufferOffset_, + "drawPatches:patchIndexBuffer:patchIndexBufferOffset:indirectBuffer:indirectBufferOffset:"); +_MTL_PRIVATE_DEF_SEL(drawPatches_patchStart_patchCount_patchIndexBuffer_patchIndexBufferOffset_instanceCount_baseInstance_, + "drawPatches:patchStart:patchCount:patchIndexBuffer:patchIndexBufferOffset:instanceCount:baseInstance:"); +_MTL_PRIVATE_DEF_SEL(drawPatches_patchStart_patchCount_patchIndexBuffer_patchIndexBufferOffset_instanceCount_baseInstance_tessellationFactorBuffer_tessellationFactorBufferOffset_tessellationFactorBufferInstanceStride_, + "drawPatches:patchStart:patchCount:patchIndexBuffer:patchIndexBufferOffset:instanceCount:baseInstance:tessellationFactorBuffer:tessellationFactorBufferOffset:tessellationFactorBufferInstanceStride:"); +_MTL_PRIVATE_DEF_SEL(drawPrimitives_indirectBuffer_indirectBufferOffset_, + "drawPrimitives:indirectBuffer:indirectBufferOffset:"); +_MTL_PRIVATE_DEF_SEL(drawPrimitives_vertexStart_vertexCount_, + "drawPrimitives:vertexStart:vertexCount:"); 
+_MTL_PRIVATE_DEF_SEL(drawPrimitives_vertexStart_vertexCount_instanceCount_, + "drawPrimitives:vertexStart:vertexCount:instanceCount:"); +_MTL_PRIVATE_DEF_SEL(drawPrimitives_vertexStart_vertexCount_instanceCount_baseInstance_, + "drawPrimitives:vertexStart:vertexCount:instanceCount:baseInstance:"); +_MTL_PRIVATE_DEF_SEL(drawableID, + "drawableID"); +_MTL_PRIVATE_DEF_SEL(elementArrayType, + "elementArrayType"); +_MTL_PRIVATE_DEF_SEL(elementIsArgumentBuffer, + "elementIsArgumentBuffer"); +_MTL_PRIVATE_DEF_SEL(elementPointerType, + "elementPointerType"); +_MTL_PRIVATE_DEF_SEL(elementStructType, + "elementStructType"); +_MTL_PRIVATE_DEF_SEL(elementTextureReferenceType, + "elementTextureReferenceType"); +_MTL_PRIVATE_DEF_SEL(elementType, + "elementType"); +_MTL_PRIVATE_DEF_SEL(encodeSignalEvent_value_, + "encodeSignalEvent:value:"); +_MTL_PRIVATE_DEF_SEL(encodeWaitForEvent_value_, + "encodeWaitForEvent:value:"); +_MTL_PRIVATE_DEF_SEL(encodedLength, + "encodedLength"); +_MTL_PRIVATE_DEF_SEL(encoderLabel, + "encoderLabel"); +_MTL_PRIVATE_DEF_SEL(endEncoding, + "endEncoding"); +_MTL_PRIVATE_DEF_SEL(endOfEncoderSampleIndex, + "endOfEncoderSampleIndex"); +_MTL_PRIVATE_DEF_SEL(endOfFragmentSampleIndex, + "endOfFragmentSampleIndex"); +_MTL_PRIVATE_DEF_SEL(endOfVertexSampleIndex, + "endOfVertexSampleIndex"); +_MTL_PRIVATE_DEF_SEL(enqueue, + "enqueue"); +_MTL_PRIVATE_DEF_SEL(enqueueBarrier, + "enqueueBarrier"); +_MTL_PRIVATE_DEF_SEL(error, + "error"); +_MTL_PRIVATE_DEF_SEL(errorOptions, + "errorOptions"); +_MTL_PRIVATE_DEF_SEL(errorState, + "errorState"); +_MTL_PRIVATE_DEF_SEL(executeCommandsInBuffer_indirectBuffer_indirectBufferOffset_, + "executeCommandsInBuffer:indirectBuffer:indirectBufferOffset:"); +_MTL_PRIVATE_DEF_SEL(executeCommandsInBuffer_withRange_, + "executeCommandsInBuffer:withRange:"); +_MTL_PRIVATE_DEF_SEL(fastMathEnabled, + "fastMathEnabled"); +_MTL_PRIVATE_DEF_SEL(fillBuffer_range_value_, + "fillBuffer:range:value:"); +_MTL_PRIVATE_DEF_SEL(firstMipmapInTail, + 
"firstMipmapInTail"); +_MTL_PRIVATE_DEF_SEL(format, + "format"); +_MTL_PRIVATE_DEF_SEL(fragmentAdditionalBinaryFunctions, + "fragmentAdditionalBinaryFunctions"); +_MTL_PRIVATE_DEF_SEL(fragmentArguments, + "fragmentArguments"); +_MTL_PRIVATE_DEF_SEL(fragmentBindings, + "fragmentBindings"); +_MTL_PRIVATE_DEF_SEL(fragmentBuffers, + "fragmentBuffers"); +_MTL_PRIVATE_DEF_SEL(fragmentFunction, + "fragmentFunction"); +_MTL_PRIVATE_DEF_SEL(fragmentLinkedFunctions, + "fragmentLinkedFunctions"); +_MTL_PRIVATE_DEF_SEL(fragmentPreloadedLibraries, + "fragmentPreloadedLibraries"); +_MTL_PRIVATE_DEF_SEL(frontFaceStencil, + "frontFaceStencil"); +_MTL_PRIVATE_DEF_SEL(function, + "function"); +_MTL_PRIVATE_DEF_SEL(functionConstantsDictionary, + "functionConstantsDictionary"); +_MTL_PRIVATE_DEF_SEL(functionCount, + "functionCount"); +_MTL_PRIVATE_DEF_SEL(functionDescriptor, + "functionDescriptor"); +_MTL_PRIVATE_DEF_SEL(functionGraphs, + "functionGraphs"); +_MTL_PRIVATE_DEF_SEL(functionHandleWithFunction_, + "functionHandleWithFunction:"); +_MTL_PRIVATE_DEF_SEL(functionHandleWithFunction_stage_, + "functionHandleWithFunction:stage:"); +_MTL_PRIVATE_DEF_SEL(functionName, + "functionName"); +_MTL_PRIVATE_DEF_SEL(functionNames, + "functionNames"); +_MTL_PRIVATE_DEF_SEL(functionType, + "functionType"); +_MTL_PRIVATE_DEF_SEL(functions, + "functions"); +_MTL_PRIVATE_DEF_SEL(generateMipmapsForTexture_, + "generateMipmapsForTexture:"); +_MTL_PRIVATE_DEF_SEL(geometryDescriptors, + "geometryDescriptors"); +_MTL_PRIVATE_DEF_SEL(getBytes_bytesPerRow_bytesPerImage_fromRegion_mipmapLevel_slice_, + "getBytes:bytesPerRow:bytesPerImage:fromRegion:mipmapLevel:slice:"); +_MTL_PRIVATE_DEF_SEL(getBytes_bytesPerRow_fromRegion_mipmapLevel_, + "getBytes:bytesPerRow:fromRegion:mipmapLevel:"); +_MTL_PRIVATE_DEF_SEL(getDefaultSamplePositions_count_, + "getDefaultSamplePositions:count:"); +_MTL_PRIVATE_DEF_SEL(getSamplePositions_count_, + "getSamplePositions:count:"); 
+_MTL_PRIVATE_DEF_SEL(getTextureAccessCounters_region_mipLevel_slice_resetCounters_countersBuffer_countersBufferOffset_, + "getTextureAccessCounters:region:mipLevel:slice:resetCounters:countersBuffer:countersBufferOffset:"); +_MTL_PRIVATE_DEF_SEL(gpuAddress, + "gpuAddress"); +_MTL_PRIVATE_DEF_SEL(gpuResourceID, + "gpuResourceID"); +_MTL_PRIVATE_DEF_SEL(groups, + "groups"); +_MTL_PRIVATE_DEF_SEL(hasUnifiedMemory, + "hasUnifiedMemory"); +_MTL_PRIVATE_DEF_SEL(hazardTrackingMode, + "hazardTrackingMode"); +_MTL_PRIVATE_DEF_SEL(heap, + "heap"); +_MTL_PRIVATE_DEF_SEL(heapAccelerationStructureSizeAndAlignWithDescriptor_, + "heapAccelerationStructureSizeAndAlignWithDescriptor:"); +_MTL_PRIVATE_DEF_SEL(heapAccelerationStructureSizeAndAlignWithSize_, + "heapAccelerationStructureSizeAndAlignWithSize:"); +_MTL_PRIVATE_DEF_SEL(heapBufferSizeAndAlignWithLength_options_, + "heapBufferSizeAndAlignWithLength:options:"); +_MTL_PRIVATE_DEF_SEL(heapOffset, + "heapOffset"); +_MTL_PRIVATE_DEF_SEL(heapTextureSizeAndAlignWithDescriptor_, + "heapTextureSizeAndAlignWithDescriptor:"); +_MTL_PRIVATE_DEF_SEL(height, + "height"); +_MTL_PRIVATE_DEF_SEL(horizontal, + "horizontal"); +_MTL_PRIVATE_DEF_SEL(horizontalSampleStorage, + "horizontalSampleStorage"); +_MTL_PRIVATE_DEF_SEL(imageblockMemoryLengthForDimensions_, + "imageblockMemoryLengthForDimensions:"); +_MTL_PRIVATE_DEF_SEL(imageblockSampleLength, + "imageblockSampleLength"); +_MTL_PRIVATE_DEF_SEL(index, + "index"); +_MTL_PRIVATE_DEF_SEL(indexBuffer, + "indexBuffer"); +_MTL_PRIVATE_DEF_SEL(indexBufferIndex, + "indexBufferIndex"); +_MTL_PRIVATE_DEF_SEL(indexBufferOffset, + "indexBufferOffset"); +_MTL_PRIVATE_DEF_SEL(indexType, + "indexType"); +_MTL_PRIVATE_DEF_SEL(indirectComputeCommandAtIndex_, + "indirectComputeCommandAtIndex:"); +_MTL_PRIVATE_DEF_SEL(indirectRenderCommandAtIndex_, + "indirectRenderCommandAtIndex:"); +_MTL_PRIVATE_DEF_SEL(inheritBuffers, + "inheritBuffers"); +_MTL_PRIVATE_DEF_SEL(inheritPipelineState, + 
"inheritPipelineState"); +_MTL_PRIVATE_DEF_SEL(init, + "init"); +_MTL_PRIVATE_DEF_SEL(initWithArgumentIndex_, + "initWithArgumentIndex:"); +_MTL_PRIVATE_DEF_SEL(initWithDispatchQueue_, + "initWithDispatchQueue:"); +_MTL_PRIVATE_DEF_SEL(initWithFunctionName_nodes_outputNode_attributes_, + "initWithFunctionName:nodes:outputNode:attributes:"); +_MTL_PRIVATE_DEF_SEL(initWithName_arguments_controlDependencies_, + "initWithName:arguments:controlDependencies:"); +_MTL_PRIVATE_DEF_SEL(initWithSampleCount_, + "initWithSampleCount:"); +_MTL_PRIVATE_DEF_SEL(initWithSampleCount_horizontal_vertical_, + "initWithSampleCount:horizontal:vertical:"); +_MTL_PRIVATE_DEF_SEL(inputPrimitiveTopology, + "inputPrimitiveTopology"); +_MTL_PRIVATE_DEF_SEL(insertDebugCaptureBoundary, + "insertDebugCaptureBoundary"); +_MTL_PRIVATE_DEF_SEL(insertDebugSignpost_, + "insertDebugSignpost:"); +_MTL_PRIVATE_DEF_SEL(insertLibraries, + "insertLibraries"); +_MTL_PRIVATE_DEF_SEL(installName, + "installName"); +_MTL_PRIVATE_DEF_SEL(instanceCount, + "instanceCount"); +_MTL_PRIVATE_DEF_SEL(instanceDescriptorBuffer, + "instanceDescriptorBuffer"); +_MTL_PRIVATE_DEF_SEL(instanceDescriptorBufferOffset, + "instanceDescriptorBufferOffset"); +_MTL_PRIVATE_DEF_SEL(instanceDescriptorStride, + "instanceDescriptorStride"); +_MTL_PRIVATE_DEF_SEL(instanceDescriptorType, + "instanceDescriptorType"); +_MTL_PRIVATE_DEF_SEL(instancedAccelerationStructures, + "instancedAccelerationStructures"); +_MTL_PRIVATE_DEF_SEL(intersectionFunctionTableDescriptor, + "intersectionFunctionTableDescriptor"); +_MTL_PRIVATE_DEF_SEL(intersectionFunctionTableOffset, + "intersectionFunctionTableOffset"); +_MTL_PRIVATE_DEF_SEL(iosurface, + "iosurface"); +_MTL_PRIVATE_DEF_SEL(iosurfacePlane, + "iosurfacePlane"); +_MTL_PRIVATE_DEF_SEL(isActive, + "isActive"); +_MTL_PRIVATE_DEF_SEL(isAliasable, + "isAliasable"); +_MTL_PRIVATE_DEF_SEL(isAlphaToCoverageEnabled, + "isAlphaToCoverageEnabled"); +_MTL_PRIVATE_DEF_SEL(isAlphaToOneEnabled, + 
"isAlphaToOneEnabled"); +_MTL_PRIVATE_DEF_SEL(isArgument, + "isArgument"); +_MTL_PRIVATE_DEF_SEL(isBlendingEnabled, + "isBlendingEnabled"); +_MTL_PRIVATE_DEF_SEL(isCapturing, + "isCapturing"); +_MTL_PRIVATE_DEF_SEL(isDepth24Stencil8PixelFormatSupported, + "isDepth24Stencil8PixelFormatSupported"); +_MTL_PRIVATE_DEF_SEL(isDepthTexture, + "isDepthTexture"); +_MTL_PRIVATE_DEF_SEL(isDepthWriteEnabled, + "isDepthWriteEnabled"); +_MTL_PRIVATE_DEF_SEL(isFramebufferOnly, + "isFramebufferOnly"); +_MTL_PRIVATE_DEF_SEL(isHeadless, + "isHeadless"); +_MTL_PRIVATE_DEF_SEL(isLowPower, + "isLowPower"); +_MTL_PRIVATE_DEF_SEL(isPatchControlPointData, + "isPatchControlPointData"); +_MTL_PRIVATE_DEF_SEL(isPatchData, + "isPatchData"); +_MTL_PRIVATE_DEF_SEL(isRasterizationEnabled, + "isRasterizationEnabled"); +_MTL_PRIVATE_DEF_SEL(isRemovable, + "isRemovable"); +_MTL_PRIVATE_DEF_SEL(isShareable, + "isShareable"); +_MTL_PRIVATE_DEF_SEL(isSparse, + "isSparse"); +_MTL_PRIVATE_DEF_SEL(isTessellationFactorScaleEnabled, + "isTessellationFactorScaleEnabled"); +_MTL_PRIVATE_DEF_SEL(isUsed, + "isUsed"); +_MTL_PRIVATE_DEF_SEL(kernelEndTime, + "kernelEndTime"); +_MTL_PRIVATE_DEF_SEL(kernelStartTime, + "kernelStartTime"); +_MTL_PRIVATE_DEF_SEL(label, + "label"); +_MTL_PRIVATE_DEF_SEL(languageVersion, + "languageVersion"); +_MTL_PRIVATE_DEF_SEL(layerAtIndex_, + "layerAtIndex:"); +_MTL_PRIVATE_DEF_SEL(layerCount, + "layerCount"); +_MTL_PRIVATE_DEF_SEL(layers, + "layers"); +_MTL_PRIVATE_DEF_SEL(layouts, + "layouts"); +_MTL_PRIVATE_DEF_SEL(length, + "length"); +_MTL_PRIVATE_DEF_SEL(level, + "level"); +_MTL_PRIVATE_DEF_SEL(libraries, + "libraries"); +_MTL_PRIVATE_DEF_SEL(libraryType, + "libraryType"); +_MTL_PRIVATE_DEF_SEL(line, + "line"); +_MTL_PRIVATE_DEF_SEL(linkedFunctions, + "linkedFunctions"); +_MTL_PRIVATE_DEF_SEL(loadAction, + "loadAction"); +_MTL_PRIVATE_DEF_SEL(loadBuffer_offset_size_sourceHandle_sourceHandleOffset_, + "loadBuffer:offset:size:sourceHandle:sourceHandleOffset:"); 
+_MTL_PRIVATE_DEF_SEL(loadBytes_size_sourceHandle_sourceHandleOffset_, + "loadBytes:size:sourceHandle:sourceHandleOffset:"); +_MTL_PRIVATE_DEF_SEL(loadTexture_slice_level_size_sourceBytesPerRow_sourceBytesPerImage_destinationOrigin_sourceHandle_sourceHandleOffset_, + "loadTexture:slice:level:size:sourceBytesPerRow:sourceBytesPerImage:destinationOrigin:sourceHandle:sourceHandleOffset:"); +_MTL_PRIVATE_DEF_SEL(location, + "location"); +_MTL_PRIVATE_DEF_SEL(locationNumber, + "locationNumber"); +_MTL_PRIVATE_DEF_SEL(lodAverage, + "lodAverage"); +_MTL_PRIVATE_DEF_SEL(lodMaxClamp, + "lodMaxClamp"); +_MTL_PRIVATE_DEF_SEL(lodMinClamp, + "lodMinClamp"); +_MTL_PRIVATE_DEF_SEL(logs, + "logs"); +_MTL_PRIVATE_DEF_SEL(magFilter, + "magFilter"); +_MTL_PRIVATE_DEF_SEL(makeAliasable, + "makeAliasable"); +_MTL_PRIVATE_DEF_SEL(mapPhysicalToScreenCoordinates_forLayer_, + "mapPhysicalToScreenCoordinates:forLayer:"); +_MTL_PRIVATE_DEF_SEL(mapScreenToPhysicalCoordinates_forLayer_, + "mapScreenToPhysicalCoordinates:forLayer:"); +_MTL_PRIVATE_DEF_SEL(maxAnisotropy, + "maxAnisotropy"); +_MTL_PRIVATE_DEF_SEL(maxArgumentBufferSamplerCount, + "maxArgumentBufferSamplerCount"); +_MTL_PRIVATE_DEF_SEL(maxAvailableSizeWithAlignment_, + "maxAvailableSizeWithAlignment:"); +_MTL_PRIVATE_DEF_SEL(maxBufferLength, + "maxBufferLength"); +_MTL_PRIVATE_DEF_SEL(maxCallStackDepth, + "maxCallStackDepth"); +_MTL_PRIVATE_DEF_SEL(maxCommandBufferCount, + "maxCommandBufferCount"); +_MTL_PRIVATE_DEF_SEL(maxCommandsInFlight, + "maxCommandsInFlight"); +_MTL_PRIVATE_DEF_SEL(maxFragmentBufferBindCount, + "maxFragmentBufferBindCount"); +_MTL_PRIVATE_DEF_SEL(maxFragmentCallStackDepth, + "maxFragmentCallStackDepth"); +_MTL_PRIVATE_DEF_SEL(maxKernelBufferBindCount, + "maxKernelBufferBindCount"); +_MTL_PRIVATE_DEF_SEL(maxSampleCount, + "maxSampleCount"); +_MTL_PRIVATE_DEF_SEL(maxTessellationFactor, + "maxTessellationFactor"); +_MTL_PRIVATE_DEF_SEL(maxThreadgroupMemoryLength, + "maxThreadgroupMemoryLength"); 
+_MTL_PRIVATE_DEF_SEL(maxThreadsPerThreadgroup, + "maxThreadsPerThreadgroup"); +_MTL_PRIVATE_DEF_SEL(maxTotalThreadgroupsPerMeshGrid, + "maxTotalThreadgroupsPerMeshGrid"); +_MTL_PRIVATE_DEF_SEL(maxTotalThreadsPerMeshThreadgroup, + "maxTotalThreadsPerMeshThreadgroup"); +_MTL_PRIVATE_DEF_SEL(maxTotalThreadsPerObjectThreadgroup, + "maxTotalThreadsPerObjectThreadgroup"); +_MTL_PRIVATE_DEF_SEL(maxTotalThreadsPerThreadgroup, + "maxTotalThreadsPerThreadgroup"); +_MTL_PRIVATE_DEF_SEL(maxTransferRate, + "maxTransferRate"); +_MTL_PRIVATE_DEF_SEL(maxVertexAmplificationCount, + "maxVertexAmplificationCount"); +_MTL_PRIVATE_DEF_SEL(maxVertexBufferBindCount, + "maxVertexBufferBindCount"); +_MTL_PRIVATE_DEF_SEL(maxVertexCallStackDepth, + "maxVertexCallStackDepth"); +_MTL_PRIVATE_DEF_SEL(memberByName_, + "memberByName:"); +_MTL_PRIVATE_DEF_SEL(members, + "members"); +_MTL_PRIVATE_DEF_SEL(memoryBarrierWithResources_count_, + "memoryBarrierWithResources:count:"); +_MTL_PRIVATE_DEF_SEL(memoryBarrierWithResources_count_afterStages_beforeStages_, + "memoryBarrierWithResources:count:afterStages:beforeStages:"); +_MTL_PRIVATE_DEF_SEL(memoryBarrierWithScope_, + "memoryBarrierWithScope:"); +_MTL_PRIVATE_DEF_SEL(memoryBarrierWithScope_afterStages_beforeStages_, + "memoryBarrierWithScope:afterStages:beforeStages:"); +_MTL_PRIVATE_DEF_SEL(meshBindings, + "meshBindings"); +_MTL_PRIVATE_DEF_SEL(meshBuffers, + "meshBuffers"); +_MTL_PRIVATE_DEF_SEL(meshFunction, + "meshFunction"); +_MTL_PRIVATE_DEF_SEL(meshThreadExecutionWidth, + "meshThreadExecutionWidth"); +_MTL_PRIVATE_DEF_SEL(meshThreadgroupSizeIsMultipleOfThreadExecutionWidth, + "meshThreadgroupSizeIsMultipleOfThreadExecutionWidth"); +_MTL_PRIVATE_DEF_SEL(minFilter, + "minFilter"); +_MTL_PRIVATE_DEF_SEL(minimumLinearTextureAlignmentForPixelFormat_, + "minimumLinearTextureAlignmentForPixelFormat:"); +_MTL_PRIVATE_DEF_SEL(minimumTextureBufferAlignmentForPixelFormat_, + "minimumTextureBufferAlignmentForPixelFormat:"); 
+_MTL_PRIVATE_DEF_SEL(mipFilter, + "mipFilter"); +_MTL_PRIVATE_DEF_SEL(mipmapLevelCount, + "mipmapLevelCount"); +_MTL_PRIVATE_DEF_SEL(motionEndBorderMode, + "motionEndBorderMode"); +_MTL_PRIVATE_DEF_SEL(motionEndTime, + "motionEndTime"); +_MTL_PRIVATE_DEF_SEL(motionKeyframeCount, + "motionKeyframeCount"); +_MTL_PRIVATE_DEF_SEL(motionStartBorderMode, + "motionStartBorderMode"); +_MTL_PRIVATE_DEF_SEL(motionStartTime, + "motionStartTime"); +_MTL_PRIVATE_DEF_SEL(motionTransformBuffer, + "motionTransformBuffer"); +_MTL_PRIVATE_DEF_SEL(motionTransformBufferOffset, + "motionTransformBufferOffset"); +_MTL_PRIVATE_DEF_SEL(motionTransformCount, + "motionTransformCount"); +_MTL_PRIVATE_DEF_SEL(moveTextureMappingsFromTexture_sourceSlice_sourceLevel_sourceOrigin_sourceSize_toTexture_destinationSlice_destinationLevel_destinationOrigin_, + "moveTextureMappingsFromTexture:sourceSlice:sourceLevel:sourceOrigin:sourceSize:toTexture:destinationSlice:destinationLevel:destinationOrigin:"); +_MTL_PRIVATE_DEF_SEL(mutability, + "mutability"); +_MTL_PRIVATE_DEF_SEL(name, + "name"); +_MTL_PRIVATE_DEF_SEL(newAccelerationStructureWithDescriptor_, + "newAccelerationStructureWithDescriptor:"); +_MTL_PRIVATE_DEF_SEL(newAccelerationStructureWithDescriptor_offset_, + "newAccelerationStructureWithDescriptor:offset:"); +_MTL_PRIVATE_DEF_SEL(newAccelerationStructureWithSize_, + "newAccelerationStructureWithSize:"); +_MTL_PRIVATE_DEF_SEL(newAccelerationStructureWithSize_offset_, + "newAccelerationStructureWithSize:offset:"); +_MTL_PRIVATE_DEF_SEL(newArgumentEncoderForBufferAtIndex_, + "newArgumentEncoderForBufferAtIndex:"); +_MTL_PRIVATE_DEF_SEL(newArgumentEncoderWithArguments_, + "newArgumentEncoderWithArguments:"); +_MTL_PRIVATE_DEF_SEL(newArgumentEncoderWithBufferBinding_, + "newArgumentEncoderWithBufferBinding:"); +_MTL_PRIVATE_DEF_SEL(newArgumentEncoderWithBufferIndex_, + "newArgumentEncoderWithBufferIndex:"); +_MTL_PRIVATE_DEF_SEL(newArgumentEncoderWithBufferIndex_reflection_, + 
"newArgumentEncoderWithBufferIndex:reflection:"); +_MTL_PRIVATE_DEF_SEL(newBinaryArchiveWithDescriptor_error_, + "newBinaryArchiveWithDescriptor:error:"); +_MTL_PRIVATE_DEF_SEL(newBufferWithBytes_length_options_, + "newBufferWithBytes:length:options:"); +_MTL_PRIVATE_DEF_SEL(newBufferWithBytesNoCopy_length_options_deallocator_, + "newBufferWithBytesNoCopy:length:options:deallocator:"); +_MTL_PRIVATE_DEF_SEL(newBufferWithLength_options_, + "newBufferWithLength:options:"); +_MTL_PRIVATE_DEF_SEL(newBufferWithLength_options_offset_, + "newBufferWithLength:options:offset:"); +_MTL_PRIVATE_DEF_SEL(newCaptureScopeWithCommandQueue_, + "newCaptureScopeWithCommandQueue:"); +_MTL_PRIVATE_DEF_SEL(newCaptureScopeWithDevice_, + "newCaptureScopeWithDevice:"); +_MTL_PRIVATE_DEF_SEL(newCommandQueue, + "newCommandQueue"); +_MTL_PRIVATE_DEF_SEL(newCommandQueueWithMaxCommandBufferCount_, + "newCommandQueueWithMaxCommandBufferCount:"); +_MTL_PRIVATE_DEF_SEL(newComputePipelineStateWithAdditionalBinaryFunctions_error_, + "newComputePipelineStateWithAdditionalBinaryFunctions:error:"); +_MTL_PRIVATE_DEF_SEL(newComputePipelineStateWithDescriptor_options_completionHandler_, + "newComputePipelineStateWithDescriptor:options:completionHandler:"); +_MTL_PRIVATE_DEF_SEL(newComputePipelineStateWithDescriptor_options_reflection_error_, + "newComputePipelineStateWithDescriptor:options:reflection:error:"); +_MTL_PRIVATE_DEF_SEL(newComputePipelineStateWithFunction_completionHandler_, + "newComputePipelineStateWithFunction:completionHandler:"); +_MTL_PRIVATE_DEF_SEL(newComputePipelineStateWithFunction_error_, + "newComputePipelineStateWithFunction:error:"); +_MTL_PRIVATE_DEF_SEL(newComputePipelineStateWithFunction_options_completionHandler_, + "newComputePipelineStateWithFunction:options:completionHandler:"); +_MTL_PRIVATE_DEF_SEL(newComputePipelineStateWithFunction_options_reflection_error_, + "newComputePipelineStateWithFunction:options:reflection:error:"); 
+_MTL_PRIVATE_DEF_SEL(newCounterSampleBufferWithDescriptor_error_, + "newCounterSampleBufferWithDescriptor:error:"); +_MTL_PRIVATE_DEF_SEL(newDefaultLibrary, + "newDefaultLibrary"); +_MTL_PRIVATE_DEF_SEL(newDefaultLibraryWithBundle_error_, + "newDefaultLibraryWithBundle:error:"); +_MTL_PRIVATE_DEF_SEL(newDepthStencilStateWithDescriptor_, + "newDepthStencilStateWithDescriptor:"); +_MTL_PRIVATE_DEF_SEL(newDynamicLibrary_error_, + "newDynamicLibrary:error:"); +_MTL_PRIVATE_DEF_SEL(newDynamicLibraryWithURL_error_, + "newDynamicLibraryWithURL:error:"); +_MTL_PRIVATE_DEF_SEL(newEvent, + "newEvent"); +_MTL_PRIVATE_DEF_SEL(newFence, + "newFence"); +_MTL_PRIVATE_DEF_SEL(newFunctionWithDescriptor_completionHandler_, + "newFunctionWithDescriptor:completionHandler:"); +_MTL_PRIVATE_DEF_SEL(newFunctionWithDescriptor_error_, + "newFunctionWithDescriptor:error:"); +_MTL_PRIVATE_DEF_SEL(newFunctionWithName_, + "newFunctionWithName:"); +_MTL_PRIVATE_DEF_SEL(newFunctionWithName_constantValues_completionHandler_, + "newFunctionWithName:constantValues:completionHandler:"); +_MTL_PRIVATE_DEF_SEL(newFunctionWithName_constantValues_error_, + "newFunctionWithName:constantValues:error:"); +_MTL_PRIVATE_DEF_SEL(newHeapWithDescriptor_, + "newHeapWithDescriptor:"); +_MTL_PRIVATE_DEF_SEL(newIOCommandQueueWithDescriptor_error_, + "newIOCommandQueueWithDescriptor:error:"); +_MTL_PRIVATE_DEF_SEL(newIOHandleWithURL_compressionMethod_error_, + "newIOHandleWithURL:compressionMethod:error:"); +_MTL_PRIVATE_DEF_SEL(newIOHandleWithURL_error_, + "newIOHandleWithURL:error:"); +_MTL_PRIVATE_DEF_SEL(newIndirectCommandBufferWithDescriptor_maxCommandCount_options_, + "newIndirectCommandBufferWithDescriptor:maxCommandCount:options:"); +_MTL_PRIVATE_DEF_SEL(newIntersectionFunctionTableWithDescriptor_, + "newIntersectionFunctionTableWithDescriptor:"); +_MTL_PRIVATE_DEF_SEL(newIntersectionFunctionTableWithDescriptor_stage_, + "newIntersectionFunctionTableWithDescriptor:stage:"); 
+_MTL_PRIVATE_DEF_SEL(newIntersectionFunctionWithDescriptor_completionHandler_, + "newIntersectionFunctionWithDescriptor:completionHandler:"); +_MTL_PRIVATE_DEF_SEL(newIntersectionFunctionWithDescriptor_error_, + "newIntersectionFunctionWithDescriptor:error:"); +_MTL_PRIVATE_DEF_SEL(newLibraryWithData_error_, + "newLibraryWithData:error:"); +_MTL_PRIVATE_DEF_SEL(newLibraryWithFile_error_, + "newLibraryWithFile:error:"); +_MTL_PRIVATE_DEF_SEL(newLibraryWithSource_options_completionHandler_, + "newLibraryWithSource:options:completionHandler:"); +_MTL_PRIVATE_DEF_SEL(newLibraryWithSource_options_error_, + "newLibraryWithSource:options:error:"); +_MTL_PRIVATE_DEF_SEL(newLibraryWithStitchedDescriptor_completionHandler_, + "newLibraryWithStitchedDescriptor:completionHandler:"); +_MTL_PRIVATE_DEF_SEL(newLibraryWithStitchedDescriptor_error_, + "newLibraryWithStitchedDescriptor:error:"); +_MTL_PRIVATE_DEF_SEL(newLibraryWithURL_error_, + "newLibraryWithURL:error:"); +_MTL_PRIVATE_DEF_SEL(newRasterizationRateMapWithDescriptor_, + "newRasterizationRateMapWithDescriptor:"); +_MTL_PRIVATE_DEF_SEL(newRemoteBufferViewForDevice_, + "newRemoteBufferViewForDevice:"); +_MTL_PRIVATE_DEF_SEL(newRemoteTextureViewForDevice_, + "newRemoteTextureViewForDevice:"); +_MTL_PRIVATE_DEF_SEL(newRenderPipelineStateWithAdditionalBinaryFunctions_error_, + "newRenderPipelineStateWithAdditionalBinaryFunctions:error:"); +_MTL_PRIVATE_DEF_SEL(newRenderPipelineStateWithDescriptor_completionHandler_, + "newRenderPipelineStateWithDescriptor:completionHandler:"); +_MTL_PRIVATE_DEF_SEL(newRenderPipelineStateWithDescriptor_error_, + "newRenderPipelineStateWithDescriptor:error:"); +_MTL_PRIVATE_DEF_SEL(newRenderPipelineStateWithDescriptor_options_completionHandler_, + "newRenderPipelineStateWithDescriptor:options:completionHandler:"); +_MTL_PRIVATE_DEF_SEL(newRenderPipelineStateWithDescriptor_options_reflection_error_, + "newRenderPipelineStateWithDescriptor:options:reflection:error:"); 
+_MTL_PRIVATE_DEF_SEL(newRenderPipelineStateWithMeshDescriptor_options_completionHandler_, + "newRenderPipelineStateWithMeshDescriptor:options:completionHandler:"); +_MTL_PRIVATE_DEF_SEL(newRenderPipelineStateWithMeshDescriptor_options_reflection_error_, + "newRenderPipelineStateWithMeshDescriptor:options:reflection:error:"); +_MTL_PRIVATE_DEF_SEL(newRenderPipelineStateWithTileDescriptor_options_completionHandler_, + "newRenderPipelineStateWithTileDescriptor:options:completionHandler:"); +_MTL_PRIVATE_DEF_SEL(newRenderPipelineStateWithTileDescriptor_options_reflection_error_, + "newRenderPipelineStateWithTileDescriptor:options:reflection:error:"); +_MTL_PRIVATE_DEF_SEL(newSamplerStateWithDescriptor_, + "newSamplerStateWithDescriptor:"); +_MTL_PRIVATE_DEF_SEL(newScratchBufferWithMinimumSize_, + "newScratchBufferWithMinimumSize:"); +_MTL_PRIVATE_DEF_SEL(newSharedEvent, + "newSharedEvent"); +_MTL_PRIVATE_DEF_SEL(newSharedEventHandle, + "newSharedEventHandle"); +_MTL_PRIVATE_DEF_SEL(newSharedEventWithHandle_, + "newSharedEventWithHandle:"); +_MTL_PRIVATE_DEF_SEL(newSharedTextureHandle, + "newSharedTextureHandle"); +_MTL_PRIVATE_DEF_SEL(newSharedTextureWithDescriptor_, + "newSharedTextureWithDescriptor:"); +_MTL_PRIVATE_DEF_SEL(newSharedTextureWithHandle_, + "newSharedTextureWithHandle:"); +_MTL_PRIVATE_DEF_SEL(newTextureViewWithPixelFormat_, + "newTextureViewWithPixelFormat:"); +_MTL_PRIVATE_DEF_SEL(newTextureViewWithPixelFormat_textureType_levels_slices_, + "newTextureViewWithPixelFormat:textureType:levels:slices:"); +_MTL_PRIVATE_DEF_SEL(newTextureViewWithPixelFormat_textureType_levels_slices_swizzle_, + "newTextureViewWithPixelFormat:textureType:levels:slices:swizzle:"); +_MTL_PRIVATE_DEF_SEL(newTextureWithDescriptor_, + "newTextureWithDescriptor:"); +_MTL_PRIVATE_DEF_SEL(newTextureWithDescriptor_iosurface_plane_, + "newTextureWithDescriptor:iosurface:plane:"); +_MTL_PRIVATE_DEF_SEL(newTextureWithDescriptor_offset_, + "newTextureWithDescriptor:offset:"); 
+_MTL_PRIVATE_DEF_SEL(newTextureWithDescriptor_offset_bytesPerRow_, + "newTextureWithDescriptor:offset:bytesPerRow:"); +_MTL_PRIVATE_DEF_SEL(newVisibleFunctionTableWithDescriptor_, + "newVisibleFunctionTableWithDescriptor:"); +_MTL_PRIVATE_DEF_SEL(newVisibleFunctionTableWithDescriptor_stage_, + "newVisibleFunctionTableWithDescriptor:stage:"); +_MTL_PRIVATE_DEF_SEL(nodes, + "nodes"); +_MTL_PRIVATE_DEF_SEL(normalizedCoordinates, + "normalizedCoordinates"); +_MTL_PRIVATE_DEF_SEL(notifyListener_atValue_block_, + "notifyListener:atValue:block:"); +_MTL_PRIVATE_DEF_SEL(objectAtIndexedSubscript_, + "objectAtIndexedSubscript:"); +_MTL_PRIVATE_DEF_SEL(objectBindings, + "objectBindings"); +_MTL_PRIVATE_DEF_SEL(objectBuffers, + "objectBuffers"); +_MTL_PRIVATE_DEF_SEL(objectFunction, + "objectFunction"); +_MTL_PRIVATE_DEF_SEL(objectPayloadAlignment, + "objectPayloadAlignment"); +_MTL_PRIVATE_DEF_SEL(objectPayloadDataSize, + "objectPayloadDataSize"); +_MTL_PRIVATE_DEF_SEL(objectThreadExecutionWidth, + "objectThreadExecutionWidth"); +_MTL_PRIVATE_DEF_SEL(objectThreadgroupSizeIsMultipleOfThreadExecutionWidth, + "objectThreadgroupSizeIsMultipleOfThreadExecutionWidth"); +_MTL_PRIVATE_DEF_SEL(offset, + "offset"); +_MTL_PRIVATE_DEF_SEL(opaque, + "opaque"); +_MTL_PRIVATE_DEF_SEL(optimizationLevel, + "optimizationLevel"); +_MTL_PRIVATE_DEF_SEL(optimizeContentsForCPUAccess_, + "optimizeContentsForCPUAccess:"); +_MTL_PRIVATE_DEF_SEL(optimizeContentsForCPUAccess_slice_level_, + "optimizeContentsForCPUAccess:slice:level:"); +_MTL_PRIVATE_DEF_SEL(optimizeContentsForGPUAccess_, + "optimizeContentsForGPUAccess:"); +_MTL_PRIVATE_DEF_SEL(optimizeContentsForGPUAccess_slice_level_, + "optimizeContentsForGPUAccess:slice:level:"); +_MTL_PRIVATE_DEF_SEL(optimizeIndirectCommandBuffer_withRange_, + "optimizeIndirectCommandBuffer:withRange:"); +_MTL_PRIVATE_DEF_SEL(options, + "options"); +_MTL_PRIVATE_DEF_SEL(outputNode, + "outputNode"); +_MTL_PRIVATE_DEF_SEL(outputURL, + "outputURL"); 
+_MTL_PRIVATE_DEF_SEL(parallelRenderCommandEncoderWithDescriptor_, + "parallelRenderCommandEncoderWithDescriptor:"); +_MTL_PRIVATE_DEF_SEL(parameterBufferSizeAndAlign, + "parameterBufferSizeAndAlign"); +_MTL_PRIVATE_DEF_SEL(parentRelativeLevel, + "parentRelativeLevel"); +_MTL_PRIVATE_DEF_SEL(parentRelativeSlice, + "parentRelativeSlice"); +_MTL_PRIVATE_DEF_SEL(parentTexture, + "parentTexture"); +_MTL_PRIVATE_DEF_SEL(patchControlPointCount, + "patchControlPointCount"); +_MTL_PRIVATE_DEF_SEL(patchType, + "patchType"); +_MTL_PRIVATE_DEF_SEL(payloadMemoryLength, + "payloadMemoryLength"); +_MTL_PRIVATE_DEF_SEL(peerCount, + "peerCount"); +_MTL_PRIVATE_DEF_SEL(peerGroupID, + "peerGroupID"); +_MTL_PRIVATE_DEF_SEL(peerIndex, + "peerIndex"); +_MTL_PRIVATE_DEF_SEL(physicalGranularity, + "physicalGranularity"); +_MTL_PRIVATE_DEF_SEL(physicalSizeForLayer_, + "physicalSizeForLayer:"); +_MTL_PRIVATE_DEF_SEL(pixelFormat, + "pixelFormat"); +_MTL_PRIVATE_DEF_SEL(pointerType, + "pointerType"); +_MTL_PRIVATE_DEF_SEL(popDebugGroup, + "popDebugGroup"); +_MTL_PRIVATE_DEF_SEL(preloadedLibraries, + "preloadedLibraries"); +_MTL_PRIVATE_DEF_SEL(preprocessorMacros, + "preprocessorMacros"); +_MTL_PRIVATE_DEF_SEL(present, + "present"); +_MTL_PRIVATE_DEF_SEL(presentAfterMinimumDuration_, + "presentAfterMinimumDuration:"); +_MTL_PRIVATE_DEF_SEL(presentAtTime_, + "presentAtTime:"); +_MTL_PRIVATE_DEF_SEL(presentDrawable_, + "presentDrawable:"); +_MTL_PRIVATE_DEF_SEL(presentDrawable_afterMinimumDuration_, + "presentDrawable:afterMinimumDuration:"); +_MTL_PRIVATE_DEF_SEL(presentDrawable_atTime_, + "presentDrawable:atTime:"); +_MTL_PRIVATE_DEF_SEL(presentedTime, + "presentedTime"); +_MTL_PRIVATE_DEF_SEL(preserveInvariance, + "preserveInvariance"); +_MTL_PRIVATE_DEF_SEL(primitiveDataBuffer, + "primitiveDataBuffer"); +_MTL_PRIVATE_DEF_SEL(primitiveDataBufferOffset, + "primitiveDataBufferOffset"); +_MTL_PRIVATE_DEF_SEL(primitiveDataElementSize, + "primitiveDataElementSize"); 
+_MTL_PRIVATE_DEF_SEL(primitiveDataStride, + "primitiveDataStride"); +_MTL_PRIVATE_DEF_SEL(priority, + "priority"); +_MTL_PRIVATE_DEF_SEL(privateFunctions, + "privateFunctions"); +_MTL_PRIVATE_DEF_SEL(pushDebugGroup_, + "pushDebugGroup:"); +_MTL_PRIVATE_DEF_SEL(rAddressMode, + "rAddressMode"); +_MTL_PRIVATE_DEF_SEL(rasterSampleCount, + "rasterSampleCount"); +_MTL_PRIVATE_DEF_SEL(rasterizationRateMap, + "rasterizationRateMap"); +_MTL_PRIVATE_DEF_SEL(rasterizationRateMapDescriptorWithScreenSize_, + "rasterizationRateMapDescriptorWithScreenSize:"); +_MTL_PRIVATE_DEF_SEL(rasterizationRateMapDescriptorWithScreenSize_layer_, + "rasterizationRateMapDescriptorWithScreenSize:layer:"); +_MTL_PRIVATE_DEF_SEL(rasterizationRateMapDescriptorWithScreenSize_layerCount_layers_, + "rasterizationRateMapDescriptorWithScreenSize:layerCount:layers:"); +_MTL_PRIVATE_DEF_SEL(readMask, + "readMask"); +_MTL_PRIVATE_DEF_SEL(readWriteTextureSupport, + "readWriteTextureSupport"); +_MTL_PRIVATE_DEF_SEL(recommendedMaxWorkingSetSize, + "recommendedMaxWorkingSetSize"); +_MTL_PRIVATE_DEF_SEL(refitAccelerationStructure_descriptor_destination_scratchBuffer_scratchBufferOffset_, + "refitAccelerationStructure:descriptor:destination:scratchBuffer:scratchBufferOffset:"); +_MTL_PRIVATE_DEF_SEL(refitAccelerationStructure_descriptor_destination_scratchBuffer_scratchBufferOffset_options_, + "refitAccelerationStructure:descriptor:destination:scratchBuffer:scratchBufferOffset:options:"); +_MTL_PRIVATE_DEF_SEL(registryID, + "registryID"); +_MTL_PRIVATE_DEF_SEL(remoteStorageBuffer, + "remoteStorageBuffer"); +_MTL_PRIVATE_DEF_SEL(remoteStorageTexture, + "remoteStorageTexture"); +_MTL_PRIVATE_DEF_SEL(removeAllDebugMarkers, + "removeAllDebugMarkers"); +_MTL_PRIVATE_DEF_SEL(renderCommandEncoder, + "renderCommandEncoder"); +_MTL_PRIVATE_DEF_SEL(renderCommandEncoderWithDescriptor_, + "renderCommandEncoderWithDescriptor:"); +_MTL_PRIVATE_DEF_SEL(renderPassDescriptor, + "renderPassDescriptor"); 
+_MTL_PRIVATE_DEF_SEL(renderTargetArrayLength, + "renderTargetArrayLength"); +_MTL_PRIVATE_DEF_SEL(renderTargetHeight, + "renderTargetHeight"); +_MTL_PRIVATE_DEF_SEL(renderTargetWidth, + "renderTargetWidth"); +_MTL_PRIVATE_DEF_SEL(replaceRegion_mipmapLevel_slice_withBytes_bytesPerRow_bytesPerImage_, + "replaceRegion:mipmapLevel:slice:withBytes:bytesPerRow:bytesPerImage:"); +_MTL_PRIVATE_DEF_SEL(replaceRegion_mipmapLevel_withBytes_bytesPerRow_, + "replaceRegion:mipmapLevel:withBytes:bytesPerRow:"); +_MTL_PRIVATE_DEF_SEL(required, + "required"); +_MTL_PRIVATE_DEF_SEL(reset, + "reset"); +_MTL_PRIVATE_DEF_SEL(resetCommandsInBuffer_withRange_, + "resetCommandsInBuffer:withRange:"); +_MTL_PRIVATE_DEF_SEL(resetTextureAccessCounters_region_mipLevel_slice_, + "resetTextureAccessCounters:region:mipLevel:slice:"); +_MTL_PRIVATE_DEF_SEL(resetWithRange_, + "resetWithRange:"); +_MTL_PRIVATE_DEF_SEL(resolveCounterRange_, + "resolveCounterRange:"); +_MTL_PRIVATE_DEF_SEL(resolveCounters_inRange_destinationBuffer_destinationOffset_, + "resolveCounters:inRange:destinationBuffer:destinationOffset:"); +_MTL_PRIVATE_DEF_SEL(resolveDepthPlane, + "resolveDepthPlane"); +_MTL_PRIVATE_DEF_SEL(resolveLevel, + "resolveLevel"); +_MTL_PRIVATE_DEF_SEL(resolveSlice, + "resolveSlice"); +_MTL_PRIVATE_DEF_SEL(resolveTexture, + "resolveTexture"); +_MTL_PRIVATE_DEF_SEL(resourceOptions, + "resourceOptions"); +_MTL_PRIVATE_DEF_SEL(resourceStateCommandEncoder, + "resourceStateCommandEncoder"); +_MTL_PRIVATE_DEF_SEL(resourceStateCommandEncoderWithDescriptor_, + "resourceStateCommandEncoderWithDescriptor:"); +_MTL_PRIVATE_DEF_SEL(resourceStatePassDescriptor, + "resourceStatePassDescriptor"); +_MTL_PRIVATE_DEF_SEL(retainedReferences, + "retainedReferences"); +_MTL_PRIVATE_DEF_SEL(rgbBlendOperation, + "rgbBlendOperation"); +_MTL_PRIVATE_DEF_SEL(rootResource, + "rootResource"); +_MTL_PRIVATE_DEF_SEL(sAddressMode, + "sAddressMode"); +_MTL_PRIVATE_DEF_SEL(sampleBuffer, + "sampleBuffer"); 
+_MTL_PRIVATE_DEF_SEL(sampleBufferAttachments, + "sampleBufferAttachments"); +_MTL_PRIVATE_DEF_SEL(sampleCount, + "sampleCount"); +_MTL_PRIVATE_DEF_SEL(sampleCountersInBuffer_atSampleIndex_withBarrier_, + "sampleCountersInBuffer:atSampleIndex:withBarrier:"); +_MTL_PRIVATE_DEF_SEL(sampleTimestamps_gpuTimestamp_, + "sampleTimestamps:gpuTimestamp:"); +_MTL_PRIVATE_DEF_SEL(scratchBufferAllocator, + "scratchBufferAllocator"); +_MTL_PRIVATE_DEF_SEL(screenSize, + "screenSize"); +_MTL_PRIVATE_DEF_SEL(serializeToURL_error_, + "serializeToURL:error:"); +_MTL_PRIVATE_DEF_SEL(setAccelerationStructure_atBufferIndex_, + "setAccelerationStructure:atBufferIndex:"); +_MTL_PRIVATE_DEF_SEL(setAccelerationStructure_atIndex_, + "setAccelerationStructure:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setAccess_, + "setAccess:"); +_MTL_PRIVATE_DEF_SEL(setAllowDuplicateIntersectionFunctionInvocation_, + "setAllowDuplicateIntersectionFunctionInvocation:"); +_MTL_PRIVATE_DEF_SEL(setAllowGPUOptimizedContents_, + "setAllowGPUOptimizedContents:"); +_MTL_PRIVATE_DEF_SEL(setAlphaBlendOperation_, + "setAlphaBlendOperation:"); +_MTL_PRIVATE_DEF_SEL(setAlphaToCoverageEnabled_, + "setAlphaToCoverageEnabled:"); +_MTL_PRIVATE_DEF_SEL(setAlphaToOneEnabled_, + "setAlphaToOneEnabled:"); +_MTL_PRIVATE_DEF_SEL(setArgumentBuffer_offset_, + "setArgumentBuffer:offset:"); +_MTL_PRIVATE_DEF_SEL(setArgumentBuffer_startOffset_arrayElement_, + "setArgumentBuffer:startOffset:arrayElement:"); +_MTL_PRIVATE_DEF_SEL(setArgumentIndex_, + "setArgumentIndex:"); +_MTL_PRIVATE_DEF_SEL(setArguments_, + "setArguments:"); +_MTL_PRIVATE_DEF_SEL(setArrayLength_, + "setArrayLength:"); +_MTL_PRIVATE_DEF_SEL(setAttributes_, + "setAttributes:"); +_MTL_PRIVATE_DEF_SEL(setBackFaceStencil_, + "setBackFaceStencil:"); +_MTL_PRIVATE_DEF_SEL(setBarrier, + "setBarrier"); +_MTL_PRIVATE_DEF_SEL(setBinaryArchives_, + "setBinaryArchives:"); +_MTL_PRIVATE_DEF_SEL(setBinaryFunctions_, + "setBinaryFunctions:"); 
+_MTL_PRIVATE_DEF_SEL(setBlendColorRed_green_blue_alpha_, + "setBlendColorRed:green:blue:alpha:"); +_MTL_PRIVATE_DEF_SEL(setBlendingEnabled_, + "setBlendingEnabled:"); +_MTL_PRIVATE_DEF_SEL(setBorderColor_, + "setBorderColor:"); +_MTL_PRIVATE_DEF_SEL(setBoundingBoxBuffer_, + "setBoundingBoxBuffer:"); +_MTL_PRIVATE_DEF_SEL(setBoundingBoxBufferOffset_, + "setBoundingBoxBufferOffset:"); +_MTL_PRIVATE_DEF_SEL(setBoundingBoxBuffers_, + "setBoundingBoxBuffers:"); +_MTL_PRIVATE_DEF_SEL(setBoundingBoxCount_, + "setBoundingBoxCount:"); +_MTL_PRIVATE_DEF_SEL(setBoundingBoxStride_, + "setBoundingBoxStride:"); +_MTL_PRIVATE_DEF_SEL(setBuffer_, + "setBuffer:"); +_MTL_PRIVATE_DEF_SEL(setBuffer_offset_atIndex_, + "setBuffer:offset:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setBufferIndex_, + "setBufferIndex:"); +_MTL_PRIVATE_DEF_SEL(setBufferOffset_atIndex_, + "setBufferOffset:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setBuffers_offsets_withRange_, + "setBuffers:offsets:withRange:"); +_MTL_PRIVATE_DEF_SEL(setBytes_length_atIndex_, + "setBytes:length:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setCaptureObject_, + "setCaptureObject:"); +_MTL_PRIVATE_DEF_SEL(setClearColor_, + "setClearColor:"); +_MTL_PRIVATE_DEF_SEL(setClearDepth_, + "setClearDepth:"); +_MTL_PRIVATE_DEF_SEL(setClearStencil_, + "setClearStencil:"); +_MTL_PRIVATE_DEF_SEL(setColorStoreAction_atIndex_, + "setColorStoreAction:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setColorStoreActionOptions_atIndex_, + "setColorStoreActionOptions:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setCommandTypes_, + "setCommandTypes:"); +_MTL_PRIVATE_DEF_SEL(setCompareFunction_, + "setCompareFunction:"); +_MTL_PRIVATE_DEF_SEL(setCompressionType_, + "setCompressionType:"); +_MTL_PRIVATE_DEF_SEL(setComputeFunction_, + "setComputeFunction:"); +_MTL_PRIVATE_DEF_SEL(setComputePipelineState_, + "setComputePipelineState:"); +_MTL_PRIVATE_DEF_SEL(setComputePipelineState_atIndex_, + "setComputePipelineState:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setComputePipelineStates_withRange_, + 
"setComputePipelineStates:withRange:"); +_MTL_PRIVATE_DEF_SEL(setConstantBlockAlignment_, + "setConstantBlockAlignment:"); +_MTL_PRIVATE_DEF_SEL(setConstantValue_type_atIndex_, + "setConstantValue:type:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setConstantValue_type_withName_, + "setConstantValue:type:withName:"); +_MTL_PRIVATE_DEF_SEL(setConstantValues_, + "setConstantValues:"); +_MTL_PRIVATE_DEF_SEL(setConstantValues_type_withRange_, + "setConstantValues:type:withRange:"); +_MTL_PRIVATE_DEF_SEL(setControlDependencies_, + "setControlDependencies:"); +_MTL_PRIVATE_DEF_SEL(setCounterSet_, + "setCounterSet:"); +_MTL_PRIVATE_DEF_SEL(setCpuCacheMode_, + "setCpuCacheMode:"); +_MTL_PRIVATE_DEF_SEL(setCullMode_, + "setCullMode:"); +_MTL_PRIVATE_DEF_SEL(setDataType_, + "setDataType:"); +_MTL_PRIVATE_DEF_SEL(setDefaultCaptureScope_, + "setDefaultCaptureScope:"); +_MTL_PRIVATE_DEF_SEL(setDefaultRasterSampleCount_, + "setDefaultRasterSampleCount:"); +_MTL_PRIVATE_DEF_SEL(setDepth_, + "setDepth:"); +_MTL_PRIVATE_DEF_SEL(setDepthAttachment_, + "setDepthAttachment:"); +_MTL_PRIVATE_DEF_SEL(setDepthAttachmentPixelFormat_, + "setDepthAttachmentPixelFormat:"); +_MTL_PRIVATE_DEF_SEL(setDepthBias_slopeScale_clamp_, + "setDepthBias:slopeScale:clamp:"); +_MTL_PRIVATE_DEF_SEL(setDepthClipMode_, + "setDepthClipMode:"); +_MTL_PRIVATE_DEF_SEL(setDepthCompareFunction_, + "setDepthCompareFunction:"); +_MTL_PRIVATE_DEF_SEL(setDepthFailureOperation_, + "setDepthFailureOperation:"); +_MTL_PRIVATE_DEF_SEL(setDepthPlane_, + "setDepthPlane:"); +_MTL_PRIVATE_DEF_SEL(setDepthResolveFilter_, + "setDepthResolveFilter:"); +_MTL_PRIVATE_DEF_SEL(setDepthStencilPassOperation_, + "setDepthStencilPassOperation:"); +_MTL_PRIVATE_DEF_SEL(setDepthStencilState_, + "setDepthStencilState:"); +_MTL_PRIVATE_DEF_SEL(setDepthStoreAction_, + "setDepthStoreAction:"); +_MTL_PRIVATE_DEF_SEL(setDepthStoreActionOptions_, + "setDepthStoreActionOptions:"); +_MTL_PRIVATE_DEF_SEL(setDepthWriteEnabled_, + "setDepthWriteEnabled:"); 
+_MTL_PRIVATE_DEF_SEL(setDestination_, + "setDestination:"); +_MTL_PRIVATE_DEF_SEL(setDestinationAlphaBlendFactor_, + "setDestinationAlphaBlendFactor:"); +_MTL_PRIVATE_DEF_SEL(setDestinationRGBBlendFactor_, + "setDestinationRGBBlendFactor:"); +_MTL_PRIVATE_DEF_SEL(setDispatchType_, + "setDispatchType:"); +_MTL_PRIVATE_DEF_SEL(setEndOfEncoderSampleIndex_, + "setEndOfEncoderSampleIndex:"); +_MTL_PRIVATE_DEF_SEL(setEndOfFragmentSampleIndex_, + "setEndOfFragmentSampleIndex:"); +_MTL_PRIVATE_DEF_SEL(setEndOfVertexSampleIndex_, + "setEndOfVertexSampleIndex:"); +_MTL_PRIVATE_DEF_SEL(setErrorOptions_, + "setErrorOptions:"); +_MTL_PRIVATE_DEF_SEL(setFastMathEnabled_, + "setFastMathEnabled:"); +_MTL_PRIVATE_DEF_SEL(setFormat_, + "setFormat:"); +_MTL_PRIVATE_DEF_SEL(setFragmentAccelerationStructure_atBufferIndex_, + "setFragmentAccelerationStructure:atBufferIndex:"); +_MTL_PRIVATE_DEF_SEL(setFragmentAdditionalBinaryFunctions_, + "setFragmentAdditionalBinaryFunctions:"); +_MTL_PRIVATE_DEF_SEL(setFragmentBuffer_offset_atIndex_, + "setFragmentBuffer:offset:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setFragmentBufferOffset_atIndex_, + "setFragmentBufferOffset:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setFragmentBuffers_offsets_withRange_, + "setFragmentBuffers:offsets:withRange:"); +_MTL_PRIVATE_DEF_SEL(setFragmentBytes_length_atIndex_, + "setFragmentBytes:length:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setFragmentFunction_, + "setFragmentFunction:"); +_MTL_PRIVATE_DEF_SEL(setFragmentIntersectionFunctionTable_atBufferIndex_, + "setFragmentIntersectionFunctionTable:atBufferIndex:"); +_MTL_PRIVATE_DEF_SEL(setFragmentIntersectionFunctionTables_withBufferRange_, + "setFragmentIntersectionFunctionTables:withBufferRange:"); +_MTL_PRIVATE_DEF_SEL(setFragmentLinkedFunctions_, + "setFragmentLinkedFunctions:"); +_MTL_PRIVATE_DEF_SEL(setFragmentPreloadedLibraries_, + "setFragmentPreloadedLibraries:"); +_MTL_PRIVATE_DEF_SEL(setFragmentSamplerState_atIndex_, + "setFragmentSamplerState:atIndex:"); 
+_MTL_PRIVATE_DEF_SEL(setFragmentSamplerState_lodMinClamp_lodMaxClamp_atIndex_, + "setFragmentSamplerState:lodMinClamp:lodMaxClamp:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setFragmentSamplerStates_lodMinClamps_lodMaxClamps_withRange_, + "setFragmentSamplerStates:lodMinClamps:lodMaxClamps:withRange:"); +_MTL_PRIVATE_DEF_SEL(setFragmentSamplerStates_withRange_, + "setFragmentSamplerStates:withRange:"); +_MTL_PRIVATE_DEF_SEL(setFragmentTexture_atIndex_, + "setFragmentTexture:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setFragmentTextures_withRange_, + "setFragmentTextures:withRange:"); +_MTL_PRIVATE_DEF_SEL(setFragmentVisibleFunctionTable_atBufferIndex_, + "setFragmentVisibleFunctionTable:atBufferIndex:"); +_MTL_PRIVATE_DEF_SEL(setFragmentVisibleFunctionTables_withBufferRange_, + "setFragmentVisibleFunctionTables:withBufferRange:"); +_MTL_PRIVATE_DEF_SEL(setFrontFaceStencil_, + "setFrontFaceStencil:"); +_MTL_PRIVATE_DEF_SEL(setFrontFacingWinding_, + "setFrontFacingWinding:"); +_MTL_PRIVATE_DEF_SEL(setFunction_atIndex_, + "setFunction:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setFunctionCount_, + "setFunctionCount:"); +_MTL_PRIVATE_DEF_SEL(setFunctionGraphs_, + "setFunctionGraphs:"); +_MTL_PRIVATE_DEF_SEL(setFunctionName_, + "setFunctionName:"); +_MTL_PRIVATE_DEF_SEL(setFunctions_, + "setFunctions:"); +_MTL_PRIVATE_DEF_SEL(setFunctions_withRange_, + "setFunctions:withRange:"); +_MTL_PRIVATE_DEF_SEL(setGeometryDescriptors_, + "setGeometryDescriptors:"); +_MTL_PRIVATE_DEF_SEL(setGroups_, + "setGroups:"); +_MTL_PRIVATE_DEF_SEL(setHazardTrackingMode_, + "setHazardTrackingMode:"); +_MTL_PRIVATE_DEF_SEL(setHeight_, + "setHeight:"); +_MTL_PRIVATE_DEF_SEL(setImageblockSampleLength_, + "setImageblockSampleLength:"); +_MTL_PRIVATE_DEF_SEL(setImageblockWidth_height_, + "setImageblockWidth:height:"); +_MTL_PRIVATE_DEF_SEL(setIndex_, + "setIndex:"); +_MTL_PRIVATE_DEF_SEL(setIndexBuffer_, + "setIndexBuffer:"); +_MTL_PRIVATE_DEF_SEL(setIndexBufferIndex_, + "setIndexBufferIndex:"); 
+_MTL_PRIVATE_DEF_SEL(setIndexBufferOffset_, + "setIndexBufferOffset:"); +_MTL_PRIVATE_DEF_SEL(setIndexType_, + "setIndexType:"); +_MTL_PRIVATE_DEF_SEL(setIndirectCommandBuffer_atIndex_, + "setIndirectCommandBuffer:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setIndirectCommandBuffers_withRange_, + "setIndirectCommandBuffers:withRange:"); +_MTL_PRIVATE_DEF_SEL(setInheritBuffers_, + "setInheritBuffers:"); +_MTL_PRIVATE_DEF_SEL(setInheritPipelineState_, + "setInheritPipelineState:"); +_MTL_PRIVATE_DEF_SEL(setInputPrimitiveTopology_, + "setInputPrimitiveTopology:"); +_MTL_PRIVATE_DEF_SEL(setInsertLibraries_, + "setInsertLibraries:"); +_MTL_PRIVATE_DEF_SEL(setInstallName_, + "setInstallName:"); +_MTL_PRIVATE_DEF_SEL(setInstanceCount_, + "setInstanceCount:"); +_MTL_PRIVATE_DEF_SEL(setInstanceDescriptorBuffer_, + "setInstanceDescriptorBuffer:"); +_MTL_PRIVATE_DEF_SEL(setInstanceDescriptorBufferOffset_, + "setInstanceDescriptorBufferOffset:"); +_MTL_PRIVATE_DEF_SEL(setInstanceDescriptorStride_, + "setInstanceDescriptorStride:"); +_MTL_PRIVATE_DEF_SEL(setInstanceDescriptorType_, + "setInstanceDescriptorType:"); +_MTL_PRIVATE_DEF_SEL(setInstancedAccelerationStructures_, + "setInstancedAccelerationStructures:"); +_MTL_PRIVATE_DEF_SEL(setIntersectionFunctionTable_atBufferIndex_, + "setIntersectionFunctionTable:atBufferIndex:"); +_MTL_PRIVATE_DEF_SEL(setIntersectionFunctionTable_atIndex_, + "setIntersectionFunctionTable:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setIntersectionFunctionTableOffset_, + "setIntersectionFunctionTableOffset:"); +_MTL_PRIVATE_DEF_SEL(setIntersectionFunctionTables_withBufferRange_, + "setIntersectionFunctionTables:withBufferRange:"); +_MTL_PRIVATE_DEF_SEL(setIntersectionFunctionTables_withRange_, + "setIntersectionFunctionTables:withRange:"); +_MTL_PRIVATE_DEF_SEL(setKernelBuffer_offset_atIndex_, + "setKernelBuffer:offset:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setLabel_, + "setLabel:"); +_MTL_PRIVATE_DEF_SEL(setLanguageVersion_, + "setLanguageVersion:"); 
+_MTL_PRIVATE_DEF_SEL(setLayer_atIndex_, + "setLayer:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setLevel_, + "setLevel:"); +_MTL_PRIVATE_DEF_SEL(setLibraries_, + "setLibraries:"); +_MTL_PRIVATE_DEF_SEL(setLibraryType_, + "setLibraryType:"); +_MTL_PRIVATE_DEF_SEL(setLinkedFunctions_, + "setLinkedFunctions:"); +_MTL_PRIVATE_DEF_SEL(setLoadAction_, + "setLoadAction:"); +_MTL_PRIVATE_DEF_SEL(setLodAverage_, + "setLodAverage:"); +_MTL_PRIVATE_DEF_SEL(setLodMaxClamp_, + "setLodMaxClamp:"); +_MTL_PRIVATE_DEF_SEL(setLodMinClamp_, + "setLodMinClamp:"); +_MTL_PRIVATE_DEF_SEL(setMagFilter_, + "setMagFilter:"); +_MTL_PRIVATE_DEF_SEL(setMaxAnisotropy_, + "setMaxAnisotropy:"); +_MTL_PRIVATE_DEF_SEL(setMaxCallStackDepth_, + "setMaxCallStackDepth:"); +_MTL_PRIVATE_DEF_SEL(setMaxCommandBufferCount_, + "setMaxCommandBufferCount:"); +_MTL_PRIVATE_DEF_SEL(setMaxCommandsInFlight_, + "setMaxCommandsInFlight:"); +_MTL_PRIVATE_DEF_SEL(setMaxFragmentBufferBindCount_, + "setMaxFragmentBufferBindCount:"); +_MTL_PRIVATE_DEF_SEL(setMaxFragmentCallStackDepth_, + "setMaxFragmentCallStackDepth:"); +_MTL_PRIVATE_DEF_SEL(setMaxKernelBufferBindCount_, + "setMaxKernelBufferBindCount:"); +_MTL_PRIVATE_DEF_SEL(setMaxTessellationFactor_, + "setMaxTessellationFactor:"); +_MTL_PRIVATE_DEF_SEL(setMaxTotalThreadgroupsPerMeshGrid_, + "setMaxTotalThreadgroupsPerMeshGrid:"); +_MTL_PRIVATE_DEF_SEL(setMaxTotalThreadsPerMeshThreadgroup_, + "setMaxTotalThreadsPerMeshThreadgroup:"); +_MTL_PRIVATE_DEF_SEL(setMaxTotalThreadsPerObjectThreadgroup_, + "setMaxTotalThreadsPerObjectThreadgroup:"); +_MTL_PRIVATE_DEF_SEL(setMaxTotalThreadsPerThreadgroup_, + "setMaxTotalThreadsPerThreadgroup:"); +_MTL_PRIVATE_DEF_SEL(setMaxVertexAmplificationCount_, + "setMaxVertexAmplificationCount:"); +_MTL_PRIVATE_DEF_SEL(setMaxVertexBufferBindCount_, + "setMaxVertexBufferBindCount:"); +_MTL_PRIVATE_DEF_SEL(setMaxVertexCallStackDepth_, + "setMaxVertexCallStackDepth:"); +_MTL_PRIVATE_DEF_SEL(setMeshBuffer_offset_atIndex_, + 
"setMeshBuffer:offset:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setMeshBufferOffset_atIndex_, + "setMeshBufferOffset:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setMeshBuffers_offsets_withRange_, + "setMeshBuffers:offsets:withRange:"); +_MTL_PRIVATE_DEF_SEL(setMeshBytes_length_atIndex_, + "setMeshBytes:length:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setMeshFunction_, + "setMeshFunction:"); +_MTL_PRIVATE_DEF_SEL(setMeshSamplerState_atIndex_, + "setMeshSamplerState:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setMeshSamplerState_lodMinClamp_lodMaxClamp_atIndex_, + "setMeshSamplerState:lodMinClamp:lodMaxClamp:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setMeshSamplerStates_lodMinClamps_lodMaxClamps_withRange_, + "setMeshSamplerStates:lodMinClamps:lodMaxClamps:withRange:"); +_MTL_PRIVATE_DEF_SEL(setMeshSamplerStates_withRange_, + "setMeshSamplerStates:withRange:"); +_MTL_PRIVATE_DEF_SEL(setMeshTexture_atIndex_, + "setMeshTexture:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setMeshTextures_withRange_, + "setMeshTextures:withRange:"); +_MTL_PRIVATE_DEF_SEL(setMeshThreadgroupSizeIsMultipleOfThreadExecutionWidth_, + "setMeshThreadgroupSizeIsMultipleOfThreadExecutionWidth:"); +_MTL_PRIVATE_DEF_SEL(setMinFilter_, + "setMinFilter:"); +_MTL_PRIVATE_DEF_SEL(setMipFilter_, + "setMipFilter:"); +_MTL_PRIVATE_DEF_SEL(setMipmapLevelCount_, + "setMipmapLevelCount:"); +_MTL_PRIVATE_DEF_SEL(setMotionEndBorderMode_, + "setMotionEndBorderMode:"); +_MTL_PRIVATE_DEF_SEL(setMotionEndTime_, + "setMotionEndTime:"); +_MTL_PRIVATE_DEF_SEL(setMotionKeyframeCount_, + "setMotionKeyframeCount:"); +_MTL_PRIVATE_DEF_SEL(setMotionStartBorderMode_, + "setMotionStartBorderMode:"); +_MTL_PRIVATE_DEF_SEL(setMotionStartTime_, + "setMotionStartTime:"); +_MTL_PRIVATE_DEF_SEL(setMotionTransformBuffer_, + "setMotionTransformBuffer:"); +_MTL_PRIVATE_DEF_SEL(setMotionTransformBufferOffset_, + "setMotionTransformBufferOffset:"); +_MTL_PRIVATE_DEF_SEL(setMotionTransformCount_, + "setMotionTransformCount:"); +_MTL_PRIVATE_DEF_SEL(setMutability_, + "setMutability:"); 
+_MTL_PRIVATE_DEF_SEL(setName_, + "setName:"); +_MTL_PRIVATE_DEF_SEL(setNodes_, + "setNodes:"); +_MTL_PRIVATE_DEF_SEL(setNormalizedCoordinates_, + "setNormalizedCoordinates:"); +_MTL_PRIVATE_DEF_SEL(setObject_atIndexedSubscript_, + "setObject:atIndexedSubscript:"); +_MTL_PRIVATE_DEF_SEL(setObjectBuffer_offset_atIndex_, + "setObjectBuffer:offset:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setObjectBufferOffset_atIndex_, + "setObjectBufferOffset:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setObjectBuffers_offsets_withRange_, + "setObjectBuffers:offsets:withRange:"); +_MTL_PRIVATE_DEF_SEL(setObjectBytes_length_atIndex_, + "setObjectBytes:length:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setObjectFunction_, + "setObjectFunction:"); +_MTL_PRIVATE_DEF_SEL(setObjectSamplerState_atIndex_, + "setObjectSamplerState:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setObjectSamplerState_lodMinClamp_lodMaxClamp_atIndex_, + "setObjectSamplerState:lodMinClamp:lodMaxClamp:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setObjectSamplerStates_lodMinClamps_lodMaxClamps_withRange_, + "setObjectSamplerStates:lodMinClamps:lodMaxClamps:withRange:"); +_MTL_PRIVATE_DEF_SEL(setObjectSamplerStates_withRange_, + "setObjectSamplerStates:withRange:"); +_MTL_PRIVATE_DEF_SEL(setObjectTexture_atIndex_, + "setObjectTexture:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setObjectTextures_withRange_, + "setObjectTextures:withRange:"); +_MTL_PRIVATE_DEF_SEL(setObjectThreadgroupMemoryLength_atIndex_, + "setObjectThreadgroupMemoryLength:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setObjectThreadgroupSizeIsMultipleOfThreadExecutionWidth_, + "setObjectThreadgroupSizeIsMultipleOfThreadExecutionWidth:"); +_MTL_PRIVATE_DEF_SEL(setOffset_, + "setOffset:"); +_MTL_PRIVATE_DEF_SEL(setOpaque_, + "setOpaque:"); +_MTL_PRIVATE_DEF_SEL(setOpaqueTriangleIntersectionFunctionWithSignature_atIndex_, + "setOpaqueTriangleIntersectionFunctionWithSignature:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setOpaqueTriangleIntersectionFunctionWithSignature_withRange_, + 
"setOpaqueTriangleIntersectionFunctionWithSignature:withRange:"); +_MTL_PRIVATE_DEF_SEL(setOptimizationLevel_, + "setOptimizationLevel:"); +_MTL_PRIVATE_DEF_SEL(setOptions_, + "setOptions:"); +_MTL_PRIVATE_DEF_SEL(setOutputNode_, + "setOutputNode:"); +_MTL_PRIVATE_DEF_SEL(setOutputURL_, + "setOutputURL:"); +_MTL_PRIVATE_DEF_SEL(setPayloadMemoryLength_, + "setPayloadMemoryLength:"); +_MTL_PRIVATE_DEF_SEL(setPixelFormat_, + "setPixelFormat:"); +_MTL_PRIVATE_DEF_SEL(setPreloadedLibraries_, + "setPreloadedLibraries:"); +_MTL_PRIVATE_DEF_SEL(setPreprocessorMacros_, + "setPreprocessorMacros:"); +_MTL_PRIVATE_DEF_SEL(setPreserveInvariance_, + "setPreserveInvariance:"); +_MTL_PRIVATE_DEF_SEL(setPrimitiveDataBuffer_, + "setPrimitiveDataBuffer:"); +_MTL_PRIVATE_DEF_SEL(setPrimitiveDataBufferOffset_, + "setPrimitiveDataBufferOffset:"); +_MTL_PRIVATE_DEF_SEL(setPrimitiveDataElementSize_, + "setPrimitiveDataElementSize:"); +_MTL_PRIVATE_DEF_SEL(setPrimitiveDataStride_, + "setPrimitiveDataStride:"); +_MTL_PRIVATE_DEF_SEL(setPriority_, + "setPriority:"); +_MTL_PRIVATE_DEF_SEL(setPrivateFunctions_, + "setPrivateFunctions:"); +_MTL_PRIVATE_DEF_SEL(setPurgeableState_, + "setPurgeableState:"); +_MTL_PRIVATE_DEF_SEL(setRAddressMode_, + "setRAddressMode:"); +_MTL_PRIVATE_DEF_SEL(setRasterSampleCount_, + "setRasterSampleCount:"); +_MTL_PRIVATE_DEF_SEL(setRasterizationEnabled_, + "setRasterizationEnabled:"); +_MTL_PRIVATE_DEF_SEL(setRasterizationRateMap_, + "setRasterizationRateMap:"); +_MTL_PRIVATE_DEF_SEL(setReadMask_, + "setReadMask:"); +_MTL_PRIVATE_DEF_SEL(setRenderPipelineState_, + "setRenderPipelineState:"); +_MTL_PRIVATE_DEF_SEL(setRenderPipelineState_atIndex_, + "setRenderPipelineState:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setRenderPipelineStates_withRange_, + "setRenderPipelineStates:withRange:"); +_MTL_PRIVATE_DEF_SEL(setRenderTargetArrayLength_, + "setRenderTargetArrayLength:"); +_MTL_PRIVATE_DEF_SEL(setRenderTargetHeight_, + "setRenderTargetHeight:"); 
+_MTL_PRIVATE_DEF_SEL(setRenderTargetWidth_, + "setRenderTargetWidth:"); +_MTL_PRIVATE_DEF_SEL(setResolveDepthPlane_, + "setResolveDepthPlane:"); +_MTL_PRIVATE_DEF_SEL(setResolveLevel_, + "setResolveLevel:"); +_MTL_PRIVATE_DEF_SEL(setResolveSlice_, + "setResolveSlice:"); +_MTL_PRIVATE_DEF_SEL(setResolveTexture_, + "setResolveTexture:"); +_MTL_PRIVATE_DEF_SEL(setResourceOptions_, + "setResourceOptions:"); +_MTL_PRIVATE_DEF_SEL(setRetainedReferences_, + "setRetainedReferences:"); +_MTL_PRIVATE_DEF_SEL(setRgbBlendOperation_, + "setRgbBlendOperation:"); +_MTL_PRIVATE_DEF_SEL(setSAddressMode_, + "setSAddressMode:"); +_MTL_PRIVATE_DEF_SEL(setSampleBuffer_, + "setSampleBuffer:"); +_MTL_PRIVATE_DEF_SEL(setSampleCount_, + "setSampleCount:"); +_MTL_PRIVATE_DEF_SEL(setSamplePositions_count_, + "setSamplePositions:count:"); +_MTL_PRIVATE_DEF_SEL(setSamplerState_atIndex_, + "setSamplerState:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setSamplerState_lodMinClamp_lodMaxClamp_atIndex_, + "setSamplerState:lodMinClamp:lodMaxClamp:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setSamplerStates_lodMinClamps_lodMaxClamps_withRange_, + "setSamplerStates:lodMinClamps:lodMaxClamps:withRange:"); +_MTL_PRIVATE_DEF_SEL(setSamplerStates_withRange_, + "setSamplerStates:withRange:"); +_MTL_PRIVATE_DEF_SEL(setScissorRect_, + "setScissorRect:"); +_MTL_PRIVATE_DEF_SEL(setScissorRects_count_, + "setScissorRects:count:"); +_MTL_PRIVATE_DEF_SEL(setScratchBufferAllocator_, + "setScratchBufferAllocator:"); +_MTL_PRIVATE_DEF_SEL(setScreenSize_, + "setScreenSize:"); +_MTL_PRIVATE_DEF_SEL(setSignaledValue_, + "setSignaledValue:"); +_MTL_PRIVATE_DEF_SEL(setSize_, + "setSize:"); +_MTL_PRIVATE_DEF_SEL(setSlice_, + "setSlice:"); +_MTL_PRIVATE_DEF_SEL(setSourceAlphaBlendFactor_, + "setSourceAlphaBlendFactor:"); +_MTL_PRIVATE_DEF_SEL(setSourceRGBBlendFactor_, + "setSourceRGBBlendFactor:"); +_MTL_PRIVATE_DEF_SEL(setSparsePageSize_, + "setSparsePageSize:"); +_MTL_PRIVATE_DEF_SEL(setSpecializedName_, + "setSpecializedName:"); 
+_MTL_PRIVATE_DEF_SEL(setStageInRegion_, + "setStageInRegion:"); +_MTL_PRIVATE_DEF_SEL(setStageInRegionWithIndirectBuffer_indirectBufferOffset_, + "setStageInRegionWithIndirectBuffer:indirectBufferOffset:"); +_MTL_PRIVATE_DEF_SEL(setStageInputDescriptor_, + "setStageInputDescriptor:"); +_MTL_PRIVATE_DEF_SEL(setStartOfEncoderSampleIndex_, + "setStartOfEncoderSampleIndex:"); +_MTL_PRIVATE_DEF_SEL(setStartOfFragmentSampleIndex_, + "setStartOfFragmentSampleIndex:"); +_MTL_PRIVATE_DEF_SEL(setStartOfVertexSampleIndex_, + "setStartOfVertexSampleIndex:"); +_MTL_PRIVATE_DEF_SEL(setStencilAttachment_, + "setStencilAttachment:"); +_MTL_PRIVATE_DEF_SEL(setStencilAttachmentPixelFormat_, + "setStencilAttachmentPixelFormat:"); +_MTL_PRIVATE_DEF_SEL(setStencilCompareFunction_, + "setStencilCompareFunction:"); +_MTL_PRIVATE_DEF_SEL(setStencilFailureOperation_, + "setStencilFailureOperation:"); +_MTL_PRIVATE_DEF_SEL(setStencilFrontReferenceValue_backReferenceValue_, + "setStencilFrontReferenceValue:backReferenceValue:"); +_MTL_PRIVATE_DEF_SEL(setStencilReferenceValue_, + "setStencilReferenceValue:"); +_MTL_PRIVATE_DEF_SEL(setStencilResolveFilter_, + "setStencilResolveFilter:"); +_MTL_PRIVATE_DEF_SEL(setStencilStoreAction_, + "setStencilStoreAction:"); +_MTL_PRIVATE_DEF_SEL(setStencilStoreActionOptions_, + "setStencilStoreActionOptions:"); +_MTL_PRIVATE_DEF_SEL(setStepFunction_, + "setStepFunction:"); +_MTL_PRIVATE_DEF_SEL(setStepRate_, + "setStepRate:"); +_MTL_PRIVATE_DEF_SEL(setStorageMode_, + "setStorageMode:"); +_MTL_PRIVATE_DEF_SEL(setStoreAction_, + "setStoreAction:"); +_MTL_PRIVATE_DEF_SEL(setStoreActionOptions_, + "setStoreActionOptions:"); +_MTL_PRIVATE_DEF_SEL(setStride_, + "setStride:"); +_MTL_PRIVATE_DEF_SEL(setSupportAddingBinaryFunctions_, + "setSupportAddingBinaryFunctions:"); +_MTL_PRIVATE_DEF_SEL(setSupportAddingFragmentBinaryFunctions_, + "setSupportAddingFragmentBinaryFunctions:"); +_MTL_PRIVATE_DEF_SEL(setSupportAddingVertexBinaryFunctions_, + 
"setSupportAddingVertexBinaryFunctions:"); +_MTL_PRIVATE_DEF_SEL(setSupportArgumentBuffers_, + "setSupportArgumentBuffers:"); +_MTL_PRIVATE_DEF_SEL(setSupportIndirectCommandBuffers_, + "setSupportIndirectCommandBuffers:"); +_MTL_PRIVATE_DEF_SEL(setSupportRayTracing_, + "setSupportRayTracing:"); +_MTL_PRIVATE_DEF_SEL(setSwizzle_, + "setSwizzle:"); +_MTL_PRIVATE_DEF_SEL(setTAddressMode_, + "setTAddressMode:"); +_MTL_PRIVATE_DEF_SEL(setTessellationControlPointIndexType_, + "setTessellationControlPointIndexType:"); +_MTL_PRIVATE_DEF_SEL(setTessellationFactorBuffer_offset_instanceStride_, + "setTessellationFactorBuffer:offset:instanceStride:"); +_MTL_PRIVATE_DEF_SEL(setTessellationFactorFormat_, + "setTessellationFactorFormat:"); +_MTL_PRIVATE_DEF_SEL(setTessellationFactorScale_, + "setTessellationFactorScale:"); +_MTL_PRIVATE_DEF_SEL(setTessellationFactorScaleEnabled_, + "setTessellationFactorScaleEnabled:"); +_MTL_PRIVATE_DEF_SEL(setTessellationFactorStepFunction_, + "setTessellationFactorStepFunction:"); +_MTL_PRIVATE_DEF_SEL(setTessellationOutputWindingOrder_, + "setTessellationOutputWindingOrder:"); +_MTL_PRIVATE_DEF_SEL(setTessellationPartitionMode_, + "setTessellationPartitionMode:"); +_MTL_PRIVATE_DEF_SEL(setTexture_, + "setTexture:"); +_MTL_PRIVATE_DEF_SEL(setTexture_atIndex_, + "setTexture:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setTextureType_, + "setTextureType:"); +_MTL_PRIVATE_DEF_SEL(setTextures_withRange_, + "setTextures:withRange:"); +_MTL_PRIVATE_DEF_SEL(setThreadGroupSizeIsMultipleOfThreadExecutionWidth_, + "setThreadGroupSizeIsMultipleOfThreadExecutionWidth:"); +_MTL_PRIVATE_DEF_SEL(setThreadgroupMemoryLength_, + "setThreadgroupMemoryLength:"); +_MTL_PRIVATE_DEF_SEL(setThreadgroupMemoryLength_atIndex_, + "setThreadgroupMemoryLength:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setThreadgroupMemoryLength_offset_atIndex_, + "setThreadgroupMemoryLength:offset:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setThreadgroupSizeMatchesTileSize_, + "setThreadgroupSizeMatchesTileSize:"); 
+_MTL_PRIVATE_DEF_SEL(setTileAccelerationStructure_atBufferIndex_, + "setTileAccelerationStructure:atBufferIndex:"); +_MTL_PRIVATE_DEF_SEL(setTileAdditionalBinaryFunctions_, + "setTileAdditionalBinaryFunctions:"); +_MTL_PRIVATE_DEF_SEL(setTileBuffer_offset_atIndex_, + "setTileBuffer:offset:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setTileBufferOffset_atIndex_, + "setTileBufferOffset:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setTileBuffers_offsets_withRange_, + "setTileBuffers:offsets:withRange:"); +_MTL_PRIVATE_DEF_SEL(setTileBytes_length_atIndex_, + "setTileBytes:length:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setTileFunction_, + "setTileFunction:"); +_MTL_PRIVATE_DEF_SEL(setTileHeight_, + "setTileHeight:"); +_MTL_PRIVATE_DEF_SEL(setTileIntersectionFunctionTable_atBufferIndex_, + "setTileIntersectionFunctionTable:atBufferIndex:"); +_MTL_PRIVATE_DEF_SEL(setTileIntersectionFunctionTables_withBufferRange_, + "setTileIntersectionFunctionTables:withBufferRange:"); +_MTL_PRIVATE_DEF_SEL(setTileSamplerState_atIndex_, + "setTileSamplerState:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setTileSamplerState_lodMinClamp_lodMaxClamp_atIndex_, + "setTileSamplerState:lodMinClamp:lodMaxClamp:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setTileSamplerStates_lodMinClamps_lodMaxClamps_withRange_, + "setTileSamplerStates:lodMinClamps:lodMaxClamps:withRange:"); +_MTL_PRIVATE_DEF_SEL(setTileSamplerStates_withRange_, + "setTileSamplerStates:withRange:"); +_MTL_PRIVATE_DEF_SEL(setTileTexture_atIndex_, + "setTileTexture:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setTileTextures_withRange_, + "setTileTextures:withRange:"); +_MTL_PRIVATE_DEF_SEL(setTileVisibleFunctionTable_atBufferIndex_, + "setTileVisibleFunctionTable:atBufferIndex:"); +_MTL_PRIVATE_DEF_SEL(setTileVisibleFunctionTables_withBufferRange_, + "setTileVisibleFunctionTables:withBufferRange:"); +_MTL_PRIVATE_DEF_SEL(setTileWidth_, + "setTileWidth:"); +_MTL_PRIVATE_DEF_SEL(setTransformationMatrixBuffer_, + "setTransformationMatrixBuffer:"); 
+_MTL_PRIVATE_DEF_SEL(setTransformationMatrixBufferOffset_, + "setTransformationMatrixBufferOffset:"); +_MTL_PRIVATE_DEF_SEL(setTriangleCount_, + "setTriangleCount:"); +_MTL_PRIVATE_DEF_SEL(setTriangleFillMode_, + "setTriangleFillMode:"); +_MTL_PRIVATE_DEF_SEL(setType_, + "setType:"); +_MTL_PRIVATE_DEF_SEL(setUrl_, + "setUrl:"); +_MTL_PRIVATE_DEF_SEL(setUsage_, + "setUsage:"); +_MTL_PRIVATE_DEF_SEL(setVertexAccelerationStructure_atBufferIndex_, + "setVertexAccelerationStructure:atBufferIndex:"); +_MTL_PRIVATE_DEF_SEL(setVertexAdditionalBinaryFunctions_, + "setVertexAdditionalBinaryFunctions:"); +_MTL_PRIVATE_DEF_SEL(setVertexAmplificationCount_viewMappings_, + "setVertexAmplificationCount:viewMappings:"); +_MTL_PRIVATE_DEF_SEL(setVertexBuffer_, + "setVertexBuffer:"); +_MTL_PRIVATE_DEF_SEL(setVertexBuffer_offset_atIndex_, + "setVertexBuffer:offset:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setVertexBufferOffset_, + "setVertexBufferOffset:"); +_MTL_PRIVATE_DEF_SEL(setVertexBufferOffset_atIndex_, + "setVertexBufferOffset:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setVertexBuffers_, + "setVertexBuffers:"); +_MTL_PRIVATE_DEF_SEL(setVertexBuffers_offsets_withRange_, + "setVertexBuffers:offsets:withRange:"); +_MTL_PRIVATE_DEF_SEL(setVertexBytes_length_atIndex_, + "setVertexBytes:length:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setVertexDescriptor_, + "setVertexDescriptor:"); +_MTL_PRIVATE_DEF_SEL(setVertexFormat_, + "setVertexFormat:"); +_MTL_PRIVATE_DEF_SEL(setVertexFunction_, + "setVertexFunction:"); +_MTL_PRIVATE_DEF_SEL(setVertexIntersectionFunctionTable_atBufferIndex_, + "setVertexIntersectionFunctionTable:atBufferIndex:"); +_MTL_PRIVATE_DEF_SEL(setVertexIntersectionFunctionTables_withBufferRange_, + "setVertexIntersectionFunctionTables:withBufferRange:"); +_MTL_PRIVATE_DEF_SEL(setVertexLinkedFunctions_, + "setVertexLinkedFunctions:"); +_MTL_PRIVATE_DEF_SEL(setVertexPreloadedLibraries_, + "setVertexPreloadedLibraries:"); +_MTL_PRIVATE_DEF_SEL(setVertexSamplerState_atIndex_, + 
"setVertexSamplerState:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setVertexSamplerState_lodMinClamp_lodMaxClamp_atIndex_, + "setVertexSamplerState:lodMinClamp:lodMaxClamp:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setVertexSamplerStates_lodMinClamps_lodMaxClamps_withRange_, + "setVertexSamplerStates:lodMinClamps:lodMaxClamps:withRange:"); +_MTL_PRIVATE_DEF_SEL(setVertexSamplerStates_withRange_, + "setVertexSamplerStates:withRange:"); +_MTL_PRIVATE_DEF_SEL(setVertexStride_, + "setVertexStride:"); +_MTL_PRIVATE_DEF_SEL(setVertexTexture_atIndex_, + "setVertexTexture:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setVertexTextures_withRange_, + "setVertexTextures:withRange:"); +_MTL_PRIVATE_DEF_SEL(setVertexVisibleFunctionTable_atBufferIndex_, + "setVertexVisibleFunctionTable:atBufferIndex:"); +_MTL_PRIVATE_DEF_SEL(setVertexVisibleFunctionTables_withBufferRange_, + "setVertexVisibleFunctionTables:withBufferRange:"); +_MTL_PRIVATE_DEF_SEL(setViewport_, + "setViewport:"); +_MTL_PRIVATE_DEF_SEL(setViewports_count_, + "setViewports:count:"); +_MTL_PRIVATE_DEF_SEL(setVisibilityResultBuffer_, + "setVisibilityResultBuffer:"); +_MTL_PRIVATE_DEF_SEL(setVisibilityResultMode_offset_, + "setVisibilityResultMode:offset:"); +_MTL_PRIVATE_DEF_SEL(setVisibleFunctionTable_atBufferIndex_, + "setVisibleFunctionTable:atBufferIndex:"); +_MTL_PRIVATE_DEF_SEL(setVisibleFunctionTable_atIndex_, + "setVisibleFunctionTable:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setVisibleFunctionTables_withBufferRange_, + "setVisibleFunctionTables:withBufferRange:"); +_MTL_PRIVATE_DEF_SEL(setVisibleFunctionTables_withRange_, + "setVisibleFunctionTables:withRange:"); +_MTL_PRIVATE_DEF_SEL(setWidth_, + "setWidth:"); +_MTL_PRIVATE_DEF_SEL(setWriteMask_, + "setWriteMask:"); +_MTL_PRIVATE_DEF_SEL(sharedCaptureManager, + "sharedCaptureManager"); +_MTL_PRIVATE_DEF_SEL(signalEvent_value_, + "signalEvent:value:"); +_MTL_PRIVATE_DEF_SEL(signaledValue, + "signaledValue"); +_MTL_PRIVATE_DEF_SEL(size, + "size"); +_MTL_PRIVATE_DEF_SEL(slice, + "slice"); 
+_MTL_PRIVATE_DEF_SEL(sourceAlphaBlendFactor, + "sourceAlphaBlendFactor"); +_MTL_PRIVATE_DEF_SEL(sourceRGBBlendFactor, + "sourceRGBBlendFactor"); +_MTL_PRIVATE_DEF_SEL(sparsePageSize, + "sparsePageSize"); +_MTL_PRIVATE_DEF_SEL(sparseTileSizeInBytes, + "sparseTileSizeInBytes"); +_MTL_PRIVATE_DEF_SEL(sparseTileSizeInBytesForSparsePageSize_, + "sparseTileSizeInBytesForSparsePageSize:"); +_MTL_PRIVATE_DEF_SEL(sparseTileSizeWithTextureType_pixelFormat_sampleCount_, + "sparseTileSizeWithTextureType:pixelFormat:sampleCount:"); +_MTL_PRIVATE_DEF_SEL(sparseTileSizeWithTextureType_pixelFormat_sampleCount_sparsePageSize_, + "sparseTileSizeWithTextureType:pixelFormat:sampleCount:sparsePageSize:"); +_MTL_PRIVATE_DEF_SEL(specializedName, + "specializedName"); +_MTL_PRIVATE_DEF_SEL(stageInputAttributes, + "stageInputAttributes"); +_MTL_PRIVATE_DEF_SEL(stageInputDescriptor, + "stageInputDescriptor"); +_MTL_PRIVATE_DEF_SEL(stageInputOutputDescriptor, + "stageInputOutputDescriptor"); +_MTL_PRIVATE_DEF_SEL(startCaptureWithCommandQueue_, + "startCaptureWithCommandQueue:"); +_MTL_PRIVATE_DEF_SEL(startCaptureWithDescriptor_error_, + "startCaptureWithDescriptor:error:"); +_MTL_PRIVATE_DEF_SEL(startCaptureWithDevice_, + "startCaptureWithDevice:"); +_MTL_PRIVATE_DEF_SEL(startCaptureWithScope_, + "startCaptureWithScope:"); +_MTL_PRIVATE_DEF_SEL(startOfEncoderSampleIndex, + "startOfEncoderSampleIndex"); +_MTL_PRIVATE_DEF_SEL(startOfFragmentSampleIndex, + "startOfFragmentSampleIndex"); +_MTL_PRIVATE_DEF_SEL(startOfVertexSampleIndex, + "startOfVertexSampleIndex"); +_MTL_PRIVATE_DEF_SEL(staticThreadgroupMemoryLength, + "staticThreadgroupMemoryLength"); +_MTL_PRIVATE_DEF_SEL(status, + "status"); +_MTL_PRIVATE_DEF_SEL(stencilAttachment, + "stencilAttachment"); +_MTL_PRIVATE_DEF_SEL(stencilAttachmentPixelFormat, + "stencilAttachmentPixelFormat"); +_MTL_PRIVATE_DEF_SEL(stencilCompareFunction, + "stencilCompareFunction"); +_MTL_PRIVATE_DEF_SEL(stencilFailureOperation, + "stencilFailureOperation"); 
+_MTL_PRIVATE_DEF_SEL(stencilResolveFilter, + "stencilResolveFilter"); +_MTL_PRIVATE_DEF_SEL(stepFunction, + "stepFunction"); +_MTL_PRIVATE_DEF_SEL(stepRate, + "stepRate"); +_MTL_PRIVATE_DEF_SEL(stopCapture, + "stopCapture"); +_MTL_PRIVATE_DEF_SEL(storageMode, + "storageMode"); +_MTL_PRIVATE_DEF_SEL(storeAction, + "storeAction"); +_MTL_PRIVATE_DEF_SEL(storeActionOptions, + "storeActionOptions"); +_MTL_PRIVATE_DEF_SEL(stride, + "stride"); +_MTL_PRIVATE_DEF_SEL(structType, + "structType"); +_MTL_PRIVATE_DEF_SEL(supportAddingBinaryFunctions, + "supportAddingBinaryFunctions"); +_MTL_PRIVATE_DEF_SEL(supportAddingFragmentBinaryFunctions, + "supportAddingFragmentBinaryFunctions"); +_MTL_PRIVATE_DEF_SEL(supportAddingVertexBinaryFunctions, + "supportAddingVertexBinaryFunctions"); +_MTL_PRIVATE_DEF_SEL(supportArgumentBuffers, + "supportArgumentBuffers"); +_MTL_PRIVATE_DEF_SEL(supportIndirectCommandBuffers, + "supportIndirectCommandBuffers"); +_MTL_PRIVATE_DEF_SEL(supportRayTracing, + "supportRayTracing"); +_MTL_PRIVATE_DEF_SEL(supports32BitFloatFiltering, + "supports32BitFloatFiltering"); +_MTL_PRIVATE_DEF_SEL(supports32BitMSAA, + "supports32BitMSAA"); +_MTL_PRIVATE_DEF_SEL(supportsBCTextureCompression, + "supportsBCTextureCompression"); +_MTL_PRIVATE_DEF_SEL(supportsCounterSampling_, + "supportsCounterSampling:"); +_MTL_PRIVATE_DEF_SEL(supportsDestination_, + "supportsDestination:"); +_MTL_PRIVATE_DEF_SEL(supportsDynamicLibraries, + "supportsDynamicLibraries"); +_MTL_PRIVATE_DEF_SEL(supportsFamily_, + "supportsFamily:"); +_MTL_PRIVATE_DEF_SEL(supportsFeatureSet_, + "supportsFeatureSet:"); +_MTL_PRIVATE_DEF_SEL(supportsFunctionPointers, + "supportsFunctionPointers"); +_MTL_PRIVATE_DEF_SEL(supportsFunctionPointersFromRender, + "supportsFunctionPointersFromRender"); +_MTL_PRIVATE_DEF_SEL(supportsPrimitiveMotionBlur, + "supportsPrimitiveMotionBlur"); +_MTL_PRIVATE_DEF_SEL(supportsPullModelInterpolation, + "supportsPullModelInterpolation"); 
+_MTL_PRIVATE_DEF_SEL(supportsQueryTextureLOD, + "supportsQueryTextureLOD"); +_MTL_PRIVATE_DEF_SEL(supportsRasterizationRateMapWithLayerCount_, + "supportsRasterizationRateMapWithLayerCount:"); +_MTL_PRIVATE_DEF_SEL(supportsRaytracing, + "supportsRaytracing"); +_MTL_PRIVATE_DEF_SEL(supportsRaytracingFromRender, + "supportsRaytracingFromRender"); +_MTL_PRIVATE_DEF_SEL(supportsRenderDynamicLibraries, + "supportsRenderDynamicLibraries"); +_MTL_PRIVATE_DEF_SEL(supportsShaderBarycentricCoordinates, + "supportsShaderBarycentricCoordinates"); +_MTL_PRIVATE_DEF_SEL(supportsTextureSampleCount_, + "supportsTextureSampleCount:"); +_MTL_PRIVATE_DEF_SEL(supportsVertexAmplificationCount_, + "supportsVertexAmplificationCount:"); +_MTL_PRIVATE_DEF_SEL(swizzle, + "swizzle"); +_MTL_PRIVATE_DEF_SEL(synchronizeResource_, + "synchronizeResource:"); +_MTL_PRIVATE_DEF_SEL(synchronizeTexture_slice_level_, + "synchronizeTexture:slice:level:"); +_MTL_PRIVATE_DEF_SEL(tAddressMode, + "tAddressMode"); +_MTL_PRIVATE_DEF_SEL(tailSizeInBytes, + "tailSizeInBytes"); +_MTL_PRIVATE_DEF_SEL(tessellationControlPointIndexType, + "tessellationControlPointIndexType"); +_MTL_PRIVATE_DEF_SEL(tessellationFactorFormat, + "tessellationFactorFormat"); +_MTL_PRIVATE_DEF_SEL(tessellationFactorStepFunction, + "tessellationFactorStepFunction"); +_MTL_PRIVATE_DEF_SEL(tessellationOutputWindingOrder, + "tessellationOutputWindingOrder"); +_MTL_PRIVATE_DEF_SEL(tessellationPartitionMode, + "tessellationPartitionMode"); +_MTL_PRIVATE_DEF_SEL(texture, + "texture"); +_MTL_PRIVATE_DEF_SEL(texture2DDescriptorWithPixelFormat_width_height_mipmapped_, + "texture2DDescriptorWithPixelFormat:width:height:mipmapped:"); +_MTL_PRIVATE_DEF_SEL(textureBarrier, + "textureBarrier"); +_MTL_PRIVATE_DEF_SEL(textureBufferDescriptorWithPixelFormat_width_resourceOptions_usage_, + "textureBufferDescriptorWithPixelFormat:width:resourceOptions:usage:"); +_MTL_PRIVATE_DEF_SEL(textureCubeDescriptorWithPixelFormat_size_mipmapped_, + 
"textureCubeDescriptorWithPixelFormat:size:mipmapped:"); +_MTL_PRIVATE_DEF_SEL(textureDataType, + "textureDataType"); +_MTL_PRIVATE_DEF_SEL(textureReferenceType, + "textureReferenceType"); +_MTL_PRIVATE_DEF_SEL(textureType, + "textureType"); +_MTL_PRIVATE_DEF_SEL(threadExecutionWidth, + "threadExecutionWidth"); +_MTL_PRIVATE_DEF_SEL(threadGroupSizeIsMultipleOfThreadExecutionWidth, + "threadGroupSizeIsMultipleOfThreadExecutionWidth"); +_MTL_PRIVATE_DEF_SEL(threadgroupMemoryAlignment, + "threadgroupMemoryAlignment"); +_MTL_PRIVATE_DEF_SEL(threadgroupMemoryDataSize, + "threadgroupMemoryDataSize"); +_MTL_PRIVATE_DEF_SEL(threadgroupMemoryLength, + "threadgroupMemoryLength"); +_MTL_PRIVATE_DEF_SEL(threadgroupSizeMatchesTileSize, + "threadgroupSizeMatchesTileSize"); +_MTL_PRIVATE_DEF_SEL(tileAdditionalBinaryFunctions, + "tileAdditionalBinaryFunctions"); +_MTL_PRIVATE_DEF_SEL(tileArguments, + "tileArguments"); +_MTL_PRIVATE_DEF_SEL(tileBindings, + "tileBindings"); +_MTL_PRIVATE_DEF_SEL(tileBuffers, + "tileBuffers"); +_MTL_PRIVATE_DEF_SEL(tileFunction, + "tileFunction"); +_MTL_PRIVATE_DEF_SEL(tileHeight, + "tileHeight"); +_MTL_PRIVATE_DEF_SEL(tileWidth, + "tileWidth"); +_MTL_PRIVATE_DEF_SEL(transformationMatrixBuffer, + "transformationMatrixBuffer"); +_MTL_PRIVATE_DEF_SEL(transformationMatrixBufferOffset, + "transformationMatrixBufferOffset"); +_MTL_PRIVATE_DEF_SEL(triangleCount, + "triangleCount"); +_MTL_PRIVATE_DEF_SEL(tryCancel, + "tryCancel"); +_MTL_PRIVATE_DEF_SEL(type, + "type"); +_MTL_PRIVATE_DEF_SEL(updateFence_, + "updateFence:"); +_MTL_PRIVATE_DEF_SEL(updateFence_afterStages_, + "updateFence:afterStages:"); +_MTL_PRIVATE_DEF_SEL(updateTextureMapping_mode_indirectBuffer_indirectBufferOffset_, + "updateTextureMapping:mode:indirectBuffer:indirectBufferOffset:"); +_MTL_PRIVATE_DEF_SEL(updateTextureMapping_mode_region_mipLevel_slice_, + "updateTextureMapping:mode:region:mipLevel:slice:"); 
+_MTL_PRIVATE_DEF_SEL(updateTextureMappings_mode_regions_mipLevels_slices_numRegions_, + "updateTextureMappings:mode:regions:mipLevels:slices:numRegions:"); +_MTL_PRIVATE_DEF_SEL(url, + "url"); +_MTL_PRIVATE_DEF_SEL(usage, + "usage"); +_MTL_PRIVATE_DEF_SEL(useHeap_, + "useHeap:"); +_MTL_PRIVATE_DEF_SEL(useHeap_stages_, + "useHeap:stages:"); +_MTL_PRIVATE_DEF_SEL(useHeaps_count_, + "useHeaps:count:"); +_MTL_PRIVATE_DEF_SEL(useHeaps_count_stages_, + "useHeaps:count:stages:"); +_MTL_PRIVATE_DEF_SEL(useResource_usage_, + "useResource:usage:"); +_MTL_PRIVATE_DEF_SEL(useResource_usage_stages_, + "useResource:usage:stages:"); +_MTL_PRIVATE_DEF_SEL(useResources_count_usage_, + "useResources:count:usage:"); +_MTL_PRIVATE_DEF_SEL(useResources_count_usage_stages_, + "useResources:count:usage:stages:"); +_MTL_PRIVATE_DEF_SEL(usedSize, + "usedSize"); +_MTL_PRIVATE_DEF_SEL(vertexAdditionalBinaryFunctions, + "vertexAdditionalBinaryFunctions"); +_MTL_PRIVATE_DEF_SEL(vertexArguments, + "vertexArguments"); +_MTL_PRIVATE_DEF_SEL(vertexAttributes, + "vertexAttributes"); +_MTL_PRIVATE_DEF_SEL(vertexBindings, + "vertexBindings"); +_MTL_PRIVATE_DEF_SEL(vertexBuffer, + "vertexBuffer"); +_MTL_PRIVATE_DEF_SEL(vertexBufferOffset, + "vertexBufferOffset"); +_MTL_PRIVATE_DEF_SEL(vertexBuffers, + "vertexBuffers"); +_MTL_PRIVATE_DEF_SEL(vertexDescriptor, + "vertexDescriptor"); +_MTL_PRIVATE_DEF_SEL(vertexFormat, + "vertexFormat"); +_MTL_PRIVATE_DEF_SEL(vertexFunction, + "vertexFunction"); +_MTL_PRIVATE_DEF_SEL(vertexLinkedFunctions, + "vertexLinkedFunctions"); +_MTL_PRIVATE_DEF_SEL(vertexPreloadedLibraries, + "vertexPreloadedLibraries"); +_MTL_PRIVATE_DEF_SEL(vertexStride, + "vertexStride"); +_MTL_PRIVATE_DEF_SEL(vertical, + "vertical"); +_MTL_PRIVATE_DEF_SEL(verticalSampleStorage, + "verticalSampleStorage"); +_MTL_PRIVATE_DEF_SEL(visibilityResultBuffer, + "visibilityResultBuffer"); +_MTL_PRIVATE_DEF_SEL(visibleFunctionTableDescriptor, + "visibleFunctionTableDescriptor"); 
+_MTL_PRIVATE_DEF_SEL(waitForEvent_value_, + "waitForEvent:value:"); +_MTL_PRIVATE_DEF_SEL(waitForFence_, + "waitForFence:"); +_MTL_PRIVATE_DEF_SEL(waitForFence_beforeStages_, + "waitForFence:beforeStages:"); +_MTL_PRIVATE_DEF_SEL(waitUntilCompleted, + "waitUntilCompleted"); +_MTL_PRIVATE_DEF_SEL(waitUntilScheduled, + "waitUntilScheduled"); +_MTL_PRIVATE_DEF_SEL(width, + "width"); +_MTL_PRIVATE_DEF_SEL(writeCompactedAccelerationStructureSize_toBuffer_offset_, + "writeCompactedAccelerationStructureSize:toBuffer:offset:"); +_MTL_PRIVATE_DEF_SEL(writeCompactedAccelerationStructureSize_toBuffer_offset_sizeDataType_, + "writeCompactedAccelerationStructureSize:toBuffer:offset:sizeDataType:"); +_MTL_PRIVATE_DEF_SEL(writeMask, + "writeMask"); + +} diff --git a/metal-cpp/Metal/MTLHeap.hpp b/metal-cpp/Metal/MTLHeap.hpp new file mode 100644 index 00000000..4b0b155c --- /dev/null +++ b/metal-cpp/Metal/MTLHeap.hpp @@ -0,0 +1,329 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLHeap.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLDevice.hpp" +#include "MTLHeap.hpp" +#include "MTLResource.hpp" + +namespace MTL +{ +_MTL_ENUM(NS::Integer, HeapType) { + HeapTypeAutomatic = 0, + HeapTypePlacement = 1, + HeapTypeSparse = 2, +}; + +class HeapDescriptor : public NS::Copying +{ +public: + static class HeapDescriptor* alloc(); + + class HeapDescriptor* init(); + + NS::UInteger size() const; + void setSize(NS::UInteger size); + + MTL::StorageMode storageMode() const; + void setStorageMode(MTL::StorageMode storageMode); + + MTL::CPUCacheMode cpuCacheMode() const; + void setCpuCacheMode(MTL::CPUCacheMode cpuCacheMode); + + MTL::SparsePageSize sparsePageSize() const; + void setSparsePageSize(MTL::SparsePageSize sparsePageSize); + + MTL::HazardTrackingMode hazardTrackingMode() const; + void setHazardTrackingMode(MTL::HazardTrackingMode hazardTrackingMode); + + MTL::ResourceOptions resourceOptions() const; + void setResourceOptions(MTL::ResourceOptions resourceOptions); + + MTL::HeapType type() const; + void setType(MTL::HeapType type); +}; + +class Heap : public NS::Referencing +{ +public: + NS::String* label() const; + void setLabel(const NS::String* label); + + class Device* device() const; + + MTL::StorageMode storageMode() const; + + MTL::CPUCacheMode cpuCacheMode() const; + + MTL::HazardTrackingMode hazardTrackingMode() const; + + MTL::ResourceOptions resourceOptions() const; + + NS::UInteger size() const; + + NS::UInteger usedSize() const; + + NS::UInteger currentAllocatedSize() const; + + NS::UInteger maxAvailableSize(NS::UInteger alignment); + + class Buffer* newBuffer(NS::UInteger length, MTL::ResourceOptions options); + + class Texture* newTexture(const class TextureDescriptor* desc); + + 
MTL::PurgeableState setPurgeableState(MTL::PurgeableState state); + + MTL::HeapType type() const; + + class Buffer* newBuffer(NS::UInteger length, MTL::ResourceOptions options, NS::UInteger offset); + + class Texture* newTexture(const class TextureDescriptor* descriptor, NS::UInteger offset); + + class AccelerationStructure* newAccelerationStructure(NS::UInteger size); + + class AccelerationStructure* newAccelerationStructure(const class AccelerationStructureDescriptor* descriptor); + + class AccelerationStructure* newAccelerationStructure(NS::UInteger size, NS::UInteger offset); + + class AccelerationStructure* newAccelerationStructure(const class AccelerationStructureDescriptor* descriptor, NS::UInteger offset); +}; + +} + +// static method: alloc +_MTL_INLINE MTL::HeapDescriptor* MTL::HeapDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLHeapDescriptor)); +} + +// method: init +_MTL_INLINE MTL::HeapDescriptor* MTL::HeapDescriptor::init() +{ + return NS::Object::init(); +} + +// property: size +_MTL_INLINE NS::UInteger MTL::HeapDescriptor::size() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(size)); +} + +_MTL_INLINE void MTL::HeapDescriptor::setSize(NS::UInteger size) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSize_), size); +} + +// property: storageMode +_MTL_INLINE MTL::StorageMode MTL::HeapDescriptor::storageMode() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(storageMode)); +} + +_MTL_INLINE void MTL::HeapDescriptor::setStorageMode(MTL::StorageMode storageMode) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStorageMode_), storageMode); +} + +// property: cpuCacheMode +_MTL_INLINE MTL::CPUCacheMode MTL::HeapDescriptor::cpuCacheMode() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(cpuCacheMode)); +} + +_MTL_INLINE void MTL::HeapDescriptor::setCpuCacheMode(MTL::CPUCacheMode cpuCacheMode) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setCpuCacheMode_), cpuCacheMode); +} + +// 
property: sparsePageSize +_MTL_INLINE MTL::SparsePageSize MTL::HeapDescriptor::sparsePageSize() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sparsePageSize)); +} + +_MTL_INLINE void MTL::HeapDescriptor::setSparsePageSize(MTL::SparsePageSize sparsePageSize) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSparsePageSize_), sparsePageSize); +} + +// property: hazardTrackingMode +_MTL_INLINE MTL::HazardTrackingMode MTL::HeapDescriptor::hazardTrackingMode() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(hazardTrackingMode)); +} + +_MTL_INLINE void MTL::HeapDescriptor::setHazardTrackingMode(MTL::HazardTrackingMode hazardTrackingMode) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setHazardTrackingMode_), hazardTrackingMode); +} + +// property: resourceOptions +_MTL_INLINE MTL::ResourceOptions MTL::HeapDescriptor::resourceOptions() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(resourceOptions)); +} + +_MTL_INLINE void MTL::HeapDescriptor::setResourceOptions(MTL::ResourceOptions resourceOptions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setResourceOptions_), resourceOptions); +} + +// property: type +_MTL_INLINE MTL::HeapType MTL::HeapDescriptor::type() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(type)); +} + +_MTL_INLINE void MTL::HeapDescriptor::setType(MTL::HeapType type) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setType_), type); +} + +// property: label +_MTL_INLINE NS::String* MTL::Heap::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::Heap::setLabel(const NS::String* label) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} + +// property: device +_MTL_INLINE MTL::Device* MTL::Heap::device() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); +} + +// property: storageMode +_MTL_INLINE MTL::StorageMode MTL::Heap::storageMode() const +{ + return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(storageMode)); +} + +// property: cpuCacheMode +_MTL_INLINE MTL::CPUCacheMode MTL::Heap::cpuCacheMode() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(cpuCacheMode)); +} + +// property: hazardTrackingMode +_MTL_INLINE MTL::HazardTrackingMode MTL::Heap::hazardTrackingMode() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(hazardTrackingMode)); +} + +// property: resourceOptions +_MTL_INLINE MTL::ResourceOptions MTL::Heap::resourceOptions() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(resourceOptions)); +} + +// property: size +_MTL_INLINE NS::UInteger MTL::Heap::size() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(size)); +} + +// property: usedSize +_MTL_INLINE NS::UInteger MTL::Heap::usedSize() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(usedSize)); +} + +// property: currentAllocatedSize +_MTL_INLINE NS::UInteger MTL::Heap::currentAllocatedSize() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(currentAllocatedSize)); +} + +// method: maxAvailableSizeWithAlignment: +_MTL_INLINE NS::UInteger MTL::Heap::maxAvailableSize(NS::UInteger alignment) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxAvailableSizeWithAlignment_), alignment); +} + +// method: newBufferWithLength:options: +_MTL_INLINE MTL::Buffer* MTL::Heap::newBuffer(NS::UInteger length, MTL::ResourceOptions options) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newBufferWithLength_options_), length, options); +} + +// method: newTextureWithDescriptor: +_MTL_INLINE MTL::Texture* MTL::Heap::newTexture(const MTL::TextureDescriptor* desc) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newTextureWithDescriptor_), desc); +} + +// method: setPurgeableState: +_MTL_INLINE MTL::PurgeableState MTL::Heap::setPurgeableState(MTL::PurgeableState state) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(setPurgeableState_), state); +} + +// property: type +_MTL_INLINE MTL::HeapType 
MTL::Heap::type() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(type)); +} + +// method: newBufferWithLength:options:offset: +_MTL_INLINE MTL::Buffer* MTL::Heap::newBuffer(NS::UInteger length, MTL::ResourceOptions options, NS::UInteger offset) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newBufferWithLength_options_offset_), length, options, offset); +} + +// method: newTextureWithDescriptor:offset: +_MTL_INLINE MTL::Texture* MTL::Heap::newTexture(const MTL::TextureDescriptor* descriptor, NS::UInteger offset) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newTextureWithDescriptor_offset_), descriptor, offset); +} + +// method: newAccelerationStructureWithSize: +_MTL_INLINE MTL::AccelerationStructure* MTL::Heap::newAccelerationStructure(NS::UInteger size) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newAccelerationStructureWithSize_), size); +} + +// method: newAccelerationStructureWithDescriptor: +_MTL_INLINE MTL::AccelerationStructure* MTL::Heap::newAccelerationStructure(const MTL::AccelerationStructureDescriptor* descriptor) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newAccelerationStructureWithDescriptor_), descriptor); +} + +// method: newAccelerationStructureWithSize:offset: +_MTL_INLINE MTL::AccelerationStructure* MTL::Heap::newAccelerationStructure(NS::UInteger size, NS::UInteger offset) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newAccelerationStructureWithSize_offset_), size, offset); +} + +// method: newAccelerationStructureWithDescriptor:offset: +_MTL_INLINE MTL::AccelerationStructure* MTL::Heap::newAccelerationStructure(const MTL::AccelerationStructureDescriptor* descriptor, NS::UInteger offset) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newAccelerationStructureWithDescriptor_offset_), descriptor, offset); +} diff --git a/metal-cpp/Metal/MTLIOCommandBuffer.hpp b/metal-cpp/Metal/MTLIOCommandBuffer.hpp new file mode 100644 index 00000000..85a76de8 --- /dev/null +++ 
b/metal-cpp/Metal/MTLIOCommandBuffer.hpp @@ -0,0 +1,200 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLIOCommandBuffer.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLIOCommandBuffer.hpp" +#include "MTLTypes.hpp" + +namespace MTL +{ +_MTL_ENUM(NS::Integer, IOStatus) { + IOStatusPending = 0, + IOStatusCancelled = 1, + IOStatusError = 2, + IOStatusComplete = 3, +}; + +using IOCommandBufferHandler = void (^)(class IOCommandBuffer*); + +using IOCommandBufferHandlerFunction = std::function; + +class IOCommandBuffer : public NS::Referencing +{ +public: + void addCompletedHandler(const MTL::IOCommandBufferHandlerFunction& function); + + void addCompletedHandler(const MTL::IOCommandBufferHandler block); + + void loadBytes(const void* pointer, NS::UInteger size, const class IOFileHandle* sourceHandle, NS::UInteger sourceHandleOffset); + + void loadBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger size, const class IOFileHandle* sourceHandle, NS::UInteger sourceHandleOffset); + + 
void loadTexture(const class Texture* texture, NS::UInteger slice, NS::UInteger level, MTL::Size size, NS::UInteger sourceBytesPerRow, NS::UInteger sourceBytesPerImage, MTL::Origin destinationOrigin, const class IOFileHandle* sourceHandle, NS::UInteger sourceHandleOffset); + + void copyStatusToBuffer(const class Buffer* buffer, NS::UInteger offset); + + void commit(); + + void waitUntilCompleted(); + + void tryCancel(); + + void addBarrier(); + + void pushDebugGroup(const NS::String* string); + + void popDebugGroup(); + + void enqueue(); + + void wait(const class SharedEvent* event, uint64_t value); + + void signalEvent(const class SharedEvent* event, uint64_t value); + + NS::String* label() const; + void setLabel(const NS::String* label); + + MTL::IOStatus status() const; + + NS::Error* error() const; +}; + +} + +_MTL_INLINE void MTL::IOCommandBuffer::addCompletedHandler(const MTL::IOCommandBufferHandlerFunction& function) +{ + __block IOCommandBufferHandlerFunction blockFunction = function; + + addCompletedHandler(^(IOCommandBuffer* pCommandBuffer) { blockFunction(pCommandBuffer); }); +} + +// method: addCompletedHandler: +_MTL_INLINE void MTL::IOCommandBuffer::addCompletedHandler(const MTL::IOCommandBufferHandler block) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(addCompletedHandler_), block); +} + +// method: loadBytes:size:sourceHandle:sourceHandleOffset: +_MTL_INLINE void MTL::IOCommandBuffer::loadBytes(const void* pointer, NS::UInteger size, const MTL::IOFileHandle* sourceHandle, NS::UInteger sourceHandleOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(loadBytes_size_sourceHandle_sourceHandleOffset_), pointer, size, sourceHandle, sourceHandleOffset); +} + +// method: loadBuffer:offset:size:sourceHandle:sourceHandleOffset: +_MTL_INLINE void MTL::IOCommandBuffer::loadBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger size, const MTL::IOFileHandle* sourceHandle, NS::UInteger sourceHandleOffset) +{ + Object::sendMessage(this, 
_MTL_PRIVATE_SEL(loadBuffer_offset_size_sourceHandle_sourceHandleOffset_), buffer, offset, size, sourceHandle, sourceHandleOffset); +} + +// method: loadTexture:slice:level:size:sourceBytesPerRow:sourceBytesPerImage:destinationOrigin:sourceHandle:sourceHandleOffset: +_MTL_INLINE void MTL::IOCommandBuffer::loadTexture(const MTL::Texture* texture, NS::UInteger slice, NS::UInteger level, MTL::Size size, NS::UInteger sourceBytesPerRow, NS::UInteger sourceBytesPerImage, MTL::Origin destinationOrigin, const MTL::IOFileHandle* sourceHandle, NS::UInteger sourceHandleOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(loadTexture_slice_level_size_sourceBytesPerRow_sourceBytesPerImage_destinationOrigin_sourceHandle_sourceHandleOffset_), texture, slice, level, size, sourceBytesPerRow, sourceBytesPerImage, destinationOrigin, sourceHandle, sourceHandleOffset); +} + +// method: copyStatusToBuffer:offset: +_MTL_INLINE void MTL::IOCommandBuffer::copyStatusToBuffer(const MTL::Buffer* buffer, NS::UInteger offset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(copyStatusToBuffer_offset_), buffer, offset); +} + +// method: commit +_MTL_INLINE void MTL::IOCommandBuffer::commit() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(commit)); +} + +// method: waitUntilCompleted +_MTL_INLINE void MTL::IOCommandBuffer::waitUntilCompleted() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(waitUntilCompleted)); +} + +// method: tryCancel +_MTL_INLINE void MTL::IOCommandBuffer::tryCancel() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(tryCancel)); +} + +// method: addBarrier +_MTL_INLINE void MTL::IOCommandBuffer::addBarrier() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(addBarrier)); +} + +// method: pushDebugGroup: +_MTL_INLINE void MTL::IOCommandBuffer::pushDebugGroup(const NS::String* string) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(pushDebugGroup_), string); +} + +// method: popDebugGroup +_MTL_INLINE void MTL::IOCommandBuffer::popDebugGroup() +{ + Object::sendMessage(this, 
_MTL_PRIVATE_SEL(popDebugGroup)); +} + +// method: enqueue +_MTL_INLINE void MTL::IOCommandBuffer::enqueue() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(enqueue)); +} + +// method: waitForEvent:value: +_MTL_INLINE void MTL::IOCommandBuffer::wait(const MTL::SharedEvent* event, uint64_t value) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(waitForEvent_value_), event, value); +} + +// method: signalEvent:value: +_MTL_INLINE void MTL::IOCommandBuffer::signalEvent(const MTL::SharedEvent* event, uint64_t value) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(signalEvent_value_), event, value); +} + +// property: label +_MTL_INLINE NS::String* MTL::IOCommandBuffer::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::IOCommandBuffer::setLabel(const NS::String* label) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} + +// property: status +_MTL_INLINE MTL::IOStatus MTL::IOCommandBuffer::status() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(status)); +} + +// property: error +_MTL_INLINE NS::Error* MTL::IOCommandBuffer::error() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(error)); +} diff --git a/metal-cpp/Metal/MTLIOCommandQueue.hpp b/metal-cpp/Metal/MTLIOCommandQueue.hpp new file mode 100644 index 00000000..de2fc8a1 --- /dev/null +++ b/metal-cpp/Metal/MTLIOCommandQueue.hpp @@ -0,0 +1,225 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLIOCommandQueue.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLIOCommandQueue.hpp" + +namespace MTL +{ +_MTL_ENUM(NS::Integer, IOPriority) { + IOPriorityHigh = 0, + IOPriorityNormal = 1, + IOPriorityLow = 2, +}; + +_MTL_ENUM(NS::Integer, IOCommandQueueType) { + IOCommandQueueTypeConcurrent = 0, + IOCommandQueueTypeSerial = 1, +}; + +_MTL_CONST(NS::ErrorDomain, IOErrorDomain); + +_MTL_ENUM(NS::Integer, IOError) { + IOErrorURLInvalid = 1, + IOErrorInternal = 2, +}; + +class IOCommandQueue : public NS::Referencing +{ +public: + void enqueueBarrier(); + + class IOCommandBuffer* commandBuffer(); + + class IOCommandBuffer* commandBufferWithUnretainedReferences(); + + NS::String* label() const; + void setLabel(const NS::String* label); +}; + +class IOScratchBuffer : public NS::Referencing +{ +public: + class Buffer* buffer() const; +}; + +class IOScratchBufferAllocator : public NS::Referencing +{ +public: + class IOScratchBuffer* newScratchBuffer(NS::UInteger minimumSize); +}; + +class IOCommandQueueDescriptor : public NS::Copying +{ +public: + static class IOCommandQueueDescriptor* alloc(); + + class IOCommandQueueDescriptor* init(); + + NS::UInteger maxCommandBufferCount() const; + void setMaxCommandBufferCount(NS::UInteger maxCommandBufferCount); + + MTL::IOPriority priority() const; + void setPriority(MTL::IOPriority 
priority); + + MTL::IOCommandQueueType type() const; + void setType(MTL::IOCommandQueueType type); + + NS::UInteger maxCommandsInFlight() const; + void setMaxCommandsInFlight(NS::UInteger maxCommandsInFlight); + + class IOScratchBufferAllocator* scratchBufferAllocator() const; + void setScratchBufferAllocator(const class IOScratchBufferAllocator* scratchBufferAllocator); +}; + +class IOFileHandle : public NS::Referencing +{ +public: + NS::String* label() const; + void setLabel(const NS::String* label); +}; + +} + +// method: enqueueBarrier +_MTL_INLINE void MTL::IOCommandQueue::enqueueBarrier() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(enqueueBarrier)); +} + +// method: commandBuffer +_MTL_INLINE MTL::IOCommandBuffer* MTL::IOCommandQueue::commandBuffer() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(commandBuffer)); +} + +// method: commandBufferWithUnretainedReferences +_MTL_INLINE MTL::IOCommandBuffer* MTL::IOCommandQueue::commandBufferWithUnretainedReferences() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(commandBufferWithUnretainedReferences)); +} + +// property: label +_MTL_INLINE NS::String* MTL::IOCommandQueue::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::IOCommandQueue::setLabel(const NS::String* label) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} + +// property: buffer +_MTL_INLINE MTL::Buffer* MTL::IOScratchBuffer::buffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(buffer)); +} + +// method: newScratchBufferWithMinimumSize: +_MTL_INLINE MTL::IOScratchBuffer* MTL::IOScratchBufferAllocator::newScratchBuffer(NS::UInteger minimumSize) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newScratchBufferWithMinimumSize_), minimumSize); +} + +// static method: alloc +_MTL_INLINE MTL::IOCommandQueueDescriptor* MTL::IOCommandQueueDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLIOCommandQueueDescriptor)); +} + 
+// method: init +_MTL_INLINE MTL::IOCommandQueueDescriptor* MTL::IOCommandQueueDescriptor::init() +{ + return NS::Object::init(); +} + +// property: maxCommandBufferCount +_MTL_INLINE NS::UInteger MTL::IOCommandQueueDescriptor::maxCommandBufferCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxCommandBufferCount)); +} + +_MTL_INLINE void MTL::IOCommandQueueDescriptor::setMaxCommandBufferCount(NS::UInteger maxCommandBufferCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxCommandBufferCount_), maxCommandBufferCount); +} + +// property: priority +_MTL_INLINE MTL::IOPriority MTL::IOCommandQueueDescriptor::priority() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(priority)); +} + +_MTL_INLINE void MTL::IOCommandQueueDescriptor::setPriority(MTL::IOPriority priority) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setPriority_), priority); +} + +// property: type +_MTL_INLINE MTL::IOCommandQueueType MTL::IOCommandQueueDescriptor::type() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(type)); +} + +_MTL_INLINE void MTL::IOCommandQueueDescriptor::setType(MTL::IOCommandQueueType type) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setType_), type); +} + +// property: maxCommandsInFlight +_MTL_INLINE NS::UInteger MTL::IOCommandQueueDescriptor::maxCommandsInFlight() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxCommandsInFlight)); +} + +_MTL_INLINE void MTL::IOCommandQueueDescriptor::setMaxCommandsInFlight(NS::UInteger maxCommandsInFlight) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxCommandsInFlight_), maxCommandsInFlight); +} + +// property: scratchBufferAllocator +_MTL_INLINE MTL::IOScratchBufferAllocator* MTL::IOCommandQueueDescriptor::scratchBufferAllocator() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(scratchBufferAllocator)); +} + +_MTL_INLINE void MTL::IOCommandQueueDescriptor::setScratchBufferAllocator(const MTL::IOScratchBufferAllocator* scratchBufferAllocator) +{ 
+ Object::sendMessage(this, _MTL_PRIVATE_SEL(setScratchBufferAllocator_), scratchBufferAllocator); +} + +// property: label +_MTL_INLINE NS::String* MTL::IOFileHandle::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::IOFileHandle::setLabel(const NS::String* label) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} diff --git a/metal-cpp/Metal/MTLIOCompressor.hpp b/metal-cpp/Metal/MTLIOCompressor.hpp new file mode 100644 index 00000000..83fc486c --- /dev/null +++ b/metal-cpp/Metal/MTLIOCompressor.hpp @@ -0,0 +1,92 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLIOCompressor.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" +#include "MTLDevice.hpp" + +#include + +namespace MTL +{ +_MTL_ENUM(NS::Integer, IOCompressionStatus) { + IOCompressionStatusComplete = 0, + IOCompressionStatusError = 1, +}; + +size_t IOCompressionContextDefaultChunkSize(); + +void* IOCreateCompressionContext(const char* path, IOCompressionMethod type, size_t chunkSize); + +void IOCompressionContextAppendData(void* context, const void* data, size_t size); + +IOCompressionStatus IOFlushAndDestroyCompressionContext(void* context); + +} + +#if defined(MTL_PRIVATE_IMPLEMENTATION) + +namespace MTL::Private { + +MTL_DEF_FUNC(MTLIOCompressionContextDefaultChunkSize, size_t (*)(void)); + +MTL_DEF_FUNC( MTLIOCreateCompressionContext, void* (*)(const char*, MTL::IOCompressionMethod, size_t) ); + +MTL_DEF_FUNC( MTLIOCompressionContextAppendData, void (*)(void*, const void*, size_t) ); + +MTL_DEF_FUNC( MTLIOFlushAndDestroyCompressionContext, MTL::IOCompressionStatus (*)(void*) ); + +} + +_NS_EXPORT size_t MTL::IOCompressionContextDefaultChunkSize() +{ + return MTL::Private::MTLIOCompressionContextDefaultChunkSize(); +} + +_NS_EXPORT void* MTL::IOCreateCompressionContext(const char* path, IOCompressionMethod type, size_t chunkSize) +{ + if ( MTL::Private::MTLIOCreateCompressionContext ) + { + return MTL::Private::MTLIOCreateCompressionContext( path, type, chunkSize ); + } + return nullptr; +} + +_NS_EXPORT void MTL::IOCompressionContextAppendData(void* context, const void* data, size_t size) +{ + if ( MTL::Private::MTLIOCompressionContextAppendData ) + { + MTL::Private::MTLIOCompressionContextAppendData( context, data, size ); + } +} + +_NS_EXPORT MTL::IOCompressionStatus MTL::IOFlushAndDestroyCompressionContext(void* context) +{ + if ( 
MTL::Private::MTLIOFlushAndDestroyCompressionContext ) + { + return MTL::Private::MTLIOFlushAndDestroyCompressionContext( context ); + } + return MTL::IOCompressionStatusError; +} + +#endif diff --git a/metal-cpp/Metal/MTLIndirectCommandBuffer.hpp b/metal-cpp/Metal/MTLIndirectCommandBuffer.hpp new file mode 100644 index 00000000..570805b8 --- /dev/null +++ b/metal-cpp/Metal/MTLIndirectCommandBuffer.hpp @@ -0,0 +1,212 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLIndirectCommandBuffer.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLIndirectCommandBuffer.hpp" +#include "MTLResource.hpp" +#include "MTLTypes.hpp" + +namespace MTL +{ +_MTL_OPTIONS(NS::UInteger, IndirectCommandType) { + IndirectCommandTypeDraw = 1, + IndirectCommandTypeDrawIndexed = 2, + IndirectCommandTypeDrawPatches = 4, + IndirectCommandTypeDrawIndexedPatches = 8, + IndirectCommandTypeConcurrentDispatch = 32, + IndirectCommandTypeConcurrentDispatchThreads = 64, +}; + +struct IndirectCommandBufferExecutionRange +{ + uint32_t location; + uint32_t length; +} _MTL_PACKED; + +class IndirectCommandBufferDescriptor : public NS::Copying +{ +public: + static class IndirectCommandBufferDescriptor* alloc(); + + class IndirectCommandBufferDescriptor* init(); + + MTL::IndirectCommandType commandTypes() const; + void setCommandTypes(MTL::IndirectCommandType commandTypes); + + bool inheritPipelineState() const; + void setInheritPipelineState(bool inheritPipelineState); + + bool inheritBuffers() const; + void setInheritBuffers(bool inheritBuffers); + + NS::UInteger maxVertexBufferBindCount() const; + void setMaxVertexBufferBindCount(NS::UInteger maxVertexBufferBindCount); + + NS::UInteger maxFragmentBufferBindCount() const; + void setMaxFragmentBufferBindCount(NS::UInteger maxFragmentBufferBindCount); + + NS::UInteger maxKernelBufferBindCount() const; + void setMaxKernelBufferBindCount(NS::UInteger maxKernelBufferBindCount); + + bool supportRayTracing() const; + void setSupportRayTracing(bool supportRayTracing); +}; + +class IndirectCommandBuffer : public NS::Referencing +{ +public: + NS::UInteger size() const; + + MTL::ResourceID gpuResourceID() const; + + void reset(NS::Range range); + + class IndirectRenderCommand* indirectRenderCommand(NS::UInteger 
commandIndex); + + class IndirectComputeCommand* indirectComputeCommand(NS::UInteger commandIndex); +}; + +} + +// static method: alloc +_MTL_INLINE MTL::IndirectCommandBufferDescriptor* MTL::IndirectCommandBufferDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLIndirectCommandBufferDescriptor)); +} + +// method: init +_MTL_INLINE MTL::IndirectCommandBufferDescriptor* MTL::IndirectCommandBufferDescriptor::init() +{ + return NS::Object::init(); +} + +// property: commandTypes +_MTL_INLINE MTL::IndirectCommandType MTL::IndirectCommandBufferDescriptor::commandTypes() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(commandTypes)); +} + +_MTL_INLINE void MTL::IndirectCommandBufferDescriptor::setCommandTypes(MTL::IndirectCommandType commandTypes) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setCommandTypes_), commandTypes); +} + +// property: inheritPipelineState +_MTL_INLINE bool MTL::IndirectCommandBufferDescriptor::inheritPipelineState() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(inheritPipelineState)); +} + +_MTL_INLINE void MTL::IndirectCommandBufferDescriptor::setInheritPipelineState(bool inheritPipelineState) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setInheritPipelineState_), inheritPipelineState); +} + +// property: inheritBuffers +_MTL_INLINE bool MTL::IndirectCommandBufferDescriptor::inheritBuffers() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(inheritBuffers)); +} + +_MTL_INLINE void MTL::IndirectCommandBufferDescriptor::setInheritBuffers(bool inheritBuffers) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setInheritBuffers_), inheritBuffers); +} + +// property: maxVertexBufferBindCount +_MTL_INLINE NS::UInteger MTL::IndirectCommandBufferDescriptor::maxVertexBufferBindCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxVertexBufferBindCount)); +} + +_MTL_INLINE void MTL::IndirectCommandBufferDescriptor::setMaxVertexBufferBindCount(NS::UInteger 
maxVertexBufferBindCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxVertexBufferBindCount_), maxVertexBufferBindCount); +} + +// property: maxFragmentBufferBindCount +_MTL_INLINE NS::UInteger MTL::IndirectCommandBufferDescriptor::maxFragmentBufferBindCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxFragmentBufferBindCount)); +} + +_MTL_INLINE void MTL::IndirectCommandBufferDescriptor::setMaxFragmentBufferBindCount(NS::UInteger maxFragmentBufferBindCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxFragmentBufferBindCount_), maxFragmentBufferBindCount); +} + +// property: maxKernelBufferBindCount +_MTL_INLINE NS::UInteger MTL::IndirectCommandBufferDescriptor::maxKernelBufferBindCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxKernelBufferBindCount)); +} + +_MTL_INLINE void MTL::IndirectCommandBufferDescriptor::setMaxKernelBufferBindCount(NS::UInteger maxKernelBufferBindCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxKernelBufferBindCount_), maxKernelBufferBindCount); +} + +// property: supportRayTracing +_MTL_INLINE bool MTL::IndirectCommandBufferDescriptor::supportRayTracing() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportRayTracing)); +} + +_MTL_INLINE void MTL::IndirectCommandBufferDescriptor::setSupportRayTracing(bool supportRayTracing) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSupportRayTracing_), supportRayTracing); +} + +// property: size +_MTL_INLINE NS::UInteger MTL::IndirectCommandBuffer::size() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(size)); +} + +// property: gpuResourceID +_MTL_INLINE MTL::ResourceID MTL::IndirectCommandBuffer::gpuResourceID() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(gpuResourceID)); +} + +// method: resetWithRange: +_MTL_INLINE void MTL::IndirectCommandBuffer::reset(NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(resetWithRange_), range); +} + +// method: 
indirectRenderCommandAtIndex: +_MTL_INLINE MTL::IndirectRenderCommand* MTL::IndirectCommandBuffer::indirectRenderCommand(NS::UInteger commandIndex) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(indirectRenderCommandAtIndex_), commandIndex); +} + +// method: indirectComputeCommandAtIndex: +_MTL_INLINE MTL::IndirectComputeCommand* MTL::IndirectCommandBuffer::indirectComputeCommand(NS::UInteger commandIndex) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(indirectComputeCommandAtIndex_), commandIndex); +} diff --git a/metal-cpp/Metal/MTLIndirectCommandEncoder.hpp b/metal-cpp/Metal/MTLIndirectCommandEncoder.hpp new file mode 100644 index 00000000..659cb3db --- /dev/null +++ b/metal-cpp/Metal/MTLIndirectCommandEncoder.hpp @@ -0,0 +1,187 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLIndirectCommandEncoder.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLRenderCommandEncoder.hpp" +#include "MTLStageInputOutputDescriptor.hpp" +#include "MTLTypes.hpp" + +namespace MTL +{ +class IndirectRenderCommand : public NS::Referencing +{ +public: + void setRenderPipelineState(const class RenderPipelineState* pipelineState); + + void setVertexBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index); + + void setFragmentBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index); + + void drawPatches(NS::UInteger numberOfPatchControlPoints, NS::UInteger patchStart, NS::UInteger patchCount, const class Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, NS::UInteger instanceCount, NS::UInteger baseInstance, const class Buffer* buffer, NS::UInteger offset, NS::UInteger instanceStride); + + void drawIndexedPatches(NS::UInteger numberOfPatchControlPoints, NS::UInteger patchStart, NS::UInteger patchCount, const class Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, const class Buffer* controlPointIndexBuffer, NS::UInteger controlPointIndexBufferOffset, NS::UInteger instanceCount, NS::UInteger baseInstance, const class Buffer* buffer, NS::UInteger offset, NS::UInteger instanceStride); + + void drawPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger vertexStart, NS::UInteger vertexCount, NS::UInteger instanceCount, NS::UInteger baseInstance); + + void drawIndexedPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger indexCount, MTL::IndexType indexType, const class Buffer* indexBuffer, NS::UInteger indexBufferOffset, NS::UInteger instanceCount, NS::Integer baseVertex, NS::UInteger baseInstance); + + void reset(); +}; + +class IndirectComputeCommand : public NS::Referencing +{ 
+public: + void setComputePipelineState(const class ComputePipelineState* pipelineState); + + void setKernelBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index); + + void concurrentDispatchThreadgroups(MTL::Size threadgroupsPerGrid, MTL::Size threadsPerThreadgroup); + + void concurrentDispatchThreads(MTL::Size threadsPerGrid, MTL::Size threadsPerThreadgroup); + + void setBarrier(); + + void clearBarrier(); + + void setImageblockWidth(NS::UInteger width, NS::UInteger height); + + void reset(); + + void setThreadgroupMemoryLength(NS::UInteger length, NS::UInteger index); + + void setStageInRegion(MTL::Region region); +}; + +} + +// method: setRenderPipelineState: +_MTL_INLINE void MTL::IndirectRenderCommand::setRenderPipelineState(const MTL::RenderPipelineState* pipelineState) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRenderPipelineState_), pipelineState); +} + +// method: setVertexBuffer:offset:atIndex: +_MTL_INLINE void MTL::IndirectRenderCommand::setVertexBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexBuffer_offset_atIndex_), buffer, offset, index); +} + +// method: setFragmentBuffer:offset:atIndex: +_MTL_INLINE void MTL::IndirectRenderCommand::setFragmentBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setFragmentBuffer_offset_atIndex_), buffer, offset, index); +} + +// method: drawPatches:patchStart:patchCount:patchIndexBuffer:patchIndexBufferOffset:instanceCount:baseInstance:tessellationFactorBuffer:tessellationFactorBufferOffset:tessellationFactorBufferInstanceStride: +_MTL_INLINE void MTL::IndirectRenderCommand::drawPatches(NS::UInteger numberOfPatchControlPoints, NS::UInteger patchStart, NS::UInteger patchCount, const MTL::Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, NS::UInteger instanceCount, NS::UInteger baseInstance, const MTL::Buffer* buffer, 
NS::UInteger offset, NS::UInteger instanceStride) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(drawPatches_patchStart_patchCount_patchIndexBuffer_patchIndexBufferOffset_instanceCount_baseInstance_tessellationFactorBuffer_tessellationFactorBufferOffset_tessellationFactorBufferInstanceStride_), numberOfPatchControlPoints, patchStart, patchCount, patchIndexBuffer, patchIndexBufferOffset, instanceCount, baseInstance, buffer, offset, instanceStride); +} + +// method: drawIndexedPatches:patchStart:patchCount:patchIndexBuffer:patchIndexBufferOffset:controlPointIndexBuffer:controlPointIndexBufferOffset:instanceCount:baseInstance:tessellationFactorBuffer:tessellationFactorBufferOffset:tessellationFactorBufferInstanceStride: +_MTL_INLINE void MTL::IndirectRenderCommand::drawIndexedPatches(NS::UInteger numberOfPatchControlPoints, NS::UInteger patchStart, NS::UInteger patchCount, const MTL::Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, const MTL::Buffer* controlPointIndexBuffer, NS::UInteger controlPointIndexBufferOffset, NS::UInteger instanceCount, NS::UInteger baseInstance, const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger instanceStride) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(drawIndexedPatches_patchStart_patchCount_patchIndexBuffer_patchIndexBufferOffset_controlPointIndexBuffer_controlPointIndexBufferOffset_instanceCount_baseInstance_tessellationFactorBuffer_tessellationFactorBufferOffset_tessellationFactorBufferInstanceStride_), numberOfPatchControlPoints, patchStart, patchCount, patchIndexBuffer, patchIndexBufferOffset, controlPointIndexBuffer, controlPointIndexBufferOffset, instanceCount, baseInstance, buffer, offset, instanceStride); +} + +// method: drawPrimitives:vertexStart:vertexCount:instanceCount:baseInstance: +_MTL_INLINE void MTL::IndirectRenderCommand::drawPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger vertexStart, NS::UInteger vertexCount, NS::UInteger instanceCount, NS::UInteger baseInstance) +{ + 
Object::sendMessage(this, _MTL_PRIVATE_SEL(drawPrimitives_vertexStart_vertexCount_instanceCount_baseInstance_), primitiveType, vertexStart, vertexCount, instanceCount, baseInstance); +} + +// method: drawIndexedPrimitives:indexCount:indexType:indexBuffer:indexBufferOffset:instanceCount:baseVertex:baseInstance: +_MTL_INLINE void MTL::IndirectRenderCommand::drawIndexedPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger indexCount, MTL::IndexType indexType, const MTL::Buffer* indexBuffer, NS::UInteger indexBufferOffset, NS::UInteger instanceCount, NS::Integer baseVertex, NS::UInteger baseInstance) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(drawIndexedPrimitives_indexCount_indexType_indexBuffer_indexBufferOffset_instanceCount_baseVertex_baseInstance_), primitiveType, indexCount, indexType, indexBuffer, indexBufferOffset, instanceCount, baseVertex, baseInstance); +} + +// method: reset +_MTL_INLINE void MTL::IndirectRenderCommand::reset() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(reset)); +} + +// method: setComputePipelineState: +_MTL_INLINE void MTL::IndirectComputeCommand::setComputePipelineState(const MTL::ComputePipelineState* pipelineState) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setComputePipelineState_), pipelineState); +} + +// method: setKernelBuffer:offset:atIndex: +_MTL_INLINE void MTL::IndirectComputeCommand::setKernelBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setKernelBuffer_offset_atIndex_), buffer, offset, index); +} + +// method: concurrentDispatchThreadgroups:threadsPerThreadgroup: +_MTL_INLINE void MTL::IndirectComputeCommand::concurrentDispatchThreadgroups(MTL::Size threadgroupsPerGrid, MTL::Size threadsPerThreadgroup) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(concurrentDispatchThreadgroups_threadsPerThreadgroup_), threadgroupsPerGrid, threadsPerThreadgroup); +} + +// method: concurrentDispatchThreads:threadsPerThreadgroup: +_MTL_INLINE void 
MTL::IndirectComputeCommand::concurrentDispatchThreads(MTL::Size threadsPerGrid, MTL::Size threadsPerThreadgroup) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(concurrentDispatchThreads_threadsPerThreadgroup_), threadsPerGrid, threadsPerThreadgroup); +} + +// method: setBarrier +_MTL_INLINE void MTL::IndirectComputeCommand::setBarrier() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBarrier)); +} + +// method: clearBarrier +_MTL_INLINE void MTL::IndirectComputeCommand::clearBarrier() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(clearBarrier)); +} + +// method: setImageblockWidth:height: +_MTL_INLINE void MTL::IndirectComputeCommand::setImageblockWidth(NS::UInteger width, NS::UInteger height) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setImageblockWidth_height_), width, height); +} + +// method: reset +_MTL_INLINE void MTL::IndirectComputeCommand::reset() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(reset)); +} + +// method: setThreadgroupMemoryLength:atIndex: +_MTL_INLINE void MTL::IndirectComputeCommand::setThreadgroupMemoryLength(NS::UInteger length, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setThreadgroupMemoryLength_atIndex_), length, index); +} + +// method: setStageInRegion: +_MTL_INLINE void MTL::IndirectComputeCommand::setStageInRegion(MTL::Region region) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStageInRegion_), region); +} diff --git a/metal-cpp/Metal/MTLIntersectionFunctionTable.hpp b/metal-cpp/Metal/MTLIntersectionFunctionTable.hpp new file mode 100644 index 00000000..1dda215e --- /dev/null +++ b/metal-cpp/Metal/MTLIntersectionFunctionTable.hpp @@ -0,0 +1,163 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLIntersectionFunctionTable.hpp +// +// Copyright 2020-2022 Apple Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLIntersectionFunctionTable.hpp" +#include "MTLResource.hpp" +#include "MTLTypes.hpp" + +namespace MTL +{ +_MTL_OPTIONS(NS::UInteger, IntersectionFunctionSignature) { + IntersectionFunctionSignatureNone = 0, + IntersectionFunctionSignatureInstancing = 1, + IntersectionFunctionSignatureTriangleData = 2, + IntersectionFunctionSignatureWorldSpaceData = 4, + IntersectionFunctionSignatureInstanceMotion = 8, + IntersectionFunctionSignaturePrimitiveMotion = 16, + IntersectionFunctionSignatureExtendedLimits = 32, +}; + +class IntersectionFunctionTableDescriptor : public NS::Copying +{ +public: + static class IntersectionFunctionTableDescriptor* alloc(); + + class IntersectionFunctionTableDescriptor* init(); + + static class IntersectionFunctionTableDescriptor* intersectionFunctionTableDescriptor(); + + NS::UInteger functionCount() const; + void setFunctionCount(NS::UInteger functionCount); +}; + +class IntersectionFunctionTable : public NS::Referencing +{ +public: + void setBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index); + + void setBuffers(const class Buffer* const buffers[], const NS::UInteger 
offsets[], NS::Range range); + + MTL::ResourceID gpuResourceID() const; + + void setFunction(const class FunctionHandle* function, NS::UInteger index); + + void setFunctions(const class FunctionHandle* const functions[], NS::Range range); + + void setOpaqueTriangleIntersectionFunction(MTL::IntersectionFunctionSignature signature, NS::UInteger index); + + void setOpaqueTriangleIntersectionFunction(MTL::IntersectionFunctionSignature signature, NS::Range range); + + void setVisibleFunctionTable(const class VisibleFunctionTable* functionTable, NS::UInteger bufferIndex); + + void setVisibleFunctionTables(const class VisibleFunctionTable* const functionTables[], NS::Range bufferRange); +}; + +} + +// static method: alloc +_MTL_INLINE MTL::IntersectionFunctionTableDescriptor* MTL::IntersectionFunctionTableDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLIntersectionFunctionTableDescriptor)); +} + +// method: init +_MTL_INLINE MTL::IntersectionFunctionTableDescriptor* MTL::IntersectionFunctionTableDescriptor::init() +{ + return NS::Object::init(); +} + +// static method: intersectionFunctionTableDescriptor +_MTL_INLINE MTL::IntersectionFunctionTableDescriptor* MTL::IntersectionFunctionTableDescriptor::intersectionFunctionTableDescriptor() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLIntersectionFunctionTableDescriptor), _MTL_PRIVATE_SEL(intersectionFunctionTableDescriptor)); +} + +// property: functionCount +_MTL_INLINE NS::UInteger MTL::IntersectionFunctionTableDescriptor::functionCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(functionCount)); +} + +_MTL_INLINE void MTL::IntersectionFunctionTableDescriptor::setFunctionCount(NS::UInteger functionCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setFunctionCount_), functionCount); +} + +// method: setBuffer:offset:atIndex: +_MTL_INLINE void MTL::IntersectionFunctionTable::setBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index) +{ + 
Object::sendMessage(this, _MTL_PRIVATE_SEL(setBuffer_offset_atIndex_), buffer, offset, index); +} + +// method: setBuffers:offsets:withRange: +_MTL_INLINE void MTL::IntersectionFunctionTable::setBuffers(const MTL::Buffer* const buffers[], const NS::UInteger offsets[], NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBuffers_offsets_withRange_), buffers, offsets, range); +} + +// property: gpuResourceID +_MTL_INLINE MTL::ResourceID MTL::IntersectionFunctionTable::gpuResourceID() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(gpuResourceID)); +} + +// method: setFunction:atIndex: +_MTL_INLINE void MTL::IntersectionFunctionTable::setFunction(const MTL::FunctionHandle* function, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setFunction_atIndex_), function, index); +} + +// method: setFunctions:withRange: +_MTL_INLINE void MTL::IntersectionFunctionTable::setFunctions(const MTL::FunctionHandle* const functions[], NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setFunctions_withRange_), functions, range); +} + +// method: setOpaqueTriangleIntersectionFunctionWithSignature:atIndex: +_MTL_INLINE void MTL::IntersectionFunctionTable::setOpaqueTriangleIntersectionFunction(MTL::IntersectionFunctionSignature signature, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setOpaqueTriangleIntersectionFunctionWithSignature_atIndex_), signature, index); +} + +// method: setOpaqueTriangleIntersectionFunctionWithSignature:withRange: +_MTL_INLINE void MTL::IntersectionFunctionTable::setOpaqueTriangleIntersectionFunction(MTL::IntersectionFunctionSignature signature, NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setOpaqueTriangleIntersectionFunctionWithSignature_withRange_), signature, range); +} + +// method: setVisibleFunctionTable:atBufferIndex: +_MTL_INLINE void MTL::IntersectionFunctionTable::setVisibleFunctionTable(const MTL::VisibleFunctionTable* functionTable, NS::UInteger 
bufferIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVisibleFunctionTable_atBufferIndex_), functionTable, bufferIndex); +} + +// method: setVisibleFunctionTables:withBufferRange: +_MTL_INLINE void MTL::IntersectionFunctionTable::setVisibleFunctionTables(const MTL::VisibleFunctionTable* const functionTables[], NS::Range bufferRange) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVisibleFunctionTables_withBufferRange_), functionTables, bufferRange); +} diff --git a/metal-cpp/Metal/MTLLibrary.hpp b/metal-cpp/Metal/MTLLibrary.hpp new file mode 100644 index 00000000..74585178 --- /dev/null +++ b/metal-cpp/Metal/MTLLibrary.hpp @@ -0,0 +1,644 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLLibrary.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLArgument.hpp" +#include "MTLFunctionDescriptor.hpp" +#include "MTLLibrary.hpp" +#include + +namespace MTL +{ +_MTL_ENUM(NS::UInteger, PatchType) { + PatchTypeNone = 0, + PatchTypeTriangle = 1, + PatchTypeQuad = 2, +}; + +class VertexAttribute : public NS::Referencing +{ +public: + static class VertexAttribute* alloc(); + + class VertexAttribute* init(); + + NS::String* name() const; + + NS::UInteger attributeIndex() const; + + MTL::DataType attributeType() const; + + bool active() const; + + bool patchData() const; + + bool patchControlPointData() const; +}; + +class Attribute : public NS::Referencing +{ +public: + static class Attribute* alloc(); + + class Attribute* init(); + + NS::String* name() const; + + NS::UInteger attributeIndex() const; + + MTL::DataType attributeType() const; + + bool active() const; + + bool patchData() const; + + bool patchControlPointData() const; +}; + +_MTL_ENUM(NS::UInteger, FunctionType) { + FunctionTypeVertex = 1, + FunctionTypeFragment = 2, + FunctionTypeKernel = 3, + FunctionTypeVisible = 5, + FunctionTypeIntersection = 6, + FunctionTypeMesh = 7, + FunctionTypeObject = 8, +}; + +class FunctionConstant : public NS::Referencing +{ +public: + static class FunctionConstant* alloc(); + + class FunctionConstant* init(); + + NS::String* name() const; + + MTL::DataType type() const; + + NS::UInteger index() const; + + bool required() const; +}; + +using AutoreleasedArgument = class Argument*; + +class Function : public NS::Referencing +{ +public: + NS::String* label() const; + void setLabel(const NS::String* label); + + class Device* device() const; + + MTL::FunctionType functionType() const; + + MTL::PatchType patchType() const; + + NS::Integer 
patchControlPointCount() const; + + NS::Array* vertexAttributes() const; + + NS::Array* stageInputAttributes() const; + + NS::String* name() const; + + NS::Dictionary* functionConstantsDictionary() const; + + class ArgumentEncoder* newArgumentEncoder(NS::UInteger bufferIndex); + + class ArgumentEncoder* newArgumentEncoder(NS::UInteger bufferIndex, const MTL::AutoreleasedArgument* reflection); + + MTL::FunctionOptions options() const; +}; + +_MTL_ENUM(NS::UInteger, LanguageVersion) { + LanguageVersion1_0 = 65536, + LanguageVersion1_1 = 65537, + LanguageVersion1_2 = 65538, + LanguageVersion2_0 = 131072, + LanguageVersion2_1 = 131073, + LanguageVersion2_2 = 131074, + LanguageVersion2_3 = 131075, + LanguageVersion2_4 = 131076, + LanguageVersion3_0 = 196608, +}; + +_MTL_ENUM(NS::Integer, LibraryType) { + LibraryTypeExecutable = 0, + LibraryTypeDynamic = 1, +}; + +_MTL_ENUM(NS::Integer, LibraryOptimizationLevel) { + LibraryOptimizationLevelDefault = 0, + LibraryOptimizationLevelSize = 1, +}; + +class CompileOptions : public NS::Copying +{ +public: + static class CompileOptions* alloc(); + + class CompileOptions* init(); + + NS::Dictionary* preprocessorMacros() const; + void setPreprocessorMacros(const NS::Dictionary* preprocessorMacros); + + bool fastMathEnabled() const; + void setFastMathEnabled(bool fastMathEnabled); + + MTL::LanguageVersion languageVersion() const; + void setLanguageVersion(MTL::LanguageVersion languageVersion); + + MTL::LibraryType libraryType() const; + void setLibraryType(MTL::LibraryType libraryType); + + NS::String* installName() const; + void setInstallName(const NS::String* installName); + + NS::Array* libraries() const; + void setLibraries(const NS::Array* libraries); + + bool preserveInvariance() const; + void setPreserveInvariance(bool preserveInvariance); + + MTL::LibraryOptimizationLevel optimizationLevel() const; + void setOptimizationLevel(MTL::LibraryOptimizationLevel optimizationLevel); +}; + +_MTL_ENUM(NS::UInteger, LibraryError) { + 
LibraryErrorUnsupported = 1, + LibraryErrorInternal = 2, + LibraryErrorCompileFailure = 3, + LibraryErrorCompileWarning = 4, + LibraryErrorFunctionNotFound = 5, + LibraryErrorFileNotFound = 6, +}; + +class Library : public NS::Referencing +{ +public: + void newFunction(const NS::String* pFunctionName, const class FunctionConstantValues* pConstantValues, const std::function& completionHandler); + + void newFunction(const class FunctionDescriptor* pDescriptor, const std::function& completionHandler); + + void newIntersectionFunction(const class IntersectionFunctionDescriptor* pDescriptor, const std::function& completionHandler); + + NS::String* label() const; + void setLabel(const NS::String* label); + + class Device* device() const; + + class Function* newFunction(const NS::String* functionName); + + class Function* newFunction(const NS::String* name, const class FunctionConstantValues* constantValues, NS::Error** error); + + void newFunction(const NS::String* name, const class FunctionConstantValues* constantValues, void (^completionHandler)(MTL::Function*, NS::Error*)); + + void newFunction(const class FunctionDescriptor* descriptor, void (^completionHandler)(MTL::Function*, NS::Error*)); + + class Function* newFunction(const class FunctionDescriptor* descriptor, NS::Error** error); + + void newIntersectionFunction(const class IntersectionFunctionDescriptor* descriptor, void (^completionHandler)(MTL::Function*, NS::Error*)); + + class Function* newIntersectionFunction(const class IntersectionFunctionDescriptor* descriptor, NS::Error** error); + + NS::Array* functionNames() const; + + MTL::LibraryType type() const; + + NS::String* installName() const; +}; + +} + +// static method: alloc +_MTL_INLINE MTL::VertexAttribute* MTL::VertexAttribute::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLVertexAttribute)); +} + +// method: init +_MTL_INLINE MTL::VertexAttribute* MTL::VertexAttribute::init() +{ + return NS::Object::init(); +} + +// property: name 
+_MTL_INLINE NS::String* MTL::VertexAttribute::name() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(name)); +} + +// property: attributeIndex +_MTL_INLINE NS::UInteger MTL::VertexAttribute::attributeIndex() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(attributeIndex)); +} + +// property: attributeType +_MTL_INLINE MTL::DataType MTL::VertexAttribute::attributeType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(attributeType)); +} + +// property: active +_MTL_INLINE bool MTL::VertexAttribute::active() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(isActive)); +} + +// property: patchData +_MTL_INLINE bool MTL::VertexAttribute::patchData() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(isPatchData)); +} + +// property: patchControlPointData +_MTL_INLINE bool MTL::VertexAttribute::patchControlPointData() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(isPatchControlPointData)); +} + +// static method: alloc +_MTL_INLINE MTL::Attribute* MTL::Attribute::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLAttribute)); +} + +// method: init +_MTL_INLINE MTL::Attribute* MTL::Attribute::init() +{ + return NS::Object::init(); +} + +// property: name +_MTL_INLINE NS::String* MTL::Attribute::name() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(name)); +} + +// property: attributeIndex +_MTL_INLINE NS::UInteger MTL::Attribute::attributeIndex() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(attributeIndex)); +} + +// property: attributeType +_MTL_INLINE MTL::DataType MTL::Attribute::attributeType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(attributeType)); +} + +// property: active +_MTL_INLINE bool MTL::Attribute::active() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(isActive)); +} + +// property: patchData +_MTL_INLINE bool MTL::Attribute::patchData() const +{ + return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(isPatchData)); +} + +// property: patchControlPointData +_MTL_INLINE bool MTL::Attribute::patchControlPointData() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(isPatchControlPointData)); +} + +// static method: alloc +_MTL_INLINE MTL::FunctionConstant* MTL::FunctionConstant::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLFunctionConstant)); +} + +// method: init +_MTL_INLINE MTL::FunctionConstant* MTL::FunctionConstant::init() +{ + return NS::Object::init(); +} + +// property: name +_MTL_INLINE NS::String* MTL::FunctionConstant::name() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(name)); +} + +// property: type +_MTL_INLINE MTL::DataType MTL::FunctionConstant::type() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(type)); +} + +// property: index +_MTL_INLINE NS::UInteger MTL::FunctionConstant::index() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(index)); +} + +// property: required +_MTL_INLINE bool MTL::FunctionConstant::required() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(required)); +} + +// property: label +_MTL_INLINE NS::String* MTL::Function::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::Function::setLabel(const NS::String* label) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} + +// property: device +_MTL_INLINE MTL::Device* MTL::Function::device() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); +} + +// property: functionType +_MTL_INLINE MTL::FunctionType MTL::Function::functionType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(functionType)); +} + +// property: patchType +_MTL_INLINE MTL::PatchType MTL::Function::patchType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(patchType)); +} + +// property: patchControlPointCount +_MTL_INLINE NS::Integer MTL::Function::patchControlPointCount() const +{ + return 
Object::sendMessage(this, _MTL_PRIVATE_SEL(patchControlPointCount)); +} + +// property: vertexAttributes +_MTL_INLINE NS::Array* MTL::Function::vertexAttributes() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(vertexAttributes)); +} + +// property: stageInputAttributes +_MTL_INLINE NS::Array* MTL::Function::stageInputAttributes() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(stageInputAttributes)); +} + +// property: name +_MTL_INLINE NS::String* MTL::Function::name() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(name)); +} + +// property: functionConstantsDictionary +_MTL_INLINE NS::Dictionary* MTL::Function::functionConstantsDictionary() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(functionConstantsDictionary)); +} + +// method: newArgumentEncoderWithBufferIndex: +_MTL_INLINE MTL::ArgumentEncoder* MTL::Function::newArgumentEncoder(NS::UInteger bufferIndex) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newArgumentEncoderWithBufferIndex_), bufferIndex); +} + +// method: newArgumentEncoderWithBufferIndex:reflection: +_MTL_INLINE MTL::ArgumentEncoder* MTL::Function::newArgumentEncoder(NS::UInteger bufferIndex, const MTL::AutoreleasedArgument* reflection) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newArgumentEncoderWithBufferIndex_reflection_), bufferIndex, reflection); +} + +// property: options +_MTL_INLINE MTL::FunctionOptions MTL::Function::options() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(options)); +} + +// static method: alloc +_MTL_INLINE MTL::CompileOptions* MTL::CompileOptions::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLCompileOptions)); +} + +// method: init +_MTL_INLINE MTL::CompileOptions* MTL::CompileOptions::init() +{ + return NS::Object::init(); +} + +// property: preprocessorMacros +_MTL_INLINE NS::Dictionary* MTL::CompileOptions::preprocessorMacros() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(preprocessorMacros)); +} 
+ +_MTL_INLINE void MTL::CompileOptions::setPreprocessorMacros(const NS::Dictionary* preprocessorMacros) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setPreprocessorMacros_), preprocessorMacros); +} + +// property: fastMathEnabled +_MTL_INLINE bool MTL::CompileOptions::fastMathEnabled() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(fastMathEnabled)); +} + +_MTL_INLINE void MTL::CompileOptions::setFastMathEnabled(bool fastMathEnabled) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setFastMathEnabled_), fastMathEnabled); +} + +// property: languageVersion +_MTL_INLINE MTL::LanguageVersion MTL::CompileOptions::languageVersion() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(languageVersion)); +} + +_MTL_INLINE void MTL::CompileOptions::setLanguageVersion(MTL::LanguageVersion languageVersion) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLanguageVersion_), languageVersion); +} + +// property: libraryType +_MTL_INLINE MTL::LibraryType MTL::CompileOptions::libraryType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(libraryType)); +} + +_MTL_INLINE void MTL::CompileOptions::setLibraryType(MTL::LibraryType libraryType) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLibraryType_), libraryType); +} + +// property: installName +_MTL_INLINE NS::String* MTL::CompileOptions::installName() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(installName)); +} + +_MTL_INLINE void MTL::CompileOptions::setInstallName(const NS::String* installName) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setInstallName_), installName); +} + +// property: libraries +_MTL_INLINE NS::Array* MTL::CompileOptions::libraries() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(libraries)); +} + +_MTL_INLINE void MTL::CompileOptions::setLibraries(const NS::Array* libraries) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLibraries_), libraries); +} + +// property: preserveInvariance +_MTL_INLINE bool 
MTL::CompileOptions::preserveInvariance() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(preserveInvariance)); +} + +_MTL_INLINE void MTL::CompileOptions::setPreserveInvariance(bool preserveInvariance) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setPreserveInvariance_), preserveInvariance); +} + +// property: optimizationLevel +_MTL_INLINE MTL::LibraryOptimizationLevel MTL::CompileOptions::optimizationLevel() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(optimizationLevel)); +} + +_MTL_INLINE void MTL::CompileOptions::setOptimizationLevel(MTL::LibraryOptimizationLevel optimizationLevel) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setOptimizationLevel_), optimizationLevel); +} + +_MTL_INLINE void MTL::Library::newFunction(const NS::String* pFunctionName, const FunctionConstantValues* pConstantValues, const std::function& completionHandler) +{ + __block std::function blockCompletionHandler = completionHandler; + + newFunction(pFunctionName, pConstantValues, ^(Function* pFunction, NS::Error* pError) { blockCompletionHandler(pFunction, pError); }); +} + +_MTL_INLINE void MTL::Library::newFunction(const FunctionDescriptor* pDescriptor, const std::function& completionHandler) +{ + __block std::function blockCompletionHandler = completionHandler; + + newFunction(pDescriptor, ^(Function* pFunction, NS::Error* pError) { blockCompletionHandler(pFunction, pError); }); +} + +_MTL_INLINE void MTL::Library::newIntersectionFunction(const IntersectionFunctionDescriptor* pDescriptor, const std::function& completionHandler) +{ + __block std::function blockCompletionHandler = completionHandler; + + newIntersectionFunction(pDescriptor, ^(Function* pFunction, NS::Error* pError) { blockCompletionHandler(pFunction, pError); }); +} + +// property: label +_MTL_INLINE NS::String* MTL::Library::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::Library::setLabel(const NS::String* label) +{ + 
Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} + +// property: device +_MTL_INLINE MTL::Device* MTL::Library::device() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); +} + +// method: newFunctionWithName: +_MTL_INLINE MTL::Function* MTL::Library::newFunction(const NS::String* functionName) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newFunctionWithName_), functionName); +} + +// method: newFunctionWithName:constantValues:error: +_MTL_INLINE MTL::Function* MTL::Library::newFunction(const NS::String* name, const MTL::FunctionConstantValues* constantValues, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newFunctionWithName_constantValues_error_), name, constantValues, error); +} + +// method: newFunctionWithName:constantValues:completionHandler: +_MTL_INLINE void MTL::Library::newFunction(const NS::String* name, const MTL::FunctionConstantValues* constantValues, void (^completionHandler)(MTL::Function*, NS::Error*)) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(newFunctionWithName_constantValues_completionHandler_), name, constantValues, completionHandler); +} + +// method: newFunctionWithDescriptor:completionHandler: +_MTL_INLINE void MTL::Library::newFunction(const MTL::FunctionDescriptor* descriptor, void (^completionHandler)(MTL::Function*, NS::Error*)) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(newFunctionWithDescriptor_completionHandler_), descriptor, completionHandler); +} + +// method: newFunctionWithDescriptor:error: +_MTL_INLINE MTL::Function* MTL::Library::newFunction(const MTL::FunctionDescriptor* descriptor, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newFunctionWithDescriptor_error_), descriptor, error); +} + +// method: newIntersectionFunctionWithDescriptor:completionHandler: +_MTL_INLINE void MTL::Library::newIntersectionFunction(const MTL::IntersectionFunctionDescriptor* descriptor, void (^completionHandler)(MTL::Function*, NS::Error*)) 
+{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(newIntersectionFunctionWithDescriptor_completionHandler_), descriptor, completionHandler); +} + +// method: newIntersectionFunctionWithDescriptor:error: +_MTL_INLINE MTL::Function* MTL::Library::newIntersectionFunction(const MTL::IntersectionFunctionDescriptor* descriptor, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newIntersectionFunctionWithDescriptor_error_), descriptor, error); +} + +// property: functionNames +_MTL_INLINE NS::Array* MTL::Library::functionNames() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(functionNames)); +} + +// property: type +_MTL_INLINE MTL::LibraryType MTL::Library::type() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(type)); +} + +// property: installName +_MTL_INLINE NS::String* MTL::Library::installName() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(installName)); +} diff --git a/metal-cpp/Metal/MTLLinkedFunctions.hpp b/metal-cpp/Metal/MTLLinkedFunctions.hpp new file mode 100644 index 00000000..d8f0bd84 --- /dev/null +++ b/metal-cpp/Metal/MTLLinkedFunctions.hpp @@ -0,0 +1,115 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLLinkedFunctions.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +namespace MTL +{ +class LinkedFunctions : public NS::Copying +{ +public: + static class LinkedFunctions* alloc(); + + class LinkedFunctions* init(); + + static class LinkedFunctions* linkedFunctions(); + + NS::Array* functions() const; + void setFunctions(const NS::Array* functions); + + NS::Array* binaryFunctions() const; + void setBinaryFunctions(const NS::Array* binaryFunctions); + + NS::Dictionary* groups() const; + void setGroups(const NS::Dictionary* groups); + + NS::Array* privateFunctions() const; + void setPrivateFunctions(const NS::Array* privateFunctions); +}; + +} + +// static method: alloc +_MTL_INLINE MTL::LinkedFunctions* MTL::LinkedFunctions::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLLinkedFunctions)); +} + +// method: init +_MTL_INLINE MTL::LinkedFunctions* MTL::LinkedFunctions::init() +{ + return NS::Object::init(); +} + +// static method: linkedFunctions +_MTL_INLINE MTL::LinkedFunctions* MTL::LinkedFunctions::linkedFunctions() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLLinkedFunctions), _MTL_PRIVATE_SEL(linkedFunctions)); +} + +// property: functions +_MTL_INLINE NS::Array* MTL::LinkedFunctions::functions() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(functions)); +} + +_MTL_INLINE void MTL::LinkedFunctions::setFunctions(const NS::Array* functions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setFunctions_), functions); +} + +// property: binaryFunctions +_MTL_INLINE NS::Array* MTL::LinkedFunctions::binaryFunctions() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(binaryFunctions)); +} + +_MTL_INLINE void MTL::LinkedFunctions::setBinaryFunctions(const NS::Array* binaryFunctions) +{ + Object::sendMessage(this, 
_MTL_PRIVATE_SEL(setBinaryFunctions_), binaryFunctions); +} + +// property: groups +_MTL_INLINE NS::Dictionary* MTL::LinkedFunctions::groups() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(groups)); +} + +_MTL_INLINE void MTL::LinkedFunctions::setGroups(const NS::Dictionary* groups) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setGroups_), groups); +} + +// property: privateFunctions +_MTL_INLINE NS::Array* MTL::LinkedFunctions::privateFunctions() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(privateFunctions)); +} + +_MTL_INLINE void MTL::LinkedFunctions::setPrivateFunctions(const NS::Array* privateFunctions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setPrivateFunctions_), privateFunctions); +} diff --git a/metal-cpp/Metal/MTLParallelRenderCommandEncoder.hpp b/metal-cpp/Metal/MTLParallelRenderCommandEncoder.hpp new file mode 100644 index 00000000..e1a661ef --- /dev/null +++ b/metal-cpp/Metal/MTLParallelRenderCommandEncoder.hpp @@ -0,0 +1,94 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLParallelRenderCommandEncoder.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLCommandEncoder.hpp" +#include "MTLRenderPass.hpp" + +namespace MTL +{ +class ParallelRenderCommandEncoder : public NS::Referencing +{ +public: + class RenderCommandEncoder* renderCommandEncoder(); + + void setColorStoreAction(MTL::StoreAction storeAction, NS::UInteger colorAttachmentIndex); + + void setDepthStoreAction(MTL::StoreAction storeAction); + + void setStencilStoreAction(MTL::StoreAction storeAction); + + void setColorStoreActionOptions(MTL::StoreActionOptions storeActionOptions, NS::UInteger colorAttachmentIndex); + + void setDepthStoreActionOptions(MTL::StoreActionOptions storeActionOptions); + + void setStencilStoreActionOptions(MTL::StoreActionOptions storeActionOptions); +}; + +} + +// method: renderCommandEncoder +_MTL_INLINE MTL::RenderCommandEncoder* MTL::ParallelRenderCommandEncoder::renderCommandEncoder() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(renderCommandEncoder)); +} + +// method: setColorStoreAction:atIndex: +_MTL_INLINE void MTL::ParallelRenderCommandEncoder::setColorStoreAction(MTL::StoreAction storeAction, NS::UInteger colorAttachmentIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setColorStoreAction_atIndex_), storeAction, colorAttachmentIndex); +} + +// method: setDepthStoreAction: +_MTL_INLINE void MTL::ParallelRenderCommandEncoder::setDepthStoreAction(MTL::StoreAction storeAction) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setDepthStoreAction_), storeAction); +} + +// method: setStencilStoreAction: +_MTL_INLINE void MTL::ParallelRenderCommandEncoder::setStencilStoreAction(MTL::StoreAction storeAction) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStencilStoreAction_), storeAction); +} + +// method: 
setColorStoreActionOptions:atIndex: +_MTL_INLINE void MTL::ParallelRenderCommandEncoder::setColorStoreActionOptions(MTL::StoreActionOptions storeActionOptions, NS::UInteger colorAttachmentIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setColorStoreActionOptions_atIndex_), storeActionOptions, colorAttachmentIndex); +} + +// method: setDepthStoreActionOptions: +_MTL_INLINE void MTL::ParallelRenderCommandEncoder::setDepthStoreActionOptions(MTL::StoreActionOptions storeActionOptions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setDepthStoreActionOptions_), storeActionOptions); +} + +// method: setStencilStoreActionOptions: +_MTL_INLINE void MTL::ParallelRenderCommandEncoder::setStencilStoreActionOptions(MTL::StoreActionOptions storeActionOptions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStencilStoreActionOptions_), storeActionOptions); +} diff --git a/metal-cpp/Metal/MTLPipeline.hpp b/metal-cpp/Metal/MTLPipeline.hpp new file mode 100644 index 00000000..b8fa6c0f --- /dev/null +++ b/metal-cpp/Metal/MTLPipeline.hpp @@ -0,0 +1,109 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLPipeline.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLPipeline.hpp" + +namespace MTL +{ +_MTL_ENUM(NS::UInteger, Mutability) { + MutabilityDefault = 0, + MutabilityMutable = 1, + MutabilityImmutable = 2, +}; + +class PipelineBufferDescriptor : public NS::Copying +{ +public: + static class PipelineBufferDescriptor* alloc(); + + class PipelineBufferDescriptor* init(); + + MTL::Mutability mutability() const; + void setMutability(MTL::Mutability mutability); +}; + +class PipelineBufferDescriptorArray : public NS::Referencing +{ +public: + static class PipelineBufferDescriptorArray* alloc(); + + class PipelineBufferDescriptorArray* init(); + + class PipelineBufferDescriptor* object(NS::UInteger bufferIndex); + + void setObject(const class PipelineBufferDescriptor* buffer, NS::UInteger bufferIndex); +}; + +} + +// static method: alloc +_MTL_INLINE MTL::PipelineBufferDescriptor* MTL::PipelineBufferDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLPipelineBufferDescriptor)); +} + +// method: init +_MTL_INLINE MTL::PipelineBufferDescriptor* MTL::PipelineBufferDescriptor::init() +{ + return NS::Object::init(); +} + +// property: mutability +_MTL_INLINE MTL::Mutability MTL::PipelineBufferDescriptor::mutability() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(mutability)); +} + +_MTL_INLINE void MTL::PipelineBufferDescriptor::setMutability(MTL::Mutability mutability) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMutability_), mutability); +} + +// static method: alloc +_MTL_INLINE MTL::PipelineBufferDescriptorArray* MTL::PipelineBufferDescriptorArray::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLPipelineBufferDescriptorArray)); +} + +// method: init +_MTL_INLINE 
MTL::PipelineBufferDescriptorArray* MTL::PipelineBufferDescriptorArray::init() +{ + return NS::Object::init(); +} + +// method: objectAtIndexedSubscript: +_MTL_INLINE MTL::PipelineBufferDescriptor* MTL::PipelineBufferDescriptorArray::object(NS::UInteger bufferIndex) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), bufferIndex); +} + +// method: setObject:atIndexedSubscript: +_MTL_INLINE void MTL::PipelineBufferDescriptorArray::setObject(const MTL::PipelineBufferDescriptor* buffer, NS::UInteger bufferIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), buffer, bufferIndex); +} diff --git a/metal-cpp/Metal/MTLPixelFormat.hpp b/metal-cpp/Metal/MTLPixelFormat.hpp new file mode 100644 index 00000000..1ea11e42 --- /dev/null +++ b/metal-cpp/Metal/MTLPixelFormat.hpp @@ -0,0 +1,173 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLPixelFormat.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +namespace MTL +{ +_MTL_ENUM(NS::UInteger, PixelFormat) { + PixelFormatInvalid = 0, + PixelFormatA8Unorm = 1, + PixelFormatR8Unorm = 10, + PixelFormatR8Unorm_sRGB = 11, + PixelFormatR8Snorm = 12, + PixelFormatR8Uint = 13, + PixelFormatR8Sint = 14, + PixelFormatR16Unorm = 20, + PixelFormatR16Snorm = 22, + PixelFormatR16Uint = 23, + PixelFormatR16Sint = 24, + PixelFormatR16Float = 25, + PixelFormatRG8Unorm = 30, + PixelFormatRG8Unorm_sRGB = 31, + PixelFormatRG8Snorm = 32, + PixelFormatRG8Uint = 33, + PixelFormatRG8Sint = 34, + PixelFormatB5G6R5Unorm = 40, + PixelFormatA1BGR5Unorm = 41, + PixelFormatABGR4Unorm = 42, + PixelFormatBGR5A1Unorm = 43, + PixelFormatR32Uint = 53, + PixelFormatR32Sint = 54, + PixelFormatR32Float = 55, + PixelFormatRG16Unorm = 60, + PixelFormatRG16Snorm = 62, + PixelFormatRG16Uint = 63, + PixelFormatRG16Sint = 64, + PixelFormatRG16Float = 65, + PixelFormatRGBA8Unorm = 70, + PixelFormatRGBA8Unorm_sRGB = 71, + PixelFormatRGBA8Snorm = 72, + PixelFormatRGBA8Uint = 73, + PixelFormatRGBA8Sint = 74, + PixelFormatBGRA8Unorm = 80, + PixelFormatBGRA8Unorm_sRGB = 81, + PixelFormatRGB10A2Unorm = 90, + PixelFormatRGB10A2Uint = 91, + PixelFormatRG11B10Float = 92, + PixelFormatRGB9E5Float = 93, + PixelFormatBGR10A2Unorm = 94, + PixelFormatRG32Uint = 103, + PixelFormatRG32Sint = 104, + PixelFormatRG32Float = 105, + PixelFormatRGBA16Unorm = 110, + PixelFormatRGBA16Snorm = 112, + PixelFormatRGBA16Uint = 113, + PixelFormatRGBA16Sint = 114, + PixelFormatRGBA16Float = 115, + PixelFormatRGBA32Uint = 123, + PixelFormatRGBA32Sint = 124, + PixelFormatRGBA32Float = 125, + PixelFormatBC1_RGBA = 130, + PixelFormatBC1_RGBA_sRGB = 131, + PixelFormatBC2_RGBA = 132, + PixelFormatBC2_RGBA_sRGB = 
133, + PixelFormatBC3_RGBA = 134, + PixelFormatBC3_RGBA_sRGB = 135, + PixelFormatBC4_RUnorm = 140, + PixelFormatBC4_RSnorm = 141, + PixelFormatBC5_RGUnorm = 142, + PixelFormatBC5_RGSnorm = 143, + PixelFormatBC6H_RGBFloat = 150, + PixelFormatBC6H_RGBUfloat = 151, + PixelFormatBC7_RGBAUnorm = 152, + PixelFormatBC7_RGBAUnorm_sRGB = 153, + PixelFormatPVRTC_RGB_2BPP = 160, + PixelFormatPVRTC_RGB_2BPP_sRGB = 161, + PixelFormatPVRTC_RGB_4BPP = 162, + PixelFormatPVRTC_RGB_4BPP_sRGB = 163, + PixelFormatPVRTC_RGBA_2BPP = 164, + PixelFormatPVRTC_RGBA_2BPP_sRGB = 165, + PixelFormatPVRTC_RGBA_4BPP = 166, + PixelFormatPVRTC_RGBA_4BPP_sRGB = 167, + PixelFormatEAC_R11Unorm = 170, + PixelFormatEAC_R11Snorm = 172, + PixelFormatEAC_RG11Unorm = 174, + PixelFormatEAC_RG11Snorm = 176, + PixelFormatEAC_RGBA8 = 178, + PixelFormatEAC_RGBA8_sRGB = 179, + PixelFormatETC2_RGB8 = 180, + PixelFormatETC2_RGB8_sRGB = 181, + PixelFormatETC2_RGB8A1 = 182, + PixelFormatETC2_RGB8A1_sRGB = 183, + PixelFormatASTC_4x4_sRGB = 186, + PixelFormatASTC_5x4_sRGB = 187, + PixelFormatASTC_5x5_sRGB = 188, + PixelFormatASTC_6x5_sRGB = 189, + PixelFormatASTC_6x6_sRGB = 190, + PixelFormatASTC_8x5_sRGB = 192, + PixelFormatASTC_8x6_sRGB = 193, + PixelFormatASTC_8x8_sRGB = 194, + PixelFormatASTC_10x5_sRGB = 195, + PixelFormatASTC_10x6_sRGB = 196, + PixelFormatASTC_10x8_sRGB = 197, + PixelFormatASTC_10x10_sRGB = 198, + PixelFormatASTC_12x10_sRGB = 199, + PixelFormatASTC_12x12_sRGB = 200, + PixelFormatASTC_4x4_LDR = 204, + PixelFormatASTC_5x4_LDR = 205, + PixelFormatASTC_5x5_LDR = 206, + PixelFormatASTC_6x5_LDR = 207, + PixelFormatASTC_6x6_LDR = 208, + PixelFormatASTC_8x5_LDR = 210, + PixelFormatASTC_8x6_LDR = 211, + PixelFormatASTC_8x8_LDR = 212, + PixelFormatASTC_10x5_LDR = 213, + PixelFormatASTC_10x6_LDR = 214, + PixelFormatASTC_10x8_LDR = 215, + PixelFormatASTC_10x10_LDR = 216, + PixelFormatASTC_12x10_LDR = 217, + PixelFormatASTC_12x12_LDR = 218, + PixelFormatASTC_4x4_HDR = 222, + PixelFormatASTC_5x4_HDR = 223, + 
PixelFormatASTC_5x5_HDR = 224, + PixelFormatASTC_6x5_HDR = 225, + PixelFormatASTC_6x6_HDR = 226, + PixelFormatASTC_8x5_HDR = 228, + PixelFormatASTC_8x6_HDR = 229, + PixelFormatASTC_8x8_HDR = 230, + PixelFormatASTC_10x5_HDR = 231, + PixelFormatASTC_10x6_HDR = 232, + PixelFormatASTC_10x8_HDR = 233, + PixelFormatASTC_10x10_HDR = 234, + PixelFormatASTC_12x10_HDR = 235, + PixelFormatASTC_12x12_HDR = 236, + PixelFormatGBGR422 = 240, + PixelFormatBGRG422 = 241, + PixelFormatDepth16Unorm = 250, + PixelFormatDepth32Float = 252, + PixelFormatStencil8 = 253, + PixelFormatDepth24Unorm_Stencil8 = 255, + PixelFormatDepth32Float_Stencil8 = 260, + PixelFormatX32_Stencil8 = 261, + PixelFormatX24_Stencil8 = 262, + PixelFormatBGRA10_XR = 552, + PixelFormatBGRA10_XR_sRGB = 553, + PixelFormatBGR10_XR = 554, + PixelFormatBGR10_XR_sRGB = 555, +}; + +} diff --git a/metal-cpp/Metal/MTLPrivate.hpp b/metal-cpp/Metal/MTLPrivate.hpp new file mode 100644 index 00000000..a579e874 --- /dev/null +++ b/metal-cpp/Metal/MTLPrivate.hpp @@ -0,0 +1,156 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLPrivate.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "MTLDefines.hpp" + +#include + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#define _MTL_PRIVATE_CLS(symbol) (Private::Class::s_k##symbol) +#define _MTL_PRIVATE_SEL(accessor) (Private::Selector::s_k##accessor) + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#if defined(MTL_PRIVATE_IMPLEMENTATION) + +#ifdef METALCPP_SYMBOL_VISIBILITY_HIDDEN +#define _MTL_PRIVATE_VISIBILITY __attribute__((visibility("hidden"))) +#else +#define _MTL_PRIVATE_VISIBILITY __attribute__((visibility("default"))) +#endif // METALCPP_SYMBOL_VISIBILITY_HIDDEN + +#define _MTL_PRIVATE_IMPORT __attribute__((weak_import)) + +#ifdef __OBJC__ +#define _MTL_PRIVATE_OBJC_LOOKUP_CLASS(symbol) ((__bridge void*)objc_lookUpClass(#symbol)) +#define _MTL_PRIVATE_OBJC_GET_PROTOCOL(symbol) ((__bridge void*)objc_getProtocol(#symbol)) +#else +#define _MTL_PRIVATE_OBJC_LOOKUP_CLASS(symbol) objc_lookUpClass(#symbol) +#define _MTL_PRIVATE_OBJC_GET_PROTOCOL(symbol) objc_getProtocol(#symbol) +#endif // __OBJC__ + +#define _MTL_PRIVATE_DEF_CLS(symbol) void* s_k##symbol _MTL_PRIVATE_VISIBILITY = _MTL_PRIVATE_OBJC_LOOKUP_CLASS(symbol) +#define _MTL_PRIVATE_DEF_PRO(symbol) void* s_k##symbol _MTL_PRIVATE_VISIBILITY = _MTL_PRIVATE_OBJC_GET_PROTOCOL(symbol) +#define _MTL_PRIVATE_DEF_SEL(accessor, symbol) SEL s_k##accessor _MTL_PRIVATE_VISIBILITY = sel_registerName(symbol) + +#include +#define MTL_DEF_FUNC( name, signature ) \ + using 
Fn##name = signature; \ + Fn##name name = reinterpret_cast< Fn##name >( dlsym( RTLD_DEFAULT, #name ) ) + +namespace MTL::Private +{ + template + inline _Type const LoadSymbol(const char* pSymbol) + { + const _Type* pAddress = static_cast<_Type*>(dlsym(RTLD_DEFAULT, pSymbol)); + + return pAddress ? *pAddress : nullptr; + } +} // MTL::Private + +#if defined(__MAC_10_16) || defined(__MAC_11_0) || defined(__MAC_12_0) || defined(__MAC_13_0) || defined(__IPHONE_14_0) || defined(__IPHONE_15_0) || defined(__IPHONE_16_0) || defined(__TVOS_14_0) || defined(__TVOS_15_0) || defined(__TVOS_16_0) + +#define _MTL_PRIVATE_DEF_STR(type, symbol) \ + _MTL_EXTERN type const MTL##symbol _MTL_PRIVATE_IMPORT; \ + type const MTL::symbol = (nullptr != &MTL##symbol) ? MTL##symbol : nullptr + +#define _MTL_PRIVATE_DEF_CONST(type, symbol) \ + _MTL_EXTERN type const MTL##symbol _MTL_PRIVATE_IMPORT; \ + type const MTL::symbol = (nullptr != &MTL##symbol) ? MTL##symbol : nullptr + +#define _MTL_PRIVATE_DEF_WEAK_CONST(type, symbol) \ + _MTL_EXTERN type const MTL##symbol; \ + type const MTL::symbol = Private::LoadSymbol("MTL" #symbol) + +#else + +#define _MTL_PRIVATE_DEF_STR(type, symbol) \ + _MTL_EXTERN type const MTL##symbol; \ + type const MTL::symbol = Private::LoadSymbol("MTL" #symbol) + +#define _MTL_PRIVATE_DEF_CONST(type, symbol) \ + _MTL_EXTERN type const MTL##symbol; \ + type const MTL::symbol = Private::LoadSymbol("MTL" #symbol) + +#define _MTL_PRIVATE_DEF_WEAK_CONST(type, symbol) _MTL_PRIVATE_DEF_CONST(type, symbol) + +#endif // defined(__MAC_10_16) || defined(__MAC_11_0) || defined(__MAC_12_0) || defined(__MAC_13_0) || defined(__IPHONE_14_0) || defined(__IPHONE_15_0) || defined(__IPHONE_16_0) || defined(__TVOS_14_0) || defined(__TVOS_15_0) || defined(__TVOS_16_0) + +#else + +#define _MTL_PRIVATE_DEF_CLS(symbol) extern void* s_k##symbol +#define _MTL_PRIVATE_DEF_PRO(symbol) extern void* s_k##symbol +#define _MTL_PRIVATE_DEF_SEL(accessor, symbol) extern SEL s_k##accessor +#define 
_MTL_PRIVATE_DEF_STR(type, symbol) extern type const MTL::symbol +#define _MTL_PRIVATE_DEF_CONST(type, symbol) extern type const MTL::symbol +#define _MTL_PRIVATE_DEF_WEAK_CONST(type, symbol) extern type const MTL::symbol + +#endif // MTL_PRIVATE_IMPLEMENTATION + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace MTL +{ +namespace Private +{ + namespace Class + { + + } // Class +} // Private +} // MTL + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace MTL +{ +namespace Private +{ + namespace Protocol + { + + } // Protocol +} // Private +} // MTL + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace MTL +{ +namespace Private +{ + namespace Selector + { + + _MTL_PRIVATE_DEF_SEL(beginScope, + "beginScope"); + _MTL_PRIVATE_DEF_SEL(endScope, + "endScope"); + } // Class +} // Private +} // MTL + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/Metal/MTLRasterizationRate.hpp b/metal-cpp/Metal/MTLRasterizationRate.hpp new file mode 100644 index 00000000..6ea44636 --- /dev/null +++ b/metal-cpp/Metal/MTLRasterizationRate.hpp @@ -0,0 +1,386 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLRasterizationRate.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLDevice.hpp" +#include "MTLTypes.hpp" + +namespace MTL +{ +class RasterizationRateSampleArray : public NS::Referencing +{ +public: + static class RasterizationRateSampleArray* alloc(); + + class RasterizationRateSampleArray* init(); + + NS::Number* object(NS::UInteger index); + + void setObject(const NS::Number* value, NS::UInteger index); +}; + +class RasterizationRateLayerDescriptor : public NS::Copying +{ +public: + static class RasterizationRateLayerDescriptor* alloc(); + + MTL::RasterizationRateLayerDescriptor* init(); + + MTL::RasterizationRateLayerDescriptor* init(MTL::Size sampleCount); + + MTL::RasterizationRateLayerDescriptor* init(MTL::Size sampleCount, const float* horizontal, const float* vertical); + + MTL::Size sampleCount() const; + + MTL::Size maxSampleCount() const; + + float* horizontalSampleStorage() const; + + float* verticalSampleStorage() const; + + class RasterizationRateSampleArray* horizontal() const; + + class RasterizationRateSampleArray* vertical() const; + + void setSampleCount(MTL::Size sampleCount); +}; + +class RasterizationRateLayerArray : public NS::Referencing +{ +public: + static class RasterizationRateLayerArray* alloc(); + + class RasterizationRateLayerArray* init(); + + class RasterizationRateLayerDescriptor* 
object(NS::UInteger layerIndex); + + void setObject(const class RasterizationRateLayerDescriptor* layer, NS::UInteger layerIndex); +}; + +class RasterizationRateMapDescriptor : public NS::Copying +{ +public: + static class RasterizationRateMapDescriptor* alloc(); + + class RasterizationRateMapDescriptor* init(); + + static class RasterizationRateMapDescriptor* rasterizationRateMapDescriptor(MTL::Size screenSize); + + static class RasterizationRateMapDescriptor* rasterizationRateMapDescriptor(MTL::Size screenSize, const class RasterizationRateLayerDescriptor* layer); + + static class RasterizationRateMapDescriptor* rasterizationRateMapDescriptor(MTL::Size screenSize, NS::UInteger layerCount, const class RasterizationRateLayerDescriptor* const* layers); + + class RasterizationRateLayerDescriptor* layer(NS::UInteger layerIndex); + + void setLayer(const class RasterizationRateLayerDescriptor* layer, NS::UInteger layerIndex); + + class RasterizationRateLayerArray* layers() const; + + MTL::Size screenSize() const; + void setScreenSize(MTL::Size screenSize); + + NS::String* label() const; + void setLabel(const NS::String* label); + + NS::UInteger layerCount() const; +}; + +class RasterizationRateMap : public NS::Referencing +{ +public: + class Device* device() const; + + NS::String* label() const; + + MTL::Size screenSize() const; + + MTL::Size physicalGranularity() const; + + NS::UInteger layerCount() const; + + MTL::SizeAndAlign parameterBufferSizeAndAlign() const; + + void copyParameterDataToBuffer(const class Buffer* buffer, NS::UInteger offset); + + MTL::Size physicalSize(NS::UInteger layerIndex); + + MTL::Coordinate2D mapScreenToPhysicalCoordinates(MTL::Coordinate2D screenCoordinates, NS::UInteger layerIndex); + + MTL::Coordinate2D mapPhysicalToScreenCoordinates(MTL::Coordinate2D physicalCoordinates, NS::UInteger layerIndex); +}; + +} + +// static method: alloc +_MTL_INLINE MTL::RasterizationRateSampleArray* MTL::RasterizationRateSampleArray::alloc() +{ + return 
NS::Object::alloc(_MTL_PRIVATE_CLS(MTLRasterizationRateSampleArray)); +} + +// method: init +_MTL_INLINE MTL::RasterizationRateSampleArray* MTL::RasterizationRateSampleArray::init() +{ + return NS::Object::init(); +} + +// method: objectAtIndexedSubscript: +_MTL_INLINE NS::Number* MTL::RasterizationRateSampleArray::object(NS::UInteger index) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), index); +} + +// method: setObject:atIndexedSubscript: +_MTL_INLINE void MTL::RasterizationRateSampleArray::setObject(const NS::Number* value, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), value, index); +} + +// static method: alloc +_MTL_INLINE MTL::RasterizationRateLayerDescriptor* MTL::RasterizationRateLayerDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLRasterizationRateLayerDescriptor)); +} + +// method: init +_MTL_INLINE MTL::RasterizationRateLayerDescriptor* MTL::RasterizationRateLayerDescriptor::init() +{ + return NS::Object::init(); +} + +// method: initWithSampleCount: +_MTL_INLINE MTL::RasterizationRateLayerDescriptor* MTL::RasterizationRateLayerDescriptor::init(MTL::Size sampleCount) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(initWithSampleCount_), sampleCount); +} + +// method: initWithSampleCount:horizontal:vertical: +_MTL_INLINE MTL::RasterizationRateLayerDescriptor* MTL::RasterizationRateLayerDescriptor::init(MTL::Size sampleCount, const float* horizontal, const float* vertical) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(initWithSampleCount_horizontal_vertical_), sampleCount, horizontal, vertical); +} + +// property: sampleCount +_MTL_INLINE MTL::Size MTL::RasterizationRateLayerDescriptor::sampleCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sampleCount)); +} + +// property: maxSampleCount +_MTL_INLINE MTL::Size MTL::RasterizationRateLayerDescriptor::maxSampleCount() const +{ + return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(maxSampleCount)); +} + +// property: horizontalSampleStorage +_MTL_INLINE float* MTL::RasterizationRateLayerDescriptor::horizontalSampleStorage() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(horizontalSampleStorage)); +} + +// property: verticalSampleStorage +_MTL_INLINE float* MTL::RasterizationRateLayerDescriptor::verticalSampleStorage() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(verticalSampleStorage)); +} + +// property: horizontal +_MTL_INLINE MTL::RasterizationRateSampleArray* MTL::RasterizationRateLayerDescriptor::horizontal() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(horizontal)); +} + +// property: vertical +_MTL_INLINE MTL::RasterizationRateSampleArray* MTL::RasterizationRateLayerDescriptor::vertical() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(vertical)); +} + +// method: setSampleCount: +_MTL_INLINE void MTL::RasterizationRateLayerDescriptor::setSampleCount(MTL::Size sampleCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSampleCount_), sampleCount); +} + +// static method: alloc +_MTL_INLINE MTL::RasterizationRateLayerArray* MTL::RasterizationRateLayerArray::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLRasterizationRateLayerArray)); +} + +// method: init +_MTL_INLINE MTL::RasterizationRateLayerArray* MTL::RasterizationRateLayerArray::init() +{ + return NS::Object::init(); +} + +// method: objectAtIndexedSubscript: +_MTL_INLINE MTL::RasterizationRateLayerDescriptor* MTL::RasterizationRateLayerArray::object(NS::UInteger layerIndex) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), layerIndex); +} + +// method: setObject:atIndexedSubscript: +_MTL_INLINE void MTL::RasterizationRateLayerArray::setObject(const MTL::RasterizationRateLayerDescriptor* layer, NS::UInteger layerIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), layer, layerIndex); +} + +// static method: alloc 
+_MTL_INLINE MTL::RasterizationRateMapDescriptor* MTL::RasterizationRateMapDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLRasterizationRateMapDescriptor)); +} + +// method: init +_MTL_INLINE MTL::RasterizationRateMapDescriptor* MTL::RasterizationRateMapDescriptor::init() +{ + return NS::Object::init(); +} + +// static method: rasterizationRateMapDescriptorWithScreenSize: +_MTL_INLINE MTL::RasterizationRateMapDescriptor* MTL::RasterizationRateMapDescriptor::rasterizationRateMapDescriptor(MTL::Size screenSize) +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLRasterizationRateMapDescriptor), _MTL_PRIVATE_SEL(rasterizationRateMapDescriptorWithScreenSize_), screenSize); +} + +// static method: rasterizationRateMapDescriptorWithScreenSize:layer: +_MTL_INLINE MTL::RasterizationRateMapDescriptor* MTL::RasterizationRateMapDescriptor::rasterizationRateMapDescriptor(MTL::Size screenSize, const MTL::RasterizationRateLayerDescriptor* layer) +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLRasterizationRateMapDescriptor), _MTL_PRIVATE_SEL(rasterizationRateMapDescriptorWithScreenSize_layer_), screenSize, layer); +} + +// static method: rasterizationRateMapDescriptorWithScreenSize:layerCount:layers: +_MTL_INLINE MTL::RasterizationRateMapDescriptor* MTL::RasterizationRateMapDescriptor::rasterizationRateMapDescriptor(MTL::Size screenSize, NS::UInteger layerCount, const MTL::RasterizationRateLayerDescriptor* const* layers) +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLRasterizationRateMapDescriptor), _MTL_PRIVATE_SEL(rasterizationRateMapDescriptorWithScreenSize_layerCount_layers_), screenSize, layerCount, layers); +} + +// method: layerAtIndex: +_MTL_INLINE MTL::RasterizationRateLayerDescriptor* MTL::RasterizationRateMapDescriptor::layer(NS::UInteger layerIndex) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(layerAtIndex_), layerIndex); +} + +// method: setLayer:atIndex: +_MTL_INLINE void MTL::RasterizationRateMapDescriptor::setLayer(const 
MTL::RasterizationRateLayerDescriptor* layer, NS::UInteger layerIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLayer_atIndex_), layer, layerIndex); +} + +// property: layers +_MTL_INLINE MTL::RasterizationRateLayerArray* MTL::RasterizationRateMapDescriptor::layers() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(layers)); +} + +// property: screenSize +_MTL_INLINE MTL::Size MTL::RasterizationRateMapDescriptor::screenSize() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(screenSize)); +} + +_MTL_INLINE void MTL::RasterizationRateMapDescriptor::setScreenSize(MTL::Size screenSize) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setScreenSize_), screenSize); +} + +// property: label +_MTL_INLINE NS::String* MTL::RasterizationRateMapDescriptor::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::RasterizationRateMapDescriptor::setLabel(const NS::String* label) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} + +// property: layerCount +_MTL_INLINE NS::UInteger MTL::RasterizationRateMapDescriptor::layerCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(layerCount)); +} + +// property: device +_MTL_INLINE MTL::Device* MTL::RasterizationRateMap::device() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); +} + +// property: label +_MTL_INLINE NS::String* MTL::RasterizationRateMap::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +// property: screenSize +_MTL_INLINE MTL::Size MTL::RasterizationRateMap::screenSize() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(screenSize)); +} + +// property: physicalGranularity +_MTL_INLINE MTL::Size MTL::RasterizationRateMap::physicalGranularity() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(physicalGranularity)); +} + +// property: layerCount +_MTL_INLINE NS::UInteger MTL::RasterizationRateMap::layerCount() const +{ + return 
Object::sendMessage(this, _MTL_PRIVATE_SEL(layerCount)); +} + +// property: parameterBufferSizeAndAlign +_MTL_INLINE MTL::SizeAndAlign MTL::RasterizationRateMap::parameterBufferSizeAndAlign() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(parameterBufferSizeAndAlign)); +} + +// method: copyParameterDataToBuffer:offset: +_MTL_INLINE void MTL::RasterizationRateMap::copyParameterDataToBuffer(const MTL::Buffer* buffer, NS::UInteger offset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(copyParameterDataToBuffer_offset_), buffer, offset); +} + +// method: physicalSizeForLayer: +_MTL_INLINE MTL::Size MTL::RasterizationRateMap::physicalSize(NS::UInteger layerIndex) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(physicalSizeForLayer_), layerIndex); +} + +// method: mapScreenToPhysicalCoordinates:forLayer: +_MTL_INLINE MTL::Coordinate2D MTL::RasterizationRateMap::mapScreenToPhysicalCoordinates(MTL::Coordinate2D screenCoordinates, NS::UInteger layerIndex) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(mapScreenToPhysicalCoordinates_forLayer_), screenCoordinates, layerIndex); +} + +// method: mapPhysicalToScreenCoordinates:forLayer: +_MTL_INLINE MTL::Coordinate2D MTL::RasterizationRateMap::mapPhysicalToScreenCoordinates(MTL::Coordinate2D physicalCoordinates, NS::UInteger layerIndex) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(mapPhysicalToScreenCoordinates_forLayer_), physicalCoordinates, layerIndex); +} diff --git a/metal-cpp/Metal/MTLRenderCommandEncoder.hpp b/metal-cpp/Metal/MTLRenderCommandEncoder.hpp new file mode 100644 index 00000000..8bdffbff --- /dev/null +++ b/metal-cpp/Metal/MTLRenderCommandEncoder.hpp @@ -0,0 +1,1145 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLRenderCommandEncoder.hpp +// +// Copyright 2020-2022 Apple Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLCommandEncoder.hpp" +#include "MTLRenderCommandEncoder.hpp" +#include "MTLRenderPass.hpp" +#include "MTLStageInputOutputDescriptor.hpp" +#include "MTLTypes.hpp" + +namespace MTL +{ +_MTL_ENUM(NS::UInteger, PrimitiveType) { + PrimitiveTypePoint = 0, + PrimitiveTypeLine = 1, + PrimitiveTypeLineStrip = 2, + PrimitiveTypeTriangle = 3, + PrimitiveTypeTriangleStrip = 4, +}; + +_MTL_ENUM(NS::UInteger, VisibilityResultMode) { + VisibilityResultModeDisabled = 0, + VisibilityResultModeBoolean = 1, + VisibilityResultModeCounting = 2, +}; + +struct ScissorRect +{ + NS::UInteger x; + NS::UInteger y; + NS::UInteger width; + NS::UInteger height; +} _MTL_PACKED; + +struct Viewport +{ + double originX; + double originY; + double width; + double height; + double znear; + double zfar; +} _MTL_PACKED; + +_MTL_ENUM(NS::UInteger, CullMode) { + CullModeNone = 0, + CullModeFront = 1, + CullModeBack = 2, +}; + +_MTL_ENUM(NS::UInteger, Winding) { + WindingClockwise = 0, + WindingCounterClockwise = 1, +}; + +_MTL_ENUM(NS::UInteger, DepthClipMode) { + DepthClipModeClip = 0, + DepthClipModeClamp = 1, +}; + +_MTL_ENUM(NS::UInteger, 
TriangleFillMode) { + TriangleFillModeFill = 0, + TriangleFillModeLines = 1, +}; + +struct DrawPrimitivesIndirectArguments +{ + uint32_t vertexCount; + uint32_t instanceCount; + uint32_t vertexStart; + uint32_t baseInstance; +} _MTL_PACKED; + +struct DrawIndexedPrimitivesIndirectArguments +{ + uint32_t indexCount; + uint32_t instanceCount; + uint32_t indexStart; + int32_t baseVertex; + uint32_t baseInstance; +} _MTL_PACKED; + +struct VertexAmplificationViewMapping +{ + uint32_t viewportArrayIndexOffset; + uint32_t renderTargetArrayIndexOffset; +} _MTL_PACKED; + +struct DrawPatchIndirectArguments +{ + uint32_t patchCount; + uint32_t instanceCount; + uint32_t patchStart; + uint32_t baseInstance; +} _MTL_PACKED; + +struct QuadTessellationFactorsHalf +{ + uint16_t edgeTessellationFactor[4]; + uint16_t insideTessellationFactor[2]; +} _MTL_PACKED; + +struct TriangleTessellationFactorsHalf +{ + uint16_t edgeTessellationFactor[3]; + uint16_t insideTessellationFactor; +} _MTL_PACKED; + +_MTL_OPTIONS(NS::UInteger, RenderStages) { + RenderStageVertex = 1, + RenderStageFragment = 2, + RenderStageTile = 4, + RenderStageObject = 8, + RenderStageMesh = 16, +}; + +class RenderCommandEncoder : public NS::Referencing +{ +public: + void setRenderPipelineState(const class RenderPipelineState* pipelineState); + + void setVertexBytes(const void* bytes, NS::UInteger length, NS::UInteger index); + + void setVertexBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index); + + void setVertexBufferOffset(NS::UInteger offset, NS::UInteger index); + + void setVertexBuffers(const class Buffer* const buffers[], const NS::UInteger offsets[], NS::Range range); + + void setVertexTexture(const class Texture* texture, NS::UInteger index); + + void setVertexTextures(const class Texture* const textures[], NS::Range range); + + void setVertexSamplerState(const class SamplerState* sampler, NS::UInteger index); + + void setVertexSamplerStates(const class SamplerState* const samplers[], 
NS::Range range); + + void setVertexSamplerState(const class SamplerState* sampler, float lodMinClamp, float lodMaxClamp, NS::UInteger index); + + void setVertexSamplerStates(const class SamplerState* const samplers[], const float lodMinClamps[], const float lodMaxClamps[], NS::Range range); + + void setVertexVisibleFunctionTable(const class VisibleFunctionTable* functionTable, NS::UInteger bufferIndex); + + void setVertexVisibleFunctionTables(const class VisibleFunctionTable* const functionTables[], NS::Range range); + + void setVertexIntersectionFunctionTable(const class IntersectionFunctionTable* intersectionFunctionTable, NS::UInteger bufferIndex); + + void setVertexIntersectionFunctionTables(const class IntersectionFunctionTable* const intersectionFunctionTables[], NS::Range range); + + void setVertexAccelerationStructure(const class AccelerationStructure* accelerationStructure, NS::UInteger bufferIndex); + + void setViewport(MTL::Viewport viewport); + + void setViewports(const MTL::Viewport* viewports, NS::UInteger count); + + void setFrontFacingWinding(MTL::Winding frontFacingWinding); + + void setVertexAmplificationCount(NS::UInteger count, const MTL::VertexAmplificationViewMapping* viewMappings); + + void setCullMode(MTL::CullMode cullMode); + + void setDepthClipMode(MTL::DepthClipMode depthClipMode); + + void setDepthBias(float depthBias, float slopeScale, float clamp); + + void setScissorRect(MTL::ScissorRect rect); + + void setScissorRects(const MTL::ScissorRect* scissorRects, NS::UInteger count); + + void setTriangleFillMode(MTL::TriangleFillMode fillMode); + + void setFragmentBytes(const void* bytes, NS::UInteger length, NS::UInteger index); + + void setFragmentBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index); + + void setFragmentBufferOffset(NS::UInteger offset, NS::UInteger index); + + void setFragmentBuffers(const class Buffer* const buffers[], const NS::UInteger offsets[], NS::Range range); + + void 
setFragmentTexture(const class Texture* texture, NS::UInteger index); + + void setFragmentTextures(const class Texture* const textures[], NS::Range range); + + void setFragmentSamplerState(const class SamplerState* sampler, NS::UInteger index); + + void setFragmentSamplerStates(const class SamplerState* const samplers[], NS::Range range); + + void setFragmentSamplerState(const class SamplerState* sampler, float lodMinClamp, float lodMaxClamp, NS::UInteger index); + + void setFragmentSamplerStates(const class SamplerState* const samplers[], const float lodMinClamps[], const float lodMaxClamps[], NS::Range range); + + void setFragmentVisibleFunctionTable(const class VisibleFunctionTable* functionTable, NS::UInteger bufferIndex); + + void setFragmentVisibleFunctionTables(const class VisibleFunctionTable* const functionTables[], NS::Range range); + + void setFragmentIntersectionFunctionTable(const class IntersectionFunctionTable* intersectionFunctionTable, NS::UInteger bufferIndex); + + void setFragmentIntersectionFunctionTables(const class IntersectionFunctionTable* const intersectionFunctionTables[], NS::Range range); + + void setFragmentAccelerationStructure(const class AccelerationStructure* accelerationStructure, NS::UInteger bufferIndex); + + void setBlendColor(float red, float green, float blue, float alpha); + + void setDepthStencilState(const class DepthStencilState* depthStencilState); + + void setStencilReferenceValue(uint32_t referenceValue); + + void setStencilReferenceValues(uint32_t frontReferenceValue, uint32_t backReferenceValue); + + void setVisibilityResultMode(MTL::VisibilityResultMode mode, NS::UInteger offset); + + void setColorStoreAction(MTL::StoreAction storeAction, NS::UInteger colorAttachmentIndex); + + void setDepthStoreAction(MTL::StoreAction storeAction); + + void setStencilStoreAction(MTL::StoreAction storeAction); + + void setColorStoreActionOptions(MTL::StoreActionOptions storeActionOptions, NS::UInteger colorAttachmentIndex); + + void 
setDepthStoreActionOptions(MTL::StoreActionOptions storeActionOptions); + + void setStencilStoreActionOptions(MTL::StoreActionOptions storeActionOptions); + + void setObjectBytes(const void* bytes, NS::UInteger length, NS::UInteger index); + + void setObjectBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index); + + void setObjectBufferOffset(NS::UInteger offset, NS::UInteger index); + + void setObjectBuffers(const class Buffer* const buffers[], const NS::UInteger* offsets, NS::Range range); + + void setObjectTexture(const class Texture* texture, NS::UInteger index); + + void setObjectTextures(const class Texture* const textures[], NS::Range range); + + void setObjectSamplerState(const class SamplerState* sampler, NS::UInteger index); + + void setObjectSamplerStates(const class SamplerState* const samplers[], NS::Range range); + + void setObjectSamplerState(const class SamplerState* sampler, float lodMinClamp, float lodMaxClamp, NS::UInteger index); + + void setObjectSamplerStates(const class SamplerState* const samplers[], const float* lodMinClamps, const float* lodMaxClamps, NS::Range range); + + void setObjectThreadgroupMemoryLength(NS::UInteger length, NS::UInteger index); + + void setMeshBytes(const void* bytes, NS::UInteger length, NS::UInteger index); + + void setMeshBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index); + + void setMeshBufferOffset(NS::UInteger offset, NS::UInteger index); + + void setMeshBuffers(const class Buffer* const buffers[], const NS::UInteger* offsets, NS::Range range); + + void setMeshTexture(const class Texture* texture, NS::UInteger index); + + void setMeshTextures(const class Texture* const textures[], NS::Range range); + + void setMeshSamplerState(const class SamplerState* sampler, NS::UInteger index); + + void setMeshSamplerStates(const class SamplerState* const samplers[], NS::Range range); + + void setMeshSamplerState(const class SamplerState* sampler, float lodMinClamp, float 
lodMaxClamp, NS::UInteger index); + + void setMeshSamplerStates(const class SamplerState* const samplers[], const float* lodMinClamps, const float* lodMaxClamps, NS::Range range); + + void drawMeshThreadgroups(MTL::Size threadgroupsPerGrid, MTL::Size threadsPerObjectThreadgroup, MTL::Size threadsPerMeshThreadgroup); + + void drawMeshThreads(MTL::Size threadsPerGrid, MTL::Size threadsPerObjectThreadgroup, MTL::Size threadsPerMeshThreadgroup); + + void drawMeshThreadgroups(const class Buffer* indirectBuffer, NS::UInteger indirectBufferOffset, MTL::Size threadsPerObjectThreadgroup, MTL::Size threadsPerMeshThreadgroup); + + void drawPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger vertexStart, NS::UInteger vertexCount, NS::UInteger instanceCount); + + void drawPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger vertexStart, NS::UInteger vertexCount); + + void drawIndexedPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger indexCount, MTL::IndexType indexType, const class Buffer* indexBuffer, NS::UInteger indexBufferOffset, NS::UInteger instanceCount); + + void drawIndexedPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger indexCount, MTL::IndexType indexType, const class Buffer* indexBuffer, NS::UInteger indexBufferOffset); + + void drawPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger vertexStart, NS::UInteger vertexCount, NS::UInteger instanceCount, NS::UInteger baseInstance); + + void drawIndexedPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger indexCount, MTL::IndexType indexType, const class Buffer* indexBuffer, NS::UInteger indexBufferOffset, NS::UInteger instanceCount, NS::Integer baseVertex, NS::UInteger baseInstance); + + void drawPrimitives(MTL::PrimitiveType primitiveType, const class Buffer* indirectBuffer, NS::UInteger indirectBufferOffset); + + void drawIndexedPrimitives(MTL::PrimitiveType primitiveType, MTL::IndexType indexType, const class Buffer* indexBuffer, NS::UInteger indexBufferOffset, const class Buffer* 
indirectBuffer, NS::UInteger indirectBufferOffset); + + void textureBarrier(); + + void updateFence(const class Fence* fence, MTL::RenderStages stages); + + void waitForFence(const class Fence* fence, MTL::RenderStages stages); + + void setTessellationFactorBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger instanceStride); + + void setTessellationFactorScale(float scale); + + void drawPatches(NS::UInteger numberOfPatchControlPoints, NS::UInteger patchStart, NS::UInteger patchCount, const class Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, NS::UInteger instanceCount, NS::UInteger baseInstance); + + void drawPatches(NS::UInteger numberOfPatchControlPoints, const class Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, const class Buffer* indirectBuffer, NS::UInteger indirectBufferOffset); + + void drawIndexedPatches(NS::UInteger numberOfPatchControlPoints, NS::UInteger patchStart, NS::UInteger patchCount, const class Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, const class Buffer* controlPointIndexBuffer, NS::UInteger controlPointIndexBufferOffset, NS::UInteger instanceCount, NS::UInteger baseInstance); + + void drawIndexedPatches(NS::UInteger numberOfPatchControlPoints, const class Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, const class Buffer* controlPointIndexBuffer, NS::UInteger controlPointIndexBufferOffset, const class Buffer* indirectBuffer, NS::UInteger indirectBufferOffset); + + NS::UInteger tileWidth() const; + + NS::UInteger tileHeight() const; + + void setTileBytes(const void* bytes, NS::UInteger length, NS::UInteger index); + + void setTileBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index); + + void setTileBufferOffset(NS::UInteger offset, NS::UInteger index); + + void setTileBuffers(const class Buffer* const buffers[], const NS::UInteger* offsets, NS::Range range); + + void setTileTexture(const class Texture* texture, NS::UInteger index); + + 
void setTileTextures(const class Texture* const textures[], NS::Range range); + + void setTileSamplerState(const class SamplerState* sampler, NS::UInteger index); + + void setTileSamplerStates(const class SamplerState* const samplers[], NS::Range range); + + void setTileSamplerState(const class SamplerState* sampler, float lodMinClamp, float lodMaxClamp, NS::UInteger index); + + void setTileSamplerStates(const class SamplerState* const samplers[], const float lodMinClamps[], const float lodMaxClamps[], NS::Range range); + + void setTileVisibleFunctionTable(const class VisibleFunctionTable* functionTable, NS::UInteger bufferIndex); + + void setTileVisibleFunctionTables(const class VisibleFunctionTable* const functionTables[], NS::Range range); + + void setTileIntersectionFunctionTable(const class IntersectionFunctionTable* intersectionFunctionTable, NS::UInteger bufferIndex); + + void setTileIntersectionFunctionTables(const class IntersectionFunctionTable* const intersectionFunctionTables[], NS::Range range); + + void setTileAccelerationStructure(const class AccelerationStructure* accelerationStructure, NS::UInteger bufferIndex); + + void dispatchThreadsPerTile(MTL::Size threadsPerTile); + + void setThreadgroupMemoryLength(NS::UInteger length, NS::UInteger offset, NS::UInteger index); + + void useResource(const class Resource* resource, MTL::ResourceUsage usage); + + void useResources(const class Resource* const resources[], NS::UInteger count, MTL::ResourceUsage usage); + + void useResource(const class Resource* resource, MTL::ResourceUsage usage, MTL::RenderStages stages); + + void useResources(const class Resource* const resources[], NS::UInteger count, MTL::ResourceUsage usage, MTL::RenderStages stages); + + void useHeap(const class Heap* heap); + + void useHeaps(const class Heap* const heaps[], NS::UInteger count); + + void useHeap(const class Heap* heap, MTL::RenderStages stages); + + void useHeaps(const class Heap* const heaps[], NS::UInteger count, 
MTL::RenderStages stages); + + void executeCommandsInBuffer(const class IndirectCommandBuffer* indirectCommandBuffer, NS::Range executionRange); + + void executeCommandsInBuffer(const class IndirectCommandBuffer* indirectCommandbuffer, const class Buffer* indirectRangeBuffer, NS::UInteger indirectBufferOffset); + + void memoryBarrier(MTL::BarrierScope scope, MTL::RenderStages after, MTL::RenderStages before); + + void memoryBarrier(const class Resource* const resources[], NS::UInteger count, MTL::RenderStages after, MTL::RenderStages before); + + void sampleCountersInBuffer(const class CounterSampleBuffer* sampleBuffer, NS::UInteger sampleIndex, bool barrier); +}; + +} + +// method: setRenderPipelineState: +_MTL_INLINE void MTL::RenderCommandEncoder::setRenderPipelineState(const MTL::RenderPipelineState* pipelineState) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRenderPipelineState_), pipelineState); +} + +// method: setVertexBytes:length:atIndex: +_MTL_INLINE void MTL::RenderCommandEncoder::setVertexBytes(const void* bytes, NS::UInteger length, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexBytes_length_atIndex_), bytes, length, index); +} + +// method: setVertexBuffer:offset:atIndex: +_MTL_INLINE void MTL::RenderCommandEncoder::setVertexBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexBuffer_offset_atIndex_), buffer, offset, index); +} + +// method: setVertexBufferOffset:atIndex: +_MTL_INLINE void MTL::RenderCommandEncoder::setVertexBufferOffset(NS::UInteger offset, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexBufferOffset_atIndex_), offset, index); +} + +// method: setVertexBuffers:offsets:withRange: +_MTL_INLINE void MTL::RenderCommandEncoder::setVertexBuffers(const MTL::Buffer* const buffers[], const NS::UInteger offsets[], NS::Range range) +{ + Object::sendMessage(this, 
_MTL_PRIVATE_SEL(setVertexBuffers_offsets_withRange_), buffers, offsets, range); +} + +// method: setVertexTexture:atIndex: +_MTL_INLINE void MTL::RenderCommandEncoder::setVertexTexture(const MTL::Texture* texture, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexTexture_atIndex_), texture, index); +} + +// method: setVertexTextures:withRange: +_MTL_INLINE void MTL::RenderCommandEncoder::setVertexTextures(const MTL::Texture* const textures[], NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexTextures_withRange_), textures, range); +} + +// method: setVertexSamplerState:atIndex: +_MTL_INLINE void MTL::RenderCommandEncoder::setVertexSamplerState(const MTL::SamplerState* sampler, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexSamplerState_atIndex_), sampler, index); +} + +// method: setVertexSamplerStates:withRange: +_MTL_INLINE void MTL::RenderCommandEncoder::setVertexSamplerStates(const MTL::SamplerState* const samplers[], NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexSamplerStates_withRange_), samplers, range); +} + +// method: setVertexSamplerState:lodMinClamp:lodMaxClamp:atIndex: +_MTL_INLINE void MTL::RenderCommandEncoder::setVertexSamplerState(const MTL::SamplerState* sampler, float lodMinClamp, float lodMaxClamp, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexSamplerState_lodMinClamp_lodMaxClamp_atIndex_), sampler, lodMinClamp, lodMaxClamp, index); +} + +// method: setVertexSamplerStates:lodMinClamps:lodMaxClamps:withRange: +_MTL_INLINE void MTL::RenderCommandEncoder::setVertexSamplerStates(const MTL::SamplerState* const samplers[], const float lodMinClamps[], const float lodMaxClamps[], NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexSamplerStates_lodMinClamps_lodMaxClamps_withRange_), samplers, lodMinClamps, lodMaxClamps, range); +} + +// method: setVertexVisibleFunctionTable:atBufferIndex: 
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexVisibleFunctionTable(const MTL::VisibleFunctionTable* functionTable, NS::UInteger bufferIndex)
+{
+    // <void> is spelled out: sendMessage's return type is a template argument that the call arguments cannot supply.
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexVisibleFunctionTable_atBufferIndex_), functionTable, bufferIndex);
+}
+
+// method: setVertexVisibleFunctionTables:withBufferRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexVisibleFunctionTables(const MTL::VisibleFunctionTable* const functionTables[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexVisibleFunctionTables_withBufferRange_), functionTables, range);
+}
+
+// method: setVertexIntersectionFunctionTable:atBufferIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexIntersectionFunctionTable(const MTL::IntersectionFunctionTable* intersectionFunctionTable, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexIntersectionFunctionTable_atBufferIndex_), intersectionFunctionTable, bufferIndex);
+}
+
+// method: setVertexIntersectionFunctionTables:withBufferRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexIntersectionFunctionTables(const MTL::IntersectionFunctionTable* const intersectionFunctionTables[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexIntersectionFunctionTables_withBufferRange_), intersectionFunctionTables, range);
+}
+
+// method: setVertexAccelerationStructure:atBufferIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexAccelerationStructure(const MTL::AccelerationStructure* accelerationStructure, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexAccelerationStructure_atBufferIndex_), accelerationStructure, bufferIndex);
+}
+
+// method: setViewport:
+_MTL_INLINE void MTL::RenderCommandEncoder::setViewport(MTL::Viewport viewport)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setViewport_), viewport);
+}
+
+// method: setViewports:count:
+_MTL_INLINE void MTL::RenderCommandEncoder::setViewports(const MTL::Viewport* viewports, NS::UInteger count)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setViewports_count_), viewports, count);
+}
+
+// method: setFrontFacingWinding:
+_MTL_INLINE void MTL::RenderCommandEncoder::setFrontFacingWinding(MTL::Winding frontFacingWinding)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFrontFacingWinding_), frontFacingWinding);
+}
+
+// method: setVertexAmplificationCount:viewMappings:
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexAmplificationCount(NS::UInteger count, const MTL::VertexAmplificationViewMapping* viewMappings)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexAmplificationCount_viewMappings_), count, viewMappings);
+}
+
+// method: setCullMode:
+_MTL_INLINE void MTL::RenderCommandEncoder::setCullMode(MTL::CullMode cullMode)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setCullMode_), cullMode);
+}
+
+// method: setDepthClipMode:
+_MTL_INLINE void MTL::RenderCommandEncoder::setDepthClipMode(MTL::DepthClipMode depthClipMode)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthClipMode_), depthClipMode);
+}
+
+// method: setDepthBias:slopeScale:clamp:
+_MTL_INLINE void MTL::RenderCommandEncoder::setDepthBias(float depthBias, float slopeScale, float clamp)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthBias_slopeScale_clamp_), depthBias, slopeScale, clamp);
+}
+
+// method: setScissorRect:
+_MTL_INLINE void MTL::RenderCommandEncoder::setScissorRect(MTL::ScissorRect rect)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setScissorRect_), rect);
+}
+
+// method: setScissorRects:count:
+_MTL_INLINE void MTL::RenderCommandEncoder::setScissorRects(const MTL::ScissorRect* scissorRects, NS::UInteger count)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setScissorRects_count_), scissorRects, count);
+}
+
+// method: setTriangleFillMode:
+_MTL_INLINE void MTL::RenderCommandEncoder::setTriangleFillMode(MTL::TriangleFillMode fillMode)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTriangleFillMode_), fillMode);
+}
+
+// method: setFragmentBytes:length:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentBytes(const void* bytes, NS::UInteger length, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentBytes_length_atIndex_), bytes, length, index);
+}
+
+// method: setFragmentBuffer:offset:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentBuffer_offset_atIndex_), buffer, offset, index);
+}
+
+// method: setFragmentBufferOffset:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentBufferOffset(NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentBufferOffset_atIndex_), offset, index);
+}
+
+// method: setFragmentBuffers:offsets:withRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentBuffers(const MTL::Buffer* const buffers[], const NS::UInteger offsets[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentBuffers_offsets_withRange_), buffers, offsets, range);
+}
+
+// method: setFragmentTexture:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentTexture(const MTL::Texture* texture, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentTexture_atIndex_), texture, index);
+}
+
+// method: setFragmentTextures:withRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentTextures(const MTL::Texture* const textures[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentTextures_withRange_), textures, range);
+}
+
+// method: setFragmentSamplerState:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentSamplerState(const MTL::SamplerState* sampler, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentSamplerState_atIndex_), sampler, index);
+}
+
+// method: setFragmentSamplerStates:withRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentSamplerStates(const MTL::SamplerState* const samplers[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentSamplerStates_withRange_), samplers, range);
+}
+
+// method: setFragmentSamplerState:lodMinClamp:lodMaxClamp:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentSamplerState(const MTL::SamplerState* sampler, float lodMinClamp, float lodMaxClamp, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentSamplerState_lodMinClamp_lodMaxClamp_atIndex_), sampler, lodMinClamp, lodMaxClamp, index);
+}
+
+// method: setFragmentSamplerStates:lodMinClamps:lodMaxClamps:withRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentSamplerStates(const MTL::SamplerState* const samplers[], const float lodMinClamps[], const float lodMaxClamps[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentSamplerStates_lodMinClamps_lodMaxClamps_withRange_), samplers, lodMinClamps, lodMaxClamps, range);
+}
+
+// method: setFragmentVisibleFunctionTable:atBufferIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentVisibleFunctionTable(const MTL::VisibleFunctionTable* functionTable, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentVisibleFunctionTable_atBufferIndex_), functionTable, bufferIndex);
+}
+
+// method: setFragmentVisibleFunctionTables:withBufferRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentVisibleFunctionTables(const MTL::VisibleFunctionTable* const functionTables[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentVisibleFunctionTables_withBufferRange_), functionTables, range);
+}
+
+// method: setFragmentIntersectionFunctionTable:atBufferIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentIntersectionFunctionTable(const MTL::IntersectionFunctionTable* intersectionFunctionTable, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentIntersectionFunctionTable_atBufferIndex_), intersectionFunctionTable, bufferIndex);
+}
+
+// method: setFragmentIntersectionFunctionTables:withBufferRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentIntersectionFunctionTables(const MTL::IntersectionFunctionTable* const intersectionFunctionTables[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentIntersectionFunctionTables_withBufferRange_), intersectionFunctionTables, range);
+}
+
+// method: setFragmentAccelerationStructure:atBufferIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentAccelerationStructure(const MTL::AccelerationStructure* accelerationStructure, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentAccelerationStructure_atBufferIndex_), accelerationStructure, bufferIndex);
+}
+
+// method: setBlendColorRed:green:blue:alpha:
+_MTL_INLINE void MTL::RenderCommandEncoder::setBlendColor(float red, float green, float blue, float alpha)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBlendColorRed_green_blue_alpha_), red, green, blue, alpha);
+}
+
+// method: setDepthStencilState:
+_MTL_INLINE void MTL::RenderCommandEncoder::setDepthStencilState(const MTL::DepthStencilState* depthStencilState)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthStencilState_), depthStencilState);
+}
+
+// method: setStencilReferenceValue:
+_MTL_INLINE void MTL::RenderCommandEncoder::setStencilReferenceValue(uint32_t referenceValue)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStencilReferenceValue_), referenceValue);
+}
+
+// method: setStencilFrontReferenceValue:backReferenceValue:
+_MTL_INLINE void MTL::RenderCommandEncoder::setStencilReferenceValues(uint32_t frontReferenceValue, uint32_t backReferenceValue)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStencilFrontReferenceValue_backReferenceValue_), frontReferenceValue, backReferenceValue);
+}
+
+// method: setVisibilityResultMode:offset:
+_MTL_INLINE void MTL::RenderCommandEncoder::setVisibilityResultMode(MTL::VisibilityResultMode mode, NS::UInteger offset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVisibilityResultMode_offset_), mode, offset);
+}
+
+// method: setColorStoreAction:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setColorStoreAction(MTL::StoreAction storeAction, NS::UInteger colorAttachmentIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setColorStoreAction_atIndex_), storeAction, colorAttachmentIndex);
+}
+
+// method: setDepthStoreAction:
+_MTL_INLINE void MTL::RenderCommandEncoder::setDepthStoreAction(MTL::StoreAction storeAction)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthStoreAction_), storeAction);
+}
+
+// method: setStencilStoreAction:
+_MTL_INLINE void MTL::RenderCommandEncoder::setStencilStoreAction(MTL::StoreAction storeAction)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStencilStoreAction_), storeAction);
+}
+
+// method: setColorStoreActionOptions:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setColorStoreActionOptions(MTL::StoreActionOptions storeActionOptions, NS::UInteger colorAttachmentIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setColorStoreActionOptions_atIndex_), storeActionOptions, colorAttachmentIndex);
+}
+
+// method: setDepthStoreActionOptions:
+_MTL_INLINE void MTL::RenderCommandEncoder::setDepthStoreActionOptions(MTL::StoreActionOptions storeActionOptions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthStoreActionOptions_), storeActionOptions);
+}
+
+// method: setStencilStoreActionOptions:
+_MTL_INLINE void MTL::RenderCommandEncoder::setStencilStoreActionOptions(MTL::StoreActionOptions storeActionOptions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStencilStoreActionOptions_), storeActionOptions);
+}
+
+// method: setObjectBytes:length:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setObjectBytes(const void* bytes, NS::UInteger length, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObjectBytes_length_atIndex_), bytes, length, index);
+}
+
+// method: setObjectBuffer:offset:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setObjectBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObjectBuffer_offset_atIndex_), buffer, offset, index);
+}
+
+// method: setObjectBufferOffset:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setObjectBufferOffset(NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObjectBufferOffset_atIndex_), offset, index);
+}
+
+// method: setObjectBuffers:offsets:withRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setObjectBuffers(const MTL::Buffer* const buffers[], const NS::UInteger* offsets, NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObjectBuffers_offsets_withRange_), buffers, offsets, range);
+}
+
+// method: setObjectTexture:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setObjectTexture(const MTL::Texture* texture, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObjectTexture_atIndex_), texture, index);
+}
+
+// method: setObjectTextures:withRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setObjectTextures(const MTL::Texture* const textures[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObjectTextures_withRange_), textures, range);
+}
+
+// method: setObjectSamplerState:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setObjectSamplerState(const MTL::SamplerState* sampler, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObjectSamplerState_atIndex_), sampler, index);
+}
+
+// method: setObjectSamplerStates:withRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setObjectSamplerStates(const MTL::SamplerState* const samplers[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObjectSamplerStates_withRange_), samplers, range);
+}
+
+// method: setObjectSamplerState:lodMinClamp:lodMaxClamp:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setObjectSamplerState(const MTL::SamplerState* sampler, float lodMinClamp, float lodMaxClamp, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObjectSamplerState_lodMinClamp_lodMaxClamp_atIndex_), sampler, lodMinClamp, lodMaxClamp, index);
+}
+
+// method: setObjectSamplerStates:lodMinClamps:lodMaxClamps:withRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setObjectSamplerStates(const MTL::SamplerState* const samplers[], const float* lodMinClamps, const float* lodMaxClamps, NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObjectSamplerStates_lodMinClamps_lodMaxClamps_withRange_), samplers, lodMinClamps, lodMaxClamps, range);
+}
+
+// method: setObjectThreadgroupMemoryLength:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setObjectThreadgroupMemoryLength(NS::UInteger length, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObjectThreadgroupMemoryLength_atIndex_), length, index);
+}
+
+// method: setMeshBytes:length:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setMeshBytes(const void* bytes, NS::UInteger length, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMeshBytes_length_atIndex_), bytes, length, index);
+}
+
+// method: setMeshBuffer:offset:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setMeshBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMeshBuffer_offset_atIndex_), buffer, offset, index);
+}
+
+// method: setMeshBufferOffset:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setMeshBufferOffset(NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMeshBufferOffset_atIndex_), offset, index);
+}
+
+// method: 
setMeshBuffers:offsets:withRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setMeshBuffers(const MTL::Buffer* const buffers[], const NS::UInteger* offsets, NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMeshBuffers_offsets_withRange_), buffers, offsets, range);
+}
+
+// method: setMeshTexture:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setMeshTexture(const MTL::Texture* texture, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMeshTexture_atIndex_), texture, index);
+}
+
+// method: setMeshTextures:withRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setMeshTextures(const MTL::Texture* const textures[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMeshTextures_withRange_), textures, range);
+}
+
+// method: setMeshSamplerState:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setMeshSamplerState(const MTL::SamplerState* sampler, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMeshSamplerState_atIndex_), sampler, index);
+}
+
+// method: setMeshSamplerStates:withRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setMeshSamplerStates(const MTL::SamplerState* const samplers[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMeshSamplerStates_withRange_), samplers, range);
+}
+
+// method: setMeshSamplerState:lodMinClamp:lodMaxClamp:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setMeshSamplerState(const MTL::SamplerState* sampler, float lodMinClamp, float lodMaxClamp, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMeshSamplerState_lodMinClamp_lodMaxClamp_atIndex_), sampler, lodMinClamp, lodMaxClamp, index);
+}
+
+// method: setMeshSamplerStates:lodMinClamps:lodMaxClamps:withRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setMeshSamplerStates(const MTL::SamplerState* const samplers[], const float* lodMinClamps, const float* lodMaxClamps, NS::Range range)
+{
+    Object::sendMessage<void>(this, 
_MTL_PRIVATE_SEL(setMeshSamplerStates_lodMinClamps_lodMaxClamps_withRange_), samplers, lodMinClamps, lodMaxClamps, range);
+}
+
+// method: drawMeshThreadgroups:threadsPerObjectThreadgroup:threadsPerMeshThreadgroup:
+_MTL_INLINE void MTL::RenderCommandEncoder::drawMeshThreadgroups(MTL::Size threadgroupsPerGrid, MTL::Size threadsPerObjectThreadgroup, MTL::Size threadsPerMeshThreadgroup)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawMeshThreadgroups_threadsPerObjectThreadgroup_threadsPerMeshThreadgroup_), threadgroupsPerGrid, threadsPerObjectThreadgroup, threadsPerMeshThreadgroup);
+}
+
+// method: drawMeshThreads:threadsPerObjectThreadgroup:threadsPerMeshThreadgroup:
+_MTL_INLINE void MTL::RenderCommandEncoder::drawMeshThreads(MTL::Size threadsPerGrid, MTL::Size threadsPerObjectThreadgroup, MTL::Size threadsPerMeshThreadgroup)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawMeshThreads_threadsPerObjectThreadgroup_threadsPerMeshThreadgroup_), threadsPerGrid, threadsPerObjectThreadgroup, threadsPerMeshThreadgroup);
+}
+
+// method: drawMeshThreadgroupsWithIndirectBuffer:indirectBufferOffset:threadsPerObjectThreadgroup:threadsPerMeshThreadgroup:
+_MTL_INLINE void MTL::RenderCommandEncoder::drawMeshThreadgroups(const MTL::Buffer* indirectBuffer, NS::UInteger indirectBufferOffset, MTL::Size threadsPerObjectThreadgroup, MTL::Size threadsPerMeshThreadgroup)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawMeshThreadgroupsWithIndirectBuffer_indirectBufferOffset_threadsPerObjectThreadgroup_threadsPerMeshThreadgroup_), indirectBuffer, indirectBufferOffset, threadsPerObjectThreadgroup, threadsPerMeshThreadgroup);
+}
+
+// method: drawPrimitives:vertexStart:vertexCount:instanceCount:
+_MTL_INLINE void MTL::RenderCommandEncoder::drawPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger vertexStart, NS::UInteger vertexCount, NS::UInteger instanceCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawPrimitives_vertexStart_vertexCount_instanceCount_), 
primitiveType, vertexStart, vertexCount, instanceCount);
+}
+
+// method: drawPrimitives:vertexStart:vertexCount:
+_MTL_INLINE void MTL::RenderCommandEncoder::drawPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger vertexStart, NS::UInteger vertexCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawPrimitives_vertexStart_vertexCount_), primitiveType, vertexStart, vertexCount);
+}
+
+// method: drawIndexedPrimitives:indexCount:indexType:indexBuffer:indexBufferOffset:instanceCount:
+_MTL_INLINE void MTL::RenderCommandEncoder::drawIndexedPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger indexCount, MTL::IndexType indexType, const MTL::Buffer* indexBuffer, NS::UInteger indexBufferOffset, NS::UInteger instanceCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawIndexedPrimitives_indexCount_indexType_indexBuffer_indexBufferOffset_instanceCount_), primitiveType, indexCount, indexType, indexBuffer, indexBufferOffset, instanceCount);
+}
+
+// method: drawIndexedPrimitives:indexCount:indexType:indexBuffer:indexBufferOffset:
+_MTL_INLINE void MTL::RenderCommandEncoder::drawIndexedPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger indexCount, MTL::IndexType indexType, const MTL::Buffer* indexBuffer, NS::UInteger indexBufferOffset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawIndexedPrimitives_indexCount_indexType_indexBuffer_indexBufferOffset_), primitiveType, indexCount, indexType, indexBuffer, indexBufferOffset);
+}
+
+// method: drawPrimitives:vertexStart:vertexCount:instanceCount:baseInstance:
+_MTL_INLINE void MTL::RenderCommandEncoder::drawPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger vertexStart, NS::UInteger vertexCount, NS::UInteger instanceCount, NS::UInteger baseInstance)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawPrimitives_vertexStart_vertexCount_instanceCount_baseInstance_), primitiveType, vertexStart, vertexCount, instanceCount, baseInstance);
+}
+
+// method: 
drawIndexedPrimitives:indexCount:indexType:indexBuffer:indexBufferOffset:instanceCount:baseVertex:baseInstance:
+_MTL_INLINE void MTL::RenderCommandEncoder::drawIndexedPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger indexCount, MTL::IndexType indexType, const MTL::Buffer* indexBuffer, NS::UInteger indexBufferOffset, NS::UInteger instanceCount, NS::Integer baseVertex, NS::UInteger baseInstance)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawIndexedPrimitives_indexCount_indexType_indexBuffer_indexBufferOffset_instanceCount_baseVertex_baseInstance_), primitiveType, indexCount, indexType, indexBuffer, indexBufferOffset, instanceCount, baseVertex, baseInstance);
+}
+
+// method: drawPrimitives:indirectBuffer:indirectBufferOffset:
+_MTL_INLINE void MTL::RenderCommandEncoder::drawPrimitives(MTL::PrimitiveType primitiveType, const MTL::Buffer* indirectBuffer, NS::UInteger indirectBufferOffset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawPrimitives_indirectBuffer_indirectBufferOffset_), primitiveType, indirectBuffer, indirectBufferOffset);
+}
+
+// method: drawIndexedPrimitives:indexType:indexBuffer:indexBufferOffset:indirectBuffer:indirectBufferOffset:
+_MTL_INLINE void MTL::RenderCommandEncoder::drawIndexedPrimitives(MTL::PrimitiveType primitiveType, MTL::IndexType indexType, const MTL::Buffer* indexBuffer, NS::UInteger indexBufferOffset, const MTL::Buffer* indirectBuffer, NS::UInteger indirectBufferOffset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawIndexedPrimitives_indexType_indexBuffer_indexBufferOffset_indirectBuffer_indirectBufferOffset_), primitiveType, indexType, indexBuffer, indexBufferOffset, indirectBuffer, indirectBufferOffset);
+}
+
+// method: textureBarrier
+_MTL_INLINE void MTL::RenderCommandEncoder::textureBarrier()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(textureBarrier));
+}
+
+// method: updateFence:afterStages:
+_MTL_INLINE void MTL::RenderCommandEncoder::updateFence(const MTL::Fence* fence, MTL::RenderStages 
stages)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(updateFence_afterStages_), fence, stages);
+}
+
+// method: waitForFence:beforeStages:
+_MTL_INLINE void MTL::RenderCommandEncoder::waitForFence(const MTL::Fence* fence, MTL::RenderStages stages)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(waitForFence_beforeStages_), fence, stages);
+}
+
+// method: setTessellationFactorBuffer:offset:instanceStride:
+_MTL_INLINE void MTL::RenderCommandEncoder::setTessellationFactorBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger instanceStride)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTessellationFactorBuffer_offset_instanceStride_), buffer, offset, instanceStride);
+}
+
+// method: setTessellationFactorScale:
+_MTL_INLINE void MTL::RenderCommandEncoder::setTessellationFactorScale(float scale)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTessellationFactorScale_), scale);
+}
+
+// method: drawPatches:patchStart:patchCount:patchIndexBuffer:patchIndexBufferOffset:instanceCount:baseInstance:
+_MTL_INLINE void MTL::RenderCommandEncoder::drawPatches(NS::UInteger numberOfPatchControlPoints, NS::UInteger patchStart, NS::UInteger patchCount, const MTL::Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, NS::UInteger instanceCount, NS::UInteger baseInstance)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawPatches_patchStart_patchCount_patchIndexBuffer_patchIndexBufferOffset_instanceCount_baseInstance_), numberOfPatchControlPoints, patchStart, patchCount, patchIndexBuffer, patchIndexBufferOffset, instanceCount, baseInstance);
+}
+
+// method: drawPatches:patchIndexBuffer:patchIndexBufferOffset:indirectBuffer:indirectBufferOffset:
+_MTL_INLINE void MTL::RenderCommandEncoder::drawPatches(NS::UInteger numberOfPatchControlPoints, const MTL::Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, const MTL::Buffer* indirectBuffer, NS::UInteger indirectBufferOffset)
+{
+    Object::sendMessage<void>(this, 
_MTL_PRIVATE_SEL(drawPatches_patchIndexBuffer_patchIndexBufferOffset_indirectBuffer_indirectBufferOffset_), numberOfPatchControlPoints, patchIndexBuffer, patchIndexBufferOffset, indirectBuffer, indirectBufferOffset);
+}
+
+// method: drawIndexedPatches:patchStart:patchCount:patchIndexBuffer:patchIndexBufferOffset:controlPointIndexBuffer:controlPointIndexBufferOffset:instanceCount:baseInstance:
+_MTL_INLINE void MTL::RenderCommandEncoder::drawIndexedPatches(NS::UInteger numberOfPatchControlPoints, NS::UInteger patchStart, NS::UInteger patchCount, const MTL::Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, const MTL::Buffer* controlPointIndexBuffer, NS::UInteger controlPointIndexBufferOffset, NS::UInteger instanceCount, NS::UInteger baseInstance)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawIndexedPatches_patchStart_patchCount_patchIndexBuffer_patchIndexBufferOffset_controlPointIndexBuffer_controlPointIndexBufferOffset_instanceCount_baseInstance_), numberOfPatchControlPoints, patchStart, patchCount, patchIndexBuffer, patchIndexBufferOffset, controlPointIndexBuffer, controlPointIndexBufferOffset, instanceCount, baseInstance);
+}
+
+// method: drawIndexedPatches:patchIndexBuffer:patchIndexBufferOffset:controlPointIndexBuffer:controlPointIndexBufferOffset:indirectBuffer:indirectBufferOffset:
+_MTL_INLINE void MTL::RenderCommandEncoder::drawIndexedPatches(NS::UInteger numberOfPatchControlPoints, const MTL::Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, const MTL::Buffer* controlPointIndexBuffer, NS::UInteger controlPointIndexBufferOffset, const MTL::Buffer* indirectBuffer, NS::UInteger indirectBufferOffset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawIndexedPatches_patchIndexBuffer_patchIndexBufferOffset_controlPointIndexBuffer_controlPointIndexBufferOffset_indirectBuffer_indirectBufferOffset_), numberOfPatchControlPoints, patchIndexBuffer, patchIndexBufferOffset, controlPointIndexBuffer, controlPointIndexBufferOffset, 
indirectBuffer, indirectBufferOffset);
+}
+
+// property: tileWidth
+_MTL_INLINE NS::UInteger MTL::RenderCommandEncoder::tileWidth() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(tileWidth));
+}
+
+// property: tileHeight
+_MTL_INLINE NS::UInteger MTL::RenderCommandEncoder::tileHeight() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(tileHeight));
+}
+
+// method: setTileBytes:length:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileBytes(const void* bytes, NS::UInteger length, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileBytes_length_atIndex_), bytes, length, index);
+}
+
+// method: setTileBuffer:offset:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileBuffer_offset_atIndex_), buffer, offset, index);
+}
+
+// method: setTileBufferOffset:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileBufferOffset(NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileBufferOffset_atIndex_), offset, index);
+}
+
+// method: setTileBuffers:offsets:withRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileBuffers(const MTL::Buffer* const buffers[], const NS::UInteger* offsets, NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileBuffers_offsets_withRange_), buffers, offsets, range);
+}
+
+// method: setTileTexture:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileTexture(const MTL::Texture* texture, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileTexture_atIndex_), texture, index);
+}
+
+// method: setTileTextures:withRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileTextures(const MTL::Texture* const textures[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileTextures_withRange_), textures, range);
+}
+
+// method: 
setTileSamplerState:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileSamplerState(const MTL::SamplerState* sampler, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileSamplerState_atIndex_), sampler, index);
+}
+
+// method: setTileSamplerStates:withRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileSamplerStates(const MTL::SamplerState* const samplers[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileSamplerStates_withRange_), samplers, range);
+}
+
+// method: setTileSamplerState:lodMinClamp:lodMaxClamp:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileSamplerState(const MTL::SamplerState* sampler, float lodMinClamp, float lodMaxClamp, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileSamplerState_lodMinClamp_lodMaxClamp_atIndex_), sampler, lodMinClamp, lodMaxClamp, index);
+}
+
+// method: setTileSamplerStates:lodMinClamps:lodMaxClamps:withRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileSamplerStates(const MTL::SamplerState* const samplers[], const float lodMinClamps[], const float lodMaxClamps[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileSamplerStates_lodMinClamps_lodMaxClamps_withRange_), samplers, lodMinClamps, lodMaxClamps, range);
+}
+
+// method: setTileVisibleFunctionTable:atBufferIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileVisibleFunctionTable(const MTL::VisibleFunctionTable* functionTable, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileVisibleFunctionTable_atBufferIndex_), functionTable, bufferIndex);
+}
+
+// method: setTileVisibleFunctionTables:withBufferRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileVisibleFunctionTables(const MTL::VisibleFunctionTable* const functionTables[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileVisibleFunctionTables_withBufferRange_), functionTables, range);
+}
+
+// method: 
setTileIntersectionFunctionTable:atBufferIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileIntersectionFunctionTable(const MTL::IntersectionFunctionTable* intersectionFunctionTable, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileIntersectionFunctionTable_atBufferIndex_), intersectionFunctionTable, bufferIndex);
+}
+
+// method: setTileIntersectionFunctionTables:withBufferRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileIntersectionFunctionTables(const MTL::IntersectionFunctionTable* const intersectionFunctionTables[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileIntersectionFunctionTables_withBufferRange_), intersectionFunctionTables, range);
+}
+
+// method: setTileAccelerationStructure:atBufferIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileAccelerationStructure(const MTL::AccelerationStructure* accelerationStructure, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileAccelerationStructure_atBufferIndex_), accelerationStructure, bufferIndex);
+}
+
+// method: dispatchThreadsPerTile:
+_MTL_INLINE void MTL::RenderCommandEncoder::dispatchThreadsPerTile(MTL::Size threadsPerTile)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(dispatchThreadsPerTile_), threadsPerTile);
+}
+
+// method: setThreadgroupMemoryLength:offset:atIndex:
+_MTL_INLINE void MTL::RenderCommandEncoder::setThreadgroupMemoryLength(NS::UInteger length, NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setThreadgroupMemoryLength_offset_atIndex_), length, offset, index);
+}
+
+// method: useResource:usage:
+_MTL_INLINE void MTL::RenderCommandEncoder::useResource(const MTL::Resource* resource, MTL::ResourceUsage usage)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useResource_usage_), resource, usage);
+}
+
+// method: useResources:count:usage:
+_MTL_INLINE void MTL::RenderCommandEncoder::useResources(const MTL::Resource* const resources[], 
NS::UInteger count, MTL::ResourceUsage usage)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useResources_count_usage_), resources, count, usage);
+}
+
+// method: useResource:usage:stages:
+_MTL_INLINE void MTL::RenderCommandEncoder::useResource(const MTL::Resource* resource, MTL::ResourceUsage usage, MTL::RenderStages stages)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useResource_usage_stages_), resource, usage, stages);
+}
+
+// method: useResources:count:usage:stages:
+_MTL_INLINE void MTL::RenderCommandEncoder::useResources(const MTL::Resource* const resources[], NS::UInteger count, MTL::ResourceUsage usage, MTL::RenderStages stages)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useResources_count_usage_stages_), resources, count, usage, stages);
+}
+
+// method: useHeap:
+_MTL_INLINE void MTL::RenderCommandEncoder::useHeap(const MTL::Heap* heap)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useHeap_), heap);
+}
+
+// method: useHeaps:count:
+_MTL_INLINE void MTL::RenderCommandEncoder::useHeaps(const MTL::Heap* const heaps[], NS::UInteger count)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useHeaps_count_), heaps, count);
+}
+
+// method: useHeap:stages:
+_MTL_INLINE void MTL::RenderCommandEncoder::useHeap(const MTL::Heap* heap, MTL::RenderStages stages)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useHeap_stages_), heap, stages);
+}
+
+// method: useHeaps:count:stages:
+_MTL_INLINE void MTL::RenderCommandEncoder::useHeaps(const MTL::Heap* const heaps[], NS::UInteger count, MTL::RenderStages stages)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useHeaps_count_stages_), heaps, count, stages);
+}
+
+// method: executeCommandsInBuffer:withRange:
+_MTL_INLINE void MTL::RenderCommandEncoder::executeCommandsInBuffer(const MTL::IndirectCommandBuffer* indirectCommandBuffer, NS::Range executionRange)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(executeCommandsInBuffer_withRange_), indirectCommandBuffer, executionRange);
+}
+
+// method: 
executeCommandsInBuffer:indirectBuffer:indirectBufferOffset: +_MTL_INLINE void MTL::RenderCommandEncoder::executeCommandsInBuffer(const MTL::IndirectCommandBuffer* indirectCommandbuffer, const MTL::Buffer* indirectRangeBuffer, NS::UInteger indirectBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(executeCommandsInBuffer_indirectBuffer_indirectBufferOffset_), indirectCommandbuffer, indirectRangeBuffer, indirectBufferOffset); +} + +// method: memoryBarrierWithScope:afterStages:beforeStages: +_MTL_INLINE void MTL::RenderCommandEncoder::memoryBarrier(MTL::BarrierScope scope, MTL::RenderStages after, MTL::RenderStages before) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(memoryBarrierWithScope_afterStages_beforeStages_), scope, after, before); +} + +// method: memoryBarrierWithResources:count:afterStages:beforeStages: +_MTL_INLINE void MTL::RenderCommandEncoder::memoryBarrier(const MTL::Resource* const resources[], NS::UInteger count, MTL::RenderStages after, MTL::RenderStages before) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(memoryBarrierWithResources_count_afterStages_beforeStages_), resources, count, after, before); +} + +// method: sampleCountersInBuffer:atSampleIndex:withBarrier: +_MTL_INLINE void MTL::RenderCommandEncoder::sampleCountersInBuffer(const MTL::CounterSampleBuffer* sampleBuffer, NS::UInteger sampleIndex, bool barrier) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(sampleCountersInBuffer_atSampleIndex_withBarrier_), sampleBuffer, sampleIndex, barrier); +} diff --git a/metal-cpp/Metal/MTLRenderPass.hpp b/metal-cpp/Metal/MTLRenderPass.hpp new file mode 100644 index 00000000..d5d03d75 --- /dev/null +++ b/metal-cpp/Metal/MTLRenderPass.hpp @@ -0,0 +1,786 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLRenderPass.hpp +// +// Copyright 2020-2022 Apple Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLRenderPass.hpp" + +namespace MTL +{ +struct ClearColor +{ + static ClearColor Make(double red, double green, double blue, double alpha); + + ClearColor() = default; + + ClearColor(double red, double green, double blue, double alpha); + + double red; + double green; + double blue; + double alpha; +} _MTL_PACKED; + +_MTL_ENUM(NS::UInteger, LoadAction) { + LoadActionDontCare = 0, + LoadActionLoad = 1, + LoadActionClear = 2, +}; + +_MTL_ENUM(NS::UInteger, StoreAction) { + StoreActionDontCare = 0, + StoreActionStore = 1, + StoreActionMultisampleResolve = 2, + StoreActionStoreAndMultisampleResolve = 3, + StoreActionUnknown = 4, + StoreActionCustomSampleDepthStore = 5, +}; + +_MTL_OPTIONS(NS::UInteger, StoreActionOptions) { + StoreActionOptionNone = 0, + StoreActionOptionValidMask = 1, + StoreActionOptionCustomSamplePositions = 1, +}; + +class RenderPassAttachmentDescriptor : public NS::Copying +{ +public: + static class RenderPassAttachmentDescriptor* alloc(); + + class RenderPassAttachmentDescriptor* init(); + + class Texture* texture() const; + void setTexture(const class Texture* texture); + + NS::UInteger level() const; 
+ void setLevel(NS::UInteger level); + + NS::UInteger slice() const; + void setSlice(NS::UInteger slice); + + NS::UInteger depthPlane() const; + void setDepthPlane(NS::UInteger depthPlane); + + class Texture* resolveTexture() const; + void setResolveTexture(const class Texture* resolveTexture); + + NS::UInteger resolveLevel() const; + void setResolveLevel(NS::UInteger resolveLevel); + + NS::UInteger resolveSlice() const; + void setResolveSlice(NS::UInteger resolveSlice); + + NS::UInteger resolveDepthPlane() const; + void setResolveDepthPlane(NS::UInteger resolveDepthPlane); + + MTL::LoadAction loadAction() const; + void setLoadAction(MTL::LoadAction loadAction); + + MTL::StoreAction storeAction() const; + void setStoreAction(MTL::StoreAction storeAction); + + MTL::StoreActionOptions storeActionOptions() const; + void setStoreActionOptions(MTL::StoreActionOptions storeActionOptions); +}; + +class RenderPassColorAttachmentDescriptor : public NS::Copying +{ +public: + static class RenderPassColorAttachmentDescriptor* alloc(); + + class RenderPassColorAttachmentDescriptor* init(); + + MTL::ClearColor clearColor() const; + void setClearColor(MTL::ClearColor clearColor); +}; + +_MTL_ENUM(NS::UInteger, MultisampleDepthResolveFilter) { + MultisampleDepthResolveFilterSample0 = 0, + MultisampleDepthResolveFilterMin = 1, + MultisampleDepthResolveFilterMax = 2, +}; + +class RenderPassDepthAttachmentDescriptor : public NS::Copying +{ +public: + static class RenderPassDepthAttachmentDescriptor* alloc(); + + class RenderPassDepthAttachmentDescriptor* init(); + + double clearDepth() const; + void setClearDepth(double clearDepth); + + MTL::MultisampleDepthResolveFilter depthResolveFilter() const; + void setDepthResolveFilter(MTL::MultisampleDepthResolveFilter depthResolveFilter); +}; + +_MTL_ENUM(NS::UInteger, MultisampleStencilResolveFilter) { + MultisampleStencilResolveFilterSample0 = 0, + MultisampleStencilResolveFilterDepthResolvedSample = 1, +}; + +class 
RenderPassStencilAttachmentDescriptor : public NS::Copying<RenderPassStencilAttachmentDescriptor, MTL::RenderPassAttachmentDescriptor>
+{
+public:
+    static class RenderPassStencilAttachmentDescriptor* alloc();
+
+    class RenderPassStencilAttachmentDescriptor* init();
+
+    uint32_t clearStencil() const;
+    void setClearStencil(uint32_t clearStencil);
+
+    MTL::MultisampleStencilResolveFilter stencilResolveFilter() const;
+    void setStencilResolveFilter(MTL::MultisampleStencilResolveFilter stencilResolveFilter);
+};
+
+class RenderPassColorAttachmentDescriptorArray : public NS::Referencing<RenderPassColorAttachmentDescriptorArray>
+{
+public:
+    static class RenderPassColorAttachmentDescriptorArray* alloc();
+
+    class RenderPassColorAttachmentDescriptorArray* init();
+
+    class RenderPassColorAttachmentDescriptor* object(NS::UInteger attachmentIndex);
+
+    void setObject(const class RenderPassColorAttachmentDescriptor* attachment, NS::UInteger attachmentIndex);
+};
+
+class RenderPassSampleBufferAttachmentDescriptor : public NS::Copying<RenderPassSampleBufferAttachmentDescriptor>
+{
+public:
+    static class RenderPassSampleBufferAttachmentDescriptor* alloc();
+
+    class RenderPassSampleBufferAttachmentDescriptor* init();
+
+    class CounterSampleBuffer* sampleBuffer() const;
+    void setSampleBuffer(const class CounterSampleBuffer* sampleBuffer);
+
+    NS::UInteger startOfVertexSampleIndex() const;
+    void setStartOfVertexSampleIndex(NS::UInteger startOfVertexSampleIndex);
+
+    NS::UInteger endOfVertexSampleIndex() const;
+    void setEndOfVertexSampleIndex(NS::UInteger endOfVertexSampleIndex);
+
+    NS::UInteger startOfFragmentSampleIndex() const;
+    void setStartOfFragmentSampleIndex(NS::UInteger startOfFragmentSampleIndex);
+
+    NS::UInteger endOfFragmentSampleIndex() const;
+    void setEndOfFragmentSampleIndex(NS::UInteger endOfFragmentSampleIndex);
+};
+
+class RenderPassSampleBufferAttachmentDescriptorArray : public NS::Referencing<RenderPassSampleBufferAttachmentDescriptorArray>
+{
+public:
+    static class RenderPassSampleBufferAttachmentDescriptorArray* alloc();
+
+    class RenderPassSampleBufferAttachmentDescriptorArray* init();
+
+    class RenderPassSampleBufferAttachmentDescriptor* 
object(NS::UInteger attachmentIndex);
+
+    void setObject(const class RenderPassSampleBufferAttachmentDescriptor* attachment, NS::UInteger attachmentIndex);
+};
+
+class RenderPassDescriptor : public NS::Copying<RenderPassDescriptor>
+{
+public:
+    static class RenderPassDescriptor* alloc();
+
+    class RenderPassDescriptor* init();
+
+    static class RenderPassDescriptor* renderPassDescriptor();
+
+    class RenderPassColorAttachmentDescriptorArray* colorAttachments() const;
+
+    class RenderPassDepthAttachmentDescriptor* depthAttachment() const;
+    void setDepthAttachment(const class RenderPassDepthAttachmentDescriptor* depthAttachment);
+
+    class RenderPassStencilAttachmentDescriptor* stencilAttachment() const;
+    void setStencilAttachment(const class RenderPassStencilAttachmentDescriptor* stencilAttachment);
+
+    class Buffer* visibilityResultBuffer() const;
+    void setVisibilityResultBuffer(const class Buffer* visibilityResultBuffer);
+
+    NS::UInteger renderTargetArrayLength() const;
+    void setRenderTargetArrayLength(NS::UInteger renderTargetArrayLength);
+
+    NS::UInteger imageblockSampleLength() const;
+    void setImageblockSampleLength(NS::UInteger imageblockSampleLength);
+
+    NS::UInteger threadgroupMemoryLength() const;
+    void setThreadgroupMemoryLength(NS::UInteger threadgroupMemoryLength);
+
+    NS::UInteger tileWidth() const;
+    void setTileWidth(NS::UInteger tileWidth);
+
+    NS::UInteger tileHeight() const;
+    void setTileHeight(NS::UInteger tileHeight);
+
+    NS::UInteger defaultRasterSampleCount() const;
+    void setDefaultRasterSampleCount(NS::UInteger defaultRasterSampleCount);
+
+    NS::UInteger renderTargetWidth() const;
+    void setRenderTargetWidth(NS::UInteger renderTargetWidth);
+
+    NS::UInteger renderTargetHeight() const;
+    void setRenderTargetHeight(NS::UInteger renderTargetHeight);
+
+    void setSamplePositions(const MTL::SamplePosition* positions, NS::UInteger count);
+
+    NS::UInteger getSamplePositions(MTL::SamplePosition* positions, NS::UInteger count);
+
+    class RasterizationRateMap* 
rasterizationRateMap() const; + void setRasterizationRateMap(const class RasterizationRateMap* rasterizationRateMap); + + class RenderPassSampleBufferAttachmentDescriptorArray* sampleBufferAttachments() const; +}; + +} + +_MTL_INLINE MTL::ClearColor MTL::ClearColor::Make(double red, double green, double blue, double alpha) +{ + return ClearColor(red, green, blue, alpha); +} + +_MTL_INLINE MTL::ClearColor::ClearColor(double _red, double _green, double _blue, double _alpha) + : red(_red) + , green(_green) + , blue(_blue) + , alpha(_alpha) +{ +} + +// static method: alloc +_MTL_INLINE MTL::RenderPassAttachmentDescriptor* MTL::RenderPassAttachmentDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLRenderPassAttachmentDescriptor)); +} + +// method: init +_MTL_INLINE MTL::RenderPassAttachmentDescriptor* MTL::RenderPassAttachmentDescriptor::init() +{ + return NS::Object::init(); +} + +// property: texture +_MTL_INLINE MTL::Texture* MTL::RenderPassAttachmentDescriptor::texture() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(texture)); +} + +_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setTexture(const MTL::Texture* texture) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTexture_), texture); +} + +// property: level +_MTL_INLINE NS::UInteger MTL::RenderPassAttachmentDescriptor::level() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(level)); +} + +_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setLevel(NS::UInteger level) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLevel_), level); +} + +// property: slice +_MTL_INLINE NS::UInteger MTL::RenderPassAttachmentDescriptor::slice() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(slice)); +} + +_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setSlice(NS::UInteger slice) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSlice_), slice); +} + +// property: depthPlane +_MTL_INLINE NS::UInteger MTL::RenderPassAttachmentDescriptor::depthPlane() 
const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(depthPlane));
+}
+
+_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setDepthPlane(NS::UInteger depthPlane)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthPlane_), depthPlane);
+}
+
+// property: resolveTexture
+_MTL_INLINE MTL::Texture* MTL::RenderPassAttachmentDescriptor::resolveTexture() const
+{
+    return Object::sendMessage<MTL::Texture*>(this, _MTL_PRIVATE_SEL(resolveTexture));
+}
+
+_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setResolveTexture(const MTL::Texture* resolveTexture)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setResolveTexture_), resolveTexture);
+}
+
+// property: resolveLevel
+_MTL_INLINE NS::UInteger MTL::RenderPassAttachmentDescriptor::resolveLevel() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(resolveLevel));
+}
+
+_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setResolveLevel(NS::UInteger resolveLevel)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setResolveLevel_), resolveLevel);
+}
+
+// property: resolveSlice
+_MTL_INLINE NS::UInteger MTL::RenderPassAttachmentDescriptor::resolveSlice() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(resolveSlice));
+}
+
+_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setResolveSlice(NS::UInteger resolveSlice)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setResolveSlice_), resolveSlice);
+}
+
+// property: resolveDepthPlane
+_MTL_INLINE NS::UInteger MTL::RenderPassAttachmentDescriptor::resolveDepthPlane() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(resolveDepthPlane));
+}
+
+_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setResolveDepthPlane(NS::UInteger resolveDepthPlane)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setResolveDepthPlane_), resolveDepthPlane);
+}
+
+// property: loadAction
+_MTL_INLINE MTL::LoadAction MTL::RenderPassAttachmentDescriptor::loadAction() const
+{
+    return Object::sendMessage<MTL::LoadAction>(this, _MTL_PRIVATE_SEL(loadAction));
+}
+
+_MTL_INLINE 
void MTL::RenderPassAttachmentDescriptor::setLoadAction(MTL::LoadAction loadAction) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLoadAction_), loadAction); +} + +// property: storeAction +_MTL_INLINE MTL::StoreAction MTL::RenderPassAttachmentDescriptor::storeAction() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(storeAction)); +} + +_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setStoreAction(MTL::StoreAction storeAction) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStoreAction_), storeAction); +} + +// property: storeActionOptions +_MTL_INLINE MTL::StoreActionOptions MTL::RenderPassAttachmentDescriptor::storeActionOptions() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(storeActionOptions)); +} + +_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setStoreActionOptions(MTL::StoreActionOptions storeActionOptions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStoreActionOptions_), storeActionOptions); +} + +// static method: alloc +_MTL_INLINE MTL::RenderPassColorAttachmentDescriptor* MTL::RenderPassColorAttachmentDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLRenderPassColorAttachmentDescriptor)); +} + +// method: init +_MTL_INLINE MTL::RenderPassColorAttachmentDescriptor* MTL::RenderPassColorAttachmentDescriptor::init() +{ + return NS::Object::init(); +} + +// property: clearColor +_MTL_INLINE MTL::ClearColor MTL::RenderPassColorAttachmentDescriptor::clearColor() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(clearColor)); +} + +_MTL_INLINE void MTL::RenderPassColorAttachmentDescriptor::setClearColor(MTL::ClearColor clearColor) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setClearColor_), clearColor); +} + +// static method: alloc +_MTL_INLINE MTL::RenderPassDepthAttachmentDescriptor* MTL::RenderPassDepthAttachmentDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLRenderPassDepthAttachmentDescriptor)); +} + +// method: init +_MTL_INLINE 
MTL::RenderPassDepthAttachmentDescriptor* MTL::RenderPassDepthAttachmentDescriptor::init() +{ + return NS::Object::init(); +} + +// property: clearDepth +_MTL_INLINE double MTL::RenderPassDepthAttachmentDescriptor::clearDepth() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(clearDepth)); +} + +_MTL_INLINE void MTL::RenderPassDepthAttachmentDescriptor::setClearDepth(double clearDepth) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setClearDepth_), clearDepth); +} + +// property: depthResolveFilter +_MTL_INLINE MTL::MultisampleDepthResolveFilter MTL::RenderPassDepthAttachmentDescriptor::depthResolveFilter() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(depthResolveFilter)); +} + +_MTL_INLINE void MTL::RenderPassDepthAttachmentDescriptor::setDepthResolveFilter(MTL::MultisampleDepthResolveFilter depthResolveFilter) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setDepthResolveFilter_), depthResolveFilter); +} + +// static method: alloc +_MTL_INLINE MTL::RenderPassStencilAttachmentDescriptor* MTL::RenderPassStencilAttachmentDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLRenderPassStencilAttachmentDescriptor)); +} + +// method: init +_MTL_INLINE MTL::RenderPassStencilAttachmentDescriptor* MTL::RenderPassStencilAttachmentDescriptor::init() +{ + return NS::Object::init(); +} + +// property: clearStencil +_MTL_INLINE uint32_t MTL::RenderPassStencilAttachmentDescriptor::clearStencil() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(clearStencil)); +} + +_MTL_INLINE void MTL::RenderPassStencilAttachmentDescriptor::setClearStencil(uint32_t clearStencil) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setClearStencil_), clearStencil); +} + +// property: stencilResolveFilter +_MTL_INLINE MTL::MultisampleStencilResolveFilter MTL::RenderPassStencilAttachmentDescriptor::stencilResolveFilter() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(stencilResolveFilter)); +} + +_MTL_INLINE void 
MTL::RenderPassStencilAttachmentDescriptor::setStencilResolveFilter(MTL::MultisampleStencilResolveFilter stencilResolveFilter) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStencilResolveFilter_), stencilResolveFilter); +} + +// static method: alloc +_MTL_INLINE MTL::RenderPassColorAttachmentDescriptorArray* MTL::RenderPassColorAttachmentDescriptorArray::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLRenderPassColorAttachmentDescriptorArray)); +} + +// method: init +_MTL_INLINE MTL::RenderPassColorAttachmentDescriptorArray* MTL::RenderPassColorAttachmentDescriptorArray::init() +{ + return NS::Object::init(); +} + +// method: objectAtIndexedSubscript: +_MTL_INLINE MTL::RenderPassColorAttachmentDescriptor* MTL::RenderPassColorAttachmentDescriptorArray::object(NS::UInteger attachmentIndex) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), attachmentIndex); +} + +// method: setObject:atIndexedSubscript: +_MTL_INLINE void MTL::RenderPassColorAttachmentDescriptorArray::setObject(const MTL::RenderPassColorAttachmentDescriptor* attachment, NS::UInteger attachmentIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), attachment, attachmentIndex); +} + +// static method: alloc +_MTL_INLINE MTL::RenderPassSampleBufferAttachmentDescriptor* MTL::RenderPassSampleBufferAttachmentDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLRenderPassSampleBufferAttachmentDescriptor)); +} + +// method: init +_MTL_INLINE MTL::RenderPassSampleBufferAttachmentDescriptor* MTL::RenderPassSampleBufferAttachmentDescriptor::init() +{ + return NS::Object::init(); +} + +// property: sampleBuffer +_MTL_INLINE MTL::CounterSampleBuffer* MTL::RenderPassSampleBufferAttachmentDescriptor::sampleBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sampleBuffer)); +} + +_MTL_INLINE void MTL::RenderPassSampleBufferAttachmentDescriptor::setSampleBuffer(const MTL::CounterSampleBuffer* sampleBuffer) 
+{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSampleBuffer_), sampleBuffer); +} + +// property: startOfVertexSampleIndex +_MTL_INLINE NS::UInteger MTL::RenderPassSampleBufferAttachmentDescriptor::startOfVertexSampleIndex() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(startOfVertexSampleIndex)); +} + +_MTL_INLINE void MTL::RenderPassSampleBufferAttachmentDescriptor::setStartOfVertexSampleIndex(NS::UInteger startOfVertexSampleIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStartOfVertexSampleIndex_), startOfVertexSampleIndex); +} + +// property: endOfVertexSampleIndex +_MTL_INLINE NS::UInteger MTL::RenderPassSampleBufferAttachmentDescriptor::endOfVertexSampleIndex() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(endOfVertexSampleIndex)); +} + +_MTL_INLINE void MTL::RenderPassSampleBufferAttachmentDescriptor::setEndOfVertexSampleIndex(NS::UInteger endOfVertexSampleIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setEndOfVertexSampleIndex_), endOfVertexSampleIndex); +} + +// property: startOfFragmentSampleIndex +_MTL_INLINE NS::UInteger MTL::RenderPassSampleBufferAttachmentDescriptor::startOfFragmentSampleIndex() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(startOfFragmentSampleIndex)); +} + +_MTL_INLINE void MTL::RenderPassSampleBufferAttachmentDescriptor::setStartOfFragmentSampleIndex(NS::UInteger startOfFragmentSampleIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStartOfFragmentSampleIndex_), startOfFragmentSampleIndex); +} + +// property: endOfFragmentSampleIndex +_MTL_INLINE NS::UInteger MTL::RenderPassSampleBufferAttachmentDescriptor::endOfFragmentSampleIndex() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(endOfFragmentSampleIndex)); +} + +_MTL_INLINE void MTL::RenderPassSampleBufferAttachmentDescriptor::setEndOfFragmentSampleIndex(NS::UInteger endOfFragmentSampleIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setEndOfFragmentSampleIndex_), 
endOfFragmentSampleIndex); +} + +// static method: alloc +_MTL_INLINE MTL::RenderPassSampleBufferAttachmentDescriptorArray* MTL::RenderPassSampleBufferAttachmentDescriptorArray::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLRenderPassSampleBufferAttachmentDescriptorArray)); +} + +// method: init +_MTL_INLINE MTL::RenderPassSampleBufferAttachmentDescriptorArray* MTL::RenderPassSampleBufferAttachmentDescriptorArray::init() +{ + return NS::Object::init(); +} + +// method: objectAtIndexedSubscript: +_MTL_INLINE MTL::RenderPassSampleBufferAttachmentDescriptor* MTL::RenderPassSampleBufferAttachmentDescriptorArray::object(NS::UInteger attachmentIndex) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), attachmentIndex); +} + +// method: setObject:atIndexedSubscript: +_MTL_INLINE void MTL::RenderPassSampleBufferAttachmentDescriptorArray::setObject(const MTL::RenderPassSampleBufferAttachmentDescriptor* attachment, NS::UInteger attachmentIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), attachment, attachmentIndex); +} + +// static method: alloc +_MTL_INLINE MTL::RenderPassDescriptor* MTL::RenderPassDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLRenderPassDescriptor)); +} + +// method: init +_MTL_INLINE MTL::RenderPassDescriptor* MTL::RenderPassDescriptor::init() +{ + return NS::Object::init(); +} + +// static method: renderPassDescriptor +_MTL_INLINE MTL::RenderPassDescriptor* MTL::RenderPassDescriptor::renderPassDescriptor() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLRenderPassDescriptor), _MTL_PRIVATE_SEL(renderPassDescriptor)); +} + +// property: colorAttachments +_MTL_INLINE MTL::RenderPassColorAttachmentDescriptorArray* MTL::RenderPassDescriptor::colorAttachments() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(colorAttachments)); +} + +// property: depthAttachment +_MTL_INLINE MTL::RenderPassDepthAttachmentDescriptor* 
MTL::RenderPassDescriptor::depthAttachment() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(depthAttachment)); +} + +_MTL_INLINE void MTL::RenderPassDescriptor::setDepthAttachment(const MTL::RenderPassDepthAttachmentDescriptor* depthAttachment) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setDepthAttachment_), depthAttachment); +} + +// property: stencilAttachment +_MTL_INLINE MTL::RenderPassStencilAttachmentDescriptor* MTL::RenderPassDescriptor::stencilAttachment() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(stencilAttachment)); +} + +_MTL_INLINE void MTL::RenderPassDescriptor::setStencilAttachment(const MTL::RenderPassStencilAttachmentDescriptor* stencilAttachment) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStencilAttachment_), stencilAttachment); +} + +// property: visibilityResultBuffer +_MTL_INLINE MTL::Buffer* MTL::RenderPassDescriptor::visibilityResultBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(visibilityResultBuffer)); +} + +_MTL_INLINE void MTL::RenderPassDescriptor::setVisibilityResultBuffer(const MTL::Buffer* visibilityResultBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVisibilityResultBuffer_), visibilityResultBuffer); +} + +// property: renderTargetArrayLength +_MTL_INLINE NS::UInteger MTL::RenderPassDescriptor::renderTargetArrayLength() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(renderTargetArrayLength)); +} + +_MTL_INLINE void MTL::RenderPassDescriptor::setRenderTargetArrayLength(NS::UInteger renderTargetArrayLength) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRenderTargetArrayLength_), renderTargetArrayLength); +} + +// property: imageblockSampleLength +_MTL_INLINE NS::UInteger MTL::RenderPassDescriptor::imageblockSampleLength() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(imageblockSampleLength)); +} + +_MTL_INLINE void MTL::RenderPassDescriptor::setImageblockSampleLength(NS::UInteger imageblockSampleLength) +{ + 
Object::sendMessage(this, _MTL_PRIVATE_SEL(setImageblockSampleLength_), imageblockSampleLength); +} + +// property: threadgroupMemoryLength +_MTL_INLINE NS::UInteger MTL::RenderPassDescriptor::threadgroupMemoryLength() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(threadgroupMemoryLength)); +} + +_MTL_INLINE void MTL::RenderPassDescriptor::setThreadgroupMemoryLength(NS::UInteger threadgroupMemoryLength) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setThreadgroupMemoryLength_), threadgroupMemoryLength); +} + +// property: tileWidth +_MTL_INLINE NS::UInteger MTL::RenderPassDescriptor::tileWidth() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(tileWidth)); +} + +_MTL_INLINE void MTL::RenderPassDescriptor::setTileWidth(NS::UInteger tileWidth) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTileWidth_), tileWidth); +} + +// property: tileHeight +_MTL_INLINE NS::UInteger MTL::RenderPassDescriptor::tileHeight() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(tileHeight)); +} + +_MTL_INLINE void MTL::RenderPassDescriptor::setTileHeight(NS::UInteger tileHeight) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTileHeight_), tileHeight); +} + +// property: defaultRasterSampleCount +_MTL_INLINE NS::UInteger MTL::RenderPassDescriptor::defaultRasterSampleCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(defaultRasterSampleCount)); +} + +_MTL_INLINE void MTL::RenderPassDescriptor::setDefaultRasterSampleCount(NS::UInteger defaultRasterSampleCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setDefaultRasterSampleCount_), defaultRasterSampleCount); +} + +// property: renderTargetWidth +_MTL_INLINE NS::UInteger MTL::RenderPassDescriptor::renderTargetWidth() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(renderTargetWidth)); +} + +_MTL_INLINE void MTL::RenderPassDescriptor::setRenderTargetWidth(NS::UInteger renderTargetWidth) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRenderTargetWidth_), 
renderTargetWidth); +} + +// property: renderTargetHeight +_MTL_INLINE NS::UInteger MTL::RenderPassDescriptor::renderTargetHeight() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(renderTargetHeight)); +} + +_MTL_INLINE void MTL::RenderPassDescriptor::setRenderTargetHeight(NS::UInteger renderTargetHeight) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRenderTargetHeight_), renderTargetHeight); +} + +// method: setSamplePositions:count: +_MTL_INLINE void MTL::RenderPassDescriptor::setSamplePositions(const MTL::SamplePosition* positions, NS::UInteger count) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSamplePositions_count_), positions, count); +} + +// method: getSamplePositions:count: +_MTL_INLINE NS::UInteger MTL::RenderPassDescriptor::getSamplePositions(MTL::SamplePosition* positions, NS::UInteger count) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(getSamplePositions_count_), positions, count); +} + +// property: rasterizationRateMap +_MTL_INLINE MTL::RasterizationRateMap* MTL::RenderPassDescriptor::rasterizationRateMap() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(rasterizationRateMap)); +} + +_MTL_INLINE void MTL::RenderPassDescriptor::setRasterizationRateMap(const MTL::RasterizationRateMap* rasterizationRateMap) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRasterizationRateMap_), rasterizationRateMap); +} + +// property: sampleBufferAttachments +_MTL_INLINE MTL::RenderPassSampleBufferAttachmentDescriptorArray* MTL::RenderPassDescriptor::sampleBufferAttachments() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sampleBufferAttachments)); +} diff --git a/metal-cpp/Metal/MTLRenderPipeline.hpp b/metal-cpp/Metal/MTLRenderPipeline.hpp new file mode 100644 index 00000000..903f12a5 --- /dev/null +++ b/metal-cpp/Metal/MTLRenderPipeline.hpp @@ -0,0 +1,1598 @@ 
+//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLRenderPipeline.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLPixelFormat.hpp" +#include "MTLRenderCommandEncoder.hpp" +#include "MTLRenderPipeline.hpp" +#include "MTLTypes.hpp" + +namespace MTL +{ +_MTL_ENUM(NS::UInteger, BlendFactor) { + BlendFactorZero = 0, + BlendFactorOne = 1, + BlendFactorSourceColor = 2, + BlendFactorOneMinusSourceColor = 3, + BlendFactorSourceAlpha = 4, + BlendFactorOneMinusSourceAlpha = 5, + BlendFactorDestinationColor = 6, + BlendFactorOneMinusDestinationColor = 7, + BlendFactorDestinationAlpha = 8, + BlendFactorOneMinusDestinationAlpha = 9, + BlendFactorSourceAlphaSaturated = 10, + BlendFactorBlendColor = 11, + BlendFactorOneMinusBlendColor = 12, + BlendFactorBlendAlpha = 13, + BlendFactorOneMinusBlendAlpha = 14, + BlendFactorSource1Color = 15, + BlendFactorOneMinusSource1Color = 16, + BlendFactorSource1Alpha = 17, + BlendFactorOneMinusSource1Alpha = 18, +}; + +_MTL_ENUM(NS::UInteger, BlendOperation) { + 
BlendOperationAdd = 0, + BlendOperationSubtract = 1, + BlendOperationReverseSubtract = 2, + BlendOperationMin = 3, + BlendOperationMax = 4, +}; + +_MTL_OPTIONS(NS::UInteger, ColorWriteMask) { + ColorWriteMaskNone = 0, + ColorWriteMaskAlpha = 1, + ColorWriteMaskBlue = 2, + ColorWriteMaskGreen = 4, + ColorWriteMaskRed = 8, + ColorWriteMaskAll = 15, +}; + +_MTL_ENUM(NS::UInteger, PrimitiveTopologyClass) { + PrimitiveTopologyClassUnspecified = 0, + PrimitiveTopologyClassPoint = 1, + PrimitiveTopologyClassLine = 2, + PrimitiveTopologyClassTriangle = 3, +}; + +_MTL_ENUM(NS::UInteger, TessellationPartitionMode) { + TessellationPartitionModePow2 = 0, + TessellationPartitionModeInteger = 1, + TessellationPartitionModeFractionalOdd = 2, + TessellationPartitionModeFractionalEven = 3, +}; + +_MTL_ENUM(NS::UInteger, TessellationFactorStepFunction) { + TessellationFactorStepFunctionConstant = 0, + TessellationFactorStepFunctionPerPatch = 1, + TessellationFactorStepFunctionPerInstance = 2, + TessellationFactorStepFunctionPerPatchAndPerInstance = 3, +}; + +_MTL_ENUM(NS::UInteger, TessellationFactorFormat) { + TessellationFactorFormatHalf = 0, +}; + +_MTL_ENUM(NS::UInteger, TessellationControlPointIndexType) { + TessellationControlPointIndexTypeNone = 0, + TessellationControlPointIndexTypeUInt16 = 1, + TessellationControlPointIndexTypeUInt32 = 2, +}; + +class RenderPipelineColorAttachmentDescriptor : public NS::Copying +{ +public: + static class RenderPipelineColorAttachmentDescriptor* alloc(); + + class RenderPipelineColorAttachmentDescriptor* init(); + + MTL::PixelFormat pixelFormat() const; + void setPixelFormat(MTL::PixelFormat pixelFormat); + + bool blendingEnabled() const; + void setBlendingEnabled(bool blendingEnabled); + + MTL::BlendFactor sourceRGBBlendFactor() const; + void setSourceRGBBlendFactor(MTL::BlendFactor sourceRGBBlendFactor); + + MTL::BlendFactor destinationRGBBlendFactor() const; + void setDestinationRGBBlendFactor(MTL::BlendFactor destinationRGBBlendFactor); 
+ + MTL::BlendOperation rgbBlendOperation() const; + void setRgbBlendOperation(MTL::BlendOperation rgbBlendOperation); + + MTL::BlendFactor sourceAlphaBlendFactor() const; + void setSourceAlphaBlendFactor(MTL::BlendFactor sourceAlphaBlendFactor); + + MTL::BlendFactor destinationAlphaBlendFactor() const; + void setDestinationAlphaBlendFactor(MTL::BlendFactor destinationAlphaBlendFactor); + + MTL::BlendOperation alphaBlendOperation() const; + void setAlphaBlendOperation(MTL::BlendOperation alphaBlendOperation); + + MTL::ColorWriteMask writeMask() const; + void setWriteMask(MTL::ColorWriteMask writeMask); +}; + +class RenderPipelineReflection : public NS::Referencing +{ +public: + static class RenderPipelineReflection* alloc(); + + class RenderPipelineReflection* init(); + + NS::Array* vertexBindings() const; + + NS::Array* fragmentBindings() const; + + NS::Array* tileBindings() const; + + NS::Array* objectBindings() const; + + NS::Array* meshBindings() const; + + NS::Array* vertexArguments() const; + + NS::Array* fragmentArguments() const; + + NS::Array* tileArguments() const; +}; + +class RenderPipelineDescriptor : public NS::Copying +{ +public: + static class RenderPipelineDescriptor* alloc(); + + class RenderPipelineDescriptor* init(); + + NS::String* label() const; + void setLabel(const NS::String* label); + + class Function* vertexFunction() const; + void setVertexFunction(const class Function* vertexFunction); + + class Function* fragmentFunction() const; + void setFragmentFunction(const class Function* fragmentFunction); + + class VertexDescriptor* vertexDescriptor() const; + void setVertexDescriptor(const class VertexDescriptor* vertexDescriptor); + + NS::UInteger sampleCount() const; + void setSampleCount(NS::UInteger sampleCount); + + NS::UInteger rasterSampleCount() const; + void setRasterSampleCount(NS::UInteger rasterSampleCount); + + bool alphaToCoverageEnabled() const; + void setAlphaToCoverageEnabled(bool alphaToCoverageEnabled); + + bool 
alphaToOneEnabled() const; + void setAlphaToOneEnabled(bool alphaToOneEnabled); + + bool rasterizationEnabled() const; + void setRasterizationEnabled(bool rasterizationEnabled); + + NS::UInteger maxVertexAmplificationCount() const; + void setMaxVertexAmplificationCount(NS::UInteger maxVertexAmplificationCount); + + class RenderPipelineColorAttachmentDescriptorArray* colorAttachments() const; + + MTL::PixelFormat depthAttachmentPixelFormat() const; + void setDepthAttachmentPixelFormat(MTL::PixelFormat depthAttachmentPixelFormat); + + MTL::PixelFormat stencilAttachmentPixelFormat() const; + void setStencilAttachmentPixelFormat(MTL::PixelFormat stencilAttachmentPixelFormat); + + MTL::PrimitiveTopologyClass inputPrimitiveTopology() const; + void setInputPrimitiveTopology(MTL::PrimitiveTopologyClass inputPrimitiveTopology); + + MTL::TessellationPartitionMode tessellationPartitionMode() const; + void setTessellationPartitionMode(MTL::TessellationPartitionMode tessellationPartitionMode); + + NS::UInteger maxTessellationFactor() const; + void setMaxTessellationFactor(NS::UInteger maxTessellationFactor); + + bool tessellationFactorScaleEnabled() const; + void setTessellationFactorScaleEnabled(bool tessellationFactorScaleEnabled); + + MTL::TessellationFactorFormat tessellationFactorFormat() const; + void setTessellationFactorFormat(MTL::TessellationFactorFormat tessellationFactorFormat); + + MTL::TessellationControlPointIndexType tessellationControlPointIndexType() const; + void setTessellationControlPointIndexType(MTL::TessellationControlPointIndexType tessellationControlPointIndexType); + + MTL::TessellationFactorStepFunction tessellationFactorStepFunction() const; + void setTessellationFactorStepFunction(MTL::TessellationFactorStepFunction tessellationFactorStepFunction); + + MTL::Winding tessellationOutputWindingOrder() const; + void setTessellationOutputWindingOrder(MTL::Winding tessellationOutputWindingOrder); + + class PipelineBufferDescriptorArray* vertexBuffers() 
const; + + class PipelineBufferDescriptorArray* fragmentBuffers() const; + + bool supportIndirectCommandBuffers() const; + void setSupportIndirectCommandBuffers(bool supportIndirectCommandBuffers); + + NS::Array* binaryArchives() const; + void setBinaryArchives(const NS::Array* binaryArchives); + + NS::Array* vertexPreloadedLibraries() const; + void setVertexPreloadedLibraries(const NS::Array* vertexPreloadedLibraries); + + NS::Array* fragmentPreloadedLibraries() const; + void setFragmentPreloadedLibraries(const NS::Array* fragmentPreloadedLibraries); + + class LinkedFunctions* vertexLinkedFunctions() const; + void setVertexLinkedFunctions(const class LinkedFunctions* vertexLinkedFunctions); + + class LinkedFunctions* fragmentLinkedFunctions() const; + void setFragmentLinkedFunctions(const class LinkedFunctions* fragmentLinkedFunctions); + + bool supportAddingVertexBinaryFunctions() const; + void setSupportAddingVertexBinaryFunctions(bool supportAddingVertexBinaryFunctions); + + bool supportAddingFragmentBinaryFunctions() const; + void setSupportAddingFragmentBinaryFunctions(bool supportAddingFragmentBinaryFunctions); + + NS::UInteger maxVertexCallStackDepth() const; + void setMaxVertexCallStackDepth(NS::UInteger maxVertexCallStackDepth); + + NS::UInteger maxFragmentCallStackDepth() const; + void setMaxFragmentCallStackDepth(NS::UInteger maxFragmentCallStackDepth); + + void reset(); +}; + +class RenderPipelineFunctionsDescriptor : public NS::Copying +{ +public: + static class RenderPipelineFunctionsDescriptor* alloc(); + + class RenderPipelineFunctionsDescriptor* init(); + + NS::Array* vertexAdditionalBinaryFunctions() const; + void setVertexAdditionalBinaryFunctions(const NS::Array* vertexAdditionalBinaryFunctions); + + NS::Array* fragmentAdditionalBinaryFunctions() const; + void setFragmentAdditionalBinaryFunctions(const NS::Array* fragmentAdditionalBinaryFunctions); + + NS::Array* tileAdditionalBinaryFunctions() const; + void 
setTileAdditionalBinaryFunctions(const NS::Array* tileAdditionalBinaryFunctions); +}; + +class RenderPipelineState : public NS::Referencing +{ +public: + NS::String* label() const; + + class Device* device() const; + + NS::UInteger maxTotalThreadsPerThreadgroup() const; + + bool threadgroupSizeMatchesTileSize() const; + + NS::UInteger imageblockSampleLength() const; + + NS::UInteger imageblockMemoryLength(MTL::Size imageblockDimensions); + + bool supportIndirectCommandBuffers() const; + + NS::UInteger maxTotalThreadsPerObjectThreadgroup() const; + + NS::UInteger maxTotalThreadsPerMeshThreadgroup() const; + + NS::UInteger objectThreadExecutionWidth() const; + + NS::UInteger meshThreadExecutionWidth() const; + + NS::UInteger maxTotalThreadgroupsPerMeshGrid() const; + + MTL::ResourceID gpuResourceID() const; + + class FunctionHandle* functionHandle(const class Function* function, MTL::RenderStages stage); + + class VisibleFunctionTable* newVisibleFunctionTable(const class VisibleFunctionTableDescriptor* descriptor, MTL::RenderStages stage); + + class IntersectionFunctionTable* newIntersectionFunctionTable(const class IntersectionFunctionTableDescriptor* descriptor, MTL::RenderStages stage); + + class RenderPipelineState* newRenderPipelineState(const class RenderPipelineFunctionsDescriptor* additionalBinaryFunctions, NS::Error** error); +}; + +class RenderPipelineColorAttachmentDescriptorArray : public NS::Referencing +{ +public: + static class RenderPipelineColorAttachmentDescriptorArray* alloc(); + + class RenderPipelineColorAttachmentDescriptorArray* init(); + + class RenderPipelineColorAttachmentDescriptor* object(NS::UInteger attachmentIndex); + + void setObject(const class RenderPipelineColorAttachmentDescriptor* attachment, NS::UInteger attachmentIndex); +}; + +class TileRenderPipelineColorAttachmentDescriptor : public NS::Copying +{ +public: + static class TileRenderPipelineColorAttachmentDescriptor* alloc(); + + class 
TileRenderPipelineColorAttachmentDescriptor* init(); + + MTL::PixelFormat pixelFormat() const; + void setPixelFormat(MTL::PixelFormat pixelFormat); +}; + +class TileRenderPipelineColorAttachmentDescriptorArray : public NS::Referencing +{ +public: + static class TileRenderPipelineColorAttachmentDescriptorArray* alloc(); + + class TileRenderPipelineColorAttachmentDescriptorArray* init(); + + class TileRenderPipelineColorAttachmentDescriptor* object(NS::UInteger attachmentIndex); + + void setObject(const class TileRenderPipelineColorAttachmentDescriptor* attachment, NS::UInteger attachmentIndex); +}; + +class TileRenderPipelineDescriptor : public NS::Copying +{ +public: + static class TileRenderPipelineDescriptor* alloc(); + + class TileRenderPipelineDescriptor* init(); + + NS::String* label() const; + void setLabel(const NS::String* label); + + class Function* tileFunction() const; + void setTileFunction(const class Function* tileFunction); + + NS::UInteger rasterSampleCount() const; + void setRasterSampleCount(NS::UInteger rasterSampleCount); + + class TileRenderPipelineColorAttachmentDescriptorArray* colorAttachments() const; + + bool threadgroupSizeMatchesTileSize() const; + void setThreadgroupSizeMatchesTileSize(bool threadgroupSizeMatchesTileSize); + + class PipelineBufferDescriptorArray* tileBuffers() const; + + NS::UInteger maxTotalThreadsPerThreadgroup() const; + void setMaxTotalThreadsPerThreadgroup(NS::UInteger maxTotalThreadsPerThreadgroup); + + NS::Array* binaryArchives() const; + void setBinaryArchives(const NS::Array* binaryArchives); + + NS::Array* preloadedLibraries() const; + void setPreloadedLibraries(const NS::Array* preloadedLibraries); + + class LinkedFunctions* linkedFunctions() const; + void setLinkedFunctions(const class LinkedFunctions* linkedFunctions); + + bool supportAddingBinaryFunctions() const; + void setSupportAddingBinaryFunctions(bool supportAddingBinaryFunctions); + + NS::UInteger maxCallStackDepth() const; + void 
setMaxCallStackDepth(NS::UInteger maxCallStackDepth); + + void reset(); +}; + +class MeshRenderPipelineDescriptor : public NS::Copying +{ +public: + static class MeshRenderPipelineDescriptor* alloc(); + + class MeshRenderPipelineDescriptor* init(); + + NS::String* label() const; + void setLabel(const NS::String* label); + + class Function* objectFunction() const; + void setObjectFunction(const class Function* objectFunction); + + class Function* meshFunction() const; + void setMeshFunction(const class Function* meshFunction); + + class Function* fragmentFunction() const; + void setFragmentFunction(const class Function* fragmentFunction); + + NS::UInteger maxTotalThreadsPerObjectThreadgroup() const; + void setMaxTotalThreadsPerObjectThreadgroup(NS::UInteger maxTotalThreadsPerObjectThreadgroup); + + NS::UInteger maxTotalThreadsPerMeshThreadgroup() const; + void setMaxTotalThreadsPerMeshThreadgroup(NS::UInteger maxTotalThreadsPerMeshThreadgroup); + + bool objectThreadgroupSizeIsMultipleOfThreadExecutionWidth() const; + void setObjectThreadgroupSizeIsMultipleOfThreadExecutionWidth(bool objectThreadgroupSizeIsMultipleOfThreadExecutionWidth); + + bool meshThreadgroupSizeIsMultipleOfThreadExecutionWidth() const; + void setMeshThreadgroupSizeIsMultipleOfThreadExecutionWidth(bool meshThreadgroupSizeIsMultipleOfThreadExecutionWidth); + + NS::UInteger payloadMemoryLength() const; + void setPayloadMemoryLength(NS::UInteger payloadMemoryLength); + + NS::UInteger maxTotalThreadgroupsPerMeshGrid() const; + void setMaxTotalThreadgroupsPerMeshGrid(NS::UInteger maxTotalThreadgroupsPerMeshGrid); + + class PipelineBufferDescriptorArray* objectBuffers() const; + + class PipelineBufferDescriptorArray* meshBuffers() const; + + class PipelineBufferDescriptorArray* fragmentBuffers() const; + + NS::UInteger rasterSampleCount() const; + void setRasterSampleCount(NS::UInteger rasterSampleCount); + + bool alphaToCoverageEnabled() const; + void setAlphaToCoverageEnabled(bool 
alphaToCoverageEnabled); + + bool alphaToOneEnabled() const; + void setAlphaToOneEnabled(bool alphaToOneEnabled); + + bool rasterizationEnabled() const; + void setRasterizationEnabled(bool rasterizationEnabled); + + NS::UInteger maxVertexAmplificationCount() const; + void setMaxVertexAmplificationCount(NS::UInteger maxVertexAmplificationCount); + + class RenderPipelineColorAttachmentDescriptorArray* colorAttachments() const; + + MTL::PixelFormat depthAttachmentPixelFormat() const; + void setDepthAttachmentPixelFormat(MTL::PixelFormat depthAttachmentPixelFormat); + + MTL::PixelFormat stencilAttachmentPixelFormat() const; + void setStencilAttachmentPixelFormat(MTL::PixelFormat stencilAttachmentPixelFormat); + + void reset(); +}; + +} + +// static method: alloc +_MTL_INLINE MTL::RenderPipelineColorAttachmentDescriptor* MTL::RenderPipelineColorAttachmentDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLRenderPipelineColorAttachmentDescriptor)); +} + +// method: init +_MTL_INLINE MTL::RenderPipelineColorAttachmentDescriptor* MTL::RenderPipelineColorAttachmentDescriptor::init() +{ + return NS::Object::init(); +} + +// property: pixelFormat +_MTL_INLINE MTL::PixelFormat MTL::RenderPipelineColorAttachmentDescriptor::pixelFormat() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(pixelFormat)); +} + +_MTL_INLINE void MTL::RenderPipelineColorAttachmentDescriptor::setPixelFormat(MTL::PixelFormat pixelFormat) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setPixelFormat_), pixelFormat); +} + +// property: blendingEnabled +_MTL_INLINE bool MTL::RenderPipelineColorAttachmentDescriptor::blendingEnabled() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(isBlendingEnabled)); +} + +_MTL_INLINE void MTL::RenderPipelineColorAttachmentDescriptor::setBlendingEnabled(bool blendingEnabled) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBlendingEnabled_), blendingEnabled); +} + +// property: sourceRGBBlendFactor +_MTL_INLINE MTL::BlendFactor 
MTL::RenderPipelineColorAttachmentDescriptor::sourceRGBBlendFactor() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sourceRGBBlendFactor)); +} + +_MTL_INLINE void MTL::RenderPipelineColorAttachmentDescriptor::setSourceRGBBlendFactor(MTL::BlendFactor sourceRGBBlendFactor) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSourceRGBBlendFactor_), sourceRGBBlendFactor); +} + +// property: destinationRGBBlendFactor +_MTL_INLINE MTL::BlendFactor MTL::RenderPipelineColorAttachmentDescriptor::destinationRGBBlendFactor() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(destinationRGBBlendFactor)); +} + +_MTL_INLINE void MTL::RenderPipelineColorAttachmentDescriptor::setDestinationRGBBlendFactor(MTL::BlendFactor destinationRGBBlendFactor) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setDestinationRGBBlendFactor_), destinationRGBBlendFactor); +} + +// property: rgbBlendOperation +_MTL_INLINE MTL::BlendOperation MTL::RenderPipelineColorAttachmentDescriptor::rgbBlendOperation() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(rgbBlendOperation)); +} + +_MTL_INLINE void MTL::RenderPipelineColorAttachmentDescriptor::setRgbBlendOperation(MTL::BlendOperation rgbBlendOperation) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRgbBlendOperation_), rgbBlendOperation); +} + +// property: sourceAlphaBlendFactor +_MTL_INLINE MTL::BlendFactor MTL::RenderPipelineColorAttachmentDescriptor::sourceAlphaBlendFactor() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sourceAlphaBlendFactor)); +} + +_MTL_INLINE void MTL::RenderPipelineColorAttachmentDescriptor::setSourceAlphaBlendFactor(MTL::BlendFactor sourceAlphaBlendFactor) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSourceAlphaBlendFactor_), sourceAlphaBlendFactor); +} + +// property: destinationAlphaBlendFactor +_MTL_INLINE MTL::BlendFactor MTL::RenderPipelineColorAttachmentDescriptor::destinationAlphaBlendFactor() const +{ + return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(destinationAlphaBlendFactor)); +} + +_MTL_INLINE void MTL::RenderPipelineColorAttachmentDescriptor::setDestinationAlphaBlendFactor(MTL::BlendFactor destinationAlphaBlendFactor) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setDestinationAlphaBlendFactor_), destinationAlphaBlendFactor); +} + +// property: alphaBlendOperation +_MTL_INLINE MTL::BlendOperation MTL::RenderPipelineColorAttachmentDescriptor::alphaBlendOperation() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(alphaBlendOperation)); +} + +_MTL_INLINE void MTL::RenderPipelineColorAttachmentDescriptor::setAlphaBlendOperation(MTL::BlendOperation alphaBlendOperation) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setAlphaBlendOperation_), alphaBlendOperation); +} + +// property: writeMask +_MTL_INLINE MTL::ColorWriteMask MTL::RenderPipelineColorAttachmentDescriptor::writeMask() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(writeMask)); +} + +_MTL_INLINE void MTL::RenderPipelineColorAttachmentDescriptor::setWriteMask(MTL::ColorWriteMask writeMask) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setWriteMask_), writeMask); +} + +// static method: alloc +_MTL_INLINE MTL::RenderPipelineReflection* MTL::RenderPipelineReflection::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLRenderPipelineReflection)); +} + +// method: init +_MTL_INLINE MTL::RenderPipelineReflection* MTL::RenderPipelineReflection::init() +{ + return NS::Object::init(); +} + +// property: vertexBindings +_MTL_INLINE NS::Array* MTL::RenderPipelineReflection::vertexBindings() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(vertexBindings)); +} + +// property: fragmentBindings +_MTL_INLINE NS::Array* MTL::RenderPipelineReflection::fragmentBindings() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(fragmentBindings)); +} + +// property: tileBindings +_MTL_INLINE NS::Array* MTL::RenderPipelineReflection::tileBindings() const +{ + return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(tileBindings)); +} + +// property: objectBindings +_MTL_INLINE NS::Array* MTL::RenderPipelineReflection::objectBindings() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectBindings)); +} + +// property: meshBindings +_MTL_INLINE NS::Array* MTL::RenderPipelineReflection::meshBindings() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(meshBindings)); +} + +// property: vertexArguments +_MTL_INLINE NS::Array* MTL::RenderPipelineReflection::vertexArguments() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(vertexArguments)); +} + +// property: fragmentArguments +_MTL_INLINE NS::Array* MTL::RenderPipelineReflection::fragmentArguments() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(fragmentArguments)); +} + +// property: tileArguments +_MTL_INLINE NS::Array* MTL::RenderPipelineReflection::tileArguments() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(tileArguments)); +} + +// static method: alloc +_MTL_INLINE MTL::RenderPipelineDescriptor* MTL::RenderPipelineDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLRenderPipelineDescriptor)); +} + +// method: init +_MTL_INLINE MTL::RenderPipelineDescriptor* MTL::RenderPipelineDescriptor::init() +{ + return NS::Object::init(); +} + +// property: label +_MTL_INLINE NS::String* MTL::RenderPipelineDescriptor::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setLabel(const NS::String* label) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} + +// property: vertexFunction +_MTL_INLINE MTL::Function* MTL::RenderPipelineDescriptor::vertexFunction() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(vertexFunction)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setVertexFunction(const MTL::Function* vertexFunction) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexFunction_), vertexFunction); +} + +// 
property: fragmentFunction +_MTL_INLINE MTL::Function* MTL::RenderPipelineDescriptor::fragmentFunction() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(fragmentFunction)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setFragmentFunction(const MTL::Function* fragmentFunction) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setFragmentFunction_), fragmentFunction); +} + +// property: vertexDescriptor +_MTL_INLINE MTL::VertexDescriptor* MTL::RenderPipelineDescriptor::vertexDescriptor() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(vertexDescriptor)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setVertexDescriptor(const MTL::VertexDescriptor* vertexDescriptor) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexDescriptor_), vertexDescriptor); +} + +// property: sampleCount +_MTL_INLINE NS::UInteger MTL::RenderPipelineDescriptor::sampleCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sampleCount)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setSampleCount(NS::UInteger sampleCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSampleCount_), sampleCount); +} + +// property: rasterSampleCount +_MTL_INLINE NS::UInteger MTL::RenderPipelineDescriptor::rasterSampleCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(rasterSampleCount)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setRasterSampleCount(NS::UInteger rasterSampleCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRasterSampleCount_), rasterSampleCount); +} + +// property: alphaToCoverageEnabled +_MTL_INLINE bool MTL::RenderPipelineDescriptor::alphaToCoverageEnabled() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(isAlphaToCoverageEnabled)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setAlphaToCoverageEnabled(bool alphaToCoverageEnabled) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setAlphaToCoverageEnabled_), alphaToCoverageEnabled); +} + +// property: 
alphaToOneEnabled +_MTL_INLINE bool MTL::RenderPipelineDescriptor::alphaToOneEnabled() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(isAlphaToOneEnabled)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setAlphaToOneEnabled(bool alphaToOneEnabled) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setAlphaToOneEnabled_), alphaToOneEnabled); +} + +// property: rasterizationEnabled +_MTL_INLINE bool MTL::RenderPipelineDescriptor::rasterizationEnabled() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(isRasterizationEnabled)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setRasterizationEnabled(bool rasterizationEnabled) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRasterizationEnabled_), rasterizationEnabled); +} + +// property: maxVertexAmplificationCount +_MTL_INLINE NS::UInteger MTL::RenderPipelineDescriptor::maxVertexAmplificationCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxVertexAmplificationCount)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setMaxVertexAmplificationCount(NS::UInteger maxVertexAmplificationCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxVertexAmplificationCount_), maxVertexAmplificationCount); +} + +// property: colorAttachments +_MTL_INLINE MTL::RenderPipelineColorAttachmentDescriptorArray* MTL::RenderPipelineDescriptor::colorAttachments() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(colorAttachments)); +} + +// property: depthAttachmentPixelFormat +_MTL_INLINE MTL::PixelFormat MTL::RenderPipelineDescriptor::depthAttachmentPixelFormat() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(depthAttachmentPixelFormat)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setDepthAttachmentPixelFormat(MTL::PixelFormat depthAttachmentPixelFormat) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setDepthAttachmentPixelFormat_), depthAttachmentPixelFormat); +} + +// property: stencilAttachmentPixelFormat +_MTL_INLINE 
MTL::PixelFormat MTL::RenderPipelineDescriptor::stencilAttachmentPixelFormat() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(stencilAttachmentPixelFormat)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setStencilAttachmentPixelFormat(MTL::PixelFormat stencilAttachmentPixelFormat) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStencilAttachmentPixelFormat_), stencilAttachmentPixelFormat); +} + +// property: inputPrimitiveTopology +_MTL_INLINE MTL::PrimitiveTopologyClass MTL::RenderPipelineDescriptor::inputPrimitiveTopology() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(inputPrimitiveTopology)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setInputPrimitiveTopology(MTL::PrimitiveTopologyClass inputPrimitiveTopology) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setInputPrimitiveTopology_), inputPrimitiveTopology); +} + +// property: tessellationPartitionMode +_MTL_INLINE MTL::TessellationPartitionMode MTL::RenderPipelineDescriptor::tessellationPartitionMode() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(tessellationPartitionMode)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setTessellationPartitionMode(MTL::TessellationPartitionMode tessellationPartitionMode) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTessellationPartitionMode_), tessellationPartitionMode); +} + +// property: maxTessellationFactor +_MTL_INLINE NS::UInteger MTL::RenderPipelineDescriptor::maxTessellationFactor() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxTessellationFactor)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setMaxTessellationFactor(NS::UInteger maxTessellationFactor) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxTessellationFactor_), maxTessellationFactor); +} + +// property: tessellationFactorScaleEnabled +_MTL_INLINE bool MTL::RenderPipelineDescriptor::tessellationFactorScaleEnabled() const +{ + return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(isTessellationFactorScaleEnabled)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setTessellationFactorScaleEnabled(bool tessellationFactorScaleEnabled) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTessellationFactorScaleEnabled_), tessellationFactorScaleEnabled); +} + +// property: tessellationFactorFormat +_MTL_INLINE MTL::TessellationFactorFormat MTL::RenderPipelineDescriptor::tessellationFactorFormat() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(tessellationFactorFormat)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setTessellationFactorFormat(MTL::TessellationFactorFormat tessellationFactorFormat) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTessellationFactorFormat_), tessellationFactorFormat); +} + +// property: tessellationControlPointIndexType +_MTL_INLINE MTL::TessellationControlPointIndexType MTL::RenderPipelineDescriptor::tessellationControlPointIndexType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(tessellationControlPointIndexType)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setTessellationControlPointIndexType(MTL::TessellationControlPointIndexType tessellationControlPointIndexType) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTessellationControlPointIndexType_), tessellationControlPointIndexType); +} + +// property: tessellationFactorStepFunction +_MTL_INLINE MTL::TessellationFactorStepFunction MTL::RenderPipelineDescriptor::tessellationFactorStepFunction() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(tessellationFactorStepFunction)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setTessellationFactorStepFunction(MTL::TessellationFactorStepFunction tessellationFactorStepFunction) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTessellationFactorStepFunction_), tessellationFactorStepFunction); +} + +// property: tessellationOutputWindingOrder +_MTL_INLINE MTL::Winding 
MTL::RenderPipelineDescriptor::tessellationOutputWindingOrder() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(tessellationOutputWindingOrder)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setTessellationOutputWindingOrder(MTL::Winding tessellationOutputWindingOrder) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTessellationOutputWindingOrder_), tessellationOutputWindingOrder); +} + +// property: vertexBuffers +_MTL_INLINE MTL::PipelineBufferDescriptorArray* MTL::RenderPipelineDescriptor::vertexBuffers() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(vertexBuffers)); +} + +// property: fragmentBuffers +_MTL_INLINE MTL::PipelineBufferDescriptorArray* MTL::RenderPipelineDescriptor::fragmentBuffers() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(fragmentBuffers)); +} + +// property: supportIndirectCommandBuffers +_MTL_INLINE bool MTL::RenderPipelineDescriptor::supportIndirectCommandBuffers() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportIndirectCommandBuffers)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setSupportIndirectCommandBuffers(bool supportIndirectCommandBuffers) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSupportIndirectCommandBuffers_), supportIndirectCommandBuffers); +} + +// property: binaryArchives +_MTL_INLINE NS::Array* MTL::RenderPipelineDescriptor::binaryArchives() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(binaryArchives)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setBinaryArchives(const NS::Array* binaryArchives) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBinaryArchives_), binaryArchives); +} + +// property: vertexPreloadedLibraries +_MTL_INLINE NS::Array* MTL::RenderPipelineDescriptor::vertexPreloadedLibraries() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(vertexPreloadedLibraries)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setVertexPreloadedLibraries(const NS::Array* 
vertexPreloadedLibraries) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexPreloadedLibraries_), vertexPreloadedLibraries); +} + +// property: fragmentPreloadedLibraries +_MTL_INLINE NS::Array* MTL::RenderPipelineDescriptor::fragmentPreloadedLibraries() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(fragmentPreloadedLibraries)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setFragmentPreloadedLibraries(const NS::Array* fragmentPreloadedLibraries) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setFragmentPreloadedLibraries_), fragmentPreloadedLibraries); +} + +// property: vertexLinkedFunctions +_MTL_INLINE MTL::LinkedFunctions* MTL::RenderPipelineDescriptor::vertexLinkedFunctions() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(vertexLinkedFunctions)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setVertexLinkedFunctions(const MTL::LinkedFunctions* vertexLinkedFunctions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexLinkedFunctions_), vertexLinkedFunctions); +} + +// property: fragmentLinkedFunctions +_MTL_INLINE MTL::LinkedFunctions* MTL::RenderPipelineDescriptor::fragmentLinkedFunctions() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(fragmentLinkedFunctions)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setFragmentLinkedFunctions(const MTL::LinkedFunctions* fragmentLinkedFunctions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setFragmentLinkedFunctions_), fragmentLinkedFunctions); +} + +// property: supportAddingVertexBinaryFunctions +_MTL_INLINE bool MTL::RenderPipelineDescriptor::supportAddingVertexBinaryFunctions() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportAddingVertexBinaryFunctions)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setSupportAddingVertexBinaryFunctions(bool supportAddingVertexBinaryFunctions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSupportAddingVertexBinaryFunctions_), 
supportAddingVertexBinaryFunctions); +} + +// property: supportAddingFragmentBinaryFunctions +_MTL_INLINE bool MTL::RenderPipelineDescriptor::supportAddingFragmentBinaryFunctions() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportAddingFragmentBinaryFunctions)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setSupportAddingFragmentBinaryFunctions(bool supportAddingFragmentBinaryFunctions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSupportAddingFragmentBinaryFunctions_), supportAddingFragmentBinaryFunctions); +} + +// property: maxVertexCallStackDepth +_MTL_INLINE NS::UInteger MTL::RenderPipelineDescriptor::maxVertexCallStackDepth() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxVertexCallStackDepth)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setMaxVertexCallStackDepth(NS::UInteger maxVertexCallStackDepth) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxVertexCallStackDepth_), maxVertexCallStackDepth); +} + +// property: maxFragmentCallStackDepth +_MTL_INLINE NS::UInteger MTL::RenderPipelineDescriptor::maxFragmentCallStackDepth() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxFragmentCallStackDepth)); +} + +_MTL_INLINE void MTL::RenderPipelineDescriptor::setMaxFragmentCallStackDepth(NS::UInteger maxFragmentCallStackDepth) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxFragmentCallStackDepth_), maxFragmentCallStackDepth); +} + +// method: reset +_MTL_INLINE void MTL::RenderPipelineDescriptor::reset() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(reset)); +} + +// static method: alloc +_MTL_INLINE MTL::RenderPipelineFunctionsDescriptor* MTL::RenderPipelineFunctionsDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLRenderPipelineFunctionsDescriptor)); +} + +// method: init +_MTL_INLINE MTL::RenderPipelineFunctionsDescriptor* MTL::RenderPipelineFunctionsDescriptor::init() +{ + return NS::Object::init(); +} + +// property: 
vertexAdditionalBinaryFunctions +_MTL_INLINE NS::Array* MTL::RenderPipelineFunctionsDescriptor::vertexAdditionalBinaryFunctions() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(vertexAdditionalBinaryFunctions)); +} + +_MTL_INLINE void MTL::RenderPipelineFunctionsDescriptor::setVertexAdditionalBinaryFunctions(const NS::Array* vertexAdditionalBinaryFunctions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexAdditionalBinaryFunctions_), vertexAdditionalBinaryFunctions); +} + +// property: fragmentAdditionalBinaryFunctions +_MTL_INLINE NS::Array* MTL::RenderPipelineFunctionsDescriptor::fragmentAdditionalBinaryFunctions() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(fragmentAdditionalBinaryFunctions)); +} + +_MTL_INLINE void MTL::RenderPipelineFunctionsDescriptor::setFragmentAdditionalBinaryFunctions(const NS::Array* fragmentAdditionalBinaryFunctions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setFragmentAdditionalBinaryFunctions_), fragmentAdditionalBinaryFunctions); +} + +// property: tileAdditionalBinaryFunctions +_MTL_INLINE NS::Array* MTL::RenderPipelineFunctionsDescriptor::tileAdditionalBinaryFunctions() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(tileAdditionalBinaryFunctions)); +} + +_MTL_INLINE void MTL::RenderPipelineFunctionsDescriptor::setTileAdditionalBinaryFunctions(const NS::Array* tileAdditionalBinaryFunctions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTileAdditionalBinaryFunctions_), tileAdditionalBinaryFunctions); +} + +// property: label +_MTL_INLINE NS::String* MTL::RenderPipelineState::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +// property: device +_MTL_INLINE MTL::Device* MTL::RenderPipelineState::device() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); +} + +// property: maxTotalThreadsPerThreadgroup +_MTL_INLINE NS::UInteger MTL::RenderPipelineState::maxTotalThreadsPerThreadgroup() const +{ + return 
Object::sendMessage(this, _MTL_PRIVATE_SEL(maxTotalThreadsPerThreadgroup)); +} + +// property: threadgroupSizeMatchesTileSize +_MTL_INLINE bool MTL::RenderPipelineState::threadgroupSizeMatchesTileSize() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(threadgroupSizeMatchesTileSize)); +} + +// property: imageblockSampleLength +_MTL_INLINE NS::UInteger MTL::RenderPipelineState::imageblockSampleLength() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(imageblockSampleLength)); +} + +// method: imageblockMemoryLengthForDimensions: +_MTL_INLINE NS::UInteger MTL::RenderPipelineState::imageblockMemoryLength(MTL::Size imageblockDimensions) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(imageblockMemoryLengthForDimensions_), imageblockDimensions); +} + +// property: supportIndirectCommandBuffers +_MTL_INLINE bool MTL::RenderPipelineState::supportIndirectCommandBuffers() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportIndirectCommandBuffers)); +} + +// property: maxTotalThreadsPerObjectThreadgroup +_MTL_INLINE NS::UInteger MTL::RenderPipelineState::maxTotalThreadsPerObjectThreadgroup() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxTotalThreadsPerObjectThreadgroup)); +} + +// property: maxTotalThreadsPerMeshThreadgroup +_MTL_INLINE NS::UInteger MTL::RenderPipelineState::maxTotalThreadsPerMeshThreadgroup() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxTotalThreadsPerMeshThreadgroup)); +} + +// property: objectThreadExecutionWidth +_MTL_INLINE NS::UInteger MTL::RenderPipelineState::objectThreadExecutionWidth() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectThreadExecutionWidth)); +} + +// property: meshThreadExecutionWidth +_MTL_INLINE NS::UInteger MTL::RenderPipelineState::meshThreadExecutionWidth() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(meshThreadExecutionWidth)); +} + +// property: maxTotalThreadgroupsPerMeshGrid +_MTL_INLINE NS::UInteger 
MTL::RenderPipelineState::maxTotalThreadgroupsPerMeshGrid() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxTotalThreadgroupsPerMeshGrid)); +} + +// property: gpuResourceID +_MTL_INLINE MTL::ResourceID MTL::RenderPipelineState::gpuResourceID() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(gpuResourceID)); +} + +// method: functionHandleWithFunction:stage: +_MTL_INLINE MTL::FunctionHandle* MTL::RenderPipelineState::functionHandle(const MTL::Function* function, MTL::RenderStages stage) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(functionHandleWithFunction_stage_), function, stage); +} + +// method: newVisibleFunctionTableWithDescriptor:stage: +_MTL_INLINE MTL::VisibleFunctionTable* MTL::RenderPipelineState::newVisibleFunctionTable(const MTL::VisibleFunctionTableDescriptor* descriptor, MTL::RenderStages stage) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newVisibleFunctionTableWithDescriptor_stage_), descriptor, stage); +} + +// method: newIntersectionFunctionTableWithDescriptor:stage: +_MTL_INLINE MTL::IntersectionFunctionTable* MTL::RenderPipelineState::newIntersectionFunctionTable(const MTL::IntersectionFunctionTableDescriptor* descriptor, MTL::RenderStages stage) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newIntersectionFunctionTableWithDescriptor_stage_), descriptor, stage); +} + +// method: newRenderPipelineStateWithAdditionalBinaryFunctions:error: +_MTL_INLINE MTL::RenderPipelineState* MTL::RenderPipelineState::newRenderPipelineState(const MTL::RenderPipelineFunctionsDescriptor* additionalBinaryFunctions, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newRenderPipelineStateWithAdditionalBinaryFunctions_error_), additionalBinaryFunctions, error); +} + +// static method: alloc +_MTL_INLINE MTL::RenderPipelineColorAttachmentDescriptorArray* MTL::RenderPipelineColorAttachmentDescriptorArray::alloc() +{ + return 
NS::Object::alloc(_MTL_PRIVATE_CLS(MTLRenderPipelineColorAttachmentDescriptorArray)); +} + +// method: init +_MTL_INLINE MTL::RenderPipelineColorAttachmentDescriptorArray* MTL::RenderPipelineColorAttachmentDescriptorArray::init() +{ + return NS::Object::init(); +} + +// method: objectAtIndexedSubscript: +_MTL_INLINE MTL::RenderPipelineColorAttachmentDescriptor* MTL::RenderPipelineColorAttachmentDescriptorArray::object(NS::UInteger attachmentIndex) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), attachmentIndex); +} + +// method: setObject:atIndexedSubscript: +_MTL_INLINE void MTL::RenderPipelineColorAttachmentDescriptorArray::setObject(const MTL::RenderPipelineColorAttachmentDescriptor* attachment, NS::UInteger attachmentIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), attachment, attachmentIndex); +} + +// static method: alloc +_MTL_INLINE MTL::TileRenderPipelineColorAttachmentDescriptor* MTL::TileRenderPipelineColorAttachmentDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLTileRenderPipelineColorAttachmentDescriptor)); +} + +// method: init +_MTL_INLINE MTL::TileRenderPipelineColorAttachmentDescriptor* MTL::TileRenderPipelineColorAttachmentDescriptor::init() +{ + return NS::Object::init(); +} + +// property: pixelFormat +_MTL_INLINE MTL::PixelFormat MTL::TileRenderPipelineColorAttachmentDescriptor::pixelFormat() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(pixelFormat)); +} + +_MTL_INLINE void MTL::TileRenderPipelineColorAttachmentDescriptor::setPixelFormat(MTL::PixelFormat pixelFormat) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setPixelFormat_), pixelFormat); +} + +// static method: alloc +_MTL_INLINE MTL::TileRenderPipelineColorAttachmentDescriptorArray* MTL::TileRenderPipelineColorAttachmentDescriptorArray::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLTileRenderPipelineColorAttachmentDescriptorArray)); +} + +// method: init 
+_MTL_INLINE MTL::TileRenderPipelineColorAttachmentDescriptorArray* MTL::TileRenderPipelineColorAttachmentDescriptorArray::init() +{ + return NS::Object::init(); +} + +// method: objectAtIndexedSubscript: +_MTL_INLINE MTL::TileRenderPipelineColorAttachmentDescriptor* MTL::TileRenderPipelineColorAttachmentDescriptorArray::object(NS::UInteger attachmentIndex) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), attachmentIndex); +} + +// method: setObject:atIndexedSubscript: +_MTL_INLINE void MTL::TileRenderPipelineColorAttachmentDescriptorArray::setObject(const MTL::TileRenderPipelineColorAttachmentDescriptor* attachment, NS::UInteger attachmentIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), attachment, attachmentIndex); +} + +// static method: alloc +_MTL_INLINE MTL::TileRenderPipelineDescriptor* MTL::TileRenderPipelineDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLTileRenderPipelineDescriptor)); +} + +// method: init +_MTL_INLINE MTL::TileRenderPipelineDescriptor* MTL::TileRenderPipelineDescriptor::init() +{ + return NS::Object::init(); +} + +// property: label +_MTL_INLINE NS::String* MTL::TileRenderPipelineDescriptor::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::TileRenderPipelineDescriptor::setLabel(const NS::String* label) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} + +// property: tileFunction +_MTL_INLINE MTL::Function* MTL::TileRenderPipelineDescriptor::tileFunction() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(tileFunction)); +} + +_MTL_INLINE void MTL::TileRenderPipelineDescriptor::setTileFunction(const MTL::Function* tileFunction) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTileFunction_), tileFunction); +} + +// property: rasterSampleCount +_MTL_INLINE NS::UInteger MTL::TileRenderPipelineDescriptor::rasterSampleCount() const +{ + return 
Object::sendMessage(this, _MTL_PRIVATE_SEL(rasterSampleCount)); +} + +_MTL_INLINE void MTL::TileRenderPipelineDescriptor::setRasterSampleCount(NS::UInteger rasterSampleCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRasterSampleCount_), rasterSampleCount); +} + +// property: colorAttachments +_MTL_INLINE MTL::TileRenderPipelineColorAttachmentDescriptorArray* MTL::TileRenderPipelineDescriptor::colorAttachments() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(colorAttachments)); +} + +// property: threadgroupSizeMatchesTileSize +_MTL_INLINE bool MTL::TileRenderPipelineDescriptor::threadgroupSizeMatchesTileSize() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(threadgroupSizeMatchesTileSize)); +} + +_MTL_INLINE void MTL::TileRenderPipelineDescriptor::setThreadgroupSizeMatchesTileSize(bool threadgroupSizeMatchesTileSize) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setThreadgroupSizeMatchesTileSize_), threadgroupSizeMatchesTileSize); +} + +// property: tileBuffers +_MTL_INLINE MTL::PipelineBufferDescriptorArray* MTL::TileRenderPipelineDescriptor::tileBuffers() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(tileBuffers)); +} + +// property: maxTotalThreadsPerThreadgroup +_MTL_INLINE NS::UInteger MTL::TileRenderPipelineDescriptor::maxTotalThreadsPerThreadgroup() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxTotalThreadsPerThreadgroup)); +} + +_MTL_INLINE void MTL::TileRenderPipelineDescriptor::setMaxTotalThreadsPerThreadgroup(NS::UInteger maxTotalThreadsPerThreadgroup) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxTotalThreadsPerThreadgroup_), maxTotalThreadsPerThreadgroup); +} + +// property: binaryArchives +_MTL_INLINE NS::Array* MTL::TileRenderPipelineDescriptor::binaryArchives() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(binaryArchives)); +} + +_MTL_INLINE void MTL::TileRenderPipelineDescriptor::setBinaryArchives(const NS::Array* binaryArchives) +{ + 
Object::sendMessage(this, _MTL_PRIVATE_SEL(setBinaryArchives_), binaryArchives); +} + +// property: preloadedLibraries +_MTL_INLINE NS::Array* MTL::TileRenderPipelineDescriptor::preloadedLibraries() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(preloadedLibraries)); +} + +_MTL_INLINE void MTL::TileRenderPipelineDescriptor::setPreloadedLibraries(const NS::Array* preloadedLibraries) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setPreloadedLibraries_), preloadedLibraries); +} + +// property: linkedFunctions +_MTL_INLINE MTL::LinkedFunctions* MTL::TileRenderPipelineDescriptor::linkedFunctions() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(linkedFunctions)); +} + +_MTL_INLINE void MTL::TileRenderPipelineDescriptor::setLinkedFunctions(const MTL::LinkedFunctions* linkedFunctions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLinkedFunctions_), linkedFunctions); +} + +// property: supportAddingBinaryFunctions +_MTL_INLINE bool MTL::TileRenderPipelineDescriptor::supportAddingBinaryFunctions() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportAddingBinaryFunctions)); +} + +_MTL_INLINE void MTL::TileRenderPipelineDescriptor::setSupportAddingBinaryFunctions(bool supportAddingBinaryFunctions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSupportAddingBinaryFunctions_), supportAddingBinaryFunctions); +} + +// property: maxCallStackDepth +_MTL_INLINE NS::UInteger MTL::TileRenderPipelineDescriptor::maxCallStackDepth() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxCallStackDepth)); +} + +_MTL_INLINE void MTL::TileRenderPipelineDescriptor::setMaxCallStackDepth(NS::UInteger maxCallStackDepth) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxCallStackDepth_), maxCallStackDepth); +} + +// method: reset +_MTL_INLINE void MTL::TileRenderPipelineDescriptor::reset() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(reset)); +} + +// static method: alloc +_MTL_INLINE MTL::MeshRenderPipelineDescriptor* 
MTL::MeshRenderPipelineDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLMeshRenderPipelineDescriptor)); +} + +// method: init +_MTL_INLINE MTL::MeshRenderPipelineDescriptor* MTL::MeshRenderPipelineDescriptor::init() +{ + return NS::Object::init(); +} + +// property: label +_MTL_INLINE NS::String* MTL::MeshRenderPipelineDescriptor::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setLabel(const NS::String* label) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} + +// property: objectFunction +_MTL_INLINE MTL::Function* MTL::MeshRenderPipelineDescriptor::objectFunction() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectFunction)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setObjectFunction(const MTL::Function* objectFunction) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setObjectFunction_), objectFunction); +} + +// property: meshFunction +_MTL_INLINE MTL::Function* MTL::MeshRenderPipelineDescriptor::meshFunction() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(meshFunction)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setMeshFunction(const MTL::Function* meshFunction) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMeshFunction_), meshFunction); +} + +// property: fragmentFunction +_MTL_INLINE MTL::Function* MTL::MeshRenderPipelineDescriptor::fragmentFunction() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(fragmentFunction)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setFragmentFunction(const MTL::Function* fragmentFunction) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setFragmentFunction_), fragmentFunction); +} + +// property: maxTotalThreadsPerObjectThreadgroup +_MTL_INLINE NS::UInteger MTL::MeshRenderPipelineDescriptor::maxTotalThreadsPerObjectThreadgroup() const +{ + return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(maxTotalThreadsPerObjectThreadgroup)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setMaxTotalThreadsPerObjectThreadgroup(NS::UInteger maxTotalThreadsPerObjectThreadgroup) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxTotalThreadsPerObjectThreadgroup_), maxTotalThreadsPerObjectThreadgroup); +} + +// property: maxTotalThreadsPerMeshThreadgroup +_MTL_INLINE NS::UInteger MTL::MeshRenderPipelineDescriptor::maxTotalThreadsPerMeshThreadgroup() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxTotalThreadsPerMeshThreadgroup)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setMaxTotalThreadsPerMeshThreadgroup(NS::UInteger maxTotalThreadsPerMeshThreadgroup) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxTotalThreadsPerMeshThreadgroup_), maxTotalThreadsPerMeshThreadgroup); +} + +// property: objectThreadgroupSizeIsMultipleOfThreadExecutionWidth +_MTL_INLINE bool MTL::MeshRenderPipelineDescriptor::objectThreadgroupSizeIsMultipleOfThreadExecutionWidth() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectThreadgroupSizeIsMultipleOfThreadExecutionWidth)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setObjectThreadgroupSizeIsMultipleOfThreadExecutionWidth(bool objectThreadgroupSizeIsMultipleOfThreadExecutionWidth) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setObjectThreadgroupSizeIsMultipleOfThreadExecutionWidth_), objectThreadgroupSizeIsMultipleOfThreadExecutionWidth); +} + +// property: meshThreadgroupSizeIsMultipleOfThreadExecutionWidth +_MTL_INLINE bool MTL::MeshRenderPipelineDescriptor::meshThreadgroupSizeIsMultipleOfThreadExecutionWidth() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(meshThreadgroupSizeIsMultipleOfThreadExecutionWidth)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setMeshThreadgroupSizeIsMultipleOfThreadExecutionWidth(bool meshThreadgroupSizeIsMultipleOfThreadExecutionWidth) +{ + Object::sendMessage(this, 
_MTL_PRIVATE_SEL(setMeshThreadgroupSizeIsMultipleOfThreadExecutionWidth_), meshThreadgroupSizeIsMultipleOfThreadExecutionWidth); +} + +// property: payloadMemoryLength +_MTL_INLINE NS::UInteger MTL::MeshRenderPipelineDescriptor::payloadMemoryLength() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(payloadMemoryLength)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setPayloadMemoryLength(NS::UInteger payloadMemoryLength) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setPayloadMemoryLength_), payloadMemoryLength); +} + +// property: maxTotalThreadgroupsPerMeshGrid +_MTL_INLINE NS::UInteger MTL::MeshRenderPipelineDescriptor::maxTotalThreadgroupsPerMeshGrid() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxTotalThreadgroupsPerMeshGrid)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setMaxTotalThreadgroupsPerMeshGrid(NS::UInteger maxTotalThreadgroupsPerMeshGrid) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxTotalThreadgroupsPerMeshGrid_), maxTotalThreadgroupsPerMeshGrid); +} + +// property: objectBuffers +_MTL_INLINE MTL::PipelineBufferDescriptorArray* MTL::MeshRenderPipelineDescriptor::objectBuffers() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectBuffers)); +} + +// property: meshBuffers +_MTL_INLINE MTL::PipelineBufferDescriptorArray* MTL::MeshRenderPipelineDescriptor::meshBuffers() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(meshBuffers)); +} + +// property: fragmentBuffers +_MTL_INLINE MTL::PipelineBufferDescriptorArray* MTL::MeshRenderPipelineDescriptor::fragmentBuffers() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(fragmentBuffers)); +} + +// property: rasterSampleCount +_MTL_INLINE NS::UInteger MTL::MeshRenderPipelineDescriptor::rasterSampleCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(rasterSampleCount)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setRasterSampleCount(NS::UInteger rasterSampleCount) +{ + 
Object::sendMessage(this, _MTL_PRIVATE_SEL(setRasterSampleCount_), rasterSampleCount); +} + +// property: alphaToCoverageEnabled +_MTL_INLINE bool MTL::MeshRenderPipelineDescriptor::alphaToCoverageEnabled() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(isAlphaToCoverageEnabled)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setAlphaToCoverageEnabled(bool alphaToCoverageEnabled) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setAlphaToCoverageEnabled_), alphaToCoverageEnabled); +} + +// property: alphaToOneEnabled +_MTL_INLINE bool MTL::MeshRenderPipelineDescriptor::alphaToOneEnabled() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(isAlphaToOneEnabled)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setAlphaToOneEnabled(bool alphaToOneEnabled) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setAlphaToOneEnabled_), alphaToOneEnabled); +} + +// property: rasterizationEnabled +_MTL_INLINE bool MTL::MeshRenderPipelineDescriptor::rasterizationEnabled() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(isRasterizationEnabled)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setRasterizationEnabled(bool rasterizationEnabled) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRasterizationEnabled_), rasterizationEnabled); +} + +// property: maxVertexAmplificationCount +_MTL_INLINE NS::UInteger MTL::MeshRenderPipelineDescriptor::maxVertexAmplificationCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxVertexAmplificationCount)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setMaxVertexAmplificationCount(NS::UInteger maxVertexAmplificationCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxVertexAmplificationCount_), maxVertexAmplificationCount); +} + +// property: colorAttachments +_MTL_INLINE MTL::RenderPipelineColorAttachmentDescriptorArray* MTL::MeshRenderPipelineDescriptor::colorAttachments() const +{ + return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(colorAttachments)); +} + +// property: depthAttachmentPixelFormat +_MTL_INLINE MTL::PixelFormat MTL::MeshRenderPipelineDescriptor::depthAttachmentPixelFormat() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(depthAttachmentPixelFormat)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setDepthAttachmentPixelFormat(MTL::PixelFormat depthAttachmentPixelFormat) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setDepthAttachmentPixelFormat_), depthAttachmentPixelFormat); +} + +// property: stencilAttachmentPixelFormat +_MTL_INLINE MTL::PixelFormat MTL::MeshRenderPipelineDescriptor::stencilAttachmentPixelFormat() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(stencilAttachmentPixelFormat)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setStencilAttachmentPixelFormat(MTL::PixelFormat stencilAttachmentPixelFormat) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStencilAttachmentPixelFormat_), stencilAttachmentPixelFormat); +} + +// method: reset +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::reset() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(reset)); +} diff --git a/metal-cpp/Metal/MTLResource.hpp b/metal-cpp/Metal/MTLResource.hpp new file mode 100644 index 00000000..7de3c7f7 --- /dev/null +++ b/metal-cpp/Metal/MTLResource.hpp @@ -0,0 +1,178 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLResource.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLResource.hpp" + +namespace MTL +{ +_MTL_ENUM(NS::UInteger, PurgeableState) { + PurgeableStateKeepCurrent = 1, + PurgeableStateNonVolatile = 2, + PurgeableStateVolatile = 3, + PurgeableStateEmpty = 4, +}; + +_MTL_ENUM(NS::UInteger, CPUCacheMode) { + CPUCacheModeDefaultCache = 0, + CPUCacheModeWriteCombined = 1, +}; + +_MTL_ENUM(NS::UInteger, StorageMode) { + StorageModeShared = 0, + StorageModeManaged = 1, + StorageModePrivate = 2, + StorageModeMemoryless = 3, +}; + +_MTL_ENUM(NS::UInteger, HazardTrackingMode) { + HazardTrackingModeDefault = 0, + HazardTrackingModeUntracked = 1, + HazardTrackingModeTracked = 2, +}; + +_MTL_OPTIONS(NS::UInteger, ResourceOptions) { + ResourceStorageModeShared = 0, + ResourceHazardTrackingModeDefault = 0, + ResourceCPUCacheModeDefaultCache = 0, + ResourceOptionCPUCacheModeDefault = 0, + ResourceCPUCacheModeWriteCombined = 1, + ResourceOptionCPUCacheModeWriteCombined = 1, + ResourceStorageModeManaged = 16, + ResourceStorageModePrivate = 32, + ResourceStorageModeMemoryless = 48, + ResourceHazardTrackingModeUntracked = 256, + ResourceHazardTrackingModeTracked = 512, +}; + +class Resource : public NS::Referencing +{ +public: + NS::String* label() const; + void setLabel(const NS::String* label); + + class Device* device() const; + + 
MTL::CPUCacheMode cpuCacheMode() const; + + MTL::StorageMode storageMode() const; + + MTL::HazardTrackingMode hazardTrackingMode() const; + + MTL::ResourceOptions resourceOptions() const; + + MTL::PurgeableState setPurgeableState(MTL::PurgeableState state); + + class Heap* heap() const; + + NS::UInteger heapOffset() const; + + NS::UInteger allocatedSize() const; + + void makeAliasable(); + + bool isAliasable(); +}; + +} + +// property: label +_MTL_INLINE NS::String* MTL::Resource::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::Resource::setLabel(const NS::String* label) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} + +// property: device +_MTL_INLINE MTL::Device* MTL::Resource::device() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); +} + +// property: cpuCacheMode +_MTL_INLINE MTL::CPUCacheMode MTL::Resource::cpuCacheMode() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(cpuCacheMode)); +} + +// property: storageMode +_MTL_INLINE MTL::StorageMode MTL::Resource::storageMode() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(storageMode)); +} + +// property: hazardTrackingMode +_MTL_INLINE MTL::HazardTrackingMode MTL::Resource::hazardTrackingMode() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(hazardTrackingMode)); +} + +// property: resourceOptions +_MTL_INLINE MTL::ResourceOptions MTL::Resource::resourceOptions() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(resourceOptions)); +} + +// method: setPurgeableState: +_MTL_INLINE MTL::PurgeableState MTL::Resource::setPurgeableState(MTL::PurgeableState state) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(setPurgeableState_), state); +} + +// property: heap +_MTL_INLINE MTL::Heap* MTL::Resource::heap() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(heap)); +} + +// property: heapOffset +_MTL_INLINE NS::UInteger MTL::Resource::heapOffset() 
const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(heapOffset)); +} + +// property: allocatedSize +_MTL_INLINE NS::UInteger MTL::Resource::allocatedSize() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(allocatedSize)); +} + +// method: makeAliasable +_MTL_INLINE void MTL::Resource::makeAliasable() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(makeAliasable)); +} + +// method: isAliasable +_MTL_INLINE bool MTL::Resource::isAliasable() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(isAliasable)); +} diff --git a/metal-cpp/Metal/MTLResourceStateCommandEncoder.hpp b/metal-cpp/Metal/MTLResourceStateCommandEncoder.hpp new file mode 100644 index 00000000..97041a82 --- /dev/null +++ b/metal-cpp/Metal/MTLResourceStateCommandEncoder.hpp @@ -0,0 +1,103 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLResourceStateCommandEncoder.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLCommandEncoder.hpp" +#include "MTLTypes.hpp" + +namespace MTL +{ +_MTL_ENUM(NS::UInteger, SparseTextureMappingMode) { + SparseTextureMappingModeMap = 0, + SparseTextureMappingModeUnmap = 1, +}; + +struct MapIndirectArguments +{ + uint32_t regionOriginX; + uint32_t regionOriginY; + uint32_t regionOriginZ; + uint32_t regionSizeWidth; + uint32_t regionSizeHeight; + uint32_t regionSizeDepth; + uint32_t mipMapLevel; + uint32_t sliceId; +} _MTL_PACKED; + +class ResourceStateCommandEncoder : public NS::Referencing +{ +public: + void updateTextureMappings(const class Texture* texture, const MTL::SparseTextureMappingMode mode, const MTL::Region* regions, const NS::UInteger* mipLevels, const NS::UInteger* slices, NS::UInteger numRegions); + + void updateTextureMapping(const class Texture* texture, const MTL::SparseTextureMappingMode mode, const MTL::Region region, const NS::UInteger mipLevel, const NS::UInteger slice); + + void updateTextureMapping(const class Texture* texture, const MTL::SparseTextureMappingMode mode, const class Buffer* indirectBuffer, NS::UInteger indirectBufferOffset); + + void updateFence(const class Fence* fence); + + void waitForFence(const class Fence* fence); + + void moveTextureMappingsFromTexture(const class Texture* sourceTexture, NS::UInteger sourceSlice, NS::UInteger sourceLevel, MTL::Origin sourceOrigin, MTL::Size sourceSize, const class Texture* destinationTexture, NS::UInteger destinationSlice, NS::UInteger destinationLevel, MTL::Origin destinationOrigin); +}; + +} + +// method: updateTextureMappings:mode:regions:mipLevels:slices:numRegions: +_MTL_INLINE void MTL::ResourceStateCommandEncoder::updateTextureMappings(const MTL::Texture* texture, const 
MTL::SparseTextureMappingMode mode, const MTL::Region* regions, const NS::UInteger* mipLevels, const NS::UInteger* slices, NS::UInteger numRegions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(updateTextureMappings_mode_regions_mipLevels_slices_numRegions_), texture, mode, regions, mipLevels, slices, numRegions); +} + +// method: updateTextureMapping:mode:region:mipLevel:slice: +_MTL_INLINE void MTL::ResourceStateCommandEncoder::updateTextureMapping(const MTL::Texture* texture, const MTL::SparseTextureMappingMode mode, const MTL::Region region, const NS::UInteger mipLevel, const NS::UInteger slice) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(updateTextureMapping_mode_region_mipLevel_slice_), texture, mode, region, mipLevel, slice); +} + +// method: updateTextureMapping:mode:indirectBuffer:indirectBufferOffset: +_MTL_INLINE void MTL::ResourceStateCommandEncoder::updateTextureMapping(const MTL::Texture* texture, const MTL::SparseTextureMappingMode mode, const MTL::Buffer* indirectBuffer, NS::UInteger indirectBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(updateTextureMapping_mode_indirectBuffer_indirectBufferOffset_), texture, mode, indirectBuffer, indirectBufferOffset); +} + +// method: updateFence: +_MTL_INLINE void MTL::ResourceStateCommandEncoder::updateFence(const MTL::Fence* fence) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(updateFence_), fence); +} + +// method: waitForFence: +_MTL_INLINE void MTL::ResourceStateCommandEncoder::waitForFence(const MTL::Fence* fence) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(waitForFence_), fence); +} + +// method: moveTextureMappingsFromTexture:sourceSlice:sourceLevel:sourceOrigin:sourceSize:toTexture:destinationSlice:destinationLevel:destinationOrigin: +_MTL_INLINE void MTL::ResourceStateCommandEncoder::moveTextureMappingsFromTexture(const MTL::Texture* sourceTexture, NS::UInteger sourceSlice, NS::UInteger sourceLevel, MTL::Origin sourceOrigin, MTL::Size sourceSize, const MTL::Texture* 
destinationTexture, NS::UInteger destinationSlice, NS::UInteger destinationLevel, MTL::Origin destinationOrigin) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(moveTextureMappingsFromTexture_sourceSlice_sourceLevel_sourceOrigin_sourceSize_toTexture_destinationSlice_destinationLevel_destinationOrigin_), sourceTexture, sourceSlice, sourceLevel, sourceOrigin, sourceSize, destinationTexture, destinationSlice, destinationLevel, destinationOrigin); +} diff --git a/metal-cpp/Metal/MTLResourceStatePass.hpp b/metal-cpp/Metal/MTLResourceStatePass.hpp new file mode 100644 index 00000000..f20aa383 --- /dev/null +++ b/metal-cpp/Metal/MTLResourceStatePass.hpp @@ -0,0 +1,165 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLResourceStatePass.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +namespace MTL +{ +class ResourceStatePassSampleBufferAttachmentDescriptor : public NS::Copying +{ +public: + static class ResourceStatePassSampleBufferAttachmentDescriptor* alloc(); + + class ResourceStatePassSampleBufferAttachmentDescriptor* init(); + + class CounterSampleBuffer* sampleBuffer() const; + void setSampleBuffer(const class CounterSampleBuffer* sampleBuffer); + + NS::UInteger startOfEncoderSampleIndex() const; + void setStartOfEncoderSampleIndex(NS::UInteger startOfEncoderSampleIndex); + + NS::UInteger endOfEncoderSampleIndex() const; + void setEndOfEncoderSampleIndex(NS::UInteger endOfEncoderSampleIndex); +}; + +class ResourceStatePassSampleBufferAttachmentDescriptorArray : public NS::Referencing +{ +public: + static class ResourceStatePassSampleBufferAttachmentDescriptorArray* alloc(); + + class ResourceStatePassSampleBufferAttachmentDescriptorArray* init(); + + class ResourceStatePassSampleBufferAttachmentDescriptor* object(NS::UInteger attachmentIndex); + + void setObject(const class ResourceStatePassSampleBufferAttachmentDescriptor* attachment, NS::UInteger attachmentIndex); +}; + +class ResourceStatePassDescriptor : public NS::Copying +{ +public: + static class ResourceStatePassDescriptor* alloc(); + + class ResourceStatePassDescriptor* init(); + + static class ResourceStatePassDescriptor* resourceStatePassDescriptor(); + + class ResourceStatePassSampleBufferAttachmentDescriptorArray* sampleBufferAttachments() const; +}; + +} + +// static method: alloc +_MTL_INLINE MTL::ResourceStatePassSampleBufferAttachmentDescriptor* MTL::ResourceStatePassSampleBufferAttachmentDescriptor::alloc() +{ + return 
NS::Object::alloc(_MTL_PRIVATE_CLS(MTLResourceStatePassSampleBufferAttachmentDescriptor)); +} + +// method: init +_MTL_INLINE MTL::ResourceStatePassSampleBufferAttachmentDescriptor* MTL::ResourceStatePassSampleBufferAttachmentDescriptor::init() +{ + return NS::Object::init(); +} + +// property: sampleBuffer +_MTL_INLINE MTL::CounterSampleBuffer* MTL::ResourceStatePassSampleBufferAttachmentDescriptor::sampleBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sampleBuffer)); +} + +_MTL_INLINE void MTL::ResourceStatePassSampleBufferAttachmentDescriptor::setSampleBuffer(const MTL::CounterSampleBuffer* sampleBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSampleBuffer_), sampleBuffer); +} + +// property: startOfEncoderSampleIndex +_MTL_INLINE NS::UInteger MTL::ResourceStatePassSampleBufferAttachmentDescriptor::startOfEncoderSampleIndex() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(startOfEncoderSampleIndex)); +} + +_MTL_INLINE void MTL::ResourceStatePassSampleBufferAttachmentDescriptor::setStartOfEncoderSampleIndex(NS::UInteger startOfEncoderSampleIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStartOfEncoderSampleIndex_), startOfEncoderSampleIndex); +} + +// property: endOfEncoderSampleIndex +_MTL_INLINE NS::UInteger MTL::ResourceStatePassSampleBufferAttachmentDescriptor::endOfEncoderSampleIndex() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(endOfEncoderSampleIndex)); +} + +_MTL_INLINE void MTL::ResourceStatePassSampleBufferAttachmentDescriptor::setEndOfEncoderSampleIndex(NS::UInteger endOfEncoderSampleIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setEndOfEncoderSampleIndex_), endOfEncoderSampleIndex); +} + +// static method: alloc +_MTL_INLINE MTL::ResourceStatePassSampleBufferAttachmentDescriptorArray* MTL::ResourceStatePassSampleBufferAttachmentDescriptorArray::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLResourceStatePassSampleBufferAttachmentDescriptorArray)); +} + +// 
method: init +_MTL_INLINE MTL::ResourceStatePassSampleBufferAttachmentDescriptorArray* MTL::ResourceStatePassSampleBufferAttachmentDescriptorArray::init() +{ + return NS::Object::init(); +} + +// method: objectAtIndexedSubscript: +_MTL_INLINE MTL::ResourceStatePassSampleBufferAttachmentDescriptor* MTL::ResourceStatePassSampleBufferAttachmentDescriptorArray::object(NS::UInteger attachmentIndex) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), attachmentIndex); +} + +// method: setObject:atIndexedSubscript: +_MTL_INLINE void MTL::ResourceStatePassSampleBufferAttachmentDescriptorArray::setObject(const MTL::ResourceStatePassSampleBufferAttachmentDescriptor* attachment, NS::UInteger attachmentIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), attachment, attachmentIndex); +} + +// static method: alloc +_MTL_INLINE MTL::ResourceStatePassDescriptor* MTL::ResourceStatePassDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLResourceStatePassDescriptor)); +} + +// method: init +_MTL_INLINE MTL::ResourceStatePassDescriptor* MTL::ResourceStatePassDescriptor::init() +{ + return NS::Object::init(); +} + +// static method: resourceStatePassDescriptor +_MTL_INLINE MTL::ResourceStatePassDescriptor* MTL::ResourceStatePassDescriptor::resourceStatePassDescriptor() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLResourceStatePassDescriptor), _MTL_PRIVATE_SEL(resourceStatePassDescriptor)); +} + +// property: sampleBufferAttachments +_MTL_INLINE MTL::ResourceStatePassSampleBufferAttachmentDescriptorArray* MTL::ResourceStatePassDescriptor::sampleBufferAttachments() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sampleBufferAttachments)); +} diff --git a/metal-cpp/Metal/MTLSampler.hpp b/metal-cpp/Metal/MTLSampler.hpp new file mode 100644 index 00000000..b1b16d25 --- /dev/null +++ b/metal-cpp/Metal/MTLSampler.hpp @@ -0,0 +1,319 @@ 
+//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLSampler.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLDepthStencil.hpp" +#include "MTLSampler.hpp" +#include "MTLTypes.hpp" + +namespace MTL +{ +_MTL_ENUM(NS::UInteger, SamplerMinMagFilter) { + SamplerMinMagFilterNearest = 0, + SamplerMinMagFilterLinear = 1, +}; + +_MTL_ENUM(NS::UInteger, SamplerMipFilter) { + SamplerMipFilterNotMipmapped = 0, + SamplerMipFilterNearest = 1, + SamplerMipFilterLinear = 2, +}; + +_MTL_ENUM(NS::UInteger, SamplerAddressMode) { + SamplerAddressModeClampToEdge = 0, + SamplerAddressModeMirrorClampToEdge = 1, + SamplerAddressModeRepeat = 2, + SamplerAddressModeMirrorRepeat = 3, + SamplerAddressModeClampToZero = 4, + SamplerAddressModeClampToBorderColor = 5, +}; + +_MTL_ENUM(NS::UInteger, SamplerBorderColor) { + SamplerBorderColorTransparentBlack = 0, + SamplerBorderColorOpaqueBlack = 1, + SamplerBorderColorOpaqueWhite = 2, +}; + +class SamplerDescriptor : public NS::Copying +{ +public: + static class SamplerDescriptor* 
alloc(); + + class SamplerDescriptor* init(); + + MTL::SamplerMinMagFilter minFilter() const; + void setMinFilter(MTL::SamplerMinMagFilter minFilter); + + MTL::SamplerMinMagFilter magFilter() const; + void setMagFilter(MTL::SamplerMinMagFilter magFilter); + + MTL::SamplerMipFilter mipFilter() const; + void setMipFilter(MTL::SamplerMipFilter mipFilter); + + NS::UInteger maxAnisotropy() const; + void setMaxAnisotropy(NS::UInteger maxAnisotropy); + + MTL::SamplerAddressMode sAddressMode() const; + void setSAddressMode(MTL::SamplerAddressMode sAddressMode); + + MTL::SamplerAddressMode tAddressMode() const; + void setTAddressMode(MTL::SamplerAddressMode tAddressMode); + + MTL::SamplerAddressMode rAddressMode() const; + void setRAddressMode(MTL::SamplerAddressMode rAddressMode); + + MTL::SamplerBorderColor borderColor() const; + void setBorderColor(MTL::SamplerBorderColor borderColor); + + bool normalizedCoordinates() const; + void setNormalizedCoordinates(bool normalizedCoordinates); + + float lodMinClamp() const; + void setLodMinClamp(float lodMinClamp); + + float lodMaxClamp() const; + void setLodMaxClamp(float lodMaxClamp); + + bool lodAverage() const; + void setLodAverage(bool lodAverage); + + MTL::CompareFunction compareFunction() const; + void setCompareFunction(MTL::CompareFunction compareFunction); + + bool supportArgumentBuffers() const; + void setSupportArgumentBuffers(bool supportArgumentBuffers); + + NS::String* label() const; + void setLabel(const NS::String* label); +}; + +class SamplerState : public NS::Referencing +{ +public: + NS::String* label() const; + + class Device* device() const; + + MTL::ResourceID gpuResourceID() const; +}; + +} + +// static method: alloc +_MTL_INLINE MTL::SamplerDescriptor* MTL::SamplerDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLSamplerDescriptor)); +} + +// method: init +_MTL_INLINE MTL::SamplerDescriptor* MTL::SamplerDescriptor::init() +{ + return NS::Object::init(); +} + +// property: minFilter 
+_MTL_INLINE MTL::SamplerMinMagFilter MTL::SamplerDescriptor::minFilter() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(minFilter)); +} + +_MTL_INLINE void MTL::SamplerDescriptor::setMinFilter(MTL::SamplerMinMagFilter minFilter) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMinFilter_), minFilter); +} + +// property: magFilter +_MTL_INLINE MTL::SamplerMinMagFilter MTL::SamplerDescriptor::magFilter() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(magFilter)); +} + +_MTL_INLINE void MTL::SamplerDescriptor::setMagFilter(MTL::SamplerMinMagFilter magFilter) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMagFilter_), magFilter); +} + +// property: mipFilter +_MTL_INLINE MTL::SamplerMipFilter MTL::SamplerDescriptor::mipFilter() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(mipFilter)); +} + +_MTL_INLINE void MTL::SamplerDescriptor::setMipFilter(MTL::SamplerMipFilter mipFilter) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMipFilter_), mipFilter); +} + +// property: maxAnisotropy +_MTL_INLINE NS::UInteger MTL::SamplerDescriptor::maxAnisotropy() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxAnisotropy)); +} + +_MTL_INLINE void MTL::SamplerDescriptor::setMaxAnisotropy(NS::UInteger maxAnisotropy) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxAnisotropy_), maxAnisotropy); +} + +// property: sAddressMode +_MTL_INLINE MTL::SamplerAddressMode MTL::SamplerDescriptor::sAddressMode() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sAddressMode)); +} + +_MTL_INLINE void MTL::SamplerDescriptor::setSAddressMode(MTL::SamplerAddressMode sAddressMode) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSAddressMode_), sAddressMode); +} + +// property: tAddressMode +_MTL_INLINE MTL::SamplerAddressMode MTL::SamplerDescriptor::tAddressMode() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(tAddressMode)); +} + +_MTL_INLINE void 
MTL::SamplerDescriptor::setTAddressMode(MTL::SamplerAddressMode tAddressMode) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTAddressMode_), tAddressMode); +} + +// property: rAddressMode +_MTL_INLINE MTL::SamplerAddressMode MTL::SamplerDescriptor::rAddressMode() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(rAddressMode)); +} + +_MTL_INLINE void MTL::SamplerDescriptor::setRAddressMode(MTL::SamplerAddressMode rAddressMode) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRAddressMode_), rAddressMode); +} + +// property: borderColor +_MTL_INLINE MTL::SamplerBorderColor MTL::SamplerDescriptor::borderColor() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(borderColor)); +} + +_MTL_INLINE void MTL::SamplerDescriptor::setBorderColor(MTL::SamplerBorderColor borderColor) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBorderColor_), borderColor); +} + +// property: normalizedCoordinates +_MTL_INLINE bool MTL::SamplerDescriptor::normalizedCoordinates() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(normalizedCoordinates)); +} + +_MTL_INLINE void MTL::SamplerDescriptor::setNormalizedCoordinates(bool normalizedCoordinates) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setNormalizedCoordinates_), normalizedCoordinates); +} + +// property: lodMinClamp +_MTL_INLINE float MTL::SamplerDescriptor::lodMinClamp() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(lodMinClamp)); +} + +_MTL_INLINE void MTL::SamplerDescriptor::setLodMinClamp(float lodMinClamp) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLodMinClamp_), lodMinClamp); +} + +// property: lodMaxClamp +_MTL_INLINE float MTL::SamplerDescriptor::lodMaxClamp() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(lodMaxClamp)); +} + +_MTL_INLINE void MTL::SamplerDescriptor::setLodMaxClamp(float lodMaxClamp) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLodMaxClamp_), lodMaxClamp); +} + +// property: lodAverage +_MTL_INLINE bool 
MTL::SamplerDescriptor::lodAverage() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(lodAverage)); +} + +_MTL_INLINE void MTL::SamplerDescriptor::setLodAverage(bool lodAverage) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLodAverage_), lodAverage); +} + +// property: compareFunction +_MTL_INLINE MTL::CompareFunction MTL::SamplerDescriptor::compareFunction() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(compareFunction)); +} + +_MTL_INLINE void MTL::SamplerDescriptor::setCompareFunction(MTL::CompareFunction compareFunction) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setCompareFunction_), compareFunction); +} + +// property: supportArgumentBuffers +_MTL_INLINE bool MTL::SamplerDescriptor::supportArgumentBuffers() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportArgumentBuffers)); +} + +_MTL_INLINE void MTL::SamplerDescriptor::setSupportArgumentBuffers(bool supportArgumentBuffers) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSupportArgumentBuffers_), supportArgumentBuffers); +} + +// property: label +_MTL_INLINE NS::String* MTL::SamplerDescriptor::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +_MTL_INLINE void MTL::SamplerDescriptor::setLabel(const NS::String* label) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); +} + +// property: label +_MTL_INLINE NS::String* MTL::SamplerState::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +// property: device +_MTL_INLINE MTL::Device* MTL::SamplerState::device() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); +} + +// property: gpuResourceID +_MTL_INLINE MTL::ResourceID MTL::SamplerState::gpuResourceID() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(gpuResourceID)); +} diff --git a/metal-cpp/Metal/MTLStageInputOutputDescriptor.hpp b/metal-cpp/Metal/MTLStageInputOutputDescriptor.hpp new file mode 100644 index 00000000..75ccd81a --- 
/dev/null +++ b/metal-cpp/Metal/MTLStageInputOutputDescriptor.hpp @@ -0,0 +1,381 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLStageInputOutputDescriptor.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLStageInputOutputDescriptor.hpp" + +namespace MTL +{ +_MTL_ENUM(NS::UInteger, AttributeFormat) { + AttributeFormatInvalid = 0, + AttributeFormatUChar2 = 1, + AttributeFormatUChar3 = 2, + AttributeFormatUChar4 = 3, + AttributeFormatChar2 = 4, + AttributeFormatChar3 = 5, + AttributeFormatChar4 = 6, + AttributeFormatUChar2Normalized = 7, + AttributeFormatUChar3Normalized = 8, + AttributeFormatUChar4Normalized = 9, + AttributeFormatChar2Normalized = 10, + AttributeFormatChar3Normalized = 11, + AttributeFormatChar4Normalized = 12, + AttributeFormatUShort2 = 13, + AttributeFormatUShort3 = 14, + AttributeFormatUShort4 = 15, + AttributeFormatShort2 = 16, + AttributeFormatShort3 = 17, + AttributeFormatShort4 = 18, + AttributeFormatUShort2Normalized = 19, + AttributeFormatUShort3Normalized = 20, + 
AttributeFormatUShort4Normalized = 21, + AttributeFormatShort2Normalized = 22, + AttributeFormatShort3Normalized = 23, + AttributeFormatShort4Normalized = 24, + AttributeFormatHalf2 = 25, + AttributeFormatHalf3 = 26, + AttributeFormatHalf4 = 27, + AttributeFormatFloat = 28, + AttributeFormatFloat2 = 29, + AttributeFormatFloat3 = 30, + AttributeFormatFloat4 = 31, + AttributeFormatInt = 32, + AttributeFormatInt2 = 33, + AttributeFormatInt3 = 34, + AttributeFormatInt4 = 35, + AttributeFormatUInt = 36, + AttributeFormatUInt2 = 37, + AttributeFormatUInt3 = 38, + AttributeFormatUInt4 = 39, + AttributeFormatInt1010102Normalized = 40, + AttributeFormatUInt1010102Normalized = 41, + AttributeFormatUChar4Normalized_BGRA = 42, + AttributeFormatUChar = 45, + AttributeFormatChar = 46, + AttributeFormatUCharNormalized = 47, + AttributeFormatCharNormalized = 48, + AttributeFormatUShort = 49, + AttributeFormatShort = 50, + AttributeFormatUShortNormalized = 51, + AttributeFormatShortNormalized = 52, + AttributeFormatHalf = 53, +}; + +_MTL_ENUM(NS::UInteger, IndexType) { + IndexTypeUInt16 = 0, + IndexTypeUInt32 = 1, +}; + +_MTL_ENUM(NS::UInteger, StepFunction) { + StepFunctionConstant = 0, + StepFunctionPerVertex = 1, + StepFunctionPerInstance = 2, + StepFunctionPerPatch = 3, + StepFunctionPerPatchControlPoint = 4, + StepFunctionThreadPositionInGridX = 5, + StepFunctionThreadPositionInGridY = 6, + StepFunctionThreadPositionInGridXIndexed = 7, + StepFunctionThreadPositionInGridYIndexed = 8, +}; + +class BufferLayoutDescriptor : public NS::Copying +{ +public: + static class BufferLayoutDescriptor* alloc(); + + class BufferLayoutDescriptor* init(); + + NS::UInteger stride() const; + void setStride(NS::UInteger stride); + + MTL::StepFunction stepFunction() const; + void setStepFunction(MTL::StepFunction stepFunction); + + NS::UInteger stepRate() const; + void setStepRate(NS::UInteger stepRate); +}; + +class BufferLayoutDescriptorArray : public NS::Referencing +{ +public: + static class 
BufferLayoutDescriptorArray* alloc(); + + class BufferLayoutDescriptorArray* init(); + + class BufferLayoutDescriptor* object(NS::UInteger index); + + void setObject(const class BufferLayoutDescriptor* bufferDesc, NS::UInteger index); +}; + +class AttributeDescriptor : public NS::Copying +{ +public: + static class AttributeDescriptor* alloc(); + + class AttributeDescriptor* init(); + + MTL::AttributeFormat format() const; + void setFormat(MTL::AttributeFormat format); + + NS::UInteger offset() const; + void setOffset(NS::UInteger offset); + + NS::UInteger bufferIndex() const; + void setBufferIndex(NS::UInteger bufferIndex); +}; + +class AttributeDescriptorArray : public NS::Referencing +{ +public: + static class AttributeDescriptorArray* alloc(); + + class AttributeDescriptorArray* init(); + + class AttributeDescriptor* object(NS::UInteger index); + + void setObject(const class AttributeDescriptor* attributeDesc, NS::UInteger index); +}; + +class StageInputOutputDescriptor : public NS::Copying +{ +public: + static class StageInputOutputDescriptor* alloc(); + + class StageInputOutputDescriptor* init(); + + static class StageInputOutputDescriptor* stageInputOutputDescriptor(); + + class BufferLayoutDescriptorArray* layouts() const; + + class AttributeDescriptorArray* attributes() const; + + MTL::IndexType indexType() const; + void setIndexType(MTL::IndexType indexType); + + NS::UInteger indexBufferIndex() const; + void setIndexBufferIndex(NS::UInteger indexBufferIndex); + + void reset(); +}; + +} + +// static method: alloc +_MTL_INLINE MTL::BufferLayoutDescriptor* MTL::BufferLayoutDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLBufferLayoutDescriptor)); +} + +// method: init +_MTL_INLINE MTL::BufferLayoutDescriptor* MTL::BufferLayoutDescriptor::init() +{ + return NS::Object::init(); +} + +// property: stride +_MTL_INLINE NS::UInteger MTL::BufferLayoutDescriptor::stride() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(stride)); +} + 
+_MTL_INLINE void MTL::BufferLayoutDescriptor::setStride(NS::UInteger stride) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStride_), stride); +} + +// property: stepFunction +_MTL_INLINE MTL::StepFunction MTL::BufferLayoutDescriptor::stepFunction() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(stepFunction)); +} + +_MTL_INLINE void MTL::BufferLayoutDescriptor::setStepFunction(MTL::StepFunction stepFunction) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStepFunction_), stepFunction); +} + +// property: stepRate +_MTL_INLINE NS::UInteger MTL::BufferLayoutDescriptor::stepRate() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(stepRate)); +} + +_MTL_INLINE void MTL::BufferLayoutDescriptor::setStepRate(NS::UInteger stepRate) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStepRate_), stepRate); +} + +// static method: alloc +_MTL_INLINE MTL::BufferLayoutDescriptorArray* MTL::BufferLayoutDescriptorArray::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLBufferLayoutDescriptorArray)); +} + +// method: init +_MTL_INLINE MTL::BufferLayoutDescriptorArray* MTL::BufferLayoutDescriptorArray::init() +{ + return NS::Object::init(); +} + +// method: objectAtIndexedSubscript: +_MTL_INLINE MTL::BufferLayoutDescriptor* MTL::BufferLayoutDescriptorArray::object(NS::UInteger index) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), index); +} + +// method: setObject:atIndexedSubscript: +_MTL_INLINE void MTL::BufferLayoutDescriptorArray::setObject(const MTL::BufferLayoutDescriptor* bufferDesc, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), bufferDesc, index); +} + +// static method: alloc +_MTL_INLINE MTL::AttributeDescriptor* MTL::AttributeDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLAttributeDescriptor)); +} + +// method: init +_MTL_INLINE MTL::AttributeDescriptor* MTL::AttributeDescriptor::init() +{ + return NS::Object::init(); 
+} + +// property: format +_MTL_INLINE MTL::AttributeFormat MTL::AttributeDescriptor::format() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(format)); +} + +_MTL_INLINE void MTL::AttributeDescriptor::setFormat(MTL::AttributeFormat format) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setFormat_), format); +} + +// property: offset +_MTL_INLINE NS::UInteger MTL::AttributeDescriptor::offset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(offset)); +} + +_MTL_INLINE void MTL::AttributeDescriptor::setOffset(NS::UInteger offset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setOffset_), offset); +} + +// property: bufferIndex +_MTL_INLINE NS::UInteger MTL::AttributeDescriptor::bufferIndex() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(bufferIndex)); +} + +_MTL_INLINE void MTL::AttributeDescriptor::setBufferIndex(NS::UInteger bufferIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBufferIndex_), bufferIndex); +} + +// static method: alloc +_MTL_INLINE MTL::AttributeDescriptorArray* MTL::AttributeDescriptorArray::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLAttributeDescriptorArray)); +} + +// method: init +_MTL_INLINE MTL::AttributeDescriptorArray* MTL::AttributeDescriptorArray::init() +{ + return NS::Object::init(); +} + +// method: objectAtIndexedSubscript: +_MTL_INLINE MTL::AttributeDescriptor* MTL::AttributeDescriptorArray::object(NS::UInteger index) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), index); +} + +// method: setObject:atIndexedSubscript: +_MTL_INLINE void MTL::AttributeDescriptorArray::setObject(const MTL::AttributeDescriptor* attributeDesc, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), attributeDesc, index); +} + +// static method: alloc +_MTL_INLINE MTL::StageInputOutputDescriptor* MTL::StageInputOutputDescriptor::alloc() +{ + return 
NS::Object::alloc(_MTL_PRIVATE_CLS(MTLStageInputOutputDescriptor)); +} + +// method: init +_MTL_INLINE MTL::StageInputOutputDescriptor* MTL::StageInputOutputDescriptor::init() +{ + return NS::Object::init(); +} + +// static method: stageInputOutputDescriptor +_MTL_INLINE MTL::StageInputOutputDescriptor* MTL::StageInputOutputDescriptor::stageInputOutputDescriptor() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLStageInputOutputDescriptor), _MTL_PRIVATE_SEL(stageInputOutputDescriptor)); +} + +// property: layouts +_MTL_INLINE MTL::BufferLayoutDescriptorArray* MTL::StageInputOutputDescriptor::layouts() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(layouts)); +} + +// property: attributes +_MTL_INLINE MTL::AttributeDescriptorArray* MTL::StageInputOutputDescriptor::attributes() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(attributes)); +} + +// property: indexType +_MTL_INLINE MTL::IndexType MTL::StageInputOutputDescriptor::indexType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(indexType)); +} + +_MTL_INLINE void MTL::StageInputOutputDescriptor::setIndexType(MTL::IndexType indexType) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIndexType_), indexType); +} + +// property: indexBufferIndex +_MTL_INLINE NS::UInteger MTL::StageInputOutputDescriptor::indexBufferIndex() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(indexBufferIndex)); +} + +_MTL_INLINE void MTL::StageInputOutputDescriptor::setIndexBufferIndex(NS::UInteger indexBufferIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIndexBufferIndex_), indexBufferIndex); +} + +// method: reset +_MTL_INLINE void MTL::StageInputOutputDescriptor::reset() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(reset)); +} diff --git a/metal-cpp/Metal/MTLTexture.hpp b/metal-cpp/Metal/MTLTexture.hpp new file mode 100644 index 00000000..51f8d7c2 --- /dev/null +++ b/metal-cpp/Metal/MTLTexture.hpp @@ -0,0 +1,684 @@ 
+//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLTexture.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLPixelFormat.hpp" +#include "MTLResource.hpp" +#include "MTLTexture.hpp" +#include "MTLTypes.hpp" +#include + +namespace MTL +{ +_MTL_ENUM(NS::UInteger, TextureType) { + TextureType1D = 0, + TextureType1DArray = 1, + TextureType2D = 2, + TextureType2DArray = 3, + TextureType2DMultisample = 4, + TextureTypeCube = 5, + TextureTypeCubeArray = 6, + TextureType3D = 7, + TextureType2DMultisampleArray = 8, + TextureTypeTextureBuffer = 9, +}; + +_MTL_ENUM(uint8_t, TextureSwizzle) { + TextureSwizzleZero = 0, + TextureSwizzleOne = 1, + TextureSwizzleRed = 2, + TextureSwizzleGreen = 3, + TextureSwizzleBlue = 4, + TextureSwizzleAlpha = 5, +}; + +struct TextureSwizzleChannels +{ + MTL::TextureSwizzle red; + MTL::TextureSwizzle green; + MTL::TextureSwizzle blue; + MTL::TextureSwizzle alpha; +} _MTL_PACKED; + +class SharedTextureHandle : public NS::SecureCoding +{ +public: + static class SharedTextureHandle* 
alloc(); + + class SharedTextureHandle* init(); + + class Device* device() const; + + NS::String* label() const; +}; + +_MTL_OPTIONS(NS::UInteger, TextureUsage) { + TextureUsageUnknown = 0, + TextureUsageShaderRead = 1, + TextureUsageShaderWrite = 2, + TextureUsageRenderTarget = 4, + TextureUsagePixelFormatView = 16, +}; + +_MTL_ENUM(NS::Integer, TextureCompressionType) { + TextureCompressionTypeLossless = 0, + TextureCompressionTypeLossy = 1, +}; + +class TextureDescriptor : public NS::Copying +{ +public: + static class TextureDescriptor* alloc(); + + class TextureDescriptor* init(); + + static class TextureDescriptor* texture2DDescriptor(MTL::PixelFormat pixelFormat, NS::UInteger width, NS::UInteger height, bool mipmapped); + + static class TextureDescriptor* textureCubeDescriptor(MTL::PixelFormat pixelFormat, NS::UInteger size, bool mipmapped); + + static class TextureDescriptor* textureBufferDescriptor(MTL::PixelFormat pixelFormat, NS::UInteger width, MTL::ResourceOptions resourceOptions, MTL::TextureUsage usage); + + MTL::TextureType textureType() const; + void setTextureType(MTL::TextureType textureType); + + MTL::PixelFormat pixelFormat() const; + void setPixelFormat(MTL::PixelFormat pixelFormat); + + NS::UInteger width() const; + void setWidth(NS::UInteger width); + + NS::UInteger height() const; + void setHeight(NS::UInteger height); + + NS::UInteger depth() const; + void setDepth(NS::UInteger depth); + + NS::UInteger mipmapLevelCount() const; + void setMipmapLevelCount(NS::UInteger mipmapLevelCount); + + NS::UInteger sampleCount() const; + void setSampleCount(NS::UInteger sampleCount); + + NS::UInteger arrayLength() const; + void setArrayLength(NS::UInteger arrayLength); + + MTL::ResourceOptions resourceOptions() const; + void setResourceOptions(MTL::ResourceOptions resourceOptions); + + MTL::CPUCacheMode cpuCacheMode() const; + void setCpuCacheMode(MTL::CPUCacheMode cpuCacheMode); + + MTL::StorageMode storageMode() const; + void 
setStorageMode(MTL::StorageMode storageMode); + + MTL::HazardTrackingMode hazardTrackingMode() const; + void setHazardTrackingMode(MTL::HazardTrackingMode hazardTrackingMode); + + MTL::TextureUsage usage() const; + void setUsage(MTL::TextureUsage usage); + + bool allowGPUOptimizedContents() const; + void setAllowGPUOptimizedContents(bool allowGPUOptimizedContents); + + MTL::TextureCompressionType compressionType() const; + void setCompressionType(MTL::TextureCompressionType compressionType); + + MTL::TextureSwizzleChannels swizzle() const; + void setSwizzle(MTL::TextureSwizzleChannels swizzle); +}; + +class Texture : public NS::Referencing +{ +public: + class Resource* rootResource() const; + + class Texture* parentTexture() const; + + NS::UInteger parentRelativeLevel() const; + + NS::UInteger parentRelativeSlice() const; + + class Buffer* buffer() const; + + NS::UInteger bufferOffset() const; + + NS::UInteger bufferBytesPerRow() const; + + IOSurfaceRef iosurface() const; + + NS::UInteger iosurfacePlane() const; + + MTL::TextureType textureType() const; + + MTL::PixelFormat pixelFormat() const; + + NS::UInteger width() const; + + NS::UInteger height() const; + + NS::UInteger depth() const; + + NS::UInteger mipmapLevelCount() const; + + NS::UInteger sampleCount() const; + + NS::UInteger arrayLength() const; + + MTL::TextureUsage usage() const; + + bool shareable() const; + + bool framebufferOnly() const; + + NS::UInteger firstMipmapInTail() const; + + NS::UInteger tailSizeInBytes() const; + + bool isSparse() const; + + bool allowGPUOptimizedContents() const; + + MTL::TextureCompressionType compressionType() const; + + MTL::ResourceID gpuResourceID() const; + + void getBytes(const void* pixelBytes, NS::UInteger bytesPerRow, NS::UInteger bytesPerImage, MTL::Region region, NS::UInteger level, NS::UInteger slice); + + void replaceRegion(MTL::Region region, NS::UInteger level, NS::UInteger slice, const void* pixelBytes, NS::UInteger bytesPerRow, NS::UInteger 
bytesPerImage); + + void getBytes(const void* pixelBytes, NS::UInteger bytesPerRow, MTL::Region region, NS::UInteger level); + + void replaceRegion(MTL::Region region, NS::UInteger level, const void* pixelBytes, NS::UInteger bytesPerRow); + + class Texture* newTextureView(MTL::PixelFormat pixelFormat); + + class Texture* newTextureView(MTL::PixelFormat pixelFormat, MTL::TextureType textureType, NS::Range levelRange, NS::Range sliceRange); + + class SharedTextureHandle* newSharedTextureHandle(); + + class Texture* remoteStorageTexture() const; + + class Texture* newRemoteTextureViewForDevice(const class Device* device); + + MTL::TextureSwizzleChannels swizzle() const; + + class Texture* newTextureView(MTL::PixelFormat pixelFormat, MTL::TextureType textureType, NS::Range levelRange, NS::Range sliceRange, MTL::TextureSwizzleChannels swizzle); +}; + +} + +// static method: alloc +_MTL_INLINE MTL::SharedTextureHandle* MTL::SharedTextureHandle::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLSharedTextureHandle)); +} + +// method: init +_MTL_INLINE MTL::SharedTextureHandle* MTL::SharedTextureHandle::init() +{ + return NS::Object::init(); +} + +// property: device +_MTL_INLINE MTL::Device* MTL::SharedTextureHandle::device() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); +} + +// property: label +_MTL_INLINE NS::String* MTL::SharedTextureHandle::label() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); +} + +// static method: alloc +_MTL_INLINE MTL::TextureDescriptor* MTL::TextureDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLTextureDescriptor)); +} + +// method: init +_MTL_INLINE MTL::TextureDescriptor* MTL::TextureDescriptor::init() +{ + return NS::Object::init(); +} + +// static method: texture2DDescriptorWithPixelFormat:width:height:mipmapped: +_MTL_INLINE MTL::TextureDescriptor* MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormat pixelFormat, NS::UInteger width, NS::UInteger height, 
bool mipmapped) +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLTextureDescriptor), _MTL_PRIVATE_SEL(texture2DDescriptorWithPixelFormat_width_height_mipmapped_), pixelFormat, width, height, mipmapped); +} + +// static method: textureCubeDescriptorWithPixelFormat:size:mipmapped: +_MTL_INLINE MTL::TextureDescriptor* MTL::TextureDescriptor::textureCubeDescriptor(MTL::PixelFormat pixelFormat, NS::UInteger size, bool mipmapped) +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLTextureDescriptor), _MTL_PRIVATE_SEL(textureCubeDescriptorWithPixelFormat_size_mipmapped_), pixelFormat, size, mipmapped); +} + +// static method: textureBufferDescriptorWithPixelFormat:width:resourceOptions:usage: +_MTL_INLINE MTL::TextureDescriptor* MTL::TextureDescriptor::textureBufferDescriptor(MTL::PixelFormat pixelFormat, NS::UInteger width, MTL::ResourceOptions resourceOptions, MTL::TextureUsage usage) +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLTextureDescriptor), _MTL_PRIVATE_SEL(textureBufferDescriptorWithPixelFormat_width_resourceOptions_usage_), pixelFormat, width, resourceOptions, usage); +} + +// property: textureType +_MTL_INLINE MTL::TextureType MTL::TextureDescriptor::textureType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(textureType)); +} + +_MTL_INLINE void MTL::TextureDescriptor::setTextureType(MTL::TextureType textureType) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setTextureType_), textureType); +} + +// property: pixelFormat +_MTL_INLINE MTL::PixelFormat MTL::TextureDescriptor::pixelFormat() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(pixelFormat)); +} + +_MTL_INLINE void MTL::TextureDescriptor::setPixelFormat(MTL::PixelFormat pixelFormat) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setPixelFormat_), pixelFormat); +} + +// property: width +_MTL_INLINE NS::UInteger MTL::TextureDescriptor::width() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(width)); +} + +_MTL_INLINE void 
MTL::TextureDescriptor::setWidth(NS::UInteger width) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setWidth_), width); +} + +// property: height +_MTL_INLINE NS::UInteger MTL::TextureDescriptor::height() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(height)); +} + +_MTL_INLINE void MTL::TextureDescriptor::setHeight(NS::UInteger height) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setHeight_), height); +} + +// property: depth +_MTL_INLINE NS::UInteger MTL::TextureDescriptor::depth() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(depth)); +} + +_MTL_INLINE void MTL::TextureDescriptor::setDepth(NS::UInteger depth) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setDepth_), depth); +} + +// property: mipmapLevelCount +_MTL_INLINE NS::UInteger MTL::TextureDescriptor::mipmapLevelCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(mipmapLevelCount)); +} + +_MTL_INLINE void MTL::TextureDescriptor::setMipmapLevelCount(NS::UInteger mipmapLevelCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMipmapLevelCount_), mipmapLevelCount); +} + +// property: sampleCount +_MTL_INLINE NS::UInteger MTL::TextureDescriptor::sampleCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sampleCount)); +} + +_MTL_INLINE void MTL::TextureDescriptor::setSampleCount(NS::UInteger sampleCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSampleCount_), sampleCount); +} + +// property: arrayLength +_MTL_INLINE NS::UInteger MTL::TextureDescriptor::arrayLength() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(arrayLength)); +} + +_MTL_INLINE void MTL::TextureDescriptor::setArrayLength(NS::UInteger arrayLength) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setArrayLength_), arrayLength); +} + +// property: resourceOptions +_MTL_INLINE MTL::ResourceOptions MTL::TextureDescriptor::resourceOptions() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(resourceOptions)); +} + +_MTL_INLINE void 
MTL::TextureDescriptor::setResourceOptions(MTL::ResourceOptions resourceOptions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setResourceOptions_), resourceOptions); +} + +// property: cpuCacheMode +_MTL_INLINE MTL::CPUCacheMode MTL::TextureDescriptor::cpuCacheMode() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(cpuCacheMode)); +} + +_MTL_INLINE void MTL::TextureDescriptor::setCpuCacheMode(MTL::CPUCacheMode cpuCacheMode) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setCpuCacheMode_), cpuCacheMode); +} + +// property: storageMode +_MTL_INLINE MTL::StorageMode MTL::TextureDescriptor::storageMode() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(storageMode)); +} + +_MTL_INLINE void MTL::TextureDescriptor::setStorageMode(MTL::StorageMode storageMode) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStorageMode_), storageMode); +} + +// property: hazardTrackingMode +_MTL_INLINE MTL::HazardTrackingMode MTL::TextureDescriptor::hazardTrackingMode() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(hazardTrackingMode)); +} + +_MTL_INLINE void MTL::TextureDescriptor::setHazardTrackingMode(MTL::HazardTrackingMode hazardTrackingMode) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setHazardTrackingMode_), hazardTrackingMode); +} + +// property: usage +_MTL_INLINE MTL::TextureUsage MTL::TextureDescriptor::usage() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(usage)); +} + +_MTL_INLINE void MTL::TextureDescriptor::setUsage(MTL::TextureUsage usage) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setUsage_), usage); +} + +// property: allowGPUOptimizedContents +_MTL_INLINE bool MTL::TextureDescriptor::allowGPUOptimizedContents() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(allowGPUOptimizedContents)); +} + +_MTL_INLINE void MTL::TextureDescriptor::setAllowGPUOptimizedContents(bool allowGPUOptimizedContents) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setAllowGPUOptimizedContents_), 
allowGPUOptimizedContents); +} + +// property: compressionType +_MTL_INLINE MTL::TextureCompressionType MTL::TextureDescriptor::compressionType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(compressionType)); +} + +_MTL_INLINE void MTL::TextureDescriptor::setCompressionType(MTL::TextureCompressionType compressionType) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setCompressionType_), compressionType); +} + +// property: swizzle +_MTL_INLINE MTL::TextureSwizzleChannels MTL::TextureDescriptor::swizzle() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(swizzle)); +} + +_MTL_INLINE void MTL::TextureDescriptor::setSwizzle(MTL::TextureSwizzleChannels swizzle) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSwizzle_), swizzle); +} + +// property: rootResource +_MTL_INLINE MTL::Resource* MTL::Texture::rootResource() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(rootResource)); +} + +// property: parentTexture +_MTL_INLINE MTL::Texture* MTL::Texture::parentTexture() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(parentTexture)); +} + +// property: parentRelativeLevel +_MTL_INLINE NS::UInteger MTL::Texture::parentRelativeLevel() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(parentRelativeLevel)); +} + +// property: parentRelativeSlice +_MTL_INLINE NS::UInteger MTL::Texture::parentRelativeSlice() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(parentRelativeSlice)); +} + +// property: buffer +_MTL_INLINE MTL::Buffer* MTL::Texture::buffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(buffer)); +} + +// property: bufferOffset +_MTL_INLINE NS::UInteger MTL::Texture::bufferOffset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(bufferOffset)); +} + +// property: bufferBytesPerRow +_MTL_INLINE NS::UInteger MTL::Texture::bufferBytesPerRow() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(bufferBytesPerRow)); +} + +// property: iosurface 
+_MTL_INLINE IOSurfaceRef MTL::Texture::iosurface() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(iosurface)); +} + +// property: iosurfacePlane +_MTL_INLINE NS::UInteger MTL::Texture::iosurfacePlane() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(iosurfacePlane)); +} + +// property: textureType +_MTL_INLINE MTL::TextureType MTL::Texture::textureType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(textureType)); +} + +// property: pixelFormat +_MTL_INLINE MTL::PixelFormat MTL::Texture::pixelFormat() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(pixelFormat)); +} + +// property: width +_MTL_INLINE NS::UInteger MTL::Texture::width() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(width)); +} + +// property: height +_MTL_INLINE NS::UInteger MTL::Texture::height() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(height)); +} + +// property: depth +_MTL_INLINE NS::UInteger MTL::Texture::depth() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(depth)); +} + +// property: mipmapLevelCount +_MTL_INLINE NS::UInteger MTL::Texture::mipmapLevelCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(mipmapLevelCount)); +} + +// property: sampleCount +_MTL_INLINE NS::UInteger MTL::Texture::sampleCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(sampleCount)); +} + +// property: arrayLength +_MTL_INLINE NS::UInteger MTL::Texture::arrayLength() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(arrayLength)); +} + +// property: usage +_MTL_INLINE MTL::TextureUsage MTL::Texture::usage() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(usage)); +} + +// property: shareable +_MTL_INLINE bool MTL::Texture::shareable() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(isShareable)); +} + +// property: framebufferOnly +_MTL_INLINE bool MTL::Texture::framebufferOnly() const +{ + return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(isFramebufferOnly)); +} + +// property: firstMipmapInTail +_MTL_INLINE NS::UInteger MTL::Texture::firstMipmapInTail() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(firstMipmapInTail)); +} + +// property: tailSizeInBytes +_MTL_INLINE NS::UInteger MTL::Texture::tailSizeInBytes() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(tailSizeInBytes)); +} + +// property: isSparse +_MTL_INLINE bool MTL::Texture::isSparse() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(isSparse)); +} + +// property: allowGPUOptimizedContents +_MTL_INLINE bool MTL::Texture::allowGPUOptimizedContents() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(allowGPUOptimizedContents)); +} + +// property: compressionType +_MTL_INLINE MTL::TextureCompressionType MTL::Texture::compressionType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(compressionType)); +} + +// property: gpuResourceID +_MTL_INLINE MTL::ResourceID MTL::Texture::gpuResourceID() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(gpuResourceID)); +} + +// method: getBytes:bytesPerRow:bytesPerImage:fromRegion:mipmapLevel:slice: +_MTL_INLINE void MTL::Texture::getBytes(const void* pixelBytes, NS::UInteger bytesPerRow, NS::UInteger bytesPerImage, MTL::Region region, NS::UInteger level, NS::UInteger slice) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(getBytes_bytesPerRow_bytesPerImage_fromRegion_mipmapLevel_slice_), pixelBytes, bytesPerRow, bytesPerImage, region, level, slice); +} + +// method: replaceRegion:mipmapLevel:slice:withBytes:bytesPerRow:bytesPerImage: +_MTL_INLINE void MTL::Texture::replaceRegion(MTL::Region region, NS::UInteger level, NS::UInteger slice, const void* pixelBytes, NS::UInteger bytesPerRow, NS::UInteger bytesPerImage) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(replaceRegion_mipmapLevel_slice_withBytes_bytesPerRow_bytesPerImage_), region, level, slice, pixelBytes, bytesPerRow, bytesPerImage); +} + +// method: 
getBytes:bytesPerRow:fromRegion:mipmapLevel: +_MTL_INLINE void MTL::Texture::getBytes(const void* pixelBytes, NS::UInteger bytesPerRow, MTL::Region region, NS::UInteger level) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(getBytes_bytesPerRow_fromRegion_mipmapLevel_), pixelBytes, bytesPerRow, region, level); +} + +// method: replaceRegion:mipmapLevel:withBytes:bytesPerRow: +_MTL_INLINE void MTL::Texture::replaceRegion(MTL::Region region, NS::UInteger level, const void* pixelBytes, NS::UInteger bytesPerRow) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(replaceRegion_mipmapLevel_withBytes_bytesPerRow_), region, level, pixelBytes, bytesPerRow); +} + +// method: newTextureViewWithPixelFormat: +_MTL_INLINE MTL::Texture* MTL::Texture::newTextureView(MTL::PixelFormat pixelFormat) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newTextureViewWithPixelFormat_), pixelFormat); +} + +// method: newTextureViewWithPixelFormat:textureType:levels:slices: +_MTL_INLINE MTL::Texture* MTL::Texture::newTextureView(MTL::PixelFormat pixelFormat, MTL::TextureType textureType, NS::Range levelRange, NS::Range sliceRange) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newTextureViewWithPixelFormat_textureType_levels_slices_), pixelFormat, textureType, levelRange, sliceRange); +} + +// method: newSharedTextureHandle +_MTL_INLINE MTL::SharedTextureHandle* MTL::Texture::newSharedTextureHandle() +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newSharedTextureHandle)); +} + +// property: remoteStorageTexture +_MTL_INLINE MTL::Texture* MTL::Texture::remoteStorageTexture() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(remoteStorageTexture)); +} + +// method: newRemoteTextureViewForDevice: +_MTL_INLINE MTL::Texture* MTL::Texture::newRemoteTextureViewForDevice(const MTL::Device* device) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newRemoteTextureViewForDevice_), device); +} + +// property: swizzle +_MTL_INLINE MTL::TextureSwizzleChannels 
MTL::Texture::swizzle() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(swizzle)); +} + +// method: newTextureViewWithPixelFormat:textureType:levels:slices:swizzle: +_MTL_INLINE MTL::Texture* MTL::Texture::newTextureView(MTL::PixelFormat pixelFormat, MTL::TextureType textureType, NS::Range levelRange, NS::Range sliceRange, MTL::TextureSwizzleChannels swizzle) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newTextureViewWithPixelFormat_textureType_levels_slices_swizzle_), pixelFormat, textureType, levelRange, sliceRange, swizzle); +} diff --git a/metal-cpp/Metal/MTLTypes.hpp b/metal-cpp/Metal/MTLTypes.hpp new file mode 100644 index 00000000..2c7a9cea --- /dev/null +++ b/metal-cpp/Metal/MTLTypes.hpp @@ -0,0 +1,168 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLTypes.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLTypes.hpp" + +namespace MTL +{ +struct Origin +{ + Origin() = default; + + Origin(NS::UInteger x, NS::UInteger y, NS::UInteger z); + + static Origin Make(NS::UInteger x, NS::UInteger y, NS::UInteger z); + + NS::UInteger x; + NS::UInteger y; + NS::UInteger z; +} _MTL_PACKED; + +struct Size +{ + Size() = default; + + Size(NS::UInteger width, NS::UInteger height, NS::UInteger depth); + + static Size Make(NS::UInteger width, NS::UInteger height, NS::UInteger depth); + + NS::UInteger width; + NS::UInteger height; + NS::UInteger depth; +} _MTL_PACKED; + +struct Region +{ + Region() = default; + + Region(NS::UInteger x, NS::UInteger width); + + Region(NS::UInteger x, NS::UInteger y, NS::UInteger width, NS::UInteger height); + + Region(NS::UInteger x, NS::UInteger y, NS::UInteger z, NS::UInteger width, NS::UInteger height, NS::UInteger depth); + + static Region Make1D(NS::UInteger x, NS::UInteger width); + + static Region Make2D(NS::UInteger x, NS::UInteger y, NS::UInteger width, NS::UInteger height); + + static Region Make3D(NS::UInteger x, NS::UInteger y, NS::UInteger z, NS::UInteger width, NS::UInteger height, NS::UInteger depth); + + MTL::Origin origin; + MTL::Size size; +} _MTL_PACKED; + +struct SamplePosition; + +using Coordinate2D = SamplePosition; + +struct SamplePosition +{ + SamplePosition() = default; + + SamplePosition(float _x, float _y); + + static SamplePosition Make(float x, float y); + + float x; + float y; +} _MTL_PACKED; + +struct ResourceID +{ + uint64_t _impl; +} _MTL_PACKED; + +} + +_MTL_INLINE MTL::Origin::Origin(NS::UInteger _x, NS::UInteger _y, NS::UInteger _z) + : x(_x) + , y(_y) + , z(_z) +{ +} + +_MTL_INLINE MTL::Origin MTL::Origin::Make(NS::UInteger x, 
NS::UInteger y, NS::UInteger z) +{ + return Origin(x, y, z); +} + +_MTL_INLINE MTL::Size::Size(NS::UInteger _width, NS::UInteger _height, NS::UInteger _depth) + : width(_width) + , height(_height) + , depth(_depth) +{ +} + +_MTL_INLINE MTL::Size MTL::Size::Make(NS::UInteger width, NS::UInteger height, NS::UInteger depth) +{ + return Size(width, height, depth); +} + +_MTL_INLINE MTL::Region::Region(NS::UInteger x, NS::UInteger width) + : origin(x, 0, 0) + , size(width, 1, 1) +{ +} + +_MTL_INLINE MTL::Region::Region(NS::UInteger x, NS::UInteger y, NS::UInteger width, NS::UInteger height) + : origin(x, y, 0) + , size(width, height, 1) +{ +} + +_MTL_INLINE MTL::Region::Region(NS::UInteger x, NS::UInteger y, NS::UInteger z, NS::UInteger width, NS::UInteger height, NS::UInteger depth) + : origin(x, y, z) + , size(width, height, depth) +{ +} + +_MTL_INLINE MTL::Region MTL::Region::Make1D(NS::UInteger x, NS::UInteger width) +{ + return Region(x, width); +} + +_MTL_INLINE MTL::Region MTL::Region::Make2D(NS::UInteger x, NS::UInteger y, NS::UInteger width, NS::UInteger height) +{ + return Region(x, y, width, height); +} + +_MTL_INLINE MTL::Region MTL::Region::Make3D(NS::UInteger x, NS::UInteger y, NS::UInteger z, NS::UInteger width, NS::UInteger height, NS::UInteger depth) +{ + return Region(x, y, z, width, height, depth); +} + +_MTL_INLINE MTL::SamplePosition::SamplePosition(float _x, float _y) + : x(_x) + , y(_y) +{ +} + +_MTL_INLINE MTL::SamplePosition MTL::SamplePosition::Make(float x, float y) +{ + return SamplePosition(x, y); +} diff --git a/metal-cpp/Metal/MTLVersion.hpp b/metal-cpp/Metal/MTLVersion.hpp new file mode 100644 index 00000000..aa6a03a5 --- /dev/null +++ b/metal-cpp/Metal/MTLVersion.hpp @@ -0,0 +1,32 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLVersion.hpp +// +// Copyright 2020-2022 Apple Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#define METALCPP_VERSION_MAJOR 306 +#define METALCPP_VERSION_MINOR 2 +#define METALCPP_VERSION_PATCH 4 +// True when (major, minor, patch) is not newer than the METALCPP_VERSION_* values above; arguments are parenthesized so expression arguments expand safely. +#define METALCPP_SUPPORTS_VERSION(major, minor, patch) \ + (((major) < METALCPP_VERSION_MAJOR) || \ + ((major) == METALCPP_VERSION_MAJOR && (minor) < METALCPP_VERSION_MINOR) || \ + ((major) == METALCPP_VERSION_MAJOR && (minor) == METALCPP_VERSION_MINOR && (patch) <= METALCPP_VERSION_PATCH)) diff --git a/metal-cpp/Metal/MTLVertexDescriptor.hpp b/metal-cpp/Metal/MTLVertexDescriptor.hpp new file mode 100644 index 00000000..8bc7fc7b --- /dev/null +++ b/metal-cpp/Metal/MTLVertexDescriptor.hpp @@ -0,0 +1,344 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLVertexDescriptor.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include + +#include "MTLVertexDescriptor.hpp" + +namespace MTL +{ +_MTL_ENUM(NS::UInteger, VertexFormat) { + VertexFormatInvalid = 0, + VertexFormatUChar2 = 1, + VertexFormatUChar3 = 2, + VertexFormatUChar4 = 3, + VertexFormatChar2 = 4, + VertexFormatChar3 = 5, + VertexFormatChar4 = 6, + VertexFormatUChar2Normalized = 7, + VertexFormatUChar3Normalized = 8, + VertexFormatUChar4Normalized = 9, + VertexFormatChar2Normalized = 10, + VertexFormatChar3Normalized = 11, + VertexFormatChar4Normalized = 12, + VertexFormatUShort2 = 13, + VertexFormatUShort3 = 14, + VertexFormatUShort4 = 15, + VertexFormatShort2 = 16, + VertexFormatShort3 = 17, + VertexFormatShort4 = 18, + VertexFormatUShort2Normalized = 19, + VertexFormatUShort3Normalized = 20, + VertexFormatUShort4Normalized = 21, + VertexFormatShort2Normalized = 22, + VertexFormatShort3Normalized = 23, + VertexFormatShort4Normalized = 24, + VertexFormatHalf2 = 25, + VertexFormatHalf3 = 26, + VertexFormatHalf4 = 27, + VertexFormatFloat = 28, + VertexFormatFloat2 = 29, + VertexFormatFloat3 = 30, + VertexFormatFloat4 = 31, + VertexFormatInt = 32, + VertexFormatInt2 = 33, + VertexFormatInt3 = 34, + VertexFormatInt4 = 35, + VertexFormatUInt = 36, + VertexFormatUInt2 = 37, + VertexFormatUInt3 = 38, + VertexFormatUInt4 = 39, + 
VertexFormatInt1010102Normalized = 40, + VertexFormatUInt1010102Normalized = 41, + VertexFormatUChar4Normalized_BGRA = 42, + VertexFormatUChar = 45, + VertexFormatChar = 46, + VertexFormatUCharNormalized = 47, + VertexFormatCharNormalized = 48, + VertexFormatUShort = 49, + VertexFormatShort = 50, + VertexFormatUShortNormalized = 51, + VertexFormatShortNormalized = 52, + VertexFormatHalf = 53, +}; + +_MTL_ENUM(NS::UInteger, VertexStepFunction) { + VertexStepFunctionConstant = 0, + VertexStepFunctionPerVertex = 1, + VertexStepFunctionPerInstance = 2, + VertexStepFunctionPerPatch = 3, + VertexStepFunctionPerPatchControlPoint = 4, +}; + +class VertexBufferLayoutDescriptor : public NS::Copying +{ +public: + static class VertexBufferLayoutDescriptor* alloc(); + + class VertexBufferLayoutDescriptor* init(); + + NS::UInteger stride() const; + void setStride(NS::UInteger stride); + + MTL::VertexStepFunction stepFunction() const; + void setStepFunction(MTL::VertexStepFunction stepFunction); + + NS::UInteger stepRate() const; + void setStepRate(NS::UInteger stepRate); +}; + +class VertexBufferLayoutDescriptorArray : public NS::Referencing +{ +public: + static class VertexBufferLayoutDescriptorArray* alloc(); + + class VertexBufferLayoutDescriptorArray* init(); + + class VertexBufferLayoutDescriptor* object(NS::UInteger index); + + void setObject(const class VertexBufferLayoutDescriptor* bufferDesc, NS::UInteger index); +}; + +class VertexAttributeDescriptor : public NS::Copying +{ +public: + static class VertexAttributeDescriptor* alloc(); + + class VertexAttributeDescriptor* init(); + + MTL::VertexFormat format() const; + void setFormat(MTL::VertexFormat format); + + NS::UInteger offset() const; + void setOffset(NS::UInteger offset); + + NS::UInteger bufferIndex() const; + void setBufferIndex(NS::UInteger bufferIndex); +}; + +class VertexAttributeDescriptorArray : public NS::Referencing +{ +public: + static class VertexAttributeDescriptorArray* alloc(); + + class 
VertexAttributeDescriptorArray* init(); + + class VertexAttributeDescriptor* object(NS::UInteger index); + + void setObject(const class VertexAttributeDescriptor* attributeDesc, NS::UInteger index); +}; + +class VertexDescriptor : public NS::Copying +{ +public: + static class VertexDescriptor* alloc(); + + class VertexDescriptor* init(); + + static class VertexDescriptor* vertexDescriptor(); + + class VertexBufferLayoutDescriptorArray* layouts() const; + + class VertexAttributeDescriptorArray* attributes() const; + + void reset(); +}; + +} + +// static method: alloc +_MTL_INLINE MTL::VertexBufferLayoutDescriptor* MTL::VertexBufferLayoutDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLVertexBufferLayoutDescriptor)); +} + +// method: init +_MTL_INLINE MTL::VertexBufferLayoutDescriptor* MTL::VertexBufferLayoutDescriptor::init() +{ + return NS::Object::init(); +} + +// property: stride +_MTL_INLINE NS::UInteger MTL::VertexBufferLayoutDescriptor::stride() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(stride)); +} + +_MTL_INLINE void MTL::VertexBufferLayoutDescriptor::setStride(NS::UInteger stride) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStride_), stride); +} + +// property: stepFunction +_MTL_INLINE MTL::VertexStepFunction MTL::VertexBufferLayoutDescriptor::stepFunction() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(stepFunction)); +} + +_MTL_INLINE void MTL::VertexBufferLayoutDescriptor::setStepFunction(MTL::VertexStepFunction stepFunction) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStepFunction_), stepFunction); +} + +// property: stepRate +_MTL_INLINE NS::UInteger MTL::VertexBufferLayoutDescriptor::stepRate() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(stepRate)); +} + +_MTL_INLINE void MTL::VertexBufferLayoutDescriptor::setStepRate(NS::UInteger stepRate) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setStepRate_), stepRate); +} + +// static method: alloc +_MTL_INLINE 
MTL::VertexBufferLayoutDescriptorArray* MTL::VertexBufferLayoutDescriptorArray::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLVertexBufferLayoutDescriptorArray)); +} + +// method: init +_MTL_INLINE MTL::VertexBufferLayoutDescriptorArray* MTL::VertexBufferLayoutDescriptorArray::init() +{ + return NS::Object::init(); +} + +// method: objectAtIndexedSubscript: +_MTL_INLINE MTL::VertexBufferLayoutDescriptor* MTL::VertexBufferLayoutDescriptorArray::object(NS::UInteger index) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), index); +} + +// method: setObject:atIndexedSubscript: +_MTL_INLINE void MTL::VertexBufferLayoutDescriptorArray::setObject(const MTL::VertexBufferLayoutDescriptor* bufferDesc, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), bufferDesc, index); +} + +// static method: alloc +_MTL_INLINE MTL::VertexAttributeDescriptor* MTL::VertexAttributeDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLVertexAttributeDescriptor)); +} + +// method: init +_MTL_INLINE MTL::VertexAttributeDescriptor* MTL::VertexAttributeDescriptor::init() +{ + return NS::Object::init(); +} + +// property: format +_MTL_INLINE MTL::VertexFormat MTL::VertexAttributeDescriptor::format() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(format)); +} + +_MTL_INLINE void MTL::VertexAttributeDescriptor::setFormat(MTL::VertexFormat format) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setFormat_), format); +} + +// property: offset +_MTL_INLINE NS::UInteger MTL::VertexAttributeDescriptor::offset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(offset)); +} + +_MTL_INLINE void MTL::VertexAttributeDescriptor::setOffset(NS::UInteger offset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setOffset_), offset); +} + +// property: bufferIndex +_MTL_INLINE NS::UInteger MTL::VertexAttributeDescriptor::bufferIndex() const +{ + return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(bufferIndex)); +} + +_MTL_INLINE void MTL::VertexAttributeDescriptor::setBufferIndex(NS::UInteger bufferIndex) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBufferIndex_), bufferIndex); +} + +// static method: alloc +_MTL_INLINE MTL::VertexAttributeDescriptorArray* MTL::VertexAttributeDescriptorArray::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLVertexAttributeDescriptorArray)); +} + +// method: init +_MTL_INLINE MTL::VertexAttributeDescriptorArray* MTL::VertexAttributeDescriptorArray::init() +{ + return NS::Object::init(); +} + +// method: objectAtIndexedSubscript: +_MTL_INLINE MTL::VertexAttributeDescriptor* MTL::VertexAttributeDescriptorArray::object(NS::UInteger index) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), index); +} + +// method: setObject:atIndexedSubscript: +_MTL_INLINE void MTL::VertexAttributeDescriptorArray::setObject(const MTL::VertexAttributeDescriptor* attributeDesc, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), attributeDesc, index); +} + +// static method: alloc +_MTL_INLINE MTL::VertexDescriptor* MTL::VertexDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLVertexDescriptor)); +} + +// method: init +_MTL_INLINE MTL::VertexDescriptor* MTL::VertexDescriptor::init() +{ + return NS::Object::init(); +} + +// static method: vertexDescriptor +_MTL_INLINE MTL::VertexDescriptor* MTL::VertexDescriptor::vertexDescriptor() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLVertexDescriptor), _MTL_PRIVATE_SEL(vertexDescriptor)); +} + +// property: layouts +_MTL_INLINE MTL::VertexBufferLayoutDescriptorArray* MTL::VertexDescriptor::layouts() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(layouts)); +} + +// property: attributes +_MTL_INLINE MTL::VertexAttributeDescriptorArray* MTL::VertexDescriptor::attributes() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(attributes)); +} + +// 
method: reset +_MTL_INLINE void MTL::VertexDescriptor::reset() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(reset)); +} diff --git a/metal-cpp/Metal/MTLVisibleFunctionTable.hpp b/metal-cpp/Metal/MTLVisibleFunctionTable.hpp new file mode 100644 index 00000000..70daccbd --- /dev/null +++ b/metal-cpp/Metal/MTLVisibleFunctionTable.hpp @@ -0,0 +1,104 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/MTLVisibleFunctionTable.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include "MTLDefines.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLPrivate.hpp" + +#include <Foundation/Foundation.hpp> + +#include "MTLResource.hpp" +#include "MTLTypes.hpp" + +namespace MTL +{ +class VisibleFunctionTableDescriptor : public NS::Copying<VisibleFunctionTableDescriptor> +{ +public: + static class VisibleFunctionTableDescriptor* alloc(); + + class VisibleFunctionTableDescriptor* init(); + + static class VisibleFunctionTableDescriptor* visibleFunctionTableDescriptor(); + + NS::UInteger functionCount() const; + void setFunctionCount(NS::UInteger functionCount); +}; + +class VisibleFunctionTable : public NS::Referencing<VisibleFunctionTable, Resource> +{ +public: + MTL::ResourceID gpuResourceID() const; + + void setFunction(const class FunctionHandle* function, NS::UInteger index); + + void setFunctions(const class FunctionHandle* const functions[], NS::Range range); +}; + +} + +// static method: alloc +_MTL_INLINE MTL::VisibleFunctionTableDescriptor* MTL::VisibleFunctionTableDescriptor::alloc() +{ + return NS::Object::alloc<MTL::VisibleFunctionTableDescriptor>(_MTL_PRIVATE_CLS(MTLVisibleFunctionTableDescriptor)); +} + +// method: init +_MTL_INLINE MTL::VisibleFunctionTableDescriptor* MTL::VisibleFunctionTableDescriptor::init() +{ + return NS::Object::init<MTL::VisibleFunctionTableDescriptor>(); +} + +// static method: visibleFunctionTableDescriptor +_MTL_INLINE MTL::VisibleFunctionTableDescriptor* MTL::VisibleFunctionTableDescriptor::visibleFunctionTableDescriptor() +{ + return Object::sendMessage<MTL::VisibleFunctionTableDescriptor*>(_MTL_PRIVATE_CLS(MTLVisibleFunctionTableDescriptor), _MTL_PRIVATE_SEL(visibleFunctionTableDescriptor)); +} + +// property: functionCount +_MTL_INLINE NS::UInteger MTL::VisibleFunctionTableDescriptor::functionCount() const +{ + return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(functionCount)); +} + +_MTL_INLINE void MTL::VisibleFunctionTableDescriptor::setFunctionCount(NS::UInteger functionCount) +{ + Object::sendMessage<void>(this,
_MTL_PRIVATE_SEL(setFunctionCount_), functionCount); +} + +// property: gpuResourceID +_MTL_INLINE MTL::ResourceID MTL::VisibleFunctionTable::gpuResourceID() const +{ + return Object::sendMessage<MTL::ResourceID>(this, _MTL_PRIVATE_SEL(gpuResourceID)); +} + +// method: setFunction:atIndex: +_MTL_INLINE void MTL::VisibleFunctionTable::setFunction(const MTL::FunctionHandle* function, NS::UInteger index) +{ + Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFunction_atIndex_), function, index); +} + +// method: setFunctions:withRange: +_MTL_INLINE void MTL::VisibleFunctionTable::setFunctions(const MTL::FunctionHandle* const functions[], NS::Range range) +{ + Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFunctions_withRange_), functions, range); +} diff --git a/metal-cpp/Metal/Metal.hpp b/metal-cpp/Metal/Metal.hpp new file mode 100644 index 00000000..f4cf931b --- /dev/null +++ b/metal-cpp/Metal/Metal.hpp @@ -0,0 +1,84 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// Metal/Metal.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "MTLAccelerationStructure.hpp" +#include "MTLAccelerationStructureCommandEncoder.hpp" +#include "MTLAccelerationStructureTypes.hpp" +#include "MTLArgument.hpp" +#include "MTLArgumentEncoder.hpp" +#include "MTLBinaryArchive.hpp" +#include "MTLBlitCommandEncoder.hpp" +#include "MTLBlitPass.hpp" +#include "MTLBuffer.hpp" +#include "MTLCaptureManager.hpp" +#include "MTLCaptureScope.hpp" +#include "MTLCommandBuffer.hpp" +#include "MTLCommandEncoder.hpp" +#include "MTLCommandQueue.hpp" +#include "MTLComputeCommandEncoder.hpp" +#include "MTLComputePass.hpp" +#include "MTLComputePipeline.hpp" +#include "MTLCounters.hpp" +#include "MTLDefines.hpp" +#include "MTLDepthStencil.hpp" +#include "MTLDevice.hpp" +#include "MTLDrawable.hpp" +#include "MTLDynamicLibrary.hpp" +#include "MTLEvent.hpp" +#include "MTLFence.hpp" +#include "MTLFunctionConstantValues.hpp" +#include "MTLFunctionDescriptor.hpp" +#include "MTLFunctionHandle.hpp" +#include "MTLFunctionLog.hpp" +#include "MTLFunctionStitching.hpp" +#include "MTLHeaderBridge.hpp" +#include "MTLHeap.hpp" +#include "MTLIndirectCommandBuffer.hpp" +#include "MTLIndirectCommandEncoder.hpp" +#include "MTLIntersectionFunctionTable.hpp" +#include "MTLIOCommandBuffer.hpp" +#include "MTLIOCommandQueue.hpp" +#include "MTLIOCompressor.hpp" +#include "MTLLibrary.hpp" +#include "MTLLinkedFunctions.hpp" +#include "MTLParallelRenderCommandEncoder.hpp" +#include "MTLPipeline.hpp" +#include "MTLPixelFormat.hpp" +#include "MTLPrivate.hpp" +#include "MTLRasterizationRate.hpp" +#include "MTLRenderCommandEncoder.hpp" +#include "MTLRenderPass.hpp" +#include "MTLRenderPipeline.hpp" +#include 
"MTLResource.hpp" +#include "MTLResourceStateCommandEncoder.hpp" +#include "MTLResourceStatePass.hpp" +#include "MTLSampler.hpp" +#include "MTLStageInputOutputDescriptor.hpp" +#include "MTLTexture.hpp" +#include "MTLTypes.hpp" +#include "MTLVertexDescriptor.hpp" +#include "MTLVisibleFunctionTable.hpp" +#include "MTLVersion.hpp" + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/QuartzCore/CADefines.hpp b/metal-cpp/QuartzCore/CADefines.hpp new file mode 100644 index 00000000..d9df7486 --- /dev/null +++ b/metal-cpp/QuartzCore/CADefines.hpp @@ -0,0 +1,41 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// QuartzCore/CADefines.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "../Foundation/NSDefines.hpp" + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#define _CA_EXPORT _NS_EXPORT +#define _CA_EXTERN _NS_EXTERN +#define _CA_INLINE _NS_INLINE +#define _CA_PACKED _NS_PACKED + +#define _CA_CONST(type, name) _NS_CONST(type, name) +#define _CA_ENUM(type, name) _NS_ENUM(type, name) +#define _CA_OPTIONS(type, name) _NS_OPTIONS(type, name) + +#define _CA_VALIDATE_SIZE(ns, name) _NS_VALIDATE_SIZE(ns, name) +#define _CA_VALIDATE_ENUM(ns, name) _NS_VALIDATE_ENUM(ns, name) + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/QuartzCore/CAMetalDrawable.hpp b/metal-cpp/QuartzCore/CAMetalDrawable.hpp new file mode 100644 index 00000000..8bc55b0c --- /dev/null +++ b/metal-cpp/QuartzCore/CAMetalDrawable.hpp @@ -0,0 +1,57 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// QuartzCore/CAMetalDrawable.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "../Metal/MTLDrawable.hpp" +#include "../Metal/MTLTexture.hpp" + +#include "CADefines.hpp" +#include "CAPrivate.hpp" + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// CA::MetalDrawable: C++ wrapper type; each accessor below forwards one selector through Object::sendMessage. +namespace CA +{ +class MetalDrawable : public NS::Referencing<MetalDrawable, MTL::Drawable> +{ +public: + class MetalLayer* layer() const; + MTL::Texture* texture() const; +}; +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// Sends the `layer` selector to this object and returns the result as a CA::MetalLayer*. +_CA_INLINE CA::MetalLayer* CA::MetalDrawable::layer() const +{ + return Object::sendMessage<CA::MetalLayer*>(this, _CA_PRIVATE_SEL(layer)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// Sends the `texture` selector to this object and returns the result as an MTL::Texture*. +_CA_INLINE MTL::Texture* CA::MetalDrawable::texture() const +{ + return Object::sendMessage<MTL::Texture*>(this, _CA_PRIVATE_SEL(texture)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git
a/metal-cpp/QuartzCore/CAMetalLayer.hpp b/metal-cpp/QuartzCore/CAMetalLayer.hpp new file mode 100644 index 00000000..1914f778 --- /dev/null +++ b/metal-cpp/QuartzCore/CAMetalLayer.hpp @@ -0,0 +1,131 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// QuartzCore/CAMetalLayer.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "../Metal/MTLPixelFormat.hpp" +#include "../Metal/MTLTexture.hpp" +#include + +#include "CADefines.hpp" +#include "CAMetalDrawable.hpp" +#include "CAPrivate.hpp" + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace CA +{ + +class MetalLayer : public NS::Referencing +{ +public: + static class MetalLayer* layer(); + + MTL::Device* device() const; + void setDevice(MTL::Device* device); + + MTL::PixelFormat pixelFormat() const; + void setPixelFormat(MTL::PixelFormat pixelFormat); + + bool framebufferOnly() const; + void setFramebufferOnly(bool framebufferOnly); + + CGSize drawableSize() const; + void setDrawableSize(CGSize drawableSize); + + class MetalDrawable* nextDrawable(); +}; +} // namespace CA + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +_CA_INLINE CA::MetalLayer* CA::MetalLayer::layer() +{ + return Object::sendMessage(_CA_PRIVATE_CLS(CAMetalLayer), _CA_PRIVATE_SEL(layer)); +} +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_CA_INLINE MTL::Device* CA::MetalLayer::device() const +{ + return Object::sendMessage(this, _CA_PRIVATE_SEL(device)); +} 
+//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_CA_INLINE void CA::MetalLayer::setDevice(MTL::Device* device) +{ + return Object::sendMessage(this, _CA_PRIVATE_SEL(setDevice_), device); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_CA_INLINE MTL::PixelFormat CA::MetalLayer::pixelFormat() const +{ + return Object::sendMessage(this, + _CA_PRIVATE_SEL(pixelFormat)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_CA_INLINE void CA::MetalLayer::setPixelFormat(MTL::PixelFormat pixelFormat) +{ + return Object::sendMessage(this, _CA_PRIVATE_SEL(setPixelFormat_), + pixelFormat); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_CA_INLINE bool CA::MetalLayer::framebufferOnly() const +{ + return Object::sendMessage(this, _CA_PRIVATE_SEL(framebufferOnly)); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_CA_INLINE void CA::MetalLayer::setFramebufferOnly(bool framebufferOnly) +{ + return Object::sendMessage(this, _CA_PRIVATE_SEL(setFramebufferOnly_), + framebufferOnly); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_CA_INLINE CGSize CA::MetalLayer::drawableSize() const +{ + return Object::sendMessage(this, _CA_PRIVATE_SEL(drawableSize)); +} + 
+//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_CA_INLINE void CA::MetalLayer::setDrawableSize(CGSize drawableSize) +{ + return Object::sendMessage(this, _CA_PRIVATE_SEL(setDrawableSize_), + drawableSize); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_CA_INLINE CA::MetalDrawable* CA::MetalLayer::nextDrawable() +{ + return Object::sendMessage(this, + _CA_PRIVATE_SEL(nextDrawable)); +} diff --git a/metal-cpp/QuartzCore/CAPrivate.hpp b/metal-cpp/QuartzCore/CAPrivate.hpp new file mode 100644 index 00000000..624bc527 --- /dev/null +++ b/metal-cpp/QuartzCore/CAPrivate.hpp @@ -0,0 +1,132 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// QuartzCore/CAPrivate.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "CADefines.hpp" + +#include + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#define _CA_PRIVATE_CLS(symbol) (Private::Class::s_k##symbol) +#define _CA_PRIVATE_SEL(accessor) (Private::Selector::s_k##accessor) + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#if defined(CA_PRIVATE_IMPLEMENTATION) + +#ifdef METALCPP_SYMBOL_VISIBILITY_HIDDEN +#define _CA_PRIVATE_VISIBILITY __attribute__((visibility("hidden"))) +#else +#define _CA_PRIVATE_VISIBILITY __attribute__((visibility("default"))) +#endif // METALCPP_SYMBOL_VISIBILITY_HIDDEN + +#define _CA_PRIVATE_IMPORT __attribute__((weak_import)) + +#ifdef __OBJC__ +#define _CA_PRIVATE_OBJC_LOOKUP_CLASS(symbol) ((__bridge void*)objc_lookUpClass(#symbol)) +#define _CA_PRIVATE_OBJC_GET_PROTOCOL(symbol) ((__bridge void*)objc_getProtocol(#symbol)) +#else +#define _CA_PRIVATE_OBJC_LOOKUP_CLASS(symbol) objc_lookUpClass(#symbol) +#define _CA_PRIVATE_OBJC_GET_PROTOCOL(symbol) objc_getProtocol(#symbol) +#endif // __OBJC__ + +#define _CA_PRIVATE_DEF_CLS(symbol) void* s_k##symbol _CA_PRIVATE_VISIBILITY = _CA_PRIVATE_OBJC_LOOKUP_CLASS(symbol) +#define _CA_PRIVATE_DEF_PRO(symbol) void* s_k##symbol _CA_PRIVATE_VISIBILITY = _CA_PRIVATE_OBJC_GET_PROTOCOL(symbol) +#define _CA_PRIVATE_DEF_SEL(accessor, symbol) SEL s_k##accessor _CA_PRIVATE_VISIBILITY = sel_registerName(symbol) +#define _CA_PRIVATE_DEF_STR(type, symbol) \ + _CA_EXTERN type const CA##symbol 
_CA_PRIVATE_IMPORT; \ + type const CA::symbol = (nullptr != &CA##symbol) ? CA##symbol : nullptr + +#else + +#define _CA_PRIVATE_DEF_CLS(symbol) extern void* s_k##symbol +#define _CA_PRIVATE_DEF_PRO(symbol) extern void* s_k##symbol +#define _CA_PRIVATE_DEF_SEL(accessor, symbol) extern SEL s_k##accessor +#define _CA_PRIVATE_DEF_STR(type, symbol) extern type const CA::symbol + +#endif // CA_PRIVATE_IMPLEMENTATION + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace CA +{ +namespace Private +{ + namespace Class + { + _CA_PRIVATE_DEF_CLS(CAMetalLayer); + } // Class +} // Private +} // CA + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace CA +{ +namespace Private +{ + namespace Protocol + { + + _CA_PRIVATE_DEF_PRO(CAMetalDrawable); + + } // Protocol +} // Private +} // CA + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace CA +{ +namespace Private +{ + namespace Selector + { + _CA_PRIVATE_DEF_SEL(device, + "device"); + _CA_PRIVATE_DEF_SEL(drawableSize, + "drawableSize"); + _CA_PRIVATE_DEF_SEL(framebufferOnly, + "framebufferOnly"); + _CA_PRIVATE_DEF_SEL(layer, + "layer"); + _CA_PRIVATE_DEF_SEL(nextDrawable, + "nextDrawable"); + _CA_PRIVATE_DEF_SEL(pixelFormat, + "pixelFormat"); + _CA_PRIVATE_DEF_SEL(setDevice_, + "setDevice:"); + _CA_PRIVATE_DEF_SEL(setDrawableSize_, + "setDrawableSize:"); + _CA_PRIVATE_DEF_SEL(setFramebufferOnly_, + "setFramebufferOnly:"); + _CA_PRIVATE_DEF_SEL(setPixelFormat_, + "setPixelFormat:"); + _CA_PRIVATE_DEF_SEL(texture, + "texture"); + } // Class +} // Private +} // CA + 
+//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/QuartzCore/QuartzCore.hpp b/metal-cpp/QuartzCore/QuartzCore.hpp new file mode 100644 index 00000000..66f7e132 --- /dev/null +++ b/metal-cpp/QuartzCore/QuartzCore.hpp @@ -0,0 +1,28 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// QuartzCore/QuartzCore.hpp +// +// Copyright 2020-2022 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "CAMetalDrawable.hpp" +#include "CAMetalLayer.hpp" + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/README.md b/metal-cpp/README.md new file mode 100644 index 00000000..f003dc83 --- /dev/null +++ b/metal-cpp/README.md @@ -0,0 +1,309 @@ +## About + +**metal-cpp** is a low overhead and header only C++ interface for Metal that helps developers add Metal functionality to graphics applications that are written in C++ (such as game engines). **metal-cpp** removes the need to create a shim and allows developers to call Metal functions directly from anywhere in their existing C++ code. + + +## Highlights + +- Drop in C++ alternative interface to the Metal Objective-C headers. +- Direct mapping of all Metal Objective-C classes, constants and enums to C++ in the MTL C++ namespace. +- No measurable overhead compared to calling Metal Objective-C headers, due to inlining of C++ function calls. +- No usage of wrapper containers that require additional allocations. +- Requires C++17 due to the usage of `constexpr` in `NS::Object`. +- Identical header files and function/constant/enum availability for iOS, macOS and tvOS. +- Backwards compatibility: All `bool MTL::Device::supports...()` functions check if their required selectors exist and automatically return `false` if not. +- String (`ErrorDomain`) constants are weak linked and automatically set to `nullptr` if not available. 
+ +## Memory Allocation Policy + +**metal-cpp** follows the object allocation policies of Cocoa, Cocoa Touch, and CoreFoundation. Understanding these rules is especially important when using metal-cpp, as C++ objects are not eligible for automatic reference counting (ARC). + +**metal-cpp** objects are reference counted. To help convey and manage object lifecycles, the following conventions are observed: + +1. *You own any object returned by methods whose name begins with* `alloc` *,* `new` *,* `copy` *,* `mutableCopy` *, or* `Create`. The method returns these objects with a `retainCount` equal to `1`. +2. *You can take ownership of an object by calling its* ```retain()``` *method*. A received object is normally guaranteed to remain valid within the method it was received in. You use `retain` in two situations: (1) In the implementation of an accessor method (a setter) or to take ownership of an object; and (2) To prevent an object from being deallocated as a side-effect of some other operation. +3. *When you no longer need it, you must relinquish ownership of an object you own*. You relinquish ownership by calling its `release()` or `autorelease()` method. +4. *You must not relinquish ownership of an object you do not own*. + +When an object's `retainCount` reaches `0`, the object is immediately deallocated. It is illegal to call methods on a deallocated object; doing so may lead to an application crash. + +### AutoreleasePools and Objects + +Several methods that create temporary objects in **metal-cpp** add them to an `AutoreleasePool` to help manage their lifetimes. In these situations, after **metal-cpp** creates the object, it adds it to an `AutoreleasePool`, which will release its objects when you release (or drain) it. + +By adding temporary objects to an AutoreleasePool, you do not need to explicitly call `release()` to deallocate them. Instead, you can rely on the `AutoreleasePool` to implicitly manage those lifetimes.
+ +If you create an object with a method that does not begin with `alloc`, `new`, `copy`, `mutableCopy`, or `Create`, the creating method adds the object to an autorelease pool. + +The typical scope of an `AutoreleasePool` is one frame of rendering for the main thread of the program. When the thread returns control to the RunLoop (an object responsible for receiving input and events from the windowing system), the pool is *drained*, releasing its objects. + +You can create and manage additional `AutoreleasePool`s at smaller scopes to reduce your program's working set, and you are required to do so for any additional threads your program creates. + +If an object's lifecycle needs to be extended beyond the scope of an `AutoreleasePool` instance, you can claim ownership of it by calling its `retain()` method before the pool is drained. In these cases, you are responsible for making the appropriate `release()` call on the object after you no longer need it. + +You can find a more-detailed introduction to the memory management rules here: https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/MemoryMgmt/Articles/mmRules.html, and here: https://developer.apple.com/library/archive/documentation/CoreFoundation/Conceptual/CFMemoryMgmt/Concepts/Ownership.html + +For more details about the application's RunLoop, please find its documentation here: https://developer.apple.com/documentation/foundation/nsrunloop + +### Use and debug AutoreleasePools + +When you create an autoreleased object and there is no enclosing `AutoreleasePool`, the object is leaked. + +To prevent this, you normally create an `AutoreleasePool` in your program's `main` function, and in the entry function for every thread you create. You may also create additional `AutoreleasePool`s to avoid growing your program's high memory watermark when you create several autoreleased objects, such as when rendering. 
+ +Use the Environment Variable `OBJC_DEBUG_MISSING_POOLS=YES` to print a runtime warning when an autoreleased object is leaked because no enclosing `AutoreleasePool` is available for its thread. + +You can also run `leaks --autoreleasePools` on a memgraph file or a process ID (macOS only) to view a listing of your program's `AutoreleasePool`s and all objects they contain. + +### NS::SharedPtr + +The **metal-cpp** headers include an optional `NS::SharedPtr<>` (shared pointer) template that can help you manually manage memory in your apps. + +Shared pointers in **metal-cpp** are different from `std::shared_ptr<>` in that they implement specific optimizations for its memory model. For example, **metal-cpp**'s shared pointers avoid the overhead of the standard library's version by leveraging the reference counting implementation of the `NS::Object` type. + +#### Note + +The **metal-cpp** shared pointer’s destructor method always calls the `release()` method of the pointer that it wraps. + +You can create an `NS::SharedPtr<>` by calling the metal-cpp's factory method that's appropriate for your application's intent: + +* You can **transfer** ownership of a pointer to a new shared pointer instance by calling the `NS::TransferPtr()` factory function, which is the correct function for Resource Acquisition is Initialization (RAII) implementations because it doesn't increase the pointee's retain count. + +* You can **share** ownership of a pointer with another entity by calling the `NS::RetainPtr()` factory function. This function can also extend an object's lifecycle beyond an `AutoreleasePool` instance's scope because it creates a strong reference to the pointee and increases its retain count. + +Usage of `NS::SharedPtr<>` is optional. + +### nullptr + +Similar to Objective-C, it is legal to call any method, including `retain()` and `release()`, on `nullptr` "objects". 
While calling methods on `nullptr` still incurs function call overhead, the effective result is equivalent to a NOP. + +Conversely, do not assume that the pointed-to object is valid just because calling a method on a pointer did not result in a crash. + +## Adding metal-cpp to a Project + +Simply include `Metal/Metal.hpp`. To ensure that the selector and class symbols are linked, add to one of your cpp files: + +```cpp +#define NS_PRIVATE_IMPLEMENTATION +#define MTL_PRIVATE_IMPLEMENTATION + +#include "Metal/Metal.hpp" +``` + +If you want to use the QuartzCore wrapper, add: + +```cpp +#define CA_PRIVATE_IMPLEMENTATION + +#include "QuartzCore/QuartzCore.hpp" +``` + +## Generating a Single Header File + +Purely optional: You can generate a single header file that contains all **metal-cpp** headers via: + +```shell +./SingleHeader/MakeSingleHeader.py Foundation/Foundation.hpp QuartzCore/QuartzCore.hpp Metal/Metal.hpp +``` + +By default the generator script writes its output to `./SingleHeader/Metal.hpp`. Use the `-o` option to customize the output filename. + +## Global Symbol Visibility + +metal-cpp marks all its symbols with `default` visibility. Define the macro: `METALCPP_SYMBOL_VISIBILITY_HIDDEN` to override this behavior and hide its symbols. + +## Examples + +#### Creating the device + +###### Objective-C (with automatic reference counting) + +```objc +id< MTLDevice > device = MTLCreateSystemDefaultDevice(); + +// ... +``` + +###### Objective-C + +```objc +id< MTLDevice > device = MTLCreateSystemDefaultDevice(); + +// ... + +[device release]; +``` + +###### C++ + +```cpp +MTL::Device* pDevice = MTL::CreateSystemDefaultDevice(); + +// ... + +pDevice->release(); +``` + +###### C++ (using NS::SharedPtr) + +```cpp +NS::SharedPtr< MTL::Device > pDevice = NS::TransferPtr( MTL::CreateSystemDefaultDevice() ); + +// ...
+``` + +#### Metal function calls map directly to C++ + +###### Objective-C (with automatic reference counting) + +```objc +MTLSamplerDescriptor* samplerDescriptor = [[MTLSamplerDescriptor alloc] init]; + +[samplerDescriptor setSAddressMode: MTLSamplerAddressModeRepeat]; +[samplerDescriptor setTAddressMode: MTLSamplerAddressModeRepeat]; +[samplerDescriptor setRAddressMode: MTLSamplerAddressModeRepeat]; +[samplerDescriptor setMagFilter: MTLSamplerMinMagFilterLinear]; +[samplerDescriptor setMinFilter: MTLSamplerMinMagFilterLinear]; +[samplerDescriptor setMipFilter: MTLSamplerMipFilterLinear]; +[samplerDescriptor setSupportArgumentBuffers: YES]; + +id< MTLSamplerState > samplerState = [device newSamplerStateWithDescriptor:samplerDescriptor]; +``` + +###### Objective-C + +```objc +MTLSamplerDescriptor* samplerDescriptor = [[MTLSamplerDescriptor alloc] init]; + +[samplerDescriptor setSAddressMode: MTLSamplerAddressModeRepeat]; +[samplerDescriptor setTAddressMode: MTLSamplerAddressModeRepeat]; +[samplerDescriptor setRAddressMode: MTLSamplerAddressModeRepeat]; +[samplerDescriptor setMagFilter: MTLSamplerMinMagFilterLinear]; +[samplerDescriptor setMinFilter: MTLSamplerMinMagFilterLinear]; +[samplerDescriptor setMipFilter: MTLSamplerMipFilterLinear]; +[samplerDescriptor setSupportArgumentBuffers: YES]; + +id< MTLSamplerState > samplerState = [device newSamplerStateWithDescriptor:samplerDescriptor]; + +[samplerDescriptor release]; + +// ... 
+ +[samplerState release]; +``` + +###### C++ + +```cpp +MTL::SamplerDescriptor* pSamplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); + +pSamplerDescriptor->setSAddressMode( MTL::SamplerAddressModeRepeat ); +pSamplerDescriptor->setTAddressMode( MTL::SamplerAddressModeRepeat ); +pSamplerDescriptor->setRAddressMode( MTL::SamplerAddressModeRepeat ); +pSamplerDescriptor->setMagFilter( MTL::SamplerMinMagFilterLinear ); +pSamplerDescriptor->setMinFilter( MTL::SamplerMinMagFilterLinear ); +pSamplerDescriptor->setMipFilter( MTL::SamplerMipFilterLinear ); +pSamplerDescriptor->setSupportArgumentBuffers( true ); + +MTL::SamplerState* pSamplerState = pDevice->newSamplerState( pSamplerDescriptor ); + +pSamplerDescriptor->release(); + +// ... + +pSamplerState->release(); +``` + +###### C++ (using NS::SharedPtr) + +```cpp +NS::SharedPtr< MTL::SamplerDescriptor > pSamplerDescriptor = NS::TransferPtr( MTL::SamplerDescriptor::alloc()->init() ); + +pSamplerDescriptor->setSAddressMode( MTL::SamplerAddressModeRepeat ); +pSamplerDescriptor->setTAddressMode( MTL::SamplerAddressModeRepeat ); +pSamplerDescriptor->setRAddressMode( MTL::SamplerAddressModeRepeat ); +pSamplerDescriptor->setMagFilter( MTL::SamplerMinMagFilterLinear ); +pSamplerDescriptor->setMinFilter( MTL::SamplerMinMagFilterLinear ); +pSamplerDescriptor->setMipFilter( MTL::SamplerMipFilterLinear ); +pSamplerDescriptor->setSupportArgumentBuffers( true ); + +NS::SharedPtr< MTL::SamplerState > pSamplerState( pDevice->newSamplerState( pSamplerDescriptor ) ); +``` + +#### A subset of bindings for Foundation classes is provided for seamless integration + +###### Objective-C (with automatic reference counting) + +```objc +NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init]; +NSString* string = [NSString stringWithCString: "Hello World" encoding: NSASCIIStringEncoding]; + +printf( "string = \"%s\"\n", [string cStringUsingEncoding: NSASCIIStringEncoding] ); +``` + +###### Objective-C + +```objc +NSAutoreleasePool* pool 
= [[NSAutoreleasePool alloc] init]; +NSString* string = [NSString stringWithCString: "Hello World" encoding: NSASCIIStringEncoding]; + +printf( "string = \"%s\"\n", [string cStringUsingEncoding: NSASCIIStringEncoding] ); + +[pool release]; +``` + +###### C++ + +```cpp +NS::AutoreleasePool* pPool = NS::AutoreleasePool::alloc()->init(); +NS::String* pString = NS::String::string( "Hello World", NS::ASCIIStringEncoding ); + +printf( "pString = \"%s\"\n", pString->cString( NS::ASCIIStringEncoding ) ); + +pPool->release(); +``` + +###### C++ (using NS::SharedPtr) + +```cpp +NS::SharedPtr< NS::AutoreleasePool > pPool = NS::TransferPtr( NS::AutoreleasePool::alloc()->init() ); +NS::String* pString = NS::String::string( "Hello World", NS::ASCIIStringEncoding ); + +printf( "pString = \"%s\"\n", pString->cString( NS::ASCIIStringEncoding ) ); +``` + +#### Containers + +Use the CoreFoundation framework to create `NS::Array` and `NS::Dictionary` instances. + +```cpp +MTL::AccelerationStructureTriangleGeometryDescriptor* pGeoDescriptor = MTL::AccelerationStructureTriangleGeometryDescriptor::alloc()->init(); +CFTypeRef descriptors[] = { ( CFTypeRef )( pGeoDescriptor ) }; +NS::Array* pGeoDescriptors = ( NS::Array* )( CFArrayCreate( kCFAllocatorDefault, descriptors, SIZEOF_ARRAY( descriptors), &kCFTypeArrayCallBacks ) ); + +// ... + +pGeoDescriptors->release(); +``` + +Containers, such as `NS::Array` and `NS::Dictionary`, retain the objects they hold and release them when the container is deallocated. + +#### Accessing the Metal Drawable + +```cpp +#import <QuartzCore/QuartzCore.hpp> + +// ... + +CA::MetalLayer* pMetalLayer = /* layer associated with the view */; +CA::MetalDrawable* pMetalDrawable = pMetalLayer->nextDrawable(); + +// ... +``` + +## Changelog + +| Version | Changes | +|-|-| +| macOS 13, iOS 16| Add all APIs for macOS 13 and iOS 16.
New optional `NS::SharedPtr` type to assist with memory management.
New convenience function to create a `CA::MetalLayer`.
New `MTLSTR(str)` macro allows faster string creation from literals.
Fix a problem with the signature of functions that take an array of pointers as input.
Fix a problem with the signature of the `setGroups()` function in `MTL::LinkedFunctions`.| +| macOS 12, iOS 15 | Initial release. | \ No newline at end of file diff --git a/metal-cpp/SingleHeader/MakeSingleHeader.py b/metal-cpp/SingleHeader/MakeSingleHeader.py new file mode 100755 index 00000000..7dcf29ce --- /dev/null +++ b/metal-cpp/SingleHeader/MakeSingleHeader.py @@ -0,0 +1,271 @@ +#!/usr/bin/env python3 + +#-------------------------------------------------------------------------------------------------------------------------------------------------------------- +# +# SingleHeader/MakeSingleHeader.py +# +# Copyright 2020-2022 Apple Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +#-------------------------------------------------------------------------------------------------------------------------------------------------------------- + +import argparse +import datetime +import logging +import os +import re +import subprocess +import sys + +#-------------------------------------------------------------------------------------------------------------------------------------------------------------- + +class HeaderPrefix( object ): + __template = ( '//\n' + '// {file}\n' + '//\n' + '// {meta_data}\n' + '//\n' + '// Copyright 2020-2022 Apple Inc.\n' + '//\n' + '// Licensed under the Apache License, Version 2.0 (the "License");\n' + '// you may not use this file except in compliance with the License.\n' + '// You may obtain a copy of the License at\n' + '//\n' + '// http://www.apache.org/licenses/LICENSE-2.0\n' + '//\n' + '// Unless required by applicable law or agreed to in writing, software\n' + '// distributed under the License is distributed on an "AS IS" BASIS,\n' + '// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n' + '// See the License for the specific language governing permissions and\n' + '// limitations under the License.\n' + '//\n' + '\n' ) + + __template_commit = 'Autogenerated from commit {commit}.' + __template_date = 'Autogenerated on %B %d, %Y.' + + def __init__( self, file ): + self.__file = file + + def __str__( self ): + return self.__template.format( file = self.__file, meta_data = self.__meta_data_string() ) + + def __get_commit_hash( self ): + git_commit_hash = None + + try: + git_dir = os.path.dirname( os.path.realpath( __file__ ) ) + proc = subprocess.Popen( [ 'git', 'rev-parse', 'HEAD' ], cwd = git_dir, stdout = subprocess.PIPE, stderr = subprocess.PIPE ) + git_commit_hash = proc.stdout.read().decode( 'utf-8', 'replace' ).strip() + except: + logging.error( 'Failed to determine git commit hash!' 
) + pass + + return git_commit_hash + + def __get_commit_string( self ): + meta_data = None + git_commit_hash = self.__get_commit_hash() + + if git_commit_hash: + meta_data = self.__template_commit.format( commit = git_commit_hash ) + + return meta_data + + def __get_date_string( self ): + today = datetime.date.today() + + return today.strftime( self.__template_date ) + + def __meta_data_string( self ): + meta_data = self.__get_commit_string() + + if not meta_data: + meta_data = self.__get_date_string() + + return meta_data + +#-------------------------------------------------------------------------------------------------------------------------------------------------------------- + +class SingleHeader( object ): + __pragma_once = '#pragma once\n\n' + + def __init__( self ): + self.__header_paths = list() + + def __str__( self ): + return self.process() + + def append( self, header_path ): + self.__header_paths.append( header_path ) + + def process( self ): + out_header = self.__pragma_once + + self.__included_headers = set() + self.__base_path = list() + + for header_path in self.__header_paths: + out_header += self.__process_header( header_path ) + + return self.__strip_empty_lines( out_header ) + + def __read_header( self, path ): + path = os.path.realpath( path ) + + try: + f = open( path, 'r' ) + except: + raise RuntimeError( 'Failed to open file \"' + path + '\" for read!' ) + + return f.read() + + def __strip_pragma_once( self, header ): + return re.sub( '\\s*#pragma once\s*\\/\\/-*\\n', '', header ) + + def __strip_comments( self, header ): + return re.sub( '^//.*\\n', '', header, flags = re.MULTILINE ) + + def __strip_empty_lines( self, header ): + return re.sub( '\\n\\n+', '\\n\\n', header, flags = re.MULTILINE ) + + def __substitute_include_directive( self, match ): + header_path = match.group( 'HEADER_PATH' ) + + logging.info( '\tSubstituting \"' + header_path + '\"...' 
) + + return self.__process_header( os.path.join( self.__base_path[-1], header_path ) ) + + def __process_include_directives( self, header ): + return re.sub( '^\\s*#include\\s\\"(?P\\S*)\\"', self.__substitute_include_directive, header, flags = re.MULTILINE ) + + def __process_foundation_directives( self, header ): + if header.find("#include ") != -1: + logging.info( '\tSubstituting ...' ) + return header.replace("#include ", self.__process_header( os.path.join( self.__base_path[-1], "../Foundation/Foundation.hpp" ) ) ) + return header + + + def __process_header( self, header_path ): + out_header = '' + + header_path = os.path.realpath( header_path ) + + if not header_path in self.__included_headers: + logging.info( 'Processing \"' + header_path + '\"...' ) + + self.__base_path.append( os.path.dirname( header_path ) ) + self.__included_headers.add( header_path ) + + out_header = self.__read_header( header_path ) + out_header = self.__strip_pragma_once( out_header ) + out_header = self.__strip_comments( out_header ) + out_header = self.__process_include_directives( out_header ) + out_header = self.__process_foundation_directives( out_header ) + + self.__base_path.pop() + else: + logging.info( '\tSkipping \"' + header_path + '\"...' ) + + return out_header + +#-------------------------------------------------------------------------------------------------------------------------------------------------------------- + +def create_argument_parser(): + parser = argparse.ArgumentParser() + base_path = os.path.dirname( os.path.realpath( __file__ ) ) + output_path = os.path.join( base_path, 'Metal.hpp' ) + + parser.add_argument( '-o', '--output', dest = 'output_path', metavar = 'PATH', default = output_path, help = 'Output path for the single header file.' ) + parser.add_argument( '-v', '--verbose', action = 'store_true', help = 'Show verbose output.' ) + parser.add_argument( dest = 'header_paths', metavar = 'HEADER_FILE', nargs='+', help = 'Input header file.' 
) + + return parser + +#-------------------------------------------------------------------------------------------------------------------------------------------------------------- + +def parse_arguments(): + parser = create_argument_parser() + args = parser.parse_args() + + if args.verbose: + logging.getLogger().setLevel( logging.INFO ) + else: + logging.getLogger().setLevel( logging.ERROR ) + + return args + +#-------------------------------------------------------------------------------------------------------------------------------------------------------------- + +def make_header( args ): + prefix = HeaderPrefix( os.path.basename( args.output_path ) ) + header = SingleHeader() + + for header_path in args.header_paths: + header.append( header_path ) + + return str( prefix ) + str( header ) + +#-------------------------------------------------------------------------------------------------------------------------------------------------------------- + +def make_dir( path ): + try: + if not os.path.exists( path ): + os.makedirs( path ) + except os.error: + pass + except: + raise + +#-------------------------------------------------------------------------------------------------------------------------------------------------------------- + +def write_header( args, content ): + path = os.path.realpath( args.output_path ) + + logging.info( 'Writing \"' + path + '\"...' ) + + make_dir( os.path.dirname( path ) ) + + try: + f = open( path, 'w' ) + except: + raise RuntimeError( 'Failed to open file \"' + path + '\" for write!' 
) + + f.write( content ) + +#-------------------------------------------------------------------------------------------------------------------------------------------------------------- + +if __name__ == '__main__': + result = -1 + + try: + if sys.getdefaultencoding().lower() == 'ascii': + reload( sys ) + sys.setdefaultencoding( 'utf-8' ) + + args = parse_arguments() + header = make_header( args ) + + write_header( args, header ) + + result = 0 + + except ( KeyboardInterrupt, SystemExit ): + pass + except: + raise + + sys.exit( result ) + +#-------------------------------------------------------------------------------------------------------------------------------------------------------------- From d23203c20670547abc4f494cb72c5a8c2edbb455 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Tue, 16 Jan 2024 03:27:41 -0500 Subject: [PATCH 03/37] add element-wise add test in swift --- llm/tests/metal/MetalAdder.h | 2 +- llm/tests/metal/add.metal | 2 +- llm/tests/metal/add_test.swift | 120 +++++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 2 deletions(-) create mode 100644 llm/tests/metal/add_test.swift diff --git a/llm/tests/metal/MetalAdder.h b/llm/tests/metal/MetalAdder.h index b1db6bb0..26e08665 100644 --- a/llm/tests/metal/MetalAdder.h +++ b/llm/tests/metal/MetalAdder.h @@ -4,7 +4,7 @@ A class to manage all of the Metal objects this app creates. 
#import #import -#include "common.h" +#include "../../include/common.h" NS_ASSUME_NONNULL_BEGIN diff --git a/llm/tests/metal/add.metal b/llm/tests/metal/add.metal index 9dae4b9b..54745610 100644 --- a/llm/tests/metal/add.metal +++ b/llm/tests/metal/add.metal @@ -9,7 +9,7 @@ using namespace metal; kernel void elementwise_add( device const Matrix3D &inputA, device const Matrix3D &inputB, - device const Matrix3D &output, + device const Matrix3D &output, uint3 gid [[ thread_position_in_grid ]] ) { diff --git a/llm/tests/metal/add_test.swift b/llm/tests/metal/add_test.swift new file mode 100644 index 00000000..364a5f28 --- /dev/null +++ b/llm/tests/metal/add_test.swift @@ -0,0 +1,120 @@ +import Metal + +let metalSourceCode = """ + kernel void arrayAdd(const device float* inputA [[buffer(0)]], + const device float* inputB [[buffer(1)]], + device float* output [[buffer(2)]], + uint id [[thread_position_in_grid]]) + { + // Perform array addition + output[id] = inputA[id] + inputB[id]; + } +""" +let arraySize: Int = 1*108*768 // simulate the Matrix3D size in test_ops.cc +let inputA = getRandomArray() +let inputB = getRandomArray() +let output = [Float](repeating: 1.0, count: arraySize) + +var timeElapsedGPU: CFAbsoluteTime = 0.0 +var timeElapsedCPU: CFAbsoluteTime = 0.0 + + +func performArrayAddition() { + print("Metal GPU") + // Begin the process + let startTime = CFAbsoluteTimeGetCurrent() + + // Create Metal device and command queue + let device = MTLCreateSystemDefaultDevice()! + let commandQueue = device.makeCommandQueue()! + + // Create Metal buffers for input and output + let bufferA = device.makeBuffer(bytes: inputA, length: arraySize * MemoryLayout.size, options: .storageModeShared)! + let bufferB = device.makeBuffer(bytes: inputB, length: arraySize * MemoryLayout.size, options: .storageModeShared)! + let bufferOutput = device.makeBuffer(bytes: output, length: arraySize * MemoryLayout.size, options: .storageModeShared)! 
+ + // Create Metal compute pipeline and set kernel function + let library = try! device.makeLibrary(source: metalSourceCode, options: nil) + let kernelFunction = library.makeFunction(name: "arrayAdd")! + let pipeline = try! device.makeComputePipelineState(function: kernelFunction) + + // Create Metal compute command encoder + let commandBuffer = commandQueue.makeCommandBuffer()! + let computeEncoder = commandBuffer.makeComputeCommandEncoder()! + + // Set compute pipeline state + computeEncoder.setComputePipelineState(pipeline) + + // Set buffers for input and output + computeEncoder.setBuffer(bufferA, offset: 0, index: 0) + computeEncoder.setBuffer(bufferB, offset: 0, index: 1) + computeEncoder.setBuffer(bufferOutput, offset: 0, index: 2) + + // Set threadgroup size and dispatch compute threads + let maxThreadsperthreadgroup = pipeline.maxTotalThreadsPerThreadgroup + + let threadsPerThreadgroup = min(arraySize, maxThreadsperthreadgroup) + let threadgroupCount = MTLSize(width: (arraySize + threadsPerThreadgroup - 1) / threadsPerThreadgroup, height: 1, depth: 1) + + // Dispatch threads in multiple threadgroups + let threadgroups = MTLSize(width: threadsPerThreadgroup, height: 1, depth: 1) + computeEncoder.dispatchThreads(threadgroups, threadsPerThreadgroup: threadgroupCount) + + // End encoding and execute command buffer + computeEncoder.endEncoding() + commandBuffer.commit() + commandBuffer.waitUntilCompleted() + + // Access the result from the output buffer + var resultBufferPointer = UnsafeMutablePointer(bufferOutput.contents().bindMemory(to: Float.self, + capacity: MemoryLayout.size * arraySize)) + + // Print the result + for i in 0..<3 { + print("Metal GPU result: \(inputA[i]) + \(inputB[i]) = \(Float(resultBufferPointer.pointee) as Any)") + resultBufferPointer = UnsafeMutablePointer(resultBufferPointer.advanced(by: 1)) + } + + timeElapsedGPU = CFAbsoluteTimeGetCurrent() - startTime + print("Time elapsed \(String(format: "%.05f", timeElapsedGPU)) seconds") + 
print() +} + +// Call the function to perform array addition using Metal GPU +performArrayAddition() +basicForLoopWay(arr1: inputA, arr2: inputB) +let speedup = timeElapsedCPU/timeElapsedGPU +print("Speedup: \(speedup)") + +func basicForLoopWay(arr1: [Float], arr2: [Float]) { + print("CPU") + + // Begin the process + let startTime = CFAbsoluteTimeGetCurrent() + + var result = [Float].init(repeating: 0.0, count: arraySize) + + // Process our additions of the arrays together + for i in 0..[Float] { + var result = [Float].init(repeating: 0.0, count: arraySize) + for i in 0.. Date: Wed, 24 Jan 2024 17:06:17 -0500 Subject: [PATCH 04/37] metal c++ version inside cpp folder --- llm/tests/metal/cpp_version/main.cc | 132 ++++++ llm/tests/metal/cpp_version/op.metal | 432 ++++++++++++++++++ llm/tests/metal/{ => object_c}/MetalAdder.h | 2 +- llm/tests/metal/{ => object_c}/MetalAdder.m | 0 llm/tests/metal/{ => object_c}/add.metal | 0 llm/tests/metal/object_c/add_test.cpp | 112 +++++ llm/tests/metal/{ => object_c}/main.m | 0 .../metal/{ => swift_version}/add_test.swift | 0 8 files changed, 677 insertions(+), 1 deletion(-) create mode 100644 llm/tests/metal/cpp_version/main.cc create mode 100644 llm/tests/metal/cpp_version/op.metal rename llm/tests/metal/{ => object_c}/MetalAdder.h (90%) rename llm/tests/metal/{ => object_c}/MetalAdder.m (100%) rename llm/tests/metal/{ => object_c}/add.metal (100%) create mode 100644 llm/tests/metal/object_c/add_test.cpp rename llm/tests/metal/{ => object_c}/main.m (100%) rename llm/tests/metal/{ => swift_version}/add_test.swift (100%) diff --git a/llm/tests/metal/cpp_version/main.cc b/llm/tests/metal/cpp_version/main.cc new file mode 100644 index 00000000..2813eb4d --- /dev/null +++ b/llm/tests/metal/cpp_version/main.cc @@ -0,0 +1,132 @@ +// +// main.cpp +// metal_cpp +// +// Created by Derrick on 1/24/24. 
+// + +#include +#include +#include + +#define NS_PRIVATE_IMPLEMENTATION +#define CA_PRIVATE_IMPLEMENTATION +#define MTL_PRIVATE_IMPLEMENTATION +#include "Metal/Metal.hpp" +#include "Foundation/Foundation" + +int arraySize = 100; + +// Function to generate a random integer array +void generateRandomIntArray(int* array) { + // Use a random device to seed the random number generator + std::random_device rd; + // Use the current time as a seed for the random number generator + std::mt19937 gen(rd()); + // Define the range of random numbers (adjust as needed) + std::uniform_int_distribution distribution(1, 100); + + // Generate random integers and fill the array + for (int i = 0; i < arraySize; ++i) { + array[i] = distribution(gen); + } +} + +int main(){ +// int M1[5][5], M2[5][5], Output[5][5]; + int *M1 = new int[arraySize]; + int *M2 = new int[arraySize]; + int *Output = new int[arraySize]; + + generateRandomIntArray(M1); + generateRandomIntArray(M2); + generateRandomIntArray(Output); + + + MTL::Device *_mDevice = MTL::CreateSystemDefaultDevice(); + NS::Error *error = nullptr; + MTL::Library *defaultLibrary = _mDevice->newDefaultLibrary(); + + if (defaultLibrary == nullptr) { + std::cout << "Failed to find the default library." << std::endl; + return 0; + } + + // Give matmul kernel + auto str = NS::String::string("arrayAdd", NS::ASCIIStringEncoding); + MTL::Function *matmulFunction = defaultLibrary->newFunction(str); + defaultLibrary->release(); + + if (matmulFunction == nullptr) { + std::cout << "Failed to find the function." << std::endl; + return 0; + } + + // Create a compute pipeline state object. + MTL::ComputePipelineState * _mMatmulFunctionPSO = _mDevice->newComputePipelineState(matmulFunction, &error); + matmulFunction->release(); + + if (_mMatmulFunctionPSO == nullptr) { + // If the Metal API validation is enabled, you can find out more information about what + // went wrong. 
(Metal API validation is enabled by default when a debug build is run + // from Xcode) + std::cout << "Failed to created pipeline state object, error " << error << "." << std::endl; + return 0; + } + + MTL::CommandQueue * _mCommandQueue = _mDevice->newCommandQueue(); + if (_mCommandQueue == nullptr) { + std::cout << "Failed to find the command queue." << std::endl; + return 0; + } + + //Create Metal buffers for input and output, if inside the TinyChat, param should be created in advance + MTL::Buffer *buffer1 = _mDevice->newBuffer(sizeof(M1), MTL::ResourceStorageModeShared); + MTL::Buffer *buffer2 = _mDevice->newBuffer(sizeof(M2), MTL::ResourceStorageModeShared); + MTL::Buffer *buffer3 = _mDevice->newBuffer(sizeof(Output), MTL::ResourceStorageModeShared); + + // Start the computation in metal gpu + // Create a command buffer to hold commands. + MTL::CommandBuffer *commandBuffer = _mCommandQueue->commandBuffer(); + assert(commandBuffer != nullptr); + + // Start a compute pass. + MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder(); + assert(computeEncoder != nullptr); + + // Set buffers for input and output + // Encode the pipeline state object and its parameters. + computeEncoder->setComputePipelineState(_mMatmulFunctionPSO); + computeEncoder->setBuffer(buffer1, 0, 0); + computeEncoder->setBuffer(buffer2, 0, 1); + computeEncoder->setBuffer(buffer3, 0, 2); + + // number of threadgroup + uint32_t maxThreadsperthreadgroup = (uint32_t)_mMatmulFunctionPSO->maxTotalThreadsPerThreadgroup(); + uint32_t threadsPerthreadgroup = MIN(maxThreadsperthreadgroup, arraySize); + MTL::Size threadgroupCount = MTL::Size::Make((arraySize+threadsPerthreadgroup-1)/threadsPerthreadgroup, 1, 1); + + // Calculate a thread number per group + MTL::Size threadgroupSize = MTL::Size::Make(threadsPerthreadgroup, 1, 1); + + // Encode the compute command. + computeEncoder->dispatchThreads(threadgroupCount, threadgroupSize); + + // End the compute pass. 
+ computeEncoder->endEncoding(); + + // Execute the command. + commandBuffer->commit(); + + // Normally, you want to do other work in your app while the GPU is running, + // but in this example, the code simply blocks until the calculation is complete. + commandBuffer->waitUntilCompleted(); + + int *output = (int*)buffer3->contents(); + std::cout << "The output from Metal GPU is: " << output[0] << std::endl; + + computeEncoder->release(); + commandBuffer->release(); + +} + diff --git a/llm/tests/metal/cpp_version/op.metal b/llm/tests/metal/cpp_version/op.metal new file mode 100644 index 00000000..f01752b9 --- /dev/null +++ b/llm/tests/metal/cpp_version/op.metal @@ -0,0 +1,432 @@ +// +// op.metal +// metal_cpp +// +// Created by Derrick on 1/24/24. +// + +#include +using namespace metal; + +kernel void arrayAdd(const device float* inputA [[buffer(0)]], + const device float* inputB [[buffer(1)]], + device float* output [[buffer(2)]], + uint id [[thread_position_in_grid]]) + { + // Perform array addition + output[id] = inputA[id] + inputB[id]; + } + + +// kernel void matmul(device const float* inA, +// device const float* inB, // column major +// device float* result, +// constant MetalMatMulParams& params, +// uint2 id [[thread_position_in_grid]]) +// { +// // the for-loop is replaced with a collection of threads, each of which +// // calls this function. 
+ +// const uint n = params.n; +// const uint k = params.k; + +// const uint idx = id.x; // column index of the output +// const uint idy = id.y; // row index of the output + +// float sum = 0; +// for (uint i = 0; i < k; i++){ +// float vA = inA[idy * k + i]; +// float vB = inB[idx * k + i]; + +// sum += vA * vB; +// } +// result[idy * n + idx] = sum; +// } + +// kernel void matmulInt4(device const float* inA, +// device const uint8_t* inB, // column major +// device float* result, +// device const float* scales, +// constant MetalMatMulParams& params, +// uint2 id [[thread_position_in_grid]]) +// { +// // the for-loop is replaced with a collection of threads, each of which +// // calls this function. + +// const uint n = params.n; +// const uint k = params.k; +// const uint group_size = params.group_size; + +// const uint idx = id.x; // column index of the output +// const uint idy = id.y; // row index of the output + +// float sum = 0; +// for (uint i = 0; i < k; i += group_size){ +// float scale = scales[(idx * k + i) / group_size]; +// for (uint j = 0; j < group_size; j+=2){ +// size_t weight_idx = (idx * k + i + j) / 2; +// uint8_t weight_packed = inB[weight_idx]; +// int8_t vl = (weight_packed & 0x0F) - 8; +// int8_t vh = (weight_packed >> 4) - 8; + +// sum += (inA[idy * k + i + j] * vl) * scale; +// sum += (inA[idy * k + i + j + 1] * vh) * scale; +// } +// } +// result[idy * n + idx] = sum; +// } + + +// kernel void matmulInt4_SIMD_Q4Interleave( +// device const packed_float4* inA, +// device const packed_char4* inB, // column major +// device float* result, +// device const float* scales, +// constant MetalMatMulParams& params, +// uint2 id [[thread_position_in_grid]]) +// { +// // the for-loop is replaced with a collection of threads, each of which +// // calls this function. 
+ +// const uint n = params.n; +// const uint k = params.k; +// const uint group_size = params.group_size; + +// const uint idx = id.x; // column index of the output +// const uint idy = id.y; // row index of the output + +// packed_char4 lowMask = {0x0F, 0x0F, 0x0F, 0x0F}; +// packed_float4 sum4 = {0.0f, 0.0f, 0.0f, 0.0f}; + +// for (uint i = 0; i < k; i += group_size){ +// float scale = scales[(idx * k + i) / group_size]; +// packed_float4 scale4 = {scale, scale, scale, scale}; +// for (uint j = 0; j < group_size; j+= 8){ +// // sequential: (a, b), (c, d), (e, f), (g, h): 32 bit = 4xuint8 +// // expected layout of inB: (a, e), (b, f), (c, g), (d, h) +// // low; (a, 0), (b, 0), (c, 0), (d, 0) +// // high: (e, 0), (f, 0), (g, 0), (h, 0) +// size_t weight_idx = (idx * k + i + j) / 8; +// size_t activation_idx = (idy * k + i + j) / 4; +// packed_char4 packed_8 = inB[weight_idx]; +// packed_char4 packed_low = packed_8 & lowMask; +// packed_char4 packed_high = (packed_8 >> 4) & lowMask; + +// packed_float4 inAlow = inA[activation_idx]; +// packed_float4 inAhigh = inA[activation_idx+1]; +// packed_float4 inBlow = packed_float4(packed_low) * scale4; +// packed_float4 inBhigh = packed_float4(packed_high) * scale4; + +// sum4 += inAlow * inBlow; +// sum4 += inAhigh * inBhigh; +// } +// } +// float sum = sum4[0] + sum4[1] + sum4[2] + sum4[3]; +// result[idy * n + idx] = sum; +// } + +// kernel void matmulUInt4_SIMD_Q4Interleave_unroll16( +// device const packed_float4* inA, +// device const packed_char4* inB, // column major +// device float* result, +// device const float* scales, +// constant MetalMatMulParams& params, +// uint2 id [[thread_position_in_grid]]) +// { +// // the for-loop is replaced with a collection of threads, each of which +// // calls this function. 
+ +// const uint n = params.n; +// const uint k = params.k; +// const uint group_size = params.group_size; + +// const uint idx = id.x; // column index of the output +// const uint idy = id.y; // row index of the output + +// packed_char4 lowMask = {0x0F, 0x0F, 0x0F, 0x0F}; +// packed_float4 sum4 = {0.0f, 0.0f, 0.0f, 0.0f}; +// packed_char4 offsets = {8, 8, 8, 8}; + +// for (uint i = 0; i < k; i += group_size){ +// float scale = scales[(idx * k + i) / group_size]; +// packed_float4 scale4 = {scale, scale, scale, scale}; +// for (uint j = 0; j < group_size; j+= 16){ +// // sequential: (a, b), (c, d), (e, f), (g, h): 32 bit = 4xuint8 +// // expected layout of inB: (a, e), (b, f), (c, g), (d, h) +// // low; (a, 0), (b, 0), (c, 0), (d, 0) +// // high: (e, 0), (f, 0), (g, 0), (h, 0) +// size_t weight_idx = (idx * k + i + j) / 8; +// size_t activation_idx = (idy * k + i + j) / 4; +// packed_char4 packed_8_0 = inB[weight_idx]; +// packed_char4 packed_8_1 = inB[weight_idx + 1]; +// packed_char4 packed_low_0 = (packed_8_0 & lowMask) - offsets;; +// packed_char4 packed_low_1 = (packed_8_1 & lowMask) - offsets;; +// packed_char4 packed_high_0 = ((packed_8_0 >> 4) & lowMask) - offsets; +// packed_char4 packed_high_1 = ((packed_8_1 >> 4) & lowMask) - offsets; + +// packed_float4 inAlow_0 = inA[activation_idx]; +// packed_float4 inAlow_1 = inA[activation_idx+2]; +// packed_float4 inAhigh_0 = inA[activation_idx+1]; +// packed_float4 inAhigh_1 = inA[activation_idx+3]; +// packed_float4 inBlow_0 = packed_float4(packed_low_0) * scale4; +// packed_float4 inBlow_1 = packed_float4(packed_low_1) * scale4; +// packed_float4 inBhigh_0 = packed_float4(packed_high_0) * scale4; +// packed_float4 inBhigh_1 = packed_float4(packed_high_1) * scale4; + +// sum4 += inAlow_0 * inBlow_0; +// sum4 += inAlow_1 * inBlow_1; +// sum4 += inAhigh_0 * inBhigh_0; +// sum4 += inAhigh_1 * inBhigh_1; +// } +// } +// float sum = sum4[0] + sum4[1] + sum4[2] + sum4[3]; +// result[idy * n + idx] = sum; +// } + + 
+// kernel void matmulUInt4_SIMD_Q4Interleave_unroll32( +// device const packed_float4* inA, +// device const packed_char4* inB, // column major +// device float* result, +// device const float* scales, +// constant MetalMatMulParams& params, +// uint2 id [[thread_position_in_grid]]) +// { +// // the for-loop is replaced with a collection of threads, each of which +// // calls this function. + +// const uint n = params.n; +// const uint k = params.k; +// const uint group_size = params.group_size; + +// const uint idx = id.x; // column index of the output +// const uint idy = id.y; // row index of the output + +// packed_char4 lowMask = {0x0F, 0x0F, 0x0F, 0x0F}; +// packed_char4 offsets = {8, 8, 8, 8}; +// packed_float4 sum4 = {0.0f, 0.0f, 0.0f, 0.0f}; + +// for (uint i = 0; i < k; i += group_size){ +// float scale = scales[(idx * k + i) / group_size]; +// packed_float4 scale4 = {scale, scale, scale, scale}; +// for (uint j = 0; j < group_size; j+= 32){ +// // sequential: (a, b), (c, d), (e, f), (g, h): 32 bit = 4xuint8 +// // expected layout of inB: (a, e), (b, f), (c, g), (d, h) +// // low; (a, 0), (b, 0), (c, 0), (d, 0) +// // high: (e, 0), (f, 0), (g, 0), (h, 0) +// size_t weight_idx = (idx * k + i + j) / 8; +// size_t activation_idx = (idy * k + i + j) / 4; +// packed_char4 packed_8_0 = inB[weight_idx]; +// packed_char4 packed_8_1 = inB[weight_idx + 1]; +// packed_char4 packed_8_2 = inB[weight_idx + 2]; +// packed_char4 packed_8_3 = inB[weight_idx + 3]; + +// packed_char4 packed_low_0 = (packed_8_0 & lowMask) - offsets;; +// packed_char4 packed_low_1 = (packed_8_1 & lowMask) - offsets;; +// packed_char4 packed_low_2 = (packed_8_2 & lowMask) - offsets;; +// packed_char4 packed_low_3 = (packed_8_3 & lowMask) - offsets;; + +// packed_char4 packed_high_0 = ((packed_8_0 >> 4) & lowMask) - offsets; +// packed_char4 packed_high_1 = ((packed_8_1 >> 4) & lowMask) - offsets; +// packed_char4 packed_high_2 = ((packed_8_2 >> 4) & lowMask) - offsets; +// packed_char4 
packed_high_3 = ((packed_8_3 >> 4) & lowMask) - offsets; + +// packed_float4 inAlow_0 = inA[activation_idx]; +// packed_float4 inAhigh_0 = inA[activation_idx+1]; +// packed_float4 inAlow_1 = inA[activation_idx+2]; +// packed_float4 inAhigh_1 = inA[activation_idx+3]; +// packed_float4 inAlow_2 = inA[activation_idx+4]; +// packed_float4 inAhigh_2 = inA[activation_idx+5]; +// packed_float4 inAlow_3 = inA[activation_idx+6]; +// packed_float4 inAhigh_3 = inA[activation_idx+7]; + +// packed_float4 inBlow_0 = packed_float4(packed_low_0) * scale4; +// packed_float4 inBlow_1 = packed_float4(packed_low_1) * scale4; +// packed_float4 inBlow_2 = packed_float4(packed_low_2) * scale4; +// packed_float4 inBlow_3 = packed_float4(packed_low_3) * scale4; + +// packed_float4 inBhigh_0 = packed_float4(packed_high_0) * scale4; +// packed_float4 inBhigh_1 = packed_float4(packed_high_1) * scale4; +// packed_float4 inBhigh_2 = packed_float4(packed_high_2) * scale4; +// packed_float4 inBhigh_3 = packed_float4(packed_high_3) * scale4; + +// sum4 += inAlow_0 * inBlow_0; +// sum4 += inAlow_1 * inBlow_1; +// sum4 += inAlow_2 * inBlow_2; +// sum4 += inAlow_3 * inBlow_3; +// sum4 += inAhigh_0 * inBhigh_0; +// sum4 += inAhigh_1 * inBhigh_1; +// sum4 += inAhigh_2 * inBhigh_2; +// sum4 += inAhigh_3 * inBhigh_3; +// } +// } +// float sum = sum4[0] + sum4[1] + sum4[2] + sum4[3]; +// result[idy * n + idx] = sum; +// } + +// kernel void matmulUInt4_SIMD_Q4Interleave_unroll2x32( +// device const packed_float4* inA, +// device const packed_char4* inB, // column major +// device float* result, +// device const float* scales, +// constant MetalMatMulParams& params, +// uint2 id [[thread_position_in_grid]]) +// { +// // the for-loop is replaced with a collection of threads, each of which +// // calls this function. 
+ +// const uint n = params.n; +// const uint k = params.k; +// const uint group_size = params.group_size; + +// const uint idx = id.x; // column index of the output +// const uint idy = id.y; // row index of the output + +// packed_char4 lowMask = {0x0F, 0x0F, 0x0F, 0x0F}; +// packed_char4 offsets = {8, 8, 8, 8}; +// packed_float4 sum4 = {0.0f, 0.0f, 0.0f, 0.0f}; +// packed_float4 sum4_col2 = {0.0f, 0.0f, 0.0f, 0.0f}; + +// packed_float4 a; + +// for (uint i = 0; i < k; i += group_size){ +// float scale = scales[(idx * k + i) / group_size]; +// float scale_col2 = scales[((idx+1) * k + i) / group_size]; +// packed_float4 scale4 = {scale, scale, scale, scale}; +// packed_float4 scale4_col2 = {scale_col2, scale_col2, scale_col2, scale_col2}; +// for (uint j = 0; j < group_size; j+= 32){ +// // sequential: (a, b), (c, d), (e, f), (g, h): 32 bit = 4xuint8 +// // expected layout of inB: (a, e), (b, f), (c, g), (d, h) +// // low; (a, 0), (b, 0), (c, 0), (d, 0) +// // high: (e, 0), (f, 0), (g, 0), (h, 0) +// size_t weight_idx = (idx * k + i + j) / 8; +// size_t weight_col2_idx = ((idx+1) * k + i + j) / 8; +// size_t activation_idx = (idy * k + i + j) / 4; +// packed_char4 packed_8_0 = inB[weight_idx]; +// packed_char4 packed_8_1 = inB[weight_idx + 1]; +// packed_char4 packed_8_2 = inB[weight_idx + 2]; +// packed_char4 packed_8_3 = inB[weight_idx + 3]; + +// packed_char4 packed_low_0 = (packed_8_0 & lowMask) - offsets; +// packed_char4 packed_low_1 = (packed_8_1 & lowMask) - offsets; +// packed_char4 packed_low_2 = (packed_8_2 & lowMask) - offsets; +// packed_char4 packed_low_3 = (packed_8_3 & lowMask) - offsets; + +// packed_char4 packed_high_0 = ((packed_8_0 >> 4) & lowMask) - offsets; +// packed_char4 packed_high_1 = ((packed_8_1 >> 4) & lowMask) - offsets; +// packed_char4 packed_high_2 = ((packed_8_2 >> 4) & lowMask) - offsets; +// packed_char4 packed_high_3 = ((packed_8_3 >> 4) & lowMask) - offsets; + +// packed_float4 inAlow_0 = inA[activation_idx]; +// 
packed_float4 inAhigh_0 = inA[activation_idx+1]; +// packed_float4 inAlow_1 = inA[activation_idx+2]; +// packed_float4 inAhigh_1 = inA[activation_idx+3]; +// packed_float4 inAlow_2 = inA[activation_idx+4]; +// packed_float4 inAhigh_2 = inA[activation_idx+5]; +// packed_float4 inAlow_3 = inA[activation_idx+6]; +// packed_float4 inAhigh_3 = inA[activation_idx+7]; + +// packed_float4 inBlow_0 = packed_float4(packed_low_0) * scale4; +// packed_float4 inBlow_1 = packed_float4(packed_low_1) * scale4; +// packed_float4 inBlow_2 = packed_float4(packed_low_2) * scale4; +// packed_float4 inBlow_3 = packed_float4(packed_low_3) * scale4; + +// packed_float4 inBhigh_0 = packed_float4(packed_high_0) * scale4; +// packed_float4 inBhigh_1 = packed_float4(packed_high_1) * scale4; +// packed_float4 inBhigh_2 = packed_float4(packed_high_2) * scale4; +// packed_float4 inBhigh_3 = packed_float4(packed_high_3) * scale4; + +// sum4 += inAlow_0 * inBlow_0; +// sum4 += inAlow_1 * inBlow_1; +// sum4 += inAlow_2 * inBlow_2; +// sum4 += inAlow_3 * inBlow_3; +// sum4 += inAhigh_0 * inBhigh_0; +// sum4 += inAhigh_1 * inBhigh_1; +// sum4 += inAhigh_2 * inBhigh_2; +// sum4 += inAhigh_3 * inBhigh_3; + +// } +// } +// float sum = sum4[0] + sum4[1] + sum4[2] + sum4[3]; +// result[idy * n + idx] = sum; +// } + +// kernel void matmulUInt4_SIMD_Q4Interleave_half_unroll32( +// device const packed_half4* inA, +// device const packed_char4* inB, // column major +// device float* result, +// device const float* scales, +// constant MetalMatMulParams& params, +// uint2 id [[thread_position_in_grid]]) +// { +// // the for-loop is replaced with a collection of threads, each of which +// // calls this function. 
+ +// const uint n = params.n; +// const uint k = params.k; +// const uint group_size = params.group_size; + +// const uint idx = id.x; // column index of the output +// const uint idy = id.y; // row index of the output + +// packed_char4 lowMask = {0x0F, 0x0F, 0x0F, 0x0F}; +// packed_char4 offsets = {8, 8, 8, 8}; +// packed_half4 sum4 = {0.0f, 0.0f, 0.0f, 0.0f}; + +// for (uint i = 0; i < k; i += group_size){ +// half scale = half(scales[(idx * k + i) / group_size]); +// packed_half4 scale4 = {scale, scale, scale, scale}; +// for (uint j = 0; j < group_size; j+= 32){ +// // sequential: (a, b), (c, d), (e, f), (g, h): 32 bit = 4xuint8 +// // expected layout of inB: (a, e), (b, f), (c, g), (d, h) +// // low; (a, 0), (b, 0), (c, 0), (d, 0) +// // high: (e, 0), (f, 0), (g, 0), (h, 0) +// size_t weight_idx = (idx * k + i + j) / 8; +// size_t activation_idx = (idy * k + i + j) / 4; +// packed_char4 packed_8_0 = inB[weight_idx]; +// packed_char4 packed_8_1 = inB[weight_idx + 1]; +// packed_char4 packed_8_2 = inB[weight_idx + 2]; +// packed_char4 packed_8_3 = inB[weight_idx + 3]; + +// packed_char4 packed_low_0 = (packed_8_0 & lowMask) - offsets;; +// packed_char4 packed_low_1 = (packed_8_1 & lowMask) - offsets;; +// packed_char4 packed_low_2 = (packed_8_2 & lowMask) - offsets;; +// packed_char4 packed_low_3 = (packed_8_3 & lowMask) - offsets;; + +// packed_char4 packed_high_0 = ((packed_8_0 >> 4) & lowMask) - offsets; +// packed_char4 packed_high_1 = ((packed_8_1 >> 4) & lowMask) - offsets; +// packed_char4 packed_high_2 = ((packed_8_2 >> 4) & lowMask) - offsets; +// packed_char4 packed_high_3 = ((packed_8_3 >> 4) & lowMask) - offsets; + +// packed_half4 inAlow_0 = inA[activation_idx]; +// packed_half4 inAhigh_0 = inA[activation_idx+1]; +// packed_half4 inAlow_1 = inA[activation_idx+2]; +// packed_half4 inAhigh_1 = inA[activation_idx+3]; +// packed_half4 inAlow_2 = inA[activation_idx+4]; +// packed_half4 inAhigh_2 = inA[activation_idx+5]; +// packed_half4 inAlow_3 = 
inA[activation_idx+6]; +// packed_half4 inAhigh_3 = inA[activation_idx+7]; + +// packed_half4 inBlow_0 = packed_half4(packed_low_0) * scale4; +// packed_half4 inBlow_1 = packed_half4(packed_low_1) * scale4; +// packed_half4 inBlow_2 = packed_half4(packed_low_2) * scale4; +// packed_half4 inBlow_3 = packed_half4(packed_low_3) * scale4; + +// packed_half4 inBhigh_0 = packed_half4(packed_high_0) * scale4; +// packed_half4 inBhigh_1 = packed_half4(packed_high_1) * scale4; +// packed_half4 inBhigh_2 = packed_half4(packed_high_2) * scale4; +// packed_half4 inBhigh_3 = packed_half4(packed_high_3) * scale4; + +// sum4 += inAlow_0 * inBlow_0; +// sum4 += inAlow_1 * inBlow_1; +// sum4 += inAlow_2 * inBlow_2; +// sum4 += inAlow_3 * inBlow_3; +// sum4 += inAhigh_0 * inBhigh_0; +// sum4 += inAhigh_1 * inBhigh_1; +// sum4 += inAhigh_2 * inBhigh_2; +// sum4 += inAhigh_3 * inBhigh_3; +// } +// } +// half sum = sum4[0] + sum4[1] + sum4[2] + sum4[3]; +// result[idy * n + idx] = float(sum); +// } diff --git a/llm/tests/metal/MetalAdder.h b/llm/tests/metal/object_c/MetalAdder.h similarity index 90% rename from llm/tests/metal/MetalAdder.h rename to llm/tests/metal/object_c/MetalAdder.h index 26e08665..fbe909d8 100644 --- a/llm/tests/metal/MetalAdder.h +++ b/llm/tests/metal/object_c/MetalAdder.h @@ -4,7 +4,7 @@ A class to manage all of the Metal objects this app creates. 
#import #import -#include "../../include/common.h" +#import "../../include/common.h" NS_ASSUME_NONNULL_BEGIN diff --git a/llm/tests/metal/MetalAdder.m b/llm/tests/metal/object_c/MetalAdder.m similarity index 100% rename from llm/tests/metal/MetalAdder.m rename to llm/tests/metal/object_c/MetalAdder.m diff --git a/llm/tests/metal/add.metal b/llm/tests/metal/object_c/add.metal similarity index 100% rename from llm/tests/metal/add.metal rename to llm/tests/metal/object_c/add.metal diff --git a/llm/tests/metal/object_c/add_test.cpp b/llm/tests/metal/object_c/add_test.cpp new file mode 100644 index 00000000..a319089a --- /dev/null +++ b/llm/tests/metal/object_c/add_test.cpp @@ -0,0 +1,112 @@ +#include +#include + + +class MetalArrayAddition { +public: + MetalArrayAddition() { + // Initialize Metal + device = MTLCreateSystemDefaultDevice(); + if (!device) { + std::cerr << "Metal is not supported on this device." << std::endl; + exit(EXIT_FAILURE); + } + + // Create a command queue + commandQueue = [device newCommandQueue]; + + // Load and compile the Metal Shaders + NSError *error = nil; + NSString *shaderSource = [NSString stringWithContentsOfFile:@"add.metal" + encoding:NSUTF8StringEncoding + error:&error]; + if (error) { + std::cerr << "Error reading shader source: " << error.localizedDescription.UTF8String << std::endl; + exit(EXIT_FAILURE); + } + + NSBundle *bundle = [NSBundle mainBundle]; + NSString *shaderPath = [bundle pathForResource:@"ArrayAdditionShader" ofType:@"metal"]; + NSURL *shaderURL = [NSURL fileURLWithPath:shaderPath]; + + NSError *compileError = nil; + library = [device newLibraryWithSource:shaderSource options:nil error:&compileError]; + if (compileError) { + std::cerr << "Shader compilation error: " << compileError.localizedDescription.UTF8String << std::endl; + exit(EXIT_FAILURE); + } + + // Create a pipeline state + MTLFunction *kernelFunction = [library newFunctionWithName:@"elementwise_add"]; + pipelineState = [device 
newComputePipelineStateWithFunction:kernelFunction error:&error]; + if (error) { + std::cerr << "Pipeline state creation error: " << error.localizedDescription.UTF8String << std::endl; + exit(EXIT_FAILURE); + } + } + + void performArrayAddition(const float* inputArrayA, const float* inputArrayB, float* outputArray, int arraySize) { + // Create buffers + MTLBuffer *bufferA = [device newBufferWithBytes:inputArrayA length:arraySize * sizeof(float) options:MTLResourceStorageModeShared]; + MTLBuffer *bufferB = [device newBufferWithBytes:inputArrayB length:arraySize * sizeof(float) options:MTLResourceStorageModeShared]; + MTLBuffer *bufferResult = [device newBufferWithLength:arraySize * sizeof(float) options:MTLResourceStorageModeShared]; + + // Create a command buffer + id commandBuffer = [commandQueue commandBuffer]; + + // Create a compute command encoder + id computeEncoder = [commandBuffer computeCommandEncoder]; + [computeEncoder setComputePipelineState:pipelineState]; + [computeEncoder setBuffer:bufferA offset:0 atIndex:0]; + [computeEncoder setBuffer:bufferB offset:0 atIndex:1]; + [computeEncoder setBuffer:bufferResult offset:0 atIndex:2]; + + // Set thread group size + MTLSize threadGroupSize = MTLSizeMake(1, 1, 1); + MTLSize threadGroups = MTLSizeMake(arraySize / threadGroupSize.width, 1, 1); + + // Dispatch the compute kernel + [computeEncoder dispatchThreadgroups:threadGroups threadsPerThreadgroup:threadGroupSize]; + [computeEncoder endEncoding]; + + // Commit the command buffer + [commandBuffer commit]; + [commandBuffer waitUntilCompleted]; + + // Read back the result + memcpy(outputArray, [bufferResult contents], arraySize * sizeof(float)); + } + +private: + id device; + id commandQueue; + id library; + id pipelineState; +}; + +int main() { + const int arraySize = 100; + float inputArrayA[arraySize]; + float inputArrayB[arraySize]; + float outputArray[arraySize]; + + // Initialize input arrays + for (int i = 0; i < arraySize; ++i) { + inputArrayA[i] = i; + 
inputArrayB[i] = 2 * i; + } + + // Create MetalArrayAddition instance + MetalArrayAddition metalArrayAddition; + + // Perform array addition using Metal GPU + metalArrayAddition.performArrayAddition(inputArrayA, inputArrayB, outputArray, arraySize); + + // Display the result + std::cout << "Resultant Array:" << std::endl; + for (int i = 0; i < arraySize; ++i) { + std::cout << outputArray[i] << " "; + } + + return 0; +} diff --git a/llm/tests/metal/main.m b/llm/tests/metal/object_c/main.m similarity index 100% rename from llm/tests/metal/main.m rename to llm/tests/metal/object_c/main.m diff --git a/llm/tests/metal/add_test.swift b/llm/tests/metal/swift_version/add_test.swift similarity index 100% rename from llm/tests/metal/add_test.swift rename to llm/tests/metal/swift_version/add_test.swift From 7cad52067d1e306da65fded51fe7ced6698e05b5 Mon Sep 17 00:00:00 2001 From: RaymondWang0 Date: Wed, 24 Jan 2024 23:01:13 -0500 Subject: [PATCH 05/37] minor fix --- llm/tests/metal/cpp_version/Makefile | 29 ++++++++++++++++++++++++++++ llm/tests/metal/cpp_version/main.cc | 2 +- 2 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 llm/tests/metal/cpp_version/Makefile diff --git a/llm/tests/metal/cpp_version/Makefile b/llm/tests/metal/cpp_version/Makefile new file mode 100644 index 00000000..3c089572 --- /dev/null +++ b/llm/tests/metal/cpp_version/Makefile @@ -0,0 +1,29 @@ +CXX = /opt/homebrew/opt/llvm/bin/clang++ +CXXFLAGS = -std=c++17 -stdlib=libc++ -O3 + +# Executable and source files +TEST_TARGET = benchmark +TARGET = $(TEST_TARGET) +KERNEL_SRC = $(wildcard ./src/*.cpp) + +SRC = $(KERNEL_SRC) +INCLUDE_DIRS = -I../../../../metal-cpp +LIB = -framework Metal -framework Foundation -framework MetalKit + + +# Default target +all: $(TARGET) + +# Linking +benchmark: build_metallib + $(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -o main main.cc $(SRC) $(LIB) $(LDFLAGS) + +build_air: + xcrun -sdk macosx metal -ffast-math -fno-fast-math $(INCLUDE_DIRS) -c op.metal -o library.air + 
+build_metallib: build_air + xcrun -sdk macosx metallib library.air -o default.metallib + +# Clean up +clean: + rm -f main library.air library.metallib default.metallib diff --git a/llm/tests/metal/cpp_version/main.cc b/llm/tests/metal/cpp_version/main.cc index 2813eb4d..bad4d82f 100644 --- a/llm/tests/metal/cpp_version/main.cc +++ b/llm/tests/metal/cpp_version/main.cc @@ -13,7 +13,7 @@ #define CA_PRIVATE_IMPLEMENTATION #define MTL_PRIVATE_IMPLEMENTATION #include "Metal/Metal.hpp" -#include "Foundation/Foundation" +#include "Foundation/Foundation.hpp" int arraySize = 100; From 69c929d4f202f40ab7439b6430a509cd6ed9dca6 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Thu, 25 Jan 2024 10:58:14 -0500 Subject: [PATCH 06/37] speedup issue with metal --- llm/tests/metal/cpp_version/main.cc | 89 +++++++++++++++++++++------- llm/tests/metal/cpp_version/op.metal | 10 ++-- 2 files changed, 74 insertions(+), 25 deletions(-) mode change 100644 => 100755 llm/tests/metal/cpp_version/main.cc diff --git a/llm/tests/metal/cpp_version/main.cc b/llm/tests/metal/cpp_version/main.cc old mode 100644 new mode 100755 index bad4d82f..5d1c8eec --- a/llm/tests/metal/cpp_version/main.cc +++ b/llm/tests/metal/cpp_version/main.cc @@ -8,6 +8,7 @@ #include #include #include +#include #define NS_PRIVATE_IMPLEMENTATION #define CA_PRIVATE_IMPLEMENTATION @@ -15,7 +16,20 @@ #include "Metal/Metal.hpp" #include "Foundation/Foundation.hpp" -int arraySize = 100; +MTL::Buffer* pM1; +MTL::Buffer* pM2; +MTL::Buffer* pM3; + +using namespace std; +using namespace chrono; + +int arraySize = 100000; + +void addArrays(const int arr1[], const int arr2[], int result[], int size) { + for (int i = 0; i < size; ++i) { + result[i] = arr1[i] + arr2[i]; + } +} // Function to generate a random integer array void generateRandomIntArray(int* array) { @@ -36,13 +50,24 @@ int main(){ // int M1[5][5], M2[5][5], Output[5][5]; int *M1 = new int[arraySize]; int *M2 = new int[arraySize]; - int *Output = new int[arraySize]; + int *M3 
= new int[arraySize]; + + generateRandomIntArray(M1); generateRandomIntArray(M2); - generateRandomIntArray(Output); - + generateRandomIntArray(M3); + std::cout << "M1[0]: " << M1[0] << " " << M1[1] << " " << M1[2] << std::endl; + std::cout << "M2[0]: " << M2[0] << " " << M2[1] << " " << M2[2] << std::endl; + std::cout << "M3[0]: " << M3[0] << " " << M3[1] << " " << M3[2] << std::endl; + + + auto start2 = high_resolution_clock::now(); + addArrays(M1, M2, M3, arraySize); + auto stop2 = high_resolution_clock::now(); + auto duration2 = duration_cast(stop2 - start2); + // auto start = high_resolution_clock::now(); MTL::Device *_mDevice = MTL::CreateSystemDefaultDevice(); NS::Error *error = nullptr; MTL::Library *defaultLibrary = _mDevice->newDefaultLibrary(); @@ -81,9 +106,16 @@ int main(){ } //Create Metal buffers for input and output, if inside the TinyChat, param should be created in advance - MTL::Buffer *buffer1 = _mDevice->newBuffer(sizeof(M1), MTL::ResourceStorageModeShared); - MTL::Buffer *buffer2 = _mDevice->newBuffer(sizeof(M2), MTL::ResourceStorageModeShared); - MTL::Buffer *buffer3 = _mDevice->newBuffer(sizeof(Output), MTL::ResourceStorageModeShared); + MTL::Buffer *buffer1 = _mDevice->newBuffer(sizeof(int)*arraySize, MTL::ResourceStorageModeShared); + MTL::Buffer *buffer2 = _mDevice->newBuffer(sizeof(int)*arraySize, MTL::ResourceStorageModeShared); + MTL::Buffer *buffer3 = _mDevice->newBuffer(sizeof(int)*arraySize, MTL::ResourceStorageModeShared); + + pM1 = buffer1; + pM2 = buffer2; + pM3 = buffer3; + memcpy(pM1->contents(), M1, arraySize); + memcpy(pM2->contents(), M2, arraySize); + memcpy(pM3->contents(), M3, arraySize); // Start the computation in metal gpu // Create a command buffer to hold commands. @@ -97,20 +129,28 @@ int main(){ // Set buffers for input and output // Encode the pipeline state object and its parameters. 
computeEncoder->setComputePipelineState(_mMatmulFunctionPSO); - computeEncoder->setBuffer(buffer1, 0, 0); - computeEncoder->setBuffer(buffer2, 0, 1); - computeEncoder->setBuffer(buffer3, 0, 2); + computeEncoder->setBuffer(pM1, 0, 0); + computeEncoder->setBuffer(pM2, 0, 1); + computeEncoder->setBuffer(pM3, 0, 2); // number of threadgroup - uint32_t maxThreadsperthreadgroup = (uint32_t)_mMatmulFunctionPSO->maxTotalThreadsPerThreadgroup(); - uint32_t threadsPerthreadgroup = MIN(maxThreadsperthreadgroup, arraySize); - MTL::Size threadgroupCount = MTL::Size::Make((arraySize+threadsPerthreadgroup-1)/threadsPerthreadgroup, 1, 1); - + MTL::Size mtlthreadsPerthreadgroup = MTL::Size::Make(arraySize, 1, 1); // Calculate a thread number per group - MTL::Size threadgroupSize = MTL::Size::Make(threadsPerthreadgroup, 1, 1); + MTL::Size threadgroupSize = MTL::Size::Make(1, 1, 1); + + // Set threadgroup size and dispatch compute threads + NS::UInteger maxThreadsperthreadgroup = _mMatmulFunctionPSO->maxTotalThreadsPerThreadgroup(); + NS::UInteger threadsPerThreadgroup = MIN(arraySize, maxThreadsperthreadgroup); + MTL::Size threadgroupCount = MTL::Size::Make((arraySize + threadsPerThreadgroup - 1) / threadsPerThreadgroup, 1, 1); + // Dispatch threads in multiple threadgroups + MTL::Size threadgroups = MTL::Size::Make(threadsPerThreadgroup, 1, 1); + + + + auto start = high_resolution_clock::now(); // Encode the compute command. - computeEncoder->dispatchThreads(threadgroupCount, threadgroupSize); + computeEncoder->dispatchThreads(mtlthreadsPerthreadgroup, threadgroupSize); // End the compute pass. computeEncoder->endEncoding(); @@ -122,11 +162,20 @@ int main(){ // but in this example, the code simply blocks until the calculation is complete. 
commandBuffer->waitUntilCompleted(); - int *output = (int*)buffer3->contents(); - std::cout << "The output from Metal GPU is: " << output[0] << std::endl; + // std::cout << "M1[0]: " << ((int*)(buffer1->contents()))[0] << " " << ((int*)(buffer1->contents()))[1] << " " << ((int*)(buffer1->contents()))[2] << std::endl; + // std::cout << "M2[0]: " << ((int*)(buffer2->contents()))[0] << " " << ((int*)(buffer2->contents()))[1] << " " << ((int*)(buffer2->contents()))[2] << std::endl; + // std::cout << "M3[0]: " << ((int*)(buffer3->contents()))[0] << " " << ((int*)(buffer3->contents()))[1] << " " << ((int*)(buffer3->contents()))[2] << std::endl; + computeEncoder->release(); commandBuffer->release(); - -} + auto stop = high_resolution_clock::now(); + auto duration = duration_cast(stop - start); + + cout << "GPU: " << duration.count() << " microseconds" << endl; + cout << "CPU: " << duration2.count() << " microseconds" << endl; +} + + + diff --git a/llm/tests/metal/cpp_version/op.metal b/llm/tests/metal/cpp_version/op.metal index f01752b9..96c04041 100644 --- a/llm/tests/metal/cpp_version/op.metal +++ b/llm/tests/metal/cpp_version/op.metal @@ -8,13 +8,13 @@ #include using namespace metal; -kernel void arrayAdd(const device float* inputA [[buffer(0)]], - const device float* inputB [[buffer(1)]], - device float* output [[buffer(2)]], +kernel void arrayAdd(const device int* inputA, + const device int* inputB, + device int* output, uint id [[thread_position_in_grid]]) { - // Perform array addition - output[id] = inputA[id] + inputB[id]; + // Perform array addition + output[id] = inputA[id] + inputB[id]; } From 11fa7b6cd4525d79f9663335ace2941b29ea7180 Mon Sep 17 00:00:00 2001 From: RaymondWang0 Date: Fri, 26 Jan 2024 16:26:31 -0500 Subject: [PATCH 07/37] minor fix --- llm/tests/metal/cpp_version/main.cc | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/llm/tests/metal/cpp_version/main.cc b/llm/tests/metal/cpp_version/main.cc index 
5d1c8eec..db1e1f2f 100755 --- a/llm/tests/metal/cpp_version/main.cc +++ b/llm/tests/metal/cpp_version/main.cc @@ -23,7 +23,7 @@ MTL::Buffer* pM3; using namespace std; using namespace chrono; -int arraySize = 100000; +int arraySize = 100000000; void addArrays(const int arr1[], const int arr2[], int result[], int size) { for (int i = 0; i < size; ++i) { @@ -52,20 +52,18 @@ int main(){ int *M2 = new int[arraySize]; int *M3 = new int[arraySize]; - - generateRandomIntArray(M1); generateRandomIntArray(M2); generateRandomIntArray(M3); - std::cout << "M1[0]: " << M1[0] << " " << M1[1] << " " << M1[2] << std::endl; - std::cout << "M2[0]: " << M2[0] << " " << M2[1] << " " << M2[2] << std::endl; - std::cout << "M3[0]: " << M3[0] << " " << M3[1] << " " << M3[2] << std::endl; - auto start2 = high_resolution_clock::now(); addArrays(M1, M2, M3, arraySize); auto stop2 = high_resolution_clock::now(); auto duration2 = duration_cast(stop2 - start2); + std::cout << "CPU" << std::endl; + std::cout << "M1[0]: " << M1[0] << " " << M1[1] << " " << M1[2] << std::endl; + std::cout << "M2[0]: " << M2[0] << " " << M2[1] << " " << M2[2] << std::endl; + std::cout << "M3[0]: " << M3[0] << " " << M3[1] << " " << M3[2] << std::endl; // auto start = high_resolution_clock::now(); MTL::Device *_mDevice = MTL::CreateSystemDefaultDevice(); @@ -145,12 +143,11 @@ int main(){ MTL::Size threadgroupCount = MTL::Size::Make((arraySize + threadsPerThreadgroup - 1) / threadsPerThreadgroup, 1, 1); // Dispatch threads in multiple threadgroups MTL::Size threadgroups = MTL::Size::Make(threadsPerThreadgroup, 1, 1); - - auto start = high_resolution_clock::now(); // Encode the compute command. - computeEncoder->dispatchThreads(mtlthreadsPerthreadgroup, threadgroupSize); + // computeEncoder->dispatchThreads(mtlthreadsPerthreadgroup, threadgroupSize); + computeEncoder->dispatchThreadgroups(threadgroups, threadgroupCount); // End the compute pass. 
computeEncoder->endEncoding(); @@ -162,10 +159,10 @@ int main(){ // but in this example, the code simply blocks until the calculation is complete. commandBuffer->waitUntilCompleted(); - // std::cout << "M1[0]: " << ((int*)(buffer1->contents()))[0] << " " << ((int*)(buffer1->contents()))[1] << " " << ((int*)(buffer1->contents()))[2] << std::endl; - // std::cout << "M2[0]: " << ((int*)(buffer2->contents()))[0] << " " << ((int*)(buffer2->contents()))[1] << " " << ((int*)(buffer2->contents()))[2] << std::endl; - // std::cout << "M3[0]: " << ((int*)(buffer3->contents()))[0] << " " << ((int*)(buffer3->contents()))[1] << " " << ((int*)(buffer3->contents()))[2] << std::endl; - + std::cout << "GPU" << std::endl; + std::cout << "M1[0]: " << ((int*)(buffer1->contents()))[0] << " " << ((int*)(buffer1->contents()))[1] << " " << ((int*)(buffer1->contents()))[2] << std::endl; + std::cout << "M2[0]: " << ((int*)(buffer2->contents()))[0] << " " << ((int*)(buffer2->contents()))[1] << " " << ((int*)(buffer2->contents()))[2] << std::endl; + std::cout << "M3[0]: " << ((int*)(buffer3->contents()))[0] << " " << ((int*)(buffer3->contents()))[1] << " " << ((int*)(buffer3->contents()))[2] << std::endl; computeEncoder->release(); commandBuffer->release(); From 37ff4872406086371bcfc3bc05c789a39ce50556 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Sat, 27 Jan 2024 01:14:45 -0500 Subject: [PATCH 08/37] reorganized clear version for metal main --- llm/tests/metal/cpp_version/main.cc | 182 ++++++++++++---------------- 1 file changed, 78 insertions(+), 104 deletions(-) diff --git a/llm/tests/metal/cpp_version/main.cc b/llm/tests/metal/cpp_version/main.cc index db1e1f2f..609d54e0 100755 --- a/llm/tests/metal/cpp_version/main.cc +++ b/llm/tests/metal/cpp_version/main.cc @@ -3,7 +3,8 @@ // metal_cpp // // Created by Derrick on 1/24/24. -// +// Some to-do list: +// 1. 
keep a map: ptr on CPU -> buffer on GPU #include #include @@ -13,17 +14,27 @@ #define NS_PRIVATE_IMPLEMENTATION #define CA_PRIVATE_IMPLEMENTATION #define MTL_PRIVATE_IMPLEMENTATION + #include "Metal/Metal.hpp" #include "Foundation/Foundation.hpp" -MTL::Buffer* pM1; -MTL::Buffer* pM2; -MTL::Buffer* pM3; +MTL::Buffer *bM1, *bM2, *bM3; +MTL::Device* mDevice; +MTL::ComputePipelineState* mfnPipelineState; +MTL::CommandQueue* mCommandQueue; +NS::Error *error = nullptr; + +const char * fn_name = "arrayAdd"; + +int *A1, *A2, *A3; + using namespace std; using namespace chrono; -int arraySize = 100000000; +uint row = 100; +uint col = 100; +uint arraySize = row*col; void addArrays(const int arr1[], const int arr2[], int result[], int size) { for (int i = 0; i < size; ++i) { @@ -46,131 +57,94 @@ void generateRandomIntArray(int* array) { } } -int main(){ -// int M1[5][5], M2[5][5], Output[5][5]; - int *M1 = new int[arraySize]; - int *M2 = new int[arraySize]; - int *M3 = new int[arraySize]; - - generateRandomIntArray(M1); - generateRandomIntArray(M2); - generateRandomIntArray(M3); - - auto start2 = high_resolution_clock::now(); - addArrays(M1, M2, M3, arraySize); - auto stop2 = high_resolution_clock::now(); - auto duration2 = duration_cast(stop2 - start2); - std::cout << "CPU" << std::endl; - std::cout << "M1[0]: " << M1[0] << " " << M1[1] << " " << M1[2] << std::endl; - std::cout << "M2[0]: " << M2[0] << " " << M2[1] << " " << M2[2] << std::endl; - std::cout << "M3[0]: " << M3[0] << " " << M3[1] << " " << M3[2] << std::endl; - - // auto start = high_resolution_clock::now(); - MTL::Device *_mDevice = MTL::CreateSystemDefaultDevice(); - NS::Error *error = nullptr; - MTL::Library *defaultLibrary = _mDevice->newDefaultLibrary(); - +void metal_init(){ + mDevice = MTL::CreateSystemDefaultDevice(); + MTL::Library *defaultLibrary = mDevice->newDefaultLibrary(); if (defaultLibrary == nullptr) { std::cout << "Failed to find the default library." 
<< std::endl; - return 0; + return; } - - // Give matmul kernel - auto str = NS::String::string("arrayAdd", NS::ASCIIStringEncoding); + auto str = NS::String::string(fn_name, NS::ASCIIStringEncoding); MTL::Function *matmulFunction = defaultLibrary->newFunction(str); defaultLibrary->release(); - if (matmulFunction == nullptr) { std::cout << "Failed to find the function." << std::endl; - return 0; + return; } - - // Create a compute pipeline state object. - MTL::ComputePipelineState * _mMatmulFunctionPSO = _mDevice->newComputePipelineState(matmulFunction, &error); + mfnPipelineState = mDevice->newComputePipelineState(matmulFunction, &error); matmulFunction->release(); - - if (_mMatmulFunctionPSO == nullptr) { - // If the Metal API validation is enabled, you can find out more information about what - // went wrong. (Metal API validation is enabled by default when a debug build is run - // from Xcode) + if (mfnPipelineState == nullptr) { std::cout << "Failed to created pipeline state object, error " << error << "." << std::endl; - return 0; + return; } - - MTL::CommandQueue * _mCommandQueue = _mDevice->newCommandQueue(); - if (_mCommandQueue == nullptr) { + mCommandQueue = mDevice->newCommandQueue(); + if (mCommandQueue == nullptr) { std::cout << "Failed to find the command queue." 
<< std::endl; - return 0; + return; } - +} + +MTL::Buffer *metal_newBuf(unsigned long type_size, unsigned long size){ + return mDevice->newBuffer(type_size*size, MTL::ResourceStorageModeShared); +} + +void metal_encodecommand(MTL::ComputeCommandEncoder *computeEncoder){ //Create Metal buffers for input and output, if inside the TinyChat, param should be created in advance - MTL::Buffer *buffer1 = _mDevice->newBuffer(sizeof(int)*arraySize, MTL::ResourceStorageModeShared); - MTL::Buffer *buffer2 = _mDevice->newBuffer(sizeof(int)*arraySize, MTL::ResourceStorageModeShared); - MTL::Buffer *buffer3 = _mDevice->newBuffer(sizeof(int)*arraySize, MTL::ResourceStorageModeShared); - - pM1 = buffer1; - pM2 = buffer2; - pM3 = buffer3; - memcpy(pM1->contents(), M1, arraySize); - memcpy(pM2->contents(), M2, arraySize); - memcpy(pM3->contents(), M3, arraySize); - - // Start the computation in metal gpu - // Create a command buffer to hold commands. - MTL::CommandBuffer *commandBuffer = _mCommandQueue->commandBuffer(); - assert(commandBuffer != nullptr); + bM1 = metal_newBuf(sizeof(int), arraySize); + bM2 = metal_newBuf(sizeof(int), arraySize); + bM3 = metal_newBuf(sizeof(int), arraySize); + + computeEncoder->setComputePipelineState(mfnPipelineState); + computeEncoder->setBuffer(bM1, 0, 0); + computeEncoder->setBuffer(bM2, 0, 1); + computeEncoder->setBuffer(bM3, 0, 2); + + memcpy(bM1->contents(), A1, arraySize); + memcpy(bM2->contents(), A2, arraySize); + memcpy(bM3->contents(), A3, arraySize); +} - // Start a compute pass. +void metal_compute(){ + // Initialization of GPU vals + MTL::CommandBuffer *commandBuffer = mCommandQueue->commandBuffer(); + assert(commandBuffer != nullptr); MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder(); assert(computeEncoder != nullptr); - - // Set buffers for input and output - // Encode the pipeline state object and its parameters. 
- computeEncoder->setComputePipelineState(_mMatmulFunctionPSO); - computeEncoder->setBuffer(pM1, 0, 0); - computeEncoder->setBuffer(pM2, 0, 1); - computeEncoder->setBuffer(pM3, 0, 2); - // number of threadgroup - MTL::Size mtlthreadsPerthreadgroup = MTL::Size::Make(arraySize, 1, 1); - // Calculate a thread number per group - MTL::Size threadgroupSize = MTL::Size::Make(1, 1, 1); + // Encode command and set buffer to GPU + metal_encodecommand(computeEncoder); + // Threads -> ThreadGroup -> Grid + NS::UInteger maxThreadGroupSize = mfnPipelineState->maxTotalThreadsPerThreadgroup(); + NS::UInteger ThreadGroupSize = MIN(arraySize, maxThreadGroupSize); + MTL::Size mGridSize = MTL::Size::Make((arraySize + ThreadGroupSize - 1) / ThreadGroupSize, 1, 1); + MTL::Size mThreadGroupSize = MTL::Size::Make(ThreadGroupSize, 1, 1); - // Set threadgroup size and dispatch compute threads - NS::UInteger maxThreadsperthreadgroup = _mMatmulFunctionPSO->maxTotalThreadsPerThreadgroup(); - NS::UInteger threadsPerThreadgroup = MIN(arraySize, maxThreadsperthreadgroup); - MTL::Size threadgroupCount = MTL::Size::Make((arraySize + threadsPerThreadgroup - 1) / threadsPerThreadgroup, 1, 1); - // Dispatch threads in multiple threadgroups - MTL::Size threadgroups = MTL::Size::Make(threadsPerThreadgroup, 1, 1); - - auto start = high_resolution_clock::now(); - // Encode the compute command. - // computeEncoder->dispatchThreads(mtlthreadsPerthreadgroup, threadgroupSize); - computeEncoder->dispatchThreadgroups(threadgroups, threadgroupCount); - - // End the compute pass. + // Dispatch and Run Computation + computeEncoder->dispatchThreadgroups(mGridSize, mThreadGroupSize); computeEncoder->endEncoding(); - - // Execute the command. commandBuffer->commit(); - - // Normally, you want to do other work in your app while the GPU is running, - // but in this example, the code simply blocks until the calculation is complete. 
commandBuffer->waitUntilCompleted(); - - std::cout << "GPU" << std::endl; - std::cout << "M1[0]: " << ((int*)(buffer1->contents()))[0] << " " << ((int*)(buffer1->contents()))[1] << " " << ((int*)(buffer1->contents()))[2] << std::endl; - std::cout << "M2[0]: " << ((int*)(buffer2->contents()))[0] << " " << ((int*)(buffer2->contents()))[1] << " " << ((int*)(buffer2->contents()))[2] << std::endl; - std::cout << "M3[0]: " << ((int*)(buffer3->contents()))[0] << " " << ((int*)(buffer3->contents()))[1] << " " << ((int*)(buffer3->contents()))[2] << std::endl; - computeEncoder->release(); commandBuffer->release(); - auto stop = high_resolution_clock::now(); - auto duration = duration_cast(stop - start); +} + +int main(){ - cout << "GPU: " << duration.count() << " microseconds" << endl; - cout << "CPU: " << duration2.count() << " microseconds" << endl; + // Initialization for array addition + A1 = new int[arraySize]; + A2 = new int[arraySize]; + A3 = new int[arraySize]; + generateRandomIntArray(A1); + generateRandomIntArray(A2); + + // Initialization for matmul + + metal_init(); + metal_compute(); + printf("A1: %d; A2 %d; A3 %d\n", A1[0], A2[0], ((int*)(bM3->contents()))[0]); + + } From c3e3316d156a198a2e43014380d09ad020463ea2 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Sat, 27 Jan 2024 02:24:05 -0500 Subject: [PATCH 09/37] matmul metal --- llm/tests/metal/cpp_version/main.cc | 91 ++++++++++++++++++++-------- llm/tests/metal/cpp_version/op.metal | 44 ++++++-------- 2 files changed, 87 insertions(+), 48 deletions(-) diff --git a/llm/tests/metal/cpp_version/main.cc b/llm/tests/metal/cpp_version/main.cc index 609d54e0..e3ac5cdd 100755 --- a/llm/tests/metal/cpp_version/main.cc +++ b/llm/tests/metal/cpp_version/main.cc @@ -24,32 +24,58 @@ MTL::ComputePipelineState* mfnPipelineState; MTL::CommandQueue* mCommandQueue; NS::Error *error = nullptr; -const char * fn_name = "arrayAdd"; -int *A1, *A2, *A3; using namespace std; using namespace chrono; -uint row = 100; -uint col = 100; 
+// Customizable parameters for testing +uint row = 2; +uint col = 2; uint arraySize = row*col; - -void addArrays(const int arr1[], const int arr2[], int result[], int size) { +const char * fn_name = "matmul"; +float *A1, *A2, *A3; +struct dim { + uint r; + uint c; +}; +struct dim matdim; + +void test_addArrays(const float arr1[], const float arr2[], float result[], uint size) { for (int i = 0; i < size; ++i) { result[i] = arr1[i] + arr2[i]; } } +void test_matmul(const float* matA, int rowsA, int colsA, + const float* matB, int rowsB, int colsB, + float* result) { + for (int i = 0; i < rowsA; ++i) { + for (int j = 0; j < colsB; ++j) { + result[i * colsB + j] = 0; + for (int k = 0; k < colsA; ++k) { + result[i * colsB + j] += matA[i * colsA + k] * matB[k * colsB + j]; + } + } + } +} + +void printArray(const float* array) { + for (int i = 0; i < arraySize; ++i) { + std::cout << array[i] << " "; + } + std::cout << std::endl; +} + // Function to generate a random integer array -void generateRandomIntArray(int* array) { +void generateRandomFloatArray(float* array) { // Use a random device to seed the random number generator std::random_device rd; // Use the current time as a seed for the random number generator std::mt19937 gen(rd()); // Define the range of random numbers (adjust as needed) - std::uniform_int_distribution distribution(1, 100); + std::uniform_real_distribution distribution(1, 100); // Generate random integers and fill the array for (int i = 0; i < arraySize; ++i) { @@ -90,9 +116,9 @@ MTL::Buffer *metal_newBuf(unsigned long type_size, unsigned long size){ void metal_encodecommand(MTL::ComputeCommandEncoder *computeEncoder){ //Create Metal buffers for input and output, if inside the TinyChat, param should be created in advance - bM1 = metal_newBuf(sizeof(int), arraySize); - bM2 = metal_newBuf(sizeof(int), arraySize); - bM3 = metal_newBuf(sizeof(int), arraySize); + bM1 = metal_newBuf(sizeof(float), arraySize); + bM2 = metal_newBuf(sizeof(float), arraySize); 
+ bM3 = metal_newBuf(sizeof(float), arraySize); computeEncoder->setComputePipelineState(mfnPipelineState); computeEncoder->setBuffer(bM1, 0, 0); @@ -115,10 +141,16 @@ void metal_compute(){ metal_encodecommand(computeEncoder); // Threads -> ThreadGroup -> Grid - NS::UInteger maxThreadGroupSize = mfnPipelineState->maxTotalThreadsPerThreadgroup(); - NS::UInteger ThreadGroupSize = MIN(arraySize, maxThreadGroupSize); - MTL::Size mGridSize = MTL::Size::Make((arraySize + ThreadGroupSize - 1) / ThreadGroupSize, 1, 1); - MTL::Size mThreadGroupSize = MTL::Size::Make(ThreadGroupSize, 1, 1); + // NS::UInteger maxThreadGroupSize = mfnPipelineState->maxTotalThreadsPerThreadgroup(); + // NS::UInteger ThreadGroupSize = MIN(arraySize, maxThreadGroupSize); + // MTL::Size mGridSize = MTL::Size::Make((arraySize + ThreadGroupSize - 1) / ThreadGroupSize, 1, 1); + // MTL::Size mThreadGroupSize = MTL::Size::Make(ThreadGroupSize, 1, 1); + // NS::UInteger maxThreadGroupSize = mfnPipelineState->maxTotalThreadsPerThreadgroup(); + // NS::UInteger ThreadGroupSize = MIN(arraySize, maxThreadGroupSize); + MTL::Size mThreadGroupSize = MTL::Size::Make(2, 2, 1); + MTL::Size mGridSize = MTL::Size::Make((matdim.r + mThreadGroupSize.width - 1) / mThreadGroupSize.width, + (matdim.r + mThreadGroupSize.height - 1) / mThreadGroupSize.height, + 1); // Dispatch and Run Computation computeEncoder->dispatchThreadgroups(mGridSize, mThreadGroupSize); @@ -132,19 +164,30 @@ void metal_compute(){ int main(){ // Initialization for array addition - A1 = new int[arraySize]; - A2 = new int[arraySize]; - A3 = new int[arraySize]; - generateRandomIntArray(A1); - generateRandomIntArray(A2); + A1 = new float[arraySize]; + A2 = new float[arraySize]; + A3 = new float[arraySize]; + generateRandomFloatArray(A1); + generateRandomFloatArray(A2); + printArray(A1); + printArray(A2); + matdim.r = row; + matdim.c = col; + - // Initialization for matmul + // CPU + test_matmul(A1, row, col, A2, row, col, A3); + printf("A1: %f; A2 %f; A3 
%f\n", A1[0], A2[0], A3[0]); + free(A3); + A3 = new float[arraySize]; + // GPU metal_init(); metal_compute(); - printf("A1: %d; A2 %d; A3 %d\n", A1[0], A2[0], ((int*)(bM3->contents()))[0]); - - + printf("A1: %f; A2 %f; A3 %f\n", A1[0], A2[0], ((float*)(bM3->contents()))[0]); + free(A1); + free(A2); + free(A3); } diff --git a/llm/tests/metal/cpp_version/op.metal b/llm/tests/metal/cpp_version/op.metal index 96c04041..ded55cc6 100644 --- a/llm/tests/metal/cpp_version/op.metal +++ b/llm/tests/metal/cpp_version/op.metal @@ -8,9 +8,9 @@ #include using namespace metal; -kernel void arrayAdd(const device int* inputA, - const device int* inputB, - device int* output, +kernel void arrayAdd(const device float* inputA, + const device float* inputB, + device float* output, uint id [[thread_position_in_grid]]) { // Perform array addition @@ -18,30 +18,26 @@ kernel void arrayAdd(const device int* inputA, } -// kernel void matmul(device const float* inA, -// device const float* inB, // column major -// device float* result, -// constant MetalMatMulParams& params, -// uint2 id [[thread_position_in_grid]]) -// { -// // the for-loop is replaced with a collection of threads, each of which -// // calls this function. 
+kernel void matmul(device const float* matrixA, + device const float* matrixB, + device float* matrixC, + uint2 gid [[thread_position_in_grid]]) +{ + unsigned int widthA = 2; // Set the width of matrix A + unsigned int widthB = 2; // Set the width of matrix B + unsigned int heightA = 2; // Set the height of matrix A -// const uint n = params.n; -// const uint k = params.k; + if (gid.x >= widthB || gid.y >= heightA) { + return; + } -// const uint idx = id.x; // column index of the output -// const uint idy = id.y; // row index of the output - -// float sum = 0; -// for (uint i = 0; i < k; i++){ -// float vA = inA[idy * k + i]; -// float vB = inB[idx * k + i]; + float sum = 0.0; + for (unsigned int k = 0; k < widthA; k++) { + sum += matrixA[gid.y * widthA + k] * matrixB[k * widthB + gid.x]; + } -// sum += vA * vB; -// } -// result[idy * n + idx] = sum; -// } + matrixC[gid.y * widthB + gid.x] = sum; +} // kernel void matmulInt4(device const float* inA, // device const uint8_t* inB, // column major From d55be07b5d5da0a72bc2aa4227f7f5c4fed3716b Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Sat, 27 Jan 2024 11:52:59 -0500 Subject: [PATCH 10/37] matmul correctness pass --- llm/tests/metal/cpp_version/main.cc | 29 ++++++++++++++++------------ llm/tests/metal/cpp_version/op.metal | 6 +++--- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/llm/tests/metal/cpp_version/main.cc b/llm/tests/metal/cpp_version/main.cc index e3ac5cdd..dff0fb32 100755 --- a/llm/tests/metal/cpp_version/main.cc +++ b/llm/tests/metal/cpp_version/main.cc @@ -31,8 +31,8 @@ using namespace std; using namespace chrono; // Customizable parameters for testing -uint row = 2; -uint col = 2; +uint row = 8; +uint col = 8; uint arraySize = row*col; const char * fn_name = "matmul"; float *A1, *A2, *A3; @@ -40,7 +40,8 @@ struct dim { uint r; uint c; }; -struct dim matdim; +struct dim matdim1; +struct dim matdim2; void test_addArrays(const float arr1[], const float arr2[], float result[], uint 
size) { for (int i = 0; i < size; ++i) { @@ -125,9 +126,9 @@ void metal_encodecommand(MTL::ComputeCommandEncoder *computeEncoder){ computeEncoder->setBuffer(bM2, 0, 1); computeEncoder->setBuffer(bM3, 0, 2); - memcpy(bM1->contents(), A1, arraySize); - memcpy(bM2->contents(), A2, arraySize); - memcpy(bM3->contents(), A3, arraySize); + memcpy(bM1->contents(), A1, arraySize*sizeof(float)); + memcpy(bM2->contents(), A2, arraySize*sizeof(float)); + memcpy(bM3->contents(), A3, arraySize*sizeof(float)); } void metal_compute(){ @@ -145,9 +146,7 @@ void metal_compute(){ // NS::UInteger ThreadGroupSize = MIN(arraySize, maxThreadGroupSize); // MTL::Size mGridSize = MTL::Size::Make((arraySize + ThreadGroupSize - 1) / ThreadGroupSize, 1, 1); // MTL::Size mThreadGroupSize = MTL::Size::Make(ThreadGroupSize, 1, 1); - // NS::UInteger maxThreadGroupSize = mfnPipelineState->maxTotalThreadsPerThreadgroup(); - // NS::UInteger ThreadGroupSize = MIN(arraySize, maxThreadGroupSize); - MTL::Size mThreadGroupSize = MTL::Size::Make(2, 2, 1); + MTL::Size mThreadGroupSize = MTL::Size::Make(8, 8, 1); MTL::Size mGridSize = MTL::Size::Make((matdim.r + mThreadGroupSize.width - 1) / mThreadGroupSize.width, (matdim.r + mThreadGroupSize.height - 1) / mThreadGroupSize.height, 1); @@ -177,18 +176,24 @@ int main(){ // CPU test_matmul(A1, row, col, A2, row, col, A3); - printf("A1: %f; A2 %f; A3 %f\n", A1[0], A2[0], A3[0]); + printf("CPU Results: \n"); + for (uint8_t i = 0; i < arraySize; i++){ + printf("A1: %f; A2 %f; A3 %f\n", A1[i], A2[i], A3[i]); + } free(A3); A3 = new float[arraySize]; // GPU metal_init(); metal_compute(); - printf("A1: %f; A2 %f; A3 %f\n", A1[0], A2[0], ((float*)(bM3->contents()))[0]); + printf("GPU Results: \n"); + for (uint8_t i = 0; i < arraySize; i++){ + printf("bM1: %f; bM2 %f; bM3 %f\n", ((float*)(bM1->contents()))[i], ((float*)(bM2->contents()))[i], ((float*)(bM3->contents()))[i]); + } free(A1); free(A2); free(A3); -} +} diff --git a/llm/tests/metal/cpp_version/op.metal 
b/llm/tests/metal/cpp_version/op.metal index ded55cc6..e284086b 100644 --- a/llm/tests/metal/cpp_version/op.metal +++ b/llm/tests/metal/cpp_version/op.metal @@ -23,9 +23,9 @@ kernel void matmul(device const float* matrixA, device float* matrixC, uint2 gid [[thread_position_in_grid]]) { - unsigned int widthA = 2; // Set the width of matrix A - unsigned int widthB = 2; // Set the width of matrix B - unsigned int heightA = 2; // Set the height of matrix A + unsigned int widthA = 8; // Set the width of matrix A + unsigned int widthB = 8; // Set the width of matrix B + unsigned int heightA = 8; // Set the height of matrix A if (gid.x >= widthB || gid.y >= heightA) { return; From ab446e7d12d68b33333790338e673809b32281e5 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Sat, 27 Jan 2024 15:57:25 -0500 Subject: [PATCH 11/37] matmul work --- llm/tests/metal/cpp_version/main.cc | 127 ++++++++++++++------------- llm/tests/metal/cpp_version/op.metal | 8 +- 2 files changed, 72 insertions(+), 63 deletions(-) diff --git a/llm/tests/metal/cpp_version/main.cc b/llm/tests/metal/cpp_version/main.cc index dff0fb32..28f20d2d 100755 --- a/llm/tests/metal/cpp_version/main.cc +++ b/llm/tests/metal/cpp_version/main.cc @@ -17,60 +17,52 @@ #include "Metal/Metal.hpp" #include "Foundation/Foundation.hpp" +#include "param.h" -MTL::Buffer *bM1, *bM2, *bM3; +// .h +MTL::Buffer *bM1, *bM2, *bM3, *bParam; MTL::Device* mDevice; MTL::ComputePipelineState* mfnPipelineState; MTL::CommandQueue* mCommandQueue; NS::Error *error = nullptr; - - - using namespace std; using namespace chrono; -// Customizable parameters for testing -uint row = 8; -uint col = 8; -uint arraySize = row*col; +// .cc const char * fn_name = "matmul"; + +// main +uint height1 = 100; +uint width1 = 100; +uint height2 = 100; +uint width2 = 100; float *A1, *A2, *A3; -struct dim { - uint r; - uint c; -}; -struct dim matdim1; -struct dim matdim2; +matmul_param *param; +// Test Use void test_addArrays(const float arr1[], const float arr2[], 
float result[], uint size) { for (int i = 0; i < size; ++i) { result[i] = arr1[i] + arr2[i]; } } - -void test_matmul(const float* matA, int rowsA, int colsA, - const float* matB, int rowsB, int colsB, - float* result) { - for (int i = 0; i < rowsA; ++i) { - for (int j = 0; j < colsB; ++j) { +void test_matmul(const float* matA, int rowsA, int colsA, const float* matB, int rowsB, int colsB, float* result) { + for (int i = 0; i < rowsA; i++) { + for (int j = 0; j < colsB; j++) { result[i * colsB + j] = 0; - for (int k = 0; k < colsA; ++k) { + for (int k = 0; k < colsA; k++) { result[i * colsB + j] += matA[i * colsA + k] * matB[k * colsB + j]; } } } } - -void printArray(const float* array) { +void printArray(const float* array, uint arraySize) { for (int i = 0; i < arraySize; ++i) { std::cout << array[i] << " "; } std::cout << std::endl; } - -// Function to generate a random integer array -void generateRandomFloatArray(float* array) { +void generateRandomFloatArray(float* array, uint arraySize) { // Use a random device to seed the random number generator std::random_device rd; // Use the current time as a seed for the random number generator @@ -84,6 +76,7 @@ void generateRandomFloatArray(float* array) { } } +// Metal functions void metal_init(){ mDevice = MTL::CreateSystemDefaultDevice(); MTL::Library *defaultLibrary = mDevice->newDefaultLibrary(); @@ -117,18 +110,22 @@ MTL::Buffer *metal_newBuf(unsigned long type_size, unsigned long size){ void metal_encodecommand(MTL::ComputeCommandEncoder *computeEncoder){ //Create Metal buffers for input and output, if inside the TinyChat, param should be created in advance - bM1 = metal_newBuf(sizeof(float), arraySize); - bM2 = metal_newBuf(sizeof(float), arraySize); - bM3 = metal_newBuf(sizeof(float), arraySize); + bM1 = metal_newBuf(sizeof(float), param->arraySize1); + bM2 = metal_newBuf(sizeof(float), param->arraySize2); + bM3 = metal_newBuf(sizeof(float), param->outputsize); + bParam = metal_newBuf(sizeof(matmul_param), 1); 
computeEncoder->setComputePipelineState(mfnPipelineState); computeEncoder->setBuffer(bM1, 0, 0); computeEncoder->setBuffer(bM2, 0, 1); - computeEncoder->setBuffer(bM3, 0, 2); + computeEncoder->setBuffer(bParam, 0, 2); + computeEncoder->setBuffer(bM3, 0, 3); + - memcpy(bM1->contents(), A1, arraySize*sizeof(float)); - memcpy(bM2->contents(), A2, arraySize*sizeof(float)); - memcpy(bM3->contents(), A3, arraySize*sizeof(float)); + memcpy(bM1->contents(), A1, param->arraySize1*sizeof(float)); + memcpy(bM2->contents(), A2, param->arraySize2*sizeof(float)); + memcpy(bM3->contents(), A3, param->outputsize*sizeof(float)); + memcpy(bParam->contents(), param, sizeof(matmul_param)); } void metal_compute(){ @@ -142,54 +139,64 @@ void metal_compute(){ metal_encodecommand(computeEncoder); // Threads -> ThreadGroup -> Grid - // NS::UInteger maxThreadGroupSize = mfnPipelineState->maxTotalThreadsPerThreadgroup(); - // NS::UInteger ThreadGroupSize = MIN(arraySize, maxThreadGroupSize); - // MTL::Size mGridSize = MTL::Size::Make((arraySize + ThreadGroupSize - 1) / ThreadGroupSize, 1, 1); - // MTL::Size mThreadGroupSize = MTL::Size::Make(ThreadGroupSize, 1, 1); MTL::Size mThreadGroupSize = MTL::Size::Make(8, 8, 1); - MTL::Size mGridSize = MTL::Size::Make((matdim.r + mThreadGroupSize.width - 1) / mThreadGroupSize.width, - (matdim.r + mThreadGroupSize.height - 1) / mThreadGroupSize.height, + MTL::Size mGridSize = MTL::Size::Make((param->width1 + mThreadGroupSize.width - 1) / mThreadGroupSize.width, + (param->height2 + mThreadGroupSize.height - 1) / mThreadGroupSize.height, 1); // Dispatch and Run Computation + // auto start = high_resolution_clock::now(); computeEncoder->dispatchThreadgroups(mGridSize, mThreadGroupSize); computeEncoder->endEncoding(); commandBuffer->commit(); commandBuffer->waitUntilCompleted(); + // auto stop = high_resolution_clock::now(); + // auto duration = duration_cast(stop - start); + // std::cout << "GPU: " << duration.count() << "ms" << std::endl; 
computeEncoder->release(); commandBuffer->release(); } int main(){ - // Initialization for array addition - A1 = new float[arraySize]; - A2 = new float[arraySize]; - A3 = new float[arraySize]; - generateRandomFloatArray(A1); - generateRandomFloatArray(A2); - printArray(A1); - printArray(A2); - matdim.r = row; - matdim.c = col; + param = new matmul_param; + param->height1 = height1; + param->height2 = height2; + param->width1 = width1; + param->width2 = width2; + param->outputsize = height1*width2; + param->arraySize1 = width1*height1; + param->arraySize2 = width2*height2; + A1 = new float[param->arraySize1]; + A2 = new float[param->arraySize2]; + A3 = new float[param->outputsize]; + generateRandomFloatArray(A1, param->arraySize1); + generateRandomFloatArray(A2, param->arraySize2); + // printArray(A1, param->arraySize1); + // printArray(A2, param->arraySize2); // CPU - test_matmul(A1, row, col, A2, row, col, A3); - printf("CPU Results: \n"); - for (uint8_t i = 0; i < arraySize; i++){ - printf("A1: %f; A2 %f; A3 %f\n", A1[i], A2[i], A3[i]); - } - free(A3); - A3 = new float[arraySize]; + // auto start = high_resolution_clock::now(); + // test_matmul(A1, param->height1, param->width1, A2, param->height2, param->width2, A3); + // auto stop = high_resolution_clock::now(); + // auto duration = duration_cast(stop - start); + // std::cout << "CPU: " << duration.count() << "ms" << std::endl; + // printf("CPU Results: \n"); + // for (uint32_t i = 0; i < param->outputsize; i++){ + // printf("A3[%d]: %f\n", i, A3[i]); + // } + // free(A3); + // A3 = new float[param->outputsize]; // GPU metal_init(); metal_compute(); - printf("GPU Results: \n"); - for (uint8_t i = 0; i < arraySize; i++){ - printf("bM1: %f; bM2 %f; bM3 %f\n", ((float*)(bM1->contents()))[i], ((float*)(bM2->contents()))[i], ((float*)(bM3->contents()))[i]); - } + // printf("GPU Results: \n"); + // for (uint32_t i = 0; i < param->outputsize; i++){ + // printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); + // 
} + free(A1); free(A2); free(A3); diff --git a/llm/tests/metal/cpp_version/op.metal b/llm/tests/metal/cpp_version/op.metal index e284086b..de76677b 100644 --- a/llm/tests/metal/cpp_version/op.metal +++ b/llm/tests/metal/cpp_version/op.metal @@ -7,6 +7,7 @@ #include using namespace metal; +#include "param.h" kernel void arrayAdd(const device float* inputA, const device float* inputB, @@ -20,12 +21,13 @@ kernel void arrayAdd(const device float* inputA, kernel void matmul(device const float* matrixA, device const float* matrixB, + device matmul_param *param, device float* matrixC, uint2 gid [[thread_position_in_grid]]) { - unsigned int widthA = 8; // Set the width of matrix A - unsigned int widthB = 8; // Set the width of matrix B - unsigned int heightA = 8; // Set the height of matrix A + unsigned int widthA = param->width1; // Set the width of matrix A + unsigned int widthB = param->width2; // Set the width of matrix B + unsigned int heightA = param->height1; // Set the height of matrix A if (gid.x >= widthB || gid.y >= heightA) { return; From 69407d528c4853a478eaec4f115e379a8b18c3d8 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Sat, 27 Jan 2024 19:34:54 -0500 Subject: [PATCH 12/37] header param --- llm/tests/metal/cpp_version/param.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 llm/tests/metal/cpp_version/param.h diff --git a/llm/tests/metal/cpp_version/param.h b/llm/tests/metal/cpp_version/param.h new file mode 100644 index 00000000..5b091f65 --- /dev/null +++ b/llm/tests/metal/cpp_version/param.h @@ -0,0 +1,15 @@ +// +// param.h +// metal_cpp +// +// Created by Derrick on 1/27/24. 
+// + +#ifndef param_h +#define param_h + + +#endif /* param_h */ +typedef struct { + unsigned int width1, height1, width2, height2, outputsize, arraySize1, arraySize2; +}matmul_param; From dd9fda7cfcea5f3ac48146a70bcf1fdb5a89b1ca Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Mon, 29 Jan 2024 01:28:39 -0500 Subject: [PATCH 13/37] metal matmul Int4 working --- llm/tests/metal/cpp_version/main.cc | 156 +++++++++++++++++++++------ llm/tests/metal/cpp_version/op.metal | 64 +++++------ llm/tests/metal/cpp_version/param.h | 14 +++ 3 files changed, 171 insertions(+), 63 deletions(-) diff --git a/llm/tests/metal/cpp_version/main.cc b/llm/tests/metal/cpp_version/main.cc index 28f20d2d..b5f87afe 100755 --- a/llm/tests/metal/cpp_version/main.cc +++ b/llm/tests/metal/cpp_version/main.cc @@ -5,6 +5,9 @@ // Created by Derrick on 1/24/24. // Some to-do list: // 1. keep a map: ptr on CPU -> buffer on GPU +// Notes: +// 1. Offset hasn't been considered +// 2. Group_Size is multiple of 32 #include #include @@ -20,25 +23,37 @@ #include "param.h" // .h -MTL::Buffer *bM1, *bM2, *bM3, *bParam; +MTL::Buffer *bM1, *bM2, *bM3, *bParam, *bScales, *bOffset; MTL::Device* mDevice; MTL::ComputePipelineState* mfnPipelineState; MTL::CommandQueue* mCommandQueue; NS::Error *error = nullptr; +typedef struct { + float *A, *C, *scales, *offset; + unsigned char *B; +} MetalMatmulBuffers; + using namespace std; using namespace chrono; // .cc -const char * fn_name = "matmul"; +const char * fn_name = "matmulInt4"; + // main -uint height1 = 100; -uint width1 = 100; -uint height2 = 100; -uint width2 = 100; -float *A1, *A2, *A3; +unsigned int height1 = 32; +unsigned int width1 = 32; +unsigned int height2 = 32; +unsigned int width2 = 32; +float *A1, *A3; +unsigned char *A2; matmul_param *param; +// for MatmulInt4 use +unsigned int group_size = 32; +float* scales, *offset; +MetalMatmulBuffers *Int4_buffer; +MetalMatMulParams *Int4_params; // Test Use void test_addArrays(const float arr1[], const float 
arr2[], float result[], uint size) { @@ -46,7 +61,7 @@ void test_addArrays(const float arr1[], const float arr2[], float result[], uint result[i] = arr1[i] + arr2[i]; } } -void test_matmul(const float* matA, int rowsA, int colsA, const float* matB, int rowsB, int colsB, float* result) { +void test_matmul(const float* matA, int rowsA, int colsA, const unsigned char* matB, int rowsB, int colsB, float* result) { for (int i = 0; i < rowsA; i++) { for (int j = 0; j < colsB; j++) { result[i * colsB + j] = 0; @@ -75,6 +90,19 @@ void generateRandomFloatArray(float* array, uint arraySize) { array[i] = distribution(gen); } } +void generateRandomCharArray(unsigned char* array, uint arraySize) { + // Use a random device to seed the random number generator + std::random_device rd; + // Use the current time as a seed for the random number generator + std::mt19937 gen(rd()); + // Define the range of random numbers (adjust as needed) + std::uniform_int_distribution distrib(0, 255); + + // Generate random integers and fill the array + for (int i = 0; i < arraySize; ++i) { + array[i] = static_cast(distrib(gen)); + } +} // Metal functions void metal_init(){ @@ -108,7 +136,30 @@ MTL::Buffer *metal_newBuf(unsigned long type_size, unsigned long size){ return mDevice->newBuffer(type_size*size, MTL::ResourceStorageModeShared); } -void metal_encodecommand(MTL::ComputeCommandEncoder *computeEncoder){ +void metal_encodecommand_matmulInt4(MTL::ComputeCommandEncoder *computeEncoder){ + //Create Metal buffers for input and output, if inside the TinyChat, param should be created in advance + + bScales = metal_newBuf(sizeof(float), Int4_params->height1*Int4_params->width1/Int4_params->group_size); + bM1 = metal_newBuf(sizeof(float), Int4_params->height1*Int4_params->width1); + bM2 = metal_newBuf(sizeof(unsigned char), Int4_params->width1*Int4_params->width3); + bParam = metal_newBuf(sizeof(MetalMatMulParams), 1); + bM3 = metal_newBuf(sizeof(float), Int4_params->height1*Int4_params->width3); + + 
computeEncoder->setComputePipelineState(mfnPipelineState); + computeEncoder->setBuffer(bM1, 0, 0); + computeEncoder->setBuffer(bM2, 0, 1); + computeEncoder->setBuffer(bM3, 0, 2); + computeEncoder->setBuffer(bScales, 0, 3); + computeEncoder->setBuffer(bParam, 0, 4); + + memcpy(bM1->contents(), Int4_buffer->A, Int4_params->height1*Int4_params->width1*sizeof(float)); + memcpy(bM2->contents(), Int4_buffer->B, Int4_params->width1*Int4_params->width3*sizeof(unsigned char)); + memcpy(bM3->contents(), Int4_buffer->C, Int4_params->height1*Int4_params->width3*sizeof(float)); + memcpy(bParam->contents(), Int4_params, sizeof(MetalMatMulParams)); + memcpy(bScales->contents(), Int4_buffer->scales, ((Int4_params->height1*Int4_params->width1)/Int4_params->group_size)*sizeof(float)); +} + +void metal_encodecommand_matmul(MTL::ComputeCommandEncoder *computeEncoder){ //Create Metal buffers for input and output, if inside the TinyChat, param should be created in advance bM1 = metal_newBuf(sizeof(float), param->arraySize1); bM2 = metal_newBuf(sizeof(float), param->arraySize2); @@ -136,14 +187,25 @@ void metal_compute(){ assert(computeEncoder != nullptr); // Encode command and set buffer to GPU - metal_encodecommand(computeEncoder); - + if (strcmp(fn_name, "matmulInt4") == 0) { + metal_encodecommand_matmulInt4(computeEncoder); + } else if (strcmp(fn_name, "matmul") == 0) { + metal_encodecommand_matmul(computeEncoder); + } + // Threads -> ThreadGroup -> Grid - MTL::Size mThreadGroupSize = MTL::Size::Make(8, 8, 1); - MTL::Size mGridSize = MTL::Size::Make((param->width1 + mThreadGroupSize.width - 1) / mThreadGroupSize.width, - (param->height2 + mThreadGroupSize.height - 1) / mThreadGroupSize.height, - 1); - + MTL::Size mThreadGroupSize; + MTL::Size mGridSize; + if (strcmp(fn_name, "matmulInt4") == 0){ + mThreadGroupSize = MTL::Size::Make(Int4_params->width3, Int4_params->height1, 1); + mGridSize = MTL::Size::Make(16, 1, 1); + } else if (strcmp(fn_name, "matmul") == 0) { + mThreadGroupSize 
= MTL::Size::Make(8, 8, 1); + mGridSize = MTL::Size::Make((param->width1 + mThreadGroupSize.width - 1) / mThreadGroupSize.width, + (param->height2 + mThreadGroupSize.height - 1) / mThreadGroupSize.height, + 1); + } + // Dispatch and Run Computation // auto start = high_resolution_clock::now(); computeEncoder->dispatchThreadgroups(mGridSize, mThreadGroupSize); @@ -157,8 +219,8 @@ void metal_compute(){ commandBuffer->release(); } -int main(){ - // Initialization for array addition +void test_normal_matmul(){ + // Initialization for test param = new matmul_param; param->height1 = height1; param->height2 = height2; @@ -168,26 +230,26 @@ int main(){ param->arraySize1 = width1*height1; param->arraySize2 = width2*height2; A1 = new float[param->arraySize1]; - A2 = new float[param->arraySize2]; + A2 = new unsigned char[param->arraySize2]; A3 = new float[param->outputsize]; generateRandomFloatArray(A1, param->arraySize1); - generateRandomFloatArray(A2, param->arraySize2); + generateRandomCharArray(A2, param->arraySize2); // printArray(A1, param->arraySize1); // printArray(A2, param->arraySize2); // CPU - // auto start = high_resolution_clock::now(); - // test_matmul(A1, param->height1, param->width1, A2, param->height2, param->width2, A3); - // auto stop = high_resolution_clock::now(); - // auto duration = duration_cast(stop - start); - // std::cout << "CPU: " << duration.count() << "ms" << std::endl; - // printf("CPU Results: \n"); - // for (uint32_t i = 0; i < param->outputsize; i++){ - // printf("A3[%d]: %f\n", i, A3[i]); - // } - // free(A3); - // A3 = new float[param->outputsize]; + auto start = high_resolution_clock::now(); + test_matmul(A1, param->height1, param->width1, A2, param->height2, param->width2, A3); + auto stop = high_resolution_clock::now(); + auto duration = duration_cast(stop - start); + std::cout << "CPU: " << duration.count() << "ms" << std::endl; + printf("CPU Results: \n"); + for (uint32_t i = 0; i < param->outputsize; i++){ + printf("A3[%d]: %f\n", 
i, A3[i]); + } + free(A3); + A3 = new float[param->outputsize]; // GPU metal_init(); @@ -202,6 +264,38 @@ int main(){ free(A3); } +void test_matmulInt4(){ + // not considering offset atm + Int4_buffer = new MetalMatmulBuffers; + Int4_params = new MetalMatMulParams; + Int4_params->group_size = group_size; + Int4_params->height1 = height1; // m + Int4_params->width1 = width1; // k + Int4_params->width3 = width2; // n + A1 = new float[Int4_params->height1*Int4_params->width1]; + A2 = new unsigned char[Int4_params->width1*Int4_params->width3]; + A3 = new float[Int4_params->height1*Int4_params->width3]; + scales = new float[Int4_params->height1*Int4_params->width1/Int4_params->group_size]; + generateRandomFloatArray(A1, Int4_params->height1*Int4_params->width1); + generateRandomCharArray(A2, Int4_params->width1*Int4_params->width3); + generateRandomFloatArray(scales, Int4_params->height1*Int4_params->width1/Int4_params->group_size); + Int4_buffer->A = A1; + Int4_buffer->B = A2; + Int4_buffer->C = A3; + Int4_buffer->scales = scales; + metal_init(); + metal_compute(); + printf("GPU Results: \n"); + for (uint32_t i = 0; i < Int4_params->height1*Int4_params->width1/Int4_params->group_size; i++){ + printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); + } +} + +int main(){ + test_matmulInt4(); + return 0; +} + diff --git a/llm/tests/metal/cpp_version/op.metal b/llm/tests/metal/cpp_version/op.metal index de76677b..0b590d42 100644 --- a/llm/tests/metal/cpp_version/op.metal +++ b/llm/tests/metal/cpp_version/op.metal @@ -41,38 +41,38 @@ kernel void matmul(device const float* matrixA, matrixC[gid.y * widthB + gid.x] = sum; } -// kernel void matmulInt4(device const float* inA, -// device const uint8_t* inB, // column major -// device float* result, -// device const float* scales, -// constant MetalMatMulParams& params, -// uint2 id [[thread_position_in_grid]]) -// { -// // the for-loop is replaced with a collection of threads, each of which -// // calls this function. 
- -// const uint n = params.n; -// const uint k = params.k; -// const uint group_size = params.group_size; - -// const uint idx = id.x; // column index of the output -// const uint idy = id.y; // row index of the output - -// float sum = 0; -// for (uint i = 0; i < k; i += group_size){ -// float scale = scales[(idx * k + i) / group_size]; -// for (uint j = 0; j < group_size; j+=2){ -// size_t weight_idx = (idx * k + i + j) / 2; -// uint8_t weight_packed = inB[weight_idx]; -// int8_t vl = (weight_packed & 0x0F) - 8; -// int8_t vh = (weight_packed >> 4) - 8; - -// sum += (inA[idy * k + i + j] * vl) * scale; -// sum += (inA[idy * k + i + j + 1] * vh) * scale; -// } -// } -// result[idy * n + idx] = sum; -// } +kernel void matmulInt4(device const float* inA, + device const uint8_t* inB, // column major + device float* result, + device const float* scales, + device const MetalMatMulParams* params, + uint2 id [[thread_position_in_grid]]) +{ + // the for-loop is replaced with a collection of threads, each of which + // calls this function. 
+ + const uint n = params->width3; + const uint k = params->width1; + const uint group_size = params->group_size; + + const uint idx = id.x; // column index of the output + const uint idy = id.y; // row index of the output + + float sum = 0; + for (uint i = 0; i < k; i += group_size){ + float scale = scales[(idx * k + i) / group_size]; + for (uint j = 0; j < group_size; j+=2){ + size_t weight_idx = (idx * k + i + j) / 2; + uint8_t weight_packed = inB[weight_idx]; + int8_t vl = (weight_packed & 0x0F) - 8; + int8_t vh = (weight_packed >> 4) - 8; + + sum += (inA[idy * k + i + j] * vl) * scale; + sum += (inA[idy * k + i + j + 1] * vh) * scale; + } + } + result[idy * n + idx] = sum; +} // kernel void matmulInt4_SIMD_Q4Interleave( diff --git a/llm/tests/metal/cpp_version/param.h b/llm/tests/metal/cpp_version/param.h index 5b091f65..91f5c12f 100644 --- a/llm/tests/metal/cpp_version/param.h +++ b/llm/tests/metal/cpp_version/param.h @@ -13,3 +13,17 @@ typedef struct { unsigned int width1, height1, width2, height2, outputsize, arraySize1, arraySize2; }matmul_param; + +// For customized MatmulInt4 use +typedef struct { + unsigned int height1; + unsigned int width3; + unsigned int width1; + unsigned int group_size; +} MetalMatMulParams; + +// should be inside metal header +// typedef struct { +// float *A, *C, *scales, *offset; +// unsigned char *B; +// } MetalMatmulBuffers; From e8331c3b2b15d2cc7888000d578ea2db8ada3c99 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Mon, 29 Jan 2024 16:03:39 -0500 Subject: [PATCH 14/37] kernel minor change --- llm/tests/metal/cpp_version/op.metal | 198 +++++++++++++-------------- 1 file changed, 99 insertions(+), 99 deletions(-) diff --git a/llm/tests/metal/cpp_version/op.metal b/llm/tests/metal/cpp_version/op.metal index 0b590d42..bab9062f 100644 --- a/llm/tests/metal/cpp_version/op.metal +++ b/llm/tests/metal/cpp_version/op.metal @@ -64,8 +64,8 @@ kernel void matmulInt4(device const float* inA, for (uint j = 0; j < group_size; j+=2){ size_t 
weight_idx = (idx * k + i + j) / 2; uint8_t weight_packed = inB[weight_idx]; - int8_t vl = (weight_packed & 0x0F) - 8; - int8_t vh = (weight_packed >> 4) - 8; + int8_t vl = (weight_packed & 0x0F) ; // -8? + int8_t vh = (weight_packed >> 4) & 0x0F ; // -8? sum += (inA[idy * k + i + j] * vl) * scale; sum += (inA[idy * k + i + j + 1] * vh) * scale; @@ -75,111 +75,111 @@ kernel void matmulInt4(device const float* inA, } -// kernel void matmulInt4_SIMD_Q4Interleave( -// device const packed_float4* inA, -// device const packed_char4* inB, // column major -// device float* result, -// device const float* scales, -// constant MetalMatMulParams& params, -// uint2 id [[thread_position_in_grid]]) -// { -// // the for-loop is replaced with a collection of threads, each of which -// // calls this function. - -// const uint n = params.n; -// const uint k = params.k; -// const uint group_size = params.group_size; - -// const uint idx = id.x; // column index of the output -// const uint idy = id.y; // row index of the output - -// packed_char4 lowMask = {0x0F, 0x0F, 0x0F, 0x0F}; -// packed_float4 sum4 = {0.0f, 0.0f, 0.0f, 0.0f}; - -// for (uint i = 0; i < k; i += group_size){ -// float scale = scales[(idx * k + i) / group_size]; -// packed_float4 scale4 = {scale, scale, scale, scale}; -// for (uint j = 0; j < group_size; j+= 8){ -// // sequential: (a, b), (c, d), (e, f), (g, h): 32 bit = 4xuint8 -// // expected layout of inB: (a, e), (b, f), (c, g), (d, h) -// // low; (a, 0), (b, 0), (c, 0), (d, 0) -// // high: (e, 0), (f, 0), (g, 0), (h, 0) -// size_t weight_idx = (idx * k + i + j) / 8; -// size_t activation_idx = (idy * k + i + j) / 4; -// packed_char4 packed_8 = inB[weight_idx]; -// packed_char4 packed_low = packed_8 & lowMask; -// packed_char4 packed_high = (packed_8 >> 4) & lowMask; - -// packed_float4 inAlow = inA[activation_idx]; -// packed_float4 inAhigh = inA[activation_idx+1]; -// packed_float4 inBlow = packed_float4(packed_low) * scale4; -// packed_float4 inBhigh = 
packed_float4(packed_high) * scale4; +kernel void matmulInt4_SIMD_Q4Interleave( + device const packed_float4* inA, + device const packed_char4* inB, // column major + device float* result, + device const float* scales, + constant MetalMatMulParams* params, + uint2 id [[thread_position_in_grid]]) +{ + // the for-loop is replaced with a collection of threads, each of which + // calls this function. -// sum4 += inAlow * inBlow; -// sum4 += inAhigh * inBhigh; -// } -// } -// float sum = sum4[0] + sum4[1] + sum4[2] + sum4[3]; -// result[idy * n + idx] = sum; -// } + const uint n = params->width3; + const uint k = params->width1; + const uint group_size = params->group_size; -// kernel void matmulUInt4_SIMD_Q4Interleave_unroll16( -// device const packed_float4* inA, -// device const packed_char4* inB, // column major -// device float* result, -// device const float* scales, -// constant MetalMatMulParams& params, -// uint2 id [[thread_position_in_grid]]) -// { -// // the for-loop is replaced with a collection of threads, each of which -// // calls this function. 
+ const uint idx = id.x; // column index of the output + const uint idy = id.y; // row index of the output -// const uint n = params.n; -// const uint k = params.k; -// const uint group_size = params.group_size; + packed_char4 lowMask = {0x0F, 0x0F, 0x0F, 0x0F}; + packed_float4 sum4 = {0.0f, 0.0f, 0.0f, 0.0f}; -// const uint idx = id.x; // column index of the output -// const uint idy = id.y; // row index of the output + for (uint i = 0; i < k; i += group_size){ + float scale = scales[(idx * k + i) / group_size]; + packed_float4 scale4 = {scale, scale, scale, scale}; + for (uint j = 0; j < group_size; j+= 8){ + // sequential: (a, b), (c, d), (e, f), (g, h): 32 bit = 4xuint8 + // expected layout of inB: (a, e), (b, f), (c, g), (d, h) + // low; (a, 0), (b, 0), (c, 0), (d, 0) + // high: (e, 0), (f, 0), (g, 0), (h, 0) + size_t weight_idx = (idx * k + i + j) / 8; + size_t activation_idx = (idy * k + i + j) / 4; + packed_char4 packed_8 = inB[weight_idx]; + packed_char4 packed_low = packed_8 & lowMask; + packed_char4 packed_high = (packed_8 >> 4) & lowMask; + + packed_float4 inAlow = inA[activation_idx]; + packed_float4 inAhigh = inA[activation_idx+1]; + packed_float4 inBlow = packed_float4(packed_low) * scale4; + packed_float4 inBhigh = packed_float4(packed_high) * scale4; + + sum4 += inAlow * inBlow; + sum4 += inAhigh * inBhigh; + } + } + float sum = sum4[0] + sum4[1] + sum4[2] + sum4[3]; + result[idy * n + idx] = sum; +} -// packed_char4 lowMask = {0x0F, 0x0F, 0x0F, 0x0F}; -// packed_float4 sum4 = {0.0f, 0.0f, 0.0f, 0.0f}; -// packed_char4 offsets = {8, 8, 8, 8}; +kernel void matmulUInt4_SIMD_Q4Interleave_unroll16( + device const packed_float4* inA, + device const packed_char4* inB, // column major + device float* result, + device const float* scales, + constant MetalMatMulParams* params, + uint2 id [[thread_position_in_grid]]) +{ + // the for-loop is replaced with a collection of threads, each of which + // calls this function. 
-// for (uint i = 0; i < k; i += group_size){ -// float scale = scales[(idx * k + i) / group_size]; -// packed_float4 scale4 = {scale, scale, scale, scale}; -// for (uint j = 0; j < group_size; j+= 16){ -// // sequential: (a, b), (c, d), (e, f), (g, h): 32 bit = 4xuint8 -// // expected layout of inB: (a, e), (b, f), (c, g), (d, h) -// // low; (a, 0), (b, 0), (c, 0), (d, 0) -// // high: (e, 0), (f, 0), (g, 0), (h, 0) -// size_t weight_idx = (idx * k + i + j) / 8; -// size_t activation_idx = (idy * k + i + j) / 4; -// packed_char4 packed_8_0 = inB[weight_idx]; -// packed_char4 packed_8_1 = inB[weight_idx + 1]; -// packed_char4 packed_low_0 = (packed_8_0 & lowMask) - offsets;; -// packed_char4 packed_low_1 = (packed_8_1 & lowMask) - offsets;; -// packed_char4 packed_high_0 = ((packed_8_0 >> 4) & lowMask) - offsets; -// packed_char4 packed_high_1 = ((packed_8_1 >> 4) & lowMask) - offsets; + const uint n = params->width3; + const uint k = params->width1; + const uint group_size = params->group_size; + const uint idx = id.x; // column index of the output + const uint idy = id.y; // row index of the output -// packed_float4 inAlow_0 = inA[activation_idx]; -// packed_float4 inAlow_1 = inA[activation_idx+2]; -// packed_float4 inAhigh_0 = inA[activation_idx+1]; -// packed_float4 inAhigh_1 = inA[activation_idx+3]; -// packed_float4 inBlow_0 = packed_float4(packed_low_0) * scale4; -// packed_float4 inBlow_1 = packed_float4(packed_low_1) * scale4; -// packed_float4 inBhigh_0 = packed_float4(packed_high_0) * scale4; -// packed_float4 inBhigh_1 = packed_float4(packed_high_1) * scale4; + packed_char4 lowMask = {0x0F, 0x0F, 0x0F, 0x0F}; + packed_float4 sum4 = {0.0f, 0.0f, 0.0f, 0.0f}; + packed_char4 offsets = {8, 8, 8, 8}; + // packed_char4 offsets = {0, 0, 0, 0}; -// sum4 += inAlow_0 * inBlow_0; -// sum4 += inAlow_1 * inBlow_1; -// sum4 += inAhigh_0 * inBhigh_0; -// sum4 += inAhigh_1 * inBhigh_1; -// } -// } -// float sum = sum4[0] + sum4[1] + sum4[2] + sum4[3]; -// result[idy * n 
+ idx] = sum; -// } + for (uint i = 0; i < k; i += group_size){ + float scale = scales[(idx * k + i) / group_size]; + packed_float4 scale4 = {scale, scale, scale, scale}; + for (uint j = 0; j < group_size; j+= 16){ + // sequential: (a, b), (c, d), (e, f), (g, h): 32 bit = 4xuint8 + // expected layout of inB: (a, e), (b, f), (c, g), (d, h) + // low; (a, 0), (b, 0), (c, 0), (d, 0) + // high: (e, 0), (f, 0), (g, 0), (h, 0) + size_t weight_idx = (idx * k + i + j) / 8; + size_t activation_idx = (idy * k + i + j) / 4; + packed_char4 packed_8_0 = inB[weight_idx]; + packed_char4 packed_8_1 = inB[weight_idx + 1]; + packed_char4 packed_low_0 = (packed_8_0 & lowMask) - offsets;; + packed_char4 packed_low_1 = (packed_8_1 & lowMask) - offsets;; + packed_char4 packed_high_0 = ((packed_8_0 >> 4) & lowMask) - offsets; + packed_char4 packed_high_1 = ((packed_8_1 >> 4) & lowMask) - offsets; + + packed_float4 inAlow_0 = inA[activation_idx]; + packed_float4 inAlow_1 = inA[activation_idx+2]; + packed_float4 inAhigh_0 = inA[activation_idx+1]; + packed_float4 inAhigh_1 = inA[activation_idx+3]; + packed_float4 inBlow_0 = packed_float4(packed_low_0) * scale4; + packed_float4 inBlow_1 = packed_float4(packed_low_1) * scale4; + packed_float4 inBhigh_0 = packed_float4(packed_high_0) * scale4; + packed_float4 inBhigh_1 = packed_float4(packed_high_1) * scale4; + + sum4 += inAlow_0 * inBlow_0; + sum4 += inAlow_1 * inBlow_1; + sum4 += inAhigh_0 * inBhigh_0; + sum4 += inAhigh_1 * inBhigh_1; + } + } + float sum = sum4[0] + sum4[1] + sum4[2] + sum4[3]; + result[idy * n + idx] = sum; +} // kernel void matmulUInt4_SIMD_Q4Interleave_unroll32( From e79e7021e48fbfd18001b109514cdf5e480e2351 Mon Sep 17 00:00:00 2001 From: RaymondWang0 Date: Tue, 30 Jan 2024 15:32:17 -0500 Subject: [PATCH 15/37] add metal op --- llm/tests/metal/cpp_version/op.metal | 480 ++++++++++++++++++++++++++- llm/tests/metal/cpp_version/param.h | 1 + 2 files changed, 480 insertions(+), 1 deletion(-) diff --git 
a/llm/tests/metal/cpp_version/op.metal b/llm/tests/metal/cpp_version/op.metal index 0b590d42..2dbbdb13 100644 --- a/llm/tests/metal/cpp_version/op.metal +++ b/llm/tests/metal/cpp_version/op.metal @@ -6,9 +6,15 @@ // #include -using namespace metal; #include "param.h" +using namespace metal; + +#define N_SIMDWIDTH 32 +#define MAX(x, y) ((x) > (y) ? (x) : (y)) +#define MIN(x, y) ((x) < (y) ? (x) : (y)) +#define SWAP(x, y) { auto tmp = (x); (x) = (y); (y) = tmp; } + kernel void arrayAdd(const device float* inputA, const device float* inputB, device float* output, @@ -18,6 +24,478 @@ kernel void arrayAdd(const device float* inputA, output[id] = inputA[id] + inputB[id]; } + /* CUDA */ +// __global__ void batch_Add_cuda(Matrix3D input, Matrix3D input2, Matrix3D output) { +// int i = blockIdx.x * blockDim.x + threadIdx.x; +// int j = blockIdx.y * blockDim.y + threadIdx.y; +// int k = blockIdx.z * blockDim.z + threadIdx.z; + +// //// half version +// if (i < input.m_dim_x && j < input.m_dim_y && k < input.m_dim_z) { +// output(i, j, k) = __hadd(input(i, j, k), input2(0, j, k)); +// } +// } +kernel void kernel_batch_add(device const float* inputA, + device const float* inputB, + device float* output, + device matmul_param *param, + uint3 id[[thread_position_in_grid]]) { + const uint m = param->m_dim_x; + const uint n = param->m_dim_y; + + const uint idx = id.x; + const uint idy = id.y; + const uint idz = id.z; + output[idx * m * n + idy * n + idz] = inputA[idx * m * n + idy * n + idz] + inputB[idy * n + idz]; +} + +kernel void kernel_relu( + device const float * src0, + device float * dst, + uint tpig[[thread_position_in_grid]]) { + dst[tpig] = max(0.0f, src0[tpig]); +} + + kernel void kernel_silu( + device const float4 * src0, + device float4 * dst, + uint tpig[[thread_position_in_grid]]) { + device const float4 & x = src0[tpig]; + dst[tpig] = x / (1.0f + exp(-x)); +} + +constant float GELU_COEF_A = 0.044715f; +constant float GELU_QUICK_COEF = -1.702f; +constant float 
SQRT_2_OVER_PI = 0.79788456080286535587989211986876f; +kernel void kernel_gelu( + device const float4 * src0, + device float4 * dst, + uint tpig[[thread_position_in_grid]]) { + device const float4 & x = src0[tpig]; + + // BEWARE !!! + // Simply using "tanh" instead of "precise::tanh" will sometimes results in NaNs! + // This was observed with Falcon 7B and 40B models + // + dst[tpig] = 0.5f * x * (1.0f + precise::tanh(SQRT_2_OVER_PI * x * (1.0f + GELU_COEF_A * x * x))); +} +kernel void kernel_gelu_quick( + device const float4 * src0, + device float4 * dst, + uint tpig[[thread_position_in_grid]]) { + device const float4 & x = src0[tpig]; + + dst[tpig] = x * (1.0f / (1.0f + exp(GELU_QUICK_COEF * x))); +} + +// TODO: to be fixed +kernel void kernel_rms_norm( + device const void * src0, + device float * dst, + constant int64_t & ne00, + constant uint64_t & nb01, + constant float & eps, + threadgroup float * buf [[threadgroup(0)]], + uint tgpig[[threadgroup_position_in_grid]], + uint tpitg[[thread_position_in_threadgroup]], + uint sgitg[[simdgroup_index_in_threadgroup]], + uint tiisg[[thread_index_in_simdgroup]], + uint ntg[[threads_per_threadgroup]]) { + device const float4 * x = (device const float4 *) ((device const char *) src0 + tgpig*nb01); + + float4 sumf = 0; + float all_sum = 0; + + // parallel sum + for (int i00 = tpitg; i00 < ne00/4; i00 += ntg) { + sumf += x[i00] * x[i00]; + } + all_sum = sumf[0] + sumf[1] + sumf[2] + sumf[3]; + all_sum = simd_sum(all_sum); + if (ntg > N_SIMDWIDTH) { + if (sgitg == 0) { + buf[tiisg] = 0.0f; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + if (tiisg == 0) { + buf[sgitg] = all_sum; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + all_sum = buf[tiisg]; + all_sum = simd_sum(all_sum); + } + + const float mean = all_sum / ne00; + const float scale = 1.0f / sqrt(mean + eps); + + device float4 * y = (device float4 *) (dst + tgpig*ne00); + for (int i00 = tpitg; i00 < ne00/4; i00 += ntg) { + y[i00] = x[i00] * 
scale; + } +} + + +// TODO: to be fixed +kernel void kernel_soft_max( + device const float * src0, + device const float * src1, + device float * dst, + constant int64_t & ne00, + constant int64_t & ne01, + constant int64_t & ne02, + constant float & scale, + threadgroup float * buf [[threadgroup(0)]], + uint tgpig[[threadgroup_position_in_grid]], + uint tpitg[[thread_position_in_threadgroup]], + uint sgitg[[simdgroup_index_in_threadgroup]], + uint tiisg[[thread_index_in_simdgroup]], + uint ntg[[threads_per_threadgroup]]) { + const int64_t i03 = (tgpig) / (ne02*ne01); + const int64_t i02 = (tgpig - i03*ne02*ne01) / ne01; + const int64_t i01 = (tgpig - i03*ne02*ne01 - i02*ne01); + + device const float * psrc0 = src0 + i03*ne02*ne01*ne00 + i02*ne01*ne00 + i01*ne00; + device const float * pmask = src1 != src0 ? src1 + i01*ne00 : nullptr; + device float * pdst = dst + i03*ne02*ne01*ne00 + i02*ne01*ne00 + i01*ne00; + + // parallel max + float lmax = -INFINITY; + + for (int i00 = tpitg; i00 < ne00; i00 += ntg) { + lmax = MAX(lmax, psrc0[i00]*scale + (pmask ? pmask[i00] : 0.0f)); + } + + // find the max value in the block + float max_val = simd_max(lmax); + if (ntg > N_SIMDWIDTH) { + if (sgitg == 0) { + buf[tiisg] = -INFINITY; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + if (tiisg == 0) { + buf[sgitg] = max_val; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + max_val = buf[tiisg]; + max_val = simd_max(max_val); + } + + // parallel sum + float lsum = 0.0f; + for (int i00 = tpitg; i00 < ne00; i00 += ntg) { + const float exp_psrc0 = exp((psrc0[i00]*scale + (pmask ? 
pmask[i00] : 0.0f)) - max_val); + lsum += exp_psrc0; + pdst[i00] = exp_psrc0; + } + + // This barrier fixes a failing test + // ref: https://github.com/ggerganov/ggml/pull/621#discussion_r1425156335 + threadgroup_barrier(mem_flags::mem_none); + + float sum = simd_sum(lsum); + + if (ntg > N_SIMDWIDTH) { + if (sgitg == 0) { + buf[tiisg] = 0.0f; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + if (tiisg == 0) { + buf[sgitg] = sum; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + sum = buf[tiisg]; + sum = simd_sum(sum); + } + + const float inv_sum = 1.0f/sum; + + for (int i00 = tpitg; i00 < ne00; i00 += ntg) { + pdst[i00] *= inv_sum; + } +} + +// TODO: to be fixed +kernel void kernel_soft_max_4( + device const float * src0, + device const float * src1, + device float * dst, + constant int64_t & ne00, + constant int64_t & ne01, + constant int64_t & ne02, + constant float & scale, + threadgroup float * buf [[threadgroup(0)]], + uint tgpig[[threadgroup_position_in_grid]], + uint tpitg[[thread_position_in_threadgroup]], + uint sgitg[[simdgroup_index_in_threadgroup]], + uint tiisg[[thread_index_in_simdgroup]], + uint ntg[[threads_per_threadgroup]]) { + const int64_t i03 = (tgpig) / (ne02*ne01); + const int64_t i02 = (tgpig - i03*ne02*ne01) / ne01; + const int64_t i01 = (tgpig - i03*ne02*ne01 - i02*ne01); + + device const float4 * psrc4 = (device const float4 *)(src0 + i03*ne02*ne01*ne00 + i02*ne01*ne00 + i01*ne00); + device const float4 * pmask = src1 != src0 ? (device const float4 *)(src1 + i01*ne00) : nullptr; + device float4 * pdst4 = (device float4 *)(dst + i03*ne02*ne01*ne00 + i02*ne01*ne00 + i01*ne00); + + // parallel max + float4 lmax4 = -INFINITY; + + for (int i00 = tpitg; i00 < ne00/4; i00 += ntg) { + lmax4 = fmax(lmax4, psrc4[i00]*scale + (pmask ? 
pmask[i00] : 0.0f)); + } + + const float lmax = MAX(MAX(lmax4[0], lmax4[1]), MAX(lmax4[2], lmax4[3])); + + float max_val = simd_max(lmax); + if (ntg > N_SIMDWIDTH) { + if (sgitg == 0) { + buf[tiisg] = -INFINITY; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + if (tiisg == 0) { + buf[sgitg] = max_val; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + max_val = buf[tiisg]; + max_val = simd_max(max_val); + } + + // parallel sum + float4 lsum4 = 0.0f; + for (int i00 = tpitg; i00 < ne00/4; i00 += ntg) { + const float4 exp_psrc4 = exp((psrc4[i00]*scale + (pmask ? pmask[i00] : 0.0f)) - max_val); + lsum4 += exp_psrc4; + pdst4[i00] = exp_psrc4; + } + + const float lsum = lsum4[0] + lsum4[1] + lsum4[2] + lsum4[3]; + + // This barrier fixes a failing test + // ref: https://github.com/ggerganov/ggml/pull/621#discussion_r1425156335 + threadgroup_barrier(mem_flags::mem_none); + + float sum = simd_sum(lsum); + + if (ntg > N_SIMDWIDTH) { + if (sgitg == 0) { + buf[tiisg] = 0.0f; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + if (tiisg == 0) { + buf[sgitg] = sum; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + sum = buf[tiisg]; + sum = simd_sum(sum); + } + + const float inv_sum = 1.0f/sum; + + for (int i00 = tpitg; i00 < ne00/4; i00 += ntg) { + pdst4[i00] *= inv_sum; + } +} + + +// ROPE // +static float rope_yarn_ramp(const float low, const float high, const int i0) { + const float y = (i0 / 2 - low) / max(0.001f, high - low); + return 1.0f - min(1.0f, max(0.0f, y)); +} + +// YaRN algorithm based on LlamaYaRNScaledRotaryEmbedding.py from https://github.com/jquesnelle/yarn +// MIT licensed. Copyright (c) 2023 Jeffrey Quesnelle and Bowen Peng. 
+static void rope_yarn( + float theta_extrap, float freq_scale, float corr_dims[2], int64_t i0, float ext_factor, float mscale, + thread float * cos_theta, thread float * sin_theta +) { + // Get n-d rotational scaling corrected for extrapolation + float theta_interp = freq_scale * theta_extrap; + float theta = theta_interp; + if (ext_factor != 0.0f) { + float ramp_mix = rope_yarn_ramp(corr_dims[0], corr_dims[1], i0) * ext_factor; + theta = theta_interp * (1 - ramp_mix) + theta_extrap * ramp_mix; + + // Get n-d magnitude scaling corrected for interpolation + mscale *= 1.0f + 0.1f * log(1.0f / freq_scale); + } + *cos_theta = cos(theta) * mscale; + *sin_theta = sin(theta) * mscale; +} + +// Apparently solving `n_rot = 2pi * x * base^((2 * max_pos_emb) / n_dims)` for x, we get +// `corr_fac(n_rot) = n_dims * log(max_pos_emb / (n_rot * 2pi)) / (2 * log(base))` +static float rope_yarn_corr_factor(int n_dims, int n_orig_ctx, float n_rot, float base) { + return n_dims * log(n_orig_ctx / (n_rot * 2 * M_PI_F)) / (2 * log(base)); +} + +static void rope_yarn_corr_dims( + int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2] +) { + // start and end correction dims + dims[0] = max(0.0f, floor(rope_yarn_corr_factor(n_dims, n_orig_ctx, beta_fast, freq_base))); + dims[1] = min(n_dims - 1.0f, ceil(rope_yarn_corr_factor(n_dims, n_orig_ctx, beta_slow, freq_base))); +} + +typedef void (rope_t)( + device const void * src0, + device const int32_t * src1, + device float * dst, + constant int64_t & ne00, + constant int64_t & ne01, + constant int64_t & ne02, + constant int64_t & ne03, + constant uint64_t & nb00, + constant uint64_t & nb01, + constant uint64_t & nb02, + constant uint64_t & nb03, + constant int64_t & ne0, + constant int64_t & ne1, + constant int64_t & ne2, + constant int64_t & ne3, + constant uint64_t & nb0, + constant uint64_t & nb1, + constant uint64_t & nb2, + constant uint64_t & nb3, + constant int & n_past, + constant int & n_dims, 
+ constant int & mode, + constant int & n_orig_ctx, + constant float & freq_base, + constant float & freq_scale, + constant float & ext_factor, + constant float & attn_factor, + constant float & beta_fast, + constant float & beta_slow, + uint tiitg[[thread_index_in_threadgroup]], + uint3 tptg[[threads_per_threadgroup]], + uint3 tgpig[[threadgroup_position_in_grid]]); + +// TODO: to be fixed +template +kernel void kernel_rope( + device const void * src0, + device const int32_t * src1, + device float * dst, + constant int64_t & ne00, + constant int64_t & ne01, + constant int64_t & ne02, + constant int64_t & ne03, + constant uint64_t & nb00, + constant uint64_t & nb01, + constant uint64_t & nb02, + constant uint64_t & nb03, + constant int64_t & ne0, + constant int64_t & ne1, + constant int64_t & ne2, + constant int64_t & ne3, + constant uint64_t & nb0, + constant uint64_t & nb1, + constant uint64_t & nb2, + constant uint64_t & nb3, + constant int & n_past, + constant int & n_dims, + constant int & mode, + constant int & n_orig_ctx, + constant float & freq_base, + constant float & freq_scale, + constant float & ext_factor, + constant float & attn_factor, + constant float & beta_fast, + constant float & beta_slow, + uint tiitg[[thread_index_in_threadgroup]], + uint3 tptg[[threads_per_threadgroup]], + uint3 tgpig[[threadgroup_position_in_grid]]) { + const int64_t i3 = tgpig[2]; + const int64_t i2 = tgpig[1]; + const int64_t i1 = tgpig[0]; + + const bool is_neox = mode & 2; + + float corr_dims[2]; + rope_yarn_corr_dims(n_dims, n_orig_ctx, freq_base, beta_fast, beta_slow, corr_dims); + + device const int32_t * pos = src1; + + const int64_t p = pos[i2]; + + const float theta_0 = (float)p; + const float inv_ndims = -1.f/n_dims; + + if (!is_neox) { + for (int64_t i0 = 2*tiitg; i0 < ne0; i0 += 2*tptg.x) { + + const float theta = theta_0 * pow(freq_base, inv_ndims*i0); + float cos_theta, sin_theta; + rope_yarn(theta, freq_scale, corr_dims, i0, ext_factor, attn_factor, 
&cos_theta, &sin_theta); + + device const T * const src = (device T *)((device char *) src0 + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); + device T * dst_data = (device T *)((device char *) dst + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); + + const T x0 = src[0]; + const T x1 = src[1]; + + dst_data[0] = x0*cos_theta - x1*sin_theta; + dst_data[1] = x0*sin_theta + x1*cos_theta; + } + } else { + for (int64_t ic = 2*tiitg; ic < ne0; ic += 2*tptg.x) { + if (ic < n_dims) { + const int64_t ib = 0; + + // simplified from `(ib * n_dims + ic) * inv_ndims` + const float cur_rot = inv_ndims*ic - ib; + + const float theta = theta_0 * pow(freq_base, cur_rot); + float cos_theta, sin_theta; + rope_yarn(theta, freq_scale, corr_dims, cur_rot, ext_factor, attn_factor, &cos_theta, &sin_theta); + + const int64_t i0 = ib*n_dims + ic/2; + + device const T * const src = (device T *)((device char *) src0 + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); + device T * dst_data = (device T *)((device char *) dst + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); + + const float x0 = src[0]; + const float x1 = src[n_dims/2]; + + dst_data[0] = x0*cos_theta - x1*sin_theta; + dst_data[n_dims/2] = x0*sin_theta + x1*cos_theta; + } else { + const int64_t i0 = ic; + + device const T * const src = (device T *)((device char *) src0 + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); + device T * dst_data = (device T *)((device char *) dst + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); + + dst_data[0] = src[0]; + dst_data[1] = src[1]; + } + } + } +} + +template [[host_name("kernel_rope_f32")]] kernel rope_t kernel_rope; +template [[host_name("kernel_rope_f16")]] kernel rope_t kernel_rope; + kernel void matmul(device const float* matrixA, device const float* matrixB, diff --git a/llm/tests/metal/cpp_version/param.h b/llm/tests/metal/cpp_version/param.h index 91f5c12f..25f0a2b9 100644 --- a/llm/tests/metal/cpp_version/param.h +++ b/llm/tests/metal/cpp_version/param.h @@ -12,6 +12,7 @@ #endif /* param_h */ typedef struct { unsigned int width1, height1, 
width2, height2, outputsize, arraySize1, arraySize2; + unsigned int m_dim_x, m_dim_y, m_dim_z; }matmul_param; // For customized MatmulInt4 use From bc5cad2aa5b9059006765c73819cbd0285efc8b1 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Wed, 31 Jan 2024 01:37:48 -0500 Subject: [PATCH 16/37] interleave two versions equal --- llm/tests/metal/cpp_version/main.cc | 21 ++++++++++++++++++++- llm/tests/metal/cpp_version/param.h | 4 ---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/llm/tests/metal/cpp_version/main.cc b/llm/tests/metal/cpp_version/main.cc index b5f87afe..36c6927c 100755 --- a/llm/tests/metal/cpp_version/main.cc +++ b/llm/tests/metal/cpp_version/main.cc @@ -191,6 +191,8 @@ void metal_compute(){ metal_encodecommand_matmulInt4(computeEncoder); } else if (strcmp(fn_name, "matmul") == 0) { metal_encodecommand_matmul(computeEncoder); + } else { + metal_encodecommand_matmulInt4(computeEncoder); } // Threads -> ThreadGroup -> Grid @@ -204,6 +206,9 @@ void metal_compute(){ mGridSize = MTL::Size::Make((param->width1 + mThreadGroupSize.width - 1) / mThreadGroupSize.width, (param->height2 + mThreadGroupSize.height - 1) / mThreadGroupSize.height, 1); + } else { + mThreadGroupSize = MTL::Size::Make(Int4_params->width3, Int4_params->height1, 1); + mGridSize = MTL::Size::Make(16, 1, 1); } // Dispatch and Run Computation @@ -283,6 +288,21 @@ void test_matmulInt4(){ Int4_buffer->B = A2; Int4_buffer->C = A3; Int4_buffer->scales = scales; + + metal_init(); + metal_compute(); + printf("GPU Results: \n"); + for (uint32_t i = 0; i < Int4_params->height1*Int4_params->width1/Int4_params->group_size; i++){ + printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); + } + fn_name = "matmulInt4_SIMD_Q4Interleave"; + metal_init(); + metal_compute(); + printf("GPU Results: \n"); + for (uint32_t i = 0; i < Int4_params->height1*Int4_params->width1/Int4_params->group_size; i++){ + printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); + } + fn_name = 
"matmulUInt4_SIMD_Q4Interleave_unroll16"; metal_init(); metal_compute(); printf("GPU Results: \n"); @@ -298,4 +318,3 @@ int main(){ - diff --git a/llm/tests/metal/cpp_version/param.h b/llm/tests/metal/cpp_version/param.h index 91f5c12f..2dbb65b2 100644 --- a/llm/tests/metal/cpp_version/param.h +++ b/llm/tests/metal/cpp_version/param.h @@ -23,7 +23,3 @@ typedef struct { } MetalMatMulParams; // should be inside metal header -// typedef struct { -// float *A, *C, *scales, *offset; -// unsigned char *B; -// } MetalMatmulBuffers; From 17e90b6dc010e03b7863b2e86c5d9187ac9432ef Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Thu, 1 Feb 2024 02:07:38 -0500 Subject: [PATCH 17/37] building metal interface, done with kernel ops --- kernels/matmul.h | 15 +- kernels/metal/kernel/op.metal | 469 ++++++++++++++++++ kernels/metal/matmul_metal_imp.cc | 278 +++++++++++ kernels/metal/matmul_metal_imp.h | 55 ++ kernels/metal/matmul_metal_int4.cc | 4 +- kernels/metal/matmul_metal_int4_imp.cc | 154 ------ kernels/metal/matmul_metal_int4_imp.h | 42 -- kernels/metal/matmul_naive.cc | 30 ++ kernels/metal/metal_batch_add.cc | 24 + llm/include/operators.h | 10 + llm/src/ops/metal/batch_add.cc | 17 + .../metal/cpp_version/metal_Int4matmul.h | 23 + 12 files changed, 922 insertions(+), 199 deletions(-) create mode 100644 kernels/metal/matmul_metal_imp.cc create mode 100644 kernels/metal/matmul_metal_imp.h delete mode 100644 kernels/metal/matmul_metal_int4_imp.cc delete mode 100644 kernels/metal/matmul_metal_int4_imp.h create mode 100644 kernels/metal/matmul_naive.cc create mode 100644 kernels/metal/metal_batch_add.cc create mode 100644 llm/src/ops/metal/batch_add.cc create mode 100644 llm/tests/metal/cpp_version/metal_Int4matmul.h diff --git a/kernels/matmul.h b/kernels/matmul.h index ef645aa8..44439721 100644 --- a/kernels/matmul.h +++ b/kernels/matmul.h @@ -121,7 +121,7 @@ class MatmulOperator { // void mat_mul_accelerator_int8_fast_2x2_omp(const struct matmul_params *params); // int4 void 
mat_mul_accelerator_int4_fast(const struct matmul_params *params); - void mat_mul_accelerator_int4_fast_no_offset(const struct matmul_params *params); + void mat_mul_accelerator_int4_fast_no_offset(const struct matmul_params *params); //also supported by metal void mat_mul_accelerator_int8_int4_fast_no_offset(struct matmul_params *params); void naive_mat_mul_int4(const struct matmul_params *params); void naive_mat_mul_int4_with_offset(const struct matmul_params *params); @@ -136,6 +136,19 @@ class MatmulOperator { void gemm_forward_cuda_half_test(const struct matmul_params *params, int split_k_iters); //// GEMV void gemv_forward_cuda(const struct matmul_params *params); + // metal + void mat_mul_metal(const struct matmul_params *params); + void batch_add_metal(const struct matmul_params *params); + void relu_metal(const struct matmul_params *params); + void silu_metal(const struct matmul_params *params); + void gelu_metal(const struct matmul_params *params); + void gelu_quick_metal(const struct matmul_params *params); + void rms_norm_metal(const struct matmul_params *params); // TODO: to be fixed + void soft_max_metal(const struct matmul_params *params); // TODO: to be fixed + void soft_max_4_metal(const struct matmul_params *params); // TODO: to be fixed + void rope_metal(const struct matmul_params *params); // TODO: to be fixed + + private: float interval_to_us(struct timeval *start, struct timeval *end); diff --git a/kernels/metal/kernel/op.metal b/kernels/metal/kernel/op.metal index e4ccdb9e..0681e181 100644 --- a/kernels/metal/kernel/op.metal +++ b/kernels/metal/kernel/op.metal @@ -1,6 +1,475 @@ #include using namespace metal; + /* CUDA */ +// __global__ void batch_Add_cuda(Matrix3D input, Matrix3D input2, Matrix3D output) { +// int i = blockIdx.x * blockDim.x + threadIdx.x; +// int j = blockIdx.y * blockDim.y + threadIdx.y; +// int k = blockIdx.z * blockDim.z + threadIdx.z; + +// //// half version +// if (i < input.m_dim_x && j < input.m_dim_y && k < 
input.m_dim_z) { +// output(i, j, k) = __hadd(input(i, j, k), input2(0, j, k)); +// } +// } +kernel void kernel_batch_add(device const float* inputA, + device const float* inputB, + device float* output, + constant MetalMatMulParams& params, + uint3 id[[thread_position_in_grid]]) { + const uint m = param.m; + const uint n = param.k; + + const uint idx = id.x; + const uint idy = id.y; + const uint idz = id.z; + output[idx * m * n + idy * n + idz] = inputA[idx * m * n + idy * n + idz] + inputB[idy * n + idz]; +} + +kernel void kernel_relu( + device const float * src0, + device float * dst, + uint tpig[[thread_position_in_grid]]) { + dst[tpig] = max(0.0f, src0[tpig]); +} + + kernel void kernel_silu( + device const float4 * src0, + device float4 * dst, + uint tpig[[thread_position_in_grid]]) { + device const float4 & x = src0[tpig]; + dst[tpig] = x / (1.0f + exp(-x)); +} + +constant float GELU_COEF_A = 0.044715f; +constant float GELU_QUICK_COEF = -1.702f; +constant float SQRT_2_OVER_PI = 0.79788456080286535587989211986876f; +kernel void kernel_gelu( + device const float4 * src0, + device float4 * dst, + uint tpig[[thread_position_in_grid]]) { + device const float4 & x = src0[tpig]; + + // BEWARE !!! + // Simply using "tanh" instead of "precise::tanh" will sometimes results in NaNs! 
+ // This was observed with Falcon 7B and 40B models + // + dst[tpig] = 0.5f * x * (1.0f + precise::tanh(SQRT_2_OVER_PI * x * (1.0f + GELU_COEF_A * x * x))); +} +kernel void kernel_gelu_quick( + device const float4 * src0, + device float4 * dst, + uint tpig[[thread_position_in_grid]]) { + device const float4 & x = src0[tpig]; + + dst[tpig] = x * (1.0f / (1.0f + exp(GELU_QUICK_COEF * x))); +} + +// TODO: to be fixed +kernel void kernel_rms_norm( + device const void * src0, + device float * dst, + constant int64_t & ne00, + constant uint64_t & nb01, + constant float & eps, + threadgroup float * buf [[threadgroup(0)]], + uint tgpig[[threadgroup_position_in_grid]], + uint tpitg[[thread_position_in_threadgroup]], + uint sgitg[[simdgroup_index_in_threadgroup]], + uint tiisg[[thread_index_in_simdgroup]], + uint ntg[[threads_per_threadgroup]]) { + device const float4 * x = (device const float4 *) ((device const char *) src0 + tgpig*nb01); + + float4 sumf = 0; + float all_sum = 0; + + // parallel sum + for (int i00 = tpitg; i00 < ne00/4; i00 += ntg) { + sumf += x[i00] * x[i00]; + } + all_sum = sumf[0] + sumf[1] + sumf[2] + sumf[3]; + all_sum = simd_sum(all_sum); + if (ntg > N_SIMDWIDTH) { + if (sgitg == 0) { + buf[tiisg] = 0.0f; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + if (tiisg == 0) { + buf[sgitg] = all_sum; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + all_sum = buf[tiisg]; + all_sum = simd_sum(all_sum); + } + + const float mean = all_sum / ne00; + const float scale = 1.0f / sqrt(mean + eps); + + device float4 * y = (device float4 *) (dst + tgpig*ne00); + for (int i00 = tpitg; i00 < ne00/4; i00 += ntg) { + y[i00] = x[i00] * scale; + } +} + + +// TODO: to be fixed +kernel void kernel_soft_max( + device const float * src0, + device const float * src1, + device float * dst, + constant int64_t & ne00, + constant int64_t & ne01, + constant int64_t & ne02, + constant float & scale, + threadgroup float * buf [[threadgroup(0)]], + uint 
tgpig[[threadgroup_position_in_grid]], + uint tpitg[[thread_position_in_threadgroup]], + uint sgitg[[simdgroup_index_in_threadgroup]], + uint tiisg[[thread_index_in_simdgroup]], + uint ntg[[threads_per_threadgroup]]) { + const int64_t i03 = (tgpig) / (ne02*ne01); + const int64_t i02 = (tgpig - i03*ne02*ne01) / ne01; + const int64_t i01 = (tgpig - i03*ne02*ne01 - i02*ne01); + + device const float * psrc0 = src0 + i03*ne02*ne01*ne00 + i02*ne01*ne00 + i01*ne00; + device const float * pmask = src1 != src0 ? src1 + i01*ne00 : nullptr; + device float * pdst = dst + i03*ne02*ne01*ne00 + i02*ne01*ne00 + i01*ne00; + + // parallel max + float lmax = -INFINITY; + + for (int i00 = tpitg; i00 < ne00; i00 += ntg) { + lmax = MAX(lmax, psrc0[i00]*scale + (pmask ? pmask[i00] : 0.0f)); + } + + // find the max value in the block + float max_val = simd_max(lmax); + if (ntg > N_SIMDWIDTH) { + if (sgitg == 0) { + buf[tiisg] = -INFINITY; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + if (tiisg == 0) { + buf[sgitg] = max_val; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + max_val = buf[tiisg]; + max_val = simd_max(max_val); + } + + // parallel sum + float lsum = 0.0f; + for (int i00 = tpitg; i00 < ne00; i00 += ntg) { + const float exp_psrc0 = exp((psrc0[i00]*scale + (pmask ? 
pmask[i00] : 0.0f)) - max_val); + lsum += exp_psrc0; + pdst[i00] = exp_psrc0; + } + + // This barrier fixes a failing test + // ref: https://github.com/ggerganov/ggml/pull/621#discussion_r1425156335 + threadgroup_barrier(mem_flags::mem_none); + + float sum = simd_sum(lsum); + + if (ntg > N_SIMDWIDTH) { + if (sgitg == 0) { + buf[tiisg] = 0.0f; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + if (tiisg == 0) { + buf[sgitg] = sum; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + sum = buf[tiisg]; + sum = simd_sum(sum); + } + + const float inv_sum = 1.0f/sum; + + for (int i00 = tpitg; i00 < ne00; i00 += ntg) { + pdst[i00] *= inv_sum; + } +} + +// TODO: to be fixed +kernel void kernel_soft_max_4( + device const float * src0, + device const float * src1, + device float * dst, + constant int64_t & ne00, + constant int64_t & ne01, + constant int64_t & ne02, + constant float & scale, + threadgroup float * buf [[threadgroup(0)]], + uint tgpig[[threadgroup_position_in_grid]], + uint tpitg[[thread_position_in_threadgroup]], + uint sgitg[[simdgroup_index_in_threadgroup]], + uint tiisg[[thread_index_in_simdgroup]], + uint ntg[[threads_per_threadgroup]]) { + const int64_t i03 = (tgpig) / (ne02*ne01); + const int64_t i02 = (tgpig - i03*ne02*ne01) / ne01; + const int64_t i01 = (tgpig - i03*ne02*ne01 - i02*ne01); + + device const float4 * psrc4 = (device const float4 *)(src0 + i03*ne02*ne01*ne00 + i02*ne01*ne00 + i01*ne00); + device const float4 * pmask = src1 != src0 ? (device const float4 *)(src1 + i01*ne00) : nullptr; + device float4 * pdst4 = (device float4 *)(dst + i03*ne02*ne01*ne00 + i02*ne01*ne00 + i01*ne00); + + // parallel max + float4 lmax4 = -INFINITY; + + for (int i00 = tpitg; i00 < ne00/4; i00 += ntg) { + lmax4 = fmax(lmax4, psrc4[i00]*scale + (pmask ? 
pmask[i00] : 0.0f)); + } + + const float lmax = MAX(MAX(lmax4[0], lmax4[1]), MAX(lmax4[2], lmax4[3])); + + float max_val = simd_max(lmax); + if (ntg > N_SIMDWIDTH) { + if (sgitg == 0) { + buf[tiisg] = -INFINITY; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + if (tiisg == 0) { + buf[sgitg] = max_val; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + max_val = buf[tiisg]; + max_val = simd_max(max_val); + } + + // parallel sum + float4 lsum4 = 0.0f; + for (int i00 = tpitg; i00 < ne00/4; i00 += ntg) { + const float4 exp_psrc4 = exp((psrc4[i00]*scale + (pmask ? pmask[i00] : 0.0f)) - max_val); + lsum4 += exp_psrc4; + pdst4[i00] = exp_psrc4; + } + + const float lsum = lsum4[0] + lsum4[1] + lsum4[2] + lsum4[3]; + + // This barrier fixes a failing test + // ref: https://github.com/ggerganov/ggml/pull/621#discussion_r1425156335 + threadgroup_barrier(mem_flags::mem_none); + + float sum = simd_sum(lsum); + + if (ntg > N_SIMDWIDTH) { + if (sgitg == 0) { + buf[tiisg] = 0.0f; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + if (tiisg == 0) { + buf[sgitg] = sum; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + sum = buf[tiisg]; + sum = simd_sum(sum); + } + + const float inv_sum = 1.0f/sum; + + for (int i00 = tpitg; i00 < ne00/4; i00 += ntg) { + pdst4[i00] *= inv_sum; + } +} + + +// ROPE // +static float rope_yarn_ramp(const float low, const float high, const int i0) { + const float y = (i0 / 2 - low) / max(0.001f, high - low); + return 1.0f - min(1.0f, max(0.0f, y)); +} + +// YaRN algorithm based on LlamaYaRNScaledRotaryEmbedding.py from https://github.com/jquesnelle/yarn +// MIT licensed. Copyright (c) 2023 Jeffrey Quesnelle and Bowen Peng. 
+static void rope_yarn( float theta_extrap, float freq_scale, float corr_dims[2], int64_t i0, float ext_factor, float mscale, + thread float * cos_theta, thread float * sin_theta) { + // Get n-d rotational scaling corrected for extrapolation + float theta_interp = freq_scale * theta_extrap; + float theta = theta_interp; + if (ext_factor != 0.0f) { + float ramp_mix = rope_yarn_ramp(corr_dims[0], corr_dims[1], i0) * ext_factor; + theta = theta_interp * (1 - ramp_mix) + theta_extrap * ramp_mix; + + // Get n-d magnitude scaling corrected for interpolation + mscale *= 1.0f + 0.1f * log(1.0f / freq_scale); + } + *cos_theta = cos(theta) * mscale; + *sin_theta = sin(theta) * mscale; +} + +// Apparently solving `n_rot = 2pi * x * base^((2 * max_pos_emb) / n_dims)` for x, we get +// `corr_fac(n_rot) = n_dims * log(max_pos_emb / (n_rot * 2pi)) / (2 * log(base))` +static float rope_yarn_corr_factor(int n_dims, int n_orig_ctx, float n_rot, float base) { + return n_dims * log(n_orig_ctx / (n_rot * 2 * M_PI_F)) / (2 * log(base)); +} + +static void rope_yarn_corr_dims( + int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]) { + // start and end correction dims + dims[0] = max(0.0f, floor(rope_yarn_corr_factor(n_dims, n_orig_ctx, beta_fast, freq_base))); + dims[1] = min(n_dims - 1.0f, ceil(rope_yarn_corr_factor(n_dims, n_orig_ctx, beta_slow, freq_base))); +} + +typedef void (rope_t)( + device const void * src0, + device const int32_t * src1, + device float * dst, + constant int64_t & ne00, + constant int64_t & ne01, + constant int64_t & ne02, + constant int64_t & ne03, + constant uint64_t & nb00, + constant uint64_t & nb01, + constant uint64_t & nb02, + constant uint64_t & nb03, + constant int64_t & ne0, + constant int64_t & ne1, + constant int64_t & ne2, + constant int64_t & ne3, + constant uint64_t & nb0, + constant uint64_t & nb1, + constant uint64_t & nb2, + constant uint64_t & nb3, + constant int & n_past, + constant int & n_dims, + 
constant int & mode, + constant int & n_orig_ctx, + constant float & freq_base, + constant float & freq_scale, + constant float & ext_factor, + constant float & attn_factor, + constant float & beta_fast, + constant float & beta_slow, + uint tiitg[[thread_index_in_threadgroup]], + uint3 tptg[[threads_per_threadgroup]], + uint3 tgpig[[threadgroup_position_in_grid]]); + +// TODO: to be fixed +template +kernel void kernel_rope( + device const void * src0, + device const int32_t * src1, + device float * dst, + constant int64_t & ne00, + constant int64_t & ne01, + constant int64_t & ne02, + constant int64_t & ne03, + constant uint64_t & nb00, + constant uint64_t & nb01, + constant uint64_t & nb02, + constant uint64_t & nb03, + constant int64_t & ne0, + constant int64_t & ne1, + constant int64_t & ne2, + constant int64_t & ne3, + constant uint64_t & nb0, + constant uint64_t & nb1, + constant uint64_t & nb2, + constant uint64_t & nb3, + constant int & n_past, + constant int & n_dims, + constant int & mode, + constant int & n_orig_ctx, + constant float & freq_base, + constant float & freq_scale, + constant float & ext_factor, + constant float & attn_factor, + constant float & beta_fast, + constant float & beta_slow, + uint tiitg[[thread_index_in_threadgroup]], + uint3 tptg[[threads_per_threadgroup]], + uint3 tgpig[[threadgroup_position_in_grid]]) { + const int64_t i3 = tgpig[2]; + const int64_t i2 = tgpig[1]; + const int64_t i1 = tgpig[0]; + + const bool is_neox = mode & 2; + + float corr_dims[2]; + rope_yarn_corr_dims(n_dims, n_orig_ctx, freq_base, beta_fast, beta_slow, corr_dims); + + device const int32_t * pos = src1; + + const int64_t p = pos[i2]; + + const float theta_0 = (float)p; + const float inv_ndims = -1.f/n_dims; + + if (!is_neox) { + for (int64_t i0 = 2*tiitg; i0 < ne0; i0 += 2*tptg.x) { + + const float theta = theta_0 * pow(freq_base, inv_ndims*i0); + float cos_theta, sin_theta; + rope_yarn(theta, freq_scale, corr_dims, i0, ext_factor, attn_factor, &cos_theta, 
&sin_theta); + + device const T * const src = (device T *)((device char *) src0 + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); + device T * dst_data = (device T *)((device char *) dst + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); + + const T x0 = src[0]; + const T x1 = src[1]; + + dst_data[0] = x0*cos_theta - x1*sin_theta; + dst_data[1] = x0*sin_theta + x1*cos_theta; + } + } else { + for (int64_t ic = 2*tiitg; ic < ne0; ic += 2*tptg.x) { + if (ic < n_dims) { + const int64_t ib = 0; + + // simplified from `(ib * n_dims + ic) * inv_ndims` + const float cur_rot = inv_ndims*ic - ib; + + const float theta = theta_0 * pow(freq_base, cur_rot); + float cos_theta, sin_theta; + rope_yarn(theta, freq_scale, corr_dims, cur_rot, ext_factor, attn_factor, &cos_theta, &sin_theta); + + const int64_t i0 = ib*n_dims + ic/2; + + device const T * const src = (device T *)((device char *) src0 + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); + device T * dst_data = (device T *)((device char *) dst + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); + + const float x0 = src[0]; + const float x1 = src[n_dims/2]; + + dst_data[0] = x0*cos_theta - x1*sin_theta; + dst_data[n_dims/2] = x0*sin_theta + x1*cos_theta; + } else { + const int64_t i0 = ic; + + device const T * const src = (device T *)((device char *) src0 + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); + device T * dst_data = (device T *)((device char *) dst + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); + + dst_data[0] = src[0]; + dst_data[1] = src[1]; + } + } + } +} + +template [[host_name("kernel_rope_f32")]] kernel rope_t kernel_rope; +template [[host_name("kernel_rope_f16")]] kernel rope_t kernel_rope; + /* Performance comparision with the test case: CPU: 4000ms, ~60GOP/s diff --git a/kernels/metal/matmul_metal_imp.cc b/kernels/metal/matmul_metal_imp.cc new file mode 100644 index 00000000..02c50a7a --- /dev/null +++ b/kernels/metal/matmul_metal_imp.cc @@ -0,0 +1,278 @@ +#include "matmul_metal_imp.h" + +#include +// Some notes: +// 1. 
pipelinestate and encoder may be shared across calls as a future improvement +// 2. every pointer returned by allocateSharedMem is recorded together with its metal buffer, +// so the final result is always stored in the memory behind param.C +// 3. since metal, unlike CUDA, needs explicit initialization and all Metal kernels +// should be placed in the same file, we keep all metal kernels in one kernel file and +// all op helper functions here, which will be called later in ops. + +// static data +MTL::Device *MetalIMP::_mDevice; +MTL::ComputePipelineState *MetalIMP::_mMatmulFunctionPSO; +MTL::CommandQueue *MetalIMP::_mCommandQueue; + +MTL::Buffer *MetalIMP::_mBufferA; +MTL::Buffer *MetalIMP::_mBufferB; +MTL::Buffer *MetalIMP::_mBufferScales; +MTL::Buffer *MetalIMP::_mBufferResult; +MTL::Buffer *MetalIMP::_mParams; + +std::unordered_map MetalIMP::_mumap; + +MetalMatMulParams *MetalIMP::_mParamsPtr; +bool MetalIMP::has_init = false; + +void *MetalIMP::allocateSharedMem(size_t size) { + if (!has_init) { + MetalIMP::init(); + } + + MTL::Buffer *new_b = _mDevice->newBuffer(size, MTL::ResourceStorageModeShared); + + void *void_ptr = new_b->contents(); + + // push the pair to the map + _mumap.insert(std::make_pair(void_ptr, new_b)); + + return void_ptr; +} + +void MetalIMP::init() { + _mDevice = MTL::CreateSystemDefaultDevice(); + has_init = true; +} + +MTL::Buffer *MetalIMP::getBufferfromPtr(void *ptr) { + if (_mumap.find(ptr) == _mumap.end()) { + std::cerr << "Cannot find the corresponding MTL::Buffer." << std::endl; + return NULL; + } else + return _mumap[ptr]; +} + +void MetalIMP::setupLibrary(const char *kernel_name){ + NS::Error *error = nullptr; + + // Load the shader files with a .metal file extension in the project + MTL::Library *defaultLibrary = _mDevice->newDefaultLibrary(); + + if (defaultLibrary == nullptr) { + std::cout << "Failed to find the default library." 
<< std::endl; + return; + } + + auto str = NS::String::string(kernel_name, NS::ASCIIStringEncoding); + MTL::Function *matmulFunction = defaultLibrary->newFunction(str); + defaultLibrary->release(); + + if (matmulFunction == nullptr) { + std::cout << "Failed to find the function." << std::endl; + return; + } + + // Create a compute pipeline state object. + _mMatmulFunctionPSO = _mDevice->newComputePipelineState(matmulFunction, &error); + matmulFunction->release(); + + if (_mMatmulFunctionPSO == nullptr) { + // If the Metal API validation is enabled, you can find out more information about what + // went wrong. (Metal API validation is enabled by default when a debug build is run + // from Xcode) + std::cout << "Failed to created pipeline state object, error " << error << "." << std::endl; + return; + } + + _mCommandQueue = _mDevice->newCommandQueue(); + if (_mCommandQueue == nullptr) { + std::cout << "Failed to find the command queue." << std::endl; + return; + } +} + +void MetalIMP::run_mat_mul_accelerator_int4_fast_no_offset(MetalMatMulParams param, MetalMatmulBuffers *bufferParams) { + setupLibrary("matmulUInt4_SIMD_Q4Interleave_unroll32"); + + _mParams = _mDevice->newBuffer(sizeof(MetalMatMulParams), MTL::ResourceStorageModeShared); + _mParamsPtr = (MetalMatMulParams *)_mParams->contents(); + + + *_mParamsPtr = param; + unsigned int m, n, k; + m = param.m; + n = param.n; + k = param.k; + + // assign the buffers to hold our data and the result. + _mBufferA = getBufferfromPtr((void *)bufferParams->A); + _mBufferB = getBufferfromPtr((void *)bufferParams->B); + _mBufferResult = getBufferfromPtr((void *)bufferParams->C); + _mBufferScales = getBufferfromPtr((void *)bufferParams->scales); + + if (!_mBufferA || !_mBufferB || !_mBufferResult || !_mBufferScales) { + std::cerr << "Failed to locate some buffer!" << std::endl; + exit(-1); + } + + // Create a command buffer to hold commands. 
+ MTL::CommandBuffer *commandBuffer = _mCommandQueue->commandBuffer(); + assert(commandBuffer != nullptr); + + // Start a compute pass. + MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder(); + assert(computeEncoder != nullptr); + + // Encode the pipeline state object and its parameters. + computeEncoder->setComputePipelineState(_mMatmulFunctionPSO); + computeEncoder->setBuffer(_mBufferA, 0, 0); + computeEncoder->setBuffer(_mBufferB, 0, 1); + computeEncoder->setBuffer(_mBufferResult, 0, 2); + computeEncoder->setBuffer(_mBufferScales, 0, 3); + computeEncoder->setBuffer(_mParams, 0, 4); + + MTL::Size gridSize = MTL::Size::Make(_mParamsPtr->n, _mParamsPtr->m, 1); + + // Calculate a threadgroup size. + MTL::Size threadgroupSize = MTL::Size::Make(16, 1, 1); + + // Encode the compute command. + computeEncoder->dispatchThreads(gridSize, threadgroupSize); + + // End the compute pass. + computeEncoder->endEncoding(); + + // Execute the command. + commandBuffer->commit(); + + // Normally, you want to do other work in your app while the GPU is running, + // but in this example, the code simply blocks until the calculation is complete. + commandBuffer->waitUntilCompleted(); + + computeEncoder->release(); + commandBuffer->release(); + _mMatmulFunctionPSO->release(); +} + +void MetalIMP::run_naive_mat_mul(MetalMatMulParams param, MetalMatmulBuffers *bufferParams) { + setupLibrary("matmul"); + _mParams = _mDevice->newBuffer(sizeof(MetalMatMulParams), MTL::ResourceStorageModeShared); + _mParamsPtr = (MetalMatMulParams *)_mParams->contents(); + + + *_mParamsPtr = param; + unsigned int m, n, k; + m = param.m; + n = param.n; + k = param.k; + + // assign the buffers to hold our data and the result. 
+ _mBufferA = getBufferfromPtr((void *)bufferParams->A); + _mBufferB = getBufferfromPtr((void *)bufferParams->B); + _mBufferResult = getBufferfromPtr((void *)bufferParams->C); + + if (!_mBufferA || !_mBufferB || !_mBufferResult || !_mBufferScales) { + std::cerr << "Failed to locate some buffer!" << std::endl; + exit(-1); + } + + // Create a command buffer to hold commands. + MTL::CommandBuffer *commandBuffer = _mCommandQueue->commandBuffer(); + assert(commandBuffer != nullptr); + + // Start a compute pass. + MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder(); + assert(computeEncoder != nullptr); + + // Encode the pipeline state object and its parameters. + computeEncoder->setComputePipelineState(_mMatmulFunctionPSO); + computeEncoder->setBuffer(_mBufferA, 0, 0); + computeEncoder->setBuffer(_mBufferB, 0, 1); + computeEncoder->setBuffer(_mBufferResult, 0, 2); + computeEncoder->setBuffer(_mParams, 0, 3); + + MTL::Size threadgroupSize = MTL::Size::Make(8, 8, 1); + MTL::Size gridSize = MTL::Size::Make((n + threadgroupSize.width - 1) / threadgroupSize.width, + (m + threadgroupSize.height - 1) / threadgroupSize.height, + 1); + + // Encode the compute command. + computeEncoder->dispatchThreads(gridSize, threadgroupSize); + + // End the compute pass. + computeEncoder->endEncoding(); + + // Execute the command. + commandBuffer->commit(); + + // Normally, you want to do other work in your app while the GPU is running, + // but in this example, the code simply blocks until the calculation is complete. 
+ commandBuffer->waitUntilCompleted(); + + computeEncoder->release(); + commandBuffer->release(); + _mMatmulFunctionPSO->release(); +} + +void MetalIMP::run_batch_add(MetalMatMulParams param, MetalMatmulBuffers *bufferParams){ + setupLibrary("kernel_batch_add"); + + _mParams = _mDevice->newBuffer(sizeof(MetalMatMulParams), MTL::ResourceStorageModeShared); + _mParamsPtr = (MetalMatMulParams *)_mParams->contents(); + + + *_mParamsPtr = param; + unsigned int m, n, k; + m = param.m; + n = param.n; + k = param.k; + + // assign the buffers to hold our data and the result. + _mBufferA = getBufferfromPtr((void *)bufferParams->A); + _mBufferB = getBufferfromPtr((void *)bufferParams->B); + _mBufferResult = getBufferfromPtr((void *)bufferParams->C); + + if (!_mBufferA || !_mBufferB || !_mBufferResult || !_mBufferScales) { + std::cerr << "Failed to locate some buffer!" << std::endl; + exit(-1); + } + + // Create a command buffer to hold commands. + MTL::CommandBuffer *commandBuffer = _mCommandQueue->commandBuffer(); + assert(commandBuffer != nullptr); + + // Start a compute pass. + MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder(); + assert(computeEncoder != nullptr); + + // Encode the pipeline state object and its parameters. + computeEncoder->setComputePipelineState(_mMatmulFunctionPSO); + computeEncoder->setBuffer(_mBufferA, 0, 0); + computeEncoder->setBuffer(_mBufferB, 0, 1); + computeEncoder->setBuffer(_mBufferResult, 0, 2); + computeEncoder->setBuffer(_mParams, 0, 3); + + MTL::Size threadgroupSize = MTL::Size::Make(8, 8, 1); + MTL::Size gridSize = MTL::Size::Make((n + threadgroupSize.width - 1) / threadgroupSize.width, + (m + threadgroupSize.height - 1) / threadgroupSize.height, + 1); + + // Encode the compute command. + computeEncoder->dispatchThreads(gridSize, threadgroupSize); + + // End the compute pass. + computeEncoder->endEncoding(); + + // Execute the command. 
+ commandBuffer->commit(); + + // Normally, you want to do other work in your app while the GPU is running, + // but in this example, the code simply blocks until the calculation is complete. + commandBuffer->waitUntilCompleted(); + + computeEncoder->release(); + commandBuffer->release(); + _mMatmulFunctionPSO->release(); + +} \ No newline at end of file diff --git a/kernels/metal/matmul_metal_imp.h b/kernels/metal/matmul_metal_imp.h new file mode 100644 index 00000000..2393b01c --- /dev/null +++ b/kernels/metal/matmul_metal_imp.h @@ -0,0 +1,55 @@ +#pragma once + +#include + +#include "Foundation/Foundation.hpp" +#include "Metal/Metal.hpp" +#include "include/opParams.h" + +typedef struct { + float *A, *C, *scales, *offset; + unsigned char *B; +} MetalMatmulBuffers; + +class MetalIMP { + public: + static MTL::Device *_mDevice; + + // The compute pipeline generated from the compute kernel in the .metal shader file. + static MTL::ComputePipelineState *_mMatmulFunctionPSO; + + // The command queue used to pass commands to the device. + static MTL::CommandQueue *_mCommandQueue; + + // Buffers to hold data. 
+ static MTL::Buffer *_mBufferA; + static MTL::Buffer *_mBufferB; + static MTL::Buffer *_mBufferScales; + static MTL::Buffer *_mBufferResult; + static MTL::Buffer *_mParams; + + static std::unordered_map _mumap; + + static bool has_init; + static void init(); + static void setupLibrary(const char *kernel_name); + static void *allocateSharedMem(size_t size); + static MetalMatMulParams *_mParamsPtr; + static MTL::Buffer *getBufferfromPtr(void *ptr); + + static void run_mat_mul_accelerator_int4_fast_no_offset(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); + static void run_naive_mat_mul(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); + static void run_batch_add(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); + static void run_relu(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); + static void run_silu(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); + static void run_gelu(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); + static void run_gelu_quick(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); + static void run_rms_norm(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); // TODO: to be fixed + static void run_soft_max(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); // TODO: to be fixed + static void run_soft_max_4(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); // TODO: to be fixed + static void run_rope(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); // TODO: to be fixed + + + // static void sendComputeCommand(); + // static void encodeCommand(MTL::ComputeCommandEncoder *computeEncoder); +}; diff --git a/kernels/metal/matmul_metal_int4.cc b/kernels/metal/matmul_metal_int4.cc index 0bf43635..e54f6ec9 100644 --- a/kernels/metal/matmul_metal_int4.cc +++ b/kernels/metal/matmul_metal_int4.cc @@ -9,7 +9,7 @@ #define NS_PRIVATE_IMPLEMENTATION #define CA_PRIVATE_IMPLEMENTATION #define MTL_PRIVATE_IMPLEMENTATION -#include "matmul_metal_int4_imp.h" 
+#include "matmul_metal_imp.h" namespace matmul { void MatmulOperator::mat_mul_accelerator_int4_fast(const struct matmul_params *params) { @@ -68,6 +68,6 @@ void MatmulOperator::mat_mul_accelerator_int4_fast_no_offset(const struct matmul MetalMatMulParams matmulparams = {(unsigned int)A->row, (unsigned int)C->column, (unsigned int)A->column, (unsigned int)block_size}; MetalMatmulBuffers bufferparams = {A->data_ptr, C->data_ptr, scale, offset, B->int4_data_ptr}; - MetalMatmulInt4IMP::run(matmulparams, &bufferparams); + MetalIMP::run_mat_mul_accelerator_int4_fast_no_offset(matmulparams, &bufferparams); }; } // namespace matmul diff --git a/kernels/metal/matmul_metal_int4_imp.cc b/kernels/metal/matmul_metal_int4_imp.cc deleted file mode 100644 index 282b9c39..00000000 --- a/kernels/metal/matmul_metal_int4_imp.cc +++ /dev/null @@ -1,154 +0,0 @@ -#include "matmul_metal_int4_imp.h" - -#include - -// static data -MTL::Device *MetalMatmulInt4IMP::_mDevice; -MTL::ComputePipelineState *MetalMatmulInt4IMP::_mMatmulFunctionPSO; -MTL::CommandQueue *MetalMatmulInt4IMP::_mCommandQueue; - -MTL::Buffer *MetalMatmulInt4IMP::_mBufferA; -MTL::Buffer *MetalMatmulInt4IMP::_mBufferB; -MTL::Buffer *MetalMatmulInt4IMP::_mBufferScales; -MTL::Buffer *MetalMatmulInt4IMP::_mBufferResult; -MTL::Buffer *MetalMatmulInt4IMP::_mParams; - -std::unordered_map MetalMatmulInt4IMP::_mumap; - -MetalMatMulParams *MetalMatmulInt4IMP::_mParamsPtr; -bool MetalMatmulInt4IMP::has_init = false; - -void *MetalMatmulInt4IMP::allocateSharedMem(size_t size) { - if (!has_init) { - MetalMatmulInt4IMP::init(); - } - - MTL::Buffer *new_b = _mDevice->newBuffer(size, MTL::ResourceStorageModeShared); - - void *void_ptr = new_b->contents(); - - // push the pair to the map - _mumap.insert(std::make_pair(void_ptr, new_b)); - - return void_ptr; -} - -void MetalMatmulInt4IMP::init() { - _mDevice = MTL::CreateSystemDefaultDevice(); - ; - - NS::Error *error = nullptr; - - // Load the shader files with a .metal file extension in 
the project - MTL::Library *defaultLibrary = _mDevice->newDefaultLibrary(); - - if (defaultLibrary == nullptr) { - std::cout << "Failed to find the default library." << std::endl; - return; - } - - auto str = NS::String::string("matmulUInt4_SIMD_Q4Interleave_unroll32", NS::ASCIIStringEncoding); - MTL::Function *matmulFunction = defaultLibrary->newFunction(str); - defaultLibrary->release(); - - if (matmulFunction == nullptr) { - std::cout << "Failed to find the function." << std::endl; - return; - } - - // Create a compute pipeline state object. - _mMatmulFunctionPSO = _mDevice->newComputePipelineState(matmulFunction, &error); - matmulFunction->release(); - - if (_mMatmulFunctionPSO == nullptr) { - // If the Metal API validation is enabled, you can find out more information about what - // went wrong. (Metal API validation is enabled by default when a debug build is run - // from Xcode) - std::cout << "Failed to created pipeline state object, error " << error << "." << std::endl; - return; - } - - _mCommandQueue = _mDevice->newCommandQueue(); - if (_mCommandQueue == nullptr) { - std::cout << "Failed to find the command queue." << std::endl; - return; - } - - _mParams = _mDevice->newBuffer(sizeof(MetalMatMulParams), MTL::ResourceStorageModeShared); - _mParamsPtr = (MetalMatMulParams *)_mParams->contents(); - - has_init = true; -} - -MTL::Buffer *MetalMatmulInt4IMP::getBufferfromPtr(void *ptr) { - if (_mumap.find(ptr) == _mumap.end()) { - std::cerr << "Cannot find the corresponding MTL::Buffer." << std::endl; - return NULL; - } else - return _mumap[ptr]; -} - -void MetalMatmulInt4IMP::run(MetalMatMulParams param, MetalMatmulBuffers *bufferParams) { - *_mParamsPtr = param; - unsigned int m, n, k; - m = param.m; - n = param.n; - k = param.k; - - // assign the buffers to hold our data and the result. 
- _mBufferA = getBufferfromPtr((void *)bufferParams->A); - _mBufferB = getBufferfromPtr((void *)bufferParams->B); - _mBufferResult = getBufferfromPtr((void *)bufferParams->C); - _mBufferScales = getBufferfromPtr((void *)bufferParams->scales); - - if (!_mBufferA || !_mBufferB || !_mBufferResult || !_mBufferScales) { - std::cerr << "Failed to locate some buffer!" << std::endl; - exit(-1); - } - // TODO: offset? - sendComputeCommand(); -} - -typedef std::chrono::microseconds time_unit; -void MetalMatmulInt4IMP::sendComputeCommand() { - // Create a command buffer to hold commands. - MTL::CommandBuffer *commandBuffer = _mCommandQueue->commandBuffer(); - assert(commandBuffer != nullptr); - - // Start a compute pass. - MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder(); - assert(computeEncoder != nullptr); - - encodeCommand(computeEncoder); - - // End the compute pass. - computeEncoder->endEncoding(); - - // Execute the command. - commandBuffer->commit(); - - // Normally, you want to do other work in your app while the GPU is running, - // but in this example, the code simply blocks until the calculation is complete. - commandBuffer->waitUntilCompleted(); - - computeEncoder->release(); - commandBuffer->release(); -} - -void MetalMatmulInt4IMP::encodeCommand(MTL::ComputeCommandEncoder *computeEncoder) { - // Encode the pipeline state object and its parameters. - computeEncoder->setComputePipelineState(_mMatmulFunctionPSO); - computeEncoder->setBuffer(_mBufferA, 0, 0); - computeEncoder->setBuffer(_mBufferB, 0, 1); - computeEncoder->setBuffer(_mBufferResult, 0, 2); - computeEncoder->setBuffer(_mBufferScales, 0, 3); - computeEncoder->setBuffer(_mParams, 0, 4); - - MTL::Size gridSize = MTL::Size::Make(_mParamsPtr->n, _mParamsPtr->m, 1); - - // Calculate a threadgroup size. - MTL::Size threadgroupSize = MTL::Size::Make(16, 1, 1); - - // Encode the compute command. 
- computeEncoder->dispatchThreads(gridSize, threadgroupSize); -} diff --git a/kernels/metal/matmul_metal_int4_imp.h b/kernels/metal/matmul_metal_int4_imp.h deleted file mode 100644 index c3e2674a..00000000 --- a/kernels/metal/matmul_metal_int4_imp.h +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once - -#include - -#include "Foundation/Foundation.hpp" -#include "Metal/Metal.hpp" -#include "include/opParams.h" - -typedef struct { - float *A, *C, *scales, *offset; - unsigned char *B; -} MetalMatmulBuffers; - -class MetalMatmulInt4IMP { - public: - static MTL::Device *_mDevice; - - // The compute pipeline generated from the compute kernel in the .metal shader file. - static MTL::ComputePipelineState *_mMatmulFunctionPSO; - - // The command queue used to pass commands to the device. - static MTL::CommandQueue *_mCommandQueue; - - // Buffers to hold data. - static MTL::Buffer *_mBufferA; - static MTL::Buffer *_mBufferB; - static MTL::Buffer *_mBufferScales; - static MTL::Buffer *_mBufferResult; - static MTL::Buffer *_mParams; - - static std::unordered_map _mumap; - - static bool has_init; - static void init(); - static void run(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); - static void *allocateSharedMem(size_t size); - - static MetalMatMulParams *_mParamsPtr; - static void sendComputeCommand(); - static void encodeCommand(MTL::ComputeCommandEncoder *computeEncoder); - static MTL::Buffer *getBufferfromPtr(void *ptr); -}; diff --git a/kernels/metal/matmul_naive.cc b/kernels/metal/matmul_naive.cc new file mode 100644 index 00000000..541bb05d --- /dev/null +++ b/kernels/metal/matmul_naive.cc @@ -0,0 +1,30 @@ +#include +#include +#include + +#include +#include + +#include "../matmul.h" +#define NS_PRIVATE_IMPLEMENTATION +#define CA_PRIVATE_IMPLEMENTATION +#define MTL_PRIVATE_IMPLEMENTATION +#include "matmul_metal_imp.h" + +namespace matmul { + // naive float*float matmul +void MatmulOperator::mat_mul_metal(const struct matmul_params *params) { + int i, j, k; + const 
struct matrix *A = &params->A, *B = &params->B, *C = &params->C; + const int block_size = params->block_size; + float *scale = params->scales, *offset = params->offset; + + assert(params->block_size % 32 == 0); // block size must be a multiple of 32 + assert(A->row == C->row); // A and C must have the same number of rows + + MetalMatMulParams matmulparams = {(unsigned int)A->row, (unsigned int)C->column, (unsigned int)A->column, + (unsigned int)block_size}; + MetalMatmulBuffers bufferparams = {A->data_ptr, C->data_ptr, scale, offset, (unsigned char*)B->data_ptr}; + MetalIMP::run_mat_mul_accelerator_int4_fast_no_offset(matmulparams, &bufferparams); +}; +} // namespace matmul diff --git a/kernels/metal/metal_batch_add.cc b/kernels/metal/metal_batch_add.cc new file mode 100644 index 00000000..8a43b02b --- /dev/null +++ b/kernels/metal/metal_batch_add.cc @@ -0,0 +1,24 @@ +#include +#include +#include + +#include +#include + +#include "../matmul.h" +#define NS_PRIVATE_IMPLEMENTATION +#define CA_PRIVATE_IMPLEMENTATION +#define MTL_PRIVATE_IMPLEMENTATION +#include "matmul_metal_imp.h" + +namespace matmul { + // batch add +void MatmulOperator::batch_add_metal(const struct matmul_params *params) { + int i, j, k; + const struct matrix *A = &params->A, *B = &params->B, *C = &params->C; + + MetalMatMulParams matmulparams = {(unsigned int)A->row, (unsigned int)C->column, (unsigned int)A->column}; + MetalMatmulBuffers bufferparams = {A: A->data_ptr, C: C->data_ptr, B: (unsigned char*)B->data_ptr}; + MetalIMP::run_batch_add(matmulparams, &bufferparams); +}; +} // namespace matmul \ No newline at end of file diff --git a/llm/include/operators.h b/llm/include/operators.h index 38cc2e62..5ec27963 100644 --- a/llm/include/operators.h +++ b/llm/include/operators.h @@ -43,4 +43,14 @@ __global__ void softmax_float(Matrix3D input, Matrix3D output); __global__ void softmax_cuda(Matrix3D input, Matrix3D output); #endif +#ifdef QM_METAL +#include "ops/metal/BMM_F16T.cuh" +#include 
"ops/metal/Embedding.cuh" +#include "ops/metal/LlamaRMSNorm.cuh" +#include "ops/metal/RotaryPosEmb.cuh" + +void batch_Add_metal(const Matrix3D input, const Matrix3D input2, Matrix3D output); +void softmax_metal(Matrix3D input, Matrix3D output); +#endif + #endif // OPERATORS_H diff --git a/llm/src/ops/metal/batch_add.cc b/llm/src/ops/metal/batch_add.cc new file mode 100644 index 00000000..fe25c212 --- /dev/null +++ b/llm/src/ops/metal/batch_add.cc @@ -0,0 +1,17 @@ +#include "operators.h" + +void batch_Add(const Matrix3D &input, const Matrix3D &input2, Matrix3D &output) { + struct matmul_params params; + params.A.row = input.m_dim_y; + params.A.column = input.m_dim_z; + params.A.data_ptr = input.m_data; + params.B.row = input.m_dim_z; // k + params.B.column = input2.m_dim_y; // n + params.B.data_ptr = input2.m_data; + params.C.row = output.m_dim_y; + params.C.column = output.m_dim_z; + params.C.data_ptr = output.m_data; + + matmul::MatmulOperator op = matmul::MatmulOperator(); + op.batch_add_metal(&params); +} diff --git a/llm/tests/metal/cpp_version/metal_Int4matmul.h b/llm/tests/metal/cpp_version/metal_Int4matmul.h new file mode 100644 index 00000000..7f7e2287 --- /dev/null +++ b/llm/tests/metal/cpp_version/metal_Int4matmul.h @@ -0,0 +1,23 @@ +#pragma once + +#include "Foundation/Foundation.hpp" +#include "Metal/Metal.hpp" +#include "param.h" + +class MetalMatmulInt4IMP { + public: + MTL::Buffer *bM1, *bM2, *bM3, *bParam, *bScales, *bOffset; + MTL::Device* mDevice; + MTL::ComputePipelineState* mfnPipelineState; + MTL::CommandQueue* mCommandQueue; + NS::Error *error = nullptr; + typedef struct { + float *A, *C, *scales, *offset; + unsigned char *B; + } MetalMatmulBuffers; + + void metal_init(); + void metal_encodecommand(MTL::ComputeCommandEncoder *computeEncoder); + void metal_compute(); + MTL::Buffer *metal_newBuf(unsigned long type_size, unsigned long size); +}; From a3f88523ec271999edb1578de375f6bacc7685b2 Mon Sep 17 00:00:00 2001 From: RaymondWang0 Date: Thu, 1 Feb 
2024 16:34:26 -0500 Subject: [PATCH 18/37] update metal-cpp --- metal-cpp/Foundation/Foundation.hpp | 2 +- metal-cpp/Foundation/NSArray.hpp | 2 +- metal-cpp/Foundation/NSAutoreleasePool.hpp | 2 +- metal-cpp/Foundation/NSBundle.hpp | 2 +- metal-cpp/Foundation/NSData.hpp | 2 +- metal-cpp/Foundation/NSDate.hpp | 2 +- metal-cpp/Foundation/NSDefines.hpp | 2 +- metal-cpp/Foundation/NSDictionary.hpp | 2 +- metal-cpp/Foundation/NSEnumerator.hpp | 2 +- metal-cpp/Foundation/NSError.hpp | 2 +- metal-cpp/Foundation/NSLock.hpp | 2 +- metal-cpp/Foundation/NSNotification.hpp | 2 +- metal-cpp/Foundation/NSNumber.hpp | 2 +- metal-cpp/Foundation/NSObjCRuntime.hpp | 8 +- metal-cpp/Foundation/NSObject.hpp | 4 +- metal-cpp/Foundation/NSPrivate.hpp | 6 +- metal-cpp/Foundation/NSProcessInfo.hpp | 2 +- metal-cpp/Foundation/NSRange.hpp | 2 +- metal-cpp/Foundation/NSSet.hpp | 2 +- metal-cpp/Foundation/NSSharedPtr.hpp | 4 +- metal-cpp/Foundation/NSString.hpp | 57 +- metal-cpp/Foundation/NSTypes.hpp | 2 +- metal-cpp/Foundation/NSURL.hpp | 2 +- metal-cpp/Metal/MTLAccelerationStructure.hpp | 747 +++++++++++++++++- ...MTLAccelerationStructureCommandEncoder.hpp | 2 +- .../Metal/MTLAccelerationStructureTypes.hpp | 2 +- metal-cpp/Metal/MTLArgument.hpp | 45 +- metal-cpp/Metal/MTLArgumentEncoder.hpp | 5 +- metal-cpp/Metal/MTLBinaryArchive.hpp | 2 +- metal-cpp/Metal/MTLBlitCommandEncoder.hpp | 2 +- metal-cpp/Metal/MTLBlitPass.hpp | 2 +- metal-cpp/Metal/MTLBuffer.hpp | 2 +- metal-cpp/Metal/MTLCaptureManager.hpp | 2 +- metal-cpp/Metal/MTLCaptureScope.hpp | 2 +- metal-cpp/Metal/MTLCommandBuffer.hpp | 4 +- metal-cpp/Metal/MTLCommandEncoder.hpp | 2 +- metal-cpp/Metal/MTLCommandQueue.hpp | 2 +- metal-cpp/Metal/MTLComputeCommandEncoder.hpp | 34 +- metal-cpp/Metal/MTLComputePass.hpp | 2 +- metal-cpp/Metal/MTLComputePipeline.hpp | 2 +- metal-cpp/Metal/MTLCounters.hpp | 2 +- metal-cpp/Metal/MTLDefines.hpp | 2 +- metal-cpp/Metal/MTLDepthStencil.hpp | 2 +- metal-cpp/Metal/MTLDevice.hpp | 89 ++- 
metal-cpp/Metal/MTLDrawable.hpp | 2 +- metal-cpp/Metal/MTLDynamicLibrary.hpp | 2 +- metal-cpp/Metal/MTLEvent.hpp | 2 +- metal-cpp/Metal/MTLFence.hpp | 2 +- metal-cpp/Metal/MTLFunctionConstantValues.hpp | 2 +- metal-cpp/Metal/MTLFunctionDescriptor.hpp | 3 +- metal-cpp/Metal/MTLFunctionHandle.hpp | 2 +- metal-cpp/Metal/MTLFunctionLog.hpp | 2 +- metal-cpp/Metal/MTLFunctionStitching.hpp | 4 +- metal-cpp/Metal/MTLHeaderBridge.hpp | 164 +++- metal-cpp/Metal/MTLHeap.hpp | 8 +- metal-cpp/Metal/MTLIOCommandBuffer.hpp | 2 +- metal-cpp/Metal/MTLIOCommandQueue.hpp | 2 +- metal-cpp/Metal/MTLIOCompressor.hpp | 10 +- metal-cpp/Metal/MTLIndirectCommandBuffer.hpp | 74 +- metal-cpp/Metal/MTLIndirectCommandEncoder.hpp | 74 +- .../Metal/MTLIntersectionFunctionTable.hpp | 20 +- metal-cpp/Metal/MTLLibrary.hpp | 50 +- metal-cpp/Metal/MTLLinkedFunctions.hpp | 2 +- .../Metal/MTLParallelRenderCommandEncoder.hpp | 2 +- metal-cpp/Metal/MTLPipeline.hpp | 2 +- metal-cpp/Metal/MTLPixelFormat.hpp | 10 +- metal-cpp/Metal/MTLPrivate.hpp | 6 +- metal-cpp/Metal/MTLRasterizationRate.hpp | 2 +- metal-cpp/Metal/MTLRenderCommandEncoder.hpp | 34 +- metal-cpp/Metal/MTLRenderPass.hpp | 4 +- metal-cpp/Metal/MTLRenderPipeline.hpp | 64 +- metal-cpp/Metal/MTLResource.hpp | 10 +- .../Metal/MTLResourceStateCommandEncoder.hpp | 2 +- metal-cpp/Metal/MTLResourceStatePass.hpp | 2 +- metal-cpp/Metal/MTLSampler.hpp | 2 +- .../Metal/MTLStageInputOutputDescriptor.hpp | 4 +- metal-cpp/Metal/MTLTexture.hpp | 3 +- metal-cpp/Metal/MTLTypes.hpp | 2 +- metal-cpp/Metal/MTLVersion.hpp | 8 +- metal-cpp/Metal/MTLVertexDescriptor.hpp | 7 +- metal-cpp/Metal/MTLVisibleFunctionTable.hpp | 2 +- metal-cpp/Metal/Metal.hpp | 2 +- metal-cpp/MetalFX/MTLFXDefines.hpp | 41 + metal-cpp/MetalFX/MTLFXPrivate.hpp | 285 +++++++ metal-cpp/MetalFX/MTLFXSpatialScaler.hpp | 372 +++++++++ metal-cpp/MetalFX/MTLFXTemporalScaler.hpp | 695 ++++++++++++++++ metal-cpp/MetalFX/MetalFX.hpp | 28 + metal-cpp/QuartzCore/CADefines.hpp | 2 +- 
metal-cpp/QuartzCore/CAMetalDrawable.hpp | 2 +- metal-cpp/QuartzCore/CAMetalLayer.hpp | 2 +- metal-cpp/QuartzCore/CAPrivate.hpp | 2 +- metal-cpp/QuartzCore/QuartzCore.hpp | 2 +- metal-cpp/README.md | 18 +- metal-cpp/SingleHeader/MakeSingleHeader.py | 4 +- 94 files changed, 2942 insertions(+), 175 deletions(-) create mode 100644 metal-cpp/MetalFX/MTLFXDefines.hpp create mode 100644 metal-cpp/MetalFX/MTLFXPrivate.hpp create mode 100644 metal-cpp/MetalFX/MTLFXSpatialScaler.hpp create mode 100644 metal-cpp/MetalFX/MTLFXTemporalScaler.hpp create mode 100644 metal-cpp/MetalFX/MetalFX.hpp diff --git a/metal-cpp/Foundation/Foundation.hpp b/metal-cpp/Foundation/Foundation.hpp index 4191d06d..8b64277f 100644 --- a/metal-cpp/Foundation/Foundation.hpp +++ b/metal-cpp/Foundation/Foundation.hpp @@ -2,7 +2,7 @@ // // Foundation/Foundation.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Foundation/NSArray.hpp b/metal-cpp/Foundation/NSArray.hpp index d5b2e370..7ccdb804 100644 --- a/metal-cpp/Foundation/NSArray.hpp +++ b/metal-cpp/Foundation/NSArray.hpp @@ -2,7 +2,7 @@ // // Foundation/NSArray.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Foundation/NSAutoreleasePool.hpp b/metal-cpp/Foundation/NSAutoreleasePool.hpp index 4fc2594d..3008590d 100644 --- a/metal-cpp/Foundation/NSAutoreleasePool.hpp +++ b/metal-cpp/Foundation/NSAutoreleasePool.hpp @@ -2,7 +2,7 @@ // // Foundation/NSAutoreleasePool.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/metal-cpp/Foundation/NSBundle.hpp b/metal-cpp/Foundation/NSBundle.hpp index 2a983c42..323d93f3 100644 --- a/metal-cpp/Foundation/NSBundle.hpp +++ b/metal-cpp/Foundation/NSBundle.hpp @@ -2,7 +2,7 @@ // // Foundation/NSBundle.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Foundation/NSData.hpp b/metal-cpp/Foundation/NSData.hpp index d518f5c6..ddfa6dd0 100644 --- a/metal-cpp/Foundation/NSData.hpp +++ b/metal-cpp/Foundation/NSData.hpp @@ -2,7 +2,7 @@ // // Foundation/NSData.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Foundation/NSDate.hpp b/metal-cpp/Foundation/NSDate.hpp index f016e617..61f10a95 100644 --- a/metal-cpp/Foundation/NSDate.hpp +++ b/metal-cpp/Foundation/NSDate.hpp @@ -2,7 +2,7 @@ // // Foundation/NSDate.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Foundation/NSDefines.hpp b/metal-cpp/Foundation/NSDefines.hpp index c1217e8a..a042be63 100644 --- a/metal-cpp/Foundation/NSDefines.hpp +++ b/metal-cpp/Foundation/NSDefines.hpp @@ -2,7 +2,7 @@ // // Foundation/NSDefines.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/metal-cpp/Foundation/NSDictionary.hpp b/metal-cpp/Foundation/NSDictionary.hpp index 017bf44e..078cd5c8 100644 --- a/metal-cpp/Foundation/NSDictionary.hpp +++ b/metal-cpp/Foundation/NSDictionary.hpp @@ -2,7 +2,7 @@ // // Foundation/NSDictionary.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Foundation/NSEnumerator.hpp b/metal-cpp/Foundation/NSEnumerator.hpp index 60343086..eed19dba 100644 --- a/metal-cpp/Foundation/NSEnumerator.hpp +++ b/metal-cpp/Foundation/NSEnumerator.hpp @@ -2,7 +2,7 @@ // // Foundation/NSEnumerator.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Foundation/NSError.hpp b/metal-cpp/Foundation/NSError.hpp index 1bc39de1..f19ff861 100644 --- a/metal-cpp/Foundation/NSError.hpp +++ b/metal-cpp/Foundation/NSError.hpp @@ -2,7 +2,7 @@ // // Foundation/NSError.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Foundation/NSLock.hpp b/metal-cpp/Foundation/NSLock.hpp index 7fee9435..ca371fba 100644 --- a/metal-cpp/Foundation/NSLock.hpp +++ b/metal-cpp/Foundation/NSLock.hpp @@ -2,7 +2,7 @@ // // Foundation/NSLock.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/metal-cpp/Foundation/NSNotification.hpp b/metal-cpp/Foundation/NSNotification.hpp index 8eb5f804..49cf2d4a 100644 --- a/metal-cpp/Foundation/NSNotification.hpp +++ b/metal-cpp/Foundation/NSNotification.hpp @@ -2,7 +2,7 @@ // // Foundation/NSNotification.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Foundation/NSNumber.hpp b/metal-cpp/Foundation/NSNumber.hpp index 4eaaf193..13c78024 100644 --- a/metal-cpp/Foundation/NSNumber.hpp +++ b/metal-cpp/Foundation/NSNumber.hpp @@ -2,7 +2,7 @@ // // Foundation/NSNumber.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Foundation/NSObjCRuntime.hpp b/metal-cpp/Foundation/NSObjCRuntime.hpp index e97592b1..a3860e94 100644 --- a/metal-cpp/Foundation/NSObjCRuntime.hpp +++ b/metal-cpp/Foundation/NSObjCRuntime.hpp @@ -2,7 +2,7 @@ // // Foundation/NSObjCRuntime.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -31,9 +31,9 @@ namespace NS { _NS_ENUM(Integer, ComparisonResult) { - OrderedAscending = -1, - OrderedSame = 0, - OrderedDescending = 1, + OrderedAscending = -1L, + OrderedSame, + OrderedDescending }; const Integer NotFound = IntegerMax; diff --git a/metal-cpp/Foundation/NSObject.hpp b/metal-cpp/Foundation/NSObject.hpp index 7ece1fdb..489fd36f 100644 --- a/metal-cpp/Foundation/NSObject.hpp +++ b/metal-cpp/Foundation/NSObject.hpp @@ -2,7 +2,7 @@ // // Foundation/NSObject.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -243,7 +243,7 @@ _NS_INLINE _Ret NS::Object::sendMessageSafe(const void* pObj, SEL selector, _Arg if constexpr (!std::is_void<_Ret>::value) { - return 0; + return _Ret(0); } } diff --git a/metal-cpp/Foundation/NSPrivate.hpp b/metal-cpp/Foundation/NSPrivate.hpp index 371e8feb..af5ffb14 100644 --- a/metal-cpp/Foundation/NSPrivate.hpp +++ b/metal-cpp/Foundation/NSPrivate.hpp @@ -2,7 +2,7 @@ // // Foundation/NSPrivate.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -37,7 +37,7 @@ #define _NS_PRIVATE_VISIBILITY __attribute__((visibility("hidden"))) #else #define _NS_PRIVATE_VISIBILITY __attribute__((visibility("default"))) -#endif //METALCPP_SYMBOL_VISIBILITY_HIDDEN +#endif // METALCPP_SYMBOL_VISIBILITY_HIDDEN #define _NS_PRIVATE_IMPORT __attribute__((weak_import)) @@ -164,6 +164,8 @@ namespace Private "bundleWithPath:"); _NS_PRIVATE_DEF_SEL(bundleWithURL_, "bundleWithURL:"); + _NS_PRIVATE_DEF_SEL(caseInsensitiveCompare_, + "caseInsensitiveCompare:"); _NS_PRIVATE_DEF_SEL(characterAtIndex_, "characterAtIndex:"); _NS_PRIVATE_DEF_SEL(charValue, diff --git a/metal-cpp/Foundation/NSProcessInfo.hpp b/metal-cpp/Foundation/NSProcessInfo.hpp index 935122fe..565b5993 100644 --- a/metal-cpp/Foundation/NSProcessInfo.hpp +++ b/metal-cpp/Foundation/NSProcessInfo.hpp @@ -2,7 +2,7 @@ // // Foundation/NSProcessInfo.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/metal-cpp/Foundation/NSRange.hpp b/metal-cpp/Foundation/NSRange.hpp index 09b0eb50..2c5beb5b 100644 --- a/metal-cpp/Foundation/NSRange.hpp +++ b/metal-cpp/Foundation/NSRange.hpp @@ -2,7 +2,7 @@ // // Foundation/NSRange.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Foundation/NSSet.hpp b/metal-cpp/Foundation/NSSet.hpp index aecca09b..a4eb0d64 100644 --- a/metal-cpp/Foundation/NSSet.hpp +++ b/metal-cpp/Foundation/NSSet.hpp @@ -2,7 +2,7 @@ // // Foundation/NSSet.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Foundation/NSSharedPtr.hpp b/metal-cpp/Foundation/NSSharedPtr.hpp index 565ead9e..761ce2db 100644 --- a/metal-cpp/Foundation/NSSharedPtr.hpp +++ b/metal-cpp/Foundation/NSSharedPtr.hpp @@ -2,7 +2,7 @@ // // Foundation/NSSharedPtr.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -159,7 +159,7 @@ _NS_INLINE NS::SharedPtr<_Class>::SharedPtr() } template -_NS_INLINE NS::SharedPtr<_Class>::~SharedPtr() +_NS_INLINE NS::SharedPtr<_Class>::~SharedPtr<_Class>() { if (m_pObject) { diff --git a/metal-cpp/Foundation/NSString.hpp b/metal-cpp/Foundation/NSString.hpp index 51b1cd18..c601fc01 100644 --- a/metal-cpp/Foundation/NSString.hpp +++ b/metal-cpp/Foundation/NSString.hpp @@ -2,7 +2,7 @@ // // Foundation/NSString.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ //------------------------------------------------------------------------------------------------------------------------------------------------------------- #include "NSDefines.hpp" +#include "NSObjCRuntime.hpp" #include "NSObject.hpp" #include "NSPrivate.hpp" #include "NSRange.hpp" @@ -78,40 +79,40 @@ using unichar = unsigned short; class String : public Copying { public: - static String* string(); - static String* string(const String* pString); - static String* string(const char* pString, StringEncoding encoding); + static String* string(); + static String* string(const String* pString); + static String* string(const char* pString, StringEncoding encoding); - static String* alloc(); - String* init(); - String* init(const String* pString); - String* init(const char* pString, StringEncoding encoding); - String* init(void* pBytes, UInteger len, StringEncoding encoding, bool freeBuffer); + static String* alloc(); + String* init(); + String* init(const String* pString); + String* init(const char* pString, StringEncoding encoding); + String* init(void* pBytes, UInteger len, StringEncoding encoding, bool freeBuffer); - unichar character(UInteger index) const; - UInteger length() const; + unichar character(UInteger index) const; + UInteger length() const; - const char* cString(StringEncoding encoding) const; - const char* utf8String() const; - UInteger maximumLengthOfBytes(StringEncoding encoding) const; - UInteger lengthOfBytes(StringEncoding encoding) const; + const char* cString(StringEncoding encoding) const; + const char* utf8String() const; + UInteger maximumLengthOfBytes(StringEncoding encoding) const; + UInteger lengthOfBytes(StringEncoding encoding) const; - bool isEqualToString(const String* pString) const; - Range rangeOfString(const String* pString, StringCompareOptions options) const; + bool 
isEqualToString(const String* pString) const; + Range rangeOfString(const String* pString, StringCompareOptions options) const; - const char* fileSystemRepresentation() const; + const char* fileSystemRepresentation() const; - String* stringByAppendingString(const String* pString) const; + String* stringByAppendingString(const String* pString) const; + ComparisonResult caseInsensitiveCompare(const String* pString) const; }; /// Create an NS::String* from a string literal. -#define MTLSTR( literal ) (NS::String *)__builtin___CFStringMakeConstantString( "" literal "" ) +#define MTLSTR(literal) (NS::String*)__builtin___CFStringMakeConstantString("" literal "") -template< std::size_t _StringLen > -[[deprecated("please use MTLSTR(str)")]] -constexpr const String* MakeConstantString( const char ( &str )[_StringLen] ) +template +[[deprecated("please use MTLSTR(str)")]] constexpr const String* MakeConstantString(const char (&str)[_StringLen]) { - return reinterpret_cast< const String* >( __CFStringMakeConstantString( str ) ); + return reinterpret_cast(__CFStringMakeConstantString(str)); } } @@ -230,7 +231,6 @@ _NS_INLINE NS::Range NS::String::rangeOfString(const NS::String* pString, NS::St //------------------------------------------------------------------------------------------------------------------------------------------------------------- - //------------------------------------------------------------------------------------------------------------------------------------------------------------- _NS_INLINE const char* NS::String::fileSystemRepresentation() const @@ -246,3 +246,10 @@ _NS_INLINE NS::String* NS::String::stringByAppendingString(const String* pString } //------------------------------------------------------------------------------------------------------------------------------------------------------------- + +_NS_INLINE NS::ComparisonResult NS::String::caseInsensitiveCompare(const String* pString) const +{ + return Object::sendMessage(this, 
_NS_PRIVATE_SEL(caseInsensitiveCompare_), pString); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/Foundation/NSTypes.hpp b/metal-cpp/Foundation/NSTypes.hpp index c2fef527..5f098f67 100644 --- a/metal-cpp/Foundation/NSTypes.hpp +++ b/metal-cpp/Foundation/NSTypes.hpp @@ -2,7 +2,7 @@ // // Foundation/NSTypes.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Foundation/NSURL.hpp b/metal-cpp/Foundation/NSURL.hpp index e904a8d1..a7bc3e6e 100644 --- a/metal-cpp/Foundation/NSURL.hpp +++ b/metal-cpp/Foundation/NSURL.hpp @@ -2,7 +2,7 @@ // // Foundation/NSURL.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLAccelerationStructure.hpp b/metal-cpp/Metal/MTLAccelerationStructure.hpp index f2d05e0f..cb30db71 100644 --- a/metal-cpp/Metal/MTLAccelerationStructure.hpp +++ b/metal-cpp/Metal/MTLAccelerationStructure.hpp @@ -2,7 +2,7 @@ // // Metal/MTLAccelerationStructure.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -259,6 +259,140 @@ class AccelerationStructureMotionBoundingBoxGeometryDescriptor : public NS::Copy static MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor* descriptor(); }; +_MTL_ENUM(NS::Integer, CurveType) { + CurveTypeRound = 0, + CurveTypeFlat = 1, +}; + +_MTL_ENUM(NS::Integer, CurveBasis) { + CurveBasisBSpline = 0, + CurveBasisCatmullRom = 1, + CurveBasisLinear = 2, + CurveBasisBezier = 3, +}; + +_MTL_ENUM(NS::Integer, CurveEndCaps) { + CurveEndCapsNone = 0, + CurveEndCapsDisk = 1, + CurveEndCapsSphere = 2, +}; + +class AccelerationStructureCurveGeometryDescriptor : public NS::Copying +{ +public: + static class AccelerationStructureCurveGeometryDescriptor* alloc(); + + class AccelerationStructureCurveGeometryDescriptor* init(); + + class Buffer* controlPointBuffer() const; + void setControlPointBuffer(const class Buffer* controlPointBuffer); + + NS::UInteger controlPointBufferOffset() const; + void setControlPointBufferOffset(NS::UInteger controlPointBufferOffset); + + NS::UInteger controlPointCount() const; + void setControlPointCount(NS::UInteger controlPointCount); + + NS::UInteger controlPointStride() const; + void setControlPointStride(NS::UInteger controlPointStride); + + MTL::AttributeFormat controlPointFormat() const; + void setControlPointFormat(MTL::AttributeFormat controlPointFormat); + + class Buffer* radiusBuffer() const; + void setRadiusBuffer(const class Buffer* radiusBuffer); + + NS::UInteger radiusBufferOffset() const; + void setRadiusBufferOffset(NS::UInteger radiusBufferOffset); + + MTL::AttributeFormat radiusFormat() const; + void setRadiusFormat(MTL::AttributeFormat radiusFormat); + + NS::UInteger radiusStride() const; + void setRadiusStride(NS::UInteger radiusStride); + + class Buffer* indexBuffer() const; + void setIndexBuffer(const class Buffer* indexBuffer); + + NS::UInteger indexBufferOffset() const; + void setIndexBufferOffset(NS::UInteger indexBufferOffset); + + MTL::IndexType indexType() const; + void 
setIndexType(MTL::IndexType indexType); + + NS::UInteger segmentCount() const; + void setSegmentCount(NS::UInteger segmentCount); + + NS::UInteger segmentControlPointCount() const; + void setSegmentControlPointCount(NS::UInteger segmentControlPointCount); + + MTL::CurveType curveType() const; + void setCurveType(MTL::CurveType curveType); + + MTL::CurveBasis curveBasis() const; + void setCurveBasis(MTL::CurveBasis curveBasis); + + MTL::CurveEndCaps curveEndCaps() const; + void setCurveEndCaps(MTL::CurveEndCaps curveEndCaps); + + static MTL::AccelerationStructureCurveGeometryDescriptor* descriptor(); +}; + +class AccelerationStructureMotionCurveGeometryDescriptor : public NS::Copying +{ +public: + static class AccelerationStructureMotionCurveGeometryDescriptor* alloc(); + + class AccelerationStructureMotionCurveGeometryDescriptor* init(); + + NS::Array* controlPointBuffers() const; + void setControlPointBuffers(const NS::Array* controlPointBuffers); + + NS::UInteger controlPointCount() const; + void setControlPointCount(NS::UInteger controlPointCount); + + NS::UInteger controlPointStride() const; + void setControlPointStride(NS::UInteger controlPointStride); + + MTL::AttributeFormat controlPointFormat() const; + void setControlPointFormat(MTL::AttributeFormat controlPointFormat); + + NS::Array* radiusBuffers() const; + void setRadiusBuffers(const NS::Array* radiusBuffers); + + MTL::AttributeFormat radiusFormat() const; + void setRadiusFormat(MTL::AttributeFormat radiusFormat); + + NS::UInteger radiusStride() const; + void setRadiusStride(NS::UInteger radiusStride); + + class Buffer* indexBuffer() const; + void setIndexBuffer(const class Buffer* indexBuffer); + + NS::UInteger indexBufferOffset() const; + void setIndexBufferOffset(NS::UInteger indexBufferOffset); + + MTL::IndexType indexType() const; + void setIndexType(MTL::IndexType indexType); + + NS::UInteger segmentCount() const; + void setSegmentCount(NS::UInteger segmentCount); + + NS::UInteger 
segmentControlPointCount() const; + void setSegmentControlPointCount(NS::UInteger segmentControlPointCount); + + MTL::CurveType curveType() const; + void setCurveType(MTL::CurveType curveType); + + MTL::CurveBasis curveBasis() const; + void setCurveBasis(MTL::CurveBasis curveBasis); + + MTL::CurveEndCaps curveEndCaps() const; + void setCurveEndCaps(MTL::CurveEndCaps curveEndCaps); + + static MTL::AccelerationStructureMotionCurveGeometryDescriptor* descriptor(); +}; + struct AccelerationStructureInstanceDescriptor { MTL::PackedFloat4x3 transformationMatrix; @@ -282,6 +416,8 @@ _MTL_ENUM(NS::UInteger, AccelerationStructureInstanceDescriptorType) { AccelerationStructureInstanceDescriptorTypeDefault = 0, AccelerationStructureInstanceDescriptorTypeUserID = 1, AccelerationStructureInstanceDescriptorTypeMotion = 2, + AccelerationStructureInstanceDescriptorTypeIndirect = 3, + AccelerationStructureInstanceDescriptorTypeIndirectMotion = 4, }; struct AccelerationStructureMotionInstanceDescriptor @@ -299,6 +435,31 @@ struct AccelerationStructureMotionInstanceDescriptor float motionEndTime; } _MTL_PACKED; +struct IndirectAccelerationStructureInstanceDescriptor +{ + MTL::PackedFloat4x3 transformationMatrix; + MTL::AccelerationStructureInstanceOptions options; + uint32_t mask; + uint32_t intersectionFunctionTableOffset; + uint32_t userID; + MTL::ResourceID accelerationStructureID; +} _MTL_PACKED; + +struct IndirectAccelerationStructureMotionInstanceDescriptor +{ + MTL::AccelerationStructureInstanceOptions options; + uint32_t mask; + uint32_t intersectionFunctionTableOffset; + uint32_t userID; + MTL::ResourceID accelerationStructureID; + uint32_t motionTransformsStartIndex; + uint32_t motionTransformsCount; + MTL::MotionBorderMode motionStartBorderMode; + MTL::MotionBorderMode motionEndBorderMode; + float motionStartTime; + float motionEndTime; +} _MTL_PACKED; + class InstanceAccelerationStructureDescriptor : public NS::Copying { public: @@ -336,6 +497,52 @@ class 
InstanceAccelerationStructureDescriptor : public NS::Copying +{ +public: + static class IndirectInstanceAccelerationStructureDescriptor* alloc(); + + class IndirectInstanceAccelerationStructureDescriptor* init(); + + class Buffer* instanceDescriptorBuffer() const; + void setInstanceDescriptorBuffer(const class Buffer* instanceDescriptorBuffer); + + NS::UInteger instanceDescriptorBufferOffset() const; + void setInstanceDescriptorBufferOffset(NS::UInteger instanceDescriptorBufferOffset); + + NS::UInteger instanceDescriptorStride() const; + void setInstanceDescriptorStride(NS::UInteger instanceDescriptorStride); + + NS::UInteger maxInstanceCount() const; + void setMaxInstanceCount(NS::UInteger maxInstanceCount); + + class Buffer* instanceCountBuffer() const; + void setInstanceCountBuffer(const class Buffer* instanceCountBuffer); + + NS::UInteger instanceCountBufferOffset() const; + void setInstanceCountBufferOffset(NS::UInteger instanceCountBufferOffset); + + MTL::AccelerationStructureInstanceDescriptorType instanceDescriptorType() const; + void setInstanceDescriptorType(MTL::AccelerationStructureInstanceDescriptorType instanceDescriptorType); + + class Buffer* motionTransformBuffer() const; + void setMotionTransformBuffer(const class Buffer* motionTransformBuffer); + + NS::UInteger motionTransformBufferOffset() const; + void setMotionTransformBufferOffset(NS::UInteger motionTransformBufferOffset); + + NS::UInteger maxMotionTransformCount() const; + void setMaxMotionTransformCount(NS::UInteger maxMotionTransformCount); + + class Buffer* motionTransformCountBuffer() const; + void setMotionTransformCountBuffer(const class Buffer* motionTransformCountBuffer); + + NS::UInteger motionTransformCountBufferOffset() const; + void setMotionTransformCountBufferOffset(NS::UInteger motionTransformCountBufferOffset); + + static MTL::IndirectInstanceAccelerationStructureDescriptor* descriptor(); +}; + class AccelerationStructure : public NS::Referencing { public: @@ -951,6 +1158,394 
@@ _MTL_INLINE MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor* MTL:: return Object::sendMessage(_MTL_PRIVATE_CLS(MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor), _MTL_PRIVATE_SEL(descriptor)); } +// static method: alloc +_MTL_INLINE MTL::AccelerationStructureCurveGeometryDescriptor* MTL::AccelerationStructureCurveGeometryDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLAccelerationStructureCurveGeometryDescriptor)); +} + +// method: init +_MTL_INLINE MTL::AccelerationStructureCurveGeometryDescriptor* MTL::AccelerationStructureCurveGeometryDescriptor::init() +{ + return NS::Object::init(); +} + +// property: controlPointBuffer +_MTL_INLINE MTL::Buffer* MTL::AccelerationStructureCurveGeometryDescriptor::controlPointBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(controlPointBuffer)); +} + +_MTL_INLINE void MTL::AccelerationStructureCurveGeometryDescriptor::setControlPointBuffer(const MTL::Buffer* controlPointBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setControlPointBuffer_), controlPointBuffer); +} + +// property: controlPointBufferOffset +_MTL_INLINE NS::UInteger MTL::AccelerationStructureCurveGeometryDescriptor::controlPointBufferOffset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(controlPointBufferOffset)); +} + +_MTL_INLINE void MTL::AccelerationStructureCurveGeometryDescriptor::setControlPointBufferOffset(NS::UInteger controlPointBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setControlPointBufferOffset_), controlPointBufferOffset); +} + +// property: controlPointCount +_MTL_INLINE NS::UInteger MTL::AccelerationStructureCurveGeometryDescriptor::controlPointCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(controlPointCount)); +} + +_MTL_INLINE void MTL::AccelerationStructureCurveGeometryDescriptor::setControlPointCount(NS::UInteger controlPointCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setControlPointCount_), 
controlPointCount); +} + +// property: controlPointStride +_MTL_INLINE NS::UInteger MTL::AccelerationStructureCurveGeometryDescriptor::controlPointStride() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(controlPointStride)); +} + +_MTL_INLINE void MTL::AccelerationStructureCurveGeometryDescriptor::setControlPointStride(NS::UInteger controlPointStride) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setControlPointStride_), controlPointStride); +} + +// property: controlPointFormat +_MTL_INLINE MTL::AttributeFormat MTL::AccelerationStructureCurveGeometryDescriptor::controlPointFormat() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(controlPointFormat)); +} + +_MTL_INLINE void MTL::AccelerationStructureCurveGeometryDescriptor::setControlPointFormat(MTL::AttributeFormat controlPointFormat) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setControlPointFormat_), controlPointFormat); +} + +// property: radiusBuffer +_MTL_INLINE MTL::Buffer* MTL::AccelerationStructureCurveGeometryDescriptor::radiusBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(radiusBuffer)); +} + +_MTL_INLINE void MTL::AccelerationStructureCurveGeometryDescriptor::setRadiusBuffer(const MTL::Buffer* radiusBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRadiusBuffer_), radiusBuffer); +} + +// property: radiusBufferOffset +_MTL_INLINE NS::UInteger MTL::AccelerationStructureCurveGeometryDescriptor::radiusBufferOffset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(radiusBufferOffset)); +} + +_MTL_INLINE void MTL::AccelerationStructureCurveGeometryDescriptor::setRadiusBufferOffset(NS::UInteger radiusBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRadiusBufferOffset_), radiusBufferOffset); +} + +// property: radiusFormat +_MTL_INLINE MTL::AttributeFormat MTL::AccelerationStructureCurveGeometryDescriptor::radiusFormat() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(radiusFormat)); +} + +_MTL_INLINE void 
MTL::AccelerationStructureCurveGeometryDescriptor::setRadiusFormat(MTL::AttributeFormat radiusFormat) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRadiusFormat_), radiusFormat); +} + +// property: radiusStride +_MTL_INLINE NS::UInteger MTL::AccelerationStructureCurveGeometryDescriptor::radiusStride() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(radiusStride)); +} + +_MTL_INLINE void MTL::AccelerationStructureCurveGeometryDescriptor::setRadiusStride(NS::UInteger radiusStride) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRadiusStride_), radiusStride); +} + +// property: indexBuffer +_MTL_INLINE MTL::Buffer* MTL::AccelerationStructureCurveGeometryDescriptor::indexBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(indexBuffer)); +} + +_MTL_INLINE void MTL::AccelerationStructureCurveGeometryDescriptor::setIndexBuffer(const MTL::Buffer* indexBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIndexBuffer_), indexBuffer); +} + +// property: indexBufferOffset +_MTL_INLINE NS::UInteger MTL::AccelerationStructureCurveGeometryDescriptor::indexBufferOffset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(indexBufferOffset)); +} + +_MTL_INLINE void MTL::AccelerationStructureCurveGeometryDescriptor::setIndexBufferOffset(NS::UInteger indexBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIndexBufferOffset_), indexBufferOffset); +} + +// property: indexType +_MTL_INLINE MTL::IndexType MTL::AccelerationStructureCurveGeometryDescriptor::indexType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(indexType)); +} + +_MTL_INLINE void MTL::AccelerationStructureCurveGeometryDescriptor::setIndexType(MTL::IndexType indexType) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIndexType_), indexType); +} + +// property: segmentCount +_MTL_INLINE NS::UInteger MTL::AccelerationStructureCurveGeometryDescriptor::segmentCount() const +{ + return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(segmentCount)); +} + +_MTL_INLINE void MTL::AccelerationStructureCurveGeometryDescriptor::setSegmentCount(NS::UInteger segmentCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSegmentCount_), segmentCount); +} + +// property: segmentControlPointCount +_MTL_INLINE NS::UInteger MTL::AccelerationStructureCurveGeometryDescriptor::segmentControlPointCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(segmentControlPointCount)); +} + +_MTL_INLINE void MTL::AccelerationStructureCurveGeometryDescriptor::setSegmentControlPointCount(NS::UInteger segmentControlPointCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSegmentControlPointCount_), segmentControlPointCount); +} + +// property: curveType +_MTL_INLINE MTL::CurveType MTL::AccelerationStructureCurveGeometryDescriptor::curveType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(curveType)); +} + +_MTL_INLINE void MTL::AccelerationStructureCurveGeometryDescriptor::setCurveType(MTL::CurveType curveType) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setCurveType_), curveType); +} + +// property: curveBasis +_MTL_INLINE MTL::CurveBasis MTL::AccelerationStructureCurveGeometryDescriptor::curveBasis() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(curveBasis)); +} + +_MTL_INLINE void MTL::AccelerationStructureCurveGeometryDescriptor::setCurveBasis(MTL::CurveBasis curveBasis) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setCurveBasis_), curveBasis); +} + +// property: curveEndCaps +_MTL_INLINE MTL::CurveEndCaps MTL::AccelerationStructureCurveGeometryDescriptor::curveEndCaps() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(curveEndCaps)); +} + +_MTL_INLINE void MTL::AccelerationStructureCurveGeometryDescriptor::setCurveEndCaps(MTL::CurveEndCaps curveEndCaps) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setCurveEndCaps_), curveEndCaps); +} + +// static method: descriptor +_MTL_INLINE MTL::AccelerationStructureCurveGeometryDescriptor* 
MTL::AccelerationStructureCurveGeometryDescriptor::descriptor() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLAccelerationStructureCurveGeometryDescriptor), _MTL_PRIVATE_SEL(descriptor)); +} + +// static method: alloc +_MTL_INLINE MTL::AccelerationStructureMotionCurveGeometryDescriptor* MTL::AccelerationStructureMotionCurveGeometryDescriptor::alloc() +{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLAccelerationStructureMotionCurveGeometryDescriptor)); +} + +// method: init +_MTL_INLINE MTL::AccelerationStructureMotionCurveGeometryDescriptor* MTL::AccelerationStructureMotionCurveGeometryDescriptor::init() +{ + return NS::Object::init(); +} + +// property: controlPointBuffers +_MTL_INLINE NS::Array* MTL::AccelerationStructureMotionCurveGeometryDescriptor::controlPointBuffers() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(controlPointBuffers)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionCurveGeometryDescriptor::setControlPointBuffers(const NS::Array* controlPointBuffers) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setControlPointBuffers_), controlPointBuffers); +} + +// property: controlPointCount +_MTL_INLINE NS::UInteger MTL::AccelerationStructureMotionCurveGeometryDescriptor::controlPointCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(controlPointCount)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionCurveGeometryDescriptor::setControlPointCount(NS::UInteger controlPointCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setControlPointCount_), controlPointCount); +} + +// property: controlPointStride +_MTL_INLINE NS::UInteger MTL::AccelerationStructureMotionCurveGeometryDescriptor::controlPointStride() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(controlPointStride)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionCurveGeometryDescriptor::setControlPointStride(NS::UInteger controlPointStride) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setControlPointStride_), 
controlPointStride); +} + +// property: controlPointFormat +_MTL_INLINE MTL::AttributeFormat MTL::AccelerationStructureMotionCurveGeometryDescriptor::controlPointFormat() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(controlPointFormat)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionCurveGeometryDescriptor::setControlPointFormat(MTL::AttributeFormat controlPointFormat) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setControlPointFormat_), controlPointFormat); +} + +// property: radiusBuffers +_MTL_INLINE NS::Array* MTL::AccelerationStructureMotionCurveGeometryDescriptor::radiusBuffers() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(radiusBuffers)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionCurveGeometryDescriptor::setRadiusBuffers(const NS::Array* radiusBuffers) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRadiusBuffers_), radiusBuffers); +} + +// property: radiusFormat +_MTL_INLINE MTL::AttributeFormat MTL::AccelerationStructureMotionCurveGeometryDescriptor::radiusFormat() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(radiusFormat)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionCurveGeometryDescriptor::setRadiusFormat(MTL::AttributeFormat radiusFormat) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRadiusFormat_), radiusFormat); +} + +// property: radiusStride +_MTL_INLINE NS::UInteger MTL::AccelerationStructureMotionCurveGeometryDescriptor::radiusStride() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(radiusStride)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionCurveGeometryDescriptor::setRadiusStride(NS::UInteger radiusStride) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setRadiusStride_), radiusStride); +} + +// property: indexBuffer +_MTL_INLINE MTL::Buffer* MTL::AccelerationStructureMotionCurveGeometryDescriptor::indexBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(indexBuffer)); +} + +_MTL_INLINE void 
MTL::AccelerationStructureMotionCurveGeometryDescriptor::setIndexBuffer(const MTL::Buffer* indexBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIndexBuffer_), indexBuffer); +} + +// property: indexBufferOffset +_MTL_INLINE NS::UInteger MTL::AccelerationStructureMotionCurveGeometryDescriptor::indexBufferOffset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(indexBufferOffset)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionCurveGeometryDescriptor::setIndexBufferOffset(NS::UInteger indexBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIndexBufferOffset_), indexBufferOffset); +} + +// property: indexType +_MTL_INLINE MTL::IndexType MTL::AccelerationStructureMotionCurveGeometryDescriptor::indexType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(indexType)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionCurveGeometryDescriptor::setIndexType(MTL::IndexType indexType) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setIndexType_), indexType); +} + +// property: segmentCount +_MTL_INLINE NS::UInteger MTL::AccelerationStructureMotionCurveGeometryDescriptor::segmentCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(segmentCount)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionCurveGeometryDescriptor::setSegmentCount(NS::UInteger segmentCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSegmentCount_), segmentCount); +} + +// property: segmentControlPointCount +_MTL_INLINE NS::UInteger MTL::AccelerationStructureMotionCurveGeometryDescriptor::segmentControlPointCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(segmentControlPointCount)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionCurveGeometryDescriptor::setSegmentControlPointCount(NS::UInteger segmentControlPointCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSegmentControlPointCount_), segmentControlPointCount); +} + +// property: curveType +_MTL_INLINE MTL::CurveType 
MTL::AccelerationStructureMotionCurveGeometryDescriptor::curveType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(curveType)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionCurveGeometryDescriptor::setCurveType(MTL::CurveType curveType) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setCurveType_), curveType); +} + +// property: curveBasis +_MTL_INLINE MTL::CurveBasis MTL::AccelerationStructureMotionCurveGeometryDescriptor::curveBasis() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(curveBasis)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionCurveGeometryDescriptor::setCurveBasis(MTL::CurveBasis curveBasis) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setCurveBasis_), curveBasis); +} + +// property: curveEndCaps +_MTL_INLINE MTL::CurveEndCaps MTL::AccelerationStructureMotionCurveGeometryDescriptor::curveEndCaps() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(curveEndCaps)); +} + +_MTL_INLINE void MTL::AccelerationStructureMotionCurveGeometryDescriptor::setCurveEndCaps(MTL::CurveEndCaps curveEndCaps) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setCurveEndCaps_), curveEndCaps); +} + +// static method: descriptor +_MTL_INLINE MTL::AccelerationStructureMotionCurveGeometryDescriptor* MTL::AccelerationStructureMotionCurveGeometryDescriptor::descriptor() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLAccelerationStructureMotionCurveGeometryDescriptor), _MTL_PRIVATE_SEL(descriptor)); +} + // static method: alloc _MTL_INLINE MTL::InstanceAccelerationStructureDescriptor* MTL::InstanceAccelerationStructureDescriptor::alloc() { @@ -1068,6 +1663,156 @@ _MTL_INLINE MTL::InstanceAccelerationStructureDescriptor* MTL::InstanceAccelerat return Object::sendMessage(_MTL_PRIVATE_CLS(MTLInstanceAccelerationStructureDescriptor), _MTL_PRIVATE_SEL(descriptor)); } +// static method: alloc +_MTL_INLINE MTL::IndirectInstanceAccelerationStructureDescriptor* MTL::IndirectInstanceAccelerationStructureDescriptor::alloc() 
+{ + return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLIndirectInstanceAccelerationStructureDescriptor)); +} + +// method: init +_MTL_INLINE MTL::IndirectInstanceAccelerationStructureDescriptor* MTL::IndirectInstanceAccelerationStructureDescriptor::init() +{ + return NS::Object::init(); +} + +// property: instanceDescriptorBuffer +_MTL_INLINE MTL::Buffer* MTL::IndirectInstanceAccelerationStructureDescriptor::instanceDescriptorBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(instanceDescriptorBuffer)); +} + +_MTL_INLINE void MTL::IndirectInstanceAccelerationStructureDescriptor::setInstanceDescriptorBuffer(const MTL::Buffer* instanceDescriptorBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setInstanceDescriptorBuffer_), instanceDescriptorBuffer); +} + +// property: instanceDescriptorBufferOffset +_MTL_INLINE NS::UInteger MTL::IndirectInstanceAccelerationStructureDescriptor::instanceDescriptorBufferOffset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(instanceDescriptorBufferOffset)); +} + +_MTL_INLINE void MTL::IndirectInstanceAccelerationStructureDescriptor::setInstanceDescriptorBufferOffset(NS::UInteger instanceDescriptorBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setInstanceDescriptorBufferOffset_), instanceDescriptorBufferOffset); +} + +// property: instanceDescriptorStride +_MTL_INLINE NS::UInteger MTL::IndirectInstanceAccelerationStructureDescriptor::instanceDescriptorStride() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(instanceDescriptorStride)); +} + +_MTL_INLINE void MTL::IndirectInstanceAccelerationStructureDescriptor::setInstanceDescriptorStride(NS::UInteger instanceDescriptorStride) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setInstanceDescriptorStride_), instanceDescriptorStride); +} + +// property: maxInstanceCount +_MTL_INLINE NS::UInteger MTL::IndirectInstanceAccelerationStructureDescriptor::maxInstanceCount() const +{ + return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(maxInstanceCount)); +} + +_MTL_INLINE void MTL::IndirectInstanceAccelerationStructureDescriptor::setMaxInstanceCount(NS::UInteger maxInstanceCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxInstanceCount_), maxInstanceCount); +} + +// property: instanceCountBuffer +_MTL_INLINE MTL::Buffer* MTL::IndirectInstanceAccelerationStructureDescriptor::instanceCountBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(instanceCountBuffer)); +} + +_MTL_INLINE void MTL::IndirectInstanceAccelerationStructureDescriptor::setInstanceCountBuffer(const MTL::Buffer* instanceCountBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setInstanceCountBuffer_), instanceCountBuffer); +} + +// property: instanceCountBufferOffset +_MTL_INLINE NS::UInteger MTL::IndirectInstanceAccelerationStructureDescriptor::instanceCountBufferOffset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(instanceCountBufferOffset)); +} + +_MTL_INLINE void MTL::IndirectInstanceAccelerationStructureDescriptor::setInstanceCountBufferOffset(NS::UInteger instanceCountBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setInstanceCountBufferOffset_), instanceCountBufferOffset); +} + +// property: instanceDescriptorType +_MTL_INLINE MTL::AccelerationStructureInstanceDescriptorType MTL::IndirectInstanceAccelerationStructureDescriptor::instanceDescriptorType() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(instanceDescriptorType)); +} + +_MTL_INLINE void MTL::IndirectInstanceAccelerationStructureDescriptor::setInstanceDescriptorType(MTL::AccelerationStructureInstanceDescriptorType instanceDescriptorType) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setInstanceDescriptorType_), instanceDescriptorType); +} + +// property: motionTransformBuffer +_MTL_INLINE MTL::Buffer* MTL::IndirectInstanceAccelerationStructureDescriptor::motionTransformBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(motionTransformBuffer)); +} + 
+_MTL_INLINE void MTL::IndirectInstanceAccelerationStructureDescriptor::setMotionTransformBuffer(const MTL::Buffer* motionTransformBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMotionTransformBuffer_), motionTransformBuffer); +} + +// property: motionTransformBufferOffset +_MTL_INLINE NS::UInteger MTL::IndirectInstanceAccelerationStructureDescriptor::motionTransformBufferOffset() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(motionTransformBufferOffset)); +} + +_MTL_INLINE void MTL::IndirectInstanceAccelerationStructureDescriptor::setMotionTransformBufferOffset(NS::UInteger motionTransformBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMotionTransformBufferOffset_), motionTransformBufferOffset); +} + +// property: maxMotionTransformCount +_MTL_INLINE NS::UInteger MTL::IndirectInstanceAccelerationStructureDescriptor::maxMotionTransformCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxMotionTransformCount)); +} + +_MTL_INLINE void MTL::IndirectInstanceAccelerationStructureDescriptor::setMaxMotionTransformCount(NS::UInteger maxMotionTransformCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxMotionTransformCount_), maxMotionTransformCount); +} + +// property: motionTransformCountBuffer +_MTL_INLINE MTL::Buffer* MTL::IndirectInstanceAccelerationStructureDescriptor::motionTransformCountBuffer() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(motionTransformCountBuffer)); +} + +_MTL_INLINE void MTL::IndirectInstanceAccelerationStructureDescriptor::setMotionTransformCountBuffer(const MTL::Buffer* motionTransformCountBuffer) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMotionTransformCountBuffer_), motionTransformCountBuffer); +} + +// property: motionTransformCountBufferOffset +_MTL_INLINE NS::UInteger MTL::IndirectInstanceAccelerationStructureDescriptor::motionTransformCountBufferOffset() const +{ + return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(motionTransformCountBufferOffset)); +} + +_MTL_INLINE void MTL::IndirectInstanceAccelerationStructureDescriptor::setMotionTransformCountBufferOffset(NS::UInteger motionTransformCountBufferOffset) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMotionTransformCountBufferOffset_), motionTransformCountBufferOffset); +} + +// static method: descriptor +_MTL_INLINE MTL::IndirectInstanceAccelerationStructureDescriptor* MTL::IndirectInstanceAccelerationStructureDescriptor::descriptor() +{ + return Object::sendMessage(_MTL_PRIVATE_CLS(MTLIndirectInstanceAccelerationStructureDescriptor), _MTL_PRIVATE_SEL(descriptor)); +} + // property: size _MTL_INLINE NS::UInteger MTL::AccelerationStructure::size() const { diff --git a/metal-cpp/Metal/MTLAccelerationStructureCommandEncoder.hpp b/metal-cpp/Metal/MTLAccelerationStructureCommandEncoder.hpp index 8a07cec0..e0b4ccd9 100644 --- a/metal-cpp/Metal/MTLAccelerationStructureCommandEncoder.hpp +++ b/metal-cpp/Metal/MTLAccelerationStructureCommandEncoder.hpp @@ -2,7 +2,7 @@ // // Metal/MTLAccelerationStructureCommandEncoder.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLAccelerationStructureTypes.hpp b/metal-cpp/Metal/MTLAccelerationStructureTypes.hpp index 8a4b95f0..146ffc20 100644 --- a/metal-cpp/Metal/MTLAccelerationStructureTypes.hpp +++ b/metal-cpp/Metal/MTLAccelerationStructureTypes.hpp @@ -2,7 +2,7 @@ // // Metal/MTLAccelerationStructureTypes.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/metal-cpp/Metal/MTLArgument.hpp b/metal-cpp/Metal/MTLArgument.hpp index d92ce579..796fa332 100644 --- a/metal-cpp/Metal/MTLArgument.hpp +++ b/metal-cpp/Metal/MTLArgument.hpp @@ -2,7 +2,7 @@ // // Metal/MTLArgument.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -123,6 +123,10 @@ _MTL_ENUM(NS::UInteger, DataType) { DataTypeIntersectionFunctionTable = 116, DataTypePrimitiveAccelerationStructure = 117, DataTypeInstanceAccelerationStructure = 118, + DataTypeBFloat = 121, + DataTypeBFloat2 = 122, + DataTypeBFloat3 = 123, + DataTypeBFloat4 = 124, }; _MTL_ENUM(NS::Integer, BindingType) { @@ -152,7 +156,10 @@ _MTL_ENUM(NS::UInteger, ArgumentType) { ArgumentTypeIntersectionFunctionTable = 27, }; -_MTL_ENUM(NS::UInteger, ArgumentAccess) { +_MTL_ENUM(NS::UInteger, BindingAccess) { + BindingAccessReadOnly = 0, + BindingAccessReadWrite = 1, + BindingAccessWriteOnly = 2, ArgumentAccessReadOnly = 0, ArgumentAccessReadWrite = 1, ArgumentAccessWriteOnly = 2, @@ -237,7 +244,7 @@ class PointerType : public NS::Referencing MTL::DataType elementType() const; - MTL::ArgumentAccess access() const; + MTL::BindingAccess access() const; NS::UInteger alignment() const; @@ -261,7 +268,7 @@ class TextureReferenceType : public NS::Referencing MTL::TextureType textureType() const; - MTL::ArgumentAccess access() const; + MTL::BindingAccess access() const; bool isDepthTexture() const; }; @@ -277,7 +284,7 @@ class Argument : public NS::Referencing MTL::ArgumentType type() const; - MTL::ArgumentAccess access() const; + MTL::BindingAccess access() const; NS::UInteger index() const; @@ -309,17 +316,17 @@ class Argument : public NS::Referencing class Binding : public NS::Referencing { public: - NS::String* name() const; + NS::String* name() const; - MTL::BindingType type() const; + MTL::BindingType type() const; - 
MTL::ArgumentAccess access() const; + MTL::BindingAccess access() const; - NS::UInteger index() const; + NS::UInteger index() const; - bool used() const; + bool used() const; - bool argument() const; + bool argument() const; }; class BufferBinding : public NS::Referencing @@ -547,9 +554,9 @@ _MTL_INLINE MTL::DataType MTL::PointerType::elementType() const } // property: access -_MTL_INLINE MTL::ArgumentAccess MTL::PointerType::access() const +_MTL_INLINE MTL::BindingAccess MTL::PointerType::access() const { - return Object::sendMessage(this, _MTL_PRIVATE_SEL(access)); + return Object::sendMessage(this, _MTL_PRIVATE_SEL(access)); } // property: alignment @@ -607,9 +614,9 @@ _MTL_INLINE MTL::TextureType MTL::TextureReferenceType::textureType() const } // property: access -_MTL_INLINE MTL::ArgumentAccess MTL::TextureReferenceType::access() const +_MTL_INLINE MTL::BindingAccess MTL::TextureReferenceType::access() const { - return Object::sendMessage(this, _MTL_PRIVATE_SEL(access)); + return Object::sendMessage(this, _MTL_PRIVATE_SEL(access)); } // property: isDepthTexture @@ -643,9 +650,9 @@ _MTL_INLINE MTL::ArgumentType MTL::Argument::type() const } // property: access -_MTL_INLINE MTL::ArgumentAccess MTL::Argument::access() const +_MTL_INLINE MTL::BindingAccess MTL::Argument::access() const { - return Object::sendMessage(this, _MTL_PRIVATE_SEL(access)); + return Object::sendMessage(this, _MTL_PRIVATE_SEL(access)); } // property: index @@ -739,9 +746,9 @@ _MTL_INLINE MTL::BindingType MTL::Binding::type() const } // property: access -_MTL_INLINE MTL::ArgumentAccess MTL::Binding::access() const +_MTL_INLINE MTL::BindingAccess MTL::Binding::access() const { - return Object::sendMessage(this, _MTL_PRIVATE_SEL(access)); + return Object::sendMessage(this, _MTL_PRIVATE_SEL(access)); } // property: index diff --git a/metal-cpp/Metal/MTLArgumentEncoder.hpp b/metal-cpp/Metal/MTLArgumentEncoder.hpp index b4fab77f..a81859c7 100644 --- a/metal-cpp/Metal/MTLArgumentEncoder.hpp +++ 
b/metal-cpp/Metal/MTLArgumentEncoder.hpp @@ -2,7 +2,7 @@ // // Metal/MTLArgumentEncoder.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -28,6 +28,9 @@ namespace MTL { + +static const NS::UInteger AttributeStrideStatic = NS::UIntegerMax; + class ArgumentEncoder : public NS::Referencing { public: diff --git a/metal-cpp/Metal/MTLBinaryArchive.hpp b/metal-cpp/Metal/MTLBinaryArchive.hpp index 7d123625..1c77c078 100644 --- a/metal-cpp/Metal/MTLBinaryArchive.hpp +++ b/metal-cpp/Metal/MTLBinaryArchive.hpp @@ -2,7 +2,7 @@ // // Metal/MTLBinaryArchive.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLBlitCommandEncoder.hpp b/metal-cpp/Metal/MTLBlitCommandEncoder.hpp index fe64f822..8d4845a8 100644 --- a/metal-cpp/Metal/MTLBlitCommandEncoder.hpp +++ b/metal-cpp/Metal/MTLBlitCommandEncoder.hpp @@ -2,7 +2,7 @@ // // Metal/MTLBlitCommandEncoder.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLBlitPass.hpp b/metal-cpp/Metal/MTLBlitPass.hpp index f16e8fdb..84987525 100644 --- a/metal-cpp/Metal/MTLBlitPass.hpp +++ b/metal-cpp/Metal/MTLBlitPass.hpp @@ -2,7 +2,7 @@ // // Metal/MTLBlitPass.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/metal-cpp/Metal/MTLBuffer.hpp b/metal-cpp/Metal/MTLBuffer.hpp index f936f139..684ee8e2 100644 --- a/metal-cpp/Metal/MTLBuffer.hpp +++ b/metal-cpp/Metal/MTLBuffer.hpp @@ -2,7 +2,7 @@ // // Metal/MTLBuffer.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLCaptureManager.hpp b/metal-cpp/Metal/MTLCaptureManager.hpp index 0cdf53b8..ebe7ddd2 100644 --- a/metal-cpp/Metal/MTLCaptureManager.hpp +++ b/metal-cpp/Metal/MTLCaptureManager.hpp @@ -2,7 +2,7 @@ // // Metal/MTLCaptureManager.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLCaptureScope.hpp b/metal-cpp/Metal/MTLCaptureScope.hpp index 1ad42469..6d5d1d6f 100644 --- a/metal-cpp/Metal/MTLCaptureScope.hpp +++ b/metal-cpp/Metal/MTLCaptureScope.hpp @@ -2,7 +2,7 @@ // // Metal/MTLCaptureScope.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLCommandBuffer.hpp b/metal-cpp/Metal/MTLCommandBuffer.hpp index 6bc12d02..64bdf35e 100644 --- a/metal-cpp/Metal/MTLCommandBuffer.hpp +++ b/metal-cpp/Metal/MTLCommandBuffer.hpp @@ -2,7 +2,7 @@ // // Metal/MTLCommandBuffer.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -45,8 +45,8 @@ _MTL_ENUM(NS::UInteger, CommandBufferError) { CommandBufferErrorInternal = 1, CommandBufferErrorTimeout = 2, CommandBufferErrorPageFault = 3, - CommandBufferErrorAccessRevoked = 4, CommandBufferErrorBlacklisted = 4, + CommandBufferErrorAccessRevoked = 4, CommandBufferErrorNotPermitted = 7, CommandBufferErrorOutOfMemory = 8, CommandBufferErrorInvalidResource = 9, diff --git a/metal-cpp/Metal/MTLCommandEncoder.hpp b/metal-cpp/Metal/MTLCommandEncoder.hpp index 8b5e6651..9a4d97c7 100644 --- a/metal-cpp/Metal/MTLCommandEncoder.hpp +++ b/metal-cpp/Metal/MTLCommandEncoder.hpp @@ -2,7 +2,7 @@ // // Metal/MTLCommandEncoder.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLCommandQueue.hpp b/metal-cpp/Metal/MTLCommandQueue.hpp index 42678af9..07b29844 100644 --- a/metal-cpp/Metal/MTLCommandQueue.hpp +++ b/metal-cpp/Metal/MTLCommandQueue.hpp @@ -2,7 +2,7 @@ // // Metal/MTLCommandQueue.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLComputeCommandEncoder.hpp b/metal-cpp/Metal/MTLComputeCommandEncoder.hpp index f3afd00b..50a3b241 100644 --- a/metal-cpp/Metal/MTLComputeCommandEncoder.hpp +++ b/metal-cpp/Metal/MTLComputeCommandEncoder.hpp @@ -2,7 +2,7 @@ // // Metal/MTLComputeCommandEncoder.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -58,6 +58,14 @@ class ComputeCommandEncoder : public NS::Referencing(this, _MTL_PRIVATE_SEL(setBuffers_offsets_withRange_), buffers, offsets, range); } +// method: setBuffer:offset:attributeStride:atIndex: +_MTL_INLINE void MTL::ComputeCommandEncoder::setBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger stride, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBuffer_offset_attributeStride_atIndex_), buffer, offset, stride, index); +} + +// method: setBuffers:offsets:attributeStrides:withRange: +_MTL_INLINE void MTL::ComputeCommandEncoder::setBuffers(const MTL::Buffer* const buffers[], const NS::UInteger* offsets, const NS::UInteger* strides, NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBuffers_offsets_attributeStrides_withRange_), buffers, offsets, strides, range); +} + +// method: setBufferOffset:attributeStride:atIndex: +_MTL_INLINE void MTL::ComputeCommandEncoder::setBufferOffset(NS::UInteger offset, NS::UInteger stride, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBufferOffset_attributeStride_atIndex_), offset, stride, index); +} + +// method: setBytes:length:attributeStride:atIndex: +_MTL_INLINE void MTL::ComputeCommandEncoder::setBytes(const void* bytes, NS::UInteger length, NS::UInteger stride, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBytes_length_attributeStride_atIndex_), bytes, length, stride, index); +} + // method: setVisibleFunctionTable:atBufferIndex: _MTL_INLINE void MTL::ComputeCommandEncoder::setVisibleFunctionTable(const MTL::VisibleFunctionTable* visibleFunctionTable, NS::UInteger bufferIndex) { diff --git a/metal-cpp/Metal/MTLComputePass.hpp b/metal-cpp/Metal/MTLComputePass.hpp index 6cc79f8e..bbc71841 100644 --- a/metal-cpp/Metal/MTLComputePass.hpp +++ b/metal-cpp/Metal/MTLComputePass.hpp @@ -2,7 +2,7 @@ // // Metal/MTLComputePass.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLComputePipeline.hpp b/metal-cpp/Metal/MTLComputePipeline.hpp index 35275a39..3065b8f2 100644 --- a/metal-cpp/Metal/MTLComputePipeline.hpp +++ b/metal-cpp/Metal/MTLComputePipeline.hpp @@ -2,7 +2,7 @@ // // Metal/MTLComputePipeline.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLCounters.hpp b/metal-cpp/Metal/MTLCounters.hpp index 7bff72c6..c552f7ed 100644 --- a/metal-cpp/Metal/MTLCounters.hpp +++ b/metal-cpp/Metal/MTLCounters.hpp @@ -2,7 +2,7 @@ // // Metal/MTLCounters.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLDefines.hpp b/metal-cpp/Metal/MTLDefines.hpp index b2aba9d0..7dd8ff95 100644 --- a/metal-cpp/Metal/MTLDefines.hpp +++ b/metal-cpp/Metal/MTLDefines.hpp @@ -2,7 +2,7 @@ // // Metal/MTLDefines.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLDepthStencil.hpp b/metal-cpp/Metal/MTLDepthStencil.hpp index 1cb24764..ba8bd8b0 100644 --- a/metal-cpp/Metal/MTLDepthStencil.hpp +++ b/metal-cpp/Metal/MTLDepthStencil.hpp @@ -2,7 +2,7 @@ // // Metal/MTLDepthStencil.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/metal-cpp/Metal/MTLDevice.hpp b/metal-cpp/Metal/MTLDevice.hpp index 514d6387..6843131d 100644 --- a/metal-cpp/Metal/MTLDevice.hpp +++ b/metal-cpp/Metal/MTLDevice.hpp @@ -2,7 +2,7 @@ // // Metal/MTLDevice.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -67,8 +67,8 @@ _MTL_ENUM(NS::UInteger, FeatureSet) { FeatureSet_OSX_GPUFamily1_v1 = 10000, FeatureSet_macOS_GPUFamily1_v2 = 10001, FeatureSet_OSX_GPUFamily1_v2 = 10001, - FeatureSet_OSX_ReadWriteTextureTier2 = 10002, FeatureSet_macOS_ReadWriteTextureTier2 = 10002, + FeatureSet_OSX_ReadWriteTextureTier2 = 10002, FeatureSet_macOS_GPUFamily1_v3 = 10003, FeatureSet_macOS_GPUFamily1_v4 = 10004, FeatureSet_macOS_GPUFamily2_v1 = 10005, @@ -94,6 +94,7 @@ _MTL_ENUM(NS::Integer, GPUFamily) { GPUFamilyApple6 = 1006, GPUFamilyApple7 = 1007, GPUFamilyApple8 = 1008, + GPUFamilyApple9 = 1009, GPUFamilyMac1 = 2001, GPUFamilyMac2 = 2002, GPUFamilyCommon1 = 3001, @@ -179,8 +180,8 @@ class ArgumentDescriptor : public NS::Copying NS::UInteger arrayLength() const; void setArrayLength(NS::UInteger arrayLength); - MTL::ArgumentAccess access() const; - void setAccess(MTL::ArgumentAccess access); + MTL::BindingAccess access() const; + void setAccess(MTL::BindingAccess access); MTL::TextureType textureType() const; void setTextureType(MTL::TextureType textureType); @@ -189,6 +190,16 @@ class ArgumentDescriptor : public NS::Copying void setConstantBlockAlignment(NS::UInteger constantBlockAlignment); }; +class Architecture : public NS::Copying +{ +public: + static class Architecture* alloc(); + + class Architecture* init(); + + NS::String* name() const; +}; + using DeviceNotificationName = NS::String*; _MTL_CONST(DeviceNotificationName, DeviceWasAddedNotification); _MTL_CONST(DeviceNotificationName, DeviceRemovalRequestedNotification); @@ -260,6 +271,8 @@ class 
Device : public NS::Referencing uint64_t registryID() const; + class Architecture* architecture() const; + MTL::Size maxThreadsPerThreadgroup() const; bool lowPower() const; @@ -422,6 +435,10 @@ class Device : public NS::Referencing class IOFileHandle* newIOHandle(const NS::URL* url, MTL::IOCompressionMethod compressionMethod, NS::Error** error); + class IOFileHandle* newIOFileHandle(const NS::URL* url, NS::Error** error); + + class IOFileHandle* newIOFileHandle(const NS::URL* url, MTL::IOCompressionMethod compressionMethod, NS::Error** error); + MTL::Size sparseTileSize(MTL::TextureType textureType, MTL::PixelFormat pixelFormat, NS::UInteger sampleCount); NS::UInteger sparseTileSizeInBytes() const; @@ -477,6 +494,11 @@ class Device : public NS::Referencing bool supportsRaytracingFromRender() const; bool supportsPrimitiveMotionBlur() const; + + bool shouldMaximizeConcurrentCompilation() const; + void setShouldMaximizeConcurrentCompilation(bool shouldMaximizeConcurrentCompilation); + + NS::UInteger maximumConcurrentCompilationTaskCount() const; }; } @@ -533,12 +555,12 @@ _MTL_INLINE void MTL::ArgumentDescriptor::setArrayLength(NS::UInteger arrayLengt } // property: access -_MTL_INLINE MTL::ArgumentAccess MTL::ArgumentDescriptor::access() const +_MTL_INLINE MTL::BindingAccess MTL::ArgumentDescriptor::access() const { - return Object::sendMessage(this, _MTL_PRIVATE_SEL(access)); + return Object::sendMessage(this, _MTL_PRIVATE_SEL(access)); } -_MTL_INLINE void MTL::ArgumentDescriptor::setAccess(MTL::ArgumentAccess access) +_MTL_INLINE void MTL::ArgumentDescriptor::setAccess(MTL::BindingAccess access) { Object::sendMessage(this, _MTL_PRIVATE_SEL(setAccess_), access); } @@ -565,6 +587,24 @@ _MTL_INLINE void MTL::ArgumentDescriptor::setConstantBlockAlignment(NS::UInteger Object::sendMessage(this, _MTL_PRIVATE_SEL(setConstantBlockAlignment_), constantBlockAlignment); } +// static method: alloc +_MTL_INLINE MTL::Architecture* MTL::Architecture::alloc() +{ + return 
NS::Object::alloc(_MTL_PRIVATE_CLS(MTLArchitecture)); +} + +// method: init +_MTL_INLINE MTL::Architecture* MTL::Architecture::init() +{ + return NS::Object::init(); +} + +// property: name +_MTL_INLINE NS::String* MTL::Architecture::name() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(name)); +} + _MTL_PRIVATE_DEF_WEAK_CONST(MTL::DeviceNotificationName, DeviceWasAddedNotification); _MTL_PRIVATE_DEF_WEAK_CONST(MTL::DeviceNotificationName, DeviceRemovalRequestedNotification); _MTL_PRIVATE_DEF_WEAK_CONST(MTL::DeviceNotificationName, DeviceWasRemovedNotification); @@ -697,6 +737,12 @@ _MTL_INLINE uint64_t MTL::Device::registryID() const return Object::sendMessage(this, _MTL_PRIVATE_SEL(registryID)); } +// property: architecture +_MTL_INLINE MTL::Architecture* MTL::Device::architecture() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(architecture)); +} + // property: maxThreadsPerThreadgroup _MTL_INLINE MTL::Size MTL::Device::maxThreadsPerThreadgroup() const { @@ -1183,6 +1229,18 @@ _MTL_INLINE MTL::IOFileHandle* MTL::Device::newIOHandle(const NS::URL* url, MTL: return Object::sendMessage(this, _MTL_PRIVATE_SEL(newIOHandleWithURL_compressionMethod_error_), url, compressionMethod, error); } +// method: newIOFileHandleWithURL:error: +_MTL_INLINE MTL::IOFileHandle* MTL::Device::newIOFileHandle(const NS::URL* url, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newIOFileHandleWithURL_error_), url, error); +} + +// method: newIOFileHandleWithURL:compressionMethod:error: +_MTL_INLINE MTL::IOFileHandle* MTL::Device::newIOFileHandle(const NS::URL* url, MTL::IOCompressionMethod compressionMethod, NS::Error** error) +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newIOFileHandleWithURL_compressionMethod_error_), url, compressionMethod, error); +} + // method: sparseTileSizeWithTextureType:pixelFormat:sampleCount: _MTL_INLINE MTL::Size MTL::Device::sparseTileSize(MTL::TextureType textureType, MTL::PixelFormat 
pixelFormat, NS::UInteger sampleCount) { @@ -1350,3 +1408,20 @@ _MTL_INLINE bool MTL::Device::supportsPrimitiveMotionBlur() const { return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportsPrimitiveMotionBlur)); } + +// property: shouldMaximizeConcurrentCompilation +_MTL_INLINE bool MTL::Device::shouldMaximizeConcurrentCompilation() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(shouldMaximizeConcurrentCompilation)); +} + +_MTL_INLINE void MTL::Device::setShouldMaximizeConcurrentCompilation(bool shouldMaximizeConcurrentCompilation) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setShouldMaximizeConcurrentCompilation_), shouldMaximizeConcurrentCompilation); +} + +// property: maximumConcurrentCompilationTaskCount +_MTL_INLINE NS::UInteger MTL::Device::maximumConcurrentCompilationTaskCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maximumConcurrentCompilationTaskCount)); +} diff --git a/metal-cpp/Metal/MTLDrawable.hpp b/metal-cpp/Metal/MTLDrawable.hpp index b23232b3..58945d03 100644 --- a/metal-cpp/Metal/MTLDrawable.hpp +++ b/metal-cpp/Metal/MTLDrawable.hpp @@ -2,7 +2,7 @@ // // Metal/MTLDrawable.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLDynamicLibrary.hpp b/metal-cpp/Metal/MTLDynamicLibrary.hpp index 3c6c2eb6..f951d125 100644 --- a/metal-cpp/Metal/MTLDynamicLibrary.hpp +++ b/metal-cpp/Metal/MTLDynamicLibrary.hpp @@ -2,7 +2,7 @@ // // Metal/MTLDynamicLibrary.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/metal-cpp/Metal/MTLEvent.hpp b/metal-cpp/Metal/MTLEvent.hpp index abb4e06d..965fc39e 100644 --- a/metal-cpp/Metal/MTLEvent.hpp +++ b/metal-cpp/Metal/MTLEvent.hpp @@ -2,7 +2,7 @@ // // Metal/MTLEvent.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLFence.hpp b/metal-cpp/Metal/MTLFence.hpp index 6337ac17..c8ef24ca 100644 --- a/metal-cpp/Metal/MTLFence.hpp +++ b/metal-cpp/Metal/MTLFence.hpp @@ -2,7 +2,7 @@ // // Metal/MTLFence.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLFunctionConstantValues.hpp b/metal-cpp/Metal/MTLFunctionConstantValues.hpp index 93157c6a..d23d98fc 100644 --- a/metal-cpp/Metal/MTLFunctionConstantValues.hpp +++ b/metal-cpp/Metal/MTLFunctionConstantValues.hpp @@ -2,7 +2,7 @@ // // Metal/MTLFunctionConstantValues.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLFunctionDescriptor.hpp b/metal-cpp/Metal/MTLFunctionDescriptor.hpp index 02188ab5..ec82d981 100644 --- a/metal-cpp/Metal/MTLFunctionDescriptor.hpp +++ b/metal-cpp/Metal/MTLFunctionDescriptor.hpp @@ -2,7 +2,7 @@ // // Metal/MTLFunctionDescriptor.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -33,6 +33,7 @@ namespace MTL _MTL_OPTIONS(NS::UInteger, FunctionOptions) { FunctionOptionNone = 0, FunctionOptionCompileToBinary = 1, + FunctionOptionStoreFunctionInMetalScript = 2, }; class FunctionDescriptor : public NS::Copying diff --git a/metal-cpp/Metal/MTLFunctionHandle.hpp b/metal-cpp/Metal/MTLFunctionHandle.hpp index 8b0785a2..30f71f2e 100644 --- a/metal-cpp/Metal/MTLFunctionHandle.hpp +++ b/metal-cpp/Metal/MTLFunctionHandle.hpp @@ -2,7 +2,7 @@ // // Metal/MTLFunctionHandle.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLFunctionLog.hpp b/metal-cpp/Metal/MTLFunctionLog.hpp index 31404bc8..2a899525 100644 --- a/metal-cpp/Metal/MTLFunctionLog.hpp +++ b/metal-cpp/Metal/MTLFunctionLog.hpp @@ -2,7 +2,7 @@ // // Metal/MTLFunctionLog.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLFunctionStitching.hpp b/metal-cpp/Metal/MTLFunctionStitching.hpp index a0c2b566..0ae7d6a4 100644 --- a/metal-cpp/Metal/MTLFunctionStitching.hpp +++ b/metal-cpp/Metal/MTLFunctionStitching.hpp @@ -2,7 +2,7 @@ // // Metal/MTLFunctionStitching.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -32,6 +32,7 @@ namespace MTL { class FunctionStitchingAttribute : public NS::Referencing { +public: }; class FunctionStitchingAttributeAlwaysInline : public NS::Referencing @@ -44,6 +45,7 @@ class FunctionStitchingAttributeAlwaysInline : public NS::Referencing { +public: }; class FunctionStitchingInputNode : public NS::Referencing diff --git a/metal-cpp/Metal/MTLHeaderBridge.hpp b/metal-cpp/Metal/MTLHeaderBridge.hpp index 95da4964..21306c40 100644 --- a/metal-cpp/Metal/MTLHeaderBridge.hpp +++ b/metal-cpp/Metal/MTLHeaderBridge.hpp @@ -2,7 +2,7 @@ // // Metal/MTLHeaderBridge.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -25,14 +25,17 @@ namespace MTL::Private::Class { _MTL_PRIVATE_DEF_CLS(MTLAccelerationStructureBoundingBoxGeometryDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLAccelerationStructureCurveGeometryDescriptor); _MTL_PRIVATE_DEF_CLS(MTLAccelerationStructureDescriptor); _MTL_PRIVATE_DEF_CLS(MTLAccelerationStructureGeometryDescriptor); _MTL_PRIVATE_DEF_CLS(MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLAccelerationStructureMotionCurveGeometryDescriptor); _MTL_PRIVATE_DEF_CLS(MTLAccelerationStructureMotionTriangleGeometryDescriptor); _MTL_PRIVATE_DEF_CLS(MTLAccelerationStructurePassDescriptor); _MTL_PRIVATE_DEF_CLS(MTLAccelerationStructurePassSampleBufferAttachmentDescriptor); _MTL_PRIVATE_DEF_CLS(MTLAccelerationStructurePassSampleBufferAttachmentDescriptorArray); _MTL_PRIVATE_DEF_CLS(MTLAccelerationStructureTriangleGeometryDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLArchitecture); _MTL_PRIVATE_DEF_CLS(MTLArgument); _MTL_PRIVATE_DEF_CLS(MTLArgumentDescriptor); _MTL_PRIVATE_DEF_CLS(MTLArrayType); @@ -66,6 +69,7 @@ _MTL_PRIVATE_DEF_CLS(MTLFunctionStitchingInputNode); _MTL_PRIVATE_DEF_CLS(MTLHeapDescriptor); _MTL_PRIVATE_DEF_CLS(MTLIOCommandQueueDescriptor); 
_MTL_PRIVATE_DEF_CLS(MTLIndirectCommandBufferDescriptor); +_MTL_PRIVATE_DEF_CLS(MTLIndirectInstanceAccelerationStructureDescriptor); _MTL_PRIVATE_DEF_CLS(MTLInstanceAccelerationStructureDescriptor); _MTL_PRIVATE_DEF_CLS(MTLIntersectionFunctionDescriptor); _MTL_PRIVATE_DEF_CLS(MTLIntersectionFunctionTableDescriptor); @@ -226,8 +230,12 @@ _MTL_PRIVATE_DEF_SEL(allowDuplicateIntersectionFunctionInvocation, "allowDuplicateIntersectionFunctionInvocation"); _MTL_PRIVATE_DEF_SEL(allowGPUOptimizedContents, "allowGPUOptimizedContents"); +_MTL_PRIVATE_DEF_SEL(allowReferencingUndefinedSymbols, + "allowReferencingUndefinedSymbols"); _MTL_PRIVATE_DEF_SEL(alphaBlendOperation, "alphaBlendOperation"); +_MTL_PRIVATE_DEF_SEL(architecture, + "architecture"); _MTL_PRIVATE_DEF_SEL(areBarycentricCoordsSupported, "areBarycentricCoordsSupported"); _MTL_PRIVATE_DEF_SEL(areProgrammableSamplePositionsSupported, @@ -330,6 +338,8 @@ _MTL_PRIVATE_DEF_SEL(commit, "commit"); _MTL_PRIVATE_DEF_SEL(compareFunction, "compareFunction"); +_MTL_PRIVATE_DEF_SEL(compileSymbolVisibility, + "compileSymbolVisibility"); _MTL_PRIVATE_DEF_SEL(compressionType, "compressionType"); _MTL_PRIVATE_DEF_SEL(computeCommandEncoder, @@ -356,6 +366,18 @@ _MTL_PRIVATE_DEF_SEL(contents, "contents"); _MTL_PRIVATE_DEF_SEL(controlDependencies, "controlDependencies"); +_MTL_PRIVATE_DEF_SEL(controlPointBuffer, + "controlPointBuffer"); +_MTL_PRIVATE_DEF_SEL(controlPointBufferOffset, + "controlPointBufferOffset"); +_MTL_PRIVATE_DEF_SEL(controlPointBuffers, + "controlPointBuffers"); +_MTL_PRIVATE_DEF_SEL(controlPointCount, + "controlPointCount"); +_MTL_PRIVATE_DEF_SEL(controlPointFormat, + "controlPointFormat"); +_MTL_PRIVATE_DEF_SEL(controlPointStride, + "controlPointStride"); _MTL_PRIVATE_DEF_SEL(convertSparsePixelRegions_toTileRegions_withTileSize_alignmentMode_numRegions_, "convertSparsePixelRegions:toTileRegions:withTileSize:alignmentMode:numRegions:"); 
_MTL_PRIVATE_DEF_SEL(convertSparseTileRegions_toPixelRegions_withTileSize_numRegions_, @@ -396,6 +418,12 @@ _MTL_PRIVATE_DEF_SEL(cpuCacheMode, "cpuCacheMode"); _MTL_PRIVATE_DEF_SEL(currentAllocatedSize, "currentAllocatedSize"); +_MTL_PRIVATE_DEF_SEL(curveBasis, + "curveBasis"); +_MTL_PRIVATE_DEF_SEL(curveEndCaps, + "curveEndCaps"); +_MTL_PRIVATE_DEF_SEL(curveType, + "curveType"); _MTL_PRIVATE_DEF_SEL(data, "data"); _MTL_PRIVATE_DEF_SEL(dataSize, @@ -666,6 +694,10 @@ _MTL_PRIVATE_DEF_SEL(installName, "installName"); _MTL_PRIVATE_DEF_SEL(instanceCount, "instanceCount"); +_MTL_PRIVATE_DEF_SEL(instanceCountBuffer, + "instanceCountBuffer"); +_MTL_PRIVATE_DEF_SEL(instanceCountBufferOffset, + "instanceCountBufferOffset"); _MTL_PRIVATE_DEF_SEL(instanceDescriptorBuffer, "instanceDescriptorBuffer"); _MTL_PRIVATE_DEF_SEL(instanceDescriptorBufferOffset, @@ -800,8 +832,20 @@ _MTL_PRIVATE_DEF_SEL(maxFragmentBufferBindCount, "maxFragmentBufferBindCount"); _MTL_PRIVATE_DEF_SEL(maxFragmentCallStackDepth, "maxFragmentCallStackDepth"); +_MTL_PRIVATE_DEF_SEL(maxInstanceCount, + "maxInstanceCount"); _MTL_PRIVATE_DEF_SEL(maxKernelBufferBindCount, "maxKernelBufferBindCount"); +_MTL_PRIVATE_DEF_SEL(maxKernelThreadgroupMemoryBindCount, + "maxKernelThreadgroupMemoryBindCount"); +_MTL_PRIVATE_DEF_SEL(maxMeshBufferBindCount, + "maxMeshBufferBindCount"); +_MTL_PRIVATE_DEF_SEL(maxMotionTransformCount, + "maxMotionTransformCount"); +_MTL_PRIVATE_DEF_SEL(maxObjectBufferBindCount, + "maxObjectBufferBindCount"); +_MTL_PRIVATE_DEF_SEL(maxObjectThreadgroupMemoryBindCount, + "maxObjectThreadgroupMemoryBindCount"); _MTL_PRIVATE_DEF_SEL(maxSampleCount, "maxSampleCount"); _MTL_PRIVATE_DEF_SEL(maxTessellationFactor, @@ -826,6 +870,8 @@ _MTL_PRIVATE_DEF_SEL(maxVertexBufferBindCount, "maxVertexBufferBindCount"); _MTL_PRIVATE_DEF_SEL(maxVertexCallStackDepth, "maxVertexCallStackDepth"); +_MTL_PRIVATE_DEF_SEL(maximumConcurrentCompilationTaskCount, + "maximumConcurrentCompilationTaskCount"); 
_MTL_PRIVATE_DEF_SEL(memberByName_, "memberByName:"); _MTL_PRIVATE_DEF_SEL(members, @@ -844,6 +890,8 @@ _MTL_PRIVATE_DEF_SEL(meshBuffers, "meshBuffers"); _MTL_PRIVATE_DEF_SEL(meshFunction, "meshFunction"); +_MTL_PRIVATE_DEF_SEL(meshLinkedFunctions, + "meshLinkedFunctions"); _MTL_PRIVATE_DEF_SEL(meshThreadExecutionWidth, "meshThreadExecutionWidth"); _MTL_PRIVATE_DEF_SEL(meshThreadgroupSizeIsMultipleOfThreadExecutionWidth, @@ -874,6 +922,10 @@ _MTL_PRIVATE_DEF_SEL(motionTransformBufferOffset, "motionTransformBufferOffset"); _MTL_PRIVATE_DEF_SEL(motionTransformCount, "motionTransformCount"); +_MTL_PRIVATE_DEF_SEL(motionTransformCountBuffer, + "motionTransformCountBuffer"); +_MTL_PRIVATE_DEF_SEL(motionTransformCountBufferOffset, + "motionTransformCountBufferOffset"); _MTL_PRIVATE_DEF_SEL(moveTextureMappingsFromTexture_sourceSlice_sourceLevel_sourceOrigin_sourceSize_toTexture_destinationSlice_destinationLevel_destinationOrigin_, "moveTextureMappingsFromTexture:sourceSlice:sourceLevel:sourceOrigin:sourceSize:toTexture:destinationSlice:destinationLevel:destinationOrigin:"); _MTL_PRIVATE_DEF_SEL(mutability, @@ -960,6 +1012,10 @@ _MTL_PRIVATE_DEF_SEL(newHeapWithDescriptor_, "newHeapWithDescriptor:"); _MTL_PRIVATE_DEF_SEL(newIOCommandQueueWithDescriptor_error_, "newIOCommandQueueWithDescriptor:error:"); +_MTL_PRIVATE_DEF_SEL(newIOFileHandleWithURL_compressionMethod_error_, + "newIOFileHandleWithURL:compressionMethod:error:"); +_MTL_PRIVATE_DEF_SEL(newIOFileHandleWithURL_error_, + "newIOFileHandleWithURL:error:"); _MTL_PRIVATE_DEF_SEL(newIOHandleWithURL_compressionMethod_error_, "newIOHandleWithURL:compressionMethod:error:"); _MTL_PRIVATE_DEF_SEL(newIOHandleWithURL_error_, @@ -1060,6 +1116,8 @@ _MTL_PRIVATE_DEF_SEL(objectBuffers, "objectBuffers"); _MTL_PRIVATE_DEF_SEL(objectFunction, "objectFunction"); +_MTL_PRIVATE_DEF_SEL(objectLinkedFunctions, + "objectLinkedFunctions"); _MTL_PRIVATE_DEF_SEL(objectPayloadAlignment, "objectPayloadAlignment"); 
_MTL_PRIVATE_DEF_SEL(objectPayloadDataSize, @@ -1158,6 +1216,16 @@ _MTL_PRIVATE_DEF_SEL(pushDebugGroup_, "pushDebugGroup:"); _MTL_PRIVATE_DEF_SEL(rAddressMode, "rAddressMode"); +_MTL_PRIVATE_DEF_SEL(radiusBuffer, + "radiusBuffer"); +_MTL_PRIVATE_DEF_SEL(radiusBufferOffset, + "radiusBufferOffset"); +_MTL_PRIVATE_DEF_SEL(radiusBuffers, + "radiusBuffers"); +_MTL_PRIVATE_DEF_SEL(radiusFormat, + "radiusFormat"); +_MTL_PRIVATE_DEF_SEL(radiusStride, + "radiusStride"); _MTL_PRIVATE_DEF_SEL(rasterSampleCount, "rasterSampleCount"); _MTL_PRIVATE_DEF_SEL(rasterizationRateMap, @@ -1254,6 +1322,10 @@ _MTL_PRIVATE_DEF_SEL(scratchBufferAllocator, "scratchBufferAllocator"); _MTL_PRIVATE_DEF_SEL(screenSize, "screenSize"); +_MTL_PRIVATE_DEF_SEL(segmentControlPointCount, + "segmentControlPointCount"); +_MTL_PRIVATE_DEF_SEL(segmentCount, + "segmentCount"); _MTL_PRIVATE_DEF_SEL(serializeToURL_error_, "serializeToURL:error:"); _MTL_PRIVATE_DEF_SEL(setAccelerationStructure_atBufferIndex_, @@ -1266,6 +1338,8 @@ _MTL_PRIVATE_DEF_SEL(setAllowDuplicateIntersectionFunctionInvocation_, "setAllowDuplicateIntersectionFunctionInvocation:"); _MTL_PRIVATE_DEF_SEL(setAllowGPUOptimizedContents_, "setAllowGPUOptimizedContents:"); +_MTL_PRIVATE_DEF_SEL(setAllowReferencingUndefinedSymbols_, + "setAllowReferencingUndefinedSymbols:"); _MTL_PRIVATE_DEF_SEL(setAlphaBlendOperation_, "setAlphaBlendOperation:"); _MTL_PRIVATE_DEF_SEL(setAlphaToCoverageEnabled_, @@ -1312,14 +1386,22 @@ _MTL_PRIVATE_DEF_SEL(setBuffer_, "setBuffer:"); _MTL_PRIVATE_DEF_SEL(setBuffer_offset_atIndex_, "setBuffer:offset:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setBuffer_offset_attributeStride_atIndex_, + "setBuffer:offset:attributeStride:atIndex:"); _MTL_PRIVATE_DEF_SEL(setBufferIndex_, "setBufferIndex:"); _MTL_PRIVATE_DEF_SEL(setBufferOffset_atIndex_, "setBufferOffset:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setBufferOffset_attributeStride_atIndex_, + "setBufferOffset:attributeStride:atIndex:"); 
+_MTL_PRIVATE_DEF_SEL(setBuffers_offsets_attributeStrides_withRange_, + "setBuffers:offsets:attributeStrides:withRange:"); _MTL_PRIVATE_DEF_SEL(setBuffers_offsets_withRange_, "setBuffers:offsets:withRange:"); _MTL_PRIVATE_DEF_SEL(setBytes_length_atIndex_, "setBytes:length:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setBytes_length_attributeStride_atIndex_, + "setBytes:length:attributeStride:atIndex:"); _MTL_PRIVATE_DEF_SEL(setCaptureObject_, "setCaptureObject:"); _MTL_PRIVATE_DEF_SEL(setClearColor_, @@ -1336,6 +1418,8 @@ _MTL_PRIVATE_DEF_SEL(setCommandTypes_, "setCommandTypes:"); _MTL_PRIVATE_DEF_SEL(setCompareFunction_, "setCompareFunction:"); +_MTL_PRIVATE_DEF_SEL(setCompileSymbolVisibility_, + "setCompileSymbolVisibility:"); _MTL_PRIVATE_DEF_SEL(setCompressionType_, "setCompressionType:"); _MTL_PRIVATE_DEF_SEL(setComputeFunction_, @@ -1358,12 +1442,30 @@ _MTL_PRIVATE_DEF_SEL(setConstantValues_type_withRange_, "setConstantValues:type:withRange:"); _MTL_PRIVATE_DEF_SEL(setControlDependencies_, "setControlDependencies:"); +_MTL_PRIVATE_DEF_SEL(setControlPointBuffer_, + "setControlPointBuffer:"); +_MTL_PRIVATE_DEF_SEL(setControlPointBufferOffset_, + "setControlPointBufferOffset:"); +_MTL_PRIVATE_DEF_SEL(setControlPointBuffers_, + "setControlPointBuffers:"); +_MTL_PRIVATE_DEF_SEL(setControlPointCount_, + "setControlPointCount:"); +_MTL_PRIVATE_DEF_SEL(setControlPointFormat_, + "setControlPointFormat:"); +_MTL_PRIVATE_DEF_SEL(setControlPointStride_, + "setControlPointStride:"); _MTL_PRIVATE_DEF_SEL(setCounterSet_, "setCounterSet:"); _MTL_PRIVATE_DEF_SEL(setCpuCacheMode_, "setCpuCacheMode:"); _MTL_PRIVATE_DEF_SEL(setCullMode_, "setCullMode:"); +_MTL_PRIVATE_DEF_SEL(setCurveBasis_, + "setCurveBasis:"); +_MTL_PRIVATE_DEF_SEL(setCurveEndCaps_, + "setCurveEndCaps:"); +_MTL_PRIVATE_DEF_SEL(setCurveType_, + "setCurveType:"); _MTL_PRIVATE_DEF_SEL(setDataType_, "setDataType:"); _MTL_PRIVATE_DEF_SEL(setDefaultCaptureScope_, @@ -1510,6 +1612,10 @@ _MTL_PRIVATE_DEF_SEL(setInstallName_, 
"setInstallName:"); _MTL_PRIVATE_DEF_SEL(setInstanceCount_, "setInstanceCount:"); +_MTL_PRIVATE_DEF_SEL(setInstanceCountBuffer_, + "setInstanceCountBuffer:"); +_MTL_PRIVATE_DEF_SEL(setInstanceCountBufferOffset_, + "setInstanceCountBufferOffset:"); _MTL_PRIVATE_DEF_SEL(setInstanceDescriptorBuffer_, "setInstanceDescriptorBuffer:"); _MTL_PRIVATE_DEF_SEL(setInstanceDescriptorBufferOffset_, @@ -1532,6 +1638,8 @@ _MTL_PRIVATE_DEF_SEL(setIntersectionFunctionTables_withRange_, "setIntersectionFunctionTables:withRange:"); _MTL_PRIVATE_DEF_SEL(setKernelBuffer_offset_atIndex_, "setKernelBuffer:offset:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setKernelBuffer_offset_attributeStride_atIndex_, + "setKernelBuffer:offset:attributeStride:atIndex:"); _MTL_PRIVATE_DEF_SEL(setLabel_, "setLabel:"); _MTL_PRIVATE_DEF_SEL(setLanguageVersion_, @@ -1568,8 +1676,20 @@ _MTL_PRIVATE_DEF_SEL(setMaxFragmentBufferBindCount_, "setMaxFragmentBufferBindCount:"); _MTL_PRIVATE_DEF_SEL(setMaxFragmentCallStackDepth_, "setMaxFragmentCallStackDepth:"); +_MTL_PRIVATE_DEF_SEL(setMaxInstanceCount_, + "setMaxInstanceCount:"); _MTL_PRIVATE_DEF_SEL(setMaxKernelBufferBindCount_, "setMaxKernelBufferBindCount:"); +_MTL_PRIVATE_DEF_SEL(setMaxKernelThreadgroupMemoryBindCount_, + "setMaxKernelThreadgroupMemoryBindCount:"); +_MTL_PRIVATE_DEF_SEL(setMaxMeshBufferBindCount_, + "setMaxMeshBufferBindCount:"); +_MTL_PRIVATE_DEF_SEL(setMaxMotionTransformCount_, + "setMaxMotionTransformCount:"); +_MTL_PRIVATE_DEF_SEL(setMaxObjectBufferBindCount_, + "setMaxObjectBufferBindCount:"); +_MTL_PRIVATE_DEF_SEL(setMaxObjectThreadgroupMemoryBindCount_, + "setMaxObjectThreadgroupMemoryBindCount:"); _MTL_PRIVATE_DEF_SEL(setMaxTessellationFactor_, "setMaxTessellationFactor:"); _MTL_PRIVATE_DEF_SEL(setMaxTotalThreadgroupsPerMeshGrid_, @@ -1596,6 +1716,8 @@ _MTL_PRIVATE_DEF_SEL(setMeshBytes_length_atIndex_, "setMeshBytes:length:atIndex:"); _MTL_PRIVATE_DEF_SEL(setMeshFunction_, "setMeshFunction:"); +_MTL_PRIVATE_DEF_SEL(setMeshLinkedFunctions_, + 
"setMeshLinkedFunctions:"); _MTL_PRIVATE_DEF_SEL(setMeshSamplerState_atIndex_, "setMeshSamplerState:atIndex:"); _MTL_PRIVATE_DEF_SEL(setMeshSamplerState_lodMinClamp_lodMaxClamp_atIndex_, @@ -1632,6 +1754,10 @@ _MTL_PRIVATE_DEF_SEL(setMotionTransformBufferOffset_, "setMotionTransformBufferOffset:"); _MTL_PRIVATE_DEF_SEL(setMotionTransformCount_, "setMotionTransformCount:"); +_MTL_PRIVATE_DEF_SEL(setMotionTransformCountBuffer_, + "setMotionTransformCountBuffer:"); +_MTL_PRIVATE_DEF_SEL(setMotionTransformCountBufferOffset_, + "setMotionTransformCountBufferOffset:"); _MTL_PRIVATE_DEF_SEL(setMutability_, "setMutability:"); _MTL_PRIVATE_DEF_SEL(setName_, @@ -1652,6 +1778,8 @@ _MTL_PRIVATE_DEF_SEL(setObjectBytes_length_atIndex_, "setObjectBytes:length:atIndex:"); _MTL_PRIVATE_DEF_SEL(setObjectFunction_, "setObjectFunction:"); +_MTL_PRIVATE_DEF_SEL(setObjectLinkedFunctions_, + "setObjectLinkedFunctions:"); _MTL_PRIVATE_DEF_SEL(setObjectSamplerState_atIndex_, "setObjectSamplerState:atIndex:"); _MTL_PRIVATE_DEF_SEL(setObjectSamplerState_lodMinClamp_lodMaxClamp_atIndex_, @@ -1672,6 +1800,10 @@ _MTL_PRIVATE_DEF_SEL(setOffset_, "setOffset:"); _MTL_PRIVATE_DEF_SEL(setOpaque_, "setOpaque:"); +_MTL_PRIVATE_DEF_SEL(setOpaqueCurveIntersectionFunctionWithSignature_atIndex_, + "setOpaqueCurveIntersectionFunctionWithSignature:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setOpaqueCurveIntersectionFunctionWithSignature_withRange_, + "setOpaqueCurveIntersectionFunctionWithSignature:withRange:"); _MTL_PRIVATE_DEF_SEL(setOpaqueTriangleIntersectionFunctionWithSignature_atIndex_, "setOpaqueTriangleIntersectionFunctionWithSignature:atIndex:"); _MTL_PRIVATE_DEF_SEL(setOpaqueTriangleIntersectionFunctionWithSignature_withRange_, @@ -1710,6 +1842,16 @@ _MTL_PRIVATE_DEF_SEL(setPurgeableState_, "setPurgeableState:"); _MTL_PRIVATE_DEF_SEL(setRAddressMode_, "setRAddressMode:"); +_MTL_PRIVATE_DEF_SEL(setRadiusBuffer_, + "setRadiusBuffer:"); +_MTL_PRIVATE_DEF_SEL(setRadiusBufferOffset_, + 
"setRadiusBufferOffset:"); +_MTL_PRIVATE_DEF_SEL(setRadiusBuffers_, + "setRadiusBuffers:"); +_MTL_PRIVATE_DEF_SEL(setRadiusFormat_, + "setRadiusFormat:"); +_MTL_PRIVATE_DEF_SEL(setRadiusStride_, + "setRadiusStride:"); _MTL_PRIVATE_DEF_SEL(setRasterSampleCount_, "setRasterSampleCount:"); _MTL_PRIVATE_DEF_SEL(setRasterizationEnabled_, @@ -1768,6 +1910,12 @@ _MTL_PRIVATE_DEF_SEL(setScratchBufferAllocator_, "setScratchBufferAllocator:"); _MTL_PRIVATE_DEF_SEL(setScreenSize_, "setScreenSize:"); +_MTL_PRIVATE_DEF_SEL(setSegmentControlPointCount_, + "setSegmentControlPointCount:"); +_MTL_PRIVATE_DEF_SEL(setSegmentCount_, + "setSegmentCount:"); +_MTL_PRIVATE_DEF_SEL(setShouldMaximizeConcurrentCompilation_, + "setShouldMaximizeConcurrentCompilation:"); _MTL_PRIVATE_DEF_SEL(setSignaledValue_, "setSignaledValue:"); _MTL_PRIVATE_DEF_SEL(setSize_, @@ -1832,6 +1980,8 @@ _MTL_PRIVATE_DEF_SEL(setSupportAddingVertexBinaryFunctions_, "setSupportAddingVertexBinaryFunctions:"); _MTL_PRIVATE_DEF_SEL(setSupportArgumentBuffers_, "setSupportArgumentBuffers:"); +_MTL_PRIVATE_DEF_SEL(setSupportDynamicAttributeStride_, + "setSupportDynamicAttributeStride:"); _MTL_PRIVATE_DEF_SEL(setSupportIndirectCommandBuffers_, "setSupportIndirectCommandBuffers:"); _MTL_PRIVATE_DEF_SEL(setSupportRayTracing_, @@ -1936,16 +2086,24 @@ _MTL_PRIVATE_DEF_SEL(setVertexBuffer_, "setVertexBuffer:"); _MTL_PRIVATE_DEF_SEL(setVertexBuffer_offset_atIndex_, "setVertexBuffer:offset:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setVertexBuffer_offset_attributeStride_atIndex_, + "setVertexBuffer:offset:attributeStride:atIndex:"); _MTL_PRIVATE_DEF_SEL(setVertexBufferOffset_, "setVertexBufferOffset:"); _MTL_PRIVATE_DEF_SEL(setVertexBufferOffset_atIndex_, "setVertexBufferOffset:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setVertexBufferOffset_attributeStride_atIndex_, + "setVertexBufferOffset:attributeStride:atIndex:"); _MTL_PRIVATE_DEF_SEL(setVertexBuffers_, "setVertexBuffers:"); 
+_MTL_PRIVATE_DEF_SEL(setVertexBuffers_offsets_attributeStrides_withRange_, + "setVertexBuffers:offsets:attributeStrides:withRange:"); _MTL_PRIVATE_DEF_SEL(setVertexBuffers_offsets_withRange_, "setVertexBuffers:offsets:withRange:"); _MTL_PRIVATE_DEF_SEL(setVertexBytes_length_atIndex_, "setVertexBytes:length:atIndex:"); +_MTL_PRIVATE_DEF_SEL(setVertexBytes_length_attributeStride_atIndex_, + "setVertexBytes:length:attributeStride:atIndex:"); _MTL_PRIVATE_DEF_SEL(setVertexDescriptor_, "setVertexDescriptor:"); _MTL_PRIVATE_DEF_SEL(setVertexFormat_, @@ -2000,6 +2158,8 @@ _MTL_PRIVATE_DEF_SEL(setWriteMask_, "setWriteMask:"); _MTL_PRIVATE_DEF_SEL(sharedCaptureManager, "sharedCaptureManager"); +_MTL_PRIVATE_DEF_SEL(shouldMaximizeConcurrentCompilation, + "shouldMaximizeConcurrentCompilation"); _MTL_PRIVATE_DEF_SEL(signalEvent_value_, "signalEvent:value:"); _MTL_PRIVATE_DEF_SEL(signaledValue, @@ -2082,6 +2242,8 @@ _MTL_PRIVATE_DEF_SEL(supportAddingVertexBinaryFunctions, "supportAddingVertexBinaryFunctions"); _MTL_PRIVATE_DEF_SEL(supportArgumentBuffers, "supportArgumentBuffers"); +_MTL_PRIVATE_DEF_SEL(supportDynamicAttributeStride, + "supportDynamicAttributeStride"); _MTL_PRIVATE_DEF_SEL(supportIndirectCommandBuffers, "supportIndirectCommandBuffers"); _MTL_PRIVATE_DEF_SEL(supportRayTracing, diff --git a/metal-cpp/Metal/MTLHeap.hpp b/metal-cpp/Metal/MTLHeap.hpp index 4b0b155c..bb5b8361 100644 --- a/metal-cpp/Metal/MTLHeap.hpp +++ b/metal-cpp/Metal/MTLHeap.hpp @@ -2,7 +2,7 @@ // // Metal/MTLHeap.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -93,7 +93,7 @@ class Heap : public NS::Referencing class Buffer* newBuffer(NS::UInteger length, MTL::ResourceOptions options); - class Texture* newTexture(const class TextureDescriptor* desc); + class Texture* newTexture(const class TextureDescriptor* descriptor); MTL::PurgeableState setPurgeableState(MTL::PurgeableState state); @@ -275,9 +275,9 @@ _MTL_INLINE MTL::Buffer* MTL::Heap::newBuffer(NS::UInteger length, MTL::Resource } // method: newTextureWithDescriptor: -_MTL_INLINE MTL::Texture* MTL::Heap::newTexture(const MTL::TextureDescriptor* desc) +_MTL_INLINE MTL::Texture* MTL::Heap::newTexture(const MTL::TextureDescriptor* descriptor) { - return Object::sendMessage(this, _MTL_PRIVATE_SEL(newTextureWithDescriptor_), desc); + return Object::sendMessage(this, _MTL_PRIVATE_SEL(newTextureWithDescriptor_), descriptor); } // method: setPurgeableState: diff --git a/metal-cpp/Metal/MTLIOCommandBuffer.hpp b/metal-cpp/Metal/MTLIOCommandBuffer.hpp index 85a76de8..fb28f3e7 100644 --- a/metal-cpp/Metal/MTLIOCommandBuffer.hpp +++ b/metal-cpp/Metal/MTLIOCommandBuffer.hpp @@ -2,7 +2,7 @@ // // Metal/MTLIOCommandBuffer.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLIOCommandQueue.hpp b/metal-cpp/Metal/MTLIOCommandQueue.hpp index de2fc8a1..b5f5d1c2 100644 --- a/metal-cpp/Metal/MTLIOCommandQueue.hpp +++ b/metal-cpp/Metal/MTLIOCommandQueue.hpp @@ -2,7 +2,7 @@ // // Metal/MTLIOCommandQueue.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/metal-cpp/Metal/MTLIOCompressor.hpp b/metal-cpp/Metal/MTLIOCompressor.hpp index 83fc486c..ad9e92d1 100644 --- a/metal-cpp/Metal/MTLIOCompressor.hpp +++ b/metal-cpp/Metal/MTLIOCompressor.hpp @@ -2,7 +2,7 @@ // // Metal/MTLIOCompressor.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -29,6 +29,8 @@ namespace MTL { +using IOCompresionContext=void*; + _MTL_ENUM(NS::Integer, IOCompressionStatus) { IOCompressionStatusComplete = 0, IOCompressionStatusError = 1, @@ -36,11 +38,11 @@ _MTL_ENUM(NS::Integer, IOCompressionStatus) { size_t IOCompressionContextDefaultChunkSize(); -void* IOCreateCompressionContext(const char* path, IOCompressionMethod type, size_t chunkSize); +IOCompresionContext IOCreateCompressionContext(const char* path, IOCompressionMethod type, size_t chunkSize); -void IOCompressionContextAppendData(void* context, const void* data, size_t size); +void IOCompressionContextAppendData(IOCompresionContext context, const void* data, size_t size); -IOCompressionStatus IOFlushAndDestroyCompressionContext(void* context); +IOCompressionStatus IOFlushAndDestroyCompressionContext(IOCompresionContext context); } diff --git a/metal-cpp/Metal/MTLIndirectCommandBuffer.hpp b/metal-cpp/Metal/MTLIndirectCommandBuffer.hpp index 570805b8..ce7ba5d1 100644 --- a/metal-cpp/Metal/MTLIndirectCommandBuffer.hpp +++ b/metal-cpp/Metal/MTLIndirectCommandBuffer.hpp @@ -2,7 +2,7 @@ // // Metal/MTLIndirectCommandBuffer.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -39,6 +39,8 @@ _MTL_OPTIONS(NS::UInteger, IndirectCommandType) { IndirectCommandTypeDrawIndexedPatches = 8, IndirectCommandTypeConcurrentDispatch = 32, IndirectCommandTypeConcurrentDispatchThreads = 64, + IndirectCommandTypeDrawMeshThreadgroups = 128, + IndirectCommandTypeDrawMeshThreads = 256, }; struct IndirectCommandBufferExecutionRange @@ -72,8 +74,23 @@ class IndirectCommandBufferDescriptor : public NS::Copying @@ -170,6 +187,50 @@ _MTL_INLINE void MTL::IndirectCommandBufferDescriptor::setMaxKernelBufferBindCou Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxKernelBufferBindCount_), maxKernelBufferBindCount); } +// property: maxKernelThreadgroupMemoryBindCount +_MTL_INLINE NS::UInteger MTL::IndirectCommandBufferDescriptor::maxKernelThreadgroupMemoryBindCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxKernelThreadgroupMemoryBindCount)); +} + +_MTL_INLINE void MTL::IndirectCommandBufferDescriptor::setMaxKernelThreadgroupMemoryBindCount(NS::UInteger maxKernelThreadgroupMemoryBindCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxKernelThreadgroupMemoryBindCount_), maxKernelThreadgroupMemoryBindCount); +} + +// property: maxObjectBufferBindCount +_MTL_INLINE NS::UInteger MTL::IndirectCommandBufferDescriptor::maxObjectBufferBindCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxObjectBufferBindCount)); +} + +_MTL_INLINE void MTL::IndirectCommandBufferDescriptor::setMaxObjectBufferBindCount(NS::UInteger maxObjectBufferBindCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxObjectBufferBindCount_), maxObjectBufferBindCount); +} + +// property: maxMeshBufferBindCount +_MTL_INLINE NS::UInteger MTL::IndirectCommandBufferDescriptor::maxMeshBufferBindCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxMeshBufferBindCount)); +} + +_MTL_INLINE void MTL::IndirectCommandBufferDescriptor::setMaxMeshBufferBindCount(NS::UInteger maxMeshBufferBindCount) +{ + Object::sendMessage(this, 
_MTL_PRIVATE_SEL(setMaxMeshBufferBindCount_), maxMeshBufferBindCount); +} + +// property: maxObjectThreadgroupMemoryBindCount +_MTL_INLINE NS::UInteger MTL::IndirectCommandBufferDescriptor::maxObjectThreadgroupMemoryBindCount() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxObjectThreadgroupMemoryBindCount)); +} + +_MTL_INLINE void MTL::IndirectCommandBufferDescriptor::setMaxObjectThreadgroupMemoryBindCount(NS::UInteger maxObjectThreadgroupMemoryBindCount) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxObjectThreadgroupMemoryBindCount_), maxObjectThreadgroupMemoryBindCount); +} + // property: supportRayTracing _MTL_INLINE bool MTL::IndirectCommandBufferDescriptor::supportRayTracing() const { @@ -181,6 +242,17 @@ _MTL_INLINE void MTL::IndirectCommandBufferDescriptor::setSupportRayTracing(bool Object::sendMessage(this, _MTL_PRIVATE_SEL(setSupportRayTracing_), supportRayTracing); } +// property: supportDynamicAttributeStride +_MTL_INLINE bool MTL::IndirectCommandBufferDescriptor::supportDynamicAttributeStride() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportDynamicAttributeStride)); +} + +_MTL_INLINE void MTL::IndirectCommandBufferDescriptor::setSupportDynamicAttributeStride(bool supportDynamicAttributeStride) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSupportDynamicAttributeStride_), supportDynamicAttributeStride); +} + // property: size _MTL_INLINE NS::UInteger MTL::IndirectCommandBuffer::size() const { diff --git a/metal-cpp/Metal/MTLIndirectCommandEncoder.hpp b/metal-cpp/Metal/MTLIndirectCommandEncoder.hpp index 659cb3db..ff973ef0 100644 --- a/metal-cpp/Metal/MTLIndirectCommandEncoder.hpp +++ b/metal-cpp/Metal/MTLIndirectCommandEncoder.hpp @@ -2,7 +2,7 @@ // // Metal/MTLIndirectCommandEncoder.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -41,6 +41,8 @@ class IndirectRenderCommand : public NS::Referencing void setFragmentBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index); + void setVertexBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger stride, NS::UInteger index); + void drawPatches(NS::UInteger numberOfPatchControlPoints, NS::UInteger patchStart, NS::UInteger patchCount, const class Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, NS::UInteger instanceCount, NS::UInteger baseInstance, const class Buffer* buffer, NS::UInteger offset, NS::UInteger instanceStride); void drawIndexedPatches(NS::UInteger numberOfPatchControlPoints, NS::UInteger patchStart, NS::UInteger patchCount, const class Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, const class Buffer* controlPointIndexBuffer, NS::UInteger controlPointIndexBufferOffset, NS::UInteger instanceCount, NS::UInteger baseInstance, const class Buffer* buffer, NS::UInteger offset, NS::UInteger instanceStride); @@ -49,6 +51,20 @@ class IndirectRenderCommand : public NS::Referencing void drawIndexedPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger indexCount, MTL::IndexType indexType, const class Buffer* indexBuffer, NS::UInteger indexBufferOffset, NS::UInteger instanceCount, NS::Integer baseVertex, NS::UInteger baseInstance); + void setObjectThreadgroupMemoryLength(NS::UInteger length, NS::UInteger index); + + void setObjectBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index); + + void setMeshBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index); + + void drawMeshThreadgroups(MTL::Size threadgroupsPerGrid, MTL::Size threadsPerObjectThreadgroup, MTL::Size threadsPerMeshThreadgroup); + + void drawMeshThreads(MTL::Size threadsPerGrid, MTL::Size threadsPerObjectThreadgroup, MTL::Size threadsPerMeshThreadgroup); + + void setBarrier(); + + void clearBarrier(); + void reset(); }; @@ -59,6 +75,8 @@ class IndirectComputeCommand : public 
NS::Referencing void setKernelBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index); + void setKernelBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger stride, NS::UInteger index); + void concurrentDispatchThreadgroups(MTL::Size threadgroupsPerGrid, MTL::Size threadsPerThreadgroup); void concurrentDispatchThreads(MTL::Size threadsPerGrid, MTL::Size threadsPerThreadgroup); @@ -96,6 +114,12 @@ _MTL_INLINE void MTL::IndirectRenderCommand::setFragmentBuffer(const MTL::Buffer Object::sendMessage(this, _MTL_PRIVATE_SEL(setFragmentBuffer_offset_atIndex_), buffer, offset, index); } +// method: setVertexBuffer:offset:attributeStride:atIndex: +_MTL_INLINE void MTL::IndirectRenderCommand::setVertexBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger stride, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexBuffer_offset_attributeStride_atIndex_), buffer, offset, stride, index); +} + // method: drawPatches:patchStart:patchCount:patchIndexBuffer:patchIndexBufferOffset:instanceCount:baseInstance:tessellationFactorBuffer:tessellationFactorBufferOffset:tessellationFactorBufferInstanceStride: _MTL_INLINE void MTL::IndirectRenderCommand::drawPatches(NS::UInteger numberOfPatchControlPoints, NS::UInteger patchStart, NS::UInteger patchCount, const MTL::Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, NS::UInteger instanceCount, NS::UInteger baseInstance, const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger instanceStride) { @@ -120,6 +144,48 @@ _MTL_INLINE void MTL::IndirectRenderCommand::drawIndexedPrimitives(MTL::Primitiv Object::sendMessage(this, _MTL_PRIVATE_SEL(drawIndexedPrimitives_indexCount_indexType_indexBuffer_indexBufferOffset_instanceCount_baseVertex_baseInstance_), primitiveType, indexCount, indexType, indexBuffer, indexBufferOffset, instanceCount, baseVertex, baseInstance); } +// method: setObjectThreadgroupMemoryLength:atIndex: +_MTL_INLINE void 
MTL::IndirectRenderCommand::setObjectThreadgroupMemoryLength(NS::UInteger length, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setObjectThreadgroupMemoryLength_atIndex_), length, index); +} + +// method: setObjectBuffer:offset:atIndex: +_MTL_INLINE void MTL::IndirectRenderCommand::setObjectBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setObjectBuffer_offset_atIndex_), buffer, offset, index); +} + +// method: setMeshBuffer:offset:atIndex: +_MTL_INLINE void MTL::IndirectRenderCommand::setMeshBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMeshBuffer_offset_atIndex_), buffer, offset, index); +} + +// method: drawMeshThreadgroups:threadsPerObjectThreadgroup:threadsPerMeshThreadgroup: +_MTL_INLINE void MTL::IndirectRenderCommand::drawMeshThreadgroups(MTL::Size threadgroupsPerGrid, MTL::Size threadsPerObjectThreadgroup, MTL::Size threadsPerMeshThreadgroup) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(drawMeshThreadgroups_threadsPerObjectThreadgroup_threadsPerMeshThreadgroup_), threadgroupsPerGrid, threadsPerObjectThreadgroup, threadsPerMeshThreadgroup); +} + +// method: drawMeshThreads:threadsPerObjectThreadgroup:threadsPerMeshThreadgroup: +_MTL_INLINE void MTL::IndirectRenderCommand::drawMeshThreads(MTL::Size threadsPerGrid, MTL::Size threadsPerObjectThreadgroup, MTL::Size threadsPerMeshThreadgroup) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(drawMeshThreads_threadsPerObjectThreadgroup_threadsPerMeshThreadgroup_), threadsPerGrid, threadsPerObjectThreadgroup, threadsPerMeshThreadgroup); +} + +// method: setBarrier +_MTL_INLINE void MTL::IndirectRenderCommand::setBarrier() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setBarrier)); +} + +// method: clearBarrier +_MTL_INLINE void MTL::IndirectRenderCommand::clearBarrier() +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(clearBarrier)); +} + 
// method: reset _MTL_INLINE void MTL::IndirectRenderCommand::reset() { @@ -138,6 +204,12 @@ _MTL_INLINE void MTL::IndirectComputeCommand::setKernelBuffer(const MTL::Buffer* Object::sendMessage(this, _MTL_PRIVATE_SEL(setKernelBuffer_offset_atIndex_), buffer, offset, index); } +// method: setKernelBuffer:offset:attributeStride:atIndex: +_MTL_INLINE void MTL::IndirectComputeCommand::setKernelBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger stride, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setKernelBuffer_offset_attributeStride_atIndex_), buffer, offset, stride, index); +} + // method: concurrentDispatchThreadgroups:threadsPerThreadgroup: _MTL_INLINE void MTL::IndirectComputeCommand::concurrentDispatchThreadgroups(MTL::Size threadgroupsPerGrid, MTL::Size threadsPerThreadgroup) { diff --git a/metal-cpp/Metal/MTLIntersectionFunctionTable.hpp b/metal-cpp/Metal/MTLIntersectionFunctionTable.hpp index 1dda215e..e1c0911e 100644 --- a/metal-cpp/Metal/MTLIntersectionFunctionTable.hpp +++ b/metal-cpp/Metal/MTLIntersectionFunctionTable.hpp @@ -2,7 +2,7 @@ // // Metal/MTLIntersectionFunctionTable.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -40,6 +40,8 @@ _MTL_OPTIONS(NS::UInteger, IntersectionFunctionSignature) { IntersectionFunctionSignatureInstanceMotion = 8, IntersectionFunctionSignaturePrimitiveMotion = 16, IntersectionFunctionSignatureExtendedLimits = 32, + IntersectionFunctionSignatureMaxLevels = 64, + IntersectionFunctionSignatureCurveData = 128, }; class IntersectionFunctionTableDescriptor : public NS::Copying @@ -72,6 +74,10 @@ class IntersectionFunctionTable : public NS::Referencing(this, _MTL_PRIVATE_SEL(setOpaqueTriangleIntersectionFunctionWithSignature_withRange_), signature, range); } +// method: setOpaqueCurveIntersectionFunctionWithSignature:atIndex: +_MTL_INLINE void MTL::IntersectionFunctionTable::setOpaqueCurveIntersectionFunction(MTL::IntersectionFunctionSignature signature, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setOpaqueCurveIntersectionFunctionWithSignature_atIndex_), signature, index); +} + +// method: setOpaqueCurveIntersectionFunctionWithSignature:withRange: +_MTL_INLINE void MTL::IntersectionFunctionTable::setOpaqueCurveIntersectionFunction(MTL::IntersectionFunctionSignature signature, NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setOpaqueCurveIntersectionFunctionWithSignature_withRange_), signature, range); +} + // method: setVisibleFunctionTable:atBufferIndex: _MTL_INLINE void MTL::IntersectionFunctionTable::setVisibleFunctionTable(const MTL::VisibleFunctionTable* functionTable, NS::UInteger bufferIndex) { diff --git a/metal-cpp/Metal/MTLLibrary.hpp b/metal-cpp/Metal/MTLLibrary.hpp index 74585178..1933ff01 100644 --- a/metal-cpp/Metal/MTLLibrary.hpp +++ b/metal-cpp/Metal/MTLLibrary.hpp @@ -2,7 +2,7 @@ // // Metal/MTLLibrary.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -146,6 +146,7 @@ _MTL_ENUM(NS::UInteger, LanguageVersion) { LanguageVersion2_3 = 131075, LanguageVersion2_4 = 131076, LanguageVersion3_0 = 196608, + LanguageVersion3_1 = 196609, }; _MTL_ENUM(NS::Integer, LibraryType) { @@ -158,6 +159,11 @@ _MTL_ENUM(NS::Integer, LibraryOptimizationLevel) { LibraryOptimizationLevelSize = 1, }; +_MTL_ENUM(NS::Integer, CompileSymbolVisibility) { + CompileSymbolVisibilityDefault = 0, + CompileSymbolVisibilityHidden = 1, +}; + class CompileOptions : public NS::Copying { public: @@ -188,6 +194,15 @@ class CompileOptions : public NS::Copying MTL::LibraryOptimizationLevel optimizationLevel() const; void setOptimizationLevel(MTL::LibraryOptimizationLevel optimizationLevel); + + MTL::CompileSymbolVisibility compileSymbolVisibility() const; + void setCompileSymbolVisibility(MTL::CompileSymbolVisibility compileSymbolVisibility); + + bool allowReferencingUndefinedSymbols() const; + void setAllowReferencingUndefinedSymbols(bool allowReferencingUndefinedSymbols); + + NS::UInteger maxTotalThreadsPerThreadgroup() const; + void setMaxTotalThreadsPerThreadgroup(NS::UInteger maxTotalThreadsPerThreadgroup); }; _MTL_ENUM(NS::UInteger, LibraryError) { @@ -545,6 +560,39 @@ _MTL_INLINE void MTL::CompileOptions::setOptimizationLevel(MTL::LibraryOptimizat Object::sendMessage(this, _MTL_PRIVATE_SEL(setOptimizationLevel_), optimizationLevel); } +// property: compileSymbolVisibility +_MTL_INLINE MTL::CompileSymbolVisibility MTL::CompileOptions::compileSymbolVisibility() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(compileSymbolVisibility)); +} + +_MTL_INLINE void MTL::CompileOptions::setCompileSymbolVisibility(MTL::CompileSymbolVisibility compileSymbolVisibility) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setCompileSymbolVisibility_), compileSymbolVisibility); +} + +// property: allowReferencingUndefinedSymbols +_MTL_INLINE bool MTL::CompileOptions::allowReferencingUndefinedSymbols() const +{ + return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(allowReferencingUndefinedSymbols)); +} + +_MTL_INLINE void MTL::CompileOptions::setAllowReferencingUndefinedSymbols(bool allowReferencingUndefinedSymbols) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setAllowReferencingUndefinedSymbols_), allowReferencingUndefinedSymbols); +} + +// property: maxTotalThreadsPerThreadgroup +_MTL_INLINE NS::UInteger MTL::CompileOptions::maxTotalThreadsPerThreadgroup() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(maxTotalThreadsPerThreadgroup)); +} + +_MTL_INLINE void MTL::CompileOptions::setMaxTotalThreadsPerThreadgroup(NS::UInteger maxTotalThreadsPerThreadgroup) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMaxTotalThreadsPerThreadgroup_), maxTotalThreadsPerThreadgroup); +} + _MTL_INLINE void MTL::Library::newFunction(const NS::String* pFunctionName, const FunctionConstantValues* pConstantValues, const std::function& completionHandler) { __block std::function blockCompletionHandler = completionHandler; diff --git a/metal-cpp/Metal/MTLLinkedFunctions.hpp b/metal-cpp/Metal/MTLLinkedFunctions.hpp index d8f0bd84..89ee8759 100644 --- a/metal-cpp/Metal/MTLLinkedFunctions.hpp +++ b/metal-cpp/Metal/MTLLinkedFunctions.hpp @@ -2,7 +2,7 @@ // // Metal/MTLLinkedFunctions.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLParallelRenderCommandEncoder.hpp b/metal-cpp/Metal/MTLParallelRenderCommandEncoder.hpp index e1a661ef..d6d6ad8c 100644 --- a/metal-cpp/Metal/MTLParallelRenderCommandEncoder.hpp +++ b/metal-cpp/Metal/MTLParallelRenderCommandEncoder.hpp @@ -2,7 +2,7 @@ // // Metal/MTLParallelRenderCommandEncoder.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLPipeline.hpp b/metal-cpp/Metal/MTLPipeline.hpp index b8fa6c0f..1b6859ca 100644 --- a/metal-cpp/Metal/MTLPipeline.hpp +++ b/metal-cpp/Metal/MTLPipeline.hpp @@ -2,7 +2,7 @@ // // Metal/MTLPipeline.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLPixelFormat.hpp b/metal-cpp/Metal/MTLPixelFormat.hpp index 1ea11e42..8e28a6be 100644 --- a/metal-cpp/Metal/MTLPixelFormat.hpp +++ b/metal-cpp/Metal/MTLPixelFormat.hpp @@ -2,7 +2,7 @@ // // Metal/MTLPixelFormat.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -70,6 +70,8 @@ _MTL_ENUM(NS::UInteger, PixelFormat) { PixelFormatRG11B10Float = 92, PixelFormatRGB9E5Float = 93, PixelFormatBGR10A2Unorm = 94, + PixelFormatBGR10_XR = 554, + PixelFormatBGR10_XR_sRGB = 555, PixelFormatRG32Uint = 103, PixelFormatRG32Sint = 104, PixelFormatRG32Float = 105, @@ -78,6 +80,8 @@ _MTL_ENUM(NS::UInteger, PixelFormat) { PixelFormatRGBA16Uint = 113, PixelFormatRGBA16Sint = 114, PixelFormatRGBA16Float = 115, + PixelFormatBGRA10_XR = 552, + PixelFormatBGRA10_XR_sRGB = 553, PixelFormatRGBA32Uint = 123, PixelFormatRGBA32Sint = 124, PixelFormatRGBA32Float = 125, @@ -164,10 +168,6 @@ _MTL_ENUM(NS::UInteger, PixelFormat) { PixelFormatDepth32Float_Stencil8 = 260, PixelFormatX32_Stencil8 = 261, PixelFormatX24_Stencil8 = 262, - PixelFormatBGRA10_XR = 552, - PixelFormatBGRA10_XR_sRGB = 553, - PixelFormatBGR10_XR = 554, - PixelFormatBGR10_XR_sRGB = 555, }; } diff --git a/metal-cpp/Metal/MTLPrivate.hpp b/metal-cpp/Metal/MTLPrivate.hpp index a579e874..e72e6354 100644 --- a/metal-cpp/Metal/MTLPrivate.hpp +++ b/metal-cpp/Metal/MTLPrivate.hpp @@ -2,7 +2,7 @@ // // Metal/MTLPrivate.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -71,7 +71,7 @@ namespace MTL::Private } } // MTL::Private -#if defined(__MAC_10_16) || defined(__MAC_11_0) || defined(__MAC_12_0) || defined(__MAC_13_0) || defined(__IPHONE_14_0) || defined(__IPHONE_15_0) || defined(__IPHONE_16_0) || defined(__TVOS_14_0) || defined(__TVOS_15_0) || defined(__TVOS_16_0) +#if defined(__MAC_10_16) || defined(__MAC_11_0) || defined(__MAC_12_0) || defined(__MAC_13_0) || defined(__MAC_14_0) || defined(__IPHONE_14_0) || defined(__IPHONE_15_0) || defined(__IPHONE_16_0) || defined(__IPHONE_17_0) || defined(__TVOS_14_0) || defined(__TVOS_15_0) || defined(__TVOS_16_0) || defined(__TVOS_17_0) #define _MTL_PRIVATE_DEF_STR(type, symbol) \ _MTL_EXTERN type const MTL##symbol _MTL_PRIVATE_IMPORT; \ @@ -97,7 +97,7 @@ namespace MTL::Private #define _MTL_PRIVATE_DEF_WEAK_CONST(type, symbol) _MTL_PRIVATE_DEF_CONST(type, symbol) -#endif // defined(__MAC_10_16) || defined(__MAC_11_0) || defined(__MAC_12_0) || defined(__MAC_13_0) || defined(__IPHONE_14_0) || defined(__IPHONE_15_0) || defined(__IPHONE_16_0) || defined(__TVOS_14_0) || defined(__TVOS_15_0) || defined(__TVOS_16_0) +#endif // defined(__MAC_10_16) || defined(__MAC_11_0) || defined(__MAC_12_0) || defined(__MAC_13_0) || defined(__MAC_14_0) || defined(__IPHONE_14_0) || defined(__IPHONE_15_0) || defined(__IPHONE_16_0) || defined(__IPHONE_17_0) || defined(__TVOS_14_0) || defined(__TVOS_15_0) || defined(__TVOS_16_0) || defined(__TVOS_17_0) #else diff --git a/metal-cpp/Metal/MTLRasterizationRate.hpp b/metal-cpp/Metal/MTLRasterizationRate.hpp index 6ea44636..cac23392 100644 --- a/metal-cpp/Metal/MTLRasterizationRate.hpp +++ b/metal-cpp/Metal/MTLRasterizationRate.hpp @@ -2,7 +2,7 @@ // // Metal/MTLRasterizationRate.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/metal-cpp/Metal/MTLRenderCommandEncoder.hpp b/metal-cpp/Metal/MTLRenderCommandEncoder.hpp index 8bdffbff..a3b0c420 100644 --- a/metal-cpp/Metal/MTLRenderCommandEncoder.hpp +++ b/metal-cpp/Metal/MTLRenderCommandEncoder.hpp @@ -2,7 +2,7 @@ // // Metal/MTLRenderCommandEncoder.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -151,6 +151,14 @@ class RenderCommandEncoder : public NS::Referencing(this, _MTL_PRIVATE_SEL(setVertexBuffers_offsets_withRange_), buffers, offsets, range); } +// method: setVertexBuffer:offset:attributeStride:atIndex: +_MTL_INLINE void MTL::RenderCommandEncoder::setVertexBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger stride, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexBuffer_offset_attributeStride_atIndex_), buffer, offset, stride, index); +} + +// method: setVertexBuffers:offsets:attributeStrides:withRange: +_MTL_INLINE void MTL::RenderCommandEncoder::setVertexBuffers(const MTL::Buffer* const buffers[], const NS::UInteger* offsets, const NS::UInteger* strides, NS::Range range) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexBuffers_offsets_attributeStrides_withRange_), buffers, offsets, strides, range); +} + +// method: setVertexBufferOffset:attributeStride:atIndex: +_MTL_INLINE void MTL::RenderCommandEncoder::setVertexBufferOffset(NS::UInteger offset, NS::UInteger stride, NS::UInteger index) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setVertexBufferOffset_attributeStride_atIndex_), offset, stride, index); +} + +// method: setVertexBytes:length:attributeStride:atIndex: +_MTL_INLINE void MTL::RenderCommandEncoder::setVertexBytes(const void* bytes, NS::UInteger length, NS::UInteger stride, NS::UInteger index) +{ + Object::sendMessage(this, 
_MTL_PRIVATE_SEL(setVertexBytes_length_attributeStride_atIndex_), bytes, length, stride, index); +} + // method: setVertexTexture:atIndex: _MTL_INLINE void MTL::RenderCommandEncoder::setVertexTexture(const MTL::Texture* texture, NS::UInteger index) { diff --git a/metal-cpp/Metal/MTLRenderPass.hpp b/metal-cpp/Metal/MTLRenderPass.hpp index d5d03d75..cb47ceab 100644 --- a/metal-cpp/Metal/MTLRenderPass.hpp +++ b/metal-cpp/Metal/MTLRenderPass.hpp @@ -2,7 +2,7 @@ // // Metal/MTLRenderPass.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -61,8 +61,8 @@ _MTL_ENUM(NS::UInteger, StoreAction) { _MTL_OPTIONS(NS::UInteger, StoreActionOptions) { StoreActionOptionNone = 0, - StoreActionOptionValidMask = 1, StoreActionOptionCustomSamplePositions = 1, + StoreActionOptionValidMask = 1, }; class RenderPassAttachmentDescriptor : public NS::Copying diff --git a/metal-cpp/Metal/MTLRenderPipeline.hpp b/metal-cpp/Metal/MTLRenderPipeline.hpp index 903f12a5..c4e501d2 100644 --- a/metal-cpp/Metal/MTLRenderPipeline.hpp +++ b/metal-cpp/Metal/MTLRenderPipeline.hpp @@ -2,7 +2,7 @@ // // Metal/MTLRenderPipeline.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -65,10 +65,10 @@ _MTL_ENUM(NS::UInteger, BlendOperation) { _MTL_OPTIONS(NS::UInteger, ColorWriteMask) { ColorWriteMaskNone = 0, - ColorWriteMaskAlpha = 1, - ColorWriteMaskBlue = 2, - ColorWriteMaskGreen = 4, ColorWriteMaskRed = 8, + ColorWriteMaskGreen = 4, + ColorWriteMaskBlue = 2, + ColorWriteMaskAlpha = 1, ColorWriteMaskAll = 15, }; @@ -468,6 +468,18 @@ class MeshRenderPipelineDescriptor : public NS::Copying(this, _MTL_PRIVATE_SEL(setStencilAttachmentPixelFormat_), stencilAttachmentPixelFormat); } +// property: supportIndirectCommandBuffers +_MTL_INLINE bool MTL::MeshRenderPipelineDescriptor::supportIndirectCommandBuffers() const +{ + return Object::sendMessageSafe(this, _MTL_PRIVATE_SEL(supportIndirectCommandBuffers)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setSupportIndirectCommandBuffers(bool supportIndirectCommandBuffers) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setSupportIndirectCommandBuffers_), supportIndirectCommandBuffers); +} + +// property: objectLinkedFunctions +_MTL_INLINE MTL::LinkedFunctions* MTL::MeshRenderPipelineDescriptor::objectLinkedFunctions() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectLinkedFunctions)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setObjectLinkedFunctions(const MTL::LinkedFunctions* objectLinkedFunctions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setObjectLinkedFunctions_), objectLinkedFunctions); +} + +// property: meshLinkedFunctions +_MTL_INLINE MTL::LinkedFunctions* MTL::MeshRenderPipelineDescriptor::meshLinkedFunctions() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(meshLinkedFunctions)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setMeshLinkedFunctions(const MTL::LinkedFunctions* meshLinkedFunctions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setMeshLinkedFunctions_), meshLinkedFunctions); +} + +// property: fragmentLinkedFunctions +_MTL_INLINE MTL::LinkedFunctions* 
MTL::MeshRenderPipelineDescriptor::fragmentLinkedFunctions() const +{ + return Object::sendMessage(this, _MTL_PRIVATE_SEL(fragmentLinkedFunctions)); +} + +_MTL_INLINE void MTL::MeshRenderPipelineDescriptor::setFragmentLinkedFunctions(const MTL::LinkedFunctions* fragmentLinkedFunctions) +{ + Object::sendMessage(this, _MTL_PRIVATE_SEL(setFragmentLinkedFunctions_), fragmentLinkedFunctions); +} + // method: reset _MTL_INLINE void MTL::MeshRenderPipelineDescriptor::reset() { diff --git a/metal-cpp/Metal/MTLResource.hpp b/metal-cpp/Metal/MTLResource.hpp index 7de3c7f7..b39caa13 100644 --- a/metal-cpp/Metal/MTLResource.hpp +++ b/metal-cpp/Metal/MTLResource.hpp @@ -2,7 +2,7 @@ // // Metal/MTLResource.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -56,17 +56,17 @@ _MTL_ENUM(NS::UInteger, HazardTrackingMode) { }; _MTL_OPTIONS(NS::UInteger, ResourceOptions) { - ResourceStorageModeShared = 0, - ResourceHazardTrackingModeDefault = 0, ResourceCPUCacheModeDefaultCache = 0, - ResourceOptionCPUCacheModeDefault = 0, ResourceCPUCacheModeWriteCombined = 1, - ResourceOptionCPUCacheModeWriteCombined = 1, + ResourceStorageModeShared = 0, ResourceStorageModeManaged = 16, ResourceStorageModePrivate = 32, ResourceStorageModeMemoryless = 48, + ResourceHazardTrackingModeDefault = 0, ResourceHazardTrackingModeUntracked = 256, ResourceHazardTrackingModeTracked = 512, + ResourceOptionCPUCacheModeDefault = 0, + ResourceOptionCPUCacheModeWriteCombined = 1, }; class Resource : public NS::Referencing diff --git a/metal-cpp/Metal/MTLResourceStateCommandEncoder.hpp b/metal-cpp/Metal/MTLResourceStateCommandEncoder.hpp index 97041a82..cf54f387 100644 --- a/metal-cpp/Metal/MTLResourceStateCommandEncoder.hpp +++ b/metal-cpp/Metal/MTLResourceStateCommandEncoder.hpp @@ -2,7 +2,7 @@ // // Metal/MTLResourceStateCommandEncoder.hpp // -// 
Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLResourceStatePass.hpp b/metal-cpp/Metal/MTLResourceStatePass.hpp index f20aa383..3c759b1a 100644 --- a/metal-cpp/Metal/MTLResourceStatePass.hpp +++ b/metal-cpp/Metal/MTLResourceStatePass.hpp @@ -2,7 +2,7 @@ // // Metal/MTLResourceStatePass.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLSampler.hpp b/metal-cpp/Metal/MTLSampler.hpp index b1b16d25..b086744a 100644 --- a/metal-cpp/Metal/MTLSampler.hpp +++ b/metal-cpp/Metal/MTLSampler.hpp @@ -2,7 +2,7 @@ // // Metal/MTLSampler.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLStageInputOutputDescriptor.hpp b/metal-cpp/Metal/MTLStageInputOutputDescriptor.hpp index 75ccd81a..dbc37467 100644 --- a/metal-cpp/Metal/MTLStageInputOutputDescriptor.hpp +++ b/metal-cpp/Metal/MTLStageInputOutputDescriptor.hpp @@ -2,7 +2,7 @@ // // Metal/MTLStageInputOutputDescriptor.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -83,6 +83,8 @@ _MTL_ENUM(NS::UInteger, AttributeFormat) { AttributeFormatUShortNormalized = 51, AttributeFormatShortNormalized = 52, AttributeFormatHalf = 53, + AttributeFormatFloatRG11B10 = 54, + AttributeFormatFloatRGB9E5 = 55, }; _MTL_ENUM(NS::UInteger, IndexType) { diff --git a/metal-cpp/Metal/MTLTexture.hpp b/metal-cpp/Metal/MTLTexture.hpp index 51f8d7c2..15898f40 100644 --- a/metal-cpp/Metal/MTLTexture.hpp +++ b/metal-cpp/Metal/MTLTexture.hpp @@ -2,7 +2,7 @@ // // Metal/MTLTexture.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -82,6 +82,7 @@ _MTL_OPTIONS(NS::UInteger, TextureUsage) { TextureUsageShaderWrite = 2, TextureUsageRenderTarget = 4, TextureUsagePixelFormatView = 16, + TextureUsageShaderAtomic = 32, }; _MTL_ENUM(NS::Integer, TextureCompressionType) { diff --git a/metal-cpp/Metal/MTLTypes.hpp b/metal-cpp/Metal/MTLTypes.hpp index 2c7a9cea..09a81840 100644 --- a/metal-cpp/Metal/MTLTypes.hpp +++ b/metal-cpp/Metal/MTLTypes.hpp @@ -2,7 +2,7 @@ // // Metal/MTLTypes.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/Metal/MTLVersion.hpp b/metal-cpp/Metal/MTLVersion.hpp index aa6a03a5..f811e555 100644 --- a/metal-cpp/Metal/MTLVersion.hpp +++ b/metal-cpp/Metal/MTLVersion.hpp @@ -2,7 +2,7 @@ // // Metal/MTLVersion.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -22,9 +22,9 @@ //------------------------------------------------------------------------------------------------------------------------------------------------------------- -#define METALCPP_VERSION_MAJOR 306 -#define METALCPP_VERSION_MINOR 2 -#define METALCPP_VERSION_PATCH 4 +#define METALCPP_VERSION_MAJOR 354 +#define METALCPP_VERSION_MINOR 0 +#define METALCPP_VERSION_PATCH 0 #define METALCPP_SUPPORTS_VERSION(major, minor, patch) \ ((major < METALCPP_VERSION_MAJOR) || \ diff --git a/metal-cpp/Metal/MTLVertexDescriptor.hpp b/metal-cpp/Metal/MTLVertexDescriptor.hpp index 8bc7fc7b..7e92b551 100644 --- a/metal-cpp/Metal/MTLVertexDescriptor.hpp +++ b/metal-cpp/Metal/MTLVertexDescriptor.hpp @@ -2,7 +2,7 @@ // // Metal/MTLVertexDescriptor.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -30,6 +30,9 @@ namespace MTL { + +static const NS::UInteger BufferLayoutStrideDynamic = NS::UIntegerMax; + _MTL_ENUM(NS::UInteger, VertexFormat) { VertexFormatInvalid = 0, VertexFormatUChar2 = 1, @@ -83,6 +86,8 @@ _MTL_ENUM(NS::UInteger, VertexFormat) { VertexFormatUShortNormalized = 51, VertexFormatShortNormalized = 52, VertexFormatHalf = 53, + VertexFormatFloatRG11B10 = 54, + VertexFormatFloatRGB9E5 = 55, }; _MTL_ENUM(NS::UInteger, VertexStepFunction) { diff --git a/metal-cpp/Metal/MTLVisibleFunctionTable.hpp b/metal-cpp/Metal/MTLVisibleFunctionTable.hpp index 70daccbd..88e8fb1e 100644 --- a/metal-cpp/Metal/MTLVisibleFunctionTable.hpp +++ b/metal-cpp/Metal/MTLVisibleFunctionTable.hpp @@ -2,7 +2,7 @@ // // Metal/MTLVisibleFunctionTable.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/metal-cpp/Metal/Metal.hpp b/metal-cpp/Metal/Metal.hpp index f4cf931b..6ea7adf3 100644 --- a/metal-cpp/Metal/Metal.hpp +++ b/metal-cpp/Metal/Metal.hpp @@ -2,7 +2,7 @@ // // Metal/Metal.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/MetalFX/MTLFXDefines.hpp b/metal-cpp/MetalFX/MTLFXDefines.hpp new file mode 100644 index 00000000..8b452168 --- /dev/null +++ b/metal-cpp/MetalFX/MTLFXDefines.hpp @@ -0,0 +1,41 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// MetalFX/MTLFXDefines.hpp +// +// Copyright 2020-2023 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "../Foundation/NSDefines.hpp" + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#define _MTLFX_EXPORT _NS_EXPORT +#define _MTLFX_EXTERN _NS_EXTERN +#define _MTLFX_INLINE _NS_INLINE +#define _MTLFX_PACKED _NS_PACKED + +#define _MTLFX_CONST( type, name ) _NS_CONST( type, name ) +#define _MTLFX_ENUM( type, name ) _NS_ENUM( type, name ) +#define _MTLFX_OPTIONS( type, name ) _NS_OPTIONS( type, name ) + +#define _MTLFX_VALIDATE_SIZE( mtlfx, name ) _NS_VALIDATE_SIZE( mtlfx, name ) +#define _MTLFX_VALIDATE_ENUM( mtlfx, name ) _NS_VALIDATE_ENUM( mtlfx, name ) + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/MetalFX/MTLFXPrivate.hpp b/metal-cpp/MetalFX/MTLFXPrivate.hpp new file mode 100644 index 00000000..104678ad --- /dev/null +++ b/metal-cpp/MetalFX/MTLFXPrivate.hpp @@ -0,0 +1,285 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// MetalFX/MTLFXPrivate.hpp +// +// Copyright 2020-2023 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#include "MTLFXDefines.hpp" + +#include + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#define _MTLFX_PRIVATE_CLS( symbol ) ( Private::Class::s_k##symbol ) +#define _MTLFX_PRIVATE_SEL( accessor ) ( Private::Selector::s_k##accessor ) + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#if defined( MTLFX_PRIVATE_IMPLEMENTATION ) + +#if defined( METALCPP_SYMBOL_VISIBILITY_HIDDEN ) +#define _MTLFX_PRIVATE_VISIBILITY __attribute__( ( visibility("hidden" ) ) ) +#else +#define _MTLFX_PRIVATE_VISIBILITY __attribute__( ( visibility("default" ) ) ) +#endif // METALCPP_SYMBOL_VISIBILITY_HIDDEN + +#define _MTLFX_PRIVATE_IMPORT __attribute__( ( weak_import ) ) + +#ifdef __OBJC__ +#define _MTLFX_PRIVATE_OBJC_LOOKUP_CLASS( symbol ) ( ( __bridge void* ) objc_lookUpClass( #symbol ) ) +#define _MTLFX_PRIVATE_OBJC_GET_PROTOCOL( symbol ) ( ( __bridge void* ) objc_getProtocol( #symbol ) ) +#else +#define _MTLFX_PRIVATE_OBJC_LOOKUP_CLASS( symbol ) objc_lookUpClass(#symbol) +#define 
_MTLFX_PRIVATE_OBJC_GET_PROTOCOL( symbol ) objc_getProtocol(#symbol) +#endif // __OBJC__ + +#define _MTLFX_PRIVATE_DEF_CLS( symbol ) void* s_k##symbol _MTLFX_PRIVATE_VISIBILITY = _MTLFX_PRIVATE_OBJC_LOOKUP_CLASS( symbol ) +#define _MTLFX_PRIVATE_DEF_PRO( symbol ) void* s_k##symbol _MTLFX_PRIVATE_VISIBILITY = _MTLFX_PRIVATE_OBJC_GET_PROTOCOL( symbol ) +#define _MTLFX_PRIVATE_DEF_SEL( accessor, symbol ) SEL s_k##accessor _MTLFX_PRIVATE_VISIBILITY = sel_registerName( symbol ) + +#include <dlfcn.h> +#define MTLFX_DEF_FUNC( name, signature ) using Fn##name = signature; \ + Fn##name name = reinterpret_cast< Fn##name >( dlsym( RTLD_DEFAULT, #name ) ) + +namespace MTLFX::Private +{ + template <typename _Type> + + inline _Type const LoadSymbol(const char* pSymbol) + { + const _Type* pAddress = static_cast<_Type*>(dlsym(RTLD_DEFAULT, pSymbol)); + + return pAddress ? *pAddress : nullptr; + } +} // MTLFX::Private + +#if defined( __MAC_13_0 ) || defined( __MAC_14_0 ) || defined( __IPHONE_16_0 ) || defined( __IPHONE_17_0 ) || defined( __TVOS_16_0 ) || defined( __TVOS_17_0 ) + +#define _MTLFX_PRIVATE_DEF_STR( type, symbol ) \ + _MTLFX_EXTERN type const MTLFX##symbol _MTLFX_PRIVATE_IMPORT; \ + type const MTLFX::symbol = ( nullptr != &MTLFX##symbol ) ? MTLFX##symbol : nullptr + +#define _MTLFX_PRIVATE_DEF_CONST( type, symbol ) \ + _MTLFX_EXTERN type const MTLFX##symbol _MTLFX_PRIVATE_IMPORT; \ + type const MTLFX::symbol = (nullptr != &MTLFX##symbol) ? 
MTLFX##symbol : nullptr + +#define _MTLFX_PRIVATE_DEF_WEAK_CONST( type, symbol ) \ + _MTLFX_EXTERN type const MTLFX##symbol; \ + type const MTLFX::symbol = Private::LoadSymbol< type >( "MTLFX" #symbol ) + +#else + +#define _MTLFX_PRIVATE_DEF_STR( type, symbol ) \ + _MTLFX_EXTERN type const MTLFX##symbol; \ + type const MTLFX::symbol = Private::LoadSymbol< type >( "MTLFX" #symbol ) + +#define _MTLFX_PRIVATE_DEF_CONST( type, symbol ) \ + _MTLFX_EXTERN type const MTLFX##symbol; \ + type const MTLFX::symbol = Private::LoadSymbol< type >( "MTLFX" #symbol ) + +#define _MTLFX_PRIVATE_DEF_WEAK_CONST( type, symbol ) _MTLFX_PRIVATE_DEF_CONST( type, symbol ) + +#endif // defined( __MAC_13_0 ) || defined( __MAC_14_0 ) || defined( __IPHONE_16_0 ) || defined( __IPHONE_17_0 ) || defined( __TVOS_16_0 ) || defined( __TVOS_17_0 ) + +#else + +#define _MTLFX_PRIVATE_DEF_CLS( symbol ) extern void* s_k##symbol +#define _MTLFX_PRIVATE_DEF_PRO( symbol ) extern void* s_k##symbol +#define _MTLFX_PRIVATE_DEF_SEL( accessor, symbol ) extern SEL s_k##accessor +#define _MTLFX_PRIVATE_DEF_STR( type, symbol ) extern type const MTLFX::symbol +#define _MTLFX_PRIVATE_DEF_CONST( type, symbol ) extern type const MTLFX::symbol +#define _MTLFX_PRIVATE_DEF_WEAK_CONST( type, symbol ) extern type const MTLFX::symbol + +#endif // MTLFX_PRIVATE_IMPLEMENTATION + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace MTLFX +{ + namespace Private + { + namespace Class + { + _MTLFX_PRIVATE_DEF_CLS( MTLFXSpatialScalerDescriptor ); + _MTLFX_PRIVATE_DEF_CLS( MTLFXTemporalScalerDescriptor ); + } // Class + } // Private +} // MTLFX + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace MTLFX +{ + namespace Private + { + namespace Protocol + { + _MTLFX_PRIVATE_DEF_PRO( 
MTLFXSpatialScaler ); + _MTLFX_PRIVATE_DEF_PRO( MTLFXTemporalScaler ); + } // Protocol + } // Private +} // MTLFX + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace MTLFX +{ + namespace Private + { + namespace Selector + { + _MTLFX_PRIVATE_DEF_SEL( colorProcessingMode, + "colorProcessingMode" ); + _MTLFX_PRIVATE_DEF_SEL( colorTexture, + "colorTexture" ); + _MTLFX_PRIVATE_DEF_SEL( colorTextureFormat, + "colorTextureFormat" ); + _MTLFX_PRIVATE_DEF_SEL( colorTextureUsage, + "colorTextureUsage" ); + _MTLFX_PRIVATE_DEF_SEL( depthTexture, + "depthTexture" ); + _MTLFX_PRIVATE_DEF_SEL( depthTextureFormat, + "depthTextureFormat" ); + _MTLFX_PRIVATE_DEF_SEL( depthTextureUsage, + "depthTextureUsage" ); + _MTLFX_PRIVATE_DEF_SEL( encodeToCommandBuffer_, + "encodeToCommandBuffer:" ); + _MTLFX_PRIVATE_DEF_SEL( exposureTexture, + "exposureTexture" ); + _MTLFX_PRIVATE_DEF_SEL( fence, + "fence" ); + _MTLFX_PRIVATE_DEF_SEL( inputContentHeight, + "inputContentHeight" ); + _MTLFX_PRIVATE_DEF_SEL( inputContentMaxScale, + "inputContentMaxScale" ); + _MTLFX_PRIVATE_DEF_SEL( inputContentMinScale, + "inputContentMinScale" ); + _MTLFX_PRIVATE_DEF_SEL( inputContentWidth, + "inputContentWidth" ); + _MTLFX_PRIVATE_DEF_SEL( inputHeight, + "inputHeight" ); + _MTLFX_PRIVATE_DEF_SEL( inputWidth, + "inputWidth" ); + _MTLFX_PRIVATE_DEF_SEL( isAutoExposureEnabled, + "isAutoExposureEnabled" ); + _MTLFX_PRIVATE_DEF_SEL( isDepthReversed, + "isDepthReversed" ); + _MTLFX_PRIVATE_DEF_SEL( isInputContentPropertiesEnabled, + "isInputContentPropertiesEnabled" ); + _MTLFX_PRIVATE_DEF_SEL( jitterOffsetX, + "jitterOffsetX" ); + _MTLFX_PRIVATE_DEF_SEL( jitterOffsetY, + "jitterOffsetY" ); + _MTLFX_PRIVATE_DEF_SEL( motionTexture, + "motionTexture" ); + _MTLFX_PRIVATE_DEF_SEL( motionTextureFormat, + "motionTextureFormat" ); + _MTLFX_PRIVATE_DEF_SEL( motionTextureUsage, + "motionTextureUsage" ); 
+ _MTLFX_PRIVATE_DEF_SEL( motionVectorScaleX, + "motionVectorScaleX" ); + _MTLFX_PRIVATE_DEF_SEL( motionVectorScaleY, + "motionVectorScaleY" ); + _MTLFX_PRIVATE_DEF_SEL( newSpatialScalerWithDevice_, + "newSpatialScalerWithDevice:" ); + _MTLFX_PRIVATE_DEF_SEL( newTemporalScalerWithDevice_, + "newTemporalScalerWithDevice:" ); + _MTLFX_PRIVATE_DEF_SEL( outputHeight, + "outputHeight" ); + _MTLFX_PRIVATE_DEF_SEL( outputTexture, + "outputTexture" ); + _MTLFX_PRIVATE_DEF_SEL( outputTextureFormat, + "outputTextureFormat" ); + _MTLFX_PRIVATE_DEF_SEL( outputTextureUsage, + "outputTextureUsage" ); + _MTLFX_PRIVATE_DEF_SEL( outputWidth, + "outputWidth" ); + _MTLFX_PRIVATE_DEF_SEL( preExposure, + "preExposure" ); + _MTLFX_PRIVATE_DEF_SEL( reset, + "reset" ); + _MTLFX_PRIVATE_DEF_SEL( setAutoExposureEnabled_, + "setAutoExposureEnabled:" ); + _MTLFX_PRIVATE_DEF_SEL( setColorProcessingMode_, + "setColorProcessingMode:" ); + _MTLFX_PRIVATE_DEF_SEL( setColorTexture_, + "setColorTexture:" ); + _MTLFX_PRIVATE_DEF_SEL( setColorTextureFormat_, + "setColorTextureFormat:" ); + _MTLFX_PRIVATE_DEF_SEL( setDepthReversed_, + "setDepthReversed:" ); + _MTLFX_PRIVATE_DEF_SEL( setDepthTexture_, + "setDepthTexture:" ); + _MTLFX_PRIVATE_DEF_SEL( setDepthTextureFormat_, + "setDepthTextureFormat:" ); + _MTLFX_PRIVATE_DEF_SEL( setExposureTexture_, + "setExposureTexture:" ); + _MTLFX_PRIVATE_DEF_SEL( setFence_, + "setFence:" ); + _MTLFX_PRIVATE_DEF_SEL( setInputContentHeight_, + "setInputContentHeight:" ); + _MTLFX_PRIVATE_DEF_SEL( setInputContentMaxScale_, + "setInputContentMaxScale:" ); + _MTLFX_PRIVATE_DEF_SEL( setInputContentMinScale_, + "setInputContentMinScale:" ); + _MTLFX_PRIVATE_DEF_SEL( setInputContentPropertiesEnabled_, + "setInputContentPropertiesEnabled:" ); + _MTLFX_PRIVATE_DEF_SEL( setInputContentWidth_, + "setInputContentWidth:" ); + _MTLFX_PRIVATE_DEF_SEL( setInputHeight_, + "setInputHeight:" ); + _MTLFX_PRIVATE_DEF_SEL( setInputWidth_, + "setInputWidth:" ); + _MTLFX_PRIVATE_DEF_SEL( 
setJitterOffsetX_, + "setJitterOffsetX:" ); + _MTLFX_PRIVATE_DEF_SEL( setJitterOffsetY_, + "setJitterOffsetY:" ); + _MTLFX_PRIVATE_DEF_SEL( setMotionTexture_, + "setMotionTexture:" ); + _MTLFX_PRIVATE_DEF_SEL( setMotionTextureFormat_, + "setMotionTextureFormat:" ); + _MTLFX_PRIVATE_DEF_SEL( setMotionVectorScaleX_, + "setMotionVectorScaleX:" ); + _MTLFX_PRIVATE_DEF_SEL( setMotionVectorScaleY_, + "setMotionVectorScaleY:" ); + _MTLFX_PRIVATE_DEF_SEL( setOutputHeight_, + "setOutputHeight:" ); + _MTLFX_PRIVATE_DEF_SEL( setOutputTexture_, + "setOutputTexture:" ); + _MTLFX_PRIVATE_DEF_SEL( setOutputTextureFormat_, + "setOutputTextureFormat:" ); + _MTLFX_PRIVATE_DEF_SEL( setOutputWidth_, + "setOutputWidth:" ); + _MTLFX_PRIVATE_DEF_SEL( setPreExposure_, + "setPreExposure:" ); + _MTLFX_PRIVATE_DEF_SEL( setReset_, + "setReset:" ); + _MTLFX_PRIVATE_DEF_SEL( supportedInputContentMaxScaleForDevice_, + "supportedInputContentMaxScaleForDevice:" ); + _MTLFX_PRIVATE_DEF_SEL( supportedInputContentMinScaleForDevice_, + "supportedInputContentMinScaleForDevice:" ); + _MTLFX_PRIVATE_DEF_SEL( supportsDevice_, + "supportsDevice:" ); + } // Selector + } // Private +} // MTLFX + +//------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/metal-cpp/MetalFX/MTLFXSpatialScaler.hpp b/metal-cpp/MetalFX/MTLFXSpatialScaler.hpp new file mode 100644 index 00000000..841898d4 --- /dev/null +++ b/metal-cpp/MetalFX/MTLFXSpatialScaler.hpp @@ -0,0 +1,372 @@ +//------------------------------------------------------------------------------------------------------------------------------------------------------------- +// +// MetalFX/MTLFXSpatialScaler.hpp +// +// Copyright 2020-2023 Apple Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "MTLFXDefines.hpp"
+#include "MTLFXPrivate.hpp"
+
+#include "../Metal/Metal.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Declarations of the spatial-scaler wrappers; their inline member definitions follow the namespace.
+namespace MTLFX
+{
+    _MTLFX_ENUM( NS::Integer, SpatialScalerColorProcessingMode ) // enumerators are numbered explicitly 0..2; macro is defined outside this chunk
+    {
+        SpatialScalerColorProcessingModePerceptual = 0,
+        SpatialScalerColorProcessingModeLinear = 1,
+        SpatialScalerColorProcessingModeHDR = 2
+    };
+    // Wrapper whose alloc() uses the MTLFXSpatialScalerDescriptor class handle; members forward to selectors.
+    class SpatialScalerDescriptor : public NS::Copying< SpatialScalerDescriptor >
+    {
+    public:
+        static class SpatialScalerDescriptor* alloc();
+        class SpatialScalerDescriptor* init();
+
+        MTL::PixelFormat colorTextureFormat() const;
+        void setColorTextureFormat( MTL::PixelFormat format );
+
+        MTL::PixelFormat outputTextureFormat() const;
+        void setOutputTextureFormat( MTL::PixelFormat format );
+
+        NS::UInteger inputWidth() const;
+        void setInputWidth( NS::UInteger width );
+
+        NS::UInteger inputHeight() const;
+        void setInputHeight( NS::UInteger height );
+
+        NS::UInteger outputWidth() const;
+        void setOutputWidth( NS::UInteger width );
+
+        NS::UInteger outputHeight() const;
+        void setOutputHeight( NS::UInteger height );
+
+        SpatialScalerColorProcessingMode colorProcessingMode() const;
+        void setColorProcessingMode( SpatialScalerColorProcessingMode mode );
+
+        class SpatialScaler* newSpatialScaler( const MTL::Device* pDevice ); // `class SpatialScaler` here also forward-declares the type defined below
+
+        static bool supportsDevice( const MTL::Device* ); // static: the definition messages the class handle, not an instance
+    };
+    // Type returned by SpatialScalerDescriptor::newSpatialScaler(). Formats, sizes and mode are getter-only here.
+    class SpatialScaler : public NS::Referencing< SpatialScaler >
+    {
+    public:
+        MTL::TextureUsage colorTextureUsage() const;
+        MTL::TextureUsage outputTextureUsage() const;
+
+        NS::UInteger inputContentWidth() const;
+        void setInputContentWidth( NS::UInteger width );
+
+        NS::UInteger inputContentHeight() const;
+        void setInputContentHeight( NS::UInteger height );
+
+        MTL::Texture* colorTexture() const;
+        void setColorTexture( MTL::Texture* pTexture );
+
+        MTL::Texture* outputTexture() const;
+        void setOutputTexture( MTL::Texture* pTexture );
+
+        MTL::PixelFormat colorTextureFormat() const;
+        MTL::PixelFormat outputTextureFormat() const;
+        NS::UInteger inputWidth() const;
+        NS::UInteger inputHeight() const;
+        NS::UInteger outputWidth() const;
+        NS::UInteger outputHeight() const;
+        SpatialScalerColorProcessingMode colorProcessingMode() const;
+
+        MTL::Fence* fence() const;
+        void setFence( MTL::Fence* pFence );
+
+        void encodeToCommandBuffer( MTL::CommandBuffer* pCommandBuffer );
+    };
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Calls NS::Object::alloc with the MTLFXSpatialScalerDescriptor class handle; result is typed SpatialScalerDescriptor*.
+_MTLFX_INLINE MTLFX::SpatialScalerDescriptor* MTLFX::SpatialScalerDescriptor::alloc()
+{
+    return NS::Object::alloc< SpatialScalerDescriptor >( _MTLFX_PRIVATE_CLS( MTLFXSpatialScalerDescriptor ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Forwards to NS::Object::init and returns its result typed as SpatialScalerDescriptor*.
+_MTLFX_INLINE MTLFX::SpatialScalerDescriptor* MTLFX::SpatialScalerDescriptor::init()
+{
+    return NS::Object::init< SpatialScalerDescriptor >();
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `colorTextureFormat` to this descriptor and returns the reply as MTL::PixelFormat.
+_MTLFX_INLINE MTL::PixelFormat MTLFX::SpatialScalerDescriptor::colorTextureFormat() const
+{
+    return Object::sendMessage< MTL::PixelFormat >( this, _MTLFX_PRIVATE_SEL( colorTextureFormat ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setColorTextureFormat:` with `format`; selector taken via _MTLFX_PRIVATE_SEL, matching the getters and the MTLFX selector table.
+_MTLFX_INLINE void MTLFX::SpatialScalerDescriptor::setColorTextureFormat( MTL::PixelFormat format )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setColorTextureFormat_ ), format );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `outputTextureFormat` to this descriptor and returns the reply as MTL::PixelFormat.
+_MTLFX_INLINE MTL::PixelFormat MTLFX::SpatialScalerDescriptor::outputTextureFormat() const
+{
+    return Object::sendMessage< MTL::PixelFormat >( this, _MTLFX_PRIVATE_SEL( outputTextureFormat ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setOutputTextureFormat:` with `format` (MetalFX selector accessor, as for the other setters).
+_MTLFX_INLINE void MTLFX::SpatialScalerDescriptor::setOutputTextureFormat( MTL::PixelFormat format )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setOutputTextureFormat_ ), format );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `inputWidth` to this descriptor and returns the reply as NS::UInteger.
+_MTLFX_INLINE NS::UInteger MTLFX::SpatialScalerDescriptor::inputWidth() const
+{
+    return Object::sendMessage< NS::UInteger >( this, _MTLFX_PRIVATE_SEL( inputWidth ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setInputWidth:` with `width` (MetalFX selector accessor, as for the other setters).
+_MTLFX_INLINE void MTLFX::SpatialScalerDescriptor::setInputWidth( NS::UInteger width )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setInputWidth_ ), width );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `inputHeight` to this descriptor and returns the reply as NS::UInteger.
+_MTLFX_INLINE NS::UInteger MTLFX::SpatialScalerDescriptor::inputHeight() const
+{
+    return Object::sendMessage< NS::UInteger >( this, _MTLFX_PRIVATE_SEL( inputHeight ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setInputHeight:` with `height` (MetalFX selector accessor, as for the other setters).
+_MTLFX_INLINE void MTLFX::SpatialScalerDescriptor::setInputHeight( NS::UInteger height )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setInputHeight_ ), height );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `outputWidth` to this descriptor and returns the reply as NS::UInteger.
+_MTLFX_INLINE NS::UInteger MTLFX::SpatialScalerDescriptor::outputWidth() const
+{
+    return Object::sendMessage< NS::UInteger >( this, _MTLFX_PRIVATE_SEL( outputWidth ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setOutputWidth:` with `width` (MetalFX selector accessor, as for the other setters).
+_MTLFX_INLINE void MTLFX::SpatialScalerDescriptor::setOutputWidth( NS::UInteger width )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setOutputWidth_ ), width );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `outputHeight` to this descriptor and returns the reply as NS::UInteger.
+_MTLFX_INLINE NS::UInteger MTLFX::SpatialScalerDescriptor::outputHeight() const
+{
+    return Object::sendMessage< NS::UInteger >( this, _MTLFX_PRIVATE_SEL( outputHeight ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setOutputHeight:` with `height`; selector taken via _MTLFX_PRIVATE_SEL, matching the getters and the MTLFX selector table.
+_MTLFX_INLINE void MTLFX::SpatialScalerDescriptor::setOutputHeight( NS::UInteger height )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setOutputHeight_ ), height );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `colorProcessingMode` to this descriptor and returns the reply as SpatialScalerColorProcessingMode.
+_MTLFX_INLINE MTLFX::SpatialScalerColorProcessingMode MTLFX::SpatialScalerDescriptor::colorProcessingMode() const
+{
+    return Object::sendMessage< SpatialScalerColorProcessingMode >( this, _MTLFX_PRIVATE_SEL( colorProcessingMode ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setColorProcessingMode:` with `mode` (MetalFX selector accessor, as for the other setters).
+_MTLFX_INLINE void MTLFX::SpatialScalerDescriptor::setColorProcessingMode( SpatialScalerColorProcessingMode mode )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setColorProcessingMode_ ), mode );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `newSpatialScalerWithDevice:` with `pDevice` and returns the reply as SpatialScaler*.
+_MTLFX_INLINE MTLFX::SpatialScaler* MTLFX::SpatialScalerDescriptor::newSpatialScaler( const MTL::Device* pDevice )
+{
+    return Object::sendMessage< SpatialScaler* >( this, _MTLFX_PRIVATE_SEL( newSpatialScalerWithDevice_ ), pDevice );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class-level query: sends `supportsDevice:` to the MTLFXSpatialScalerDescriptor class handle (same _MTLFX_PRIVATE_CLS accessor alloc() uses).
+_MTLFX_INLINE bool MTLFX::SpatialScalerDescriptor::supportsDevice( const MTL::Device* pDevice )
+{
+    return Object::sendMessageSafe< bool >( _MTLFX_PRIVATE_CLS( MTLFXSpatialScalerDescriptor ), _MTLFX_PRIVATE_SEL( supportsDevice_ ), pDevice );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `colorTextureUsage` to this scaler and returns the reply as MTL::TextureUsage.
+_MTLFX_INLINE MTL::TextureUsage MTLFX::SpatialScaler::colorTextureUsage() const
+{
+    return Object::sendMessage< MTL::TextureUsage >( this, _MTLFX_PRIVATE_SEL( colorTextureUsage ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `outputTextureUsage` to this scaler and returns the reply as MTL::TextureUsage.
+_MTLFX_INLINE MTL::TextureUsage MTLFX::SpatialScaler::outputTextureUsage() const
+{
+    return Object::sendMessage< MTL::TextureUsage >( this, _MTLFX_PRIVATE_SEL( outputTextureUsage ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `inputContentWidth` to this scaler and returns the reply as NS::UInteger.
+_MTLFX_INLINE NS::UInteger MTLFX::SpatialScaler::inputContentWidth() const
+{
+    return Object::sendMessage< NS::UInteger >( this, _MTLFX_PRIVATE_SEL( inputContentWidth ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setInputContentWidth:` with `width`; selector taken via _MTLFX_PRIVATE_SEL, matching the getters and the MTLFX selector table.
+_MTLFX_INLINE void MTLFX::SpatialScaler::setInputContentWidth( NS::UInteger width )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setInputContentWidth_ ), width );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `inputContentHeight` to this scaler and returns the reply as NS::UInteger.
+_MTLFX_INLINE NS::UInteger MTLFX::SpatialScaler::inputContentHeight() const
+{
+    return Object::sendMessage< NS::UInteger >( this, _MTLFX_PRIVATE_SEL( inputContentHeight ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setInputContentHeight:` with `height` (MetalFX selector accessor, as for the other setters).
+_MTLFX_INLINE void MTLFX::SpatialScaler::setInputContentHeight( NS::UInteger height )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setInputContentHeight_ ), height );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `colorTexture` to this scaler and returns the reply as MTL::Texture*.
+_MTLFX_INLINE MTL::Texture* MTLFX::SpatialScaler::colorTexture() const
+{
+    return Object::sendMessage< MTL::Texture* >( this, _MTLFX_PRIVATE_SEL( colorTexture ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setColorTexture:` with `pTexture` (MetalFX selector accessor, as for the other setters).
+_MTLFX_INLINE void MTLFX::SpatialScaler::setColorTexture( MTL::Texture* pTexture )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setColorTexture_ ), pTexture );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `outputTexture` to this scaler and returns the reply as MTL::Texture*.
+_MTLFX_INLINE MTL::Texture* MTLFX::SpatialScaler::outputTexture() const
+{
+    return Object::sendMessage< MTL::Texture* >( this, _MTLFX_PRIVATE_SEL( outputTexture ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setOutputTexture:` with `pTexture` (MetalFX selector accessor, as for the other setters).
+_MTLFX_INLINE void MTLFX::SpatialScaler::setOutputTexture( MTL::Texture* pTexture )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setOutputTexture_ ), pTexture );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `colorTextureFormat` to this scaler and returns the reply as MTL::PixelFormat.
+_MTLFX_INLINE MTL::PixelFormat MTLFX::SpatialScaler::colorTextureFormat() const
+{
+    return Object::sendMessage< MTL::PixelFormat >( this, _MTLFX_PRIVATE_SEL( colorTextureFormat ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `outputTextureFormat` to this scaler and returns the reply as MTL::PixelFormat.
+_MTLFX_INLINE MTL::PixelFormat MTLFX::SpatialScaler::outputTextureFormat() const
+{
+    return Object::sendMessage< MTL::PixelFormat >( this, _MTLFX_PRIVATE_SEL( outputTextureFormat ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `inputWidth` to this scaler and returns the reply as NS::UInteger.
+_MTLFX_INLINE NS::UInteger MTLFX::SpatialScaler::inputWidth() const
+{
+    return Object::sendMessage< NS::UInteger >( this, _MTLFX_PRIVATE_SEL( inputWidth ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `inputHeight` to this scaler and returns the reply as NS::UInteger.
+_MTLFX_INLINE NS::UInteger MTLFX::SpatialScaler::inputHeight() const
+{
+    return Object::sendMessage< NS::UInteger >( this, _MTLFX_PRIVATE_SEL( inputHeight ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `outputWidth` to this scaler and returns the reply as NS::UInteger.
+_MTLFX_INLINE NS::UInteger MTLFX::SpatialScaler::outputWidth() const
+{
+    return Object::sendMessage< NS::UInteger >( this, _MTLFX_PRIVATE_SEL( outputWidth ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `outputHeight` to this scaler and returns the reply as NS::UInteger.
+_MTLFX_INLINE NS::UInteger MTLFX::SpatialScaler::outputHeight() const
+{
+    return Object::sendMessage< NS::UInteger >( this, _MTLFX_PRIVATE_SEL( outputHeight ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `colorProcessingMode` to this scaler and returns the reply as SpatialScalerColorProcessingMode.
+_MTLFX_INLINE MTLFX::SpatialScalerColorProcessingMode MTLFX::SpatialScaler::colorProcessingMode() const
+{
+    return Object::sendMessage< SpatialScalerColorProcessingMode >( this, _MTLFX_PRIVATE_SEL( colorProcessingMode ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `fence` to this scaler and returns the reply as MTL::Fence*.
+_MTLFX_INLINE MTL::Fence* MTLFX::SpatialScaler::fence() const
+{
+    return Object::sendMessage< MTL::Fence* >( this, _MTLFX_PRIVATE_SEL( fence ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setFence:` with `pFence`; selector taken via _MTLFX_PRIVATE_SEL, matching the getters and the MTLFX selector table.
+_MTLFX_INLINE void MTLFX::SpatialScaler::setFence( MTL::Fence* pFence )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setFence_ ), pFence );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `encodeToCommandBuffer:` with `pCommandBuffer` (MetalFX selector accessor, as for setFence).
+_MTLFX_INLINE void MTLFX::SpatialScaler::encodeToCommandBuffer( MTL::CommandBuffer* pCommandBuffer )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( encodeToCommandBuffer_ ), pCommandBuffer );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/MetalFX/MTLFXTemporalScaler.hpp b/metal-cpp/MetalFX/MTLFXTemporalScaler.hpp
new file mode 100644
index 00000000..e4782973
--- /dev/null
+++ b/metal-cpp/MetalFX/MTLFXTemporalScaler.hpp
@@ -0,0 +1,695 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// MetalFX/MTLFXTemporalScaler.hpp
+//
+// Copyright 2020-2023 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "MTLFXDefines.hpp"
+#include "MTLFXPrivate.hpp"
+
+#include "../Metal/Metal.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Declarations of the temporal-scaler wrappers; their inline member definitions follow the namespace.
+namespace MTLFX
+{
+    class TemporalScalerDescriptor : public NS::Copying< TemporalScalerDescriptor > // alloc() below uses the MTLFXTemporalScalerDescriptor class handle
+    {
+    public:
+        static class TemporalScalerDescriptor* alloc();
+        class TemporalScalerDescriptor* init();
+
+        MTL::PixelFormat colorTextureFormat() const;
+        void setColorTextureFormat( MTL::PixelFormat format );
+
+        MTL::PixelFormat depthTextureFormat() const;
+        void setDepthTextureFormat( MTL::PixelFormat format );
+
+        MTL::PixelFormat motionTextureFormat() const;
+        void setMotionTextureFormat( MTL::PixelFormat format );
+
+        MTL::PixelFormat outputTextureFormat() const;
+        void setOutputTextureFormat( MTL::PixelFormat format );
+
+        NS::UInteger inputWidth() const;
+        void setInputWidth( NS::UInteger width );
+
+        NS::UInteger inputHeight() const;
+        void setInputHeight( NS::UInteger height );
+
+        NS::UInteger outputWidth() const;
+        void setOutputWidth( NS::UInteger width );
+
+        NS::UInteger outputHeight() const;
+        void setOutputHeight( NS::UInteger height );
+
+        bool isAutoExposureEnabled() const;
+        void setAutoExposureEnabled( bool enabled );
+
+        bool isInputContentPropertiesEnabled() const;
+        void setInputContentPropertiesEnabled( bool enabled );
+
+        float inputContentMinScale() const;
+        void setInputContentMinScale( float scale );
+
+        float inputContentMaxScale() const;
+        void setInputContentMaxScale( float scale );
+
+        class TemporalScaler* newTemporalScaler( const MTL::Device* pDevice ) const; // `class TemporalScaler` here also forward-declares the type defined below
+
+        static float supportedInputContentMinScale( const MTL::Device* pDevice );
+        static float supportedInputContentMaxScale( const MTL::Device* pDevice );
+
+        static bool supportsDevice( const MTL::Device* pDevice );
+    };
+    // Type returned by TemporalScalerDescriptor::newTemporalScaler(). Formats, sizes and content scales are getter-only here.
+    class TemporalScaler : public NS::Referencing< TemporalScaler >
+    {
+    public:
+        MTL::TextureUsage colorTextureUsage() const;
+        MTL::TextureUsage depthTextureUsage() const;
+        MTL::TextureUsage motionTextureUsage() const;
+        MTL::TextureUsage outputTextureUsage() const;
+
+        NS::UInteger inputContentWidth() const;
+        void setInputContentWidth( NS::UInteger width );
+
+        NS::UInteger inputContentHeight() const;
+        void setInputContentHeight( NS::UInteger height );
+
+        MTL::Texture* colorTexture() const;
+        void setColorTexture( MTL::Texture* pTexture );
+
+        MTL::Texture* depthTexture() const;
+        void setDepthTexture( MTL::Texture* pTexture );
+
+        MTL::Texture* motionTexture() const;
+        void setMotionTexture( MTL::Texture* pTexture );
+
+        MTL::Texture* outputTexture() const;
+        void setOutputTexture( MTL::Texture* pTexture );
+
+        MTL::Texture* exposureTexture() const;
+        void setExposureTexture( MTL::Texture* pTexture );
+
+        float preExposure() const;
+        void setPreExposure( float preExposure );
+
+        float jitterOffsetX() const;
+        void setJitterOffsetX( float offset );
+
+        float jitterOffsetY() const;
+        void setJitterOffsetY( float offset );
+
+        float motionVectorScaleX() const;
+        void setMotionVectorScaleX( float scale );
+
+        float motionVectorScaleY() const;
+        void setMotionVectorScaleY( float scale );
+
+        bool reset() const;
+        void setReset( bool reset );
+
+        bool isDepthReversed() const;
+        void setDepthReversed( bool depthReversed );
+
+        MTL::PixelFormat colorTextureFormat() const;
+        MTL::PixelFormat depthTextureFormat() const;
+        MTL::PixelFormat motionTextureFormat() const;
+        MTL::PixelFormat outputTextureFormat() const;
+        NS::UInteger inputWidth() const;
+        NS::UInteger inputHeight() const;
+        NS::UInteger outputWidth() const;
+        NS::UInteger outputHeight() const;
+        float inputContentMinScale() const;
+        float inputContentMaxScale() const;
+
+        MTL::Fence* fence() const;
+        void setFence( MTL::Fence* pFence );
+
+        void encodeToCommandBuffer( MTL::CommandBuffer* pCommandBuffer );
+    };
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Calls NS::Object::alloc with the MTLFXTemporalScalerDescriptor class handle; result is typed TemporalScalerDescriptor*.
+_MTLFX_INLINE MTLFX::TemporalScalerDescriptor* MTLFX::TemporalScalerDescriptor::alloc()
+{
+    return NS::Object::alloc< TemporalScalerDescriptor >( _MTLFX_PRIVATE_CLS( MTLFXTemporalScalerDescriptor ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Forwards to NS::Object::init and returns its result typed as TemporalScalerDescriptor*.
+_MTLFX_INLINE MTLFX::TemporalScalerDescriptor* MTLFX::TemporalScalerDescriptor::init()
+{
+    return NS::Object::init< TemporalScalerDescriptor >();
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `colorTextureFormat` to this descriptor and returns the reply as MTL::PixelFormat.
+_MTLFX_INLINE MTL::PixelFormat MTLFX::TemporalScalerDescriptor::colorTextureFormat() const
+{
+    return Object::sendMessage< MTL::PixelFormat >( this, _MTLFX_PRIVATE_SEL( colorTextureFormat ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setColorTextureFormat:` with `format`; selector taken via _MTLFX_PRIVATE_SEL, matching the getters and the MTLFX selector table.
+_MTLFX_INLINE void MTLFX::TemporalScalerDescriptor::setColorTextureFormat( MTL::PixelFormat format )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setColorTextureFormat_ ), format );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `depthTextureFormat` to this descriptor and returns the reply as MTL::PixelFormat.
+_MTLFX_INLINE MTL::PixelFormat MTLFX::TemporalScalerDescriptor::depthTextureFormat() const
+{
+    return Object::sendMessage< MTL::PixelFormat >( this, _MTLFX_PRIVATE_SEL( depthTextureFormat ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setDepthTextureFormat:` with `format` (MetalFX selector accessor, as for the other setters).
+_MTLFX_INLINE void MTLFX::TemporalScalerDescriptor::setDepthTextureFormat( MTL::PixelFormat format )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setDepthTextureFormat_ ), format );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `motionTextureFormat` to this descriptor and returns the reply as MTL::PixelFormat.
+_MTLFX_INLINE MTL::PixelFormat MTLFX::TemporalScalerDescriptor::motionTextureFormat() const
+{
+    return Object::sendMessage< MTL::PixelFormat >( this, _MTLFX_PRIVATE_SEL( motionTextureFormat ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setMotionTextureFormat:` with `format` (MetalFX selector accessor, as for the other setters).
+_MTLFX_INLINE void MTLFX::TemporalScalerDescriptor::setMotionTextureFormat( MTL::PixelFormat format )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setMotionTextureFormat_ ), format );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `outputTextureFormat` to this descriptor and returns the reply as MTL::PixelFormat.
+_MTLFX_INLINE MTL::PixelFormat MTLFX::TemporalScalerDescriptor::outputTextureFormat() const
+{
+    return Object::sendMessage< MTL::PixelFormat >( this, _MTLFX_PRIVATE_SEL( outputTextureFormat ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setOutputTextureFormat:` with `format`; selector taken via _MTLFX_PRIVATE_SEL, matching the getters and the MTLFX selector table.
+_MTLFX_INLINE void MTLFX::TemporalScalerDescriptor::setOutputTextureFormat( MTL::PixelFormat format )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setOutputTextureFormat_ ), format );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `inputWidth` to this descriptor and returns the reply as NS::UInteger.
+_MTLFX_INLINE NS::UInteger MTLFX::TemporalScalerDescriptor::inputWidth() const
+{
+    return Object::sendMessage< NS::UInteger >( this, _MTLFX_PRIVATE_SEL( inputWidth ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setInputWidth:` with `width` (MetalFX selector accessor, as for the other setters).
+_MTLFX_INLINE void MTLFX::TemporalScalerDescriptor::setInputWidth( NS::UInteger width )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setInputWidth_ ), width );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `inputHeight` to this descriptor and returns the reply as NS::UInteger.
+_MTLFX_INLINE NS::UInteger MTLFX::TemporalScalerDescriptor::inputHeight() const
+{
+    return Object::sendMessage< NS::UInteger >( this, _MTLFX_PRIVATE_SEL( inputHeight ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setInputHeight:` with `height` (MetalFX selector accessor, as for the other setters).
+_MTLFX_INLINE void MTLFX::TemporalScalerDescriptor::setInputHeight( NS::UInteger height )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setInputHeight_ ), height );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `outputWidth` to this descriptor and returns the reply as NS::UInteger.
+_MTLFX_INLINE NS::UInteger MTLFX::TemporalScalerDescriptor::outputWidth() const
+{
+    return Object::sendMessage< NS::UInteger >( this, _MTLFX_PRIVATE_SEL( outputWidth ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setOutputWidth:` with `width` (MetalFX selector accessor, as for the other setters).
+_MTLFX_INLINE void MTLFX::TemporalScalerDescriptor::setOutputWidth( NS::UInteger width )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setOutputWidth_ ), width );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `outputHeight` to this descriptor and returns the reply as NS::UInteger.
+_MTLFX_INLINE NS::UInteger MTLFX::TemporalScalerDescriptor::outputHeight() const
+{
+    return Object::sendMessage< NS::UInteger >( this, _MTLFX_PRIVATE_SEL( outputHeight ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setOutputHeight:` with `height` (MetalFX selector accessor, as for the other setters).
+_MTLFX_INLINE void MTLFX::TemporalScalerDescriptor::setOutputHeight( NS::UInteger height )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setOutputHeight_ ), height );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `isAutoExposureEnabled` to this descriptor and returns the reply as bool.
+_MTLFX_INLINE bool MTLFX::TemporalScalerDescriptor::isAutoExposureEnabled() const
+{
+    return Object::sendMessage< bool >( this, _MTLFX_PRIVATE_SEL( isAutoExposureEnabled ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setAutoExposureEnabled:` with `enabled` (MetalFX selector accessor, as for the other setters).
+_MTLFX_INLINE void MTLFX::TemporalScalerDescriptor::setAutoExposureEnabled( bool enabled )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setAutoExposureEnabled_ ), enabled );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `isInputContentPropertiesEnabled` to this descriptor and returns the reply as bool.
+_MTLFX_INLINE bool MTLFX::TemporalScalerDescriptor::isInputContentPropertiesEnabled() const
+{
+    return Object::sendMessage< bool >( this, _MTLFX_PRIVATE_SEL( isInputContentPropertiesEnabled ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setInputContentPropertiesEnabled:` with `enabled` (MetalFX selector accessor, as for the other setters).
+_MTLFX_INLINE void MTLFX::TemporalScalerDescriptor::setInputContentPropertiesEnabled( bool enabled )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setInputContentPropertiesEnabled_ ), enabled );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `inputContentMinScale` to this descriptor and returns the reply as float.
+_MTLFX_INLINE float MTLFX::TemporalScalerDescriptor::inputContentMinScale() const
+{
+    return Object::sendMessage< float >( this, _MTLFX_PRIVATE_SEL( inputContentMinScale ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setInputContentMinScale:` with `scale` (MetalFX selector accessor, as for the other setters).
+_MTLFX_INLINE void MTLFX::TemporalScalerDescriptor::setInputContentMinScale( float scale )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setInputContentMinScale_ ), scale );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `inputContentMaxScale` to this descriptor and returns the reply as float.
+_MTLFX_INLINE float MTLFX::TemporalScalerDescriptor::inputContentMaxScale() const
+{
+    return Object::sendMessage< float >( this, _MTLFX_PRIVATE_SEL( inputContentMaxScale ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setInputContentMaxScale:` with `scale` (MetalFX selector accessor, as for the other setters).
+_MTLFX_INLINE void MTLFX::TemporalScalerDescriptor::setInputContentMaxScale( float scale )
+{
+    Object::sendMessage< void >( this, _MTLFX_PRIVATE_SEL( setInputContentMaxScale_ ), scale );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Returns the TemporalScaler* obtained by sending newTemporalScalerWithDevice_ (with pDevice) to this descriptor.
+_MTLFX_INLINE MTLFX::TemporalScaler* MTLFX::TemporalScalerDescriptor::newTemporalScaler( const MTL::Device* pDevice ) const
+{
+    return Object::sendMessage< TemporalScaler* >( this, _MTLFX_PRIVATE_SEL( newTemporalScalerWithDevice_ ), pDevice );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class-level query of the minimum supported input content scale for pDevice; the result stays 1.0f when the class has no such selector.
+_MTLFX_INLINE float MTLFX::TemporalScalerDescriptor::supportedInputContentMinScale( const MTL::Device* pDevice )
+{
+    float scale = 1.0f;
+    // Only message the class when it reports a method signature for the selector.
+    if ( nullptr != methodSignatureForSelector( _NS_PRIVATE_CLS( MTLFXTemporalScalerDescriptor ), _MTLFX_PRIVATE_SEL( supportedInputContentMinScaleForDevice_ ) ) )
+    {
+        scale = sendMessage< float >( _NS_PRIVATE_CLS( MTLFXTemporalScalerDescriptor ), _MTLFX_PRIVATE_SEL( supportedInputContentMinScaleForDevice_ ), pDevice );
+    }
+
+    return scale;
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class-level query of the maximum supported input content scale for pDevice, with two fallbacks when the selector is missing.
+_MTLFX_INLINE float MTLFX::TemporalScalerDescriptor::supportedInputContentMaxScale( const MTL::Device* pDevice )
+{
+    float scale = 1.0f;
+    // Selector present: ask the class. Selector absent: 2.0f if supportsDevice( pDevice ) is true, otherwise the 1.0f default.
+    if ( nullptr != methodSignatureForSelector( _NS_PRIVATE_CLS( MTLFXTemporalScalerDescriptor ), _MTLFX_PRIVATE_SEL( supportedInputContentMaxScaleForDevice_ ) ) )
+    {
+        scale = sendMessage< float >( _NS_PRIVATE_CLS( MTLFXTemporalScalerDescriptor ), _MTLFX_PRIVATE_SEL( supportedInputContentMaxScaleForDevice_ ), pDevice );
+    }
+    else if ( supportsDevice( pDevice ) )
+    {
+        scale = 2.0f;
+    }
+
+    return scale;
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class-level device check sent through sendMessageSafe. NOTE(review): the class symbol comes from _NS_PRIVATE_CLS while selectors use _MTLFX_PRIVATE_SEL - confirm the class is declared where that macro looks.
+_MTLFX_INLINE bool MTLFX::TemporalScalerDescriptor::supportsDevice( const MTL::Device* pDevice )
+{
+    return Object::sendMessageSafe< bool >( _NS_PRIVATE_CLS( MTLFXTemporalScalerDescriptor ), _MTLFX_PRIVATE_SEL( supportsDevice_ ), pDevice );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Texture-usage getters (color, depth, motion, output): each returns the MTL::TextureUsage obtained from the same-named selector.
+_MTLFX_INLINE MTL::TextureUsage MTLFX::TemporalScaler::colorTextureUsage() const
+{
+    return Object::sendMessage< MTL::TextureUsage >( this, _MTLFX_PRIVATE_SEL( colorTextureUsage ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE MTL::TextureUsage MTLFX::TemporalScaler::depthTextureUsage() const
+{
+    return Object::sendMessage< MTL::TextureUsage >( this, _MTLFX_PRIVATE_SEL( depthTextureUsage ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE MTL::TextureUsage MTLFX::TemporalScaler::motionTextureUsage() const
+{
+    return Object::sendMessage< MTL::TextureUsage >( this, _MTLFX_PRIVATE_SEL( motionTextureUsage ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE MTL::TextureUsage MTLFX::TemporalScaler::outputTextureUsage() const
+{
+    return Object::sendMessage< MTL::TextureUsage >( this, _MTLFX_PRIVATE_SEL( outputTextureUsage ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Getter for the scaler's current input content width (NS::UInteger).
+_MTLFX_INLINE NS::UInteger MTLFX::TemporalScaler::inputContentWidth() const
+{
+    return Object::sendMessage< NS::UInteger >( this, _MTLFX_PRIVATE_SEL( inputContentWidth ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Setter for the input content width. NOTE(review): setters in this span look up their selector with _MTL_PRIVATE_SEL while getters use _MTLFX_PRIVATE_SEL - confirm both resolve.
+_MTLFX_INLINE void MTLFX::TemporalScaler::setInputContentWidth( NS::UInteger width )
+{
+    Object::sendMessage< void >( this, _MTL_PRIVATE_SEL( setInputContentWidth_ ), width );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE NS::UInteger MTLFX::TemporalScaler::inputContentHeight() const
+{
+    return Object::sendMessage< NS::UInteger >( this, _MTLFX_PRIVATE_SEL( inputContentHeight ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE void MTLFX::TemporalScaler::setInputContentHeight( NS::UInteger height )
+{
+    Object::sendMessage< void >( this, _MTL_PRIVATE_SEL( setInputContentHeight_ ), height );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Texture accessors (color, depth, motion, output, exposure): getters return the MTL::Texture* from the scaler, setters forward pTexture.
+_MTLFX_INLINE MTL::Texture* MTLFX::TemporalScaler::colorTexture() const
+{
+    return Object::sendMessage< MTL::Texture* >( this, _MTLFX_PRIVATE_SEL( colorTexture ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE void MTLFX::TemporalScaler::setColorTexture( MTL::Texture* pTexture )
+{
+    Object::sendMessage< void >( this, _MTL_PRIVATE_SEL( setColorTexture_ ), pTexture );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE MTL::Texture* MTLFX::TemporalScaler::depthTexture() const
+{
+    return Object::sendMessage< MTL::Texture* >( this, _MTLFX_PRIVATE_SEL( depthTexture ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE void MTLFX::TemporalScaler::setDepthTexture( MTL::Texture* pTexture )
+{
+    Object::sendMessage< void >( this, _MTL_PRIVATE_SEL( setDepthTexture_ ), pTexture );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE MTL::Texture* MTLFX::TemporalScaler::motionTexture() const
+{
+    return Object::sendMessage< MTL::Texture* >( this, _MTLFX_PRIVATE_SEL( motionTexture ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE void MTLFX::TemporalScaler::setMotionTexture( MTL::Texture* pTexture )
+{
+    Object::sendMessage< void >( this, _MTL_PRIVATE_SEL( setMotionTexture_ ), pTexture );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE MTL::Texture* MTLFX::TemporalScaler::outputTexture() const
+{
+    return Object::sendMessage< MTL::Texture* >( this, _MTLFX_PRIVATE_SEL( outputTexture ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE void MTLFX::TemporalScaler::setOutputTexture( MTL::Texture* pTexture )
+{
+    Object::sendMessage< void >( this, _MTL_PRIVATE_SEL( setOutputTexture_ ), pTexture );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE MTL::Texture* MTLFX::TemporalScaler::exposureTexture() const
+{
+    return Object::sendMessage< MTL::Texture* >( this, _MTLFX_PRIVATE_SEL( exposureTexture ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE void MTLFX::TemporalScaler::setExposureTexture( MTL::Texture* pTexture )
+{
+    Object::sendMessage< void >( this, _MTL_PRIVATE_SEL( setExposureTexture_ ), pTexture );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Scalar parameters (preExposure, jitter offsets, motion-vector scales): float getters with matching float setters.
+_MTLFX_INLINE float MTLFX::TemporalScaler::preExposure() const
+{
+    return Object::sendMessage< float >( this, _MTLFX_PRIVATE_SEL( preExposure ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE void MTLFX::TemporalScaler::setPreExposure( float preExposure )
+{
+    Object::sendMessage< void >( this, _MTL_PRIVATE_SEL( setPreExposure_ ), preExposure );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE float MTLFX::TemporalScaler::jitterOffsetX() const
+{
+    return Object::sendMessage< float >( this, _MTLFX_PRIVATE_SEL( jitterOffsetX ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE void MTLFX::TemporalScaler::setJitterOffsetX( float offset )
+{
+    Object::sendMessage< void >( this, _MTL_PRIVATE_SEL( setJitterOffsetX_ ), offset );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE float MTLFX::TemporalScaler::jitterOffsetY() const
+{
+    return Object::sendMessage< float >( this, _MTLFX_PRIVATE_SEL( jitterOffsetY ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE void MTLFX::TemporalScaler::setJitterOffsetY( float offset )
+{
+    Object::sendMessage< void >( this, _MTL_PRIVATE_SEL( setJitterOffsetY_ ), offset );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE float MTLFX::TemporalScaler::motionVectorScaleX() const
+{
+    return Object::sendMessage< float >( this, _MTLFX_PRIVATE_SEL( motionVectorScaleX ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE void MTLFX::TemporalScaler::setMotionVectorScaleX( float scale )
+{
+    Object::sendMessage< void >( this, _MTL_PRIVATE_SEL( setMotionVectorScaleX_ ), scale );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE float MTLFX::TemporalScaler::motionVectorScaleY() const
+{
+    return Object::sendMessage< float >( this, _MTLFX_PRIVATE_SEL( motionVectorScaleY ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE void MTLFX::TemporalScaler::setMotionVectorScaleY( float scale )
+{
+    Object::sendMessage< void >( this, _MTL_PRIVATE_SEL( setMotionVectorScaleY_ ), scale );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Boolean flags (reset, depthReversed): bool getters with matching bool setters.
+_MTLFX_INLINE bool MTLFX::TemporalScaler::reset() const
+{
+    return Object::sendMessage< bool >( this, _MTLFX_PRIVATE_SEL( reset ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE void MTLFX::TemporalScaler::setReset( bool reset )
+{
+    Object::sendMessage< void >( this, _MTL_PRIVATE_SEL( setReset_ ), reset );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE bool MTLFX::TemporalScaler::isDepthReversed() const
+{
+    return Object::sendMessage< bool >( this, _MTLFX_PRIVATE_SEL( isDepthReversed ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE void MTLFX::TemporalScaler::setDepthReversed( bool depthReversed )
+{
+    Object::sendMessage< void >( this, _MTL_PRIVATE_SEL( setDepthReversed_ ), depthReversed );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// From here to the end of this span only getters appear: pixel formats, then input/output dimensions.
+_MTLFX_INLINE MTL::PixelFormat MTLFX::TemporalScaler::colorTextureFormat() const
+{
+    return Object::sendMessage< MTL::PixelFormat >( this, _MTLFX_PRIVATE_SEL( colorTextureFormat ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE MTL::PixelFormat MTLFX::TemporalScaler::depthTextureFormat() const
+{
+    return Object::sendMessage< MTL::PixelFormat >( this, _MTLFX_PRIVATE_SEL( depthTextureFormat ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE MTL::PixelFormat MTLFX::TemporalScaler::motionTextureFormat() const
+{
+    return Object::sendMessage< MTL::PixelFormat >( this, _MTLFX_PRIVATE_SEL( motionTextureFormat ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE MTL::PixelFormat MTLFX::TemporalScaler::outputTextureFormat() const
+{
+    return Object::sendMessage< MTL::PixelFormat >( this, _MTLFX_PRIVATE_SEL( outputTextureFormat ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE NS::UInteger MTLFX::TemporalScaler::inputWidth() const
+{
+    return Object::sendMessage< NS::UInteger >( this, _MTLFX_PRIVATE_SEL( inputWidth ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE NS::UInteger MTLFX::TemporalScaler::inputHeight() const
+{
+    return Object::sendMessage< NS::UInteger >( this, _MTLFX_PRIVATE_SEL( inputHeight ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE NS::UInteger MTLFX::TemporalScaler::outputWidth() const
+{
+    return Object::sendMessage< NS::UInteger >( this, _MTLFX_PRIVATE_SEL( outputWidth ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE NS::UInteger MTLFX::TemporalScaler::outputHeight() const
+{
+    return Object::sendMessage< NS::UInteger >( this, _MTLFX_PRIVATE_SEL( outputHeight ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Instance getters for the scaler's input content scale limits (float).
+_MTLFX_INLINE float MTLFX::TemporalScaler::inputContentMinScale() const
+{
+    return Object::sendMessage< float >( this, _MTLFX_PRIVATE_SEL( inputContentMinScale ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTLFX_INLINE float MTLFX::TemporalScaler::inputContentMaxScale() const
+{
+    return Object::sendMessage< float >( this, _MTLFX_PRIVATE_SEL( inputContentMaxScale ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Fence accessors: the getter returns the MTL::Fence* held by the scaler, the setter forwards pFence.
+_MTLFX_INLINE MTL::Fence* MTLFX::TemporalScaler::fence() const
+{
+    return Object::sendMessage< MTL::Fence* >( this, _MTLFX_PRIVATE_SEL( fence ) );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// NOTE(review): this setter and encodeToCommandBuffer use _MTL_PRIVATE_SEL while the getters above use _MTLFX_PRIVATE_SEL - confirm both resolve.
+_MTLFX_INLINE void MTLFX::TemporalScaler::setFence( MTL::Fence* pFence )
+{
+    Object::sendMessage< void >( this, _MTL_PRIVATE_SEL( setFence_ ), pFence );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends encodeToCommandBuffer_ with pCommandBuffer to the scaler; nothing is returned.
+_MTLFX_INLINE void MTLFX::TemporalScaler::encodeToCommandBuffer( MTL::CommandBuffer* pCommandBuffer )
+{
+    Object::sendMessage< void >( this, _MTL_PRIVATE_SEL( encodeToCommandBuffer_ ), pCommandBuffer );
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/MetalFX/MetalFX.hpp b/metal-cpp/MetalFX/MetalFX.hpp
new file mode 100644
index 00000000..40405cd3
--- /dev/null
+++ b/metal-cpp/MetalFX/MetalFX.hpp
@@ -0,0 +1,28 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// MetalFX/MetalFX.hpp
+//
+// Copyright 2020-2023 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Umbrella header: includes both MetalFX scaler wrapper headers.
+#include "MTLFXSpatialScaler.hpp"
+#include "MTLFXTemporalScaler.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/QuartzCore/CADefines.hpp b/metal-cpp/QuartzCore/CADefines.hpp
index d9df7486..83f3d8fc 100644
--- a/metal-cpp/QuartzCore/CADefines.hpp
+++ b/metal-cpp/QuartzCore/CADefines.hpp
@@ -2,7 +2,7 @@
 //
 // QuartzCore/CADefines.hpp
 //
-// Copyright 2020-2022 Apple Inc.
+// Copyright 2020-2023 Apple Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/metal-cpp/QuartzCore/CAMetalDrawable.hpp b/metal-cpp/QuartzCore/CAMetalDrawable.hpp index 8bc55b0c..99a3872a 100644 --- a/metal-cpp/QuartzCore/CAMetalDrawable.hpp +++ b/metal-cpp/QuartzCore/CAMetalDrawable.hpp @@ -2,7 +2,7 @@ // // QuartzCore/CAMetalDrawable.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/QuartzCore/CAMetalLayer.hpp b/metal-cpp/QuartzCore/CAMetalLayer.hpp index 1914f778..904e2188 100644 --- a/metal-cpp/QuartzCore/CAMetalLayer.hpp +++ b/metal-cpp/QuartzCore/CAMetalLayer.hpp @@ -2,7 +2,7 @@ // // QuartzCore/CAMetalDrawable.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/QuartzCore/CAPrivate.hpp b/metal-cpp/QuartzCore/CAPrivate.hpp index 624bc527..701ee082 100644 --- a/metal-cpp/QuartzCore/CAPrivate.hpp +++ b/metal-cpp/QuartzCore/CAPrivate.hpp @@ -2,7 +2,7 @@ // // QuartzCore/CAPrivate.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/metal-cpp/QuartzCore/QuartzCore.hpp b/metal-cpp/QuartzCore/QuartzCore.hpp index 66f7e132..beb57b61 100644 --- a/metal-cpp/QuartzCore/QuartzCore.hpp +++ b/metal-cpp/QuartzCore/QuartzCore.hpp @@ -2,7 +2,7 @@ // // QuartzCore/QuartzCore.hpp // -// Copyright 2020-2022 Apple Inc. +// Copyright 2020-2023 Apple Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/metal-cpp/README.md b/metal-cpp/README.md index f003dc83..52e1938c 100644 --- a/metal-cpp/README.md +++ b/metal-cpp/README.md @@ -14,6 +14,15 @@ - Backwards compatibility: All `bool MTL::Device::supports...()` functions check if their required selectors exist and automatically return `false` if not. - String (`ErrorDomain`) constants are weak linked and automatically set to `nullptr` if not available. +## Changelog + +| Version | Changes | +|-|-| +| macOS 14, iOS 17 | Add support for the **MetalFX** framework.
Add all the APIs in macOS 14 and iOS 17. | +| macOS 13.3, iOS 16.4 | Add all the APIs in macOS 13.3 and iOS 16.4. | +| macOS 13, iOS 16| Add all the APIs in macOS 13 and iOS 16.
New optional `NS::SharedPtr` type to assist with memory management.
New convenience function to create a `CA::MetalLayer`.
New `MTLSTR(str)` macro allows faster string creation from literals.
Fix a problem with the signature of functions that take an array of pointers as input.
Fix a problem with the signature of the `setGroups()` function in `MTL::LinkedFunctions`.| +| macOS 12, iOS 15 | Initial release. | + ## Memory Allocation Policy **metal-cpp** follows the object allocation policies of Cocoa, Cocoa Touch, and CoreFoundation. Understanding these rules is especially important when using metal-cpp, as C++ objects are not eligible for automatic reference counting (ARC). @@ -103,7 +112,7 @@ If you want to use the QuartzCore wrapper, add: Purely optional: You can generate a single header file that contains all **metal-cpp** headers via: ```shell -./SingleHeader/MakeSingleHeader.py Foundation/Foundation.hpp QuartzCore/QuartzCore.hpp Metal/Metal.hpp +./SingleHeader/MakeSingleHeader.py Foundation/Foundation.hpp QuartzCore/QuartzCore.hpp Metal/Metal.hpp MetalFX/MetalFX.hpp ``` By default the generator script writes its output to `./SingleHeader/Metal.hpp`. Use the `-o` option to customize output filename. @@ -300,10 +309,3 @@ CA::MetalDrawable* pMetalDrawable = pMetalLayer->nextDrawable(); // ... ``` - -## Changelog - -| Version | Changes | -|-|-| -| macOS 13, iOS 16| Add all APIs for macOS 13 and iOS 16.
New optional `NS::SharedPtr` type to assist with memory management.
New convenience function to create a `CA::MetalLayer`.
New `MTLSTR(str)` macro allows faster string creation from literals.
Fix a problem with the signature of functions that take an array of pointers as input.
Fix a problem with the signature of the `setGroups()` function in `MTL::LinkedFunctions`.| -| macOS 12, iOS 15 | Initial release. | \ No newline at end of file diff --git a/metal-cpp/SingleHeader/MakeSingleHeader.py b/metal-cpp/SingleHeader/MakeSingleHeader.py index 7dcf29ce..520cb889 100755 --- a/metal-cpp/SingleHeader/MakeSingleHeader.py +++ b/metal-cpp/SingleHeader/MakeSingleHeader.py @@ -4,7 +4,7 @@ # # SingleHeader/MakeSingleHeader.py # -# Copyright 2020-2022 Apple Inc. +# Copyright 2020-2023 Apple Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -36,7 +36,7 @@ class HeaderPrefix( object ): '//\n' '// {meta_data}\n' '//\n' - '// Copyright 2020-2022 Apple Inc.\n' + '// Copyright 2020-2023 Apple Inc.\n' '//\n' '// Licensed under the Apache License, Version 2.0 (the "License");\n' '// you may not use this file except in compliance with the License.\n' From 246744b44d5cd9cffd3e1edb3a6a12df7a4f6ea2 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Thu, 1 Feb 2024 16:47:37 -0500 Subject: [PATCH 19/37] everything except for TODO --- kernels/metal/matmul_metal_imp.cc | 232 +++++++++++++++++++++++++----- kernels/metal/matmul_metal_imp.h | 1 + kernels/metal/metal_gelu.cc | 24 ++++ kernels/metal/metal_gelu_quick.cc | 24 ++++ kernels/metal/metal_relu.cc | 24 ++++ kernels/metal/metal_silu.cc | 24 ++++ 6 files changed, 297 insertions(+), 32 deletions(-) create mode 100644 kernels/metal/metal_gelu.cc create mode 100644 kernels/metal/metal_gelu_quick.cc create mode 100644 kernels/metal/metal_relu.cc create mode 100644 kernels/metal/metal_silu.cc diff --git a/kernels/metal/matmul_metal_imp.cc b/kernels/metal/matmul_metal_imp.cc index 02c50a7a..4636fbb9 100644 --- a/kernels/metal/matmul_metal_imp.cc +++ b/kernels/metal/matmul_metal_imp.cc @@ -40,6 +40,7 @@ void *MetalIMP::allocateSharedMem(size_t size) { return void_ptr; } + void MetalIMP::init() { _mDevice = 
MTL::CreateSystemDefaultDevice();
     has_init = true;
@@ -92,6 +93,24 @@ void MetalIMP::setupLibrary(const char *kernel_name){
     }
 }
 
+void MetalIMP::SendEncode(MTL::Size gridSize, MTL::Size threadgroupSize, MTL::CommandBuffer *commandBuffer, MTL::ComputeCommandEncoder *computeEncoder){
+    // Common tail of the run_* methods: dispatch, end the pass, commit, block until done, release. First, encode the compute command.
+    computeEncoder->dispatchThreads(gridSize, threadgroupSize);
+    // NOTE(review): Metal documents dispatchThreads' first argument as threads per grid, yet run_relu/run_silu pass a ceil-divided threadgroup count - confirm.
+    // End the compute pass.
+    computeEncoder->endEncoding();
+
+    // Execute the command.
+    commandBuffer->commit();
+
+    // Normally, you want to do other work in your app while the GPU is running,
+    // but in this example, the code simply blocks until the calculation is complete.
+    commandBuffer->waitUntilCompleted();
+    // Both objects are released here, so callers must not use them afterwards. NOTE(review): callers get them from commandBuffer()/computeCommandEncoder() without retaining - confirm this release is balanced.
+    computeEncoder->release();
+    commandBuffer->release();
+}
+
 void MetalIMP::run_mat_mul_accelerator_int4_fast_no_offset(MetalMatMulParams param, MetalMatmulBuffers *bufferParams) {
     setupLibrary("matmulUInt4_SIMD_Q4Interleave_unroll32");
 
@@ -137,21 +156,7 @@ void MetalIMP::run_mat_mul_accelerator_int4_fast_no_offset(MetalMatMulParams par
     // Calculate a threadgroup size.
     MTL::Size threadgroupSize = MTL::Size::Make(16, 1, 1);
 
-    // Encode the compute command.
-    computeEncoder->dispatchThreads(gridSize, threadgroupSize);
-
-    // End the compute pass.
-    computeEncoder->endEncoding();
-
-    // Execute the command.
-    commandBuffer->commit();
-
-    // Normally, you want to do other work in your app while the GPU is running,
-    // but in this example, the code simply blocks until the calculation is complete.
-    commandBuffer->waitUntilCompleted();
-
-    computeEncoder->release();
-    commandBuffer->release();
+    SendEncode(gridSize, threadgroupSize, commandBuffer, computeEncoder);
     _mMatmulFunctionPSO->release();
 }
 
@@ -172,7 +177,7 @@ void MetalIMP::run_naive_mat_mul(MetalMatMulParams param, MetalMatmulBuffers *bu
     _mBufferB = getBufferfromPtr((void *)bufferParams->B);
     _mBufferResult = getBufferfromPtr((void *)bufferParams->C);
 
-    if (!_mBufferA || !_mBufferB || !_mBufferResult || !_mBufferScales) {
+    if (!_mBufferA || !_mBufferB || !_mBufferResult) {
         std::cerr << "Failed to locate some buffer!" << std::endl;
         exit(-1);
     }
@@ -197,21 +202,7 @@ void MetalIMP::run_naive_mat_mul(MetalMatMulParams param, MetalMatmulBuffers *bu
                                          (m + threadgroupSize.height - 1) / threadgroupSize.height,
                                          1);
 
-    // Encode the compute command.
-    computeEncoder->dispatchThreads(gridSize, threadgroupSize);
-
-    // End the compute pass.
-    computeEncoder->endEncoding();
-
-    // Execute the command.
-    commandBuffer->commit();
-
-    // Normally, you want to do other work in your app while the GPU is running,
-    // but in this example, the code simply blocks until the calculation is complete.
-    commandBuffer->waitUntilCompleted();
-
-    computeEncoder->release();
-    commandBuffer->release();
+    SendEncode(gridSize, threadgroupSize, commandBuffer, computeEncoder);
     _mMatmulFunctionPSO->release();
 }
 
@@ -233,7 +224,7 @@ void MetalIMP::run_batch_add(MetalMatMulParams param, MetalMatmulBuffers *buffer
     _mBufferB = getBufferfromPtr((void *)bufferParams->B);
     _mBufferResult = getBufferfromPtr((void *)bufferParams->C);
 
-    if (!_mBufferA || !_mBufferB || !_mBufferResult || !_mBufferScales) {
+    if (!_mBufferA || !_mBufferB || !_mBufferResult) {
         std::cerr << "Failed to locate some buffer!" << std::endl;
         exit(-1);
     }
@@ -275,4 +266,181 @@ void MetalIMP::run_batch_add(MetalMatMulParams param, MetalMatmulBuffers *buffer
     commandBuffer->release();
 
     _mMatmulFunctionPSO->release();
+}
+// Runs the "kernel_relu" compute kernel with bufferParams->A as input (index 0) and bufferParams->C as output (index 1); blocks until the GPU is done.
+void MetalIMP::run_relu(MetalMatMulParams param, MetalMatmulBuffers *bufferParams){
+    setupLibrary("kernel_relu");
+
+    _mParams = _mDevice->newBuffer(sizeof(MetalMatMulParams), MTL::ResourceStorageModeShared);
+    _mParamsPtr = (MetalMatMulParams *)_mParams->contents();
+    // NOTE(review): a new _mParams buffer is allocated on every call and is never passed to setBuffer below;
+    // confirm the kernel does not read it and that the previously held buffer is released somewhere.
+    *_mParamsPtr = param;
+    unsigned int m, n, k;
+    m = param.m; // row1
+    // n = param.n; // col2/3
+    k = param.k; // col1
+
+    // assign the buffers to hold our data and the result.
+    _mBufferA = getBufferfromPtr((void *)bufferParams->A);
+    _mBufferResult = getBufferfromPtr((void *)bufferParams->C);
+    // Abort the whole process if either pointer has no registered Metal buffer.
+    if (!_mBufferA || !_mBufferResult) {
+        std::cerr << "Failed to locate some buffer!" << std::endl;
+        exit(-1);
+    }
+
+    // Create a command buffer to hold commands.
+    MTL::CommandBuffer *commandBuffer = _mCommandQueue->commandBuffer();
+    assert(commandBuffer != nullptr);
+
+    // Start a compute pass.
+    MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder();
+    assert(computeEncoder != nullptr);
+
+    // Encode the pipeline state object and its parameters.
+    computeEncoder->setComputePipelineState(_mMatmulFunctionPSO);
+    computeEncoder->setBuffer(_mBufferA, 0, 0);
+    computeEncoder->setBuffer(_mBufferResult, 0, 1);
+    // 8x8 threads per threadgroup; gridSize below is the ceil-divided number of such groups over (k, m).
+    MTL::Size threadgroupSize = MTL::Size::Make(8, 8, 1);
+    MTL::Size gridSize = MTL::Size::Make((k + threadgroupSize.width - 1) / threadgroupSize.width,
+                                         (m + threadgroupSize.height - 1) / threadgroupSize.height,
+                                         1);
+    // SendEncode dispatches, waits for completion and releases commandBuffer/computeEncoder.
+    SendEncode(gridSize, threadgroupSize, commandBuffer, computeEncoder);
+    _mMatmulFunctionPSO->release();
+}
+// Runs the "kernel_silu" compute kernel with bufferParams->A as input (index 0) and bufferParams->C as output (index 1); blocks until the GPU is done.
+void MetalIMP::run_silu(MetalMatMulParams param, MetalMatmulBuffers *bufferParams){
+    setupLibrary("kernel_silu");
+
+    _mParams = _mDevice->newBuffer(sizeof(MetalMatMulParams), MTL::ResourceStorageModeShared);
+    _mParamsPtr = (MetalMatMulParams *)_mParams->contents();
+    // NOTE(review): a new _mParams buffer is allocated on every call and is never passed to setBuffer below;
+    // confirm the kernel does not read it and that the previously held buffer is released somewhere.
+    *_mParamsPtr = param;
+    unsigned int m, n, k;
+    m = param.m; // row1
+    // n = param.n; // col2/3
+    k = param.k; // col1
+
+    // assign the buffers to hold our data and the result.
+    _mBufferA = getBufferfromPtr((void *)bufferParams->A);
+    _mBufferResult = getBufferfromPtr((void *)bufferParams->C);
+    // Abort the whole process if either pointer has no registered Metal buffer.
+    if (!_mBufferA || !_mBufferResult) {
+        std::cerr << "Failed to locate some buffer!" << std::endl;
+        exit(-1);
+    }
+
+    // Create a command buffer to hold commands.
+    MTL::CommandBuffer *commandBuffer = _mCommandQueue->commandBuffer();
+    assert(commandBuffer != nullptr);
+
+    // Start a compute pass.
+    MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder();
+    assert(computeEncoder != nullptr);
+
+    // Encode the pipeline state object and its parameters.
+    computeEncoder->setComputePipelineState(_mMatmulFunctionPSO);
+    computeEncoder->setBuffer(_mBufferA, 0, 0);
+    computeEncoder->setBuffer(_mBufferResult, 0, 1);
+    // 8x8 threads per threadgroup; gridSize below is the ceil-divided number of such groups over (k, m).
+    MTL::Size threadgroupSize = MTL::Size::Make(8, 8, 1);
+    MTL::Size gridSize = MTL::Size::Make((k + threadgroupSize.width - 1) / threadgroupSize.width,
+                                         (m + threadgroupSize.height - 1) / threadgroupSize.height,
+                                         1);
+    // SendEncode dispatches, waits for completion and releases commandBuffer/computeEncoder.
+    SendEncode(gridSize, threadgroupSize, commandBuffer, computeEncoder);
+    _mMatmulFunctionPSO->release();
+}
+
+void MetalIMP::run_gelu(MetalMatMulParams param, MetalMatmulBuffers *bufferParams){
+    setupLibrary("kernel_gelu");
+
+    _mParams = _mDevice->newBuffer(sizeof(MetalMatMulParams), MTL::ResourceStorageModeShared);
+    _mParamsPtr = (MetalMatMulParams *)_mParams->contents();
+
+
+    *_mParamsPtr = param;
+    unsigned int m, n, k;
+    m = param.m; // row1
+    // n = param.n; // col2/3
+    k = param.k; // col1
+
+    // assign the buffers to hold our data and the result.
+    _mBufferA = getBufferfromPtr((void *)bufferParams->A);
+    _mBufferResult = getBufferfromPtr((void *)bufferParams->C);
+
+    if (!_mBufferA || !_mBufferResult) {
+        std::cerr << "Failed to locate some buffer!" << std::endl;
+        exit(-1);
+    }
+
+    // Create a command buffer to hold commands.
+    MTL::CommandBuffer *commandBuffer = _mCommandQueue->commandBuffer();
+    assert(commandBuffer != nullptr);
+
+    // Start a compute pass.
+    MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder();
+    assert(computeEncoder != nullptr);
+
+    // Encode the pipeline state object and its parameters.
+ computeEncoder->setComputePipelineState(_mMatmulFunctionPSO); + computeEncoder->setBuffer(_mBufferA, 0, 0); + computeEncoder->setBuffer(_mBufferResult, 0, 1); + + MTL::Size threadgroupSize = MTL::Size::Make(8, 8, 1); + MTL::Size gridSize = MTL::Size::Make((k + threadgroupSize.width - 1) / threadgroupSize.width, + (m + threadgroupSize.height - 1) / threadgroupSize.height, + 1); + + SendEncode(gridSize, threadgroupSize, commandBuffer, computeEncoder); + _mMatmulFunctionPSO->release(); +} + + +void MetalIMP::run_gelu_quick(MetalMatMulParams param, MetalMatmulBuffers *bufferParams){ + setupLibrary("kernel_gelu_quick"); + + _mParams = _mDevice->newBuffer(sizeof(MetalMatMulParams), MTL::ResourceStorageModeShared); + _mParamsPtr = (MetalMatMulParams *)_mParams->contents(); + + + *_mParamsPtr = param; + unsigned int m, n, k; + m = param.m; // row1 + // n = param.n; // col2/3 + k = param.k; // col1 + + // assign the buffers to hold our data and the result. + _mBufferA = getBufferfromPtr((void *)bufferParams->A); + _mBufferResult = getBufferfromPtr((void *)bufferParams->C); + + if (!_mBufferA || !_mBufferResult) { + std::cerr << "Failed to locate some buffer!" << std::endl; + exit(-1); + } + + // Create a command buffer to hold commands. + MTL::CommandBuffer *commandBuffer = _mCommandQueue->commandBuffer(); + assert(commandBuffer != nullptr); + + // Start a compute pass. + MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder(); + assert(computeEncoder != nullptr); + + // Encode the pipeline state object and its parameters. 
+ computeEncoder->setComputePipelineState(_mMatmulFunctionPSO); + computeEncoder->setBuffer(_mBufferA, 0, 0); + computeEncoder->setBuffer(_mBufferResult, 0, 1); + + MTL::Size threadgroupSize = MTL::Size::Make(8, 8, 1); + MTL::Size gridSize = MTL::Size::Make((k + threadgroupSize.width - 1) / threadgroupSize.width, + (m + threadgroupSize.height - 1) / threadgroupSize.height, + 1); + + SendEncode(gridSize, threadgroupSize, commandBuffer, computeEncoder); + _mMatmulFunctionPSO->release(); } \ No newline at end of file diff --git a/kernels/metal/matmul_metal_imp.h b/kernels/metal/matmul_metal_imp.h index 2393b01c..bd7a6677 100644 --- a/kernels/metal/matmul_metal_imp.h +++ b/kernels/metal/matmul_metal_imp.h @@ -36,6 +36,7 @@ class MetalIMP { static void *allocateSharedMem(size_t size); static MetalMatMulParams *_mParamsPtr; static MTL::Buffer *getBufferfromPtr(void *ptr); + static void SendEncode(MTL::Size gridSize, MTL::Size threadgroupSize, MTL::CommandBuffer *commandBuffer, MTL::ComputeCommandEncoder *computeEncoder); static void run_mat_mul_accelerator_int4_fast_no_offset(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); static void run_naive_mat_mul(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); diff --git a/kernels/metal/metal_gelu.cc b/kernels/metal/metal_gelu.cc new file mode 100644 index 00000000..961997f7 --- /dev/null +++ b/kernels/metal/metal_gelu.cc @@ -0,0 +1,24 @@ +#include +#include +#include + +#include +#include + +#include "../matmul.h" +#define NS_PRIVATE_IMPLEMENTATION +#define CA_PRIVATE_IMPLEMENTATION +#define MTL_PRIVATE_IMPLEMENTATION +#include "matmul_metal_imp.h" + +namespace matmul { + // naive float*float matmul +void MatmulOperator::gelu_metal(const struct matmul_params *params) { + int i, j, k; + const struct matrix *A = ¶ms->A, *C = ¶ms->C; + + MetalMatMulParams matmulparams = {m: (unsigned int)A->row, n: (unsigned int)C->column, k: (unsigned int)A->column}; + MetalMatmulBuffers bufferparams = {A: A->data_ptr, C: 
C->data_ptr}; + MetalIMP::run_gelu(matmulparams, &bufferparams); +}; +} // namespace matmul \ No newline at end of file diff --git a/kernels/metal/metal_gelu_quick.cc b/kernels/metal/metal_gelu_quick.cc new file mode 100644 index 00000000..d10d15a7 --- /dev/null +++ b/kernels/metal/metal_gelu_quick.cc @@ -0,0 +1,24 @@ +#include +#include +#include + +#include +#include + +#include "../matmul.h" +#define NS_PRIVATE_IMPLEMENTATION +#define CA_PRIVATE_IMPLEMENTATION +#define MTL_PRIVATE_IMPLEMENTATION +#include "matmul_metal_imp.h" + +namespace matmul { + // naive float*float matmul +void MatmulOperator::gelu_quick_metal(const struct matmul_params *params) { + int i, j, k; + const struct matrix *A = ¶ms->A, *C = ¶ms->C; + + MetalMatMulParams matmulparams = {m: (unsigned int)A->row, n: (unsigned int)C->column, k: (unsigned int)A->column}; + MetalMatmulBuffers bufferparams = {A: A->data_ptr, C: C->data_ptr}; + MetalIMP::run_gelu_quick(matmulparams, &bufferparams); +}; +} // namespace matmul \ No newline at end of file diff --git a/kernels/metal/metal_relu.cc b/kernels/metal/metal_relu.cc new file mode 100644 index 00000000..2e40784c --- /dev/null +++ b/kernels/metal/metal_relu.cc @@ -0,0 +1,24 @@ +#include +#include +#include + +#include +#include + +#include "../matmul.h" +#define NS_PRIVATE_IMPLEMENTATION +#define CA_PRIVATE_IMPLEMENTATION +#define MTL_PRIVATE_IMPLEMENTATION +#include "matmul_metal_imp.h" + +namespace matmul { + // naive float*float matmul +void MatmulOperator::relu_metal(const struct matmul_params *params) { + int i, j, k; + const struct matrix *A = ¶ms->A, *C = ¶ms->C; + + MetalMatMulParams matmulparams = {m: (unsigned int)A->row, n: (unsigned int)C->column, k: (unsigned int)A->column}; + MetalMatmulBuffers bufferparams = {A: A->data_ptr, C: C->data_ptr}; + MetalIMP::run_relu(matmulparams, &bufferparams); +}; +} // namespace matmul \ No newline at end of file diff --git a/kernels/metal/metal_silu.cc b/kernels/metal/metal_silu.cc new file mode 100644 
index 00000000..39fa35b6 --- /dev/null +++ b/kernels/metal/metal_silu.cc @@ -0,0 +1,24 @@ +#include +#include +#include + +#include +#include + +#include "../matmul.h" +#define NS_PRIVATE_IMPLEMENTATION +#define CA_PRIVATE_IMPLEMENTATION +#define MTL_PRIVATE_IMPLEMENTATION +#include "matmul_metal_imp.h" + +namespace matmul { + // naive float*float matmul +void MatmulOperator::silu_metal(const struct matmul_params *params) { + int i, j, k; + const struct matrix *A = ¶ms->A, *C = ¶ms->C; + + MetalMatMulParams matmulparams = {m: (unsigned int)A->row, n: (unsigned int)C->column, k: (unsigned int)A->column}; + MetalMatmulBuffers bufferparams = {A: A->data_ptr, C: C->data_ptr}; + MetalIMP::run_silu(matmulparams, &bufferparams); +}; +} // namespace matmul \ No newline at end of file From 1b5d027fd7cf4a98c905334af3b70faf79f85073 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Thu, 1 Feb 2024 18:05:11 -0500 Subject: [PATCH 20/37] rms_norm done --- kernels/matmul.h | 2 +- kernels/metal/include/opParams.h | 8 +++-- kernels/metal/kernel/op.metal | 15 ++++++--- kernels/metal/matmul_metal_imp.cc | 51 +++++++++++++++++++++++++++++-- kernels/metal/metal_rms_norm.cc | 24 +++++++++++++++ 5 files changed, 88 insertions(+), 12 deletions(-) create mode 100644 kernels/metal/metal_rms_norm.cc diff --git a/kernels/matmul.h b/kernels/matmul.h index 44439721..b439e7d9 100644 --- a/kernels/matmul.h +++ b/kernels/matmul.h @@ -143,7 +143,7 @@ class MatmulOperator { void silu_metal(const struct matmul_params *params); void gelu_metal(const struct matmul_params *params); void gelu_quick_metal(const struct matmul_params *params); - void rms_norm_metal(const struct matmul_params *params); // TODO: to be fixed + void rms_norm_metal(const struct matmul_params *params, float eps); void soft_max_metal(const struct matmul_params *params); // TODO: to be fixed void soft_max_4_metal(const struct matmul_params *params); // TODO: to be fixed void rope_metal(const struct matmul_params *params); // TODO: to 
be fixed diff --git a/kernels/metal/include/opParams.h b/kernels/metal/include/opParams.h index 3b19625f..7919e998 100644 --- a/kernels/metal/include/opParams.h +++ b/kernels/metal/include/opParams.h @@ -1,8 +1,10 @@ #pragma once typedef struct { - unsigned int m; - unsigned int n; - unsigned int k; + unsigned int m; //row1 + unsigned int n; //col2 + unsigned int k; //col1 unsigned int group_size; + unsigned int type_size; + float eps; } MetalMatMulParams; diff --git a/kernels/metal/kernel/op.metal b/kernels/metal/kernel/op.metal index 0681e181..0aa3e0ce 100644 --- a/kernels/metal/kernel/op.metal +++ b/kernels/metal/kernel/op.metal @@ -1,6 +1,8 @@ #include using namespace metal; +#define N_SIMDWIDTH 32 // assuming SIMD group size is 32 + /* CUDA */ // __global__ void batch_Add_cuda(Matrix3D input, Matrix3D input2, Matrix3D output) { // int i = blockIdx.x * blockDim.x + threadIdx.x; @@ -69,9 +71,10 @@ kernel void kernel_gelu_quick( kernel void kernel_rms_norm( device const void * src0, device float * dst, - constant int64_t & ne00, - constant uint64_t & nb01, - constant float & eps, + constant MetalMatMulParams& params, + // constant int64_t & ne00, // row + // constant uint64_t & nb01, // col*sizeof(type) + // constant float & eps, threadgroup float * buf [[threadgroup(0)]], uint tgpig[[threadgroup_position_in_grid]], uint tpitg[[thread_position_in_threadgroup]], @@ -79,13 +82,15 @@ kernel void kernel_rms_norm( uint tiisg[[thread_index_in_simdgroup]], uint ntg[[threads_per_threadgroup]]) { device const float4 * x = (device const float4 *) ((device const char *) src0 + tgpig*nb01); - + unsigned int ne00 = params.m; + unsigned int nb01 = params.k*param.type_size; + float eps = param.eps; float4 sumf = 0; float all_sum = 0; // parallel sum for (int i00 = tpitg; i00 < ne00/4; i00 += ntg) { - sumf += x[i00] * x[i00]; + sumf += x[i00] * x[i00]; // take four elements and square it at the same time } all_sum = sumf[0] + sumf[1] + sumf[2] + sumf[3]; all_sum = 
simd_sum(all_sum); diff --git a/kernels/metal/matmul_metal_imp.cc b/kernels/metal/matmul_metal_imp.cc index 4636fbb9..0436a211 100644 --- a/kernels/metal/matmul_metal_imp.cc +++ b/kernels/metal/matmul_metal_imp.cc @@ -10,6 +10,7 @@ // all op helper functions here, which will be called later in ops. // static data +#define N_SIMDWIDTH 32 // assuming SIMD group size is 32 MTL::Device *MetalIMP::_mDevice; MTL::ComputePipelineState *MetalIMP::_mMatmulFunctionPSO; MTL::CommandQueue *MetalIMP::_mCommandQueue; @@ -40,7 +41,6 @@ void *MetalIMP::allocateSharedMem(size_t size) { return void_ptr; } - void MetalIMP::init() { _mDevice = MTL::CreateSystemDefaultDevice(); has_init = true; @@ -400,7 +400,6 @@ void MetalIMP::run_gelu(MetalMatMulParams param, MetalMatmulBuffers *bufferParam _mMatmulFunctionPSO->release(); } - void MetalIMP::run_gelu_quick(MetalMatMulParams param, MetalMatmulBuffers *bufferParams){ setupLibrary("kernel_gelu_quick"); @@ -443,4 +442,50 @@ void MetalIMP::run_gelu_quick(MetalMatMulParams param, MetalMatmulBuffers *buffe SendEncode(gridSize, threadgroupSize, commandBuffer, computeEncoder); _mMatmulFunctionPSO->release(); -} \ No newline at end of file +} + +void MetalIMP::run_rms_norm(MetalMatMulParams param, MetalMatmulBuffers *bufferParams){ + setupLibrary("kernel void kernel_rms_norm"); + + _mParams = _mDevice->newBuffer(sizeof(MetalMatMulParams), MTL::ResourceStorageModeShared); + _mParamsPtr = (MetalMatMulParams *)_mParams->contents(); + + + *_mParamsPtr = param; + unsigned int m, n, k; + m = param.m; // row1 + k = param.k; // col1 + + // assign the buffers to hold our data and the result. + _mBufferA = getBufferfromPtr((void *)bufferParams->A); + _mBufferResult = getBufferfromPtr((void *)bufferParams->C); + + if (!_mBufferA || !_mBufferResult) { + std::cerr << "Failed to locate some buffer!" << std::endl; + exit(-1); + } + + // Create a command buffer to hold commands. 
+ MTL::CommandBuffer *commandBuffer = _mCommandQueue->commandBuffer(); + assert(commandBuffer != nullptr); + + // Start a compute pass. + MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder(); + assert(computeEncoder != nullptr); + + // Encode the pipeline state object and its parameters. + computeEncoder->setComputePipelineState(_mMatmulFunctionPSO); + computeEncoder->setBuffer(_mBufferA, 0, 0); + computeEncoder->setBuffer(_mBufferResult, 0, 1); + computeEncoder->setBuffer(_mParams, 0, 2); + + computeEncoder->setThreadgroupMemoryLength(param.type_size * N_SIMDWIDTH, 0); + + MTL::Size threadgroupSize = MTL::Size::Make(16, 16, 1); + MTL::Size gridSize = MTL::Size::Make((k + threadgroupSize.width - 1) / threadgroupSize.width, + (m + threadgroupSize.height - 1) / threadgroupSize.height, + 1); + + SendEncode(gridSize, threadgroupSize, commandBuffer, computeEncoder); + _mMatmulFunctionPSO->release(); +} diff --git a/kernels/metal/metal_rms_norm.cc b/kernels/metal/metal_rms_norm.cc new file mode 100644 index 00000000..59357430 --- /dev/null +++ b/kernels/metal/metal_rms_norm.cc @@ -0,0 +1,24 @@ +#include +#include +#include + +#include +#include + +#include "../matmul.h" +#define NS_PRIVATE_IMPLEMENTATION +#define CA_PRIVATE_IMPLEMENTATION +#define MTL_PRIVATE_IMPLEMENTATION +#include "matmul_metal_imp.h" + +namespace matmul { + // naive float*float matmul +void MatmulOperator::rms_norm_metal(const struct matmul_params *params, float eps) { + int i, j, k; + const struct matrix *A = ¶ms->A, *C = ¶ms->C; + + MetalMatMulParams matmulparams = {m: (unsigned int)A->row, n: (unsigned int)C->column, k: (unsigned int)A->column, eps: eps, type_size: sizeof(A[0])}; + MetalMatmulBuffers bufferparams = {A: A->data_ptr, C: C->data_ptr}; + MetalIMP::run_rms_norm(matmulparams, &bufferparams); +}; +} // namespace matmul \ No newline at end of file From c5fb54f56b7d3bd523f4bf21c3c5644281d93b84 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Fri, 2 Feb 2024 
01:57:32 -0500 Subject: [PATCH 21/37] done with softmax, rope and minor fix on opparam struct --- kernels/matmul.h | 20 +-- kernels/metal/include/opParams.h | 27 +++- kernels/metal/kernel/op.metal | 202 ++++++++++++++++---------- kernels/metal/matmul_metal_imp.cc | 234 ++++++++++++++++++++++++------ kernels/metal/matmul_metal_imp.h | 8 +- kernels/metal/metal_batch_add.cc | 8 +- kernels/metal/metal_gelu.cc | 7 +- kernels/metal/metal_gelu_quick.cc | 9 +- kernels/metal/metal_relu.cc | 7 +- kernels/metal/metal_rms_norm.cc | 7 +- kernels/metal/metal_rope.cc | 26 ++++ kernels/metal/metal_silu.cc | 7 +- kernels/metal/metal_softmax.cc | 30 ++++ llm/src/ops/metal/batch_add.cc | 8 +- 14 files changed, 428 insertions(+), 172 deletions(-) create mode 100644 kernels/metal/metal_rope.cc create mode 100644 kernels/metal/metal_softmax.cc diff --git a/kernels/matmul.h b/kernels/matmul.h index b439e7d9..cdb71007 100644 --- a/kernels/matmul.h +++ b/kernels/matmul.h @@ -138,15 +138,17 @@ class MatmulOperator { void gemv_forward_cuda(const struct matmul_params *params); // metal void mat_mul_metal(const struct matmul_params *params); - void batch_add_metal(const struct matmul_params *params); - void relu_metal(const struct matmul_params *params); - void silu_metal(const struct matmul_params *params); - void gelu_metal(const struct matmul_params *params); - void gelu_quick_metal(const struct matmul_params *params); - void rms_norm_metal(const struct matmul_params *params, float eps); - void soft_max_metal(const struct matmul_params *params); // TODO: to be fixed - void soft_max_4_metal(const struct matmul_params *params); // TODO: to be fixed - void rope_metal(const struct matmul_params *params); // TODO: to be fixed + void batch_add_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z); + void relu_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z); + void silu_metal(const 
struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z); + void gelu_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z); + void gelu_quick_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z); + void rms_norm_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z, float eps); + void soft_max_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z, int64_t scale); + void soft_max_4_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z, int64_t scale); + void rope_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z, +int n_past, int n_dims, int mode, int n_orig_ctx, float freq_base, float freq_scale, float ext_factor, float attn_factor, +float beta_fast, float beta_slow); diff --git a/kernels/metal/include/opParams.h b/kernels/metal/include/opParams.h index 7919e998..18f6c002 100644 --- a/kernels/metal/include/opParams.h +++ b/kernels/metal/include/opParams.h @@ -1,10 +1,25 @@ #pragma once typedef struct { - unsigned int m; //row1 - unsigned int n; //col2 - unsigned int k; //col1 - unsigned int group_size; - unsigned int type_size; - float eps; + unsigned int m; //row1 (only for matmul) + unsigned int n; //col2 (only for matmul) + unsigned int k; //col1 (only for matmul) + unsigned int group_size; // for matmulInt4 + + unsigned int m_dim_x, m_dim_y, m_dim_z; + unsigned int type_size; // for nb + float eps; // rms_nor + float scale; // for softmax + + int n_past; + int n_dims; + int mode; + int n_orig_ctx; + float freq_base; + float freq_scale; + float ext_factor; + float attn_factor; + float beta_fast; + float beta_slow; + } MetalMatMulParams; diff --git a/kernels/metal/kernel/op.metal 
b/kernels/metal/kernel/op.metal index 0aa3e0ce..5d89d2f3 100644 --- a/kernels/metal/kernel/op.metal +++ b/kernels/metal/kernel/op.metal @@ -1,7 +1,12 @@ #include using namespace metal; -#define N_SIMDWIDTH 32 // assuming SIMD group size is 32 +using namespace metal; + +#define N_SIMDWIDTH 32 +#define MAX(x, y) ((x) > (y) ? (x) : (y)) +#define MIN(x, y) ((x) < (y) ? (x) : (y)) +#define SWAP(x, y) { auto tmp = (x); (x) = (y); (y) = tmp; } /* CUDA */ // __global__ void batch_Add_cuda(Matrix3D input, Matrix3D input2, Matrix3D output) { @@ -19,8 +24,8 @@ kernel void kernel_batch_add(device const float* inputA, device float* output, constant MetalMatMulParams& params, uint3 id[[thread_position_in_grid]]) { - const uint m = param.m; - const uint n = param.k; + const uint m = param.m_dim_x; + const uint n = param.m_dim_y; const uint idx = id.x; const uint idy = id.y; @@ -82,8 +87,8 @@ kernel void kernel_rms_norm( uint tiisg[[thread_index_in_simdgroup]], uint ntg[[threads_per_threadgroup]]) { device const float4 * x = (device const float4 *) ((device const char *) src0 + tgpig*nb01); - unsigned int ne00 = params.m; - unsigned int nb01 = params.k*param.type_size; + unsigned int ne00 = params.m_dim_x; + unsigned int nb01 = params.m_dim_y*param.type_size; float eps = param.eps; float4 sumf = 0; float all_sum = 0; @@ -126,16 +131,23 @@ kernel void kernel_soft_max( device const float * src0, device const float * src1, device float * dst, - constant int64_t & ne00, - constant int64_t & ne01, - constant int64_t & ne02, - constant float & scale, + constant MetalMatMulParams& params, + // constant int64_t & ne00, + // constant int64_t & ne01, + // constant int64_t & ne02, + // constant float & scale, threadgroup float * buf [[threadgroup(0)]], uint tgpig[[threadgroup_position_in_grid]], uint tpitg[[thread_position_in_threadgroup]], uint sgitg[[simdgroup_index_in_threadgroup]], uint tiisg[[thread_index_in_simdgroup]], uint ntg[[threads_per_threadgroup]]) { + const int64_t ne00 = 
params.m_dim_x; + const int64_t ne01 = params.m_dim_y; + const int64_t ne02 = params.m_dim_z; + const int64_t scale = params.scale; + + const int64_t i03 = (tgpig) / (ne02*ne01); const int64_t i02 = (tgpig - i03*ne02*ne01) / ne01; const int64_t i01 = (tgpig - i03*ne02*ne01 - i02*ne01); @@ -213,16 +225,22 @@ kernel void kernel_soft_max_4( device const float * src0, device const float * src1, device float * dst, - constant int64_t & ne00, - constant int64_t & ne01, - constant int64_t & ne02, - constant float & scale, + constant MetalMatMulParams& params, + // constant int64_t & ne00, + // constant int64_t & ne01, + // constant int64_t & ne02, + // constant float & scale, threadgroup float * buf [[threadgroup(0)]], uint tgpig[[threadgroup_position_in_grid]], uint tpitg[[thread_position_in_threadgroup]], uint sgitg[[simdgroup_index_in_threadgroup]], uint tiisg[[thread_index_in_simdgroup]], uint ntg[[threads_per_threadgroup]]) { + const int64_t ne00 = params.m_dim_x; + const int64_t ne01 = params.m_dim_y; + const int64_t ne02 = params.m_dim_z; + const int64_t scale = params.scale; + const int64_t i03 = (tgpig) / (ne02*ne01); const int64_t i02 = (tgpig - i03*ne02*ne01) / ne01; const int64_t i01 = (tgpig - i03*ne02*ne01 - i02*ne01); @@ -336,75 +354,105 @@ static void rope_yarn_corr_dims( dims[1] = min(n_dims - 1.0f, ceil(rope_yarn_corr_factor(n_dims, n_orig_ctx, beta_slow, freq_base))); } -typedef void (rope_t)( - device const void * src0, - device const int32_t * src1, - device float * dst, - constant int64_t & ne00, - constant int64_t & ne01, - constant int64_t & ne02, - constant int64_t & ne03, - constant uint64_t & nb00, - constant uint64_t & nb01, - constant uint64_t & nb02, - constant uint64_t & nb03, - constant int64_t & ne0, - constant int64_t & ne1, - constant int64_t & ne2, - constant int64_t & ne3, - constant uint64_t & nb0, - constant uint64_t & nb1, - constant uint64_t & nb2, - constant uint64_t & nb3, - constant int & n_past, - constant int & n_dims, - 
constant int & mode, - constant int & n_orig_ctx, - constant float & freq_base, - constant float & freq_scale, - constant float & ext_factor, - constant float & attn_factor, - constant float & beta_fast, - constant float & beta_slow, - uint tiitg[[thread_index_in_threadgroup]], - uint3 tptg[[threads_per_threadgroup]], - uint3 tgpig[[threadgroup_position_in_grid]]); - -// TODO: to be fixed -template +// typedef void (rope_t)( +// device const void * src0, +// device const int32_t * src1, +// device float * dst, +// constant int64_t & ne00, +// constant int64_t & ne01, +// constant int64_t & ne02, +// constant int64_t & ne03, +// constant uint64_t & nb00, +// constant uint64_t & nb01, +// constant uint64_t & nb02, +// constant uint64_t & nb03, +// constant int64_t & ne0, +// constant int64_t & ne1, +// constant int64_t & ne2, +// constant int64_t & ne3, +// constant uint64_t & nb0, +// constant uint64_t & nb1, +// constant uint64_t & nb2, +// constant uint64_t & nb3, +// constant int & n_past, +// constant int & n_dims, +// constant int & mode, +// constant int & n_orig_ctx, +// constant float & freq_base, +// constant float & freq_scale, +// constant float & ext_factor, +// constant float & attn_factor, +// constant float & beta_fast, +// constant float & beta_slow, +// uint tiitg[[thread_index_in_threadgroup]], +// uint3 tptg[[threads_per_threadgroup]], +// uint3 tgpig[[threadgroup_position_in_grid]]); + +// // TODO: to be fixed +// template kernel void kernel_rope( device const void * src0, device const int32_t * src1, device float * dst, - constant int64_t & ne00, - constant int64_t & ne01, - constant int64_t & ne02, - constant int64_t & ne03, - constant uint64_t & nb00, - constant uint64_t & nb01, - constant uint64_t & nb02, - constant uint64_t & nb03, - constant int64_t & ne0, - constant int64_t & ne1, - constant int64_t & ne2, - constant int64_t & ne3, - constant uint64_t & nb0, - constant uint64_t & nb1, - constant uint64_t & nb2, - constant uint64_t & nb3, - 
constant int & n_past, - constant int & n_dims, - constant int & mode, - constant int & n_orig_ctx, - constant float & freq_base, - constant float & freq_scale, - constant float & ext_factor, - constant float & attn_factor, - constant float & beta_fast, - constant float & beta_slow, + constant MetalMatMulParams& params, + // constant int64_t & ne00, + // constant int64_t & ne01, + // constant int64_t & ne02, + // constant int64_t & ne03, + // constant uint64_t & nb00, + // constant uint64_t & nb01, + // constant uint64_t & nb02, + // constant uint64_t & nb03, + // constant int64_t & ne0, + // constant int64_t & ne1, + // constant int64_t & ne2, + // constant int64_t & ne3, + // constant uint64_t & nb0, + // constant uint64_t & nb1, + // constant uint64_t & nb2, + // constant uint64_t & nb3, + // constant int & n_past, + // constant int & n_dims, + // constant int & mode, + // constant int & n_orig_ctx, + // constant float & freq_base, + // constant float & freq_scale, + // constant float & ext_factor, + // constant float & attn_factor, + // constant float & beta_fast, + // constant float & beta_slow, uint tiitg[[thread_index_in_threadgroup]], uint3 tptg[[threads_per_threadgroup]], uint3 tgpig[[threadgroup_position_in_grid]]) { + constant int64_t ne00 = param.m_dim_x; + constant int64_t ne01 = param.m_dim_y; + constant int64_t ne02 = param.m_dim_z; + constant int64_t ne03 = 0; + constant uint64_t nb00 = param.m_dim_x*param.type_size; + constant uint64_t nb01 = param.m_dim_y*param.type_size; + constant uint64_t nb02 = param.m_dim_z*param.type_size; + constant uint64_t nb03 = 0; + constant int64_t ne0 = param.m_dim_x; + constant int64_t ne1 = param.m_dim_y; + constant int64_t ne2 = param.m_dim_z; + constant int64_t ne3 = 0; + constant uint64_t nb0 = param.m_dim_x*param.type_size; + constant uint64_t nb1 = param.m_dim_y*param.type_size; + constant uint64_t nb2 = param.m_dim_z*param.type_size; + constant uint64_t nb3 = 0; + + int n_past = param.n_past; + int n_dims = 
param.n_dims; + int mode = param.mode; + int n_orig_ctx = param.n_orig_ctx; + float freq_base = param.freq_base; + float freq_scale = param.freq_scale; + float ext_factor = param.ext_factor; + float attn_factor = param.attn_factor; + float beta_fast = param.beta_fast; + float beta_slow = param.beta_slow; + + const int64_t i3 = tgpig[2]; const int64_t i2 = tgpig[1]; const int64_t i1 = tgpig[0]; @@ -472,8 +520,8 @@ kernel void kernel_rope( } } -template [[host_name("kernel_rope_f32")]] kernel rope_t kernel_rope; -template [[host_name("kernel_rope_f16")]] kernel rope_t kernel_rope; +// template [[host_name("kernel_rope_f32")]] kernel rope_t kernel_rope; +// template [[host_name("kernel_rope_f16")]] kernel rope_t kernel_rope; /* Performance comparision with the test case: diff --git a/kernels/metal/matmul_metal_imp.cc b/kernels/metal/matmul_metal_imp.cc index 0436a211..229fb031 100644 --- a/kernels/metal/matmul_metal_imp.cc +++ b/kernels/metal/matmul_metal_imp.cc @@ -214,10 +214,10 @@ void MetalIMP::run_batch_add(MetalMatMulParams param, MetalMatmulBuffers *buffer *_mParamsPtr = param; - unsigned int m, n, k; - m = param.m; - n = param.n; - k = param.k; + unsigned int x, y, z; + x = param.m_dim_x; + y = param.m_dim_y; + z = param.m_dim_z; // assign the buffers to hold our data and the result. _mBufferA = getBufferfromPtr((void *)bufferParams->A); @@ -245,9 +245,9 @@ void MetalIMP::run_batch_add(MetalMatMulParams param, MetalMatmulBuffers *buffer computeEncoder->setBuffer(_mParams, 0, 3); MTL::Size threadgroupSize = MTL::Size::Make(8, 8, 1); - MTL::Size gridSize = MTL::Size::Make((n + threadgroupSize.width - 1) / threadgroupSize.width, - (m + threadgroupSize.height - 1) / threadgroupSize.height, - 1); + MTL::Size gridSize = MTL::Size::Make((y + threadgroupSize.width - 1) / threadgroupSize.width, + (x + threadgroupSize.height - 1) / threadgroupSize.height, + z); // Encode the compute command. 
computeEncoder->dispatchThreads(gridSize, threadgroupSize); @@ -276,10 +276,10 @@ void MetalIMP::run_relu(MetalMatMulParams param, MetalMatmulBuffers *bufferParam *_mParamsPtr = param; - unsigned int m, n, k; - m = param.m; // row1 - // n = param.n; // col2/3 - k = param.k; // col1 + unsigned int x, y, z; + x = param.m_dim_x; + y = param.m_dim_y; + z = param.m_dim_z; // assign the buffers to hold our data and the result. _mBufferA = getBufferfromPtr((void *)bufferParams->A); @@ -304,9 +304,9 @@ void MetalIMP::run_relu(MetalMatMulParams param, MetalMatmulBuffers *bufferParam computeEncoder->setBuffer(_mBufferResult, 0, 1); MTL::Size threadgroupSize = MTL::Size::Make(8, 8, 1); - MTL::Size gridSize = MTL::Size::Make((k + threadgroupSize.width - 1) / threadgroupSize.width, - (m + threadgroupSize.height - 1) / threadgroupSize.height, - 1); + MTL::Size gridSize = MTL::Size::Make((y + threadgroupSize.width - 1) / threadgroupSize.width, + (x + threadgroupSize.height - 1) / threadgroupSize.height, + z); SendEncode(gridSize, threadgroupSize, commandBuffer, computeEncoder); _mMatmulFunctionPSO->release(); @@ -320,10 +320,10 @@ void MetalIMP::run_silu(MetalMatMulParams param, MetalMatmulBuffers *bufferParam *_mParamsPtr = param; - unsigned int m, n, k; - m = param.m; // row1 - // n = param.n; // col2/3 - k = param.k; // col1 + unsigned int x, y, z; + x = param.m_dim_x; + y = param.m_dim_y; + z = param.m_dim_z; // assign the buffers to hold our data and the result. 
_mBufferA = getBufferfromPtr((void *)bufferParams->A); @@ -348,9 +348,9 @@ void MetalIMP::run_silu(MetalMatMulParams param, MetalMatmulBuffers *bufferParam computeEncoder->setBuffer(_mBufferResult, 0, 1); MTL::Size threadgroupSize = MTL::Size::Make(8, 8, 1); - MTL::Size gridSize = MTL::Size::Make((k + threadgroupSize.width - 1) / threadgroupSize.width, - (m + threadgroupSize.height - 1) / threadgroupSize.height, - 1); + MTL::Size gridSize = MTL::Size::Make((y + threadgroupSize.width - 1) / threadgroupSize.width, + (x + threadgroupSize.height - 1) / threadgroupSize.height, + z); SendEncode(gridSize, threadgroupSize, commandBuffer, computeEncoder); _mMatmulFunctionPSO->release(); @@ -364,10 +364,10 @@ void MetalIMP::run_gelu(MetalMatMulParams param, MetalMatmulBuffers *bufferParam *_mParamsPtr = param; - unsigned int m, n, k; - m = param.m; // row1 - // n = param.n; // col2/3 - k = param.k; // col1 + unsigned int x, y, z; + x = param.m_dim_x; + y = param.m_dim_y; + z = param.m_dim_z; // assign the buffers to hold our data and the result. 
_mBufferA = getBufferfromPtr((void *)bufferParams->A); @@ -392,9 +392,9 @@ void MetalIMP::run_gelu(MetalMatMulParams param, MetalMatmulBuffers *bufferParam computeEncoder->setBuffer(_mBufferResult, 0, 1); MTL::Size threadgroupSize = MTL::Size::Make(8, 8, 1); - MTL::Size gridSize = MTL::Size::Make((k + threadgroupSize.width - 1) / threadgroupSize.width, - (m + threadgroupSize.height - 1) / threadgroupSize.height, - 1); + MTL::Size gridSize = MTL::Size::Make((y + threadgroupSize.width - 1) / threadgroupSize.width, + (x + threadgroupSize.height - 1) / threadgroupSize.height, + z); SendEncode(gridSize, threadgroupSize, commandBuffer, computeEncoder); _mMatmulFunctionPSO->release(); @@ -408,10 +408,10 @@ void MetalIMP::run_gelu_quick(MetalMatMulParams param, MetalMatmulBuffers *buffe *_mParamsPtr = param; - unsigned int m, n, k; - m = param.m; // row1 - // n = param.n; // col2/3 - k = param.k; // col1 + unsigned int x, y, z; + x = param.m_dim_x; + y = param.m_dim_y; + z = param.m_dim_z; // assign the buffers to hold our data and the result. 
_mBufferA = getBufferfromPtr((void *)bufferParams->A); @@ -436,25 +436,26 @@ void MetalIMP::run_gelu_quick(MetalMatMulParams param, MetalMatmulBuffers *buffe computeEncoder->setBuffer(_mBufferResult, 0, 1); MTL::Size threadgroupSize = MTL::Size::Make(8, 8, 1); - MTL::Size gridSize = MTL::Size::Make((k + threadgroupSize.width - 1) / threadgroupSize.width, - (m + threadgroupSize.height - 1) / threadgroupSize.height, - 1); + MTL::Size gridSize = MTL::Size::Make((y + threadgroupSize.width - 1) / threadgroupSize.width, + (x + threadgroupSize.height - 1) / threadgroupSize.height, + z); SendEncode(gridSize, threadgroupSize, commandBuffer, computeEncoder); _mMatmulFunctionPSO->release(); } void MetalIMP::run_rms_norm(MetalMatMulParams param, MetalMatmulBuffers *bufferParams){ - setupLibrary("kernel void kernel_rms_norm"); + setupLibrary("kernel_rms_norm"); _mParams = _mDevice->newBuffer(sizeof(MetalMatMulParams), MTL::ResourceStorageModeShared); _mParamsPtr = (MetalMatMulParams *)_mParams->contents(); *_mParamsPtr = param; - unsigned int m, n, k; - m = param.m; // row1 - k = param.k; // col1 + unsigned int x, y, z; + x = param.m_dim_x; + y = param.m_dim_y; + z = param.m_dim_z; // assign the buffers to hold our data and the result. 
_mBufferA = getBufferfromPtr((void *)bufferParams->A); @@ -481,11 +482,158 @@ void MetalIMP::run_rms_norm(MetalMatMulParams param, MetalMatmulBuffers *bufferP computeEncoder->setThreadgroupMemoryLength(param.type_size * N_SIMDWIDTH, 0); - MTL::Size threadgroupSize = MTL::Size::Make(16, 16, 1); - MTL::Size gridSize = MTL::Size::Make((k + threadgroupSize.width - 1) / threadgroupSize.width, - (m + threadgroupSize.height - 1) / threadgroupSize.height, - 1); + MTL::Size threadgroupSize = MTL::Size::Make(N_SIMDWIDTH, N_SIMDWIDTH, 1); + MTL::Size gridSize = MTL::Size::Make((y + threadgroupSize.width - 1) / threadgroupSize.width, + (x + threadgroupSize.height - 1) / threadgroupSize.height, + z); + + SendEncode(gridSize, threadgroupSize, commandBuffer, computeEncoder); + _mMatmulFunctionPSO->release(); +} + +void MetalIMP::run_soft_max(MetalMatMulParams param, MetalMatmulBuffers *bufferParams){ + setupLibrary("kernel_soft_max"); + + _mParams = _mDevice->newBuffer(sizeof(MetalMatMulParams), MTL::ResourceStorageModeShared); + _mParamsPtr = (MetalMatMulParams *)_mParams->contents(); + + + *_mParamsPtr = param; + unsigned int x, y, z; + x = param.m_dim_x; + y = param.m_dim_y; + z = param.m_dim_z; + + // assign the buffers to hold our data and the result. + _mBufferA = getBufferfromPtr((void *)bufferParams->A); + _mBufferB = getBufferfromPtr((void *)bufferParams->B); + _mBufferResult = getBufferfromPtr((void *)bufferParams->C); + + if (!_mBufferA || !_mBufferResult) { + std::cerr << "Failed to locate some buffer!" << std::endl; + exit(-1); + } + + // Create a command buffer to hold commands. + MTL::CommandBuffer *commandBuffer = _mCommandQueue->commandBuffer(); + assert(commandBuffer != nullptr); + + // Start a compute pass. + MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder(); + assert(computeEncoder != nullptr); + + // Encode the pipeline state object and its parameters. 
+ computeEncoder->setComputePipelineState(_mMatmulFunctionPSO); + computeEncoder->setBuffer(_mBufferA, 0, 0); + computeEncoder->setBuffer(_mBufferB, 0, 1); + computeEncoder->setBuffer(_mBufferResult, 0, 2); + computeEncoder->setBuffer(_mParams, 0, 3); + + computeEncoder->setThreadgroupMemoryLength(param.type_size * N_SIMDWIDTH, 0); + + MTL::Size threadgroupSize = MTL::Size::Make(N_SIMDWIDTH, N_SIMDWIDTH, 1); + MTL::Size gridSize = MTL::Size::Make((y + threadgroupSize.width - 1) / threadgroupSize.width, + (x + threadgroupSize.height - 1) / threadgroupSize.height, + z); + + SendEncode(gridSize, threadgroupSize, commandBuffer, computeEncoder); + _mMatmulFunctionPSO->release(); +} + +void MetalIMP::run_soft_max_4(MetalMatMulParams param, MetalMatmulBuffers *bufferParams){ + setupLibrary("kernel_soft_max_4"); + + _mParams = _mDevice->newBuffer(sizeof(MetalMatMulParams), MTL::ResourceStorageModeShared); + _mParamsPtr = (MetalMatMulParams *)_mParams->contents(); + + + *_mParamsPtr = param; + unsigned int x, y, z; + x = param.m_dim_x; + y = param.m_dim_y; + z = param.m_dim_z; + + // assign the buffers to hold our data and the result. + _mBufferA = getBufferfromPtr((void *)bufferParams->A); + _mBufferB = getBufferfromPtr((void *)bufferParams->B); + _mBufferResult = getBufferfromPtr((void *)bufferParams->C); + + if (!_mBufferA || !_mBufferResult) { + std::cerr << "Failed to locate some buffer!" << std::endl; + exit(-1); + } + + // Create a command buffer to hold commands. + MTL::CommandBuffer *commandBuffer = _mCommandQueue->commandBuffer(); + assert(commandBuffer != nullptr); + + // Start a compute pass. + MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder(); + assert(computeEncoder != nullptr); + + // Encode the pipeline state object and its parameters. 
+ computeEncoder->setComputePipelineState(_mMatmulFunctionPSO); + computeEncoder->setBuffer(_mBufferA, 0, 0); + computeEncoder->setBuffer(_mBufferB, 0, 1); + computeEncoder->setBuffer(_mBufferResult, 0, 2); + computeEncoder->setBuffer(_mParams, 0, 3); + + computeEncoder->setThreadgroupMemoryLength(param.type_size * N_SIMDWIDTH, 0); + + MTL::Size threadgroupSize = MTL::Size::Make(N_SIMDWIDTH, N_SIMDWIDTH, 1); + MTL::Size gridSize = MTL::Size::Make((y + threadgroupSize.width - 1) / threadgroupSize.width, + (x + threadgroupSize.height - 1) / threadgroupSize.height, + z); SendEncode(gridSize, threadgroupSize, commandBuffer, computeEncoder); _mMatmulFunctionPSO->release(); } + +void MetalIMP::run_rope(MetalMatMulParams param, MetalMatmulBuffers *bufferParams){ + setupLibrary("kernel_rope"); + + _mParams = _mDevice->newBuffer(sizeof(MetalMatMulParams), MTL::ResourceStorageModeShared); + _mParamsPtr = (MetalMatMulParams *)_mParams->contents(); + + + *_mParamsPtr = param; + unsigned int x, y, z; + x = param.m_dim_x; + y = param.m_dim_y; + z = param.m_dim_z; + + // assign the buffers to hold our data and the result. + _mBufferA = getBufferfromPtr((void *)bufferParams->A); + _mBufferB = getBufferfromPtr((void *)bufferParams->B); + _mBufferResult = getBufferfromPtr((void *)bufferParams->C); + + if (!_mBufferA || !_mBufferResult) { + std::cerr << "Failed to locate some buffer!" << std::endl; + exit(-1); + } + + // Create a command buffer to hold commands. + MTL::CommandBuffer *commandBuffer = _mCommandQueue->commandBuffer(); + assert(commandBuffer != nullptr); + + // Start a compute pass. + MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder(); + assert(computeEncoder != nullptr); + + // Encode the pipeline state object and its parameters.
+ computeEncoder->setComputePipelineState(_mMatmulFunctionPSO); + computeEncoder->setBuffer(_mBufferA, 0, 0); + computeEncoder->setBuffer(_mBufferB, 0, 1); + computeEncoder->setBuffer(_mBufferResult, 0, 2); + computeEncoder->setBuffer(_mParams, 0, 3); + + computeEncoder->setThreadgroupMemoryLength(param.type_size * N_SIMDWIDTH, 0); + + MTL::Size threadgroupSize = MTL::Size::Make(N_SIMDWIDTH, N_SIMDWIDTH, 1); + MTL::Size gridSize = MTL::Size::Make((y + threadgroupSize.width - 1) / threadgroupSize.width, + (x + threadgroupSize.height - 1) / threadgroupSize.height, + z); + + SendEncode(gridSize, threadgroupSize, commandBuffer, computeEncoder); + _mMatmulFunctionPSO->release(); +} \ No newline at end of file diff --git a/kernels/metal/matmul_metal_imp.h b/kernels/metal/matmul_metal_imp.h index bd7a6677..18d4cd36 100644 --- a/kernels/metal/matmul_metal_imp.h +++ b/kernels/metal/matmul_metal_imp.h @@ -45,10 +45,10 @@ class MetalIMP { static void run_silu(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); static void run_gelu(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); static void run_gelu_quick(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); - static void run_rms_norm(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); // TODO: to be fixed - static void run_soft_max(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); // TODO: to be fixed - static void run_soft_max_4(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); // TODO: to be fixed - static void run_rope(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); // TODO: to be fixed + static void run_rms_norm(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); + static void run_soft_max(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); + static void run_soft_max_4(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); + static void run_rope(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); // static void 
sendComputeCommand(); diff --git a/kernels/metal/metal_batch_add.cc b/kernels/metal/metal_batch_add.cc index 8a43b02b..5e1bf848 100644 --- a/kernels/metal/metal_batch_add.cc +++ b/kernels/metal/metal_batch_add.cc @@ -12,13 +12,11 @@ #include "matmul_metal_imp.h" namespace matmul { - // naive float*float matmul -void MatmulOperator::batch_add_metal(const struct matmul_params *params) { +void MatmulOperator::batch_add_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z) { int i, j, k; const struct matrix *A = &params->A, *B = &params->B, *C = &params->C; - - MetalMatMulParams matmulparams = {(unsigned int)A->row, (unsigned int)C->column, (unsigned int)A->column}; + MetalMatMulParams matmulparams = {m_dim_x: m_dim_x, m_dim_y: m_dim_y, m_dim_z: m_dim_z}; MetalMatmulBuffers bufferparams = {A: A->data_ptr, C: C->data_ptr, B: (unsigned char*)B->data_ptr}; MetalIMP::run_batch_add(matmulparams, &bufferparams); }; -} // namespace matmul \ No newline at end of file +} \ No newline at end of file diff --git a/kernels/metal/metal_gelu.cc b/kernels/metal/metal_gelu.cc index 961997f7..8bbd9b1d 100644 --- a/kernels/metal/metal_gelu.cc +++ b/kernels/metal/metal_gelu.cc @@ -12,13 +12,12 @@ #include "matmul_metal_imp.h" namespace matmul { - // naive float*float matmul -void MatmulOperator::gelu_metal(const struct matmul_params *params) { +void MatmulOperator::gelu_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z) { int i, j, k; const struct matrix *A = &params->A, *C = &params->C; - MetalMatMulParams matmulparams = {m: (unsigned int)A->row, n: (unsigned int)C->column, k: (unsigned int)A->column}; + MetalMatMulParams matmulparams = {m_dim_x: m_dim_x, m_dim_y: m_dim_y, m_dim_z: m_dim_z}; MetalMatmulBuffers bufferparams = {A: A->data_ptr, C: C->data_ptr}; MetalIMP::run_gelu(matmulparams, &bufferparams); }; -} // namespace matmul \ No newline at end of file +} \ No newline at end of file diff
--git a/kernels/metal/metal_gelu_quick.cc b/kernels/metal/metal_gelu_quick.cc index d10d15a7..07ba9f19 100644 --- a/kernels/metal/metal_gelu_quick.cc +++ b/kernels/metal/metal_gelu_quick.cc @@ -12,13 +12,12 @@ #include "matmul_metal_imp.h" namespace matmul { - // naive float*float matmul -void MatmulOperator::gelu_quick_metal(const struct matmul_params *params) { + +void MatmulOperator::gelu_quick_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z) { int i, j, k; const struct matrix *A = &params->A, *C = &params->C; - - MetalMatMulParams matmulparams = {m: (unsigned int)A->row, n: (unsigned int)C->column, k: (unsigned int)A->column}; + MetalMatMulParams matmulparams = {m_dim_x: m_dim_x, m_dim_y: m_dim_y, m_dim_z: m_dim_z}; MetalMatmulBuffers bufferparams = {A: A->data_ptr, C: C->data_ptr}; MetalIMP::run_gelu_quick(matmulparams, &bufferparams); }; -} // namespace matmul \ No newline at end of file +} \ No newline at end of file diff --git a/kernels/metal/metal_relu.cc b/kernels/metal/metal_relu.cc index 2e40784c..03cd462d 100644 --- a/kernels/metal/metal_relu.cc +++ b/kernels/metal/metal_relu.cc @@ -12,13 +12,12 @@ #include "matmul_metal_imp.h" namespace matmul { - // naive float*float matmul -void MatmulOperator::relu_metal(const struct matmul_params *params) { +void MatmulOperator::relu_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z){ int i, j, k; const struct matrix *A = &params->A, *C = &params->C; - MetalMatMulParams matmulparams = {m: (unsigned int)A->row, n: (unsigned int)C->column, k: (unsigned int)A->column}; + MetalMatMulParams matmulparams = {m_dim_x: m_dim_x, m_dim_y: m_dim_y, m_dim_z: m_dim_z}; MetalMatmulBuffers bufferparams = {A: A->data_ptr, C: C->data_ptr}; MetalIMP::run_relu(matmulparams, &bufferparams); }; -} // namespace matmul \ No newline at end of file +} \ No newline at end of file diff --git a/kernels/metal/metal_rms_norm.cc
b/kernels/metal/metal_rms_norm.cc index 59357430..7934d0d0 100644 --- a/kernels/metal/metal_rms_norm.cc +++ b/kernels/metal/metal_rms_norm.cc @@ -12,13 +12,12 @@ #include "matmul_metal_imp.h" namespace matmul { - // naive float*float matmul -void MatmulOperator::rms_norm_metal(const struct matmul_params *params, float eps) { +void MatmulOperator::rms_norm_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z, float eps) { int i, j, k; const struct matrix *A = &params->A, *C = &params->C; - MetalMatMulParams matmulparams = {m: (unsigned int)A->row, n: (unsigned int)C->column, k: (unsigned int)A->column, eps: eps, type_size: sizeof(A[0])}; + MetalMatMulParams matmulparams = {m_dim_x: m_dim_x, m_dim_y: m_dim_y, m_dim_z: m_dim_z, eps: eps, type_size: sizeof(short)}; //it uses half* in cuda MetalMatmulBuffers bufferparams = {A: A->data_ptr, C: C->data_ptr}; MetalIMP::run_rms_norm(matmulparams, &bufferparams); }; -} // namespace matmul \ No newline at end of file +} \ No newline at end of file diff --git a/kernels/metal/metal_rope.cc b/kernels/metal/metal_rope.cc new file mode 100644 index 00000000..10495dbe --- /dev/null +++ b/kernels/metal/metal_rope.cc @@ -0,0 +1,26 @@ +#include +#include +#include + +#include +#include + +#include "../matmul.h" +#define NS_PRIVATE_IMPLEMENTATION +#define CA_PRIVATE_IMPLEMENTATION +#define MTL_PRIVATE_IMPLEMENTATION +#include "matmul_metal_imp.h" + +namespace matmul { +void MatmulOperator::rope_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z, +int n_past, int n_dims, int mode, int n_orig_ctx, float freq_base, float freq_scale, float ext_factor, float attn_factor, +float beta_fast, float beta_slow) { + int i, j, k; + const struct matrix *A = &params->A, *C = &params->C; + MetalMatMulParams matmulparams = {m_dim_x: m_dim_x, m_dim_y: m_dim_y, m_dim_z: m_dim_z, + n_past: n_past, n_dims: n_dims, mode: mode, n_orig_ctx: n_orig_ctx, freq_base:
freq_base, freq_scale: freq_scale, ext_factor: ext_factor, + attn_factor: attn_factor, beta_fast: beta_fast, beta_slow: beta_slow, type_size: sizeof(short)}; //it uses half in cuda + MetalMatmulBuffers bufferparams = {A: A->data_ptr, C: C->data_ptr}; + MetalIMP::run_rope(matmulparams, &bufferparams); +}; +} \ No newline at end of file diff --git a/kernels/metal/metal_silu.cc b/kernels/metal/metal_silu.cc index 39fa35b6..2b075f62 100644 --- a/kernels/metal/metal_silu.cc +++ b/kernels/metal/metal_silu.cc @@ -12,13 +12,12 @@ #include "matmul_metal_imp.h" namespace matmul { - // naive float*float matmul -void MatmulOperator::silu_metal(const struct matmul_params *params) { +void MatmulOperator::silu_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z) { int i, j, k; const struct matrix *A = &params->A, *C = &params->C; - MetalMatMulParams matmulparams = {m: (unsigned int)A->row, n: (unsigned int)C->column, k: (unsigned int)A->column}; + MetalMatMulParams matmulparams = {m_dim_x: m_dim_x, m_dim_y: m_dim_y, m_dim_z: m_dim_z}; MetalMatmulBuffers bufferparams = {A: A->data_ptr, C: C->data_ptr}; MetalIMP::run_silu(matmulparams, &bufferparams); }; -} // namespace matmul \ No newline at end of file +} \ No newline at end of file diff --git a/kernels/metal/metal_softmax.cc b/kernels/metal/metal_softmax.cc new file mode 100644 index 00000000..b6a640e2 --- /dev/null +++ b/kernels/metal/metal_softmax.cc @@ -0,0 +1,30 @@ +#include +#include +#include + +#include +#include + +#include "../matmul.h" +#define NS_PRIVATE_IMPLEMENTATION +#define CA_PRIVATE_IMPLEMENTATION +#define MTL_PRIVATE_IMPLEMENTATION +#include "matmul_metal_imp.h" + +namespace matmul { +void MatmulOperator::soft_max_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z, int64_t scale) { + int i, j, k; + const struct matrix *A = &params->A, *C = &params->C; + MetalMatMulParams matmulparams = {m_dim_x: m_dim_x, m_dim_y:
m_dim_y, m_dim_z: m_dim_z, scale: scale, type_size: sizeof(short)}; //it uses half in cuda + MetalMatmulBuffers bufferparams = {A: A->data_ptr, C: C->data_ptr}; + MetalIMP::run_soft_max(matmulparams, &bufferparams); +}; + +void MatmulOperator::soft_max_4_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z, int64_t scale) { + int i, j, k; + const struct matrix *A = &params->A, *C = &params->C; + MetalMatMulParams matmulparams = {m_dim_x: m_dim_x, m_dim_y: m_dim_y, m_dim_z: m_dim_z, scale: scale, type_size: sizeof(short)}; //it uses half in cuda + MetalMatmulBuffers bufferparams = {A: A->data_ptr, C: C->data_ptr}; + MetalIMP::run_soft_max_4(matmulparams, &bufferparams); +}; +} \ No newline at end of file diff --git a/llm/src/ops/metal/batch_add.cc b/llm/src/ops/metal/batch_add.cc index fe25c212..66c8ada5 100644 --- a/llm/src/ops/metal/batch_add.cc +++ b/llm/src/ops/metal/batch_add.cc @@ -2,16 +2,10 @@ void batch_Add(const Matrix3D &input, const Matrix3D &input2, Matrix3D &output) { struct matmul_params params; - params.A.row = input.m_dim_y; - params.A.column = input.m_dim_z; params.A.data_ptr = input.m_data; - params.B.row = input.m_dim_z; // k - params.B.column = input2.m_dim_y; // n params.B.data_ptr = input2.m_data; - params.C.row = output.m_dim_y; - params.C.column = output.m_dim_z; params.C.data_ptr = output.m_data; matmul::MatmulOperator op = matmul::MatmulOperator(); - op.batch_add_metal(&params); + op.batch_add_metal(&params, input.m_dim_x, input.m_dim_y, input.m_dim_z); } From 189ed980754b30476341c6fc80056ff70f303ace Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Fri, 2 Feb 2024 11:35:17 -0500 Subject: [PATCH 22/37] update for test --- llm/tests/metal/cpp_version/Makefile | 3 +- llm/tests/metal/cpp_version/main.cc | 44 +-- llm/tests/metal/cpp_version/main_tmp.cc | 401 ++++++++++++++++++++++++ llm/tests/metal/cpp_version/op.metal | 138 ++++---- 4 files changed, 496 insertions(+), 90 deletions(-) create mode 100644
llm/tests/metal/cpp_version/main_tmp.cc diff --git a/llm/tests/metal/cpp_version/Makefile b/llm/tests/metal/cpp_version/Makefile index 3c089572..9f21a88c 100644 --- a/llm/tests/metal/cpp_version/Makefile +++ b/llm/tests/metal/cpp_version/Makefile @@ -1,4 +1,5 @@ -CXX = /opt/homebrew/opt/llvm/bin/clang++ +CXX = g++ +# CXX = /opt/homebrew/opt/llvm/bin/clang++ CXXFLAGS = -std=c++17 -stdlib=libc++ -O3 # Executable and source files diff --git a/llm/tests/metal/cpp_version/main.cc b/llm/tests/metal/cpp_version/main.cc index 36c6927c..a0263347 100755 --- a/llm/tests/metal/cpp_version/main.cc +++ b/llm/tests/metal/cpp_version/main.cc @@ -42,10 +42,10 @@ const char * fn_name = "matmulInt4"; // main -unsigned int height1 = 32; -unsigned int width1 = 32; -unsigned int height2 = 32; -unsigned int width2 = 32; +unsigned int height1 = 320*320; +unsigned int width1 = 320*32; +unsigned int height2 = 320*32; +unsigned int width2 = 320*320; float *A1, *A3; unsigned char *A2; matmul_param *param; @@ -212,14 +212,14 @@ void metal_compute(){ } // Dispatch and Run Computation - // auto start = high_resolution_clock::now(); + auto start = high_resolution_clock::now(); computeEncoder->dispatchThreadgroups(mGridSize, mThreadGroupSize); computeEncoder->endEncoding(); commandBuffer->commit(); commandBuffer->waitUntilCompleted(); - // auto stop = high_resolution_clock::now(); - // auto duration = duration_cast(stop - start); - // std::cout << "GPU: " << duration.count() << "ms" << std::endl; + auto stop = high_resolution_clock::now(); + auto duration = duration_cast(stop - start); + std::cout << "GPU: " << fn_name << " " << duration.count() << "ms" << std::endl; computeEncoder->release(); commandBuffer->release(); } @@ -291,24 +291,28 @@ void test_matmulInt4(){ metal_init(); metal_compute(); - printf("GPU Results: \n"); - for (uint32_t i = 0; i < Int4_params->height1*Int4_params->width1/Int4_params->group_size; i++){ - printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); - } +// 
printf("GPU Results: \n"); + // for (uint32_t i = 0; i < Int4_params->height1*Int4_params->width1/Int4_params->group_size; i++){ + // printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); + // } fn_name = "matmulInt4_SIMD_Q4Interleave"; metal_init(); metal_compute(); - printf("GPU Results: \n"); - for (uint32_t i = 0; i < Int4_params->height1*Int4_params->width1/Int4_params->group_size; i++){ - printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); - } +// printf("GPU Results: \n"); + // for (uint32_t i = 0; i < Int4_params->height1*Int4_params->width1/Int4_params->group_size; i++){ + // printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); + // } fn_name = "matmulUInt4_SIMD_Q4Interleave_unroll16"; metal_init(); metal_compute(); - printf("GPU Results: \n"); - for (uint32_t i = 0; i < Int4_params->height1*Int4_params->width1/Int4_params->group_size; i++){ - printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); - } +// printf("GPU Results: \n"); + // for (uint32_t i = 0; i < Int4_params->height1*Int4_params->width1/Int4_params->group_size; i++){ + // printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); + // } + fn_name = "matmulUInt4_SIMD_Q4Interleave_unroll16"; + metal_init(); + metal_compute(); +// printf("GPU Results: \n"); } int main(){ diff --git a/llm/tests/metal/cpp_version/main_tmp.cc b/llm/tests/metal/cpp_version/main_tmp.cc new file mode 100644 index 00000000..7d699775 --- /dev/null +++ b/llm/tests/metal/cpp_version/main_tmp.cc @@ -0,0 +1,401 @@ +// +// main.cpp +// metal_cpp +// +// Created by Derrick on 1/24/24. +// Some to-do list: +// 1. keep a map: ptr on CPU -> buffer on GPU +// Notes: +// 1. Offset hasn't been considered +// 2. 
Group_Size is multiple of 32 + +#include +#include +#include +#include + +#define NS_PRIVATE_IMPLEMENTATION +#define CA_PRIVATE_IMPLEMENTATION +#define MTL_PRIVATE_IMPLEMENTATION + +#include "Metal/Metal.hpp" +#include "Foundation/Foundation.hpp" +#include "param.h" +typedef struct{ + float values[4]; +}PackedFloat; + +typedef struct{ + unsigned char values[4]; +}PackedChar; + +// .h +MTL::Buffer *bM1, *bM2, *bM3, *bParam, *bScales, *bOffset; +MTL::Buffer *bsM1, *bsM2, *bsM3, *bsParam, *bsScales, *bsOffset; +MTL::Device* mDevice; +MTL::ComputePipelineState* mfnPipelineState; +MTL::CommandQueue* mCommandQueue; +NS::Error *error = nullptr; +typedef struct { + float *A, *C, *scales, *offset; + unsigned char *B; +} MetalMatmulBuffers; + +typedef struct { + float *C, *scales, *offset; + PackedFloat *A; + PackedChar *B; +} MetalMatmulSBuffers; + +using namespace std; +using namespace chrono; + +// .cc +const char * fn_name = "matmulInt4"; + + +// main +unsigned int height1 = 32; +unsigned int width1 = 32; +unsigned int height2 = 32; +unsigned int width2 = 32; +float *A1, *A3; +unsigned char *A2; +matmul_param *param; +// for MatmulInt4 use +unsigned int group_size = 32; +float* scales, *offset; +MetalMatmulBuffers *Int4_buffer; +MetalMatMulParams *Int4_params; + +MetalMatmulSBuffers *Int4_Sbuffer; +MetalMatMulParams *Int4_Sparams; + +// Test Use +void test_addArrays(const float arr1[], const float arr2[], float result[], uint size) { + for (int i = 0; i < size; ++i) { + result[i] = arr1[i] + arr2[i]; + } +} +void test_matmul(const float* matA, int rowsA, int colsA, const unsigned char* matB, int rowsB, int colsB, float* result) { + for (int i = 0; i < rowsA; i++) { + for (int j = 0; j < colsB; j++) { + result[i * colsB + j] = 0; + for (int k = 0; k < colsA; k++) { + result[i * colsB + j] += matA[i * colsA + k] * matB[k * colsB + j]; + } + } + } +} +void printArray(const float* array, uint arraySize) { + for (int i = 0; i < arraySize; ++i) { + std::cout << array[i] << " 
"; + } + std::cout << std::endl; +} +void generateRandomFloatArray(float* array, uint arraySize) { + // Use a random device to seed the random number generator + std::random_device rd; + // Use the current time as a seed for the random number generator + std::mt19937 gen(rd()); + // Define the range of random numbers (adjust as needed) + std::uniform_real_distribution distribution(1, 100); + + // Generate random integers and fill the array + for (int i = 0; i < arraySize; ++i) { + array[i] = distribution(gen); + } +} +void generateRandomCharArray(unsigned char* array, uint arraySize) { + // Use a random device to seed the random number generator + std::random_device rd; + // Use the current time as a seed for the random number generator + std::mt19937 gen(rd()); + // Define the range of random numbers (adjust as needed) + std::uniform_int_distribution distrib(0, 255); + + // Generate random integers and fill the array + for (int i = 0; i < arraySize; ++i) { + array[i] = static_cast(distrib(gen)); + } +} + +void generateRandomScale(float* array, uint arraySize) { + for (size_t i = 0; i < arraySize; i++){ + array[i] = 1.1; + } +} + +// Metal functions +void metal_init(){ + mDevice = MTL::CreateSystemDefaultDevice(); + MTL::Library *defaultLibrary = mDevice->newDefaultLibrary(); + if (defaultLibrary == nullptr) { + std::cout << "Failed to find the default library." << std::endl; + return; + } + auto str = NS::String::string(fn_name, NS::ASCIIStringEncoding); + MTL::Function *matmulFunction = defaultLibrary->newFunction(str); + defaultLibrary->release(); + if (matmulFunction == nullptr) { + std::cout << "Failed to find the function." << std::endl; + return; + } + mfnPipelineState = mDevice->newComputePipelineState(matmulFunction, &error); + matmulFunction->release(); + if (mfnPipelineState == nullptr) { + std::cout << "Failed to created pipeline state object, error " << error << "." 
<< std::endl; + return; + } + mCommandQueue = mDevice->newCommandQueue(); + if (mCommandQueue == nullptr) { + std::cout << "Failed to find the command queue." << std::endl; + return; + } +} + +MTL::Buffer *metal_newBuf(unsigned long type_size, unsigned long size){ + return mDevice->newBuffer(type_size*size, MTL::ResourceStorageModeShared); +} + +void metal_encodecommand_matmulInt4_simd(MTL::ComputeCommandEncoder *computeEncoder){ + //Create Metal buffers for input and output, if inside the TinyChat, param should be created in advance + + bsScales = metal_newBuf(sizeof(float), Int4_Sparams->height2*Int4_Sparams->width2/Int4_Sparams->group_size); + bsM1 = metal_newBuf(sizeof(PackedFloat), Int4_Sparams->height1*Int4_Sparams->width1); + bsM2 = metal_newBuf(sizeof(PackedChar), Int4_Sparams->width2*Int4_Sparams->width2); + bsParam = metal_newBuf(sizeof(MetalMatMulParams), 1); + bsM3 = metal_newBuf(sizeof(float), Int4_Sparams->height3*Int4_Sparams->width3); + + computeEncoder->setComputePipelineState(mfnPipelineState); + computeEncoder->setBuffer(bsM1, 0, 0); + computeEncoder->setBuffer(bsM2, 0, 1); + computeEncoder->setBuffer(bsM3, 0, 2); + computeEncoder->setBuffer(bsScales, 0, 3); + computeEncoder->setBuffer(bsParam, 0, 4); + + memcpy(bsM1->contents(), Int4_Sbuffer->A, Int4_Sparams->height1*Int4_Sparams->width1*sizeof(PackedFloat)); + memcpy(bsM2->contents(), Int4_Sbuffer->B, Int4_Sparams->width2*Int4_Sparams->width2*sizeof(PackedChar)); + memcpy(bsM3->contents(), Int4_Sbuffer->C, Int4_Sparams->height3*Int4_Sparams->width3*sizeof(float)); + memcpy(bsParam->contents(), Int4_Sparams, sizeof(MetalMatMulParams)); + memcpy(bsScales->contents(), Int4_Sbuffer->scales, Int4_Sparams->height2*Int4_Sparams->width2/Int4_Sparams->group_size*sizeof(float)); +} + + +void metal_encodecommand_matmulInt4(MTL::ComputeCommandEncoder *computeEncoder){ + //Create Metal buffers for input and output, if inside the TinyChat, param should be created in advance + + bScales =
metal_newBuf(sizeof(float), height1*width1); + bM1 = metal_newBuf(sizeof(float), Int4_params->height1*Int4_params->width1); + bM2 = metal_newBuf(sizeof(unsigned char), Int4_params->width1*Int4_params->width3); + bParam = metal_newBuf(sizeof(MetalMatMulParams), 1); + bM3 = metal_newBuf(sizeof(float), Int4_params->height1*Int4_params->width3); + + computeEncoder->setComputePipelineState(mfnPipelineState); + computeEncoder->setBuffer(bM1, 0, 0); + computeEncoder->setBuffer(bM2, 0, 1); + computeEncoder->setBuffer(bM3, 0, 2); + computeEncoder->setBuffer(bScales, 0, 3); + computeEncoder->setBuffer(bParam, 0, 4); + + memcpy(bM1->contents(), Int4_buffer->A, Int4_params->height1*Int4_params->width1*sizeof(float)); + memcpy(bM2->contents(), Int4_buffer->B, Int4_params->width1*Int4_params->width3*sizeof(unsigned char)); + memcpy(bM3->contents(), Int4_buffer->C, Int4_params->height1*Int4_params->width3*sizeof(float)); + memcpy(bParam->contents(), Int4_params, sizeof(MetalMatMulParams)); + memcpy(bScales->contents(), Int4_buffer->scales, height1*width1*sizeof(float)); +} + +void metal_encodecommand_matmul(MTL::ComputeCommandEncoder *computeEncoder){ + //Create Metal buffers for input and output, if inside the TinyChat, param should be created in advance + bM1 = metal_newBuf(sizeof(float), param->arraySize1); + bM2 = metal_newBuf(sizeof(float), param->arraySize2); + bM3 = metal_newBuf(sizeof(float), param->outputsize); + bParam = metal_newBuf(sizeof(matmul_param), 1); + + computeEncoder->setComputePipelineState(mfnPipelineState); + computeEncoder->setBuffer(bM1, 0, 0); + computeEncoder->setBuffer(bM2, 0, 1); + computeEncoder->setBuffer(bParam, 0, 2); + computeEncoder->setBuffer(bM3, 0, 3); + + + memcpy(bM1->contents(), A1, param->arraySize1*sizeof(float)); + memcpy(bM2->contents(), A2, param->arraySize2*sizeof(float)); + memcpy(bM3->contents(), A3, param->outputsize*sizeof(float)); + memcpy(bParam->contents(), param, sizeof(matmul_param)); +} + +void metal_compute(){ + // 
Initialization of GPU vals + MTL::CommandBuffer *commandBuffer = mCommandQueue->commandBuffer(); + assert(commandBuffer != nullptr); + MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder(); + assert(computeEncoder != nullptr); + + // Encode command and set buffer to GPU + if (strcmp(fn_name, "matmulInt4") == 0) { + metal_encodecommand_matmulInt4(computeEncoder); + } else if (strcmp(fn_name, "matmul") == 0) { + metal_encodecommand_matmul(computeEncoder); + } else if (strcmp(fn_name, "matmulInt4_SIMD_Q4Interleave") == 0) { + metal_encodecommand_matmulInt4_simd(computeEncoder); + } + + // Threads -> ThreadGroup -> Grid + MTL::Size mThreadGroupSize; + MTL::Size mGridSize; + if (strcmp(fn_name, "matmulInt4") == 0){ + mThreadGroupSize = MTL::Size::Make(Int4_params->width3, Int4_params->height1, 1); + mGridSize = MTL::Size::Make(16, 1, 1); + } else if (strcmp(fn_name, "matmul") == 0) { + mThreadGroupSize = MTL::Size::Make(8, 8, 1); + mGridSize = MTL::Size::Make((param->width1 + mThreadGroupSize.width - 1) / mThreadGroupSize.width, + (param->height2 + mThreadGroupSize.height - 1) / mThreadGroupSize.height, + 1); + } else if (strcmp(fn_name, "matmulInt4_SIMD_Q4Interleave") == 0) { + mThreadGroupSize = MTL::Size::Make(Int4_Sparams->width3, Int4_Sparams->height1, 1); + mGridSize = MTL::Size::Make(16, 1, 1); + } + + // Dispatch and Run Computation + // auto start = high_resolution_clock::now(); + computeEncoder->dispatchThreadgroups(mGridSize, mThreadGroupSize); + computeEncoder->endEncoding(); + commandBuffer->commit(); + commandBuffer->waitUntilCompleted(); + // auto stop = high_resolution_clock::now(); + // auto duration = duration_cast(stop - start); + // std::cout << "GPU: " << duration.count() << "ms" << std::endl; + computeEncoder->release(); + commandBuffer->release(); +} + +void test_normal_matmul(){ + // Initialization for test + param = new matmul_param; + param->height1 = height1; + param->height2 = height2; + param->width1 = width1; + 
param->width2 = width2; + param->outputsize = height1*width2; + param->arraySize1 = width1*height1; + param->arraySize2 = width2*height2; + A1 = new float[param->arraySize1]; + A2 = new unsigned char[param->arraySize2]; + A3 = new float[param->outputsize]; + generateRandomFloatArray(A1, param->arraySize1); + generateRandomCharArray(A2, param->arraySize2); + // printArray(A1, param->arraySize1); + // printArray(A2, param->arraySize2); + + + // CPU + auto start = high_resolution_clock::now(); + test_matmul(A1, param->height1, param->width1, A2, param->height2, param->width2, A3); + auto stop = high_resolution_clock::now(); + auto duration = duration_cast(stop - start); + std::cout << "CPU: " << duration.count() << "ms" << std::endl; + printf("CPU Results: \n"); + for (uint32_t i = 0; i < param->outputsize; i++){ + printf("A3[%d]: %f\n", i, A3[i]); + } + free(A3); + A3 = new float[param->outputsize]; + + // GPU + metal_init(); + metal_compute(); + // printf("GPU Results: \n"); + // for (uint32_t i = 0; i < param->outputsize; i++){ + // printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); + // } + + free(A1); + free(A2); + free(A3); +} + +void test_matmulInt4(){ + // not considering offset atm + Int4_buffer = new MetalMatmulBuffers; + Int4_params = new MetalMatMulParams; + Int4_params->group_size = group_size; + Int4_params->height1 = height1; // m + Int4_params->width1 = width1; // k + Int4_params->width3 = width2; // n + + scales = new float[Int4_params->height1*Int4_params->width1/Int4_params->group_size]; + generateRandomFloatArray(scales, height1*width1/group_size); + Int4_buffer->A = A1; + Int4_buffer->B = A2; + Int4_buffer->C = A3; + Int4_buffer->scales = scales; + metal_init(); + metal_compute(); + printf("GPU Results: \n"); + for (uint32_t i = 0; i < Int4_params->height1*Int4_params->width1/Int4_params->group_size; i++){ + printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); + } +} + +void test_matmulInt4_simds(){ + fn_name = 
"matmulInt4_SIMD_Q4Interleave"; + Int4_Sbuffer = new MetalMatmulSBuffers; + Int4_Sparams = new MetalMatMulParams; + Int4_Sparams->group_size = group_size/4; + Int4_Sparams->height1 = height1; // m + Int4_Sparams->width1 = width1/4; // k + Int4_Sparams->height2 = height2/2/4; // m + Int4_Sparams->width2 = width2/2; // k + Int4_Sparams->height3 = height1; // m + Int4_Sparams->width3 = width2; // k + + scales = new float[Int4_Sparams->height2*Int4_Sparams->width2/Int4_Sparams->group_size]; + generateRandomFloatArray(scales, Int4_Sparams->height2*Int4_Sparams->width2/Int4_Sparams->group_size); + Int4_Sbuffer->scales = scales; + Int4_Sbuffer->C = A3; + PackedFloat *tempF = new PackedFloat[Int4_Sparams->height1*Int4_Sparams->width1]; + PackedChar *tempC = new PackedChar[Int4_Sparams->width2*Int4_Sparams->width2]; + printf("GPU Results: 1 \n"); + for (size_t i = 0; i < height1*width1; i += 4) { + for (int j = 0; j < 4; j++) { + if (i + j < height1*width1) { // Check to prevent out-of-bounds access + tempF[i / 4].values[j] = A1[i + j]; + } + } + } + Int4_Sbuffer->A = tempF; + + for (size_t i = 0; i < height2/2*width2/2; i += 4) { + for (int j = 0; j < 4; j++) { + if (i + j < height1*width1) { // Check to prevent out-of-bounds access + tempC[i / 4].values[j] = A2[i + j]; + } + } + } + Int4_Sbuffer->B = tempC; + + metal_init(); + metal_compute(); + for (uint32_t i = 0; i < Int4_Sparams->height1*Int4_Sparams->width1/Int4_Sparams->group_size; i++){ + // for (int j = 0; j < 4; j++) { + printf("bsM3[%d]: %f\n", i, ((float*)(bsM3->contents()))[i]); + // } + } +} + +int main(){ + A1 = new float[height1*width1]; + A2 = new unsigned char[width1*width2]; + A3 = new float[height1*width2]; + scales = new float[height1*width1]; + generateRandomFloatArray(A1, height1*width1); + generateRandomCharArray(A2, width1*width2); + generateRandomScale(scales, height1*width1); + // test_matmulInt4(); + test_matmulInt4_simds(); + return 0; +} + + + diff --git a/llm/tests/metal/cpp_version/op.metal 
b/llm/tests/metal/cpp_version/op.metal index f2e3db9b..3e0781e7 100644 --- a/llm/tests/metal/cpp_version/op.metal +++ b/llm/tests/metal/cpp_version/op.metal @@ -660,85 +660,85 @@ kernel void matmulUInt4_SIMD_Q4Interleave_unroll16( } -// kernel void matmulUInt4_SIMD_Q4Interleave_unroll32( -// device const packed_float4* inA, -// device const packed_char4* inB, // column major -// device float* result, -// device const float* scales, -// constant MetalMatMulParams& params, -// uint2 id [[thread_position_in_grid]]) -// { -// // the for-loop is replaced with a collection of threads, each of which -// // calls this function. +kernel void matmulUInt4_SIMD_Q4Interleave_unroll32( + device const packed_float4* inA, + device const packed_char4* inB, // column major + device float* result, + device const float* scales, + constant MetalMatMulParams* params, + uint2 id [[thread_position_in_grid]]) +{ + // the for-loop is replaced with a collection of threads, each of which + // calls this function. -// const uint n = params.n; -// const uint k = params.k; -// const uint group_size = params.group_size; + const uint n = params->width3; + const uint k = params->width1; + const uint group_size = params->group_size; -// const uint idx = id.x; // column index of the output -// const uint idy = id.y; // row index of the output + const uint idx = id.x; // column index of the output + const uint idy = id.y; // row index of the output -// packed_char4 lowMask = {0x0F, 0x0F, 0x0F, 0x0F}; -// packed_char4 offsets = {8, 8, 8, 8}; -// packed_float4 sum4 = {0.0f, 0.0f, 0.0f, 0.0f}; + packed_char4 lowMask = {0x0F, 0x0F, 0x0F, 0x0F}; + packed_char4 offsets = {8, 8, 8, 8}; + packed_float4 sum4 = {0.0f, 0.0f, 0.0f, 0.0f}; -// for (uint i = 0; i < k; i += group_size){ -// float scale = scales[(idx * k + i) / group_size]; -// packed_float4 scale4 = {scale, scale, scale, scale}; -// for (uint j = 0; j < group_size; j+= 32){ -// // sequential: (a, b), (c, d), (e, f), (g, h): 32 bit = 4xuint8 -// // 
expected layout of inB: (a, e), (b, f), (c, g), (d, h) -// // low; (a, 0), (b, 0), (c, 0), (d, 0) -// // high: (e, 0), (f, 0), (g, 0), (h, 0) -// size_t weight_idx = (idx * k + i + j) / 8; -// size_t activation_idx = (idy * k + i + j) / 4; -// packed_char4 packed_8_0 = inB[weight_idx]; -// packed_char4 packed_8_1 = inB[weight_idx + 1]; -// packed_char4 packed_8_2 = inB[weight_idx + 2]; -// packed_char4 packed_8_3 = inB[weight_idx + 3]; + for (uint i = 0; i < k; i += group_size){ + float scale = scales[(idx * k + i) / group_size]; + packed_float4 scale4 = {scale, scale, scale, scale}; + for (uint j = 0; j < group_size; j+= 32){ + // sequential: (a, b), (c, d), (e, f), (g, h): 32 bit = 4xuint8 + // expected layout of inB: (a, e), (b, f), (c, g), (d, h) + // low; (a, 0), (b, 0), (c, 0), (d, 0) + // high: (e, 0), (f, 0), (g, 0), (h, 0) + size_t weight_idx = (idx * k + i + j) / 8; + size_t activation_idx = (idy * k + i + j) / 4; + packed_char4 packed_8_0 = inB[weight_idx]; + packed_char4 packed_8_1 = inB[weight_idx + 1]; + packed_char4 packed_8_2 = inB[weight_idx + 2]; + packed_char4 packed_8_3 = inB[weight_idx + 3]; -// packed_char4 packed_low_0 = (packed_8_0 & lowMask) - offsets;; -// packed_char4 packed_low_1 = (packed_8_1 & lowMask) - offsets;; -// packed_char4 packed_low_2 = (packed_8_2 & lowMask) - offsets;; -// packed_char4 packed_low_3 = (packed_8_3 & lowMask) - offsets;; + packed_char4 packed_low_0 = (packed_8_0 & lowMask) - offsets;; + packed_char4 packed_low_1 = (packed_8_1 & lowMask) - offsets;; + packed_char4 packed_low_2 = (packed_8_2 & lowMask) - offsets;; + packed_char4 packed_low_3 = (packed_8_3 & lowMask) - offsets;; -// packed_char4 packed_high_0 = ((packed_8_0 >> 4) & lowMask) - offsets; -// packed_char4 packed_high_1 = ((packed_8_1 >> 4) & lowMask) - offsets; -// packed_char4 packed_high_2 = ((packed_8_2 >> 4) & lowMask) - offsets; -// packed_char4 packed_high_3 = ((packed_8_3 >> 4) & lowMask) - offsets; + packed_char4 packed_high_0 = ((packed_8_0 
>> 4) & lowMask) - offsets; + packed_char4 packed_high_1 = ((packed_8_1 >> 4) & lowMask) - offsets; + packed_char4 packed_high_2 = ((packed_8_2 >> 4) & lowMask) - offsets; + packed_char4 packed_high_3 = ((packed_8_3 >> 4) & lowMask) - offsets; -// packed_float4 inAlow_0 = inA[activation_idx]; -// packed_float4 inAhigh_0 = inA[activation_idx+1]; -// packed_float4 inAlow_1 = inA[activation_idx+2]; -// packed_float4 inAhigh_1 = inA[activation_idx+3]; -// packed_float4 inAlow_2 = inA[activation_idx+4]; -// packed_float4 inAhigh_2 = inA[activation_idx+5]; -// packed_float4 inAlow_3 = inA[activation_idx+6]; -// packed_float4 inAhigh_3 = inA[activation_idx+7]; + packed_float4 inAlow_0 = inA[activation_idx]; + packed_float4 inAhigh_0 = inA[activation_idx+1]; + packed_float4 inAlow_1 = inA[activation_idx+2]; + packed_float4 inAhigh_1 = inA[activation_idx+3]; + packed_float4 inAlow_2 = inA[activation_idx+4]; + packed_float4 inAhigh_2 = inA[activation_idx+5]; + packed_float4 inAlow_3 = inA[activation_idx+6]; + packed_float4 inAhigh_3 = inA[activation_idx+7]; -// packed_float4 inBlow_0 = packed_float4(packed_low_0) * scale4; -// packed_float4 inBlow_1 = packed_float4(packed_low_1) * scale4; -// packed_float4 inBlow_2 = packed_float4(packed_low_2) * scale4; -// packed_float4 inBlow_3 = packed_float4(packed_low_3) * scale4; + packed_float4 inBlow_0 = packed_float4(packed_low_0) * scale4; + packed_float4 inBlow_1 = packed_float4(packed_low_1) * scale4; + packed_float4 inBlow_2 = packed_float4(packed_low_2) * scale4; + packed_float4 inBlow_3 = packed_float4(packed_low_3) * scale4; -// packed_float4 inBhigh_0 = packed_float4(packed_high_0) * scale4; -// packed_float4 inBhigh_1 = packed_float4(packed_high_1) * scale4; -// packed_float4 inBhigh_2 = packed_float4(packed_high_2) * scale4; -// packed_float4 inBhigh_3 = packed_float4(packed_high_3) * scale4; + packed_float4 inBhigh_0 = packed_float4(packed_high_0) * scale4; + packed_float4 inBhigh_1 = packed_float4(packed_high_1) * 
scale4; + packed_float4 inBhigh_2 = packed_float4(packed_high_2) * scale4; + packed_float4 inBhigh_3 = packed_float4(packed_high_3) * scale4; -// sum4 += inAlow_0 * inBlow_0; -// sum4 += inAlow_1 * inBlow_1; -// sum4 += inAlow_2 * inBlow_2; -// sum4 += inAlow_3 * inBlow_3; -// sum4 += inAhigh_0 * inBhigh_0; -// sum4 += inAhigh_1 * inBhigh_1; -// sum4 += inAhigh_2 * inBhigh_2; -// sum4 += inAhigh_3 * inBhigh_3; -// } -// } -// float sum = sum4[0] + sum4[1] + sum4[2] + sum4[3]; -// result[idy * n + idx] = sum; -// } + sum4 += inAlow_0 * inBlow_0; + sum4 += inAlow_1 * inBlow_1; + sum4 += inAlow_2 * inBlow_2; + sum4 += inAlow_3 * inBlow_3; + sum4 += inAhigh_0 * inBhigh_0; + sum4 += inAhigh_1 * inBhigh_1; + sum4 += inAhigh_2 * inBhigh_2; + sum4 += inAhigh_3 * inBhigh_3; + } + } + float sum = sum4[0] + sum4[1] + sum4[2] + sum4[3]; + result[idy * n + idx] = sum; +} // kernel void matmulUInt4_SIMD_Q4Interleave_unroll2x32( // device const packed_float4* inA, From 3217635bd21b0814d69f0e8ce2910595d2f0fe96 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Sat, 3 Feb 2024 23:49:20 -0500 Subject: [PATCH 23/37] updated matmulf32 + test script --- kernels/matmul.h | 2 +- .../metal/{matmul_naive.cc => matmul_f32.cc} | 4 ++-- kernels/metal/matmul_metal_imp.cc | 4 ++-- kernels/metal/matmul_metal_imp.h | 2 +- llm/tests/metal/cpp_version/run.sh | 21 +++++++++++++++++++ 5 files changed, 27 insertions(+), 6 deletions(-) rename kernels/metal/{matmul_naive.cc => matmul_f32.cc} (86%) create mode 100755 llm/tests/metal/cpp_version/run.sh diff --git a/kernels/matmul.h b/kernels/matmul.h index cdb71007..4f36617e 100644 --- a/kernels/matmul.h +++ b/kernels/matmul.h @@ -137,7 +137,7 @@ class MatmulOperator { //// GEMV void gemv_forward_cuda(const struct matmul_params *params); // metal - void mat_mul_metal(const struct matmul_params *params); + void mat_mul_f32_metal(const struct matmul_params *params); void batch_add_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int 
m_dim_y, unsigned int m_dim_z); void relu_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z); void silu_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z); diff --git a/kernels/metal/matmul_naive.cc b/kernels/metal/matmul_f32.cc similarity index 86% rename from kernels/metal/matmul_naive.cc rename to kernels/metal/matmul_f32.cc index 541bb05d..d3a337a6 100644 --- a/kernels/metal/matmul_naive.cc +++ b/kernels/metal/matmul_f32.cc @@ -13,7 +13,7 @@ namespace matmul { // naive float*float matmul -void MatmulOperator::mat_mul_metal(const struct matmul_params *params) { +void MatmulOperator::mat_mul_f32_metal(const struct matmul_params *params) { int i, j, k; const struct matrix *A = ¶ms->A, *B = ¶ms->B, *C = ¶ms->C; const int block_size = params->block_size; @@ -25,6 +25,6 @@ void MatmulOperator::mat_mul_metal(const struct matmul_params *params) { MetalMatMulParams matmulparams = {(unsigned int)A->row, (unsigned int)C->column, (unsigned int)A->column, (unsigned int)block_size}; MetalMatmulBuffers bufferparams = {A->data_ptr, C->data_ptr, scale, offset, (unsigned char*)B->data_ptr}; - MetalIMP::run_mat_mul_accelerator_int4_fast_no_offset(matmulparams, &bufferparams); + MetalIMP::run_f32_mat_mul(matmulparams, &bufferparams); }; } // namespace matmul diff --git a/kernels/metal/matmul_metal_imp.cc b/kernels/metal/matmul_metal_imp.cc index 229fb031..54ab5bc9 100644 --- a/kernels/metal/matmul_metal_imp.cc +++ b/kernels/metal/matmul_metal_imp.cc @@ -160,7 +160,7 @@ void MetalIMP::run_mat_mul_accelerator_int4_fast_no_offset(MetalMatMulParams par _mMatmulFunctionPSO->release(); } -void MetalIMP::run_naive_mat_mul(MetalMatMulParams param, MetalMatmulBuffers *bufferParams) { +void MetalIMP::run_f32_mat_mul(MetalMatMulParams param, MetalMatmulBuffers *bufferParams) { setupLibrary("matmul"); _mParams = _mDevice->newBuffer(sizeof(MetalMatMulParams), 
MTL::ResourceStorageModeShared); _mParamsPtr = (MetalMatMulParams *)_mParams->contents(); @@ -590,7 +590,7 @@ void MetalIMP::run_soft_max_4(MetalMatMulParams param, MetalMatmulBuffers *buffe } void MetalIMP::run_rope(MetalMatMulParams param, MetalMatmulBuffers *bufferParams){ - setupLibrary("kernel_soft_max_4"); + setupLibrary("kernel_rope"); _mParams = _mDevice->newBuffer(sizeof(MetalMatMulParams), MTL::ResourceStorageModeShared); _mParamsPtr = (MetalMatMulParams *)_mParams->contents(); diff --git a/kernels/metal/matmul_metal_imp.h b/kernels/metal/matmul_metal_imp.h index 18d4cd36..05f5fb51 100644 --- a/kernels/metal/matmul_metal_imp.h +++ b/kernels/metal/matmul_metal_imp.h @@ -39,7 +39,7 @@ class MetalIMP { static void SendEncode(MTL::Size gridSize, MTL::Size threadgroupSize, MTL::CommandBuffer *commandBuffer, MTL::ComputeCommandEncoder *computeEncoder); static void run_mat_mul_accelerator_int4_fast_no_offset(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); - static void run_naive_mat_mul(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); + static void run_f32_mat_mul(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); static void run_batch_add(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); static void run_relu(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); static void run_silu(MetalMatMulParams param, MetalMatmulBuffers *bufferParams); diff --git a/llm/tests/metal/cpp_version/run.sh b/llm/tests/metal/cpp_version/run.sh new file mode 100755 index 00000000..3413235f --- /dev/null +++ b/llm/tests/metal/cpp_version/run.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Initialize sum +sum=0 + +# Number of runs +runs=5 + +for i in $(seq 1 $runs); do + # Run the program and capture its output + output=$(./main) + echo "Run $i: $output" + # Add the output to sum + sum=$(echo "$sum + $output" | bc) +done + +# Calculate the average +average=$(echo "scale=2; $sum / $runs" | bc) + +echo "Total sum: $sum" +echo "Average: $average" \ 
No newline at end of file From 06c472af9b0dc5668e46f18da0713111452406ca Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Sun, 4 Feb 2024 11:50:42 -0500 Subject: [PATCH 24/37] new test script on normalization --- llm/tests/metal/cpp_version/main.cc | 208 ++++++++++++++++++---------- llm/tests/metal/cpp_version/param.h | 20 ++- 2 files changed, 150 insertions(+), 78 deletions(-) diff --git a/llm/tests/metal/cpp_version/main.cc b/llm/tests/metal/cpp_version/main.cc index a0263347..523e89da 100755 --- a/llm/tests/metal/cpp_version/main.cc +++ b/llm/tests/metal/cpp_version/main.cc @@ -38,15 +38,15 @@ using namespace std; using namespace chrono; // .cc -const char * fn_name = "matmulInt4"; +const char * fn_name = "matmul"; // main -unsigned int height1 = 320*320; -unsigned int width1 = 320*32; -unsigned int height2 = 320*32; -unsigned int width2 = 320*320; -float *A1, *A3; +unsigned int height1 = 96; +unsigned int width1 = 4096; +unsigned int height2 = 4096; +unsigned int width2 = 32000; +float *A1, *Anorm, *A3; unsigned char *A2; matmul_param *param; // for MatmulInt4 use @@ -83,7 +83,7 @@ void generateRandomFloatArray(float* array, uint arraySize) { // Use the current time as a seed for the random number generator std::mt19937 gen(rd()); // Define the range of random numbers (adjust as needed) - std::uniform_real_distribution distribution(1, 100); + std::uniform_real_distribution distribution(0, 1); // Generate random integers and fill the array for (int i = 0; i < arraySize; ++i) { @@ -139,11 +139,11 @@ MTL::Buffer *metal_newBuf(unsigned long type_size, unsigned long size){ void metal_encodecommand_matmulInt4(MTL::ComputeCommandEncoder *computeEncoder){ //Create Metal buffers for input and output, if inside the TinyChat, param should be created in advance - bScales = metal_newBuf(sizeof(float), Int4_params->height1*Int4_params->width1/Int4_params->group_size); - bM1 = metal_newBuf(sizeof(float), Int4_params->height1*Int4_params->width1); - bM2 = 
metal_newBuf(sizeof(unsigned char), Int4_params->width1*Int4_params->width3); + bScales = metal_newBuf(sizeof(float), (Int4_params->width2*Int4_params->width1+Int4_params->group_size-1)/Int4_params->group_size); + bM1 = metal_newBuf(sizeof(float), Int4_params->arraySize1); + bM2 = metal_newBuf(sizeof(unsigned char), Int4_params->arraySize2); bParam = metal_newBuf(sizeof(MetalMatMulParams), 1); - bM3 = metal_newBuf(sizeof(float), Int4_params->height1*Int4_params->width3); + bM3 = metal_newBuf(sizeof(float), Int4_params->outputsize); computeEncoder->setComputePipelineState(mfnPipelineState); computeEncoder->setBuffer(bM1, 0, 0); @@ -152,11 +152,11 @@ void metal_encodecommand_matmulInt4(MTL::ComputeCommandEncoder *computeEncoder){ computeEncoder->setBuffer(bScales, 0, 3); computeEncoder->setBuffer(bParam, 0, 4); - memcpy(bM1->contents(), Int4_buffer->A, Int4_params->height1*Int4_params->width1*sizeof(float)); - memcpy(bM2->contents(), Int4_buffer->B, Int4_params->width1*Int4_params->width3*sizeof(unsigned char)); - memcpy(bM3->contents(), Int4_buffer->C, Int4_params->height1*Int4_params->width3*sizeof(float)); + memcpy(bM1->contents(), Int4_buffer->A, Int4_params->arraySize1*sizeof(float)); + memcpy(bM2->contents(), Int4_buffer->B, Int4_params->arraySize2*sizeof(unsigned char)); + memcpy(bM3->contents(), Int4_buffer->C, Int4_params->outputsize*sizeof(float)); memcpy(bParam->contents(), Int4_params, sizeof(MetalMatMulParams)); - memcpy(bScales->contents(), Int4_buffer->scales, ((Int4_params->height1*Int4_params->width1)/Int4_params->group_size)*sizeof(float)); + memcpy(bScales->contents(), Int4_buffer->scales, (Int4_params->width2*Int4_params->width1+Int4_params->group_size-1)/Int4_params->group_size*sizeof(float)); } void metal_encodecommand_matmul(MTL::ComputeCommandEncoder *computeEncoder){ @@ -165,7 +165,6 @@ void metal_encodecommand_matmul(MTL::ComputeCommandEncoder *computeEncoder){ bM2 = metal_newBuf(sizeof(float), param->arraySize2); bM3 = 
metal_newBuf(sizeof(float), param->outputsize); bParam = metal_newBuf(sizeof(matmul_param), 1); - computeEncoder->setComputePipelineState(mfnPipelineState); computeEncoder->setBuffer(bM1, 0, 0); computeEncoder->setBuffer(bM2, 0, 1); @@ -174,7 +173,7 @@ void metal_encodecommand_matmul(MTL::ComputeCommandEncoder *computeEncoder){ memcpy(bM1->contents(), A1, param->arraySize1*sizeof(float)); - memcpy(bM2->contents(), A2, param->arraySize2*sizeof(float)); + memcpy(bM2->contents(), Anorm, param->arraySize2*sizeof(float)); memcpy(bM3->contents(), A3, param->outputsize*sizeof(float)); memcpy(bParam->contents(), param, sizeof(matmul_param)); } @@ -186,10 +185,7 @@ void metal_compute(){ MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder(); assert(computeEncoder != nullptr); - // Encode command and set buffer to GPU - if (strcmp(fn_name, "matmulInt4") == 0) { - metal_encodecommand_matmulInt4(computeEncoder); - } else if (strcmp(fn_name, "matmul") == 0) { + if (strcmp(fn_name, "matmul") == 0) { metal_encodecommand_matmul(computeEncoder); } else { metal_encodecommand_matmulInt4(computeEncoder); @@ -198,28 +194,72 @@ void metal_compute(){ // Threads -> ThreadGroup -> Grid MTL::Size mThreadGroupSize; MTL::Size mGridSize; - if (strcmp(fn_name, "matmulInt4") == 0){ - mThreadGroupSize = MTL::Size::Make(Int4_params->width3, Int4_params->height1, 1); - mGridSize = MTL::Size::Make(16, 1, 1); - } else if (strcmp(fn_name, "matmul") == 0) { - mThreadGroupSize = MTL::Size::Make(8, 8, 1); - mGridSize = MTL::Size::Make((param->width1 + mThreadGroupSize.width - 1) / mThreadGroupSize.width, - (param->height2 + mThreadGroupSize.height - 1) / mThreadGroupSize.height, + if (strcmp(fn_name, "matmul") == 0) { + auto threadsPerThreadgroup = mDevice->maxThreadsPerThreadgroup(); + mThreadGroupSize = MTL::Size::Make(32, 32, 1); + mGridSize = MTL::Size::Make((param->width2 + mThreadGroupSize.width - 1) / mThreadGroupSize.width, + (param->height1 + mThreadGroupSize.height 
- 1) / mThreadGroupSize.height, 1); } else { - mThreadGroupSize = MTL::Size::Make(Int4_params->width3, Int4_params->height1, 1); - mGridSize = MTL::Size::Make(16, 1, 1); + auto threadsPerThreadgroup = mDevice->maxThreadsPerThreadgroup(); + // for test Normal Matmul (16, 16, 1); + // for test Int4, (16, 1, 1) + mThreadGroupSize = MTL::Size::Make(1, 9, 1); // for test, (16, 1, 1) + mGridSize = MTL::Size::Make((Int4_params->width3+ mThreadGroupSize.width - 1)/mThreadGroupSize.width, (Int4_params->height1+ mThreadGroupSize.height - 1)/mThreadGroupSize.height, 1); } // Dispatch and Run Computation - auto start = high_resolution_clock::now(); + computeEncoder->dispatchThreadgroups(mGridSize, mThreadGroupSize); computeEncoder->endEncoding(); commandBuffer->commit(); + auto start = high_resolution_clock::now(); commandBuffer->waitUntilCompleted(); auto stop = high_resolution_clock::now(); auto duration = duration_cast(stop - start); - std::cout << "GPU: " << fn_name << " " << duration.count() << "ms" << std::endl; + std::cout << duration.count() << std::endl; + computeEncoder->release(); + commandBuffer->release(); +} + +void metal_rms_compute(){ + // Initialization of GPU vals + MTL::CommandBuffer *commandBuffer = mCommandQueue->commandBuffer(); + assert(commandBuffer != nullptr); + MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder(); + assert(computeEncoder != nullptr); + + bM1 = metal_newBuf(sizeof(float), Int4_params->arraySize1); + bM3 = metal_newBuf(sizeof(float), Int4_params->outputsize); + bParam = metal_newBuf(sizeof(MetalMatMulParams), 1); + + computeEncoder->setComputePipelineState(mfnPipelineState); + computeEncoder->setBuffer(bM1, 0, 0); + computeEncoder->setBuffer(bM3, 0, 1); + computeEncoder->setBuffer(bParam, 0, 2); + + computeEncoder->setThreadgroupMemoryLength(Int4_params->type_size * group_size, 0); + + memcpy(bM1->contents(), A1, Int4_params->arraySize1*sizeof(float)); + memcpy(bParam->contents(), Int4_params, 
sizeof(MetalMatMulParams)); + + // Threads -> ThreadGroup -> Grid + + MTL::Size threadgroupSize = MTL::Size::Make(MIN(16, Int4_params->m_dim_z), MIN(16, Int4_params->m_dim_y), 1); + MTL::Size gridSize = MTL::Size::Make((Int4_params->m_dim_z + threadgroupSize.width - 1) / threadgroupSize.width, + (Int4_params->m_dim_y + threadgroupSize.height - 1) / threadgroupSize.height, + 1); + + // Dispatch and Run Computation + + computeEncoder->dispatchThreadgroups(gridSize, threadgroupSize); + computeEncoder->endEncoding(); + commandBuffer->commit(); + auto start = high_resolution_clock::now(); + commandBuffer->waitUntilCompleted(); + auto stop = high_resolution_clock::now(); + auto duration = duration_cast(stop - start); + std::cout << duration.count() << std::endl; computeEncoder->release(); commandBuffer->release(); } @@ -235,28 +275,29 @@ void test_normal_matmul(){ param->arraySize1 = width1*height1; param->arraySize2 = width2*height2; A1 = new float[param->arraySize1]; - A2 = new unsigned char[param->arraySize2]; + Anorm = new float[param->arraySize2]; A3 = new float[param->outputsize]; generateRandomFloatArray(A1, param->arraySize1); - generateRandomCharArray(A2, param->arraySize2); + generateRandomFloatArray(Anorm, param->arraySize2); // printArray(A1, param->arraySize1); // printArray(A2, param->arraySize2); - // CPU - auto start = high_resolution_clock::now(); - test_matmul(A1, param->height1, param->width1, A2, param->height2, param->width2, A3); - auto stop = high_resolution_clock::now(); - auto duration = duration_cast(stop - start); - std::cout << "CPU: " << duration.count() << "ms" << std::endl; - printf("CPU Results: \n"); - for (uint32_t i = 0; i < param->outputsize; i++){ - printf("A3[%d]: %f\n", i, A3[i]); - } - free(A3); - A3 = new float[param->outputsize]; + // // CPU + // auto start = high_resolution_clock::now(); + // test_matmul(A1, param->height1, param->width1, A2, param->height2, param->width2, A3); + // auto stop = high_resolution_clock::now(); + // 
auto duration = duration_cast(stop - start); + // std::cout << "CPU: " << duration.count() << "ms" << std::endl; + // printf("CPU Results: \n"); + // for (uint32_t i = 0; i < param->outputsize; i++){ + // printf("A3[%d]: %f\n", i, A3[i]); + // } + // free(A3); + // A3 = new float[param->outputsize]; // GPU + metal_init(); metal_compute(); // printf("GPU Results: \n"); @@ -271,52 +312,69 @@ void test_normal_matmul(){ void test_matmulInt4(){ // not considering offset atm + fn_name = "matmulInt4"; + fn_name = "matmulUInt4_SIMD_Q4Interleave_unroll32"; Int4_buffer = new MetalMatmulBuffers; Int4_params = new MetalMatMulParams; Int4_params->group_size = group_size; Int4_params->height1 = height1; // m Int4_params->width1 = width1; // k - Int4_params->width3 = width2; // n - A1 = new float[Int4_params->height1*Int4_params->width1]; - A2 = new unsigned char[Int4_params->width1*Int4_params->width3]; - A3 = new float[Int4_params->height1*Int4_params->width3]; - scales = new float[Int4_params->height1*Int4_params->width1/Int4_params->group_size]; - generateRandomFloatArray(A1, Int4_params->height1*Int4_params->width1); - generateRandomCharArray(A2, Int4_params->width1*Int4_params->width3); - generateRandomFloatArray(scales, Int4_params->height1*Int4_params->width1/Int4_params->group_size); + Int4_params->height2 = height2; // n + Int4_params->width2 = width2; + Int4_params->height3 = height1; + Int4_params->width3 = width2; + + Int4_params-> arraySize1 = Int4_params->height1*Int4_params->width1; + Int4_params-> arraySize2 = Int4_params->height2*Int4_params->width2; + Int4_params-> outputsize = Int4_params->height3*Int4_params->width3; + A1 = new float[Int4_params-> arraySize1]; + A2 = new unsigned char[Int4_params-> arraySize2]; + A3 = new float[Int4_params-> outputsize]; + scales = new float[(Int4_params->width2*Int4_params->width1+Int4_params->group_size-1)/Int4_params->group_size]; + generateRandomFloatArray(A1, Int4_params-> arraySize1); + generateRandomCharArray(A2, 
Int4_params-> arraySize2); + generateRandomFloatArray(scales, (Int4_params->width2*Int4_params->width1+Int4_params->group_size-1)/Int4_params->group_size); Int4_buffer->A = A1; Int4_buffer->B = A2; Int4_buffer->C = A3; Int4_buffer->scales = scales; - - metal_init(); - metal_compute(); -// printf("GPU Results: \n"); - // for (uint32_t i = 0; i < Int4_params->height1*Int4_params->width1/Int4_params->group_size; i++){ - // printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); - // } - fn_name = "matmulInt4_SIMD_Q4Interleave"; metal_init(); metal_compute(); -// printf("GPU Results: \n"); - // for (uint32_t i = 0; i < Int4_params->height1*Int4_params->width1/Int4_params->group_size; i++){ + // for (uint32_t i = 0; i < Int4_params-> outputsize; i++){ // printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); // } - fn_name = "matmulUInt4_SIMD_Q4Interleave_unroll16"; - metal_init(); - metal_compute(); -// printf("GPU Results: \n"); - // for (uint32_t i = 0; i < Int4_params->height1*Int4_params->width1/Int4_params->group_size; i++){ - // printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); - // } - fn_name = "matmulUInt4_SIMD_Q4Interleave_unroll16"; +} + +void test_rms_nor(){ + fn_name = "kernel_rms_norm"; + Int4_buffer = new MetalMatmulBuffers; + Int4_params = new MetalMatMulParams; + Int4_params->group_size = group_size; + Int4_params->height1 = 960; // m + Int4_params->width1 = 4096; // k + Int4_params->height2 = 960; // n + Int4_params->width2 = 4096; + Int4_params->height3 = 960; + Int4_params->width3 = 4096; + Int4_params-> arraySize1 = Int4_params->height1*Int4_params->width1; + Int4_params-> arraySize2 = Int4_params->height2*Int4_params->width2; + Int4_params-> outputsize = Int4_params->height3*Int4_params->width3; + A1 = new float[Int4_params-> arraySize1]; + A3 = new float[Int4_params-> outputsize]; + generateRandomFloatArray(A1, Int4_params-> arraySize1); + Int4_params->m_dim_x = 1; + Int4_params->m_dim_y = Int4_params->height1; + 
Int4_params->m_dim_z = Int4_params->width1; + Int4_params->eps = 1e-06; + Int4_params->type_size = sizeof(float); metal_init(); - metal_compute(); -// printf("GPU Results: \n"); + metal_rms_compute(); + } int main(){ - test_matmulInt4(); + + test_rms_nor(); return 0; } diff --git a/llm/tests/metal/cpp_version/param.h b/llm/tests/metal/cpp_version/param.h index f5ed6290..f241125d 100644 --- a/llm/tests/metal/cpp_version/param.h +++ b/llm/tests/metal/cpp_version/param.h @@ -17,10 +17,24 @@ typedef struct { // For customized MatmulInt4 use typedef struct { - unsigned int height1; - unsigned int width3; - unsigned int width1; + unsigned int width1, height1, width2, height2, width3, height3, outputsize, arraySize1, arraySize2; unsigned int group_size; + + unsigned int m_dim_x, m_dim_y, m_dim_z; + unsigned int type_size; // for nb + float eps; // rms_nor + float scale; // for softmax + + int n_past; + int n_dims; + int mode; + int n_orig_ctx; + float freq_base; + float freq_scale; + float ext_factor; + float attn_factor; + float beta_fast; + float beta_slow; } MetalMatMulParams; // should be inside metal header From 0c385822f336bebd8632c86bf8acd073f53c629f Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Tue, 20 Feb 2024 00:52:18 -0500 Subject: [PATCH 25/37] more metal ops (TODO needed) --- kernels/metal/kernel/op.metal | 31 ++++++++++++++--------- llm/src/ops/metal/LlamaRMSNorm.cc | 28 +++++++++++++++++++++ llm/src/ops/metal/RotaryPosEmb.cc | 24 ++++++++++++++++++ llm/src/ops/metal/batch_add.cc | 1 + llm/src/ops/metal/embedding.cc | 22 +++++++++++++++++ llm/src/ops/metal/linear.cc | 41 +++++++++++++++++++++++++++++++ llm/src/ops/metal/softmax.cc | 17 +++++++++++++ 7 files changed, 153 insertions(+), 11 deletions(-) create mode 100644 llm/src/ops/metal/LlamaRMSNorm.cc create mode 100644 llm/src/ops/metal/RotaryPosEmb.cc create mode 100644 llm/src/ops/metal/embedding.cc create mode 100644 llm/src/ops/metal/linear.cc create mode 100644 llm/src/ops/metal/softmax.cc diff 
--git a/kernels/metal/kernel/op.metal b/kernels/metal/kernel/op.metal index 5d89d2f3..61d0b79e 100644 --- a/kernels/metal/kernel/op.metal +++ b/kernels/metal/kernel/op.metal @@ -1,4 +1,7 @@ #include +#include "operators.h" +#include "utils.h" + using namespace metal; using namespace metal; @@ -8,17 +11,23 @@ using namespace metal; #define MIN(x, y) ((x) < (y) ? (x) : (y)) #define SWAP(x, y) { auto tmp = (x); (x) = (y); (y) = tmp; } - /* CUDA */ -// __global__ void batch_Add_cuda(Matrix3D input, Matrix3D input2, Matrix3D output) { -// int i = blockIdx.x * blockDim.x + threadIdx.x; -// int j = blockIdx.y * blockDim.y + threadIdx.y; -// int k = blockIdx.z * blockDim.z + threadIdx.z; - -// //// half version -// if (i < input.m_dim_x && j < input.m_dim_y && k < input.m_dim_z) { -// output(i, j, k) = __hadd(input(i, j, k), input2(0, j, k)); -// } -// } +kernel void EmbeddingKernel(device Matrix3D_int& input_id [[buffer(0)]], + device Matrix3D_half& output [[buffer(1)]], + device float* lookup [[buffer(2)]], + const unsigned int embed_dim [[buffer(3)]], + uint id [[thread_position_in_grid]]) { + if (id < input_id.m_dim_z) { + int token_id = input_id(0, 0, id); + device half* output_sample_ptr = &output.m_data[id * embed_dim]; + device float* target_embed = &lookup[token_id * embed_dim]; + + for (int j = 0; j < embed_dim; ++j) { + output_sample_ptr[j] = half(target_embed[j]); + } + } +} + + kernel void kernel_batch_add(device const float* inputA, device const float* inputB, device float* output, diff --git a/llm/src/ops/metal/LlamaRMSNorm.cc b/llm/src/ops/metal/LlamaRMSNorm.cc new file mode 100644 index 00000000..6a9a6448 --- /dev/null +++ b/llm/src/ops/metal/LlamaRMSNorm.cc @@ -0,0 +1,28 @@ +#include +#include + +#include "operators.h" + +// TODO: modify metal for weights +void LlamaRMSNorm_metal::forward(const Matrix3D &x, Matrix3D &output, float eps) { + int m = x.m_dim_x * x.m_dim_y; + int n = x.m_dim_z; + dim3 grid(m); + dim3 block(min(n, 1024)); + + /* For general 
cases, n is equal to hidden_units, e.g., 512/1024. + Since we have warp shuffle inside the code, block.x % 32 should be 0. + */ + if (n % 32 != 0) { + block.x = 1024; + } + + block.x = block.x / (4 / sizeof(half)); // if using half, only need half of block.x + + /* should pay attention to the rsqrt precision */ + half *input = x.m_data, *out = output.m_data; + float *gamma = weight.m_data; + + matmul::MatmulOperator op = matmul::MatmulOperator(); + op.rms_norm_metal(input, gamma, out, eps, m, n); // For gpt-3 +} \ No newline at end of file diff --git a/llm/src/ops/metal/RotaryPosEmb.cc b/llm/src/ops/metal/RotaryPosEmb.cc new file mode 100644 index 00000000..f039b055 --- /dev/null +++ b/llm/src/ops/metal/RotaryPosEmb.cc @@ -0,0 +1,24 @@ +#include +#include "operators.h" + +// TODO: match constants on metal +void RotaryPosEmb_cuda_forward(Matrix3D query, Matrix3D key, Matrix3D cos, Matrix3D sin, int start_idx, int len) { + struct matmul_params params; + params.A.row = query.m_dim_y; + params.A.column = query.m_dim_z; + params.A.half_data_ptr = query.m_data; + params.B.row = key.m_dim_z; // k + params.B.column = key.m_dim_y; // n + params.B.int32_data_ptr = key.m_data; + params.C.row = output.m_dim_y; + params.C.column = output.m_dim_z; + params.C.half_data_ptr = output.m_data; + params.opt_params.num_thread = num_thread; + params.half_scales = this->scale.m_data; + params.int32_zero_point = this->zero_point.m_data; + params.block_size = QK; + + matmul::MatmulOperator op = matmul::MatmulOperator(); + op.rope_metal(&params, query.m_dim_x, query.m_dim_y, query.m_dim_z, n_past, n_dims, mode, n_orig_ctx, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow ); + +} diff --git a/llm/src/ops/metal/batch_add.cc b/llm/src/ops/metal/batch_add.cc index 66c8ada5..469538d6 100644 --- a/llm/src/ops/metal/batch_add.cc +++ b/llm/src/ops/metal/batch_add.cc @@ -1,5 +1,6 @@ #include "operators.h" +// done void batch_Add(const Matrix3D &input, const Matrix3D &input2, Matrix3D
&output) { struct matmul_params params; params.A.data_ptr = input.m_data; diff --git a/llm/src/ops/metal/embedding.cc b/llm/src/ops/metal/embedding.cc new file mode 100644 index 00000000..1bb17df0 --- /dev/null +++ b/llm/src/ops/metal/embedding.cc @@ -0,0 +1,22 @@ +#include "operators.h" +#include "utils.h" +#include "utils.h" + +void load_Embedding_params_metal(Embedding_cuda& op, std::string prefix) { + op.lookup.load((prefix + "/weight.bin").c_str()); +} + +// TODO: implement metal side +void Embedding_cuda::forward(Matrix3D input_id, Matrix3D output) { + PROFILE_START(profile_name); + assert(input_id.m_dim_x == 1); + assert(input_id.m_dim_y == 1); + assert(input_id.m_dim_z == output.m_dim_y); + assert(output.m_dim_z == this->embed_dim); + + int threadsPerBlock = 1024; + int blocksPerGrid = (input_id.m_dim_z + threadsPerBlock - 1) / threadsPerBlock; + EmbeddingKernel_metal(input_id, output, this->lookup.m_data, this->embed_dim); + + PROFILE_END(profile_name); +} \ No newline at end of file diff --git a/llm/src/ops/metal/linear.cc b/llm/src/ops/metal/linear.cc new file mode 100644 index 00000000..9455bba6 --- /dev/null +++ b/llm/src/ops/metal/linear.cc @@ -0,0 +1,41 @@ +#include +#include "operators.h" +#include "utils.h" + + +// TODO: incorporate gemv from llama.cpp +void Linear_half_int4::forward(const Matrix3D &x, Matrix3D &output) { + const int num_thread = 8; + Matrix3D b = this->weight; + PROFILE_START(profile_name); + + // a: m x k b: n x k c: m x n + assert(output.m_dim_x == 1); + assert(output.m_dim_y == x.m_dim_y); + // assert(output.m_dim_z == weight.m_dim_y); + // assert(x.m_dim_z / 8 == weight.m_dim_z); + + assert(output.m_dim_z > num_thread); + assert(output.m_dim_z % (num_thread * 2) == 0); // unroll column by 2 + + struct matmul_params params; + params.A.row = x.m_dim_y; + params.A.column = x.m_dim_z; + params.A.half_data_ptr = x.m_data; + params.B.row = b.m_dim_z; // k + params.B.column = b.m_dim_y; // n + params.B.int32_data_ptr = b.m_data; + 
params.C.row = output.m_dim_y; + params.C.column = output.m_dim_z; + params.C.half_data_ptr = output.m_data; + params.opt_params.num_thread = num_thread; + params.half_scales = this->scale.m_data; + params.int32_zero_point = this->zero_point.m_data; + params.block_size = QK; + + matmul::MatmulOperator op = matmul::MatmulOperator(); + op.mat_mul_accelerator_int4_fast_no_offset(&params); //BUG: gemv and matmul int4? (llama.cpp matmul needed) + + PROFILE_END(profile_name); + return; +} \ No newline at end of file diff --git a/llm/src/ops/metal/softmax.cc b/llm/src/ops/metal/softmax.cc new file mode 100644 index 00000000..82ef6824 --- /dev/null +++ b/llm/src/ops/metal/softmax.cc @@ -0,0 +1,17 @@ +#include +#include "operators.h" + + +// TODO: scale? +void softmax_cuda(Matrix3D input, Matrix3D output) { + struct matmul_params params; + params.A.row = input.m_dim_y; + params.A.column = input.m_dim_z; + params.A.half_data_ptr = input.m_data; + params.C.row = output.m_dim_y; + params.C.column = output.m_dim_z; + params.C.half_data_ptr = output.m_data; + + matmul::MatmulOperator op = matmul::MatmulOperator(); + op.soft_max_metal(&params, input.m_dim_x, input.m_dim_y,input.m_dim_z, ); +} \ No newline at end of file From 962266ecee3ba941b9568a7e3a4040c2606a355e Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Mon, 26 Feb 2024 01:30:11 -0500 Subject: [PATCH 26/37] All basic operations and matmul are included --- kernels/metal/kernel/op.metal | 220 +++++++++++++++++++++++++++++- llm/src/ops/metal/LlamaRMSNorm.cc | 49 ++++++- llm/src/ops/metal/embedding.cc | 39 +++++- llm/src/ops/metal/softmax.cc | 4 +- 4 files changed, 303 insertions(+), 9 deletions(-) diff --git a/kernels/metal/kernel/op.metal b/kernels/metal/kernel/op.metal index 61d0b79e..f8c5c8a9 100644 --- a/kernels/metal/kernel/op.metal +++ b/kernels/metal/kernel/op.metal @@ -1,6 +1,7 @@ #include #include "operators.h" #include "utils.h" +#include "opParams.h" using namespace metal; @@ -84,6 +85,7 @@ kernel void kernel_gelu_quick(
// TODO: to be fixed kernel void kernel_rms_norm( device const void * src0, + device const float * src1, device float * dst, constant MetalMatMulParams& params, // constant int64_t & ne00, // row @@ -129,8 +131,9 @@ kernel void kernel_rms_norm( const float scale = 1.0f / sqrt(mean + eps); device float4 * y = (device float4 *) (dst + tgpig*ne00); + device float4 * weight = (device float4 *) (src1 + tgpig*ne00); for (int i00 = tpitg; i00 < ne00/4; i00 += ntg) { - y[i00] = x[i00] * scale; + y[i00] = x[i00] * scale * weight[i00]; } } @@ -541,7 +544,220 @@ matmulInt4_SIMD_Q4Interleave_unroll16(GPU): 1800 ms, 133 GOP/s matmulInt4_SIMD_Q4Interleave_unroll32(GPU): 1500 ms, 160 GOP/s */ -#include "opParams.h" + + +template +void dequantize_q4_0(device const block_q4_0 *xb, short il, thread type4x4 & reg) { + device const uint16_t * qs = ((device const uint16_t *)xb + 1); + const float d1 = il ? (xb->d / 16.h) : xb->d; + const float d2 = d1 / 256.f; + const float md = -8.h * xb->d; + const ushort mask0 = il ? 
0x00F0 : 0x000F; + const ushort mask1 = mask0 << 8; + + for (int i=0;i<8;i++) { + reg[i/2][2*(i%2)+0] = d1 * (qs[i] & mask0) + md; + reg[i/2][2*(i%2)+1] = d2 * (qs[i] & mask1) + md; + } +} + +// each block_q contains 16*nl weights +template +void kernel_mul_mm_impl(device const uchar * src0, + device const uchar * src1, + device float * dst, + constant int64_t & ne00, + constant int64_t & ne02, + constant uint64_t & nb01, + constant uint64_t & nb02, + constant int64_t & ne12, + constant uint64_t & nb10, + constant uint64_t & nb11, + constant uint64_t & nb12, + constant int64_t & ne0, + constant int64_t & ne1, + constant uint & r2, + constant uint & r3, + threadgroup uchar * shared_memory [[threadgroup(0)]], + uint3 tgpig[[threadgroup_position_in_grid]], + uint tiitg[[thread_index_in_threadgroup]], + uint sgitg[[simdgroup_index_in_threadgroup]]) { + + threadgroup half * sa = (threadgroup half *)(shared_memory); + threadgroup float * sb = (threadgroup float *)(shared_memory + 4096); + + const uint r0 = tgpig.y; + const uint r1 = tgpig.x; + const uint im = tgpig.z; + + // if this block is of 64x32 shape or smaller + short n_rows = (ne0 - r0 * BLOCK_SIZE_M < BLOCK_SIZE_M) ? (ne0 - r0 * BLOCK_SIZE_M) : BLOCK_SIZE_M; + short n_cols = (ne1 - r1 * BLOCK_SIZE_N < BLOCK_SIZE_N) ? (ne1 - r1 * BLOCK_SIZE_N) : BLOCK_SIZE_N; + + // a thread shouldn't load data outside of the matrix + short thread_row = ((short)tiitg/THREAD_PER_ROW) < n_rows ? ((short)tiitg/THREAD_PER_ROW) : n_rows - 1; + short thread_col = ((short)tiitg/THREAD_PER_COL) < n_cols ? 
((short)tiitg/THREAD_PER_COL) : n_cols - 1; + + simdgroup_half8x8 ma[4]; + simdgroup_float8x8 mb[2]; + simdgroup_float8x8 c_res[8]; + for (int i = 0; i < 8; i++){ + c_res[i] = make_filled_simdgroup_matrix(0.f); + } + + short il = (tiitg % THREAD_PER_ROW); + + const uint i12 = im%ne12; + const uint i13 = im/ne12; + + uint offset0 = (i12/r2)*nb02 + (i13/r3)*(nb02*ne02); + ushort offset1 = il/nl; + + device const block_q * x = (device const block_q *)(src0 + (r0 * BLOCK_SIZE_M + thread_row) * nb01 + offset0) + offset1; + device const float * y = (device const float *)(src1 + + nb12 * im + + nb11 * (r1 * BLOCK_SIZE_N + thread_col) + + nb10 * (BLOCK_SIZE_K / THREAD_PER_COL * (tiitg % THREAD_PER_COL))); + + for (int loop_k = 0; loop_k < ne00; loop_k += BLOCK_SIZE_K) { + // load data and store to threadgroup memory + half4x4 temp_a; + dequantize_func(x, il, temp_a); + threadgroup_barrier(mem_flags::mem_threadgroup); + + #pragma unroll(16) + for (int i = 0; i < 16; i++) { + *(sa + SG_MAT_SIZE * ((tiitg / THREAD_PER_ROW / 8) \ + + (tiitg % THREAD_PER_ROW) * 16 + (i / 8) * 8) \ + + (tiitg / THREAD_PER_ROW) % 8 + (i & 7) * 8) = temp_a[i/4][i%4]; + } + + *(threadgroup float2x4 *)(sb + (tiitg % THREAD_PER_COL) * 8 * 32 + 8 * (tiitg / THREAD_PER_COL)) = *((device float2x4 *)y); + + il = (il + 2 < nl) ? il + 2 : il % 2; + x = (il < 2) ? 
x + (2+nl-1)/nl : x; + y += BLOCK_SIZE_K; + + threadgroup_barrier(mem_flags::mem_threadgroup); + + // load matrices from threadgroup memory and conduct outer products + threadgroup half * lsma = (sa + THREAD_MAT_M * SG_MAT_SIZE * (sgitg % 2)); + threadgroup float * lsmb = (sb + THREAD_MAT_N * SG_MAT_SIZE * (sgitg / 2)); + + #pragma unroll(4) + for (int ik = 0; ik < BLOCK_SIZE_K / 8; ik++) { + #pragma unroll(4) + for (int i = 0; i < 4; i++) { + simdgroup_load(ma[i],lsma + SG_MAT_SIZE * i); + } + simdgroup_barrier(mem_flags::mem_none); + #pragma unroll(2) + for (int i = 0; i < 2; i++) { + simdgroup_load(mb[i],lsmb + SG_MAT_SIZE * i); + } + + lsma += BLOCK_SIZE_M / SG_MAT_ROW * SG_MAT_SIZE; + lsmb += BLOCK_SIZE_N / SG_MAT_ROW * SG_MAT_SIZE; + + #pragma unroll(8) + for (int i = 0; i < 8; i++){ + simdgroup_multiply_accumulate(c_res[i], mb[i/4], ma[i%4], c_res[i]); + } + } + } + + if ((r0 + 1) * BLOCK_SIZE_M <= ne0 && (r1 + 1) * BLOCK_SIZE_N <= ne1) { + device float * C = dst + (BLOCK_SIZE_M * r0 + 32 * (sgitg & 1)) \ + + (BLOCK_SIZE_N * r1 + 16 * (sgitg >> 1)) * ne0 + im*ne1*ne0; + for (int i = 0; i < 8; i++) { + simdgroup_store(c_res[i], C + 8 * (i%4) + 8 * ne0 * (i/4), ne0); + } + } else { + // block is smaller than 64x32, we should avoid writing data outside of the matrix + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup float * temp_str = ((threadgroup float *)shared_memory) \ + + 32 * (sgitg&1) + (16 * (sgitg>>1)) * BLOCK_SIZE_M; + for (int i = 0; i < 8; i++) { + simdgroup_store(c_res[i], temp_str + 8 * (i%4) + 8 * BLOCK_SIZE_M * (i/4), BLOCK_SIZE_M); + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + device float * C = dst + (BLOCK_SIZE_M * r0) + (BLOCK_SIZE_N * r1) * ne0 + im*ne1*ne0; + if (sgitg == 0) { + for (int i = 0; i < n_rows; i++) { + for (int j = tiitg; j < n_cols; j += BLOCK_SIZE_N) { + *(C + i + j * ne0) = *(temp_str + i + j * BLOCK_SIZE_M); + } + } + } + } +} + +template +kernel void kernel_mul_mm(device const uchar * src0, + 
device const uchar * src1, + device float * dst, + constant int64_t & ne00, + constant int64_t & ne02, + constant uint64_t & nb01, + constant uint64_t & nb02, + constant int64_t & ne12, + constant uint64_t & nb10, + constant uint64_t & nb11, + constant uint64_t & nb12, + constant int64_t & ne0, + constant int64_t & ne1, + constant uint & r2, + constant uint & r3, + threadgroup uchar * shared_memory [[threadgroup(0)]], + uint3 tgpig[[threadgroup_position_in_grid]], + uint tiitg[[thread_index_in_threadgroup]], + uint sgitg[[simdgroup_index_in_threadgroup]]) { + kernel_mul_mm_impl( + src0, + src1, + dst, + ne00, + ne02, + nb01, + nb02, + ne12, + nb10, + nb11, + nb12, + ne0, + ne1, + r2, + r3, + shared_memory, + tgpig, + tiitg, + sgitg); +} + +typedef void (mat_mm_t)( + device const uchar * src0, + device const uchar * src1, + device float * dst, + constant int64_t & ne00, + constant int64_t & ne02, + constant uint64_t & nb01, + constant uint64_t & nb02, + constant int64_t & ne12, + constant uint64_t & nb10, + constant uint64_t & nb11, + constant uint64_t & nb12, + constant int64_t & ne0, + constant int64_t & ne1, + constant uint & r2, + constant uint & r3, + threadgroup uchar *, + uint3, uint, uint); + +template [[host_name("kernel_mul_mm_q4_0_f32")]] kernel mat_mm_t kernel_mul_mm; + + kernel void matmul(device const float* inA, device const float* inB, // column major device float* result, diff --git a/llm/src/ops/metal/LlamaRMSNorm.cc b/llm/src/ops/metal/LlamaRMSNorm.cc index 6a9a6448..7903b0b2 100644 --- a/llm/src/ops/metal/LlamaRMSNorm.cc +++ b/llm/src/ops/metal/LlamaRMSNorm.cc @@ -1,7 +1,8 @@ #include #include - #include "operators.h" +#include "utils.h" +#include "matmul_metal_imp.h" // TODO: modify metal for weights void LlamaRMSNorm_metal::forward(const Matrix3D &x, Matrix3D &output, float eps) { @@ -19,10 +20,52 @@ void LlamaRMSNorm_metal::forward(const Matrix3D &x, Matrix3D &output block.x = block.x / (4 / sizeof(half)); // if using half, only need half of 
block.x + setupLibrary("kernel_rms_norm"); + + _mParams = _mDevice->newBuffer(sizeof(MetalMatMulParams), MTL::ResourceStorageModeShared); + _mParamsPtr = (MetalMatMulParams *)_mParams->contents(); + _mParamsPtr->m_dim_x = x.m_dim_x; + _mParamsPtr->m_dim_y = x.m_dim_y; + _mParamsPtr->m_dim_z = x.m_dim_z; + _mParamsPtr->eps = eps; + _mParamsPtr->type_size = sizeof(half); + + /* should pay attention to the rsqrt precision */ half *input = x.m_data, *out = output.m_data; float *gamma = weight.m_data; - matmul::MatmulOperator op = matmul::MatmulOperator(); - op.rms_norm_metal(input, gamma, out, eps, m, n); // For gpt-3 + _mBufferA = getBufferfromPtr((void *)input); + _mBufferB = getBufferfromPtr((void *)gamma); + _mBufferResult = getBufferfromPtr((void *)out); + + + + if (!_mBufferA || !_mBufferResult) { + std::cerr << "Failed to locate some buffer!" << std::endl; + exit(-1); + } + + // Create a command buffer to hold commands. + MTL::CommandBuffer *commandBuffer = _mCommandQueue->commandBuffer(); + assert(commandBuffer != nullptr); + + // Start a compute pass. + MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder(); + assert(computeEncoder != nullptr); + + // Encode the pipeline state object and its parameters. 
+ computeEncoder->setComputePipelineState(_mMatmulFunctionPSO); + computeEncoder->setBuffer(_mBufferA, 0, 0); + computeEncoder->setBuffer(_mBufferB, 0, 1); + computeEncoder->setBuffer(_mBufferResult, 0, 2); + computeEncoder->setBuffer(_mParams, 0, 3); + + computeEncoder->setThreadgroupMemoryLength(param.type_size * N_SIMDWIDTH, 0); + + MTL::Size threadgroupSize = MTL::Size::Make(block.x, block.y, block.z); + MTL::Size gridSize = MTL::Size::Make(grid.x, grid.y, grid.z); + + SendEncode(gridSize, threadgroupSize, commandBuffer, computeEncoder); + _mMatmulFunctionPSO->release(); } \ No newline at end of file diff --git a/llm/src/ops/metal/embedding.cc b/llm/src/ops/metal/embedding.cc index 1bb17df0..b6a18658 100644 --- a/llm/src/ops/metal/embedding.cc +++ b/llm/src/ops/metal/embedding.cc @@ -1,6 +1,6 @@ #include "operators.h" #include "utils.h" -#include "utils.h" +#include "matmul_metal_imp.h" void load_Embedding_params_metal(Embedding_cuda& op, std::string prefix) { op.lookup.load((prefix + "/weight.bin").c_str()); @@ -16,7 +16,42 @@ void Embedding_cuda::forward(Matrix3D input_id, Matrix3D output) { int threadsPerBlock = 1024; int blocksPerGrid = (input_id.m_dim_z + threadsPerBlock - 1) / threadsPerBlock; - EmbeddingKernel_metal(input_id, output, this->lookup.m_data, this->embed_dim); + + setupLibrary("EmbeddingKernel"); + + _mBufferA = getBufferfromPtr((void *)input_id); + _mBufferB = getBufferfromPtr((void *)this->lookup.m_data); + _mBufferResult = getBufferfromPtr((void *)output); + _mBufferEmbed_dim = getBufferfromPtr((void *)this->embed_dim); + + if (!_mBufferA || !_mBufferB || !_mBufferResult || !_mBufferScales) { + std::cerr << "Failed to locate some buffer!" << std::endl; + exit(-1); + } + + // Create a command buffer to hold commands. + MTL::CommandBuffer *commandBuffer = _mCommandQueue->commandBuffer(); + assert(commandBuffer != nullptr); + + // Start a compute pass. 
+ MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder(); + assert(computeEncoder != nullptr); + + // Encode the pipeline state object and its parameters. + computeEncoder->setComputePipelineState(_mMatmulFunctionPSO); + computeEncoder->setBuffer(_mBufferA, 0, 0); + computeEncoder->setBuffer(_mBufferResult, 0, 1); + computeEncoder->setBuffer(_mBufferB, 0, 2); + computeEncoder->setBuffer(_mBufferEmbed_dim, 0, 3); + + MTL::Size gridSize = MTL::Size::Make(blocksPerGrid, 1, 1); + + // Calculate a threadgroup size + MTL::Size threadgroupSize = MTL::Size::Make(threadsPerBlock, 1, 1); + + SendEncode(gridSize, threadgroupSize, commandBuffer, computeEncoder); + _mMatmulFunctionPSO->release(); + PROFILE_END(profile_name); } \ No newline at end of file diff --git a/llm/src/ops/metal/softmax.cc b/llm/src/ops/metal/softmax.cc index 82ef6824..ca1d3e22 100644 --- a/llm/src/ops/metal/softmax.cc +++ b/llm/src/ops/metal/softmax.cc @@ -3,7 +3,7 @@ // TODO: scale? -void softmax_cuda(Matrix3D input, Matrix3D output) { +void softmax(Matrix3D input, Matrix3D output) { struct matmul_params params; params.A.row = input.m_dim_y; params.A.column = input.m_dim_z; @@ -13,5 +13,5 @@ void softmax_cuda(Matrix3D input, Matrix3D output) { params.C.half_data_ptr = output.m_data; matmul::MatmulOperator op = matmul::MatmulOperator(); - op.soft_max_metal(&params, input.m_dim_x, input.m_dim_y,input.m_dim_z, ); + op.soft_max_metal(&params, input.m_dim_x, input.m_dim_y, input.m_dim_z, 1.0); } \ No newline at end of file From cd416a340de8cf4aed5d239ad341226adfc62a58 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Sun, 10 Mar 2024 22:55:09 -0400 Subject: [PATCH 27/37] update llama matmul parameter and test --- kernels/metal/kernel/op.metal | 78 ++-------- llm/src/ops/metal/RotaryPosEmb.cc | 3 +- llm/tests/metal/cpp_version/main.cc | 143 ++++++++++++++++-- llm/tests/metal/cpp_version/op.metal | 217 +++++++++++++++++++++++++-- 4 files changed, 347 insertions(+), 94 deletions(-) diff --git
a/kernels/metal/kernel/op.metal b/kernels/metal/kernel/op.metal index f8c5c8a9..c5368bfe 100644 --- a/kernels/metal/kernel/op.metal +++ b/kernels/metal/kernel/op.metal @@ -544,10 +544,16 @@ matmulInt4_SIMD_Q4Interleave_unroll16(GPU): 1800 ms, 133 GOP/s matmulInt4_SIMD_Q4Interleave_unroll32(GPU): 1500 ms, 160 GOP/s */ - +#define QK4_0 32 +#define QR4_0 2 +#define nl 2 +typedef struct { + half d; // delta + uint8_t qs[QK4_0 / 2]; // nibbles / quants +} block_q; template -void dequantize_q4_0(device const block_q4_0 *xb, short il, thread type4x4 & reg) { +void dequantize_q4_0(device const block_q *xb, short il, thread type4x4 & reg) { device const uint16_t * qs = ((device const uint16_t *)xb + 1); const float d1 = il ? (xb->d / 16.h) : xb->d; const float d2 = d1 / 256.f; @@ -561,8 +567,6 @@ void dequantize_q4_0(device const block_q4_0 *xb, short il, thread type4x4 & reg } } -// each block_q contains 16*nl weights -template void kernel_mul_mm_impl(device const uchar * src0, device const uchar * src1, device float * dst, @@ -622,7 +626,7 @@ void kernel_mul_mm_impl(device const uchar * src0, for (int loop_k = 0; loop_k < ne00; loop_k += BLOCK_SIZE_K) { // load data and store to threadgroup memory half4x4 temp_a; - dequantize_func(x, il, temp_a); + dequantize_q4_0(x, il, temp_a); threadgroup_barrier(mem_flags::mem_threadgroup); #pragma unroll(16) @@ -694,70 +698,6 @@ void kernel_mul_mm_impl(device const uchar * src0, } } -template -kernel void kernel_mul_mm(device const uchar * src0, - device const uchar * src1, - device float * dst, - constant int64_t & ne00, - constant int64_t & ne02, - constant uint64_t & nb01, - constant uint64_t & nb02, - constant int64_t & ne12, - constant uint64_t & nb10, - constant uint64_t & nb11, - constant uint64_t & nb12, - constant int64_t & ne0, - constant int64_t & ne1, - constant uint & r2, - constant uint & r3, - threadgroup uchar * shared_memory [[threadgroup(0)]], - uint3 tgpig[[threadgroup_position_in_grid]], - uint 
tiitg[[thread_index_in_threadgroup]], - uint sgitg[[simdgroup_index_in_threadgroup]]) { - kernel_mul_mm_impl( - src0, - src1, - dst, - ne00, - ne02, - nb01, - nb02, - ne12, - nb10, - nb11, - nb12, - ne0, - ne1, - r2, - r3, - shared_memory, - tgpig, - tiitg, - sgitg); -} - -typedef void (mat_mm_t)( - device const uchar * src0, - device const uchar * src1, - device float * dst, - constant int64_t & ne00, - constant int64_t & ne02, - constant uint64_t & nb01, - constant uint64_t & nb02, - constant int64_t & ne12, - constant uint64_t & nb10, - constant uint64_t & nb11, - constant uint64_t & nb12, - constant int64_t & ne0, - constant int64_t & ne1, - constant uint & r2, - constant uint & r3, - threadgroup uchar *, - uint3, uint, uint); - -template [[host_name("kernel_mul_mm_q4_0_f32")]] kernel mat_mm_t kernel_mul_mm; - - kernel void matmul(device const float* inA, device const float* inB, // column major device float* result, diff --git a/llm/src/ops/metal/RotaryPosEmb.cc b/llm/src/ops/metal/RotaryPosEmb.cc index f039b055..594a210b 100644 --- a/llm/src/ops/metal/RotaryPosEmb.cc +++ b/llm/src/ops/metal/RotaryPosEmb.cc @@ -20,5 +20,6 @@ void RotaryPosEmb_cuda_forward(Matrix3D query, Matrix3D key, Matrix3 matmul::MatmulOperator op = matmul::MatmulOperator(); op.rope_metal(&params, query.m_dim_x, query.m_dim_y, query.m_dim_z, n_past, n_dims, mode, n_orig_ctx, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow ); - + // In llama.cpp: + // } diff --git a/llm/tests/metal/cpp_version/main.cc b/llm/tests/metal/cpp_version/main.cc index 523e89da..b9aa09ad 100755 --- a/llm/tests/metal/cpp_version/main.cc +++ b/llm/tests/metal/cpp_version/main.cc @@ -42,10 +42,10 @@ const char * fn_name = "matmul"; // main -unsigned int height1 = 96; -unsigned int width1 = 4096; -unsigned int height2 = 4096; -unsigned int width2 = 32000; +unsigned int height1 = 320*320; +unsigned int width1 = 320; +unsigned int height2 = 320; +unsigned int width2 = 320*320; float *A1, *Anorm, *A3;
unsigned char *A2; matmul_param *param; @@ -251,11 +251,11 @@ void metal_rms_compute(){ 1); // Dispatch and Run Computation - + auto start = high_resolution_clock::now(); computeEncoder->dispatchThreadgroups(gridSize, threadgroupSize); computeEncoder->endEncoding(); commandBuffer->commit(); - auto start = high_resolution_clock::now(); + commandBuffer->waitUntilCompleted(); auto stop = high_resolution_clock::now(); auto duration = duration_cast(stop - start); @@ -312,7 +312,7 @@ void test_normal_matmul(){ void test_matmulInt4(){ // not considering offset atm - fn_name = "matmulInt4"; + fn_name = "matmulInt4"; fn_name = "matmulUInt4_SIMD_Q4Interleave_unroll32"; Int4_buffer = new MetalMatmulBuffers; Int4_params = new MetalMatMulParams; @@ -372,9 +372,134 @@ void test_rms_nor(){ } -int main(){ +void test_matmul_llama(){ + // m: 1, n = 32000, k = 4096 (lm_head) + // m: 1, n = 4096, k = 4096 (Q, K, V, out projections) + // m: 1, n = 4096, k = 11008 (down_proj) + // m: 1, n = 11008, k = 4096 (up_proj and gate_proj) + + // in ggml doc: https://github.com/ggerganov/whisper.cpp/blob/master/ggml.h + // ne[GGML_MAX_DIMS] => number of elements + // ne10 => number of elements of src1 along dim_0 + // nb[GGML_MAX_DIMS] => stride in bytes: + // nb[0] = ggml_type_size(type) + // nb[1] = nb[0] * (ne[0] / ggml_blck_size(type)) + padding + // nb[i] = nb[i-1] * ne[i-1] + + fn_name = "kernel_mul_mm_impl"; + int bs = 1; + int m = 1; + int n = 32000; + int k = 4096; + int hidden_size = bs*m*k; + int weight_size = bs*n*k; + int output_size = bs*m*n; + unsigned char* src0 = new unsigned char[hidden_size]; + unsigned char* src1 = new unsigned char[weight_size]; + float* dst = new float[output_size]; + generateRandomCharArray(src0, hidden_size); + generateRandomCharArray(src1, weight_size); + // generateRandomFloatArray(dst, arraySize); + metal_init(); + // Initialization of GPU vals + MTL::CommandBuffer *commandBuffer = mCommandQueue->commandBuffer(); + assert(commandBuffer != nullptr); + 
MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder(); + assert(computeEncoder != nullptr); + + bM1 = metal_newBuf(sizeof(unsigned char), hidden_size); + bM2 = metal_newBuf(sizeof(unsigned char), weight_size); + bM3 = metal_newBuf(sizeof(float), output_size); + MTL::Buffer *bne00 = metal_newBuf(sizeof(int64_t), 1); + MTL::Buffer *bne02 = metal_newBuf(sizeof(int64_t), 1); + MTL::Buffer *bnb01 = metal_newBuf(sizeof(uint64_t), 1); + MTL::Buffer *bnb02 = metal_newBuf(sizeof(uint64_t), 1); + MTL::Buffer *bne12 = metal_newBuf(sizeof(int64_t), 1); + MTL::Buffer *bnb10 = metal_newBuf(sizeof(uint64_t), 1); + MTL::Buffer *bnb11 = metal_newBuf(sizeof(uint64_t), 1); + MTL::Buffer *bnb12 = metal_newBuf(sizeof(uint64_t), 1); + MTL::Buffer *bne0 = metal_newBuf(sizeof(int64_t), 1); + MTL::Buffer *bne1 = metal_newBuf(sizeof(int64_t), 1); + MTL::Buffer *br2 = metal_newBuf(sizeof(uint), 1); + MTL::Buffer *br3 = metal_newBuf(sizeof(uint), 1); + + computeEncoder->setComputePipelineState(mfnPipelineState); + computeEncoder->setBuffer(bM1, 0, 0); + computeEncoder->setBuffer(bM2, 0, 1); + computeEncoder->setBuffer(bM3, 0, 2); + computeEncoder->setBuffer(bne00, 0, 3); + computeEncoder->setBuffer(bne02, 0, 4); + computeEncoder->setBuffer(bnb01, 0, 5); + computeEncoder->setBuffer(bnb02, 0, 6); + computeEncoder->setBuffer(bne12, 0, 7); + computeEncoder->setBuffer(bnb10, 0, 8); + computeEncoder->setBuffer(bnb11, 0, 9); + computeEncoder->setBuffer(bnb12, 0, 10); + computeEncoder->setBuffer(bne0, 0, 11); + computeEncoder->setBuffer(bne1, 0, 12); + computeEncoder->setBuffer(br2, 0, 13); + computeEncoder->setBuffer(br3, 0, 14); + computeEncoder->setThreadgroupMemoryLength(8192, 1); // from https://github.com/ggerganov/llama.cpp/blob/d5ab29757ebc59a30f03e408294ec20628a6374e/ggml-metal.m#L1315 + + + int64_t ne00 = bs; + int64_t ne01 = m; + int64_t ne02 = k; + int64_t ne03 = 1; + uint64_t nb00 = sizeof(unsigned char); + uint64_t nb01 = nb00*ne00; //nb[0] * (ne[0] / 
ggml_blck_size(type)) + padding BUG: ggml_blck_size + uint64_t nb02 = nb01 * ne01; + int64_t ne10 = bs; + int64_t ne11 = n; + int64_t ne12 = k; + int64_t ne13 = 1; + uint64_t nb10 = sizeof(unsigned char); + uint64_t nb11 = nb10*ne10; + uint64_t nb12 = nb11 * ne11; + int64_t ne0 = bs; + int64_t ne1 = m; + uint r2 = ne12/ne02; + uint r3 = ne13/ne03; + memcpy(bM1->contents(), src0, hidden_size*sizeof(unsigned char)); + memcpy(bM2->contents(), src1, weight_size*sizeof(unsigned char)); + memcpy(bM3->contents(), dst, output_size*sizeof(float)); + memcpy(bne00->contents(), &ne00, sizeof(ne00)); + memcpy(bne02->contents(), &ne02, sizeof(ne02)); + memcpy(bnb01->contents(), &nb01, sizeof(nb01)); + memcpy(bnb02->contents(), &nb02, sizeof(nb02)); + memcpy(bne12->contents(), &ne12, sizeof(ne12)); + memcpy(bnb10->contents(), &nb10, sizeof(nb10)); + memcpy(bnb11->contents(), &nb11, sizeof(nb11)); + memcpy(bnb12->contents(), &nb12, sizeof(nb12)); + memcpy(bne0->contents(), &ne0, sizeof(ne0)); + memcpy(bne1->contents(), &ne1, sizeof(ne1)); + memcpy(br2->contents(), &r2, sizeof(r2)); + memcpy(br3->contents(), &r3, sizeof(r3)); + + // Assuming you have already configured the threadgroup size and number of threadgroups based on your kernel and data + MTL::Size threadgroupSize = MTL::Size::Make(128, 1, 1); + MTL::Size numThreadgroups = MTL::Size::Make((ne11 + 31)/32, (ne01 + 63)/64, ne12*ne13); // from https://github.com/ggerganov/llama.cpp/blob/d5ab29757ebc59a30f03e408294ec20628a6374e/ggml-metal.m#L1405 + + // Dispatch the kernel + computeEncoder->dispatchThreadgroups(numThreadgroups, threadgroupSize); - test_rms_nor(); + // Finish encoding and commit the command buffer + computeEncoder->endEncoding(); + commandBuffer->commit(); + auto start = high_resolution_clock::now(); + commandBuffer->waitUntilCompleted(); + auto stop = high_resolution_clock::now(); + auto duration = duration_cast(stop - start); + std::cout << duration.count() << std::endl; + for (int i = 0; i < 10; ++i) { + 
std::cout << dst[i] << " " << std::endl; + } + std::cout << std::endl; +} + +int main(){ + test_matmul_llama(); + // test_matmulInt4(); return 0; } diff --git a/llm/tests/metal/cpp_version/op.metal b/llm/tests/metal/cpp_version/op.metal index 3e0781e7..88ddf4d5 100644 --- a/llm/tests/metal/cpp_version/op.metal +++ b/llm/tests/metal/cpp_version/op.metal @@ -15,6 +15,191 @@ using namespace metal; #define MIN(x, y) ((x) < (y) ? (x) : (y)) #define SWAP(x, y) { auto tmp = (x); (x) = (y); (y) = tmp; } + +#define QK4_0 32 +#define QR4_0 2 +#define nl 2 +#define BLOCK_SIZE_M 64 // 8 simdgroup matrices from matrix A +#define BLOCK_SIZE_N 32 // 4 simdgroup matrices from matrix B +#define BLOCK_SIZE_K 32 +#define THREAD_MAT_M 4 // each thread take 4 simdgroup matrices from matrix A +#define THREAD_MAT_N 2 // each thread take 2 simdgroup matrices from matrix B +#define THREAD_PER_BLOCK 128 +#define THREAD_PER_ROW 2 // 2 thread for each row in matrix A to load numbers +#define THREAD_PER_COL 4 // 4 thread for each row in matrix B to load numbers +#define SG_MAT_SIZE 64 // simdgroup matrix is of shape 8x8 +#define SG_MAT_ROW 8 + +typedef struct { + half d; // delta + uint8_t qs[QK4_0 / 2]; // nibbles / quants +} block_q; + +template +void dequantize_q4_0(device const block_q *xb, short il, thread type4x4 & reg) { + device const uint16_t * qs = ((device const uint16_t *)xb + 1); + const float d1 = il ? (xb->d / 16.h) : xb->d; + const float d2 = d1 / 256.f; + const float md = -8.h * xb->d; + const ushort mask0 = il ? 
0x00F0 : 0x000F; + const ushort mask1 = mask0 << 8; + + for (int i=0;i<8;i++) { + reg[i/2][2*(i%2)+0] = d1 * (qs[i] & mask0) + md; + reg[i/2][2*(i%2)+1] = d2 * (qs[i] & mask1) + md; + } +} + +kernel void kernel_mul_mm_impl(device const uchar * src0, + device const uchar * src1, + device float * dst, + constant int64_t & ne00, + constant int64_t & ne02, + constant uint64_t & nb01, + constant uint64_t & nb02, + constant int64_t & ne12, + constant uint64_t & nb10, + constant uint64_t & nb11, + constant uint64_t & nb12, + constant int64_t & ne0, + constant int64_t & ne1, + constant uint & r2, + constant uint & r3, + threadgroup uchar * shared_memory [[threadgroup(0)]], + uint3 tgpig[[threadgroup_position_in_grid]], + uint tiitg[[thread_index_in_threadgroup]], + uint sgitg[[simdgroup_index_in_threadgroup]]) { + + threadgroup half * sa = (threadgroup half *)(shared_memory); + threadgroup float * sb = (threadgroup float *)(shared_memory + 4096); + + const uint r0 = tgpig.y; + const uint r1 = tgpig.x; + const uint im = tgpig.z; + + // if this block is of 64x32 shape or smaller + short n_rows = (ne0 - r0 * BLOCK_SIZE_M < BLOCK_SIZE_M) ? (ne0 - r0 * BLOCK_SIZE_M) : BLOCK_SIZE_M; + short n_cols = (ne1 - r1 * BLOCK_SIZE_N < BLOCK_SIZE_N) ? (ne1 - r1 * BLOCK_SIZE_N) : BLOCK_SIZE_N; + + // a thread shouldn't load data outside of the matrix + short thread_row = ((short)tiitg/THREAD_PER_ROW) < n_rows ? ((short)tiitg/THREAD_PER_ROW) : n_rows - 1; + short thread_col = ((short)tiitg/THREAD_PER_COL) < n_cols ? 
((short)tiitg/THREAD_PER_COL) : n_cols - 1; + + simdgroup_half8x8 ma[4]; + simdgroup_float8x8 mb[2]; + simdgroup_float8x8 c_res[8]; + for (int i = 0; i < 8; i++){ + c_res[i] = make_filled_simdgroup_matrix(0.f); + } + + short il = (tiitg % THREAD_PER_ROW); + + const uint i12 = im%ne12; + const uint i13 = im/ne12; + + uint offset0 = (i12/r2)*nb02 + (i13/r3)*(nb02*ne02); + ushort offset1 = il/nl; + + device const block_q * x = (device const block_q *)(src0 + (r0 * BLOCK_SIZE_M + thread_row) * nb01 + offset0) + offset1; + device const float * y = (device const float *)(src1 + + nb12 * im + + nb11 * (r1 * BLOCK_SIZE_N + thread_col) + + nb10 * (BLOCK_SIZE_K / THREAD_PER_COL * (tiitg % THREAD_PER_COL))); + + for (int loop_k = 0; loop_k < ne00; loop_k += BLOCK_SIZE_K) { + // load data and store to threadgroup memory + half4x4 temp_a; + dequantize_q4_0(x, il, temp_a); + threadgroup_barrier(mem_flags::mem_threadgroup); + + #pragma unroll(16) + for (int i = 0; i < 16; i++) { + *(sa + SG_MAT_SIZE * ((tiitg / THREAD_PER_ROW / 8) \ + + (tiitg % THREAD_PER_ROW) * 16 + (i / 8) * 8) \ + + (tiitg / THREAD_PER_ROW) % 8 + (i & 7) * 8) = temp_a[i/4][i%4]; + } + + *(threadgroup float2x4 *)(sb + (tiitg % THREAD_PER_COL) * 8 * 32 + 8 * (tiitg / THREAD_PER_COL)) = *((device float2x4 *)y); + + il = (il + 2 < nl) ? il + 2 : il % 2; + x = (il < 2) ? 
x + (2+nl-1)/nl : x; + y += BLOCK_SIZE_K; + + threadgroup_barrier(mem_flags::mem_threadgroup); + + // load matrices from threadgroup memory and conduct outer products + threadgroup half * lsma = (sa + THREAD_MAT_M * SG_MAT_SIZE * (sgitg % 2)); + threadgroup float * lsmb = (sb + THREAD_MAT_N * SG_MAT_SIZE * (sgitg / 2)); + + #pragma unroll(4) + for (int ik = 0; ik < BLOCK_SIZE_K / 8; ik++) { + #pragma unroll(4) + for (int i = 0; i < 4; i++) { + simdgroup_load(ma[i],lsma + SG_MAT_SIZE * i); + } + simdgroup_barrier(mem_flags::mem_none); + #pragma unroll(2) + for (int i = 0; i < 2; i++) { + simdgroup_load(mb[i],lsmb + SG_MAT_SIZE * i); + } + + lsma += BLOCK_SIZE_M / SG_MAT_ROW * SG_MAT_SIZE; + lsmb += BLOCK_SIZE_N / SG_MAT_ROW * SG_MAT_SIZE; + + #pragma unroll(8) + for (int i = 0; i < 8; i++){ + simdgroup_multiply_accumulate(c_res[i], mb[i/4], ma[i%4], c_res[i]); + } + } + } + + if ((r0 + 1) * BLOCK_SIZE_M <= ne0 && (r1 + 1) * BLOCK_SIZE_N <= ne1) { + device float * C = dst + (BLOCK_SIZE_M * r0 + 32 * (sgitg & 1)) \ + + (BLOCK_SIZE_N * r1 + 16 * (sgitg >> 1)) * ne0 + im*ne1*ne0; + for (int i = 0; i < 8; i++) { + simdgroup_store(c_res[i], C + 8 * (i%4) + 8 * ne0 * (i/4), ne0); + } + } else { + // block is smaller than 64x32, we should avoid writing data outside of the matrix + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup float * temp_str = ((threadgroup float *)shared_memory) \ + + 32 * (sgitg&1) + (16 * (sgitg>>1)) * BLOCK_SIZE_M; + for (int i = 0; i < 8; i++) { + simdgroup_store(c_res[i], temp_str + 8 * (i%4) + 8 * BLOCK_SIZE_M * (i/4), BLOCK_SIZE_M); + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + device float * C = dst + (BLOCK_SIZE_M * r0) + (BLOCK_SIZE_N * r1) * ne0 + im*ne1*ne0; + if (sgitg == 0) { + for (int i = 0; i < n_rows; i++) { + for (int j = tiitg; j < n_cols; j += BLOCK_SIZE_N) { + *(C + i + j * ne0) = *(temp_str + i + j * BLOCK_SIZE_M); + } + } + } + } +} + + + + + + + + + + + + + + + + + + + + kernel void arrayAdd(const 
device float* inputA, const device float* inputB, device float* output, @@ -88,27 +273,28 @@ kernel void kernel_gelu_quick( dst[tpig] = x * (1.0f / (1.0f + exp(GELU_QUICK_COEF * x))); } -// TODO: to be fixed kernel void kernel_rms_norm( device const void * src0, device float * dst, - constant int64_t & ne00, - constant uint64_t & nb01, - constant float & eps, + constant MetalMatMulParams* params, + // constant int64_t & ne00, // row + // constant uint64_t & nb01, // col*sizeof(type) + // constant float & eps, threadgroup float * buf [[threadgroup(0)]], uint tgpig[[threadgroup_position_in_grid]], uint tpitg[[thread_position_in_threadgroup]], uint sgitg[[simdgroup_index_in_threadgroup]], uint tiisg[[thread_index_in_simdgroup]], uint ntg[[threads_per_threadgroup]]) { + unsigned int ne00 = 1; + unsigned int nb01 = 384; + float eps = 0.0001; device const float4 * x = (device const float4 *) ((device const char *) src0 + tgpig*nb01); - float4 sumf = 0; float all_sum = 0; - // parallel sum for (int i00 = tpitg; i00 < ne00/4; i00 += ntg) { - sumf += x[i00] * x[i00]; + sumf += x[i00] * x[i00]; // take four elements and square it at the same time } all_sum = sumf[0] + sumf[1] + sumf[2] + sumf[3]; all_sum = simd_sum(all_sum); @@ -138,7 +324,6 @@ kernel void kernel_rms_norm( } } - // TODO: to be fixed kernel void kernel_soft_max( device const float * src0, @@ -529,8 +714,10 @@ kernel void matmulInt4(device const float* inA, // the for-loop is replaced with a collection of threads, each of which // calls this function. 
- const uint n = params->width3; - const uint k = params->width1; + const uint n = params->width2; //32000 + const uint k = params->width1; //4096 + // const uint n = 32000; //32000 + // const uint k = 4096; //4096 const uint group_size = params->group_size; const uint idx = id.x; // column index of the output @@ -564,7 +751,7 @@ kernel void matmulInt4_SIMD_Q4Interleave( // the for-loop is replaced with a collection of threads, each of which // calls this function. - const uint n = params->width3; + const uint n = params->width2; const uint k = params->width1; const uint group_size = params->group_size; @@ -671,7 +858,7 @@ kernel void matmulUInt4_SIMD_Q4Interleave_unroll32( // the for-loop is replaced with a collection of threads, each of which // calls this function. - const uint n = params->width3; + const uint n = params->width3; const uint k = params->width1; const uint group_size = params->group_size; @@ -751,9 +938,9 @@ kernel void matmulUInt4_SIMD_Q4Interleave_unroll32( // // the for-loop is replaced with a collection of threads, each of which // // calls this function. 
-// const uint n = params.n; -// const uint k = params.k; -// const uint group_size = params.group_size; +// const uint n = params.width3; +// const uint k = params.width1; +// const uint group_size = params->group_size; // const uint idx = id.x; // column index of the output // const uint idy = id.y; // row index of the output From 67ea0cc67e932440d10bedcf187cde17fa1dc97b Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Fri, 22 Mar 2024 00:32:21 -0400 Subject: [PATCH 28/37] new format for metal in general --- kernels/metal/metal_compute.cc | 181 ++++++ kernels/metal/{ => old_metal}/Makefile | 0 .../{ => old_metal}/download_metal-cpp.sh | 0 kernels/metal/{ => old_metal}/matmul_f32.cc | 0 .../metal/{ => old_metal}/matmul_metal_imp.cc | 0 .../metal/{ => old_metal}/matmul_metal_imp.h | 0 .../{ => old_metal}/matmul_metal_int4.cc | 0 .../metal/{ => old_metal}/matmul_ref_fp32.cc | 0 .../metal/{ => old_metal}/matmul_ref_int8.cc | 0 .../metal/{ => old_metal}/metal_batch_add.cc | 0 kernels/metal/{ => old_metal}/metal_gelu.cc | 0 .../metal/{ => old_metal}/metal_gelu_quick.cc | 0 kernels/metal/{ => old_metal}/metal_relu.cc | 0 .../metal/{ => old_metal}/metal_rms_norm.cc | 0 kernels/metal/{ => old_metal}/metal_rope.cc | 0 kernels/metal/{ => old_metal}/metal_silu.cc | 0 .../metal/{ => old_metal}/metal_softmax.cc | 0 kernels/metal/{kernel => }/op.metal | 555 +++------------- llm/tests/metal/cpp_version/main_tmp.cc | 598 ++++++++++++++---- 19 files changed, 716 insertions(+), 618 deletions(-) create mode 100644 kernels/metal/metal_compute.cc rename kernels/metal/{ => old_metal}/Makefile (100%) rename kernels/metal/{ => old_metal}/download_metal-cpp.sh (100%) rename kernels/metal/{ => old_metal}/matmul_f32.cc (100%) rename kernels/metal/{ => old_metal}/matmul_metal_imp.cc (100%) rename kernels/metal/{ => old_metal}/matmul_metal_imp.h (100%) rename kernels/metal/{ => old_metal}/matmul_metal_int4.cc (100%) rename kernels/metal/{ => old_metal}/matmul_ref_fp32.cc (100%) rename 
kernels/metal/{ => old_metal}/matmul_ref_int8.cc (100%) rename kernels/metal/{ => old_metal}/metal_batch_add.cc (100%) rename kernels/metal/{ => old_metal}/metal_gelu.cc (100%) rename kernels/metal/{ => old_metal}/metal_gelu_quick.cc (100%) rename kernels/metal/{ => old_metal}/metal_relu.cc (100%) rename kernels/metal/{ => old_metal}/metal_rms_norm.cc (100%) rename kernels/metal/{ => old_metal}/metal_rope.cc (100%) rename kernels/metal/{ => old_metal}/metal_silu.cc (100%) rename kernels/metal/{ => old_metal}/metal_softmax.cc (100%) rename kernels/metal/{kernel => }/op.metal (51%) diff --git a/kernels/metal/metal_compute.cc b/kernels/metal/metal_compute.cc new file mode 100644 index 00000000..b3877684 --- /dev/null +++ b/kernels/metal/metal_compute.cc @@ -0,0 +1,181 @@ +#include + +#include "Foundation/Foundation.hpp" +#include "Metal/Metal.hpp" +#include + +#undef MIN +#undef MAX +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + + +bool has_init = false; + +struct ggml_metal_kernel { + MTL::ComputePipelineState * pipeline; +}; + +struct ggml_metal_context * ctx; + +enum { + MTLGPUFamilyApple1 = 1001, // Example starting value, adjust based on actual definition + MTLGPUFamilyCommon1 = 3001, // Example starting value + MTLGPUFamilyMetal3 = 5001, + MTLGPUFamilyApple7 = 1007, +}; + +enum ggml_metal_kernel_type { + GGML_METAL_KERNEL_EMBEDDING, + GGML_METAL_KERNEL_BATCH_ADD, + GGML_METAL_KERNEL_RELU, + GGML_METAL_KERNEL_SILU, + GGML_METAL_KERNEL_GELU, + GGML_METAL_KERNEL_GELU_QUICK, + GGML_METAL_KERNEL_RMS_NORM, + GGML_METAL_KERNEL_SOFT_MAX, + GGML_METAL_KERNEL_SOFT_MAX_4, + GGML_METAL_KERNEL_ROPE, + GGML_METAL_KERNEL_MUL_MM_INT4, + GGML_METAL_KERNEL_TYPE_COUNT +}; + +enum ggml_status { + GGML_STATUS_SUCCESS, + GGML_STATUS_FAILED +}; + +// Context struct holding Metal related objects and state +struct ggml_metal_context { + int n_cb; + + MTL::Device * device; + MTL::CommandQueue * queue; + static std::unordered_map _mumap; + + 
dispatch_queue_t d_queue; + + ggml_metal_kernel kernels[GGML_METAL_KERNEL_TYPE_COUNT]; + + bool support_simdgroup_reduction; + bool support_simdgroup_mm; + + bool should_capture_next_compute; +}; +void *allocateSharedMem(size_t size) { + if (!has_init) { + init(); + } + + MTL::Buffer *new_b = ctx->device->newBuffer(size, MTL::ResourceStorageModeShared); + + void *void_ptr = new_b->contents(); + + // push the pair to the map + ctx->_mumap.insert(std::make_pair(void_ptr, new_b)); + + return void_ptr; +} + +void init() { + ctx = new(struct ggml_metal_context); + MTL::Device *device = MTL::CreateSystemDefaultDevice(); + ctx->device = device; + ctx->n_cb = 1; // TODO: n_cb and GGML_METAL_MAX_BUFFERS? MIN(n_cb, GGML_METAL_MAX_BUFFERS) + ctx->queue = ctx->device->newCommandQueue(); + ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT); + MTL::Library *metal_library = ctx->device->newDefaultLibrary(); + + // simd group support + for (int i = MTLGPUFamilyApple1 + 20; i >= MTLGPUFamilyApple1; --i) { + if (ctx->device->supportsFamily((MTL::GPUFamily)i)) { + printf("%s: GPU family: MTLGPUFamilyApple%d (%d)\n", __FUNCTION__, i - MTLGPUFamilyApple1 + 1, i); + break; + } + } + + for (int i = MTLGPUFamilyCommon1 + 5; i >= MTLGPUFamilyCommon1; --i) { + if (ctx->device->supportsFamily((MTL::GPUFamily)i)) { + printf("%s: GPU family: MTLGPUFamilyCommon%d (%d)\n", __FUNCTION__, i - MTLGPUFamilyCommon1 + 1, i); + break; + } + } + + for (int i = MTLGPUFamilyMetal3 + 5; i >= MTLGPUFamilyMetal3; --i) { + if (ctx->device->supportsFamily((MTL::GPUFamily)i)) { + printf("%s: GPU family: MTLGPUFamilyMetal%d (%d)\n", __FUNCTION__, i - MTLGPUFamilyMetal3 + 3, i); + break; + } + } + + ctx->support_simdgroup_reduction = ctx->device->supportsFamily((MTL::GPUFamily)MTLGPUFamilyApple7); + ctx->support_simdgroup_reduction |= ctx->device->supportsFamily((MTL::GPUFamily)MTLGPUFamilyMetal3); + ctx->support_simdgroup_mm = 
ctx->device->supportsFamily((MTL::GPUFamily)MTLGPUFamilyApple7); + + + // load kernels + { + NS::Error *error = nullptr; + for (int i = 0; i < GGML_METAL_KERNEL_TYPE_COUNT; ++i) { + ctx->kernels[i].pipeline = nullptr; + } +#define GGML_METAL_ADD_KERNEL(e, name, supported) \ + if (supported) { \ + struct ggml_metal_kernel * kernel = &ctx->kernels[e]; \ + const char * str = "kernel_" + name; \ + auto str = NS::String::string(str, NS::ASCIIStringEncoding); \ + MTL::Function * metal_function = metal_library->newFunction(str); \ + kernel->pipeline = ctx->device->newComputePipelineState(metal_function, &error); \ + metal_function->release(); \ + if (error) { \ + printf("load pipeline error"); \ + return nullptr; \ + } \ + } else { \ + printf("kernel name not supported "); \ + } + + // simd_sum and simd_max requires MTLGPUFamilyApple7 + // TODO: solve error + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_EMBEDDING, "embedding", true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_BATCH_ADD, "batch_add", true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_RELU, "relu", true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_SILU, "silu", true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_GELU, "gelu", true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_GELU_QUICK, "gelu_quick", true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_RMS_NORM, "rms_norm", true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_SOFT_MAX, "soft_max", true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_SOFT_MAX_4, "soft_max_4", true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_ROPE, "rope", true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_MUL_MM_INT4, "mul_mm_int4", true); + } + metal_library->release(); + has_init = true; +} + +MTL::Buffer *MetalIMP::getBufferfromPtr(void *ptr) { + if (_mumap.find(ptr) == _mumap.end()) { + std::cerr << "Cannot find the corresponding MTL::Buffer." 
<< std::endl; + return NULL; + } else + return _mumap[ptr]; +} + +static void ggml_metal_free(struct ggml_metal_context * ctx) { + for (int i = 0; i < GGML_METAL_KERNEL_TYPE_COUNT; ++i) { + ctx->kernels[i].pipeline->release(); + } + + ctx->queue->release(); + ctx->device->release(); + + dispatch_release(ctx->d_queue); + + free(ctx); +} + +static enum ggml_status ggml_metal_graph_compute(struct ggml_metal_context * ctx, + struct ggml_cgraph * gf) { + + return GGML_STATUS_SUCCESS; +} \ No newline at end of file diff --git a/kernels/metal/Makefile b/kernels/metal/old_metal/Makefile similarity index 100% rename from kernels/metal/Makefile rename to kernels/metal/old_metal/Makefile diff --git a/kernels/metal/download_metal-cpp.sh b/kernels/metal/old_metal/download_metal-cpp.sh similarity index 100% rename from kernels/metal/download_metal-cpp.sh rename to kernels/metal/old_metal/download_metal-cpp.sh diff --git a/kernels/metal/matmul_f32.cc b/kernels/metal/old_metal/matmul_f32.cc similarity index 100% rename from kernels/metal/matmul_f32.cc rename to kernels/metal/old_metal/matmul_f32.cc diff --git a/kernels/metal/matmul_metal_imp.cc b/kernels/metal/old_metal/matmul_metal_imp.cc similarity index 100% rename from kernels/metal/matmul_metal_imp.cc rename to kernels/metal/old_metal/matmul_metal_imp.cc diff --git a/kernels/metal/matmul_metal_imp.h b/kernels/metal/old_metal/matmul_metal_imp.h similarity index 100% rename from kernels/metal/matmul_metal_imp.h rename to kernels/metal/old_metal/matmul_metal_imp.h diff --git a/kernels/metal/matmul_metal_int4.cc b/kernels/metal/old_metal/matmul_metal_int4.cc similarity index 100% rename from kernels/metal/matmul_metal_int4.cc rename to kernels/metal/old_metal/matmul_metal_int4.cc diff --git a/kernels/metal/matmul_ref_fp32.cc b/kernels/metal/old_metal/matmul_ref_fp32.cc similarity index 100% rename from kernels/metal/matmul_ref_fp32.cc rename to kernels/metal/old_metal/matmul_ref_fp32.cc diff --git 
a/kernels/metal/matmul_ref_int8.cc b/kernels/metal/old_metal/matmul_ref_int8.cc similarity index 100% rename from kernels/metal/matmul_ref_int8.cc rename to kernels/metal/old_metal/matmul_ref_int8.cc diff --git a/kernels/metal/metal_batch_add.cc b/kernels/metal/old_metal/metal_batch_add.cc similarity index 100% rename from kernels/metal/metal_batch_add.cc rename to kernels/metal/old_metal/metal_batch_add.cc diff --git a/kernels/metal/metal_gelu.cc b/kernels/metal/old_metal/metal_gelu.cc similarity index 100% rename from kernels/metal/metal_gelu.cc rename to kernels/metal/old_metal/metal_gelu.cc diff --git a/kernels/metal/metal_gelu_quick.cc b/kernels/metal/old_metal/metal_gelu_quick.cc similarity index 100% rename from kernels/metal/metal_gelu_quick.cc rename to kernels/metal/old_metal/metal_gelu_quick.cc diff --git a/kernels/metal/metal_relu.cc b/kernels/metal/old_metal/metal_relu.cc similarity index 100% rename from kernels/metal/metal_relu.cc rename to kernels/metal/old_metal/metal_relu.cc diff --git a/kernels/metal/metal_rms_norm.cc b/kernels/metal/old_metal/metal_rms_norm.cc similarity index 100% rename from kernels/metal/metal_rms_norm.cc rename to kernels/metal/old_metal/metal_rms_norm.cc diff --git a/kernels/metal/metal_rope.cc b/kernels/metal/old_metal/metal_rope.cc similarity index 100% rename from kernels/metal/metal_rope.cc rename to kernels/metal/old_metal/metal_rope.cc diff --git a/kernels/metal/metal_silu.cc b/kernels/metal/old_metal/metal_silu.cc similarity index 100% rename from kernels/metal/metal_silu.cc rename to kernels/metal/old_metal/metal_silu.cc diff --git a/kernels/metal/metal_softmax.cc b/kernels/metal/old_metal/metal_softmax.cc similarity index 100% rename from kernels/metal/metal_softmax.cc rename to kernels/metal/old_metal/metal_softmax.cc diff --git a/kernels/metal/kernel/op.metal b/kernels/metal/op.metal similarity index 51% rename from kernels/metal/kernel/op.metal rename to kernels/metal/op.metal index c5368bfe..19c70ef1 100644 --- 
a/kernels/metal/kernel/op.metal +++ b/kernels/metal/op.metal @@ -12,7 +12,7 @@ using namespace metal; #define MIN(x, y) ((x) < (y) ? (x) : (y)) #define SWAP(x, y) { auto tmp = (x); (x) = (y); (y) = tmp; } -kernel void EmbeddingKernel(device Matrix3D_int& input_id [[buffer(0)]], +kernel void kernel_embedding(device Matrix3D_int& input_id [[buffer(0)]], device Matrix3D_half& output [[buffer(1)]], device float* lookup [[buffer(2)]], const unsigned int embed_dim [[buffer(3)]], @@ -87,10 +87,10 @@ kernel void kernel_rms_norm( device const void * src0, device const float * src1, device float * dst, - constant MetalMatMulParams& params, - // constant int64_t & ne00, // row - // constant uint64_t & nb01, // col*sizeof(type) - // constant float & eps, + // constant MetalMatMulParams& params, + constant int64_t & ne00, // row + constant uint64_t & nb01, // col*sizeof(type) + constant float & eps, threadgroup float * buf [[threadgroup(0)]], uint tgpig[[threadgroup_position_in_grid]], uint tpitg[[thread_position_in_threadgroup]], @@ -143,11 +143,11 @@ kernel void kernel_soft_max( device const float * src0, device const float * src1, device float * dst, - constant MetalMatMulParams& params, - // constant int64_t & ne00, - // constant int64_t & ne01, - // constant int64_t & ne02, - // constant float & scale, + // constant MetalMatMulParams& params, + constant int64_t & ne00, + constant int64_t & ne01, + constant int64_t & ne02, + constant float & scale, threadgroup float * buf [[threadgroup(0)]], uint tgpig[[threadgroup_position_in_grid]], uint tpitg[[thread_position_in_threadgroup]], @@ -237,11 +237,11 @@ kernel void kernel_soft_max_4( device const float * src0, device const float * src1, device float * dst, - constant MetalMatMulParams& params, - // constant int64_t & ne00, - // constant int64_t & ne01, - // constant int64_t & ne02, - // constant float & scale, + // constant MetalMatMulParams& params, + constant int64_t & ne00, + constant int64_t & ne01, + constant int64_t & 
ne02, + constant float & scale, threadgroup float * buf [[threadgroup(0)]], uint tgpig[[threadgroup_position_in_grid]], uint tpitg[[thread_position_in_threadgroup]], @@ -406,63 +406,63 @@ kernel void kernel_rope( device const void * src0, device const int32_t * src1, device float * dst, - constant MetalMatMulParams& params, - // constant int64_t & ne00, - // constant int64_t & ne01, - // constant int64_t & ne02, - // constant int64_t & ne03, - // constant uint64_t & nb00, - // constant uint64_t & nb01, - // constant uint64_t & nb02, - // constant uint64_t & nb03, - // constant int64_t & ne0, - // constant int64_t & ne1, - // constant int64_t & ne2, - // constant int64_t & ne3, - // constant uint64_t & nb0, - // constant uint64_t & nb1, - // constant uint64_t & nb2, - // constant uint64_t & nb3, - // constant int & n_past, - // constant int & n_dims, - // constant int & mode, - // constant int & n_orig_ctx, - // constant float & freq_base, - // constant float & freq_scale, - // constant float & ext_factor, - // constant float & attn_factor, - // constant float & beta_fast, - // constant float & beta_slow, + // constant MetalMatMulParams& params, + constant int64_t & ne00, + constant int64_t & ne01, + constant int64_t & ne02, + constant int64_t & ne03, + constant uint64_t & nb00, + constant uint64_t & nb01, + constant uint64_t & nb02, + constant uint64_t & nb03, + constant int64_t & ne0, + constant int64_t & ne1, + constant int64_t & ne2, + constant int64_t & ne3, + constant uint64_t & nb0, + constant uint64_t & nb1, + constant uint64_t & nb2, + constant uint64_t & nb3, + constant int & n_past, + constant int & n_dims, + constant int & mode, + constant int & n_orig_ctx, + constant float & freq_base, + constant float & freq_scale, + constant float & ext_factor, + constant float & attn_factor, + constant float & beta_fast, + constant float & beta_slow, uint tiitg[[thread_index_in_threadgroup]], uint3 tptg[[threads_per_threadgroup]], uint3 
tgpig[[threadgroup_position_in_grid]]) { - constant int64_t ne00 = param.m_dim_x; - constant int64_t ne01 = param.m_dim_y; - constant int64_t ne02 = param.m_dim_z; - constant int64_t ne03 = 0; - constant uint64_t nb00 = param.m_dim_x*param.type_size; - constant uint64_t nb01 = param.m_dim_y*param.type_size; - constant uint64_t nb02 = param.m_dim_z*param.type_size; - constant uint64_t nb03 = 0; - constant int64_t ne0 = param.m_dim_x; - constant int64_t ne1 = param.m_dim_y; - constant int64_t ne2 = param.m_dim_z; - constant int64_t ne3 = 0; - constant uint64_t nb0 = param.m_dim_x*param.type_size; - constant uint64_t nb1 = param.m_dim_y*param.type_size; - constant uint64_t nb2 = param.m_dim_z*param.type_size; - constant uint64_t nb3 = 0; - - int n_past = param.n_past; - int n_dims = param.n_dims; - int mode = param.mode; - int n_orig_ctx = param.n_orig_ctx; - float freq_base = param.freq_base; - float freq_scale = param.freq_scale; - float ext_factor = param.ext_factor; - float attn_factor = param.attn_factor; - float beta_fast = param.beta_fast; - float beta_slow = param.beta_slow; + // constant int64_t ne00 = param.m_dim_x; + // constant int64_t ne01 = param.m_dim_y; + // constant int64_t ne02 = param.m_dim_z; + // constant int64_t ne03 = 0; + // constant uint64_t nb00 = param.m_dim_x*param.type_size; + // constant uint64_t nb01 = param.m_dim_y*param.type_size; + // constant uint64_t nb02 = param.m_dim_z*param.type_size; + // constant uint64_t nb03 = 0; + // constant int64_t ne0 = param.m_dim_x; + // constant int64_t ne1 = param.m_dim_y; + // constant int64_t ne2 = param.m_dim_z; + // constant int64_t ne3 = 0; + // constant uint64_t nb0 = param.m_dim_x*param.type_size; + // constant uint64_t nb1 = param.m_dim_y*param.type_size; + // constant uint64_t nb2 = param.m_dim_z*param.type_size; + // constant uint64_t nb3 = 0; + + // int n_past = param.n_past; + // int n_dims = param.n_dims; + // int mode = param.mode; + // int n_orig_ctx = param.n_orig_ctx; + // float 
freq_base = param.freq_base; + // float freq_scale = param.freq_scale; + // float ext_factor = param.ext_factor; + // float attn_factor = param.attn_factor; + // float beta_fast = param.beta_fast; + // float beta_slow = param.beta_slow; const int64_t i3 = tgpig[2]; @@ -567,7 +567,7 @@ void dequantize_q4_0(device const block_q *xb, short il, thread type4x4 & reg) { } } -void kernel_mul_mm_impl(device const uchar * src0, +void kernel_mul_mm_int4(device const uchar * src0, device const uchar * src1, device float * dst, constant int64_t & ne00, @@ -696,417 +696,4 @@ void kernel_mul_mm_impl(device const uchar * src0, } } } -} - -kernel void matmul(device const float* inA, - device const float* inB, // column major - device float* result, - constant MetalMatMulParams& params, - uint2 id [[thread_position_in_grid]]) -{ - // the for-loop is replaced with a collection of threads, each of which - // calls this function. - - const uint n = params.n; - const uint k = params.k; - - const uint idx = id.x; // column index of the output - const uint idy = id.y; // row index of the output - - float sum = 0; - for (uint i = 0; i < k; i++){ - float vA = inA[idy * k + i]; - float vB = inB[idx * k + i]; - - sum += vA * vB; - } - result[idy * n + idx] = sum; -} - -kernel void matmulInt4(device const float* inA, - device const uint8_t* inB, // column major - device float* result, - device const float* scales, - constant MetalMatMulParams& params, - uint2 id [[thread_position_in_grid]]) -{ - // the for-loop is replaced with a collection of threads, each of which - // calls this function. 
- - const uint n = params.n; - const uint k = params.k; - const uint group_size = params.group_size; - - const uint idx = id.x; // column index of the output - const uint idy = id.y; // row index of the output - - float sum = 0; - for (uint i = 0; i < k; i += group_size){ - float scale = scales[(idx * k + i) / group_size]; - for (uint j = 0; j < group_size; j+=2){ - size_t weight_idx = (idx * k + i + j) / 2; - uint8_t weight_packed = inB[weight_idx]; - int8_t vl = (weight_packed & 0x0F) - 8; - int8_t vh = (weight_packed >> 4) - 8; - - sum += (inA[idy * k + i + j] * vl) * scale; - sum += (inA[idy * k + i + j + 1] * vh) * scale; - } - } - result[idy * n + idx] = sum; -} - - -kernel void matmulInt4_SIMD_Q4Interleave( - device const packed_float4* inA, - device const packed_char4* inB, // column major - device float* result, - device const float* scales, - constant MetalMatMulParams& params, - uint2 id [[thread_position_in_grid]]) -{ - // the for-loop is replaced with a collection of threads, each of which - // calls this function. 
- - const uint n = params.n; - const uint k = params.k; - const uint group_size = params.group_size; - - const uint idx = id.x; // column index of the output - const uint idy = id.y; // row index of the output - - packed_char4 lowMask = {0x0F, 0x0F, 0x0F, 0x0F}; - packed_float4 sum4 = {0.0f, 0.0f, 0.0f, 0.0f}; - - for (uint i = 0; i < k; i += group_size){ - float scale = scales[(idx * k + i) / group_size]; - packed_float4 scale4 = {scale, scale, scale, scale}; - for (uint j = 0; j < group_size; j+= 8){ - // sequential: (a, b), (c, d), (e, f), (g, h): 32 bit = 4xuint8 - // expected layout of inB: (a, e), (b, f), (c, g), (d, h) - // low; (a, 0), (b, 0), (c, 0), (d, 0) - // high: (e, 0), (f, 0), (g, 0), (h, 0) - size_t weight_idx = (idx * k + i + j) / 8; - size_t activation_idx = (idy * k + i + j) / 4; - packed_char4 packed_8 = inB[weight_idx]; - packed_char4 packed_low = packed_8 & lowMask; - packed_char4 packed_high = (packed_8 >> 4) & lowMask; - - packed_float4 inAlow = inA[activation_idx]; - packed_float4 inAhigh = inA[activation_idx+1]; - packed_float4 inBlow = packed_float4(packed_low) * scale4; - packed_float4 inBhigh = packed_float4(packed_high) * scale4; - - sum4 += inAlow * inBlow; - sum4 += inAhigh * inBhigh; - } - } - float sum = sum4[0] + sum4[1] + sum4[2] + sum4[3]; - result[idy * n + idx] = sum; -} - -kernel void matmulUInt4_SIMD_Q4Interleave_unroll16( - device const packed_float4* inA, - device const packed_char4* inB, // column major - device float* result, - device const float* scales, - constant MetalMatMulParams& params, - uint2 id [[thread_position_in_grid]]) -{ - // the for-loop is replaced with a collection of threads, each of which - // calls this function. 
- - const uint n = params.n; - const uint k = params.k; - const uint group_size = params.group_size; - - const uint idx = id.x; // column index of the output - const uint idy = id.y; // row index of the output - - packed_char4 lowMask = {0x0F, 0x0F, 0x0F, 0x0F}; - packed_float4 sum4 = {0.0f, 0.0f, 0.0f, 0.0f}; - packed_char4 offsets = {8, 8, 8, 8}; - - for (uint i = 0; i < k; i += group_size){ - float scale = scales[(idx * k + i) / group_size]; - packed_float4 scale4 = {scale, scale, scale, scale}; - for (uint j = 0; j < group_size; j+= 16){ - // sequential: (a, b), (c, d), (e, f), (g, h): 32 bit = 4xuint8 - // expected layout of inB: (a, e), (b, f), (c, g), (d, h) - // low; (a, 0), (b, 0), (c, 0), (d, 0) - // high: (e, 0), (f, 0), (g, 0), (h, 0) - size_t weight_idx = (idx * k + i + j) / 8; - size_t activation_idx = (idy * k + i + j) / 4; - packed_char4 packed_8_0 = inB[weight_idx]; - packed_char4 packed_8_1 = inB[weight_idx + 1]; - packed_char4 packed_low_0 = (packed_8_0 & lowMask) - offsets;; - packed_char4 packed_low_1 = (packed_8_1 & lowMask) - offsets;; - packed_char4 packed_high_0 = ((packed_8_0 >> 4) & lowMask) - offsets; - packed_char4 packed_high_1 = ((packed_8_1 >> 4) & lowMask) - offsets; - - packed_float4 inAlow_0 = inA[activation_idx]; - packed_float4 inAlow_1 = inA[activation_idx+2]; - packed_float4 inAhigh_0 = inA[activation_idx+1]; - packed_float4 inAhigh_1 = inA[activation_idx+3]; - packed_float4 inBlow_0 = packed_float4(packed_low_0) * scale4; - packed_float4 inBlow_1 = packed_float4(packed_low_1) * scale4; - packed_float4 inBhigh_0 = packed_float4(packed_high_0) * scale4; - packed_float4 inBhigh_1 = packed_float4(packed_high_1) * scale4; - - sum4 += inAlow_0 * inBlow_0; - sum4 += inAlow_1 * inBlow_1; - sum4 += inAhigh_0 * inBhigh_0; - sum4 += inAhigh_1 * inBhigh_1; - } - } - float sum = sum4[0] + sum4[1] + sum4[2] + sum4[3]; - result[idy * n + idx] = sum; -} - - -kernel void matmulUInt4_SIMD_Q4Interleave_unroll32( - device const packed_float4* 
inA, - device const packed_char4* inB, // column major - device float* result, - device const float* scales, - constant MetalMatMulParams& params, - uint2 id [[thread_position_in_grid]]) -{ - // the for-loop is replaced with a collection of threads, each of which - // calls this function. - - const uint n = params.n; - const uint k = params.k; - const uint group_size = params.group_size; - - const uint idx = id.x; // column index of the output - const uint idy = id.y; // row index of the output - - packed_char4 lowMask = {0x0F, 0x0F, 0x0F, 0x0F}; - packed_char4 offsets = {8, 8, 8, 8}; - packed_float4 sum4 = {0.0f, 0.0f, 0.0f, 0.0f}; - - for (uint i = 0; i < k; i += group_size){ - float scale = scales[(idx * k + i) / group_size]; - packed_float4 scale4 = {scale, scale, scale, scale}; - for (uint j = 0; j < group_size; j+= 32){ - // sequential: (a, b), (c, d), (e, f), (g, h): 32 bit = 4xuint8 - // expected layout of inB: (a, e), (b, f), (c, g), (d, h) - // low; (a, 0), (b, 0), (c, 0), (d, 0) - // high: (e, 0), (f, 0), (g, 0), (h, 0) - size_t weight_idx = (idx * k + i + j) / 8; - size_t activation_idx = (idy * k + i + j) / 4; - packed_char4 packed_8_0 = inB[weight_idx]; - packed_char4 packed_8_1 = inB[weight_idx + 1]; - packed_char4 packed_8_2 = inB[weight_idx + 2]; - packed_char4 packed_8_3 = inB[weight_idx + 3]; - - packed_char4 packed_low_0 = (packed_8_0 & lowMask) - offsets;; - packed_char4 packed_low_1 = (packed_8_1 & lowMask) - offsets;; - packed_char4 packed_low_2 = (packed_8_2 & lowMask) - offsets;; - packed_char4 packed_low_3 = (packed_8_3 & lowMask) - offsets;; - - packed_char4 packed_high_0 = ((packed_8_0 >> 4) & lowMask) - offsets; - packed_char4 packed_high_1 = ((packed_8_1 >> 4) & lowMask) - offsets; - packed_char4 packed_high_2 = ((packed_8_2 >> 4) & lowMask) - offsets; - packed_char4 packed_high_3 = ((packed_8_3 >> 4) & lowMask) - offsets; - - packed_float4 inAlow_0 = inA[activation_idx]; - packed_float4 inAhigh_0 = inA[activation_idx+1]; - 
packed_float4 inAlow_1 = inA[activation_idx+2]; - packed_float4 inAhigh_1 = inA[activation_idx+3]; - packed_float4 inAlow_2 = inA[activation_idx+4]; - packed_float4 inAhigh_2 = inA[activation_idx+5]; - packed_float4 inAlow_3 = inA[activation_idx+6]; - packed_float4 inAhigh_3 = inA[activation_idx+7]; - - packed_float4 inBlow_0 = packed_float4(packed_low_0) * scale4; - packed_float4 inBlow_1 = packed_float4(packed_low_1) * scale4; - packed_float4 inBlow_2 = packed_float4(packed_low_2) * scale4; - packed_float4 inBlow_3 = packed_float4(packed_low_3) * scale4; - - packed_float4 inBhigh_0 = packed_float4(packed_high_0) * scale4; - packed_float4 inBhigh_1 = packed_float4(packed_high_1) * scale4; - packed_float4 inBhigh_2 = packed_float4(packed_high_2) * scale4; - packed_float4 inBhigh_3 = packed_float4(packed_high_3) * scale4; - - sum4 += inAlow_0 * inBlow_0; - sum4 += inAlow_1 * inBlow_1; - sum4 += inAlow_2 * inBlow_2; - sum4 += inAlow_3 * inBlow_3; - sum4 += inAhigh_0 * inBhigh_0; - sum4 += inAhigh_1 * inBhigh_1; - sum4 += inAhigh_2 * inBhigh_2; - sum4 += inAhigh_3 * inBhigh_3; - } - } - float sum = sum4[0] + sum4[1] + sum4[2] + sum4[3]; - result[idy * n + idx] = sum; -} - -kernel void matmulUInt4_SIMD_Q4Interleave_unroll2x32( - device const packed_float4* inA, - device const packed_char4* inB, // column major - device float* result, - device const float* scales, - constant MetalMatMulParams& params, - uint2 id [[thread_position_in_grid]]) -{ - // the for-loop is replaced with a collection of threads, each of which - // calls this function. 
- - const uint n = params.n; - const uint k = params.k; - const uint group_size = params.group_size; - - const uint idx = id.x; // column index of the output - const uint idy = id.y; // row index of the output - - packed_char4 lowMask = {0x0F, 0x0F, 0x0F, 0x0F}; - packed_char4 offsets = {8, 8, 8, 8}; - packed_float4 sum4 = {0.0f, 0.0f, 0.0f, 0.0f}; - packed_float4 sum4_col2 = {0.0f, 0.0f, 0.0f, 0.0f}; - - packed_float4 a; - - for (uint i = 0; i < k; i += group_size){ - float scale = scales[(idx * k + i) / group_size]; - float scale_col2 = scales[((idx+1) * k + i) / group_size]; - packed_float4 scale4 = {scale, scale, scale, scale}; - packed_float4 scale4_col2 = {scale_col2, scale_col2, scale_col2, scale_col2}; - for (uint j = 0; j < group_size; j+= 32){ - // sequential: (a, b), (c, d), (e, f), (g, h): 32 bit = 4xuint8 - // expected layout of inB: (a, e), (b, f), (c, g), (d, h) - // low; (a, 0), (b, 0), (c, 0), (d, 0) - // high: (e, 0), (f, 0), (g, 0), (h, 0) - size_t weight_idx = (idx * k + i + j) / 8; - size_t weight_col2_idx = ((idx+1) * k + i + j) / 8; - size_t activation_idx = (idy * k + i + j) / 4; - packed_char4 packed_8_0 = inB[weight_idx]; - packed_char4 packed_8_1 = inB[weight_idx + 1]; - packed_char4 packed_8_2 = inB[weight_idx + 2]; - packed_char4 packed_8_3 = inB[weight_idx + 3]; - - packed_char4 packed_low_0 = (packed_8_0 & lowMask) - offsets; - packed_char4 packed_low_1 = (packed_8_1 & lowMask) - offsets; - packed_char4 packed_low_2 = (packed_8_2 & lowMask) - offsets; - packed_char4 packed_low_3 = (packed_8_3 & lowMask) - offsets; - - packed_char4 packed_high_0 = ((packed_8_0 >> 4) & lowMask) - offsets; - packed_char4 packed_high_1 = ((packed_8_1 >> 4) & lowMask) - offsets; - packed_char4 packed_high_2 = ((packed_8_2 >> 4) & lowMask) - offsets; - packed_char4 packed_high_3 = ((packed_8_3 >> 4) & lowMask) - offsets; - - packed_float4 inAlow_0 = inA[activation_idx]; - packed_float4 inAhigh_0 = inA[activation_idx+1]; - packed_float4 inAlow_1 = 
inA[activation_idx+2]; - packed_float4 inAhigh_1 = inA[activation_idx+3]; - packed_float4 inAlow_2 = inA[activation_idx+4]; - packed_float4 inAhigh_2 = inA[activation_idx+5]; - packed_float4 inAlow_3 = inA[activation_idx+6]; - packed_float4 inAhigh_3 = inA[activation_idx+7]; - - packed_float4 inBlow_0 = packed_float4(packed_low_0) * scale4; - packed_float4 inBlow_1 = packed_float4(packed_low_1) * scale4; - packed_float4 inBlow_2 = packed_float4(packed_low_2) * scale4; - packed_float4 inBlow_3 = packed_float4(packed_low_3) * scale4; - - packed_float4 inBhigh_0 = packed_float4(packed_high_0) * scale4; - packed_float4 inBhigh_1 = packed_float4(packed_high_1) * scale4; - packed_float4 inBhigh_2 = packed_float4(packed_high_2) * scale4; - packed_float4 inBhigh_3 = packed_float4(packed_high_3) * scale4; - - sum4 += inAlow_0 * inBlow_0; - sum4 += inAlow_1 * inBlow_1; - sum4 += inAlow_2 * inBlow_2; - sum4 += inAlow_3 * inBlow_3; - sum4 += inAhigh_0 * inBhigh_0; - sum4 += inAhigh_1 * inBhigh_1; - sum4 += inAhigh_2 * inBhigh_2; - sum4 += inAhigh_3 * inBhigh_3; - - } - } - float sum = sum4[0] + sum4[1] + sum4[2] + sum4[3]; - result[idy * n + idx] = sum; -} - -kernel void matmulUInt4_SIMD_Q4Interleave_half_unroll32( - device const packed_half4* inA, - device const packed_char4* inB, // column major - device float* result, - device const float* scales, - constant MetalMatMulParams& params, - uint2 id [[thread_position_in_grid]]) -{ - // the for-loop is replaced with a collection of threads, each of which - // calls this function. 
- - const uint n = params.n; - const uint k = params.k; - const uint group_size = params.group_size; - - const uint idx = id.x; // column index of the output - const uint idy = id.y; // row index of the output - - packed_char4 lowMask = {0x0F, 0x0F, 0x0F, 0x0F}; - packed_char4 offsets = {8, 8, 8, 8}; - packed_half4 sum4 = {0.0f, 0.0f, 0.0f, 0.0f}; - - for (uint i = 0; i < k; i += group_size){ - half scale = half(scales[(idx * k + i) / group_size]); - packed_half4 scale4 = {scale, scale, scale, scale}; - for (uint j = 0; j < group_size; j+= 32){ - // sequential: (a, b), (c, d), (e, f), (g, h): 32 bit = 4xuint8 - // expected layout of inB: (a, e), (b, f), (c, g), (d, h) - // low; (a, 0), (b, 0), (c, 0), (d, 0) - // high: (e, 0), (f, 0), (g, 0), (h, 0) - size_t weight_idx = (idx * k + i + j) / 8; - size_t activation_idx = (idy * k + i + j) / 4; - packed_char4 packed_8_0 = inB[weight_idx]; - packed_char4 packed_8_1 = inB[weight_idx + 1]; - packed_char4 packed_8_2 = inB[weight_idx + 2]; - packed_char4 packed_8_3 = inB[weight_idx + 3]; - - packed_char4 packed_low_0 = (packed_8_0 & lowMask) - offsets;; - packed_char4 packed_low_1 = (packed_8_1 & lowMask) - offsets;; - packed_char4 packed_low_2 = (packed_8_2 & lowMask) - offsets;; - packed_char4 packed_low_3 = (packed_8_3 & lowMask) - offsets;; - - packed_char4 packed_high_0 = ((packed_8_0 >> 4) & lowMask) - offsets; - packed_char4 packed_high_1 = ((packed_8_1 >> 4) & lowMask) - offsets; - packed_char4 packed_high_2 = ((packed_8_2 >> 4) & lowMask) - offsets; - packed_char4 packed_high_3 = ((packed_8_3 >> 4) & lowMask) - offsets; - - packed_half4 inAlow_0 = inA[activation_idx]; - packed_half4 inAhigh_0 = inA[activation_idx+1]; - packed_half4 inAlow_1 = inA[activation_idx+2]; - packed_half4 inAhigh_1 = inA[activation_idx+3]; - packed_half4 inAlow_2 = inA[activation_idx+4]; - packed_half4 inAhigh_2 = inA[activation_idx+5]; - packed_half4 inAlow_3 = inA[activation_idx+6]; - packed_half4 inAhigh_3 = inA[activation_idx+7]; - - 
packed_half4 inBlow_0 = packed_half4(packed_low_0) * scale4; - packed_half4 inBlow_1 = packed_half4(packed_low_1) * scale4; - packed_half4 inBlow_2 = packed_half4(packed_low_2) * scale4; - packed_half4 inBlow_3 = packed_half4(packed_low_3) * scale4; - - packed_half4 inBhigh_0 = packed_half4(packed_high_0) * scale4; - packed_half4 inBhigh_1 = packed_half4(packed_high_1) * scale4; - packed_half4 inBhigh_2 = packed_half4(packed_high_2) * scale4; - packed_half4 inBhigh_3 = packed_half4(packed_high_3) * scale4; - - sum4 += inAlow_0 * inBlow_0; - sum4 += inAlow_1 * inBlow_1; - sum4 += inAlow_2 * inBlow_2; - sum4 += inAlow_3 * inBlow_3; - sum4 += inAhigh_0 * inBhigh_0; - sum4 += inAhigh_1 * inBhigh_1; - sum4 += inAhigh_2 * inBhigh_2; - sum4 += inAhigh_3 * inBhigh_3; - } - } - half sum = sum4[0] + sum4[1] + sum4[2] + sum4[3]; - result[idy * n + idx] = float(sum); -} +} \ No newline at end of file diff --git a/llm/tests/metal/cpp_version/main_tmp.cc b/llm/tests/metal/cpp_version/main_tmp.cc index 7d699775..bd81366e 100644 --- a/llm/tests/metal/cpp_version/main_tmp.cc +++ b/llm/tests/metal/cpp_version/main_tmp.cc @@ -1,3 +1,409 @@ +// // +// // main.cpp +// // metal_cpp +// // +// // Created by Derrick on 1/24/24. +// // Some to-do list: +// // 1. keep a map: ptr on CPU -> buffer on GPU +// // Notes: +// // 1. Offset hasn't been considered +// // 2. 
Group_Size is multiple of 32 + +// #include +// #include +// #include +// #include + +// #define NS_PRIVATE_IMPLEMENTATION +// #define CA_PRIVATE_IMPLEMENTATION +// #define MTL_PRIVATE_IMPLEMENTATION + +// #include "Metal/Metal.hpp" +// #include "Foundation/Foundation.hpp" +// #include "param.h" +// typedef struct{ +// float values[4]; +// }PackedFloat; + +// typedef struct{ +// unsigned char values[4]; +// }PackedChar; + +// // .h +// MTL::Buffer *bM1, *bM2, *bM3, *bParam, *bScales, *bOffset; +// MTL::Buffer *bsM1, *bsM2, *bsM3, *bsParam, *bsScales, *bsOffset; +// MTL::Device* mDevice; +// MTL::ComputePipelineState* mfnPipelineState; +// MTL::CommandQueue* mCommandQueue; +// NS::Error *error = nullptr; +// typedef struct { +// float *A, *C, *scales, *offset; +// unsigned char *B; +// } MetalMatmulBuffers; + +// typedef struct { +// float *C, *scales, *offset; +// PackedFloat *A; +// PackedChar *B; +// } MetalMatmulSBuffers; + +// using namespace std; +// using namespace chrono; + +// // .cc +// const char * fn_name = "matmulInt4"; + + +// // main +// unsigned int height1 = 32; +// unsigned int width1 = 32; +// unsigned int height2 = 32; +// unsigned int width2 = 32; +// float *A1, *A3; +// unsigned char *A2; +// matmul_param *param; +// // for MatmulInt4 use +// unsigned int group_size = 32; +// float* scales, *offset; +// MetalMatmulBuffers *Int4_buffer; +// MetalMatMulParams *Int4_params; + +// MetalMatmulSBuffers *Int4_Sbuffer; +// MetalMatMulParams *Int4_Sparams; + +// // Test Use +// void test_addArrays(const float arr1[], const float arr2[], float result[], uint size) { +// for (int i = 0; i < size; ++i) { +// result[i] = arr1[i] + arr2[i]; +// } +// } +// void test_matmul(const float* matA, int rowsA, int colsA, const unsigned char* matB, int rowsB, int colsB, float* result) { +// for (int i = 0; i < rowsA; i++) { +// for (int j = 0; j < colsB; j++) { +// result[i * colsB + j] = 0; +// for (int k = 0; k < colsA; k++) { +// result[i * colsB + j] += matA[i * 
colsA + k] * matB[k * colsB + j]; +// } +// } +// } +// } +// void printArray(const float* array, uint arraySize) { +// for (int i = 0; i < arraySize; ++i) { +// std::cout << array[i] << " "; +// } +// std::cout << std::endl; +// } +// void generateRandomFloatArray(float* array, uint arraySize) { +// // Use a random device to seed the random number generator +// std::random_device rd; +// // Use the current time as a seed for the random number generator +// std::mt19937 gen(rd()); +// // Define the range of random numbers (adjust as needed) +// std::uniform_real_distribution distribution(1, 100); + +// // Generate random integers and fill the array +// for (int i = 0; i < arraySize; ++i) { +// array[i] = distribution(gen); +// } +// } +// void generateRandomCharArray(unsigned char* array, uint arraySize) { +// // Use a random device to seed the random number generator +// std::random_device rd; +// // Use the current time as a seed for the random number generator +// std::mt19937 gen(rd()); +// // Define the range of random numbers (adjust as needed) +// std::uniform_int_distribution distrib(0, 255); + +// // Generate random integers and fill the array +// for (int i = 0; i < arraySize; ++i) { +// array[i] = static_cast(distrib(gen)); +// } +// } + +// void generateRandomScale(float* array, uint arraySize) { +// for (size_t i = 0; i < arraySize; i++){ +// array[i] = 1.1; +// } +// } + +// // Metal functions +// void metal_init(){ +// mDevice = MTL::CreateSystemDefaultDevice(); +// MTL::Library *defaultLibrary = mDevice->newDefaultLibrary(); +// if (defaultLibrary == nullptr) { +// std::cout << "Failed to find the default library." << std::endl; +// return; +// } +// auto str = NS::String::string(fn_name, NS::ASCIIStringEncoding); +// MTL::Function *matmulFunction = defaultLibrary->newFunction(str); +// defaultLibrary->release(); +// if (matmulFunction == nullptr) { +// std::cout << "Failed to find the function." 
<< std::endl; +// return; +// } +// mfnPipelineState = mDevice->newComputePipelineState(matmulFunction, &error); +// matmulFunction->release(); +// if (mfnPipelineState == nullptr) { +// std::cout << "Failed to created pipeline state object, error " << error << "." << std::endl; +// return; +// } +// mCommandQueue = mDevice->newCommandQueue(); +// if (mCommandQueue == nullptr) { +// std::cout << "Failed to find the command queue." << std::endl; +// return; +// } +// } + +// MTL::Buffer *metal_newBuf(unsigned long type_size, unsigned long size){ +// return mDevice->newBuffer(type_size*size, MTL::ResourceStorageModeShared); +// } + +// void metal_encodecommand_matmulInt4_simd(MTL::ComputeCommandEncoder *computeEncoder){ +// //Create Metal buffers for input and output, if inside the TinyChat, param should be created in advance + +// bsScales = metal_newBuf(sizeof(float), Int4_Sparams->height2*Int4_Sparams->width2/Int4_Sparams->group_size); +// bsM1 = metal_newBuf(sizeof(PackedFloat), Int4_Sparams->height1*Int4_Sparams->width1); +// bsM2 = metal_newBuf(sizeof(PackedChar), Int4_Sparams->width2*Int4_Sparams->width2); +// bsParam = metal_newBuf(sizeof(MetalMatMulParams), 1); +// bsM3 = metal_newBuf(sizeof(float), Int4_Sparams->height3*Int4_Sparams->width3); + +// computeEncoder->setComputePipelineState(mfnPipelineState); +// computeEncoder->setBuffer(bsM1, 0, 0); +// computeEncoder->setBuffer(bsM2, 0, 1); +// computeEncoder->setBuffer(bsM3, 0, 2); +// computeEncoder->setBuffer(bsScales, 0, 3); +// computeEncoder->setBuffer(bParam, 0, 4); + +// memcpy(bsM1->contents(), Int4_Sbuffer->A, Int4_Sparams->height1*Int4_Sparams->width1*sizeof(PackedFloat)); +// memcpy(bsM2->contents(), Int4_Sbuffer->B, Int4_Sparams->width2*Int4_Sparams->width2*sizeof(PackedChar)); +// memcpy(bsM3->contents(), Int4_Sbuffer->C, Int4_Sparams->height3*Int4_Sparams->width3*sizeof(float)); +// memcpy(bsParam->contents(), Int4_Sparams, sizeof(MetalMatMulParams)); +// memcpy(bsScales->contents(), 
Int4_Sbuffer->scales, Int4_Sparams->height2*Int4_Sparams->width2/Int4_Sparams->group_size*sizeof(float)); +// } + + +// void metal_encodecommand_matmulInt4(MTL::ComputeCommandEncoder *computeEncoder){ +// //Create Metal buffers for input and output, if inside the TinyChat, param should be created in advance + +// bScales = metal_newBuf(sizeof(float), height1*width1); +// bM1 = metal_newBuf(sizeof(float), Int4_params->height1*Int4_params->width1); +// bM2 = metal_newBuf(sizeof(unsigned char), Int4_params->width1*Int4_params->width3); +// bParam = metal_newBuf(sizeof(MetalMatMulParams), 1); +// bM3 = metal_newBuf(sizeof(float), Int4_params->height1*Int4_params->width3); + +// computeEncoder->setComputePipelineState(mfnPipelineState); +// computeEncoder->setBuffer(bM1, 0, 0); +// computeEncoder->setBuffer(bM2, 0, 1); +// computeEncoder->setBuffer(bM3, 0, 2); +// computeEncoder->setBuffer(bScales, 0, 3); +// computeEncoder->setBuffer(bParam, 0, 4); + +// memcpy(bM1->contents(), Int4_buffer->A, Int4_params->height1*Int4_params->width1*sizeof(float)); +// memcpy(bM2->contents(), Int4_buffer->B, Int4_params->width1*Int4_params->width3*sizeof(unsigned char)); +// memcpy(bM3->contents(), Int4_buffer->C, Int4_params->height1*Int4_params->width3*sizeof(float)); +// memcpy(bParam->contents(), Int4_params, sizeof(MetalMatMulParams)); +// memcpy(bScales->contents(), Int4_buffer->scales, height1*width1*sizeof(float)); +// } + +// void metal_encodecommand_matmul(MTL::ComputeCommandEncoder *computeEncoder){ +// //Create Metal buffers for input and output, if inside the TinyChat, param should be created in advance +// bM1 = metal_newBuf(sizeof(float), param->arraySize1); +// bM2 = metal_newBuf(sizeof(float), param->arraySize2); +// bM3 = metal_newBuf(sizeof(float), param->outputsize); +// bParam = metal_newBuf(sizeof(matmul_param), 1); + +// computeEncoder->setComputePipelineState(mfnPipelineState); +// computeEncoder->setBuffer(bM1, 0, 0); +// computeEncoder->setBuffer(bM2, 0, 1); 
+// computeEncoder->setBuffer(bParam, 0, 2); +// computeEncoder->setBuffer(bM3, 0, 3); + + +// memcpy(bM1->contents(), A1, param->arraySize1*sizeof(float)); +// memcpy(bM2->contents(), A2, param->arraySize2*sizeof(float)); +// memcpy(bM3->contents(), A3, param->outputsize*sizeof(float)); +// memcpy(bParam->contents(), param, sizeof(matmul_param)); +// } + +// void metal_compute(){ +// // Initialization of GPU vals +// MTL::CommandBuffer *commandBuffer = mCommandQueue->commandBuffer(); +// assert(commandBuffer != nullptr); +// MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder(); +// assert(computeEncoder != nullptr); + +// // Encode command and set buffer to GPU +// if (strcmp(fn_name, "matmulInt4") == 0) { +// metal_encodecommand_matmulInt4(computeEncoder); +// } else if (strcmp(fn_name, "matmul") == 0) { +// metal_encodecommand_matmul(computeEncoder); +// } else if (strcmp(fn_name, "matmulInt4_SIMD_Q4Interleave") == 0) { +// metal_encodecommand_matmulInt4_simd(computeEncoder); +// } + +// // Threads -> ThreadGroup -> Grid +// MTL::Size mThreadGroupSize; +// MTL::Size mGridSize; +// if (strcmp(fn_name, "matmulInt4") == 0){ +// mThreadGroupSize = MTL::Size::Make(Int4_params->width3, Int4_params->height1, 1); +// mGridSize = MTL::Size::Make(16, 1, 1); +// } else if (strcmp(fn_name, "matmul") == 0) { +// mThreadGroupSize = MTL::Size::Make(8, 8, 1); +// mGridSize = MTL::Size::Make((param->width1 + mThreadGroupSize.width - 1) / mThreadGroupSize.width, +// (param->height2 + mThreadGroupSize.height - 1) / mThreadGroupSize.height, +// 1); +// } else if (strcmp(fn_name, "matmulInt4_SIMD_Q4Interleave") == 0) { +// mThreadGroupSize = MTL::Size::Make(Int4_Sparams->width3, Int4_Sparams->height1, 1); +// mGridSize = MTL::Size::Make(16, 1, 1); +// } + +// // Dispatch and Run Computation +// // auto start = high_resolution_clock::now(); +// computeEncoder->dispatchThreadgroups(mGridSize, mThreadGroupSize); +// computeEncoder->endEncoding(); +// 
commandBuffer->commit(); +// commandBuffer->waitUntilCompleted(); +// // auto stop = high_resolution_clock::now(); +// // auto duration = duration_cast(stop - start); +// // std::cout << "GPU: " << duration.count() << "ms" << std::endl; +// computeEncoder->release(); +// commandBuffer->release(); +// } + +// void test_normal_matmul(){ +// // Initialization for test +// param = new matmul_param; +// param->height1 = height1; +// param->height2 = height2; +// param->width1 = width1; +// param->width2 = width2; +// param->outputsize = height1*width2; +// param->arraySize1 = width1*height1; +// param->arraySize2 = width2*height2; +// A1 = new float[param->arraySize1]; +// A2 = new unsigned char[param->arraySize2]; +// A3 = new float[param->outputsize]; +// generateRandomFloatArray(A1, param->arraySize1); +// generateRandomCharArray(A2, param->arraySize2); +// // printArray(A1, param->arraySize1); +// // printArray(A2, param->arraySize2); + + +// // CPU +// auto start = high_resolution_clock::now(); +// test_matmul(A1, param->height1, param->width1, A2, param->height2, param->width2, A3); +// auto stop = high_resolution_clock::now(); +// auto duration = duration_cast(stop - start); +// std::cout << "CPU: " << duration.count() << "ms" << std::endl; +// printf("CPU Results: \n"); +// for (uint32_t i = 0; i < param->outputsize; i++){ +// printf("A3[%d]: %f\n", i, A3[i]); +// } +// free(A3); +// A3 = new float[param->outputsize]; + +// // GPU +// metal_init(); +// metal_compute(); +// // printf("GPU Results: \n"); +// // for (uint32_t i = 0; i < param->outputsize; i++){ +// // printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); +// // } + +// free(A1); +// free(A2); +// free(A3); +// } + +// void test_matmulInt4(){ +// // not considering offset atm +// Int4_buffer = new MetalMatmulBuffers; +// Int4_params = new MetalMatMulParams; +// Int4_params->group_size = group_size; +// Int4_params->height1 = height1; // m +// Int4_params->width1 = width1; // k +// 
Int4_params->width3 = width2; // n + +// scales = new float[Int4_params->height1*Int4_params->width1/Int4_params->group_size]; +// generateRandomFloatArray(scales, height1*width1/group_size); +// Int4_buffer->A = A1; +// Int4_buffer->B = A2; +// Int4_buffer->C = A3; +// Int4_buffer->scales = scales; +// metal_init(); +// metal_compute(); +// printf("GPU Results: \n"); +// for (uint32_t i = 0; i < Int4_params->height1*Int4_params->width1/Int4_params->group_size; i++){ +// printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); +// } +// } + +// void test_matmulInt4_simds(){ +// fn_name = "matmulInt4_SIMD_Q4Interleave"; +// Int4_Sbuffer = new MetalMatmulSBuffers; +// Int4_Sparams = new MetalMatMulParams; +// Int4_Sparams->group_size = group_size/4; +// Int4_Sparams->height1 = height1; // m +// Int4_Sparams->width1 = width1/4; // k +// Int4_Sparams->height2 = height2/2/4; // m +// Int4_Sparams->width2 = width2/2; // k +// Int4_Sparams->height3 = height1; // m +// Int4_Sparams->width3 = width2; // k + +// scales = new float[Int4_Sparams->height2*Int4_Sparams->width2/Int4_Sparams->group_size]; +// generateRandomFloatArray(scales, Int4_Sparams->height2*Int4_Sparams->width2/Int4_Sparams->group_size); +// Int4_Sbuffer->scales = scales; +// Int4_Sbuffer->C = A3; +// PackedFloat *tempF = new PackedFloat[Int4_Sparams->height1*Int4_Sparams->width1]; +// PackedChar *tempC = new PackedChar[Int4_Sparams->width2*Int4_Sparams->width2]; +// printf("GPU Results: 1 \n"); +// for (size_t i = 0; i < height1*width1; i += 4) { +// for (int j = 0; j < 4; j++) { +// if (i + j < height1*width1) { // Check to prevent out-of-bounds access +// tempF[i / 4].values[j] = A1[i + j]; +// } +// } +// } +// Int4_Sbuffer->A = tempF; + +// for (size_t i = 0; i < height2/2*width2/2; i += 4) { +// for (int j = 0; j < 4; j++) { +// if (i + j < height1*width1) { // Check to prevent out-of-bounds access +// tempC[i / 4].values[j] = A2[i + j]; +// } +// } +// } +// Int4_Sbuffer->B = tempC; + +// 
metal_init(); +// metal_compute(); +// for (uint32_t i = 0; i < Int4_Sparams->height1*Int4_Sparams->width1/Int4_Sparams->group_size; i++){ +// // for (int j = 0; j < 4; j++) { +// printf("bsM3[%d]: %f\n", i, ((float*)(bsM3->contents()))[i]); +// // } +// } +// } + +// int main(){ +// A1 = new float[height1*width1]; +// A2 = new unsigned char[width1*width2]; +// A3 = new float[height1*width2]; +// scales = new float[height1*width1]; +// generateRandomFloatArray(A1, height1*width1); +// generateRandomCharArray(A2, width1*width2); +// generateRandomScale(scales, height1*width1); +// // test_matmulInt4(); +// test_matmulInt4_simds(); +// return 0; +// } + + + + + + + + // // main.cpp // metal_cpp @@ -21,17 +427,9 @@ #include "Metal/Metal.hpp" #include "Foundation/Foundation.hpp" #include "param.h" -typedef struct{ - float values[4]; -}PackedFloat; - -typedef struct{ - unsigned char values[4]; -}PackedChar; // .h MTL::Buffer *bM1, *bM2, *bM3, *bParam, *bScales, *bOffset; -MTL::Buffer *bsM1, *bsM2, *bsM3, *bsParam, *bsScales, *bsOffset; MTL::Device* mDevice; MTL::ComputePipelineState* mfnPipelineState; MTL::CommandQueue* mCommandQueue; @@ -41,11 +439,6 @@ typedef struct { unsigned char *B; } MetalMatmulBuffers; -typedef struct { - float *C, *scales, *offset; - PackedFloat *A; - PackedChar *B; -} MetalMatmulSBuffers; using namespace std; using namespace chrono; @@ -55,10 +448,10 @@ const char * fn_name = "matmulInt4"; // main -unsigned int height1 = 32; -unsigned int width1 = 32; -unsigned int height2 = 32; -unsigned int width2 = 32; +unsigned int height1 = 1; +unsigned int width1 = 4096; +unsigned int height2 = 4096; +unsigned int width2 = 32000; float *A1, *A3; unsigned char *A2; matmul_param *param; @@ -68,9 +461,6 @@ float* scales, *offset; MetalMatmulBuffers *Int4_buffer; MetalMatMulParams *Int4_params; -MetalMatmulSBuffers *Int4_Sbuffer; -MetalMatMulParams *Int4_Sparams; - // Test Use void test_addArrays(const float arr1[], const float arr2[], float result[], uint 
size) { for (int i = 0; i < size; ++i) { @@ -120,12 +510,6 @@ void generateRandomCharArray(unsigned char* array, uint arraySize) { } } -void generateRandomScale(float* array, uint arraySize) { - for (size_t i = 0; i < arraySize; i++){ - array[i] = 1.1; - } -} - // Metal functions void metal_init(){ mDevice = MTL::CreateSystemDefaultDevice(); @@ -158,34 +542,10 @@ MTL::Buffer *metal_newBuf(unsigned long type_size, unsigned long size){ return mDevice->newBuffer(type_size*size, MTL::ResourceStorageModeShared); } -void metal_encodecommand_matmulInt4_simd(MTL::ComputeCommandEncoder *computeEncoder){ - //Create Metal buffers for input and output, if inside the TinyChat, param should be created in advance - - bsScales = metal_newBuf(sizeof(float), Int4_Sparams->height2*Int4_Sparams->width2/Int4_Sparams->group_size); - bsM1 = metal_newBuf(sizeof(PackedFloat), Int4_Sparams->height1*Int4_Sparams->width1); - bsM2 = metal_newBuf(sizeof(PackedChar), Int4_Sparams->width2*Int4_Sparams->width2); - bsParam = metal_newBuf(sizeof(MetalMatMulParams), 1); - bsM3 = metal_newBuf(sizeof(float), Int4_Sparams->height3*Int4_Sparams->width3); - - computeEncoder->setComputePipelineState(mfnPipelineState); - computeEncoder->setBuffer(bsM1, 0, 0); - computeEncoder->setBuffer(bsM2, 0, 1); - computeEncoder->setBuffer(bsM3, 0, 2); - computeEncoder->setBuffer(bsScales, 0, 3); - computeEncoder->setBuffer(bParam, 0, 4); - - memcpy(bsM1->contents(), Int4_Sbuffer->A, Int4_Sparams->height1*Int4_Sparams->width1*sizeof(PackedFloat)); - memcpy(bsM2->contents(), Int4_Sbuffer->B, Int4_Sparams->width2*Int4_Sparams->width2*sizeof(PackedChar)); - memcpy(bsM3->contents(), Int4_Sbuffer->C, Int4_Sparams->height3*Int4_Sparams->width3*sizeof(float)); - memcpy(bsParam->contents(), Int4_Sparams, sizeof(MetalMatMulParams)); - memcpy(bsScales->contents(), Int4_Sbuffer->scales, Int4_Sparams->height2*Int4_Sparams->width2/Int4_Sparams->group_size*sizeof(float)); -} - - void 
metal_encodecommand_matmulInt4(MTL::ComputeCommandEncoder *computeEncoder){ //Create Metal buffers for input and output, if inside the TinyChat, param should be created in advance - bScales = metal_newBuf(sizeof(float), height1*width1); + bScales = metal_newBuf(sizeof(float), Int4_params->height1*Int4_params->width1/Int4_params->group_size); bM1 = metal_newBuf(sizeof(float), Int4_params->height1*Int4_params->width1); bM2 = metal_newBuf(sizeof(unsigned char), Int4_params->width1*Int4_params->width3); bParam = metal_newBuf(sizeof(MetalMatMulParams), 1); @@ -202,7 +562,7 @@ void metal_encodecommand_matmulInt4(MTL::ComputeCommandEncoder *computeEncoder){ memcpy(bM2->contents(), Int4_buffer->B, Int4_params->width1*Int4_params->width3*sizeof(unsigned char)); memcpy(bM3->contents(), Int4_buffer->C, Int4_params->height1*Int4_params->width3*sizeof(float)); memcpy(bParam->contents(), Int4_params, sizeof(MetalMatMulParams)); - memcpy(bScales->contents(), Int4_buffer->scales, height1*width1*sizeof(float)); + memcpy(bScales->contents(), Int4_buffer->scales, ((Int4_params->height1*Int4_params->width1)/Int4_params->group_size)*sizeof(float)); } void metal_encodecommand_matmul(MTL::ComputeCommandEncoder *computeEncoder){ @@ -237,13 +597,13 @@ void metal_compute(){ metal_encodecommand_matmulInt4(computeEncoder); } else if (strcmp(fn_name, "matmul") == 0) { metal_encodecommand_matmul(computeEncoder); - } else if (strcmp(fn_name, "matmulInt4_SIMD_Q4Interleave") == 0) { - metal_encodecommand_matmulInt4_simd(computeEncoder); + } else { + metal_encodecommand_matmulInt4(computeEncoder); } // Threads -> ThreadGroup -> Grid MTL::Size mThreadGroupSize; - MTL::Size mGridSize; + MTL::Size mGridSize; if (strcmp(fn_name, "matmulInt4") == 0){ mThreadGroupSize = MTL::Size::Make(Int4_params->width3, Int4_params->height1, 1); mGridSize = MTL::Size::Make(16, 1, 1); @@ -252,26 +612,27 @@ void metal_compute(){ mGridSize = MTL::Size::Make((param->width1 + mThreadGroupSize.width - 1) / 
mThreadGroupSize.width, (param->height2 + mThreadGroupSize.height - 1) / mThreadGroupSize.height, 1); - } else if (strcmp(fn_name, "matmulInt4_SIMD_Q4Interleave") == 0) { - mThreadGroupSize = MTL::Size::Make(Int4_Sparams->width3, Int4_Sparams->height1, 1); + } else { + mThreadGroupSize = MTL::Size::Make(Int4_params->width3, Int4_params->height1, 1); mGridSize = MTL::Size::Make(16, 1, 1); } // Dispatch and Run Computation - // auto start = high_resolution_clock::now(); + auto start = high_resolution_clock::now(); computeEncoder->dispatchThreadgroups(mGridSize, mThreadGroupSize); computeEncoder->endEncoding(); commandBuffer->commit(); commandBuffer->waitUntilCompleted(); - // auto stop = high_resolution_clock::now(); - // auto duration = duration_cast(stop - start); - // std::cout << "GPU: " << duration.count() << "ms" << std::endl; + auto stop = high_resolution_clock::now(); + auto duration = duration_cast(stop - start); + std::cout << "GPU: " << fn_name << " " << duration.count() << "ms" << std::endl; computeEncoder->release(); commandBuffer->release(); } void test_normal_matmul(){ // Initialization for test + fn_name = "matmul"; param = new matmul_param; param->height1 = height1; param->height2 = height2; @@ -289,26 +650,26 @@ void test_normal_matmul(){ // printArray(A2, param->arraySize2); - // CPU - auto start = high_resolution_clock::now(); - test_matmul(A1, param->height1, param->width1, A2, param->height2, param->width2, A3); - auto stop = high_resolution_clock::now(); - auto duration = duration_cast(stop - start); - std::cout << "CPU: " << duration.count() << "ms" << std::endl; - printf("CPU Results: \n"); - for (uint32_t i = 0; i < param->outputsize; i++){ - printf("A3[%d]: %f\n", i, A3[i]); - } - free(A3); - A3 = new float[param->outputsize]; + // // CPU + // auto start = high_resolution_clock::now(); + // test_matmul(A1, param->height1, param->width1, A2, param->height2, param->width2, A3); + // auto stop = high_resolution_clock::now(); + // auto duration 
= duration_cast(stop - start); + // std::cout << "CPU: " << duration.count() << "ms" << std::endl; + // printf("CPU Results: \n"); + // for (uint32_t i = 0; i < param->outputsize; i++){ + // printf("A3[%d]: %f\n", i, A3[i]); + // } + // free(A3); + // A3 = new float[param->outputsize]; // GPU metal_init(); metal_compute(); // printf("GPU Results: \n"); - // for (uint32_t i = 0; i < param->outputsize; i++){ - // printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); - // } + for (uint32_t i = 0; i < param->outputsize; i++){ + printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); + } free(A1); free(A2); @@ -323,77 +684,46 @@ void test_matmulInt4(){ Int4_params->height1 = height1; // m Int4_params->width1 = width1; // k Int4_params->width3 = width2; // n - + A1 = new float[Int4_params->height1*Int4_params->width1]; + A2 = new unsigned char[Int4_params->width1*Int4_params->width3]; + A3 = new float[Int4_params->height1*Int4_params->width3]; scales = new float[Int4_params->height1*Int4_params->width1/Int4_params->group_size]; - generateRandomFloatArray(scales, height1*width1/group_size); + generateRandomFloatArray(A1, Int4_params->height1*Int4_params->width1); + generateRandomCharArray(A2, Int4_params->width1*Int4_params->width3); + generateRandomFloatArray(scales, Int4_params->height1*Int4_params->width1/Int4_params->group_size); Int4_buffer->A = A1; Int4_buffer->B = A2; Int4_buffer->C = A3; Int4_buffer->scales = scales; + metal_init(); metal_compute(); - printf("GPU Results: \n"); - for (uint32_t i = 0; i < Int4_params->height1*Int4_params->width1/Int4_params->group_size; i++){ - printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); - } -} - -void test_matmulInt4_simds(){ +// printf("GPU Results: \n"); + // for (uint32_t i = 0; i < Int4_params->height1*Int4_params->width1/Int4_params->group_size; i++){ + // printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); + // } fn_name = "matmulInt4_SIMD_Q4Interleave"; - Int4_Sbuffer = new 
MetalMatmulSBuffers; - Int4_Sparams = new MetalMatMulParams; - Int4_Sparams->group_size = group_size/4; - Int4_Sparams->height1 = height1; // m - Int4_Sparams->width1 = width1/4; // k - Int4_Sparams->height2 = height2/2/4; // m - Int4_Sparams->width2 = width2/2; // k - Int4_Sparams->height3 = height1; // m - Int4_Sparams->width3 = width2; // k - - scales = new float[Int4_Sparams->height2*Int4_Sparams->width2/Int4_Sparams->group_size]; - generateRandomFloatArray(scales, Int4_Sparams->height2*Int4_Sparams->width2/Int4_Sparams->group_size); - Int4_Sbuffer->scales = scales; - Int4_Sbuffer->C = A3; - PackedFloat *tempF = new PackedFloat[Int4_Sparams->height1*Int4_Sparams->width1]; - PackedChar *tempC = new PackedChar[Int4_Sparams->width2*Int4_Sparams->width2]; - printf("GPU Results: 1 \n"); - for (size_t i = 0; i < height1*width1; i += 4) { - for (int j = 0; j < 4; j++) { - if (i + j < height1*width1) { // Check to prevent out-of-bounds access - tempF[i / 4].values[j] = A1[i + j]; - } - } - } - Int4_Sbuffer->A = tempF; - - for (size_t i = 0; i < height2/2*width2/2; i += 4) { - for (int j = 0; j < 4; j++) { - if (i + j < height1*width1) { // Check to prevent out-of-bounds access - tempC[i / 4].values[j] = A2[i + j]; - } - } - } - Int4_Sbuffer->B = tempC; - metal_init(); metal_compute(); - for (uint32_t i = 0; i < Int4_Sparams->height1*Int4_Sparams->width1/Int4_Sparams->group_size; i++){ - // for (int j = 0; j < 4; j++) { - printf("bsM3[%d]: %f\n", i, ((float*)(bsM3->contents()))[i]); - // } - } +// printf("GPU Results: \n"); + // for (uint32_t i = 0; i < Int4_params->height1*Int4_params->width1/Int4_params->group_size; i++){ + // printf("bM3[%d]: %f\n", i, ((float*)(bM3->contents()))[i]); + // } + fn_name = "matmulUInt4_SIMD_Q4Interleave_unroll16"; + metal_init(); + metal_compute(); +// printf("GPU Results: \n"); + // for (uint32_t i = 0; i < Int4_params->height1*Int4_params->width1/Int4_params->group_size; i++){ + // printf("bM3[%d]: %f\n", i, 
((float*)(bM3->contents()))[i]); + // } + fn_name = "matmulUInt4_SIMD_Q4Interleave_unroll16"; + metal_init(); + metal_compute(); +// printf("GPU Results: \n"); } int main(){ - A1 = new float[height1*width1]; - A2 = new unsigned char[width1*width2]; - A3 = new float[height1*width2]; - scales = new float[height1*width1]; - generateRandomFloatArray(A1, height1*width1); - generateRandomCharArray(A2, width1*width2); - generateRandomScale(scales, height1*width1); - // test_matmulInt4(); - test_matmulInt4_simds(); + test_normal_matmul(); return 0; } From 2b71f9f2edb884e899839b6993232024ae92534a Mon Sep 17 00:00:00 2001 From: RaymondWang0 Date: Fri, 22 Mar 2024 10:56:56 -0400 Subject: [PATCH 29/37] fix matmul --- llm/tests/metal/cpp_version/main.cc | 46 +++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/llm/tests/metal/cpp_version/main.cc b/llm/tests/metal/cpp_version/main.cc index b9aa09ad..cf3426a7 100755 --- a/llm/tests/metal/cpp_version/main.cc +++ b/llm/tests/metal/cpp_version/main.cc @@ -103,6 +103,11 @@ void generateRandomCharArray(unsigned char* array, uint arraySize) { array[i] = static_cast(distrib(gen)); } } +void generateOnesCharArray(unsigned char* array, uint arraySize) { + for (int i = 0; i < arraySize; ++i) { + array[i] = 1; + } +} // Metal functions void metal_init(){ @@ -399,6 +404,8 @@ void test_matmul_llama(){ float* dst = new float[output_size]; generateRandomCharArray(src0, hidden_size); generateRandomCharArray(src1, weight_size); + // generateOnesCharArray(src0, hidden_size); + // generateOnesCharArray(src1, weight_size); // generateRandomFloatArray(dst, arraySize); metal_init(); // Initialization of GPU vals @@ -439,7 +446,7 @@ void test_matmul_llama(){ computeEncoder->setBuffer(bne1, 0, 12); computeEncoder->setBuffer(br2, 0, 13); computeEncoder->setBuffer(br3, 0, 14); - computeEncoder->setThreadgroupMemoryLength(8192, 1); // from 
https://github.com/ggerganov/llama.cpp/blob/d5ab29757ebc59a30f03e408294ec20628a6374e/ggml-metal.m#L1315 + computeEncoder->setThreadgroupMemoryLength(8192, 0); // from https://github.com/ggerganov/llama.cpp/blob/d5ab29757ebc59a30f03e408294ec20628a6374e/ggml-metal.m#L1315 int64_t ne00 = bs; @@ -475,6 +482,17 @@ void test_matmul_llama(){ memcpy(bne1->contents(), &ne1, sizeof(ne1)); memcpy(br2->contents(), &r2, sizeof(r2)); memcpy(br3->contents(), &r3, sizeof(r3)); + + std::cout << "src0: "; + for (int i = 0; i < 10; ++i) { + std::cout << src0[i] << " "; + } + std::cout << std::endl; + std::cout << "bM1: "; + for (int i = 0; i < 10; ++i) { + std::cout << ((unsigned char*)(bM1->contents()))[i] << " "; + } + std::cout << std::endl; // Assuming you have already configured the threadgroup size and number of threadgroups based on your kernel and data MTL::Size threadgroupSize = MTL::Size::Make(128, 1, 1); @@ -490,10 +508,32 @@ void test_matmul_llama(){ commandBuffer->waitUntilCompleted(); auto stop = high_resolution_clock::now(); auto duration = duration_cast(stop - start); - std::cout << duration.count() << std::endl; + std::cout << "Metal GPU Duration: " << duration.count() << " ms" << std::endl; + + memcpy(dst, bM3->contents(), output_size * sizeof(float)); + + // print dst + std::cout << "dst: "; for (int i = 0; i < 10; ++i) { - std::cout << dst[i] << " " << std::endl; + std::cout << dst[i] << " "; } + // for (int i = 0; i < output_size; ++i) { + // if (dst[i] != 0) { + // std::cout << dst[i] << " "; + // } + // } + std::cout << std::endl; + + // print bM3 + std::cout << "bM3: "; + for (int i = 0; i < 10; ++i) { + std::cout << ((float*)(bM3->contents()))[i] << " "; + } + // for (int i = 0; i < output_size; ++i) { + // if (((float*)(bM3->contents()))[i] != 0) { + // std::cout << ((float*)(bM3->contents()))[i] << " "; + // } + // } std::cout << std::endl; } From f792a1b4be7a9c9132e25d296f0dcf2443d88ff3 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Tue, 26 Mar 2024 
17:10:38 -0400 Subject: [PATCH 30/37] add new kernels and reorganize --- kernels/matmul.h | 29 +- kernels/metal/matmul_f32_f32.cc | 9 + kernels/metal/matmul_int4_f32.cc | 9 + kernels/metal/matvec_f32_f32.cc | 9 + kernels/metal/matvec_int4_f32.cc | 9 + kernels/metal/metal_compute.cc | 335 +++++++++++++++++------ kernels/metal/metal_compute.h | 99 +++++++ kernels/metal/op.metal | 426 +++++++++++++++++++++++++----- llm/src/ops/metal/LlamaRMSNorm.cc | 81 ++---- llm/src/ops/metal/RotaryPosEmb.cc | 3 +- llm/src/ops/metal/batch_add.cc | 20 +- llm/src/ops/metal/embedding.cc | 49 +--- llm/src/ops/metal/linear.cc | 2 +- llm/src/ops/metal/softmax.cc | 16 +- 14 files changed, 827 insertions(+), 269 deletions(-) create mode 100644 kernels/metal/matmul_f32_f32.cc create mode 100644 kernels/metal/matmul_int4_f32.cc create mode 100644 kernels/metal/matvec_f32_f32.cc create mode 100644 kernels/metal/matvec_int4_f32.cc create mode 100644 kernels/metal/metal_compute.h diff --git a/kernels/matmul.h b/kernels/matmul.h index 4f36617e..148d1faf 100644 --- a/kernels/matmul.h +++ b/kernels/matmul.h @@ -137,18 +137,23 @@ class MatmulOperator { //// GEMV void gemv_forward_cuda(const struct matmul_params *params); // metal - void mat_mul_f32_metal(const struct matmul_params *params); - void batch_add_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z); - void relu_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z); - void silu_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z); - void gelu_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z); - void gelu_quick_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z); - void rms_norm_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, 
unsigned int m_dim_z, float eps); - void soft_max_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z, int64_t scale); - void soft_max_4_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z, int64_t scale); - void rope_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z, -int n_past, int n_dims, int mode, int n_orig_ctx, float freq_base, float freq_scale, float ext_factor, float attn_factor, -float beta_fast, float beta_slow); + void mat_mul_int4_f32_metal(const struct matmul_params *params); + void mat_mul_f32_f32_metal(const struct matmul_params *params); + void mat_vec_int4_f32_metal(const struct matmul_params *params); + void mat_vec_f32_f32_metal(const struct matmul_params *params); +// void batch_add_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z); +// void mat_mul_f32_metal(const struct matmul_params *params); +// void batch_add_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z); +// void relu_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z); +// void silu_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z); +// void gelu_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z); +// void gelu_quick_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z); +// void rms_norm_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z, float eps); +// void soft_max_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z, int64_t scale); +// void soft_max_4_metal(const struct 
matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z, int64_t scale); +// void rope_metal(const struct matmul_params *params, unsigned int m_dim_x, unsigned int m_dim_y, unsigned int m_dim_z, +// int n_past, int n_dims, int mode, int n_orig_ctx, float freq_base, float freq_scale, float ext_factor, float attn_factor, +// float beta_fast, float beta_slow); diff --git a/kernels/metal/matmul_f32_f32.cc b/kernels/metal/matmul_f32_f32.cc new file mode 100644 index 00000000..d29e6ff0 --- /dev/null +++ b/kernels/metal/matmul_f32_f32.cc @@ -0,0 +1,9 @@ +#include "metal_compute.h" +namespace matmul { + void mat_mul_f32_f32_metal(const struct matmul_params *params){ + struct metal_cgraph *graph = new (struct metal_cgraph); + graph->n_nodes = 1; + graph->mm_nodes[0] = (const metal_params *) params; + metal_graph_compute(METAL_KERNEL_MUL_MM_F32_F32, graph); + } +} \ No newline at end of file diff --git a/kernels/metal/matmul_int4_f32.cc b/kernels/metal/matmul_int4_f32.cc new file mode 100644 index 00000000..9eee2426 --- /dev/null +++ b/kernels/metal/matmul_int4_f32.cc @@ -0,0 +1,9 @@ +#include "metal_compute.h" +namespace matmul { + void mat_mul_int4_f32_metal(const struct matmul_params *params){ + struct metal_cgraph *graph = new (struct metal_cgraph); + graph->n_nodes = 1; + graph->mm_nodes[0] = (const metal_params *) params; + metal_graph_compute(METAL_KERNEL_MUL_MM_INT4_F32, graph); + } +} \ No newline at end of file diff --git a/kernels/metal/matvec_f32_f32.cc b/kernels/metal/matvec_f32_f32.cc new file mode 100644 index 00000000..9d8a5ab9 --- /dev/null +++ b/kernels/metal/matvec_f32_f32.cc @@ -0,0 +1,9 @@ +#include "metal_compute.h" +namespace matmul { + void mat_vec_f32_f32_metal(const struct matmul_params *params){ + struct metal_cgraph *graph = new (struct metal_cgraph); + graph->n_nodes = 1; + graph->mm_nodes[0] = (const metal_params *) params; + metal_graph_compute(METAL_KERNEL_MUL_MV_F32_F32, graph); + } +} \ No newline at end of 
file diff --git a/kernels/metal/matvec_int4_f32.cc b/kernels/metal/matvec_int4_f32.cc new file mode 100644 index 00000000..a6c95702 --- /dev/null +++ b/kernels/metal/matvec_int4_f32.cc @@ -0,0 +1,9 @@ +#include "metal_compute.h" +namespace matmul { + void mat_vec_int4_f32_metal(const struct matmul_params *params){ + struct metal_cgraph *graph = new (struct metal_cgraph); + graph->n_nodes = 1; + graph->mm_nodes[0] = (const metal_params *) params; + metal_graph_compute(METAL_KERNEL_MUL_MV_INT4_F32, graph); + } +} \ No newline at end of file diff --git a/kernels/metal/metal_compute.cc b/kernels/metal/metal_compute.cc index b3877684..99a65b5f 100644 --- a/kernels/metal/metal_compute.cc +++ b/kernels/metal/metal_compute.cc @@ -1,67 +1,14 @@ #include +#include -#include "Foundation/Foundation.hpp" -#include "Metal/Metal.hpp" -#include +#include +#include "metal_compute.h" #undef MIN #undef MAX #define MIN(a, b) ((a) < (b) ? (a) : (b)) #define MAX(a, b) ((a) > (b) ? (a) : (b)) - -bool has_init = false; - -struct ggml_metal_kernel { - MTL::ComputePipelineState * pipeline; -}; - -struct ggml_metal_context * ctx; - -enum { - MTLGPUFamilyApple1 = 1001, // Example starting value, adjust based on actual definition - MTLGPUFamilyCommon1 = 3001, // Example starting value - MTLGPUFamilyMetal3 = 5001, - MTLGPUFamilyApple7 = 1007, -}; - -enum ggml_metal_kernel_type { - GGML_METAL_KERNEL_EMBEDDING, - GGML_METAL_KERNEL_BATCH_ADD, - GGML_METAL_KERNEL_RELU, - GGML_METAL_KERNEL_SILU, - GGML_METAL_KERNEL_GELU, - GGML_METAL_KERNEL_GELU_QUICK, - GGML_METAL_KERNEL_RMS_NORM, - GGML_METAL_KERNEL_SOFT_MAX, - GGML_METAL_KERNEL_SOFT_MAX_4, - GGML_METAL_KERNEL_ROPE, - GGML_METAL_KERNEL_MUL_MM_INT4, - GGML_METAL_KERNEL_TYPE_COUNT -}; - -enum ggml_status { - GGML_STATUS_SUCCESS, - GGML_STATUS_FAILED -}; - -// Context struct holding Metal related objects and state -struct ggml_metal_context { - int n_cb; - - MTL::Device * device; - MTL::CommandQueue * queue; - static std::unordered_map _mumap; - - 
dispatch_queue_t d_queue; - - ggml_metal_kernel kernels[GGML_METAL_KERNEL_TYPE_COUNT]; - - bool support_simdgroup_reduction; - bool support_simdgroup_mm; - - bool should_capture_next_compute; -}; void *allocateSharedMem(size_t size) { if (!has_init) { init(); @@ -78,12 +25,12 @@ void *allocateSharedMem(size_t size) { } void init() { - ctx = new(struct ggml_metal_context); + ctx = new(struct metal_context); MTL::Device *device = MTL::CreateSystemDefaultDevice(); ctx->device = device; - ctx->n_cb = 1; // TODO: n_cb and GGML_METAL_MAX_BUFFERS? MIN(n_cb, GGML_METAL_MAX_BUFFERS) + ctx->n_cb = 1; // TODO: n_cb and METAL_MAX_BUFFERS? MIN(n_cb, METAL_MAX_BUFFERS=64) ctx->queue = ctx->device->newCommandQueue(); - ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT); + // ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT); MTL::Library *metal_library = ctx->device->newDefaultLibrary(); // simd group support @@ -116,12 +63,12 @@ void init() { // load kernels { NS::Error *error = nullptr; - for (int i = 0; i < GGML_METAL_KERNEL_TYPE_COUNT; ++i) { + for (int i = 0; i < METAL_KERNEL_TYPE_COUNT; ++i) { ctx->kernels[i].pipeline = nullptr; } -#define GGML_METAL_ADD_KERNEL(e, name, supported) \ +#define METAL_ADD_KERNEL(e, name, supported) \ if (supported) { \ - struct ggml_metal_kernel * kernel = &ctx->kernels[e]; \ + struct metal_kernel * kernel = &ctx->kernels[e]; \ const char * str = "kernel_" + name; \ auto str = NS::String::string(str, NS::ASCIIStringEncoding); \ MTL::Function * metal_function = metal_library->newFunction(str); \ @@ -137,45 +84,267 @@ void init() { // simd_sum and simd_max requires MTLGPUFamilyApple7 // TODO: solve error - GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_EMBEDDING, "embedding", true); - GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_BATCH_ADD, "batch_add", true); - GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_RELU, "relu", true); - GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_SILU, "silu", true); - 
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_GELU, "gelu", true); - GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_GELU_QUICK, "gelu_quick", true); - GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_RMS_NORM, "rms_norm", true); - GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_SOFT_MAX, "soft_max", true); - GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_SOFT_MAX_4, "soft_max_4", true); - GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_ROPE, "rope", true); - GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_MUL_MM_INT4, "mul_mm_int4", true); + METAL_ADD_KERNEL(METAL_KERNEL_EMBEDDING, "embedding", true); + METAL_ADD_KERNEL(METAL_KERNEL_BATCH_ADD, "batch_add", true); + METAL_ADD_KERNEL(METAL_KERNEL_RELU, "relu", true); + METAL_ADD_KERNEL(METAL_KERNEL_SILU, "silu", true); + METAL_ADD_KERNEL(METAL_KERNEL_GELU, "gelu", true); + METAL_ADD_KERNEL(METAL_KERNEL_GELU_QUICK, "gelu_quick", true); + METAL_ADD_KERNEL(METAL_KERNEL_RMS_NORM, "rms_norm", true); + METAL_ADD_KERNEL(METAL_KERNEL_SOFT_MAX, "soft_max", true); + METAL_ADD_KERNEL(METAL_KERNEL_SOFT_MAX_4, "soft_max_4", true); + METAL_ADD_KERNEL(METAL_KERNEL_ROPE, "rope", true); + METAL_ADD_KERNEL(METAL_KERNEL_MUL_MM_INT4_F32, "mul_mm_int4_f32", true); + METAL_ADD_KERNEL(METAL_KERNEL_MUL_MV_INT4_F32, "mul_mv_int4_f32", true); + METAL_ADD_KERNEL(METAL_KERNEL_MUL_MM_F32_F32, "mul_mm_f32_f32", true); + METAL_ADD_KERNEL(METAL_KERNEL_MUL_MV_F32_F32, "mul_mv_f32_f32", true); } metal_library->release(); has_init = true; } -MTL::Buffer *MetalIMP::getBufferfromPtr(void *ptr) { - if (_mumap.find(ptr) == _mumap.end()) { +MTL::Buffer *getBufferfromPtr(void *ptr) { + if (ctx->_mumap.find(ptr) == ctx->_mumap.end()) { std::cerr << "Cannot find the corresponding MTL::Buffer." 
<< std::endl; return NULL; } else - return _mumap[ptr]; + return ctx->_mumap[ptr]; } -static void ggml_metal_free(struct ggml_metal_context * ctx) { - for (int i = 0; i < GGML_METAL_KERNEL_TYPE_COUNT; ++i) { +static void metal_free(struct metal_context * ctx) { + for (int i = 0; i < METAL_KERNEL_TYPE_COUNT; ++i) { ctx->kernels[i].pipeline->release(); } ctx->queue->release(); ctx->device->release(); - dispatch_release(ctx->d_queue); + // dispatch_release(ctx->d_queue); free(ctx); } -static enum ggml_status ggml_metal_graph_compute(struct ggml_metal_context * ctx, - struct ggml_cgraph * gf) { - - return GGML_STATUS_SUCCESS; +static enum status metal_graph_compute(metal_kernel_type op, + struct metal_cgraph * metal_data) { + // in TinyChatEngine, inputs are operations and grouped tensors + MTL::ComputePassDescriptor* edesc = MTL::ComputePassDescriptor::computePassDescriptor(); + edesc->setDispatchType(MTL::DispatchTypeSerial); + + const int n_nodes = metal_data->n_nodes; + const int n_cb = ctx->n_cb; // number of command buffer, TODO: currently 1 in TinyChatEngine + const int n_nodes_per_cb = (n_nodes + n_cb - 1) / n_cb; + + MTL::CommandBuffer *command_buffer_builder[n_cb]; + for (int cb_idx = 0; cb_idx < n_cb; ++cb_idx) { + MTL::CommandBuffer *command_buffer = ctx->queue->commandBufferWithUnretainedReferences(); + command_buffer_builder[cb_idx] = command_buffer; + // enqueue the command buffers in order to specify their execution order + command_buffer->enqueue(); + } + MTL::CommandBuffer **command_buffers = command_buffer_builder; + for (int iter = 0; iter < n_cb; ++iter){ + const int cb_idx = iter; + size_t offs_src0 = 0; + size_t offs_src1 = 0; + size_t offs_src2 = 0; + size_t offs_dst = 0; + MTL::CommandBuffer *command_buffer = command_buffers[cb_idx]; + MTL::ComputeCommandEncoder *encoder = command_buffer->computeCommandEncoder(edesc); + + const int node_start = (cb_idx + 0) * n_nodes_per_cb; + const int node_end = MIN((cb_idx == n_cb - 1) ? 
n_nodes : (cb_idx + 1) * n_nodes_per_cb, n_nodes); + + for (int i = node_start; i < node_end; ++i) { + if (i == -1) { + encoder->memoryBarrier(MTL::BarrierScopeBuffers); + continue; + } + switch (op) { + case (METAL_KERNEL_EMBEDDING): + MTL::Buffer *id_src0 = getBufferfromPtr(metal_data->input_id); + MTL::Buffer *id_dst = getBufferfromPtr(metal_data->output); + MTL::Buffer *id_lookup = getBufferfromPtr(metal_data->lookup); + encoder->setComputePipelineState(ctx->kernels[op].pipeline); + encoder->setBuffer(id_src0, offs_src0, 0); + encoder->setBuffer(id_dst, offs_src1, 1); + encoder->setBuffer(id_lookup, offs_src2, 2); + encoder->setBytes(&metal_data->op_constants.embed_dim, sizeof(embed_dim), 3); + int threadsPerBlock = 1024; + int blocksPerGrid = (metal_data->input_id.m_dim_z + threadsPerBlock - 1) / threadsPerBlock; + MTL::Size ThreadperGroup = MTL::Size::Make(threadsPerBlock, 1, 1); + MTL::Size ThreadgroupsperGrid = MTL::Size::Make((input_id.m_dim_z + threadsPerBlock - 1) / threadsPerBlock, 1, 1); + // Dispatch the kernel + encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); + break; + const metal_params * inputs = metal_data->mm_nodes[i]; + struct matrix src0 = inputs->A; + struct matrix src1 = inputs->B; + struct matrix dst = inputs->C; + // TODO: ne[0], nb[0] calculation & order + const int64_t ne00 = src0.row; + const int64_t ne01 = src0.column; + const int64_t ne02 = 1; + const int64_t ne03 = 1; + + const uint64_t nb00 = (op == METAL_KERNEL_MUL_MM_INT4_F32) || (op == METAL_KERNEL_MUL_MV_INT4_F32) ? 
sizeof(uint8_t) : sizeof(float); + const uint64_t nb01 = nb00*ne00; + const uint64_t nb02 = nb01*ne01; + const uint64_t nb03 = nb02*ne02; + + const int64_t ne10 = src1.row; + const int64_t ne11 = src1.column; + const int64_t ne12 = 1; + const int64_t ne13 = 1; + + const uint64_t nb10 = sizeof(float); + const uint64_t nb11 = nb10*ne10; + const uint64_t nb12 = nb11*ne11; + const uint64_t nb13 = nb12*ne12; + + const int64_t ne0 = dst.row; + const int64_t ne1 = dst.column; + const int64_t ne2 = 1; + const int64_t ne3 = 1; + + const uint64_t nb0 = sizeof(float); + const uint64_t nb1 = nb0*ne0; + const uint64_t nb2 = nb1*ne1; + const uint64_t nb3 = nb2*ne2; + case METAL_KERNEL_MUL_MM_INT4_F32: + case METAL_KERNEL_MUL_MV_INT4_F32: + case METAL_KERNEL_MUL_MM_F32_F32: + case METAL_KERNEL_MUL_MV_F32_F32: + // TODO: buffer retrieved? + MTL::Buffer *id_src0 = (op == METAL_KERNEL_MUL_MM_INT4_F32) || (op == METAL_KERNEL_MUL_MV_INT4_F32) ? getBufferfromPtr(src0.int4_data_ptr) : getBufferfromPtr(src0.data_ptr); + MTL::Buffer *id_src1 = getBufferfromPtr(src1.data_ptr); + MTL::Buffer *id_dst = getBufferfromPtr(dst.data_ptr); + const uint r2 = ne12/ne02; + const uint r3 = ne13/ne03; + int ne11_mm_min = 1; + // src0 quantized; src1 F32 + // for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs + // AMD GPU and older A-chips will reuse matrix-vector multiplication kernel + if (ctx->device->supportsFamily((MTL::GPUFamily)MTLGPUFamilyApple7)&& + ne00 % 32 == 0 && ne00 >= 64){ + encoder->setComputePipelineState(ctx->kernels[op].pipeline); + encoder->setBuffer(id_src0, offs_src0, 0); + encoder->setBuffer(id_src1, offs_src1, 1); + encoder->setBuffer(id_dst, offs_src2, 2); + encoder->setBytes(&ne00, sizeof(ne00), 3); + encoder->setBytes(&ne02, sizeof(ne02), 4); + encoder->setBytes(&nb01, sizeof(nb01), 5); + encoder->setBytes(&nb02, sizeof(nb02), 6); + encoder->setBytes(&ne12, sizeof(ne12), 7); + encoder->setBytes(&nb10, sizeof(nb10), 8); + encoder->setBytes(&nb11, 
sizeof(nb11), 9); + encoder->setBytes(&nb12, sizeof(nb12), 10); + encoder->setBytes(&ne0, sizeof(ne0), 11); + encoder->setBytes(&ne1, sizeof(ne1), 12); + encoder->setBytes(&r2, sizeof(r2), 13); + encoder->setBytes(&r3, sizeof(r3), 14); + encoder->setThreadgroupMemoryLength(8192, 0); + MTL::Size ThreadperGroup = MTL::Size::Make(128, 1, 1); + MTL::Size ThreadgroupsperGrid = MTL::Size::Make((ne11 + 31)/32, (ne01 + 63)/64, ne12*ne13); // from https://github.com/ggerganov/llama.cpp/blob/d5ab29757ebc59a30f03e408294ec20628a6374e/ggml-metal.m#L1405 + // Dispatch the kernel + encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); + } + break; + case (METAL_KERNEL_BATCH_ADD): + MTL::Buffer *id_src0 = getBufferfromPtr(src0.data_ptr); + MTL::Buffer *id_src1 = getBufferfromPtr(src1.data_ptr); + MTL::Buffer *id_dst = getBufferfromPtr(dst.data_ptr); + encoder->setComputePipelineState(ctx->kernels[op].pipeline); + encoder->setBuffer(id_src0, offs_src0, 0); + encoder->setBuffer(id_src1, offs_src1, 1); + encoder->setBuffer(id_dst, offs_src2, 2); + MTL::Size ThreadperGroup = MTL::Size::Make(src0.row, src0.column, 1); + MTL::Size ThreadgroupsperGrid = MTL::Size::Make(1, 1, 1); + encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); + break; + case (METAL_KERNEL_RELU): + case (METAL_KERNEL_SILU): + case (METAL_KERNEL_GELU): + case (METAL_KERNEL_GELU_QUICK): + MTL::Buffer *id_src0 = getBufferfromPtr(src0.data_ptr); + MTL::Buffer *id_dst = getBufferfromPtr(dst.data_ptr); + encoder->setComputePipelineState(ctx->kernels[op].pipeline); + encoder->setBuffer(id_src0, offs_src0, 0); + encoder->setBuffer(id_dst, offs_src2, 1); + MTL::Size ThreadperGroup = MTL::Size::Make(src0.length,1, 1); + MTL::Size ThreadgroupsperGrid = MTL::Size::Make(1, 1, 1); + encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); + break; + case (METAL_KERNEL_RMS_NORM): + int nth = 32; // SIMD width + const int64_t ne00 = src0.row; + const int64_t ne01 = src0.column; + const 
int64_t ne02 = 1; + const int64_t ne03 = 1; + // TODO: nb00 should be half? + const uint64_t nb00 = (op == METAL_KERNEL_MUL_MM_INT4_F32) || (op == METAL_KERNEL_MUL_MV_INT4_F32) ? sizeof(uint8_t) : sizeof(float); + const uint64_t nb01 = nb00*ne00; + const uint64_t nb02 = nb01*ne01; + const uint64_t nb03 = nb02*ne02; + MTL::Buffer *id_src0 = getBufferfromPtr(src0.half_data_ptr); + MTL::Buffer *id_src1 = getBufferfromPtr(src1.half_data_ptr); + MTL::Buffer *id_dst = getBufferfromPtr(dst.half_data_ptr); + // TODO: add src1 + encoder->setComputePipelineState(ctx->kernels[op].pipeline); + encoder->setBuffer(id_src0, offs_src0, 0); + encoder->setBuffer(id_dst, offs_src2, 1); + encoder->setBytes(&ne00, sizeof(ne00), 2); + encoder->setBytes(&nb01, sizeof(nb01), 3); + encoder->setBytes(&(metal_data->op_constants.eps), sizeof(metal_data->op_constants.eps), 4); + encoder->setThreadgroupMemoryLength(32*sizeof(float), 0); + encoder->dispatchThreadgroups(MTL::Size::Make(src0.row, 1, 1), MTL::Size::Make(src0.row, 1, 1)); + break; + case (METAL_KERNEL_SOFT_MAX): + case (METAL_KERNEL_SOFT_MAX_4): + int nth = 32; // SIMD width + if (ne00%4 == 0) { + while (nth < ne00/4 && nth < 256) { + nth *= 2; + } + encoder->setComputePipelineState(ctx->kernels[op].pipeline); + } else { + while (nth < ne00 && nth < 1024) { + nth *= 2; + } + encoder->setComputePipelineState(ctx->kernels[op].pipeline); + } + // TODO: type + const int64_t ne00 = src0.row; + const int64_t ne01 = src0.column; + const int64_t ne02 = 1; + const int64_t ne03 = 1; + const float scale = metal_data->op_constants.scale; + MTL::Buffer *id_src0 = getBufferfromPtr(src0.half_data_ptr); + MTL::Buffer *id_src1 = getBufferfromPtr(src1.data_ptr); + MTL::Buffer *id_dst = getBufferfromPtr(dst.data_ptr); + encoder->setBuffer(id_src0, offs_src0, 0); + encoder->setBuffer(id_src1, offs_src1, 1); + encoder->setBuffer(id_dst, offs_src2, 2); + encoder->setBytes(&ne00, sizeof(ne00), 3); + encoder->setBytes(&ne01, sizeof(ne01), 4); + 
encoder->setBytes(&ne02, sizeof(ne02), 5); + encoder->setBytes(&scale, sizeof(scale), 6); + encoder->setThreadgroupMemoryLength(32*sizeof(float), 0); + encoder->dispatchThreadgroups(MTL::Size::Make(ne01*ne02*ne03, 1, 1), MTL::Size::Make(nth, 1, 1)); + break; + case (METAL_KERNEL_ROPE): + //TODO: implement ROPE + break; + } + if (encoder!=nullptr){ + encoder->endEncoding(); + encoder=nullptr; + } + command_buffer->commit(); + command_buffer->waitUntilCompleted(); + if (command_buffer->status()!=MTL::CommandBufferStatusCompleted){ + return STATUS_FAILED; + } + } + } + return STATUS_SUCCESS; } \ No newline at end of file diff --git a/kernels/metal/metal_compute.h b/kernels/metal/metal_compute.h new file mode 100644 index 00000000..0d1ec21f --- /dev/null +++ b/kernels/metal/metal_compute.h @@ -0,0 +1,99 @@ +#ifndef METAL_COMPUTE_H +#define METAL_COMPUTE_H + +#include "../matmul.h" +#include "operators.h" +#include "Foundation/Foundation.hpp" +#include "Metal/Metal.hpp" + +bool has_init = false; + +struct metal_kernel { + MTL::ComputePipelineState * pipeline; +}; + +struct metal_context * ctx; + +enum { + MTLGPUFamilyApple1 = 1001, + MTLGPUFamilyCommon1 = 3001, + MTLGPUFamilyMetal3 = 5001, + MTLGPUFamilyApple7 = 1007, +}; + +enum metal_kernel_type { + METAL_KERNEL_EMBEDDING, + METAL_KERNEL_BATCH_ADD, + METAL_KERNEL_RELU, + METAL_KERNEL_SILU, + METAL_KERNEL_GELU, + METAL_KERNEL_GELU_QUICK, + METAL_KERNEL_RMS_NORM, + METAL_KERNEL_SOFT_MAX, + METAL_KERNEL_SOFT_MAX_4, + METAL_KERNEL_ROPE, + METAL_KERNEL_MUL_MM_INT4_F32, + METAL_KERNEL_MUL_MV_INT4_F32, + METAL_KERNEL_MUL_MM_F32_F32, + METAL_KERNEL_MUL_MV_F32_F32, + METAL_KERNEL_TYPE_COUNT +}; + +enum status { + STATUS_SUCCESS, + STATUS_FAILED +}; + +// Context struct holding Metal related objects and state +struct metal_context { + int n_cb; + MTL::Device * device; + MTL::CommandQueue * queue; + static std::unordered_map _mumap; + metal_kernel kernels[METAL_KERNEL_TYPE_COUNT]; + bool support_simdgroup_reduction; + bool 
support_simdgroup_mm; + bool should_capture_next_compute; + // dispatch_queue_t d_queue; +}; + +struct metal_constants { + float eps; //rms_norm + float scale; //softmax + int embed_dim; //embed +}; + +struct metal_params { + struct matrix A, B, C, bias; + struct optimization_params opt_params; + float alpha, beta; + float16_t half_alpha; + // for int4 + float *scales, *offset, *zero_point; + float16_t *half_scales; + naive_float16_t *fp16_scales; + int *int32_zero_point; + int block_size; + // for int8 activation + float *A_scales; + int8_t A_zero_point; +}; + +struct metal_cgraph{ + int n_nodes; + const struct metal_params ** mm_nodes; // matmul ops (A, B, C) + struct metal_constants op_constants; + + // for kernel_embedding + Matrix3D input_id; + Matrix3D output; + float* lookup; +}; + +void *allocateSharedMem(size_t size); +void init(); +static void metal_free(struct metal_context * ctx); +static enum status metal_graph_compute(metal_kernel_type op, + struct metal_cgraph * metal_data); + +#endif \ No newline at end of file diff --git a/kernels/metal/op.metal b/kernels/metal/op.metal index 19c70ef1..f4c94eed 100644 --- a/kernels/metal/op.metal +++ b/kernels/metal/op.metal @@ -1,7 +1,6 @@ #include #include "operators.h" #include "utils.h" -#include "opParams.h" using namespace metal; @@ -12,8 +11,21 @@ using namespace metal; #define MIN(x, y) ((x) < (y) ? 
(x) : (y)) #define SWAP(x, y) { auto tmp = (x); (x) = (y); (y) = tmp; } -kernel void kernel_embedding(device Matrix3D_int& input_id [[buffer(0)]], - device Matrix3D_half& output [[buffer(1)]], +#define QK4_0 32 +#define QR4_0 2 +#define nl 2 +typedef struct { + half d; // delta + uint8_t qs[QK4_0 / 2]; // nibbles / quants +} block_q; + +typedef struct { + half d; // delta + uint8_t qs[QK4_0 / 2]; // nibbles / quants +} block_q4_0; + +kernel void kernel_embedding(device Matrix3D input_id [[buffer(0)]], + device Matrix3D output [[buffer(1)]], device float* lookup [[buffer(2)]], const unsigned int embed_dim [[buffer(3)]], uint id [[thread_position_in_grid]]) { @@ -82,12 +94,10 @@ kernel void kernel_gelu_quick( dst[tpig] = x * (1.0f / (1.0f + exp(GELU_QUICK_COEF * x))); } -// TODO: to be fixed kernel void kernel_rms_norm( device const void * src0, device const float * src1, device float * dst, - // constant MetalMatMulParams& params, constant int64_t & ne00, // row constant uint64_t & nb01, // col*sizeof(type) constant float & eps, @@ -98,9 +108,6 @@ kernel void kernel_rms_norm( uint tiisg[[thread_index_in_simdgroup]], uint ntg[[threads_per_threadgroup]]) { device const float4 * x = (device const float4 *) ((device const char *) src0 + tgpig*nb01); - unsigned int ne00 = params.m_dim_x; - unsigned int nb01 = params.m_dim_y*param.type_size; - float eps = param.eps; float4 sumf = 0; float all_sum = 0; @@ -143,7 +150,6 @@ kernel void kernel_soft_max( device const float * src0, device const float * src1, device float * dst, - // constant MetalMatMulParams& params, constant int64_t & ne00, constant int64_t & ne01, constant int64_t & ne02, @@ -154,12 +160,6 @@ kernel void kernel_soft_max( uint sgitg[[simdgroup_index_in_threadgroup]], uint tiisg[[thread_index_in_simdgroup]], uint ntg[[threads_per_threadgroup]]) { - const int64_t ne00 = params.m_dim_x; - const int64_t ne01 = params.m_dim_y; - const int64_t ne02 = params.m_dim_z; - const int64_t scale = params.scale; - - const 
int64_t i03 = (tgpig) / (ne02*ne01); const int64_t i02 = (tgpig - i03*ne02*ne01) / ne01; const int64_t i01 = (tgpig - i03*ne02*ne01 - i02*ne01); @@ -237,7 +237,6 @@ kernel void kernel_soft_max_4( device const float * src0, device const float * src1, device float * dst, - // constant MetalMatMulParams& params, constant int64_t & ne00, constant int64_t & ne01, constant int64_t & ne02, @@ -248,10 +247,6 @@ kernel void kernel_soft_max_4( uint sgitg[[simdgroup_index_in_threadgroup]], uint tiisg[[thread_index_in_simdgroup]], uint ntg[[threads_per_threadgroup]]) { - const int64_t ne00 = params.m_dim_x; - const int64_t ne01 = params.m_dim_y; - const int64_t ne02 = params.m_dim_z; - const int64_t scale = params.scale; const int64_t i03 = (tgpig) / (ne02*ne01); const int64_t i02 = (tgpig - i03*ne02*ne01) / ne01; @@ -406,7 +401,6 @@ kernel void kernel_rope( device const void * src0, device const int32_t * src1, device float * dst, - // constant MetalMatMulParams& params, constant int64_t & ne00, constant int64_t & ne01, constant int64_t & ne02, @@ -436,35 +430,6 @@ kernel void kernel_rope( uint tiitg[[thread_index_in_threadgroup]], uint3 tptg[[threads_per_threadgroup]], uint3 tgpig[[threadgroup_position_in_grid]]) { - // constant int64_t ne00 = param.m_dim_x; - // constant int64_t ne01 = param.m_dim_y; - // constant int64_t ne02 = param.m_dim_z; - // constant int64_t ne03 = 0; - // constant uint64_t nb00 = param.m_dim_x*param.type_size; - // constant uint64_t nb01 = param.m_dim_y*param.type_size; - // constant uint64_t nb02 = param.m_dim_z*param.type_size; - // constant uint64_t nb03 = 0; - // constant int64_t ne0 = param.m_dim_x; - // constant int64_t ne1 = param.m_dim_y; - // constant int64_t ne2 = param.m_dim_z; - // constant int64_t ne3 = 0; - // constant uint64_t nb0 = param.m_dim_x*param.type_size; - // constant uint64_t nb1 = param.m_dim_y*param.type_size; - // constant uint64_t nb2 = param.m_dim_z*param.type_size; - // constant uint64_t nb3 = 0; - - // int n_past = 
param.n_past; - // int n_dims = param.n_dims; - // int mode = param.mode; - // int n_orig_ctx = param.n_orig_ctx; - // float freq_base = param.freq_base; - // float freq_scale = param.freq_scale; - // float ext_factor = param.ext_factor; - // float attn_factor = param.attn_factor; - // float beta_fast = param.beta_fast; - // float beta_slow = param.beta_slow; - - const int64_t i3 = tgpig[2]; const int64_t i2 = tgpig[1]; const int64_t i1 = tgpig[0]; @@ -544,13 +509,16 @@ matmulInt4_SIMD_Q4Interleave_unroll16(GPU): 1800 ms, 133 GOP/s matmulInt4_SIMD_Q4Interleave_unroll32(GPU): 1500 ms, 160 GOP/s */ -#define QK4_0 32 -#define QR4_0 2 -#define nl 2 -typedef struct { - half d; // delta - uint8_t qs[QK4_0 / 2]; // nibbles / quants -} block_q; +#define BLOCK_SIZE_M 64 // 8 simdgroup matrices from matrix A +#define BLOCK_SIZE_N 32 // 4 simdgroup matrices from matrix B +#define BLOCK_SIZE_K 32 +#define THREAD_MAT_M 4 // each thread take 4 simdgroup matrices from matrix A +#define THREAD_MAT_N 2 // each thread take 2 simdgroup matrices from matrix B +#define THREAD_PER_BLOCK 128 +#define THREAD_PER_ROW 2 // 2 thread for each row in matrix A to load numbers +#define THREAD_PER_COL 4 // 4 thread for each row in matrix B to load numbers +#define SG_MAT_SIZE 64 // simdgroup matrix is of shape 8x8 +#define SG_MAT_ROW 8 template void dequantize_q4_0(device const block_q *xb, short il, thread type4x4 & reg) { @@ -567,7 +535,7 @@ void dequantize_q4_0(device const block_q *xb, short il, thread type4x4 & reg) { } } -void kernel_mul_mm_int4(device const uchar * src0, +void kernel_mul_mm_int4_f32(device const uchar * src0, device const uchar * src1, device float * dst, constant int64_t & ne00, @@ -586,7 +554,7 @@ void kernel_mul_mm_int4(device const uchar * src0, uint3 tgpig[[threadgroup_position_in_grid]], uint tiitg[[thread_index_in_threadgroup]], uint sgitg[[simdgroup_index_in_threadgroup]]) { - + short nl = 2; threadgroup half * sa = (threadgroup half *)(shared_memory); threadgroup 
float * sb = (threadgroup float *)(shared_memory + 4096); @@ -696,4 +664,344 @@ void kernel_mul_mm_int4(device const uchar * src0, } } } +} + +// putting them in the kernel cause a significant performance penalty +#define N_DST 4 // each SIMD group works on 4 rows +#define N_SIMDGROUP 2 // number of SIMD groups in a thread group +template +void mul_vec_q_n_f32_impl( + device const void * src0, + device const float * src1, + device float * dst, + int64_t ne00, + int64_t ne01, + int64_t ne02, + int64_t ne10, + int64_t ne12, + int64_t ne0, + int64_t ne1, + uint r2, + uint r3, + uint3 tgpig, uint tiisg, uint sgitg) { + const int nb = ne00/QK4_0; + + const int r0 = tgpig.x; + const int r1 = tgpig.y; + const int im = tgpig.z; + + const int first_row = (r0 * nsg + sgitg) * nr; + + const uint i12 = im%ne12; + const uint i13 = im/ne12; + + const uint offset0 = first_row * nb + (i12/r2)*(nb*ne01) + (i13/r3)*(nb*ne01*ne02); + + device const block_q_type * x = (device const block_q_type *) src0 + offset0; + device const float * y = (device const float *) src1 + r1*ne10 + im*ne00*ne1; + + float yl[16]; // src1 vector cache + float sumf[nr] = {0.f}; + + const int ix = (tiisg/2); + const int il = (tiisg%2)*8; + + device const float * yb = y + ix * QK4_0 + il; + + // each thread in a SIMD group deals with half a block. 
+ for (int ib = ix; ib < nb; ib += nw/2) { + float sumy = 0; + for (int i = 0; i < 8; i += 2) { + sumy += yb[i] + yb[i+1]; + yl[i+0] = yb[i+ 0]; + yl[i+1] = yb[i+ 1]/256.f; + + sumy += yb[i+16] + yb[i+17]; + yl[i+8] = yb[i+16]/16.f; + yl[i+9] = yb[i+17]/4096.f; + } + + for (int row = 0; row < nr; row++) { + sumf[row] += block_q_n_dot_y(x+ib+row*nb, sumy, yl, il); + } + + yb += QK4_0 * 16; + } + + for (int row = 0; row < nr; ++row) { + const float tot = simd_sum(sumf[row]); + if (tiisg == 0 && first_row + row < ne01) { + dst[im*ne0*ne1 + r1*ne0 + first_row + row] = tot; + } + } +} + +kernel void kernel_mul_mv_int4_f32( + device const void * src0, + device const float * src1, + device float * dst, + constant int64_t & ne00, + constant int64_t & ne01, + constant int64_t & ne02, + constant uint64_t & nb00, + constant uint64_t & nb01, + constant uint64_t & nb02, + constant int64_t & ne10, + constant int64_t & ne11, + constant int64_t & ne12, + constant uint64_t & nb10, + constant uint64_t & nb11, + constant uint64_t & nb12, + constant int64_t & ne0, + constant int64_t & ne1, + constant uint & r2, + constant uint & r3, + uint3 tgpig[[threadgroup_position_in_grid]], + uint tiisg[[thread_index_in_simdgroup]], + uint sgitg[[simdgroup_index_in_threadgroup]]) { + mul_vec_q_n_f32_impl(src0,src1,dst,ne00,ne01,ne02,ne10,ne12,ne0,ne1,r2,r3,tgpig,tiisg,sgitg); +} + +template +void dequantize_f32(device const float4x4 * src, short il, thread type4x4 & reg) { + float4x4 temp = *(((device float4x4 *)src)); + for (int i = 0; i < 16; i++){ + reg[i/4][i%4] = temp[i/4][i%4]; + } +} + +void kernel_mul_mm_f32_f32(device const uchar * src0, + device const uchar * src1, + device float * dst, + constant int64_t & ne00, + constant int64_t & ne02, + constant uint64_t & nb01, + constant uint64_t & nb02, + constant int64_t & ne12, + constant uint64_t & nb10, + constant uint64_t & nb11, + constant uint64_t & nb12, + constant int64_t & ne0, + constant int64_t & ne1, + constant uint & r2, + constant 
uint & r3, + threadgroup uchar * shared_memory [[threadgroup(0)]], + uint3 tgpig[[threadgroup_position_in_grid]], + uint tiitg[[thread_index_in_threadgroup]], + uint sgitg[[simdgroup_index_in_threadgroup]]) { + short nl = 1; + threadgroup half * sa = (threadgroup half *)(shared_memory); + threadgroup float * sb = (threadgroup float *)(shared_memory + 4096); + + const uint r0 = tgpig.y; + const uint r1 = tgpig.x; + const uint im = tgpig.z; + + // if this block is of 64x32 shape or smaller + short n_rows = (ne0 - r0 * BLOCK_SIZE_M < BLOCK_SIZE_M) ? (ne0 - r0 * BLOCK_SIZE_M) : BLOCK_SIZE_M; + short n_cols = (ne1 - r1 * BLOCK_SIZE_N < BLOCK_SIZE_N) ? (ne1 - r1 * BLOCK_SIZE_N) : BLOCK_SIZE_N; + + // a thread shouldn't load data outside of the matrix + short thread_row = ((short)tiitg/THREAD_PER_ROW) < n_rows ? ((short)tiitg/THREAD_PER_ROW) : n_rows - 1; + short thread_col = ((short)tiitg/THREAD_PER_COL) < n_cols ? ((short)tiitg/THREAD_PER_COL) : n_cols - 1; + + simdgroup_half8x8 ma[4]; + simdgroup_float8x8 mb[2]; + simdgroup_float8x8 c_res[8]; + for (int i = 0; i < 8; i++){ + c_res[i] = make_filled_simdgroup_matrix(0.f); + } + + short il = (tiitg % THREAD_PER_ROW); + + const uint i12 = im%ne12; + const uint i13 = im/ne12; + + uint offset0 = (i12/r2)*nb02 + (i13/r3)*(nb02*ne02); + ushort offset1 = il/nl; + + device const block_q * x = (device const float4x4 *)(src0 + (r0 * BLOCK_SIZE_M + thread_row) * nb01 + offset0) + offset1; + device const float * y = (device const float *)(src1 + + nb12 * im + + nb11 * (r1 * BLOCK_SIZE_N + thread_col) + + nb10 * (BLOCK_SIZE_K / THREAD_PER_COL * (tiitg % THREAD_PER_COL))); + + for (int loop_k = 0; loop_k < ne00; loop_k += BLOCK_SIZE_K) { + // load data and store to threadgroup memory + half4x4 temp_a; + dequantize_f32(x, il, temp_a); + threadgroup_barrier(mem_flags::mem_threadgroup); + + #pragma unroll(16) + for (int i = 0; i < 16; i++) { + *(sa + SG_MAT_SIZE * ((tiitg / THREAD_PER_ROW / 8) \ + + (tiitg % THREAD_PER_ROW) * 16 + (i / 
8) * 8) \ + + (tiitg / THREAD_PER_ROW) % 8 + (i & 7) * 8) = temp_a[i/4][i%4]; + } + + *(threadgroup float2x4 *)(sb + (tiitg % THREAD_PER_COL) * 8 * 32 + 8 * (tiitg / THREAD_PER_COL)) = *((device float2x4 *)y); + + il = (il + 2 < nl) ? il + 2 : il % 2; + x = (il < 2) ? x + (2+nl-1)/nl : x; + y += BLOCK_SIZE_K; + + threadgroup_barrier(mem_flags::mem_threadgroup); + + // load matrices from threadgroup memory and conduct outer products + threadgroup half * lsma = (sa + THREAD_MAT_M * SG_MAT_SIZE * (sgitg % 2)); + threadgroup float * lsmb = (sb + THREAD_MAT_N * SG_MAT_SIZE * (sgitg / 2)); + + #pragma unroll(4) + for (int ik = 0; ik < BLOCK_SIZE_K / 8; ik++) { + #pragma unroll(4) + for (int i = 0; i < 4; i++) { + simdgroup_load(ma[i],lsma + SG_MAT_SIZE * i); + } + simdgroup_barrier(mem_flags::mem_none); + #pragma unroll(2) + for (int i = 0; i < 2; i++) { + simdgroup_load(mb[i],lsmb + SG_MAT_SIZE * i); + } + + lsma += BLOCK_SIZE_M / SG_MAT_ROW * SG_MAT_SIZE; + lsmb += BLOCK_SIZE_N / SG_MAT_ROW * SG_MAT_SIZE; + + #pragma unroll(8) + for (int i = 0; i < 8; i++){ + simdgroup_multiply_accumulate(c_res[i], mb[i/4], ma[i%4], c_res[i]); + } + } + } + + if ((r0 + 1) * BLOCK_SIZE_M <= ne0 && (r1 + 1) * BLOCK_SIZE_N <= ne1) { + device float * C = dst + (BLOCK_SIZE_M * r0 + 32 * (sgitg & 1)) \ + + (BLOCK_SIZE_N * r1 + 16 * (sgitg >> 1)) * ne0 + im*ne1*ne0; + for (int i = 0; i < 8; i++) { + simdgroup_store(c_res[i], C + 8 * (i%4) + 8 * ne0 * (i/4), ne0); + } + } else { + // block is smaller than 64x32, we should avoid writing data outside of the matrix + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup float * temp_str = ((threadgroup float *)shared_memory) \ + + 32 * (sgitg&1) + (16 * (sgitg>>1)) * BLOCK_SIZE_M; + for (int i = 0; i < 8; i++) { + simdgroup_store(c_res[i], temp_str + 8 * (i%4) + 8 * BLOCK_SIZE_M * (i/4), BLOCK_SIZE_M); + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + device float * C = dst + (BLOCK_SIZE_M * r0) + (BLOCK_SIZE_N * r1) * ne0 + 
im*ne1*ne0; + if (sgitg == 0) { + for (int i = 0; i < n_rows; i++) { + for (int j = tiitg; j < n_cols; j += BLOCK_SIZE_N) { + *(C + i + j * ne0) = *(temp_str + i + j * BLOCK_SIZE_M); + } + } + } + } +} + +#define N_F32_F32 4 +void kernel_mul_mv_f32_f32_impl( + device const char * src0, + device const char * src1, + device float * dst, + constant int64_t & ne00, + constant int64_t & ne01, + constant int64_t & ne02, + constant uint64_t & nb00, + constant uint64_t & nb01, + constant uint64_t & nb02, + constant int64_t & ne10, + constant int64_t & ne11, + constant int64_t & ne12, + constant uint64_t & nb10, + constant uint64_t & nb11, + constant uint64_t & nb12, + constant int64_t & ne0, + constant int64_t & ne1, + constant uint & r2, + constant uint & r3, + uint3 tgpig[[threadgroup_position_in_grid]], + uint tiisg[[thread_index_in_simdgroup]]) { + + const int64_t r0 = tgpig.x; + const int64_t rb = tgpig.y*N_F32_F32; + const int64_t im = tgpig.z; + + const uint i12 = im%ne12; + const uint i13 = im/ne12; + + const uint offset0 = r0*nb01 + (i12/r2)*nb02 + (i13/r3)*nb02*ne02; + + device const float * x = (device const float *) (src0 + offset0); + + if (ne00 < 128) { + for (int row = 0; row < N_F32_F32; ++row) { + int r1 = rb + row; + if (r1 >= ne11) { + break; + } + + device const float * y = (device const float *) (src1 + r1*nb11 + im*nb12); + + float sumf = 0; + for (int i = tiisg; i < ne00; i += 32) { + sumf += (float) x[i] * (float) y[i]; + } + + float all_sum = simd_sum(sumf); + if (tiisg == 0) { + dst[im*ne1*ne0 + r1*ne0 + r0] = all_sum; + } + } + } else { + device const float4 * x4 = (device const float4 *)x; + for (int row = 0; row < N_F32_F32; ++row) { + int r1 = rb + row; + if (r1 >= ne11) { + break; + } + + device const float * y = (device const float *) (src1 + r1*nb11 + im*nb12); + device const float4 * y4 = (device const float4 *) y; + + float sumf = 0; + for (int i = tiisg; i < ne00/4; i += 32) { + for (int k = 0; k < 4; ++k) sumf += (float) x4[i][k] * 
y4[i][k]; + } + + float all_sum = simd_sum(sumf); + if (tiisg == 0) { + for (int i = 4*(ne00/4); i < ne00; ++i) all_sum += (float) x[i] * y[i]; + dst[im*ne1*ne0 + r1*ne0 + r0] = all_sum; + } + } + } +} + +[[host_name("kernel_mul_mv_f32_f32")]] +kernel void kernel_mul_mv_f32_f32( + device const char * src0, + device const char * src1, + device float * dst, + constant int64_t & ne00, + constant int64_t & ne01, + constant int64_t & ne02, + constant uint64_t & nb00, + constant uint64_t & nb01, + constant uint64_t & nb02, + constant int64_t & ne10, + constant int64_t & ne11, + constant int64_t & ne12, + constant uint64_t & nb10, + constant uint64_t & nb11, + constant uint64_t & nb12, + constant int64_t & ne0, + constant int64_t & ne1, + constant uint & r2, + constant uint & r3, + uint3 tgpig[[threadgroup_position_in_grid]], + uint tiisg[[thread_index_in_simdgroup]]) { + kernel_mul_mv_f32_f32_impl(src0, src1, dst, ne00, ne01, ne02, nb00, nb01, nb02, ne10, ne11, ne12, nb10, nb11, nb12, ne0, ne1, r2, r3, tgpig, tiisg); } \ No newline at end of file diff --git a/llm/src/ops/metal/LlamaRMSNorm.cc b/llm/src/ops/metal/LlamaRMSNorm.cc index 7903b0b2..230c5023 100644 --- a/llm/src/ops/metal/LlamaRMSNorm.cc +++ b/llm/src/ops/metal/LlamaRMSNorm.cc @@ -2,70 +2,25 @@ #include #include "operators.h" #include "utils.h" -#include "matmul_metal_imp.h" +#include "metal_compute.h" // TODO: modify metal for weights void LlamaRMSNorm_metal::forward(const Matrix3D &x, Matrix3D &output, float eps) { - int m = x.m_dim_x * x.m_dim_y; - int n = x.m_dim_z; - dim3 grid(m); - dim3 block(min(n, 1024)); - - /* For general cases, n is equal to hidden_units, e.g., 512/1024. - Since we have warp shuffle inside the code, block.x % 32 should be 0. 
- */ - if (n % 32 != 0) { - block.x = 1024; - } - - block.x = block.x / (4 / sizeof(half)); // if using half, only need half of block.x - - setupLibrary("kernel_rms_norm"); - - _mParams = _mDevice->newBuffer(sizeof(MetalMatMulParams), MTL::ResourceStorageModeShared); - _mParamsPtr = (MetalMatMulParams *)_mParams->contents(); - _mParamsPtr->m_dim_x = x.m_dim_x; - _mParamsPtr->m_dim_y = x.m_dim_y; - _mParamsPtr->m_dim_z = x.m_dim_z; - _mParamsPtr->eps = eps; - _mParamsPtr->type_size = sizeof(half); - - - /* should pay attention to the rsqrt precision */ - half *input = x.m_data, *out = output.m_data; - float *gamma = weight.m_data; - - _mBufferA = getBufferfromPtr((void *)input); - _mBufferB = getBufferfromPtr((void *)gamma); - _mBufferResult = getBufferfromPtr((void *)out); - - - - if (!_mBufferA || !_mBufferResult) { - std::cerr << "Failed to locate some buffer!" << std::endl; - exit(-1); - } - - // Create a command buffer to hold commands. - MTL::CommandBuffer *commandBuffer = _mCommandQueue->commandBuffer(); - assert(commandBuffer != nullptr); - - // Start a compute pass. - MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder(); - assert(computeEncoder != nullptr); - - // Encode the pipeline state object and its parameters. 
- computeEncoder->setComputePipelineState(_mMatmulFunctionPSO); - computeEncoder->setBuffer(_mBufferA, 0, 0); - computeEncoder->setBuffer(_mBufferB, 0, 1); - computeEncoder->setBuffer(_mBufferResult, 0, 2); - computeEncoder->setBuffer(_mParams, 0, 3); - - computeEncoder->setThreadgroupMemoryLength(param.type_size * N_SIMDWIDTH, 0); - - MTL::Size threadgroupSize = MTL::Size::Make(block.x, block.y, block.z); - MTL::Size gridSize = MTL::Size::Make(grid.x, grid.y, grid.z); - - SendEncode(gridSize, threadgroupSize, commandBuffer, computeEncoder); - _mMatmulFunctionPSO->release(); + const struct metal_params params; + params.A.row = x.m_dim_y; + params.A.column = x.m_dim_z; + params.A.half_data_ptr = x.m_data; + params.B.row = b.m_dim_z; // k + params.B.column = b.m_dim_y; // n + params.B.half_data_ptr = b.m_data; + params.C.row = output.m_dim_y; + params.C.column = output.m_dim_z; + params.C.half_data_ptr = output.m_data; + + struct metal_constants op_constants = new (struct metal_constants); + op_constants.eps = eps; + struct metal_cgraph *graph = new (struct metal_cgraph); + graph->n_nodes = 1; + graph->mm_nodes[0] = params; + metal_graph_compute(METAL_KERNEL_RMS_NORM, graph); } \ No newline at end of file diff --git a/llm/src/ops/metal/RotaryPosEmb.cc b/llm/src/ops/metal/RotaryPosEmb.cc index 594a210b..f039b055 100644 --- a/llm/src/ops/metal/RotaryPosEmb.cc +++ b/llm/src/ops/metal/RotaryPosEmb.cc @@ -20,6 +20,5 @@ void RotaryPosEmb_cuda_forward(Matrix3D query, Matrix3D key, Matrix3 matmul::MatmulOperator op = matmul::MatmulOperator(); op.rope_metal(¶ms, query.m_dim_x, query.m_dim_y, query.m_dim_z, n_past, n_dims, mode, n_orig_ctx, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow ); - // In llama.cpp: - // + } diff --git a/llm/src/ops/metal/batch_add.cc b/llm/src/ops/metal/batch_add.cc index 469538d6..6f35a5d9 100644 --- a/llm/src/ops/metal/batch_add.cc +++ b/llm/src/ops/metal/batch_add.cc @@ -1,12 +1,24 @@ #include "operators.h" +#include 
"metal_compute.h" // done void batch_Add(const Matrix3D &input, const Matrix3D &input2, Matrix3D &output) { - struct matmul_params params; + const struct metal_params params; + + params.A.row = input.m_dim_y; + params.A.column = input.m_dim_z; + params.A.fp16_data_ptr = input.m_data; + params.B.row = input2.m_dim_z; + params.B.column = input2.m_dim_y; + params.B.int32_data_ptr = input2.m_data; + params.C.row = output.m_dim_y; + params.C.column = output.m_dim_z; + params.C.fp16_data_ptr = output.m_data; params.A.data_ptr = input.m_data; params.B.data_ptr = input2.m_data; params.C.data_ptr = output.m_data; - - matmul::MatmulOperator op = matmul::MatmulOperator(); - op.batch_add_metal(¶ms, input.m_dim_x, input.m_dim_y, input.m_dim_z); + struct metal_cgraph *graph = new (struct metal_cgraph); + graph->n_nodes = 1; + graph->mm_nodes[0] = params; + metal_graph_compute(METAL_KERNEL_BATCH_ADD, graph); } diff --git a/llm/src/ops/metal/embedding.cc b/llm/src/ops/metal/embedding.cc index b6a18658..d7bc12f6 100644 --- a/llm/src/ops/metal/embedding.cc +++ b/llm/src/ops/metal/embedding.cc @@ -1,6 +1,6 @@ #include "operators.h" #include "utils.h" -#include "matmul_metal_imp.h" +#include "metal_compute.h" void load_Embedding_params_metal(Embedding_cuda& op, std::string prefix) { op.lookup.load((prefix + "/weight.bin").c_str()); @@ -14,44 +14,13 @@ void Embedding_cuda::forward(Matrix3D input_id, Matrix3D output) { assert(input_id.m_dim_z == output.m_dim_y); assert(output.m_dim_z == this->embed_dim); - int threadsPerBlock = 1024; - int blocksPerGrid = (input_id.m_dim_z + threadsPerBlock - 1) / threadsPerBlock; - - setupLibrary("EmbeddingKernel"); - - _mBufferA = getBufferfromPtr((void *)input_id); - _mBufferB = getBufferfromPtr((void *)this->lookup.m_data); - _mBufferResult = getBufferfromPtr((void *)output); - _mBufferEmbed_dim = getBufferfromPtr((void *)this->embed_dim); - - if (!_mBufferA || !_mBufferB || !_mBufferResult || !_mBufferScales) { - std::cerr << "Failed to locate some 
buffer!" << std::endl; - exit(-1); - } - - // Create a command buffer to hold commands. - MTL::CommandBuffer *commandBuffer = _mCommandQueue->commandBuffer(); - assert(commandBuffer != nullptr); - - // Start a compute pass. - MTL::ComputeCommandEncoder *computeEncoder = commandBuffer->computeCommandEncoder(); - assert(computeEncoder != nullptr); - - // Encode the pipeline state object and its parameters. - computeEncoder->setComputePipelineState(_mMatmulFunctionPSO); - computeEncoder->setBuffer(_mBufferA, 0, 0); - computeEncoder->setBuffer(_mBufferResult, 0, 1); - computeEncoder->setBuffer(_mBufferB, 0, 2); - computeEncoder->setBuffer(_mBufferEmbed_dim, 0, 3); - - MTL::Size gridSize = MTL::Size::Make(blocksPerGrid, 1, 1); - - // Calculate a threadgroup size - MTL::Size threadgroupSize = MTL::Size::Make(threadsPerBlock, 1, 1); - - SendEncode(gridSize, threadgroupSize, commandBuffer, computeEncoder); - _mMatmulFunctionPSO->release(); - - + struct metal_constants op_constants = new (struct metal_constants); + op_constants.embed_dim = this->embed_dim; + struct metal_cgraph *graph = new (struct metal_cgraph); + graph->n_nodes = 1; + graph->input_id = input_id; + graph->output = output; + graph->lookup = this->lookup.m_data; + metal_graph_compute(METAL_KERNEL_EMBEDDING, graph); PROFILE_END(profile_name); } \ No newline at end of file diff --git a/llm/src/ops/metal/linear.cc b/llm/src/ops/metal/linear.cc index 9455bba6..61e93b55 100644 --- a/llm/src/ops/metal/linear.cc +++ b/llm/src/ops/metal/linear.cc @@ -34,7 +34,7 @@ void Linear_half_int4::forward(const Matrix3D &x, Matrix3D params.block_size = QK; matmul::MatmulOperator op = matmul::MatmulOperator(); - op.mat_mul_accelerator_int4_fast_no_offset(¶ms); //BUG: gemv and matmul int4? (llama.cpp matmul needed) + op.mat_mul_int4_f32_metal(¶ms); //BUG: gemv and matmul int4? 
(llama.cpp matmul needed) PROFILE_END(profile_name); return; diff --git a/llm/src/ops/metal/softmax.cc b/llm/src/ops/metal/softmax.cc index ca1d3e22..3e250536 100644 --- a/llm/src/ops/metal/softmax.cc +++ b/llm/src/ops/metal/softmax.cc @@ -2,16 +2,22 @@ #include "operators.h" -// TODO: scale? void softmax(Matrix3D input, Matrix3D output) { - struct matmul_params params; + const struct metal_params params; params.A.row = input.m_dim_y; params.A.column = input.m_dim_z; params.A.half_data_ptr = input.m_data; + params.B.row = input.m_dim_z; // k + params.B.column = input.m_dim_y; // n + params.B.data_ptr = input.m_data; params.C.row = output.m_dim_y; params.C.column = output.m_dim_z; - params.C.half_data_ptr = output.m_data; + params.C.data_ptr = output.m_data; - matmul::MatmulOperator op = matmul::MatmulOperator(); - op.soft_max_metal(¶ms, input.m_dim_x, input.m_dim_y, input.m_dim_z, 1.0); + struct metal_constants op_constants = new (struct metal_constants); + op_constants.scales = this->scales; + struct metal_cgraph *graph = new (struct metal_cgraph); + graph->n_nodes = 1; + graph->mm_nodes[0] = params; + metal_graph_compute(METAL_KERNEL_SOFT_MAX, graph); } \ No newline at end of file From 0b094ad482eae70bd952339b9354100a23cfa403 Mon Sep 17 00:00:00 2001 From: RaymondWang0 Date: Sat, 6 Apr 2024 17:18:05 -0400 Subject: [PATCH 31/37] fix parameters --- llm/tests/metal/cpp_version/main.cc | 35 +++++++++++++++-------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/llm/tests/metal/cpp_version/main.cc b/llm/tests/metal/cpp_version/main.cc index cf3426a7..b29a742c 100755 --- a/llm/tests/metal/cpp_version/main.cc +++ b/llm/tests/metal/cpp_version/main.cc @@ -396,9 +396,10 @@ void test_matmul_llama(){ int m = 1; int n = 32000; int k = 4096; - int hidden_size = bs*m*k; - int weight_size = bs*n*k; - int output_size = bs*m*n; + int block_size = 32; + int hidden_size = bs * m * k; + int weight_size = bs * n * k; + int output_size = bs * m * n; unsigned char* 
src0 = new unsigned char[hidden_size]; unsigned char* src1 = new unsigned char[weight_size]; float* dst = new float[output_size]; @@ -449,27 +450,27 @@ void test_matmul_llama(){ computeEncoder->setThreadgroupMemoryLength(8192, 0); // from https://github.com/ggerganov/llama.cpp/blob/d5ab29757ebc59a30f03e408294ec20628a6374e/ggml-metal.m#L1315 - int64_t ne00 = bs; - int64_t ne01 = m; - int64_t ne02 = k; + int64_t ne00 = k; + int64_t ne01 = n; + int64_t ne02 = bs; int64_t ne03 = 1; uint64_t nb00 = sizeof(unsigned char); - uint64_t nb01 = nb00*ne00; //nb[0] * (ne[0] / ggml_blck_size(type)) + padding BUG: ggml_blck_size + uint64_t nb01 = nb00 * ne00 / block_size; // nb[0] * (ne[0] / ggml_blck_size(type)) + padding; BUG: ggml_blck_size uint64_t nb02 = nb01 * ne01; - int64_t ne10 = bs; - int64_t ne11 = n; - int64_t ne12 = k; + int64_t ne10 = k; + int64_t ne11 = m; + int64_t ne12 = bs; int64_t ne13 = 1; uint64_t nb10 = sizeof(unsigned char); - uint64_t nb11 = nb10*ne10; + uint64_t nb11 = nb10 * ne10; uint64_t nb12 = nb11 * ne11; - int64_t ne0 = bs; + int64_t ne0 = n; int64_t ne1 = m; - uint r2 = ne12/ne02; - uint r3 = ne13/ne03; - memcpy(bM1->contents(), src0, hidden_size*sizeof(unsigned char)); - memcpy(bM2->contents(), src1, weight_size*sizeof(unsigned char)); - memcpy(bM3->contents(), dst, output_size*sizeof(float)); + uint r2 = ne12 / ne02; + uint r3 = ne13 / ne03; + memcpy(bM1->contents(), src0, hidden_size * sizeof(unsigned char)); + memcpy(bM2->contents(), src1, weight_size * sizeof(unsigned char)); + memcpy(bM3->contents(), dst, output_size * sizeof(float)); memcpy(bne00->contents(), &ne00, sizeof(ne00)); memcpy(bne02->contents(), &ne02, sizeof(ne02)); memcpy(bnb01->contents(), &nb01, sizeof(nb01)); From 926dd0d3e2fbe93fd29931703c6362989565c820 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Tue, 16 Apr 2024 00:30:47 -0400 Subject: [PATCH 32/37] draft llama.cpp model --- kernels/metal/metal_compute.cc | 270 +++++++++++++++--- kernels/metal/metal_compute.h | 39 ++- 
kernels/metal/op.metal | 136 +++++++++ .../nn_modules/Int4llamaAttentionMetal.h | 89 ++++++ .../nn_modules/Int4llamaDecoderLayerMetal.h | 87 ++++++ .../nn_modules/Int4llamaDecoderMetal.h | 69 +++++ .../nn_modules/Int4llamaForCausalLMMetal.h | 61 ++++ .../metal/Int4llamaAttentionMetal.cc | 257 +++++++++++++++++ .../metal/Int4llamaDecoderLayerMetal.cc | 136 +++++++++ .../nn_modules/metal/Int4llamaDecoderMetal.cc | 118 ++++++++ .../metal/Int4llamaForCausalLMMetal.cc | 59 ++++ .../nn_modules/metal/LLaMAGenerateMetal.cc | 262 +++++++++++++++++ llm/src/ops/metal/BMM_F16T.cc | 60 ++++ llm/src/ops/metal/LlamaRMSNorm.cc | 10 +- llm/src/ops/metal/RotaryPosEmb.cc | 19 +- llm/src/ops/metal/batch_add.cc | 7 +- llm/src/ops/metal/embedding.cc | 18 +- llm/src/ops/metal/linear.cc | 8 +- llm/src/ops/metal/softmax.cc | 10 +- 19 files changed, 1640 insertions(+), 75 deletions(-) create mode 100644 llm/include/nn_modules/Int4llamaAttentionMetal.h create mode 100644 llm/include/nn_modules/Int4llamaDecoderLayerMetal.h create mode 100644 llm/include/nn_modules/Int4llamaDecoderMetal.h create mode 100644 llm/include/nn_modules/Int4llamaForCausalLMMetal.h create mode 100644 llm/src/nn_modules/metal/Int4llamaAttentionMetal.cc create mode 100644 llm/src/nn_modules/metal/Int4llamaDecoderLayerMetal.cc create mode 100644 llm/src/nn_modules/metal/Int4llamaDecoderMetal.cc create mode 100644 llm/src/nn_modules/metal/Int4llamaForCausalLMMetal.cc create mode 100644 llm/src/nn_modules/metal/LLaMAGenerateMetal.cc create mode 100644 llm/src/ops/metal/BMM_F16T.cc diff --git a/kernels/metal/metal_compute.cc b/kernels/metal/metal_compute.cc index 99a65b5f..17a41d25 100644 --- a/kernels/metal/metal_compute.cc +++ b/kernels/metal/metal_compute.cc @@ -1,3 +1,7 @@ +// Metla logic: +// (1) all computations are inserted as nodes; +// (2) every command buffer takes care of nodes computation by encoding nodes in correct order + #include #include @@ -8,6 +12,7 @@ #undef MAX #define MIN(a, b) ((a) < (b) ? 
(a) : (b)) #define MAX(a, b) ((a) > (b) ? (a) : (b)) +#define block_size 32 void *allocateSharedMem(size_t size) { if (!has_init) { @@ -24,8 +29,21 @@ void *allocateSharedMem(size_t size) { return void_ptr; } +bool init_graph(int initial_capacity) { + mgraph->mm_nodes = (const metal_params **)malloc(initial_capacity * sizeof(metal_params *)); + if (mgraph->mm_nodes == nullptr) { + return false; // Allocation failed + } + mgraph->n_nodes = 0; + mgraph->capacity = initial_capacity; + return true; +} + void init() { ctx = new(struct metal_context); + // load metal compute graph + mgraph = new(struct metal_cgraph); + init_graph(100); MTL::Device *device = MTL::CreateSystemDefaultDevice(); ctx->device = device; ctx->n_cb = 1; // TODO: n_cb and METAL_MAX_BUFFERS? MIN(n_cb, METAL_MAX_BUFFERS=64) @@ -83,7 +101,13 @@ void init() { } // simd_sum and simd_max requires MTLGPUFamilyApple7 - // TODO: solve error + // TODO: syntax error + METAL_ADD_KERNEL(METAL_KERNEL_SILUMUL_HALF, "SiLuMul_half", true); + METAL_ADD_KERNEL(METAL_KERNEL_ADD_HALF, "add_half", true); + METAL_ADD_KERNEL(METAL_KERNEL_SHAPE_QKV, "shape_qkv", true); + METAL_ADD_KERNEL(METAL_KERNEL_UNSHAPE, "unshape", true); + METAL_ADD_KERNEL(METAL_KERNEL_TRANSPOSE_1_2IDX, "transpose_1_2idx", true); + METAL_ADD_KERNEL(METAL_KERNEL_CHECK_INF_HALF, "check_inf_half", true); METAL_ADD_KERNEL(METAL_KERNEL_EMBEDDING, "embedding", true); METAL_ADD_KERNEL(METAL_KERNEL_BATCH_ADD, "batch_add", true); METAL_ADD_KERNEL(METAL_KERNEL_RELU, "relu", true); @@ -124,13 +148,12 @@ static void metal_free(struct metal_context * ctx) { free(ctx); } -static enum status metal_graph_compute(metal_kernel_type op, - struct metal_cgraph * metal_data) { +static enum status metal_graph_compute(struct metal_cgraph * mg) { // in TinyChatEngine, inputs are operations and grouped tensors MTL::ComputePassDescriptor* edesc = MTL::ComputePassDescriptor::computePassDescriptor(); edesc->setDispatchType(MTL::DispatchTypeSerial); - const int n_nodes = 
metal_data->n_nodes; + const int n_nodes = mg->n_nodes; const int n_cb = ctx->n_cb; // number of command buffer, TODO: currently 1 in TinyChatEngine const int n_nodes_per_cb = (n_nodes + n_cb - 1) / n_cb; @@ -155,45 +178,152 @@ static enum status metal_graph_compute(metal_kernel_type op, const int node_end = MIN((cb_idx == n_cb - 1) ? n_nodes : (cb_idx + 1) * n_nodes_per_cb, n_nodes); for (int i = node_start; i < node_end; ++i) { + const struct metal_params * curr_node = mg->mm_nodes[i]; + metal_kernel_type op = curr_node->op; if (i == -1) { encoder->memoryBarrier(MTL::BarrierScopeBuffers); continue; } switch (op) { + case (METAL_KERNEL_HALF2FLOAT): + MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); + MTL::Buffer *id_dst = getBufferfromPtr((curr_node->B).data_ptr); + encoder->setComputePipelineState(ctx->kernels[op].pipeline); + encoder->setBuffer(id_src0, offs_src0, 0); + encoder->setBuffer(id_dst, offs_src1, 1); + encoder->setBytes(&curr_node->sqlen, sizeof(int), 2); + MTL::Size ThreadperGroup = MTL::Size::Make(1024, 1, 1); + MTL::Size ThreadgroupsperGrid = MTL::Size::Make((curr_node->sqlen + 1024 - 1) / 1024, 1, 1); + encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); + break; + case (METAL_KERNEL_PREPARE_DECODER_ATTENTION_MASK_HALF): + MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); + encoder->setComputePipelineState(ctx->kernels[op].pipeline); + encoder->setBuffer(id_src0, offs_src0, 0); + encoder->setBytes(&curr_node->sqlen, sizeof(int), 1); + encoder->setBytes(&curr_node->past_sqlen, sizeof(int), 2); + MTL::Size ThreadperGroup = MTL::Size::Make(32, 32, 1); + MTL::Size ThreadgroupsperGrid = MTL::Size::Make((curr_node->sqlen - curr_node->past_sqlen + 32 - 1) / 32, + (curr_node->sqlen + 32 - 1) / 32, 1); + encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); + break; + case (METAL_KERNEL_SILUMUL_HALF): + MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); + 
MTL::Buffer *id_src1 = getBufferfromPtr((curr_node->B).half_data_ptr); + encoder->setComputePipelineState(ctx->kernels[op].pipeline); + encoder->setBuffer(id_src0, offs_src0, 0); + encoder->setBuffer(id_src1, offs_src1, 1); + encoder->setBytes(&curr_node->sqlen, sizeof(int), 2); + MTL::Size ThreadperGroup = MTL::Size::Make(1024, 1, 1); + MTL::Size ThreadgroupsperGrid = MTL::Size::Make((curr_node->sqlen + 1024 - 1) / 1024, 1, 1); + encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); + break; + case (METAL_KERNEL_ADD_HALF): + MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); + MTL::Buffer *id_src1 = getBufferfromPtr((curr_node->B).half_data_ptr); + MTL::Buffer *id_src2 = getBufferfromPtr((curr_node->C).half_data_ptr); + encoder->setComputePipelineState(ctx->kernels[op].pipeline); + encoder->setBuffer(id_src0, offs_src0, 0); + encoder->setBuffer(id_src1, offs_src1, 1); + encoder->setBuffer(id_src2, offs_src2, 2); + encoder->setBytes(&curr_node->sqlen, sizeof(int), 3); + MTL::Size ThreadperGroup = MTL::Size::Make(1024, 1, 1); + MTL::Size ThreadgroupsperGrid = MTL::Size::Make((curr_node->sqlen + 1024 - 1) / 1024, 1, 1); + encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); + break; + case (METAL_KERNEL_SHAPE_QKV): + MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); //input_ids int + MTL::Buffer *id_src1 = getBufferfromPtr((curr_node->B).half_data_ptr); //output half + MTL::Buffer *id_src2 = getBufferfromPtr((curr_node->C).half_data_ptr); + MTL::Buffer *id_src3 = getBufferfromPtr((curr_node->D).half_data_ptr); + encoder->setComputePipelineState(ctx->kernels[op].pipeline); + encoder->setBuffer(id_src0, offs_src0, 0); + encoder->setBuffer(id_src1, offs_src1, 1); + encoder->setBuffer(id_src2, offs_src2, 2); + encoder->setBuffer(id_src3, offs_dst, 3); + encoder->setBytes(&curr_node->num_heads, sizeof(int), 4); + encoder->setBytes(&curr_node->sqlen, sizeof(int), 5); + 
encoder->setBytes(&curr_node->head_dim, sizeof(int), 6); + MTL::Size ThreadperGroup = MTL::Size::Make(16, 1, 64); + MTL::Size ThreadgroupsperGrid = MTL::Size::Make((curr_node->num_heads + 16 - 1) / 16, + (curr_node->sqlen + 1 - 1) / 1, // threadgroup height is 1, so one group per sqlen index + (curr_node->head_dim + 64 - 1) / 64); + encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); + break; + case (METAL_KERNEL_UNSHAPE): + MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); + MTL::Buffer *id_src1 = getBufferfromPtr((curr_node->B).half_data_ptr); + encoder->setComputePipelineState(ctx->kernels[op].pipeline); + encoder->setBuffer(id_src0, offs_src0, 0); + encoder->setBuffer(id_src1, offs_src1, 1); + encoder->setBytes(&curr_node->num_heads, sizeof(int), 2); + encoder->setBytes(&curr_node->sqlen, sizeof(int), 3); + encoder->setBytes(&curr_node->head_dim, sizeof(int), 4); + MTL::Size ThreadperGroup = MTL::Size::Make(16, 1, 64); + MTL::Size ThreadgroupsperGrid = MTL::Size::Make((curr_node->num_heads + 16 - 1) / 16, + (curr_node->sqlen + 1 - 1) / 1, + (curr_node->head_dim + 64 - 1) / 64); + encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); + break; + case (METAL_KERNEL_TRANSPOSE_1_2IDX): + MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); + MTL::Buffer *id_src1 = getBufferfromPtr((curr_node->B).half_data_ptr); + encoder->setComputePipelineState(ctx->kernels[op].pipeline); + encoder->setBuffer(id_src0, offs_src0, 0); + encoder->setBuffer(id_src1, offs_src1, 1); + encoder->setBytes(&curr_node->A.row, sizeof(int), 2); + encoder->setBytes(&curr_node->A.column, sizeof(int), 3); + encoder->setBytes(&curr_node->input_m_dim_z, sizeof(int), 4); + encoder->setBytes(&curr_node->B.row, sizeof(int), 5); + encoder->setBytes(&curr_node->B.column, sizeof(int), 6); + MTL::Size ThreadperGroup = MTL::Size::Make(8, 4, 32); + MTL::Size ThreadgroupsperGrid = MTL::Size::Make((curr_node->num_heads + 8 - 1) / 8, + (curr_node->tgz + 4 - 1) / 4, + (curr_node->head_dim + 32 - 1) 
/ 32); + encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); + break; + case (METAL_KERNEL_CHECK_INF_HALF): + MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); + encoder->setComputePipelineState(ctx->kernels[op].pipeline); + encoder->setBuffer(id_src0, offs_src0, 0); + encoder->setBytes(&curr_node->sqlen, sizeof(int), 1); + MTL::Size ThreadperGroup = MTL::Size::Make(1024, 1, 1); + MTL::Size ThreadgroupsperGrid = MTL::Size::Make((curr_node->sqlen + 1024 - 1) /1024, 1, 1); + encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); + break; case (METAL_KERNEL_EMBEDDING): - MTL::Buffer *id_src0 = getBufferfromPtr(metal_data->input_id); - MTL::Buffer *id_dst = getBufferfromPtr(metal_data->output); - MTL::Buffer *id_lookup = getBufferfromPtr(metal_data->lookup); + MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).int32_data_ptr); //input_ids int + MTL::Buffer *id_dst = getBufferfromPtr((curr_node->C).half_data_ptr); //output half + MTL::Buffer *id_lookup = getBufferfromPtr((curr_node->B).data_ptr); //fp32 encoder->setComputePipelineState(ctx->kernels[op].pipeline); encoder->setBuffer(id_src0, offs_src0, 0); encoder->setBuffer(id_dst, offs_src1, 1); encoder->setBuffer(id_lookup, offs_src2, 2); - encoder->setBytes(&metal_data->op_constants.embed_dim, sizeof(embed_dim), 3); + encoder->setBytes(&curr_node->embed_dim, sizeof(int), 3); int threadsPerBlock = 1024; - int blocksPerGrid = (metal_data->input_id.m_dim_z + threadsPerBlock - 1) / threadsPerBlock; + int blocksPerGrid = (curr_node->A.column + threadsPerBlock - 1) / threadsPerBlock; MTL::Size ThreadperGroup = MTL::Size::Make(threadsPerBlock, 1, 1); - MTL::Size ThreadgroupsperGrid = MTL::Size::Make((input_id.m_dim_z + threadsPerBlock - 1) / threadsPerBlock, 1, 1); + MTL::Size ThreadgroupsperGrid = MTL::Size::Make((curr_node->A.column + threadsPerBlock - 1) / threadsPerBlock, 1, 1); // Dispatch the kernel encoder->dispatchThreadgroups(ThreadgroupsperGrid, 
ThreadperGroup); break; - const metal_params * inputs = metal_data->mm_nodes[i]; - struct matrix src0 = inputs->A; - struct matrix src1 = inputs->B; - struct matrix dst = inputs->C; + struct matrix src0 = curr_node->A; + struct matrix src1 = curr_node->B; + struct matrix dst = curr_node->C; // TODO: ne[0], nb[0] calculation & order const int64_t ne00 = src0.row; const int64_t ne01 = src0.column; - const int64_t ne02 = 1; + const int64_t ne02 = (curr_node && curr_node->bs != 0) ? curr_node->bs : 1; const int64_t ne03 = 1; const uint64_t nb00 = (op == METAL_KERNEL_MUL_MM_INT4_F32) || (op == METAL_KERNEL_MUL_MV_INT4_F32) ? sizeof(uint8_t) : sizeof(float); - const uint64_t nb01 = nb00*ne00; + const uint64_t nb01 = nb00*ne00/block_size; const uint64_t nb02 = nb01*ne01; const uint64_t nb03 = nb02*ne02; const int64_t ne10 = src1.row; const int64_t ne11 = src1.column; - const int64_t ne12 = 1; + const int64_t ne12 = (curr_node && curr_node->bs != 0) ? curr_node->bs : 1; const int64_t ne13 = 1; const uint64_t nb10 = sizeof(float); @@ -203,7 +333,7 @@ static enum status metal_graph_compute(metal_kernel_type op, const int64_t ne0 = dst.row; const int64_t ne1 = dst.column; - const int64_t ne2 = 1; + const int64_t ne2 = (curr_node && curr_node->bs != 0) ? curr_node->bs : 1; const int64_t ne3 = 1; const uint64_t nb0 = sizeof(float); @@ -214,7 +344,6 @@ static enum status metal_graph_compute(metal_kernel_type op, case METAL_KERNEL_MUL_MV_INT4_F32: case METAL_KERNEL_MUL_MM_F32_F32: case METAL_KERNEL_MUL_MV_F32_F32: - // TODO: buffer retrieved? MTL::Buffer *id_src0 = (op == METAL_KERNEL_MUL_MM_INT4_F32) || (op == METAL_KERNEL_MUL_MV_INT4_F32) ? 
getBufferfromPtr(src0.int4_data_ptr) : getBufferfromPtr(src0.data_ptr); MTL::Buffer *id_src1 = getBufferfromPtr(src1.data_ptr); MTL::Buffer *id_dst = getBufferfromPtr(dst.data_ptr); @@ -250,9 +379,9 @@ static enum status metal_graph_compute(metal_kernel_type op, } break; case (METAL_KERNEL_BATCH_ADD): - MTL::Buffer *id_src0 = getBufferfromPtr(src0.data_ptr); - MTL::Buffer *id_src1 = getBufferfromPtr(src1.data_ptr); - MTL::Buffer *id_dst = getBufferfromPtr(dst.data_ptr); + MTL::Buffer *id_src0 = getBufferfromPtr(src0.fp16_data_ptr); + MTL::Buffer *id_src1 = getBufferfromPtr(src1.int32_data_ptr); + MTL::Buffer *id_dst = getBufferfromPtr(dst.fp16_data_ptr); encoder->setComputePipelineState(ctx->kernels[op].pipeline); encoder->setBuffer(id_src0, offs_src0, 0); encoder->setBuffer(id_src1, offs_src1, 1); @@ -281,20 +410,20 @@ static enum status metal_graph_compute(metal_kernel_type op, const int64_t ne02 = 1; const int64_t ne03 = 1; // TODO: nb00 should be half? - const uint64_t nb00 = (op == METAL_KERNEL_MUL_MM_INT4_F32) || (op == METAL_KERNEL_MUL_MV_INT4_F32) ? 
sizeof(uint8_t) : sizeof(float); + const uint64_t nb00 = sizeof(half); const uint64_t nb01 = nb00*ne00; const uint64_t nb02 = nb01*ne01; const uint64_t nb03 = nb02*ne02; MTL::Buffer *id_src0 = getBufferfromPtr(src0.half_data_ptr); MTL::Buffer *id_src1 = getBufferfromPtr(src1.half_data_ptr); MTL::Buffer *id_dst = getBufferfromPtr(dst.half_data_ptr); - // TODO: add src1 + // TODO: add src1 (weights) encoder->setComputePipelineState(ctx->kernels[op].pipeline); encoder->setBuffer(id_src0, offs_src0, 0); encoder->setBuffer(id_dst, offs_src2, 1); encoder->setBytes(&ne00, sizeof(ne00), 2); encoder->setBytes(&nb01, sizeof(nb01), 3); - encoder->setBytes(&(metal_data->op_constants.eps), sizeof(metal_data->op_constants.eps), 4); + encoder->setBytes(&(curr_node->eps), sizeof(curr_node->eps), 4); encoder->setThreadgroupMemoryLength(32*sizeof(float), 0); encoder->dispatchThreadgroups(MTL::Size::Make(src0.row, 1, 1), MTL::Size::Make(src0.row, 1, 1)); break; @@ -312,12 +441,11 @@ static enum status metal_graph_compute(metal_kernel_type op, } encoder->setComputePipelineState(ctx->kernels[op].pipeline); } - // TODO: type const int64_t ne00 = src0.row; const int64_t ne01 = src0.column; const int64_t ne02 = 1; const int64_t ne03 = 1; - const float scale = metal_data->op_constants.scale; + const float scale = curr_node->scale; MTL::Buffer *id_src0 = getBufferfromPtr(src0.half_data_ptr); MTL::Buffer *id_src1 = getBufferfromPtr(src1.data_ptr); MTL::Buffer *id_dst = getBufferfromPtr(dst.data_ptr); @@ -332,19 +460,95 @@ static enum status metal_graph_compute(metal_kernel_type op, encoder->dispatchThreadgroups(MTL::Size::Make(ne01*ne02*ne03, 1, 1), MTL::Size::Make(nth, 1, 1)); break; case (METAL_KERNEL_ROPE): - //TODO: implement ROPE + MTL::Buffer *id_src0 = getBufferfromPtr(src0.half_data_ptr); + MTL::Buffer *id_src1 = getBufferfromPtr(src1.int32_data_ptr); + MTL::Buffer *id_dst = getBufferfromPtr(dst.half_data_ptr); + const int nth = MIN(1024, ne00); + + const int n_past = 
curr_node->n_past; //((int32_t *) dst.op_params)[0]; + const int n_dims = curr_node->n_dims; //((int32_t *) dst.op_params)[1]; + const int mode = curr_node->mode; //((int32_t *) dst.op_params)[2]; + // skip 3, n_ctx, used in GLM RoPE, unimplemented in metal + const int n_orig_ctx = curr_node->n_orig_ctx; //((int32_t *) dst.op_params)[4]; + + float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow; + memcpy(&freq_base, &curr_node->freq_base, sizeof(float)); //5; copy the bits out of the int field itself, not from the address its value would name + memcpy(&freq_scale, &curr_node->freq_scale, sizeof(float)); //6 + memcpy(&ext_factor, &curr_node->ext_factor, sizeof(float)); //7 + memcpy(&attn_factor, &curr_node->attn_factor, sizeof(float)); //8 + memcpy(&beta_fast, &curr_node->beta_fast, sizeof(float)); //9 + memcpy(&beta_slow, &curr_node->beta_slow, sizeof(float)); //10 + MTL::ComputePipelineState *pipeline = ctx->kernels[METAL_KERNEL_ROPE].pipeline; + encoder->setComputePipelineState(ctx->kernels[op].pipeline); + encoder->setBuffer(id_src0, offs_src0, 0); + encoder->setBuffer(id_src1, offs_src1, 1); + encoder->setBuffer(id_dst, offs_src2, 2); + encoder->setBytes(&ne00, sizeof(int64_t), 3); + encoder->setBytes(&ne01, sizeof(int64_t), 4); + encoder->setBytes(&ne02, sizeof(int64_t), 5); + encoder->setBytes(&ne03, sizeof(int64_t), 6); + encoder->setBytes(&nb00, sizeof(uint64_t), 7); + encoder->setBytes(&nb01, sizeof(uint64_t), 8); + encoder->setBytes(&nb02, sizeof(uint64_t), 9); + encoder->setBytes(&nb03, sizeof(uint64_t), 10); + encoder->setBytes(&ne0, sizeof(int64_t), 11); + encoder->setBytes(&ne1, sizeof(int64_t), 12); + encoder->setBytes(&ne2, sizeof(int64_t), 13); + encoder->setBytes(&ne3, sizeof(int64_t), 14); + encoder->setBytes(&nb0, sizeof(uint64_t), 15); + encoder->setBytes(&nb1, sizeof(uint64_t), 16); + encoder->setBytes(&nb2, sizeof(uint64_t), 17); + encoder->setBytes(&nb3, sizeof(uint64_t), 18); + + encoder->setBytes(&n_past, sizeof(int), 19); + 
encoder->setBytes(&n_dims, sizeof(int), 20); + encoder->setBytes(&mode, sizeof(int), 21); + encoder->setBytes(&n_orig_ctx, sizeof(int), 22); + encoder->setBytes(&freq_base, sizeof(float), 23); + encoder->setBytes(&freq_scale, sizeof(float), 24); + encoder->setBytes(&ext_factor, sizeof(float), 25); + encoder->setBytes(&attn_factor, sizeof(float), 26); + encoder->setBytes(&beta_fast, sizeof(float), 27); + encoder->setBytes(&beta_slow, sizeof(float), 28); + + MTL::Size ThreadperGroup = MTL::Size::Make(nth, 1, 1); + MTL::Size ThreadgroupsperGrid = MTL::Size::Make(ne01, ne02, ne03); // from https://github.com/ggerganov/llama.cpp/blob/1b496a745c315022df2d919374052e6004ced8d3/ggml-metal.m#L2240 + // Dispatch the kernel + encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); break; } if (encoder!=nullptr){ encoder->endEncoding(); encoder=nullptr; } - command_buffer->commit(); - command_buffer->waitUntilCompleted(); - if (command_buffer->status()!=MTL::CommandBufferStatusCompleted){ - return STATUS_FAILED; - } + } + command_buffer->commit(); + command_buffer->waitUntilCompleted(); + if (command_buffer->status()!=MTL::CommandBufferStatusCompleted){ + return STATUS_FAILED; } } return STATUS_SUCCESS; +} + +void add_node(const struct metal_params *new_node) { + if (mgraph == nullptr) { + std::cerr << "Graph is null" << std::endl; + return; + } + if (mgraph->n_nodes == mgraph->capacity) { + // Need more space, so let's double the capacity + int new_capacity = mgraph->capacity * 2; + const metal_params **new_mm_nodes = (const metal_params **)realloc(mgraph->mm_nodes, new_capacity * sizeof(metal_params *)); + if (new_mm_nodes == nullptr) { + std::cerr << "Memory allocation failed" << std::endl; + return; // Keep the old memory intact + } + mgraph->mm_nodes = new_mm_nodes; + mgraph->capacity = new_capacity; + } + + // Add the new node at the end of the array + mgraph->mm_nodes[mgraph->n_nodes] = new_node; + mgraph->n_nodes++; } \ No newline at end of file diff --git 
a/kernels/metal/metal_compute.h b/kernels/metal/metal_compute.h index 0d1ec21f..16ad5e1b 100644 --- a/kernels/metal/metal_compute.h +++ b/kernels/metal/metal_compute.h @@ -13,6 +13,7 @@ struct metal_kernel { }; struct metal_context * ctx; +struct metal_cgraph * mgraph; enum { MTLGPUFamilyApple1 = 1001, @@ -22,6 +23,14 @@ enum { }; enum metal_kernel_type { + METAL_KERNEL_HALF2FLOAT, + METAL_KERNEL_PREPARE_DECODER_ATTENTION_MASK_HALF, + METAL_KERNEL_SILUMUL_HALF, + METAL_KERNEL_ADD_HALF, + METAL_KERNEL_SHAPE_QKV, + METAL_KERNEL_UNSHAPE, + METAL_KERNEL_TRANSPOSE_1_2IDX, + METAL_KERNEL_CHECK_INF_HALF, METAL_KERNEL_EMBEDDING, METAL_KERNEL_BATCH_ADD, METAL_KERNEL_RELU, @@ -64,10 +73,12 @@ struct metal_constants { }; struct metal_params { - struct matrix A, B, C, bias; + struct matrix A, B, C, D, bias; struct optimization_params opt_params; float alpha, beta; float16_t half_alpha; + // batch_size + int bs; // for int4 float *scales, *offset, *zero_point; float16_t *half_scales; @@ -77,17 +88,30 @@ struct metal_params { // for int8 activation float *A_scales; int8_t A_zero_point; + // op + metal_kernel_type op; + // consts + float eps; //rms_norm + float scale; //softmax + int embed_dim; //embed + int n_orig_ctx; + int n_past; + int n_dims; + int mode; + int freq_base; + int freq_scale; + int ext_factor; + int attn_factor; + int beta_fast; + int beta_slow; + // + int sqlen, past_sqlen, num_heads, head_dim, input_m_dim_z, tgz; // tgz: grid extent read by the TRANSPOSE_1_2IDX dispatch }; struct metal_cgraph{ + int capacity; int n_nodes; const struct metal_params ** mm_nodes; // matmul ops (A, B, C) - struct metal_constants op_constants; - - // for kernel_embedding - Matrix3D input_id; - Matrix3D output; - float* lookup; }; void *allocateSharedMem(size_t size); @@ -95,5 +119,5 @@ void init(); static void metal_free(struct metal_context * ctx); -static enum status metal_graph_compute(metal_kernel_type op, - struct metal_cgraph * metal_data); +static enum status metal_graph_compute(struct metal_cgraph * mg); // matches the definition: each node carries its own op +void add_node(const struct metal_params * new_node); #endif \ No newline at end of file diff --git 
a/kernels/metal/op.metal b/kernels/metal/op.metal index f4c94eed..3efd0549 100644 --- a/kernels/metal/op.metal +++ b/kernels/metal/op.metal @@ -24,6 +24,142 @@ typedef struct { uint8_t qs[QK4_0 / 2]; // nibbles / quants } block_q4_0; +kernel void half2float(device const half* halfArray [[buffer(0)]], + device float* floatArray [[buffer(1)]], + constant int& N [[buffer(2)]], + uint index [[thread_position_in_grid]]) { + if (index < N) { + floatArray[index] = halfArray[index]; // Implicit conversion from half to float + } +} + +kernel void kernel_prepare_decoder_attention_mask_half(device half* data, + constant int& length, + constant int& past_length, + uint2 gid[[thread_position_in_grid]]) { + int i = gid.x; + int j = gid.y; + + if (i < length - past_length && j < length) { + const half min = -HALF_MAX; + // Assuming 'data' represents a flat array of Matrix3D. + // Calculating index for 'causal_attention_mask(0, i, j)' + int index = i * length + j; // Assuming '0 * length * length' is omitted for simplicity + + if (i + past_length < j) { + data[index] = min; + } else { + data[index] = 0; + } + } +} + +kernel void kernel_add_half(device half* a [[buffer(0)]], + device half* b [[buffer(1)]], + device half* c [[buffer(2)]], + constant int& length [[buffer(3)]], + uint id [[thread_position_in_grid]]) { + if (id < length) { + c[id] = a[id] + b[id]; // Using native '+' operator for half types + } +} + +kernel void kernel_SiLuMul_half(device half* a [[buffer(0)]], + device half* b [[buffer(1)]], + constant int& length [[buffer(2)]], + uint id [[thread_position_in_grid]]) { + const half scalar_one = 1.0h; + + if (id < length) { + half v = a[id]; + half silu_v = v * (scalar_one / (scalar_one + exp(-v))); + a[id] = silu_v * b[id]; + } +} + +template +kernel void kernel_shape_qkv(device T* unshape, + device T* shaped_q, + device T* shaped_k, + device T* shaped_v, + constant int& num_heads, + constant int& sqlen, + constant int& head_dim, + uint3 
gid[[thread_position_in_grid]]) { + + int i = gid.x; + int j = gid.y; + int k = gid.z; + int embed_dim = head_dim * num_heads; + + if (i < num_heads && j < sqlen && k < head_dim) { + // Calculate 1D index for the 3D access in a flattened array. + int unshape_index = j * num_heads * head_dim + i * head_dim + k; + int q_index = i * sqlen * head_dim + j * head_dim + k; + int k_index = q_index + num_heads * sqlen * head_dim; + int v_index = k_index + num_heads * sqlen * head_dim; + + shaped_q[q_index] = unshape[unshape_index]; + shaped_k[k_index] = unshape[unshape_index + embed_dim]; + shaped_v[v_index] = unshape[unshape_index + 2 * embed_dim]; + } +} + +template +kernel void kernel_unshape(device T* shaped, + device T* unshape, + constant int& num_heads, + constant int& sqlen, + constant int& head_dim, + uint3 gid[[thread_position_in_grid]]) { + int i = gid.x; + int j = gid.y; + int k = gid.z; + + if (i < num_heads && j < sqlen && k < head_dim) { + // Calculate 1D index for the 3D access in a flattened array. 
+ int shaped_index = i * sqlen * head_dim + j * head_dim + k; + int unshape_index = j * num_heads * head_dim + i * head_dim + k; + + unshape[unshape_index] = shaped[shaped_index]; + } +} + +template +kernel void kernel_transpose_1_2idx(const device * input, + device * output, + device int& input_m_dim_x, + device int& input_m_dim_y, + device int& input_m_dim_z, + device int& output_m_dim_y, + device int& output_m_dim_z, + uint3 gid[[thread_position_in_grid]]) { + int i = gid.x; + int j = gid.y; + int k = gid.z; + + if (i < input_m_dim_x && j < input_m_dim_y && k < input_m_dim_z) { + output[i * output_m_dim_y * output_m_dim_z + k * output_m_dim_z + j] = + input[i * input_m_dim_y * input_m_dim_z + j * input_m_dim_z + k]; + } +} + +template +kernel void kernel_check_inf_half(device half* a [[ buffer(0) ]], + constant int& total [[ buffer(1) ]], + uint gid [[thread_position_in_grid]]) { + int i = gid; + + if (i < total) { + bool infCheck = isinf(a[i]); + bool nanCheck = isnan(a[i]); + + if (infCheck || nanCheck) { + a[i] = -HALF_MAX; // Metal defines HALF_MAX, the maximum value for half precision + } + } +} + kernel void kernel_embedding(device Matrix3D input_id [[buffer(0)]], device Matrix3D output [[buffer(1)]], device float* lookup [[buffer(2)]], diff --git a/llm/include/nn_modules/Int4llamaAttentionMetal.h b/llm/include/nn_modules/Int4llamaAttentionMetal.h new file mode 100644 index 00000000..b0abe976 --- /dev/null +++ b/llm/include/nn_modules/Int4llamaAttentionMetal.h @@ -0,0 +1,89 @@ +#include + +#include "common.h" +#include "operators.h" + +struct Int4llamaAttention_output { +#ifdef QM_METAL + Matrix3D attn_output; + Matrix3D attn_probs_reshaped; + std::pair, Matrix3D> past_key_value; +#else + Matrix3D attn_output; + Matrix3D attn_probs_reshaped; + std::pair, Matrix3D> past_key_value; +#endif +}; + +struct Int4llamaAttention_input { + bool has_past_key_value = false; + int layer_idx; +#ifdef QM_METAL + Matrix3D hidden_states; + Matrix3D attention_mask; + 
Matrix3D past_key, past_value; + + Int4llamaAttention_input(Matrix3D hidden_states_, Matrix3D attention_mask_, int layer_idx_) +#else + Matrix3D hidden_states; + Matrix3D attention_mask; + Matrix3D past_key, past_value; + + Int4llamaAttention_input(Matrix3D hidden_states_, Matrix3D attention_mask_, int layer_idx_) +#endif + : hidden_states(hidden_states_), attention_mask(attention_mask_), layer_idx(layer_idx_) { + } + +#ifdef QM_METAL + Int4llamaAttention_input(Matrix3D hidden_states_, Matrix3D attention_mask_, + Matrix3D past_key_, Matrix3D past_value_, bool has_past_key_value_, + int layer_idx_) +#else + Int4llamaAttention_input(Matrix3D hidden_states_, Matrix3D attention_mask_, Matrix3D past_key_, + Matrix3D past_value_, bool has_past_key_value_, int layer_idx_) +#endif + : hidden_states(hidden_states_), + attention_mask(attention_mask_), + past_key(past_key_), + past_value(past_value_), + has_past_key_value(has_past_key_value_), + layer_idx(layer_idx_) { + } +}; + +class Int4llamaAttention { + public: + Int4llamaAttention(std::string param_path, const struct model_config config, int layer_idx); + Int4llamaAttention() {} + static void initialized_memory(const struct model_config config); + struct Int4llamaAttention_output forward(std::string param_path, const struct Int4llamaAttention_input &input); + +#if !(DEC_SHARED_MEM) + int *q_weight = nullptr, *k_weight = nullptr, *v_weight = nullptr, *o_weight = nullptr, *qkv_weight = nullptr; +#endif + +#ifdef QM_METAL + void free_cuda_memory(); + half *cos_buf = nullptr, *sin_buf = nullptr; +#else + float *cos_buf = nullptr, *sin_buf = nullptr; +#endif + + private: + std::string profile_name = "Int4llamaAttention"; + int embed_dim, num_heads, head_dim; +#ifdef QM_METAL + Linear_half_int4 o_proj, qkv_proj; + RotaryPosEmb_cuda rotary_pos_emb; + BMM_F16T qk_bmm, pv_bmm; + int max_sqlen; +#else + Linear_FP_int4 k_proj, v_proj, q_proj, o_proj, qkv_proj; + RotaryPosEmb rotary_pos_emb; + BMM_F32T qk_bmm, pv_bmm; + void 
unshape(Matrix3D shaped, Matrix3D unshape, int sqlen); + void shape(Matrix3D unshape, Matrix3D shaped, int sqlen); + void shape_qkv(Matrix3D unshape, Matrix3D shaped_q, Matrix3D shaped_k, + Matrix3D shaped_v, int sqlen); +#endif +}; diff --git a/llm/include/nn_modules/Int4llamaDecoderLayerMetal.h b/llm/include/nn_modules/Int4llamaDecoderLayerMetal.h new file mode 100644 index 00000000..1537d1da --- /dev/null +++ b/llm/include/nn_modules/Int4llamaDecoderLayerMetal.h @@ -0,0 +1,87 @@ +#include "Int4llamaAttention.h" +#include "common.h" +#include "operators.h" + +struct Int4llamaDecoderLayer_output { +#ifdef QM_METAL + Matrix3D hidden_states; + Matrix3D attentions; + std::pair, Matrix3D> past_key_value; + + Int4llamaDecoderLayer_output(Matrix3D hidden_states_, Matrix3D attentions_, + std::pair, Matrix3D> past_key_value_) { +#else + Matrix3D hidden_states; + Matrix3D attentions; + std::pair, Matrix3D> past_key_value; + + Int4llamaDecoderLayer_output(Matrix3D hidden_states_, Matrix3D attentions_, + std::pair, Matrix3D> past_key_value_) { +#endif + hidden_states = hidden_states_; + attentions = attentions_; + past_key_value = past_key_value_; + }; +}; +struct Int4llamaDecoderLayer_input { + bool has_past_key_value = false; +#ifdef QM_METAL + Matrix3D hidden_states; + Matrix3D attention_mask; + Matrix3D past_key, past_value; + + Int4llamaDecoderLayer_input(Matrix3D hidden_states_, Matrix3D attention_mask_) { +#else + Matrix3D hidden_states; + Matrix3D attention_mask; + Matrix3D past_key, past_value; + + Int4llamaDecoderLayer_input(Matrix3D &hidden_states_, Matrix3D &attention_mask_) { +#endif + hidden_states = hidden_states_; + attention_mask = attention_mask_; + has_past_key_value = false; + } + +#ifdef QM_METAL + Int4llamaDecoderLayer_input(Matrix3D hidden_states_, Matrix3D attention_mask_, + Matrix3D past_key_, Matrix3D past_value_){ +#else + Int4llamaDecoderLayer_input(Matrix3D &hidden_states_, Matrix3D &attention_mask_, + Matrix3D past_key_, Matrix3D past_value_) { 
+#endif + hidden_states = hidden_states_; + attention_mask = attention_mask_; + past_key = past_key_; + past_value = past_value_; + has_past_key_value = true; +} +} +; + +class Int4llamaDecoderLayer { + public: + Int4llamaDecoderLayer(std::string param_path, const struct model_config config, int layer_idx); + Int4llamaDecoderLayer(){}; + struct Int4llamaDecoderLayer_output forward(std::string param_path, const struct Int4llamaDecoderLayer_input &input, int layer_idx); + + std::string profile_name = "Int4llamaDecoderLayer"; + int embed_dim, num_attention_heads, hidden_dim, layer_idx; + float rms_norm_eps; + Int4llamaAttention attn; +#ifdef QM_METAL + void free_cuda_memory(); + LlamaRMSNorm_metal input_layernorm, post_attention_layernorm; + Linear_half_int4 gate_proj, down_proj, up_proj; + +#if !(DEC_SHARED_MEM) + int *gate_proj_weight = nullptr, *down_proj_weight = nullptr, *up_proj_weight = nullptr; +#endif + +#else + LlamaRMSNorm input_layernorm, post_attention_layernorm; // from torch_int.nn + Linear_FP_int4 gate_proj, down_proj, up_proj; +#endif + float *input_layernorm_weight_ptr = nullptr; + float *post_attention_layernorm_ptr = nullptr; +}; diff --git a/llm/include/nn_modules/Int4llamaDecoderMetal.h b/llm/include/nn_modules/Int4llamaDecoderMetal.h new file mode 100644 index 00000000..5314f58a --- /dev/null +++ b/llm/include/nn_modules/Int4llamaDecoderMetal.h @@ -0,0 +1,69 @@ +#include +#include +#include + +#include "Int4llamaDecoderLayer.h" +#include "common.h" +#include "operators.h" + +struct Int4llamaDecoder_output { +#ifdef QM_METAL + Matrix3D last_hidden_state; + std::vector> past_keys, past_values; +#else + Matrix3D last_hidden_state; + std::vector> past_keys, past_values; +#endif +}; +struct Int4llamaDecoder_input { + Matrix3D input_ids; + bool has_past_keys_values; +#ifdef QM_METAL + std::vector> past_keys, past_values; +#else + std::vector> past_keys, past_values; +#endif + + Int4llamaDecoder_input(Matrix3D input_ids_) : input_ids(input_ids_) { 
has_past_keys_values = false; } +#ifdef QM_METAL + Int4llamaDecoder_input(Matrix3D input_ids_, std::vector> past_keys_, + std::vector> past_values_) +#else + Int4llamaDecoder_input(Matrix3D input_ids_, std::vector> past_keys_, + std::vector> past_values_) +#endif + : input_ids(input_ids_), past_keys(past_keys_), past_values(past_values_) { + has_past_keys_values = true; + } +}; + +class Int4llamaDecoder { + public: + Int4llamaDecoder(std::string param_path, const struct model_config config); + Int4llamaDecoder(){}; + Matrix3D prepare_decoder_attention_mask(int length, int past_length); + struct Int4llamaDecoder_output forward(std::string param_path, const struct Int4llamaDecoder_input& input); + int voc_size, embed_dim, padding_idx, hidden_dim, num_heads; + float rms_norm_eps; + std::vector layers; + std::string profile_name = "Int4llamaDecoder"; +#ifdef QM_METAL + void free_cuda_memory(); + Embedding embed_tokens; + LlamaRMSNorm_metal norm; + + float16_t* attention_mask_buf = nullptr; + float16_t* last_hidden_states_buf = nullptr; + float* hidden_states_buf = nullptr; + float16_t* hidden_states_half_buf = nullptr; +#else + Embedding embed_tokens; + LlamaRMSNorm norm; + + float* attention_mask_buf; + float* pos_embeds_buf; + float* last_hidden_states_buf; + float* hidden_states_buf; +#endif + float* norm_weight_ptr = nullptr; +}; diff --git a/llm/include/nn_modules/Int4llamaForCausalLMMetal.h b/llm/include/nn_modules/Int4llamaForCausalLMMetal.h new file mode 100644 index 00000000..5b6e1b09 --- /dev/null +++ b/llm/include/nn_modules/Int4llamaForCausalLMMetal.h @@ -0,0 +1,61 @@ +#include "Int4llamaDecoder.h" + +struct Int4LlamaForCausalLM_output { + Matrix3D logits; +#ifdef QM_METAL + std::vector> past_keys, past_values; +#else + std::vector> past_keys, past_values; +#endif +}; +struct Int4LlamaForCausalLM_input { + Matrix3D input_ids; + bool has_past_keys_values; +#ifdef QM_METAL + std::vector> past_keys, past_values; +#else + std::vector> past_keys, past_values; 
+#endif + + Int4LlamaForCausalLM_input() {} + Int4LlamaForCausalLM_input(Matrix3D input_ids_) : input_ids(input_ids_) { has_past_keys_values = false; } +#ifdef QM_METAL + Int4LlamaForCausalLM_input(Matrix3D input_ids_, std::vector> past_keys_, + std::vector> past_values_) +#else + Int4LlamaForCausalLM_input(Matrix3D input_ids_, std::vector> past_keys_, + std::vector> past_values_) +#endif + : input_ids(input_ids_), past_keys(past_keys_), past_values(past_values_) { + has_past_keys_values = true; + } +}; + +class Int4LlamaForCausalLM { + public: + Int4LlamaForCausalLM(std::string param_path, const struct model_config config); + Int4LlamaForCausalLM(){}; + struct Int4LlamaForCausalLM_output forward(std::string param_path, const struct Int4LlamaForCausalLM_input& input); + float* logits_output = nullptr; +#ifdef QM_METAL + void free_cuda_memory(); + int* lm_head_weight = nullptr; + float16_t* logits_output_half = nullptr; +#else + uint8_t* lm_head_weight; +#endif + + private: + std::string profile_name = "Int4LlamaForCausalLM"; + Int4llamaDecoder decoder; +#ifdef QM_METAL + Linear_half_int4 lm_head; +#else + Linear_FP_int4 lm_head; +#endif +}; + + +// 1. modified the code to be suitable for Metal +// 2. investigate the problem of waituntilcompleted (multiple encoders in order inside command buffer) +// 3. 
found more kernels needed \ No newline at end of file diff --git a/llm/src/nn_modules/metal/Int4llamaAttentionMetal.cc b/llm/src/nn_modules/metal/Int4llamaAttentionMetal.cc new file mode 100644 index 00000000..1d80ee2d --- /dev/null +++ b/llm/src/nn_modules/metal/Int4llamaAttentionMetal.cc @@ -0,0 +1,257 @@ +#include +#include +#include + +#include "Int4llamaAttention.h" +#include "operators.h" +#include "utils.h" +#include "metal_compute.h" + +static float16_t *attn_weights_arr = nullptr; +static float16_t *attn_output_half_arr = nullptr; +static float16_t *attn_output_arr = nullptr; +static float16_t *attn_output_transpose_arr = nullptr; +static float16_t *key_states_arr = nullptr; +static float16_t *value_states_arr = nullptr; +static float16_t *query_states_arr = nullptr; +static float16_t *value_states_transpose_arr = nullptr; +static float16_t *key_states_arr_cache = nullptr; +static float16_t *value_states_arr_cache = nullptr; +static int *cache_num = nullptr; +static float16_t *qkv_states_unshape_arr = nullptr; + +void Int4llamaAttention::initialized_memory(const struct model_config config) { + allocate_aligned_memory(attn_weights_arr, config.num_heads * config.max_sqlen * config.max_sqlen * sizeof(float16_t)); + allocate_aligned_memory(attn_output_half_arr, config.max_sqlen * config.embed_dim * sizeof(float16_t)); + allocate_aligned_memory(attn_output_arr, config.max_sqlen * config.embed_dim * sizeof(float16_t)); + allocate_aligned_memory(attn_output_transpose_arr, config.max_sqlen * config.embed_dim * sizeof(float16_t)); + allocate_aligned_memory(key_states_arr, config.max_sqlen * config.embed_dim * sizeof(float16_t)); + allocate_aligned_memory(value_states_arr, config.max_sqlen * config.embed_dim * sizeof(float16_t)); + allocate_aligned_memory(query_states_arr, config.max_sqlen * config.embed_dim * sizeof(float16_t)); + allocate_aligned_memory(value_states_transpose_arr, config.max_sqlen * config.embed_dim * sizeof(float16_t)); + + 
allocate_aligned_memory(cache_num, config.num_layers * sizeof(int)); + for (int i = 0; i < config.num_layers; i++) cache_num[i] = 0; + + allocate_aligned_memory(key_states_arr_cache, config.num_layers * 2 * config.max_sqlen * config.embed_dim * sizeof(float16_t)); + allocate_aligned_memory(value_states_arr_cache, config.num_layers * 2 * config.max_sqlen * config.embed_dim * sizeof(float16_t)); + allocate_aligned_memory(qkv_states_unshape_arr, config.max_sqlen * config.embed_dim * 3 * sizeof(float16_t)); +} + +Int4llamaAttention::Int4llamaAttention(std::string param_path, const struct model_config config, int layer_idx) { + allocate_aligned_memory(o_weight, (config.embed_dim * config.embed_dim * sizeof(int)) / 8); + allocate_aligned_memory(qkv_weight, (config.embed_dim * config.embed_dim * 3 * sizeof(int)) / 8); + this->o_proj = Linear_half_int4(Matrix3D(o_weight, 1, config.embed_dim, config.embed_dim / 8), + param_path + "/o_proj"); + this->qkv_proj = Linear_half_int4(Matrix3D(qkv_weight, 1, config.embed_dim, config.embed_dim * 3 / 8), + param_path + "/qkv_proj"); + + allocate_aligned_memory(cos_buf, config.max_sqlen * (config.embed_dim / config.num_heads) * sizeof(half)); + allocate_aligned_memory(sin_buf, config.max_sqlen * (config.embed_dim / config.num_heads) * sizeof(half)); + Matrix3D cos(cos_buf, 1, config.max_sqlen, (config.embed_dim / config.num_heads)); + Matrix3D sin(sin_buf, 1, config.max_sqlen, (config.embed_dim / config.num_heads)); + + this->rotary_pos_emb = RotaryPosEmb_metal(cos, sin, param_path + "/rotary_emb"); + + half qk_bmm_alpha; + read_to_array_half((param_path + "/qk_bmm/alpha_half.bin").c_str(), &qk_bmm_alpha, 1); + this->qk_bmm = BMM_F16T(qk_bmm_alpha); + this->pv_bmm = BMM_F16T(__float2half(1.0f)); + + this->embed_dim = config.embed_dim; + this->num_heads = config.num_heads; + assert(config.embed_dim % config.num_heads == 0); + this->head_dim = config.embed_dim / config.num_heads; + this->max_sqlen = config.max_sqlen; +} + +void 
shape_qkv(Matrix3D qkv_states_unshape, Matrix3D query_states, Matrix3D key_states, Matrix3D value_states, int num_heads, int sqlen, int head_dim){ + const struct metal_params params; + + params.A.half_data_ptr = qkv_states_unshape.m_data; + params.B.half_data_ptr = query_states.m_data; + params.C.half_data_ptr = key_states.m_data; + params.D.half_data_ptr = value_states.m_data; + params.num_heads = num_heads; + params.sqlen = sqlen; + params.head_dim = head_dim; + params.op = METAL_KERNEL_SHAPE_QKV; + add_node(¶ms); +} + +void unshape(Matrix3D attn_output, Matrix3D attn_output_transpose, int num_heads, int sqlen, int head_dim){ + const struct metal_params params; + + params.A.half_data_ptr = attn_output.m_data; + params.B.half_data_ptr = attn_output_transpose.m_data; + params.num_heads = num_heads; + params.sqlen = sqlen; + params.head_dim = head_dim; + params.op = METAL_KERNEL_UNSHAPE; + add_node(¶ms); +} + +void check_inf_half(Matrix3D attn_weights){ + const struct metal_params params; + + params.A.half_data_ptr = attn_weights.m_data; + params.sqlen = attn_weights.length(); + params.op = METAL_KERNEL_CHECK_INF_HALF; + add_node(¶ms); + return; +} + +void transpose_1_2idx(Matrix3D final_value_states, Matrix3D value_states_transpose, int num_heads, int sqlen, int head_dim, int tgz){ + const struct metal_params params; + + params.A.half_data_ptr = final_value_states.m_data; + params.A.row = final_value_states.m_dim_x; + params.A.column = final_value_states.m_dim_y; + params.m_dim_z = final_value_states.m_dim_z; + params.B.half_data_ptr = value_states_transpose.m_data; + params.B.row = value_states_transpose.m_dim_x; + params.B.column = value_states_transpose.m_dim_y; + params.num_heads = num_heads; + params.sqlen = sqlen; + params.head_dim = head_dim; + params.tgz = tgz; + params.op = METAL_KERNEL_TRANSPOSE_1_2IDX; + add_node(¶ms); + return; +} + +struct Int4llamaAttention_output Int4llamaAttention::forward(std::string param_path, const struct 
Int4llamaAttention_input &input) { + PROFILE_START(profile_name); + + struct Int4llamaAttention_output output; + const int sqlen = input.hidden_states.m_dim_y, b = input.hidden_states.m_dim_x; + assert(b == 1); + + // Fused QKV + Matrix3D qkv_states_unshape(qkv_states_unshape_arr, b, sqlen, embed_dim * 3); + this->qkv_proj.forward(input.hidden_states, qkv_states_unshape); + Matrix3D query_states(query_states_arr, this->num_heads, sqlen, this->head_dim); + Matrix3D key_states(key_states_arr, this->num_heads, sqlen, this->head_dim); + Matrix3D value_states(value_states_arr, this->num_heads, sqlen, this->head_dim); + // METAL: more kernels needed + shape_qkv(qkv_states_unshape, query_states, key_states, value_states, this->num_heads, sqlen, this->head_dim); + + int tgz = sqlen; + if (input.has_past_key_value) { + assert(input.past_key.m_dim_z == this->head_dim); + tgz += input.past_key.m_dim_y; + } + float16_t *ret_value_states, *ret_key_states; + if (cache_num[input.layer_idx] == 1) { + ret_value_states = &value_states_arr_cache[(input.layer_idx * 2 + 1) * this->max_sqlen * this->embed_dim]; + ret_key_states = &key_states_arr_cache[(input.layer_idx * 2 + 1) * this->max_sqlen * this->embed_dim]; + cache_num[input.layer_idx] = 0; + } else { + ret_value_states = &value_states_arr_cache[input.layer_idx * 2 * this->max_sqlen * this->embed_dim]; + ret_key_states = &key_states_arr_cache[input.layer_idx * 2 * this->max_sqlen * this->embed_dim]; + cache_num[input.layer_idx] = 1; + } + Matrix3D final_value_states(ret_value_states, this->num_heads, tgz, this->head_dim); + Matrix3D final_key_states(ret_key_states, this->num_heads, tgz, this->head_dim); + Matrix3D attn_output_half(attn_output_half_arr, 1, sqlen, this->num_heads * this->head_dim); + + int start_idx = 0; + if (input.has_past_key_value) start_idx = input.past_key.m_dim_y; + + dim3 grid(num_heads, 1, 1); + dim3 block(sqlen, 1, 1); + // METAL: ROPE metal + RotaryPosEmb_metal_forward(query_states, key_states, 
this->rotary_pos_emb.cos, this->rotary_pos_emb.sin, start_idx, sqlen); + + // int tgz = sqlen; + if (input.has_past_key_value) { + // assert(input.past_key.m_dim_z == this->head_dim); + // tgz += input.past_key.m_dim_y; + float16_t *val_ptr = ret_value_states, *key_ptr = ret_key_states; + int past_block = input.past_key.m_dim_y * input.past_key.m_dim_z; + int sq_block = sqlen * this->head_dim; +#pragma unroll + for (int i = 0; i < input.past_key.m_dim_x; i++) { + cudaMemcpyAsync(val_ptr, &input.past_value.m_data[past_block * i], past_block * sizeof(float16_t), cudaMemcpyDeviceToDevice); + val_ptr += past_block; + cudaMemcpyAsync(val_ptr, &value_states.m_data[sq_block * i], sq_block * sizeof(float16_t), cudaMemcpyDeviceToDevice); + val_ptr += sq_block; + cudaMemcpyAsync(key_ptr, &input.past_key.m_data[past_block * i], past_block * sizeof(float16_t), cudaMemcpyDeviceToDevice); + key_ptr += past_block; + cudaMemcpyAsync(key_ptr, &key_states.m_data[sq_block * i], sq_block * sizeof(float16_t), cudaMemcpyDeviceToDevice); + key_ptr += sq_block; + } + } else { + cudaMemcpyAsync(ret_value_states, value_states_arr, (this->num_heads * tgz * this->head_dim) * sizeof(float16_t), cudaMemcpyDeviceToDevice); + cudaMemcpyAsync(ret_key_states, key_states_arr, (this->num_heads * tgz * this->head_dim) * sizeof(float16_t), cudaMemcpyDeviceToDevice); + } + + Matrix3D attn_weights(attn_weights_arr, this->num_heads, sqlen, tgz); + this->qk_bmm.forward(query_states, final_key_states, attn_weights); + + // dim3 threadsPerBlock2(16, 4, 16); + // dim3 numBlocks2((this->num_heads + threadsPerBlock2.x - 1) / threadsPerBlock2.x, + // (sqlen + threadsPerBlock2.y - 1) / threadsPerBlock2.y, + // (tgz + threadsPerBlock2.z - 1) / threadsPerBlock2.z); + // METAL: Metal + batch_Add(attn_weights, input.attention_mask, attn_weights); + + int threadsPerBlock_1D = 1024; + int blocksPerGrid_1D =(attn_weights.length() + threadsPerBlock_1D - 1) / threadsPerBlock_1D; + // METAL: more kernels needed + 
check_inf_half(attn_weights); + + Matrix3D attn_probs(attn_weights_arr, this->num_heads, sqlen, tgz); + dim3 threadsPerBlock3(64, 16); + dim3 numBlocks3((this->num_heads + threadsPerBlock3.x - 1) / threadsPerBlock3.x, (sqlen + threadsPerBlock3.y - 1) / threadsPerBlock3.y); + // METAL: Metal + softmax(attn_weights, attn_probs); + + /* Legacy Implementation of PV_BMM*/ + Matrix3D value_states_transpose(value_states_transpose_arr, this->num_heads, this->head_dim, tgz); + // METAL: more kernels needed + transpose_1_2idx(final_value_states, value_states_transpose, this->num_heads, sqlen, this->head_dim, tgz); + + Matrix3D attn_output(attn_output_arr, this->num_heads, sqlen, this->head_dim); + this->pv_bmm.forward(attn_probs, value_states_transpose, attn_output); + /* Alternative Implementation (untransposed) of PV_BMM*/ + // Matrix3D attn_output(attn_output_arr, this->num_heads, sqlen, this->head_dim); + // this->pv_bmm.forward_weight_untransposed(attn_probs, final_value_states, attn_output); + + Matrix3D attn_output_transpose(attn_output_transpose_arr, 1, sqlen, this->num_heads * this->head_dim); + // METAL: more kernels + unshape(attn_output, attn_output_transpose, this->num_heads, sqlen, this->head_dim); + + // Matrix3D attn_output_half(attn_output_half_arr, 1, sqlen, this->num_heads * this->head_dim); + this->o_proj.forward(attn_output_transpose, attn_output_half); + + // output assignment + output.attn_output = attn_output_half; + output.past_key_value = {final_key_states, final_value_states}; + + PROFILE_END(profile_name); + + return output; +} + +void Int4llamaAttention::free_cuda_memory() { + free_aligned_memory_gpu(attn_weights_arr); + free_aligned_memory_gpu(attn_output_half_arr); + free_aligned_memory_gpu(attn_output_arr); + free_aligned_memory_gpu(attn_output_transpose_arr); + free_aligned_memory_gpu(key_states_arr); + free_aligned_memory_gpu(value_states_arr); + free_aligned_memory_gpu(query_states_arr); + 
free_aligned_memory_gpu(value_states_transpose_arr); + free_aligned_memory_gpu(key_states_arr_cache); + free_aligned_memory_gpu(value_states_arr_cache); + free_aligned_memory_gpu(cos_buf); + free_aligned_memory_gpu(sin_buf); + free_aligned_memory_gpu(o_weight); + free_aligned_memory_gpu(qkv_states_unshape_arr); + + if(cache_num) { + free(cache_num); + cache_num = nullptr; + } +} diff --git a/llm/src/nn_modules/metal/Int4llamaDecoderLayerMetal.cc b/llm/src/nn_modules/metal/Int4llamaDecoderLayerMetal.cc new file mode 100644 index 00000000..cb5942f7 --- /dev/null +++ b/llm/src/nn_modules/metal/Int4llamaDecoderLayerMetal.cc @@ -0,0 +1,136 @@ +#include "Int4llamaDecoderLayer.h" +#include "utils.h" + +// Shared memory space across all layers +static float16_t *hidden_states_half_arr = nullptr; +static float16_t *final_layer_norm_arr = nullptr; +static float16_t *gate_proj_arr = nullptr; +static float16_t *up_proj_arr = nullptr; +static float16_t *down_proj_arr = nullptr; +static float16_t *hidden_states_arr = nullptr; + +void add_half(Matrix3D a, Matrix3D b, Matrix3D c){ + const struct metal_params params; + + params.A.half_data_ptr = a.m_data; + params.B.half_data_ptr = b.m_data; + params.C.half_data_ptr = c.m_data; + params.num_heads = num_heads; + params.sqlen = a.length(); + params.op = METAL_KERNEL_ADD_HALF; + add_node(¶ms); +} + +void SiLuMul_half(Matrix3D gate_proj, Matrix3D up_proj){ + const struct metal_params params; + + params.A.half_data_ptr = gate_proj.m_data; + params.B.half_data_ptr = up_proj.m_data; + params.sqlen = a.length(); + params.op = METAL_KERNEL_SILUMUL_HALF; + add_node(¶ms); +} + +Int4llamaDecoderLayer::Int4llamaDecoderLayer(std::string param_path, const struct model_config config, int layer_idx) { + if (layer_idx == 0) { + allocate_aligned_memory(hidden_states_half_arr, config.max_sqlen * config.embed_dim * sizeof(float16_t)); + allocate_aligned_memory(final_layer_norm_arr, config.max_sqlen * config.embed_dim * sizeof(float16_t)); + 
allocate_aligned_memory(gate_proj_arr, config.max_sqlen * config.hidden_dim * sizeof(float16_t)); + allocate_aligned_memory(up_proj_arr, config.max_sqlen * config.hidden_dim * sizeof(float16_t)); + allocate_aligned_memory(down_proj_arr, config.max_sqlen * config.embed_dim * sizeof(float16_t)); + allocate_aligned_memory(hidden_states_arr, config.max_sqlen * config.embed_dim * sizeof(float16_t)); + Int4llamaAttention::initialized_memory(config); + } + + allocate_aligned_memory(input_layernorm_weight_ptr, config.embed_dim * sizeof(float)); + Matrix3D input_layernorm_weight(input_layernorm_weight_ptr, 1, 1, config.embed_dim); + input_layernorm_weight.load((param_path + "/input_layernorm/weight.bin").c_str()); + // METAL: metal rms_norm + this->input_layernorm = LlamaRMSNorm_metal(input_layernorm_weight); + + allocate_aligned_memory(post_attention_layernorm_ptr, config.embed_dim * sizeof(float)); + Matrix3D post_attention_layernorm_weight(post_attention_layernorm_ptr, 1, 1, config.embed_dim); + post_attention_layernorm_weight.load((param_path + "/post_attention_layernorm/weight.bin").c_str()); + // METAL: metal rms_norm + this->post_attention_layernorm = LlamaRMSNorm_metal(post_attention_layernorm_weight); + + this->rms_norm_eps = config.rms_norm_eps; + + this->embed_dim = config.embed_dim; + this->num_attention_heads = config.num_heads; + this->hidden_dim = config.hidden_dim; + this->layer_idx = layer_idx; + + this->attn = Int4llamaAttention(param_path + "/self_attn", config, layer_idx); + + allocate_aligned_memory(gate_proj_weight, (config.embed_dim * config.hidden_dim * sizeof(int)) / 8); + allocate_aligned_memory(down_proj_weight, (config.hidden_dim * config.embed_dim * sizeof(int)) / 8 + 1); + allocate_aligned_memory(up_proj_weight, (config.embed_dim * config.hidden_dim * sizeof(int)) / 8); + this->gate_proj = Linear_half_int4(Matrix3D(gate_proj_weight, 1, config.hidden_dim, config.embed_dim / 8), + (param_path + "/gate_proj")); + this->down_proj = 
Linear_half_int4(Matrix3D(down_proj_weight, 1, config.embed_dim, config.hidden_dim / 8), + (param_path + "/down_proj")); + this->up_proj = Linear_half_int4(Matrix3D(up_proj_weight, 1, config.hidden_dim, config.embed_dim / 8), + (param_path + "/up_proj")); +} + +struct Int4llamaDecoderLayer_output Int4llamaDecoderLayer::forward(std::string param_path, const struct Int4llamaDecoderLayer_input &input, int layer_idx) { + PROFILE_START(profile_name); + + Matrix3D hidden_states(hidden_states_arr, input.hidden_states.m_dim_x, input.hidden_states.m_dim_y, + input.hidden_states.m_dim_z); + this->input_layernorm.forward(input.hidden_states, hidden_states, rms_norm_eps); + + struct Int4llamaAttention_input attn_param(hidden_states, input.attention_mask, input.past_key, input.past_value, + input.has_past_key_value, this->layer_idx); + struct Int4llamaAttention_output attn_output = this->attn.forward(param_path + "/self_attn", attn_param); + + Matrix3D residual_add(hidden_states_half_arr, input.hidden_states.m_dim_x, input.hidden_states.m_dim_y, + input.hidden_states.m_dim_z); + int threadsPerBlock = 1024; + int blocksPerGrid =(input.hidden_states.length() + threadsPerBlock - 1) / threadsPerBlock; + // METAL: add interface + add_half(input.hidden_states, attn_output.attn_output, residual_add); + + Matrix3D post_attention_layernorm(final_layer_norm_arr, input.hidden_states.m_dim_x, + input.hidden_states.m_dim_y, input.hidden_states.m_dim_z); + this->post_attention_layernorm.forward(residual_add, post_attention_layernorm, rms_norm_eps); + + Matrix3D gate_proj(gate_proj_arr, input.hidden_states.m_dim_x, input.hidden_states.m_dim_y, + this->hidden_dim); + this->gate_proj.forward(post_attention_layernorm, gate_proj); + + Matrix3D up_proj(up_proj_arr, input.hidden_states.m_dim_x, input.hidden_states.m_dim_y, this->hidden_dim); + this->up_proj.forward(post_attention_layernorm, up_proj); + + int blocksPerGrid2 =(gate_proj.length() + threadsPerBlock - 1) / threadsPerBlock; + // METAL: 
add interface + SiLuMul_half(gate_proj, up_proj); + + Matrix3D down_proj(down_proj_arr, input.hidden_states.m_dim_x, input.hidden_states.m_dim_y, this->embed_dim); + this->down_proj.forward(gate_proj, down_proj); + + int blocksPerGrid3 =(residual_add.length() + threadsPerBlock - 1) / threadsPerBlock; + // METAL: add interface + add_half(residual_add, down_proj, residual_add); + + struct Int4llamaDecoderLayer_output output(residual_add, attn_output.attn_probs_reshaped, + attn_output.past_key_value); + PROFILE_END(profile_name); + + return output; +} + +void Int4llamaDecoderLayer::free_cuda_memory() { + free_aligned_memory_gpu(hidden_states_half_arr); + free_aligned_memory_gpu(final_layer_norm_arr); + free_aligned_memory_gpu(gate_proj_arr); + free_aligned_memory_gpu(up_proj_arr); + free_aligned_memory_gpu(down_proj_arr); + free_aligned_memory_gpu(hidden_states_arr); + free_aligned_memory_gpu(input_layernorm_weight_ptr); + free_aligned_memory_gpu(post_attention_layernorm_ptr); + free_aligned_memory_gpu(gate_proj_weight); + free_aligned_memory_gpu(down_proj_weight); + free_aligned_memory_gpu(up_proj_weight); +} diff --git a/llm/src/nn_modules/metal/Int4llamaDecoderMetal.cc b/llm/src/nn_modules/metal/Int4llamaDecoderMetal.cc new file mode 100644 index 00000000..8073fef2 --- /dev/null +++ b/llm/src/nn_modules/metal/Int4llamaDecoderMetal.cc @@ -0,0 +1,118 @@ +#include +#include +#include + +#include "Int4llamaDecoder.h" +#include "utils.h" + +void prepare_decoder_attention_mask_half(Matrix3D causal_attention_mask, int length, int past_length){ + const struct metal_params params; + + params.A.half_data_ptr = causal_attention_mask.m_data; + params.sqlen = length; + params.past_sqlen = past_length; + params.op = METAL_KERNEL_PREPARE_DECODER_ATTENTION_MASK_HALF; + add_node(¶ms); +} + + +Int4llamaDecoder::Int4llamaDecoder(std::string param_path, const struct model_config config) { + allocate_aligned_memory(attention_mask_buf, config.max_sqlen * config.max_sqlen * 
sizeof(float16_t)); + allocate_aligned_memory(last_hidden_states_buf, config.max_sqlen * config.embed_dim * sizeof(float16_t)); + allocate_aligned_memory(hidden_states_buf, config.max_sqlen * config.embed_dim * sizeof(float)); + allocate_aligned_memory(hidden_states_half_buf, config.max_sqlen * config.embed_dim * sizeof(float16_t)); + + this->voc_size = config.vocsize; + this->embed_dim = config.embed_dim; + this->hidden_dim = config.hidden_dim; + this->num_heads = config.num_heads; + this->padding_idx = config.padding_idx; + this->rms_norm_eps = config.rms_norm_eps; + + // Embedding + Matrix3D embweight(new float[voc_size * embed_dim], 1, voc_size, embed_dim); + // METAL: Metal Embedding + this->embed_tokens = Embedding(embed_dim, voc_size, padding_idx, embweight); // METAL: how to deal with aliasing + load_Embedding_params(this->embed_tokens, param_path + "/embed_tokens"); + + allocate_aligned_memory(norm_weight_ptr, embed_dim * sizeof(float)); + Matrix3D norm_weight(norm_weight_ptr, 1, 1, embed_dim); + norm_weight.load((param_path + "/norm/weight.bin").c_str()); + // METAL: Metal RMSNorm + this->norm = LlamaRMSNorm_metal(norm_weight); + + // Load all the decoder layers + for (int layer_idx = 0; layer_idx < config.num_layers; layer_idx++) { + DEBUG_INS(std::cout << "Start loading layer:" << layer_idx << "..." 
<< std::endl;) + + std::string path = param_path + "/layer" + std::to_string(layer_idx); + Int4llamaDecoderLayer layer = Int4llamaDecoderLayer(path, config, layer_idx); + + this->layers.push_back(layer); + } +}; + +// Int4llamaDecoder +struct Int4llamaDecoder_output Int4llamaDecoder::forward(std::string param_path, const struct Int4llamaDecoder_input &input) { + PROFILE_START(profile_name); + + int sqlen = input.input_ids.m_dim_z, past_key_values_length = 0; + + Matrix3D hidden_states_float(hidden_states_buf, 1, sqlen, this->embed_dim); + this->embed_tokens.forward(input.input_ids, hidden_states_float); + + // Convert from float to float16_t + Matrix3D hidden_states(hidden_states_half_buf, 1, sqlen, this->embed_dim); + int threadsPerBlock_1D = 1024; + int blocksPerGrid =(sqlen * this->embed_dim + threadsPerBlock_1D - 1) / threadsPerBlock_1D; + // METAL: more kernels + float2half<<>>(hidden_states_buf, hidden_states_half_buf, sqlen * this->embed_dim); + + if (input.has_past_keys_values) { + past_key_values_length = input.past_keys[0].m_dim_y; + } + + int length = sqlen + past_key_values_length; + int past_length = past_key_values_length; + Matrix3D causal_attention_mask(attention_mask_buf, 1, length - past_length, length); + // METAL: more kernels + prepare_decoder_attention_mask_half(causal_attention_mask, length, past_length); + + std::vector> past_keys, past_values; + for (int i = 0; i < this->layers.size(); i++) { + std::string path = param_path + "/layer" + std::to_string(i); + + if (!input.has_past_keys_values) { + struct Int4llamaDecoderLayer_input l_i = {hidden_states, causal_attention_mask}; + struct Int4llamaDecoderLayer_output l_o = this->layers[i].forward(path, l_i, i); + + hidden_states = l_o.hidden_states; + past_keys.push_back(l_o.past_key_value.first); + past_values.push_back(l_o.past_key_value.second); + } else { + struct Int4llamaDecoderLayer_input l_i = {hidden_states, causal_attention_mask, input.past_keys[i], + input.past_values[i]}; + struct 
Int4llamaDecoderLayer_output l_o = this->layers[i].forward(path, l_i, i); + + hidden_states = l_o.hidden_states; + past_keys.push_back(l_o.past_key_value.first); + past_values.push_back(l_o.past_key_value.second); + } + } + + Matrix3D last_hidden_states(last_hidden_states_buf, 1, sqlen, this->embed_dim); + this->norm.forward(hidden_states, last_hidden_states, rms_norm_eps); + + struct Int4llamaDecoder_output output = {last_hidden_states, past_keys, past_values}; + PROFILE_END(profile_name); + + return output; +} + +void Int4llamaDecoder::free_cuda_memory() { + free_aligned_memory_gpu(attention_mask_buf); + free_aligned_memory_gpu(last_hidden_states_buf); + free_aligned_memory_gpu(hidden_states_buf); + free_aligned_memory_gpu(hidden_states_half_buf); + free_aligned_memory_gpu(norm_weight_ptr); +} diff --git a/llm/src/nn_modules/metal/Int4llamaForCausalLMMetal.cc b/llm/src/nn_modules/metal/Int4llamaForCausalLMMetal.cc new file mode 100644 index 00000000..1775cf00 --- /dev/null +++ b/llm/src/nn_modules/metal/Int4llamaForCausalLMMetal.cc @@ -0,0 +1,59 @@ +#include + +#include "Int4llamaForCausalLM.h" +#include "operators.h" +#include "utils.h" + +void half2float(const half* halfArray, float* floatArray, int N){ + const struct metal_params params; + + params.A.half_data_ptr = halfArray; + params.B.data_ptr = floatArray; + params.sqlen = N; + params.op = METAL_KERNEL_HALF2FLOAT; + add_node(¶ms); +} + + +Int4LlamaForCausalLM::Int4LlamaForCausalLM(std::string param_path, const struct model_config config) { + allocate_aligned_memory(logits_output_half, config.max_sqlen * config.vocsize * sizeof(float16_t)); + allocate_aligned_memory(logits_output, config.max_sqlen * config.vocsize * sizeof(float)); + allocate_aligned_memory(lm_head_weight, (config.embed_dim * config.vocsize * sizeof(int)) / 8); + + this->decoder = Int4llamaDecoder(param_path + "/decoder", config); + this->lm_head = Linear_half_int4(Matrix3D(lm_head_weight, 1, config.vocsize, config.embed_dim / 8), + 
param_path + "/lm_head"); +} + +struct Int4LlamaForCausalLM_output Int4LlamaForCausalLM::forward(std::string param_path, const struct Int4LlamaForCausalLM_input &input) { + PROFILE_START(profile_name); + int sqlen = input.input_ids.m_dim_z; + + struct Int4llamaDecoder_output decoder_output; + + if (input.has_past_keys_values) { + struct Int4llamaDecoder_input decoder_input = {input.input_ids, input.past_keys, input.past_values}; + decoder_output = this->decoder.forward(param_path + "/decoder", decoder_input); + + } else { + struct Int4llamaDecoder_input decoder_input = {input.input_ids}; + decoder_output = this->decoder.forward(param_path + "/decoder", decoder_input); + } + + Matrix3D logits_half(logits_output_half, 1, sqlen, this->decoder.voc_size); + this->lm_head.forward(decoder_output.last_hidden_state, logits_half); + + Matrix3D logits(logits_output, 1, sqlen, this->decoder.voc_size); + int threadsPerBlock_1D = 1024; + int blocksPerGrid =(sqlen * this->decoder.voc_size + threadsPerBlock_1D - 1) / threadsPerBlock_1D; + // METAL: more kernels needed + half2float<<>>(logits_output_half, logits_output, sqlen * this->decoder.voc_size); + + // waituntilcompleted + metal_completed(); + struct Int4LlamaForCausalLM_output LMoutput = {logits, decoder_output.past_keys, decoder_output.past_values}; + PROFILE_END(profile_name); + + return LMoutput; +} + diff --git a/llm/src/nn_modules/metal/LLaMAGenerateMetal.cc b/llm/src/nn_modules/metal/LLaMAGenerateMetal.cc new file mode 100644 index 00000000..a48a462c --- /dev/null +++ b/llm/src/nn_modules/metal/LLaMAGenerateMetal.cc @@ -0,0 +1,262 @@ +#include "Generate.h" +#include "LLaMATokenizer.h" +#include "common.h" +#include "utils.h" +#include +#include +#include +#include +#include + +std::mutex mtx; // Create a mutex for synchronization + + +// Function to speak in the background +void sayInBackground(const std::string& text) { + std::lock_guard lock(mtx); + std::string command = "./application/sts_utils/speak \"" + text + 
"\""; + int result = std::system(command.c_str()); + (void)result; +} + +std::string LLaMAGenerate(std::string param_path, void *model_ptr, int model_type, std::string text, const struct opt_params generation_config, + std::string voc_path, bool interactive, bool voicechat) { + std::vector last_n_tokens(generation_config.n_ctx); + std::fill(last_n_tokens.begin(), last_n_tokens.end(), 0); + std::vector embd; + std::vector generate_ids; + + const int max_token = 2048; + std::vector input_ids(max_token); + llama_vocab vocab = llama_init_vocab(voc_path.c_str()); + const int n = llama_tokenize(vocab, text.c_str(), input_ids.data(), input_ids.size(), true); + input_ids.resize(n); + + bool is_codellama = false; + if (param_path.find("CodeLLaMA") != std::string::npos) { + is_codellama = true; + } + + int n_consumed = 0; + while ((int)input_ids.size() > n_consumed) { + embd.push_back(input_ids[n_consumed]); + last_n_tokens.erase(last_n_tokens.begin()); + last_n_tokens.push_back(input_ids[n_consumed]); + ++n_consumed; + + if ((int)embd.size() >= generation_config.n_batch) { + break; + } + } + // if (interactive) std::cout << "ASSISTANT: " << std::endl; + + bool previous_two_hash = false; + int break_cnt = 2; + bool new_prompt = true; + static bool has_past_kv = false; +#ifdef QM_METAL + static std::vector> past_keys, past_values; +#else + static std::vector> past_keys, past_values; +#endif + static std::vector> past_keys_fp32, past_values_fp32; + int n_remain = generation_config.n_predict; + std::string output; + while (n_remain != 0 && break_cnt) { + std::vector logits(generation_config.n_vocab); + + int sqlen = 1; + if (new_prompt) { + sqlen = input_ids.size(); + } + if (model_type == LLaMA_INT4) { + Int4LlamaForCausalLM *model = static_cast(model_ptr); + struct Int4LlamaForCausalLM_output model_output; + struct Int4LlamaForCausalLM_input model_input; + if (has_past_kv) { + Matrix3D input_ids_mat(input_ids.data(), 1, 1, sqlen); + model_input = {input_ids_mat, past_keys, 
past_values}; + } else { + Matrix3D input_ids_mat(input_ids.data(), 1, 1, sqlen); + model_input = {input_ids_mat}; + } + if (!new_prompt) STATS_START("Inference latency"); + model_output = model->forward(param_path, model_input); + if (!new_prompt) STATS_END("Inference latency"); + past_keys = model_output.past_keys; + past_values = model_output.past_values; + // memcpy model_ouput.logits[-1] to logits + memcpy(logits.data(), &model_output.logits.m_data[(sqlen - 1) * generation_config.n_vocab], + generation_config.n_vocab * sizeof(float)); + } else if (model_type == LLaMA_FP32) { + Fp32LlamaForCausalLM *model = static_cast(model_ptr); + struct Fp32LlamaForCausalLM_output model_output; + struct Fp32LlamaForCausalLM_input model_input; + if (has_past_kv) { + Matrix3D input_ids_mat(input_ids.data(), 1, 1, sqlen); + model_input = {input_ids_mat, past_keys_fp32, past_values_fp32}; + } else { + Matrix3D input_ids_mat(input_ids.data(), 1, 1, sqlen); + model_input = {input_ids_mat}; + } + if (!new_prompt) STATS_START("Inference latency"); + model_output = model->forward(model_input); + if (!new_prompt) STATS_END("Inference latency"); + past_keys_fp32 = model_output.past_keys; + past_values_fp32 = model_output.past_values; + // memcpy model_ouput.logits[-1] to logits + memcpy(logits.data(), &model_output.logits.m_data[(sqlen - 1) * generation_config.n_vocab], + generation_config.n_vocab * sizeof(float)); + } + has_past_kv = true; + + // Generate + const int n_ctx = generation_config.n_ctx; + const float temp = generation_config.temp; + const int32_t top_k = generation_config.top_k <= 0 ? generation_config.n_vocab : generation_config.top_k; + const float top_p = generation_config.top_p; + const float tfs_z = generation_config.tfs_z; + const float typical_p = generation_config.typical_p; + const int32_t repeat_last_n = generation_config.repeat_last_n < 0 ? 
n_ctx : generation_config.repeat_last_n; + const float repeat_penalty = generation_config.repeat_penalty; + const float alpha_presence = generation_config.presence_penalty; + const float alpha_frequency = generation_config.frequency_penalty; + const int mirostat = generation_config.mirostat; + const float mirostat_tau = generation_config.mirostat_tau; + const float mirostat_eta = generation_config.mirostat_eta; + const int n_vocab = generation_config.n_vocab; + + std::vector candidates; + candidates.reserve(n_vocab); + for (int token_id = 0; token_id < n_vocab; token_id++) { + candidates.emplace_back(OPT_token_data{token_id, logits[token_id], 0.0f}); + } + + OPT_token_data_array candidates_p = {candidates.data(), candidates.size(), false}; + + // Apply penalties + auto last_n_repeat = std::min(std::min((int)last_n_tokens.size(), repeat_last_n), n_ctx); + sample_repetition_penalty(&candidates_p, last_n_tokens.data() + last_n_tokens.size() - last_n_repeat, + last_n_repeat, repeat_penalty); + sample_frequency_and_presence_penalties(&candidates_p, + last_n_tokens.data() + last_n_tokens.size() - last_n_repeat, + last_n_repeat, alpha_frequency, alpha_presence); + + int id = 0; + if (temp <= 0) { + id = sample_token_greedy(&candidates_p); + } else { + if (mirostat == 1) { + static float mirostat_mu = 2.0f * mirostat_tau; + const int mirostat_m = 100; + sample_temperature(&candidates_p, temp); + id = + sample_token_mirostat(n_vocab, &candidates_p, mirostat_tau, mirostat_eta, mirostat_m, &mirostat_mu); + } else if (mirostat == 2) { + static float mirostat_mu = 2.0f * mirostat_tau; + sample_temperature(&candidates_p, temp); + id = sample_token_mirostat_v2(&candidates_p, mirostat_tau, mirostat_eta, &mirostat_mu); + } else { + // Temperature sampling + sample_top_k(&candidates_p, top_k, 1); + sample_tail_free(&candidates_p, tfs_z, 1); + sample_typical(&candidates_p, typical_p, 1); + sample_top_p(&candidates_p, top_p, 1); + sample_temperature(&candidates_p, temp); + id = 
sample_token(&candidates_p); + } + } + + if (id == 2) { + break_cnt--; + continue; + } // eos + else if (id == 1) + continue; + break_cnt = 2; + bool skip = false; + if (id == 2277 && !previous_two_hash) { + previous_two_hash = true; + skip = true; + } else if (previous_two_hash && id == 29937) { // token = # + break_cnt = 0; + skip = true; + } else { + if (previous_two_hash) std::cout << "##" << std::endl; + previous_two_hash = false; + } + + last_n_tokens.erase(last_n_tokens.begin()); + last_n_tokens.push_back(id); + embd.push_back(id); + generate_ids.push_back(id); + input_ids = std::vector{id}; + + + if (interactive && !skip) { + output += llama_id_to_token(vocab, id); + std::cout << llama_id_to_token(vocab, id) << std::flush; + if (voicechat) { + // Remove quotes + output.erase(std::remove(output.begin(), output.end(), '\"'), output.end()); + // Remove hashtags + output.erase(std::remove(output.begin(), output.end(), '#'), output.end()); + // Remove dashes + std::replace(output.begin(), output.end(), '-', ' '); + // Remove numbered lists + output = std::regex_replace(output, std::regex("\\d+\\."), ""); + + size_t lastPos; + // starts ealier but slows down dictation + bool ended = false; + if (output.find(", ") != std::string::npos){ + lastPos = output.rfind(','); + ended = true; + } + if (output.find("\n") != std::string::npos){ + lastPos = output.rfind('\n'); + ended = true; + } + else if (output.find(". ") != std::string::npos){ + lastPos = output.rfind('.'); + ended = true; + } + else if (output.find("! ") != std::string::npos){ + lastPos = output.rfind('!'); + ended = true; + } + else if (output.find("? 
") != std::string::npos){ + lastPos = output.rfind('?'); + ended = true; + + } + else if (output.find(": ") != std::string::npos){ + lastPos = output.rfind(':'); + ended = true; + } + if (ended){ + // Extract sentence 1 (up to and including the last period) + std::string output_copy = output.substr(0, lastPos + 1); + // Extract beginning of sentence 2 (excluding the space after the last period) + output = output.substr(lastPos + 1); // Skip the last period and space + std::thread sayThread(sayInBackground, output_copy); + sayThread.detach(); + } + } + } + + new_prompt = false; + --n_remain; + } + + if (voicechat && interactive){ + sayInBackground(output); + } + + if (interactive) std::cout << std::endl; + + Profiler::getInstance().report_internal(); + Profiler::getInstance().reset(); + + return output; +} diff --git a/llm/src/ops/metal/BMM_F16T.cc b/llm/src/ops/metal/BMM_F16T.cc new file mode 100644 index 00000000..5f54a0d1 --- /dev/null +++ b/llm/src/ops/metal/BMM_F16T.cc @@ -0,0 +1,60 @@ +#include "operators.h" +#include "utils.h" +#include "metal_compute.h" + +void load_BMM_F16T(BMM_F16T &op, std::string prefix) { read_to_array_half((prefix + "/alpha_half.bin").c_str(), &op.alpha, 1); } + +BMM_F16T::BMM_F16T(half _alpha) { this->alpha = _alpha; } + +void BMM_F16T::forward(const Matrix3D &a, const Matrix3D &weight, Matrix3D &c) { + const Matrix3D b = weight; + PROFILE_START(profile_name); + + // a: m x k b: n x k c: m x n + assert(a.m_dim_x == b.m_dim_x); // batch dim + assert(a.m_dim_z == b.m_dim_z); // k + assert(a.m_dim_y == c.m_dim_y); // m + assert(b.m_dim_y == c.m_dim_z); // n + + const struct metal_params params; + params.A.row = a.m_dim_y; + params.A.column = a.m_dim_z; + params.A.half_data_ptr = a.m_data; + params.B.row = b.m_dim_y; + params.B.column = b.m_dim_z; + params.B.half_data_ptr = b.m_data; + params.C.row = c.m_dim_y; + params.C.column = c.m_dim_z; + params.C.half_data_ptr = c.m_data; + params.half_alpha = alpha; + params.bs = a.m_dim_x; + + 
params.op = METAL_KERNEL_MUL_MM_INT4_F32; + add_node(¶ms); + PROFILE_END(profile_name); +} + +void BMM_F16T::forward_weight_untransposed(const Matrix3D &a, const Matrix3D &weight, Matrix3D &c) { + const Matrix3D b = weight; + PROFILE_START(profile_name); + + // a: m x k b: n x k c: m x n + assert(a.m_dim_x == b.m_dim_x); // batch dim + assert(a.m_dim_z == b.m_dim_y); // k + assert(a.m_dim_y == c.m_dim_y); // m + assert(b.m_dim_z == c.m_dim_z); // n + + struct matmul_params params; + params.A.row = a.m_dim_y; + params.A.column = a.m_dim_z; + params.A.half_data_ptr = a.m_data; + params.B.row = b.m_dim_y; + params.B.column = b.m_dim_z; + params.B.half_data_ptr = b.m_data; + params.C.row = c.m_dim_y; + params.C.column = c.m_dim_z; + params.C.half_data_ptr = c.m_data; + params.op = METAL_KERNEL_MUL_MM_INT4_F32; + add_node(¶ms); + PROFILE_END(profile_name); +} \ No newline at end of file diff --git a/llm/src/ops/metal/LlamaRMSNorm.cc b/llm/src/ops/metal/LlamaRMSNorm.cc index 230c5023..358e8c5a 100644 --- a/llm/src/ops/metal/LlamaRMSNorm.cc +++ b/llm/src/ops/metal/LlamaRMSNorm.cc @@ -17,10 +17,8 @@ void LlamaRMSNorm_metal::forward(const Matrix3D &x, Matrix3D &output params.C.column = output.m_dim_z; params.C.half_data_ptr = output.m_data; - struct metal_constants op_constants = new (struct metal_constants); - op_constants.eps = eps; - struct metal_cgraph *graph = new (struct metal_cgraph); - graph->n_nodes = 1; - graph->mm_nodes[0] = params; - metal_graph_compute(METAL_KERNEL_RMS_NORM, graph); + params.op = METAL_KERNEL_RMS_NORM; + params.eps = eps; + add_node(¶ms); + return; } \ No newline at end of file diff --git a/llm/src/ops/metal/RotaryPosEmb.cc b/llm/src/ops/metal/RotaryPosEmb.cc index f039b055..d9b48f4e 100644 --- a/llm/src/ops/metal/RotaryPosEmb.cc +++ b/llm/src/ops/metal/RotaryPosEmb.cc @@ -1,8 +1,9 @@ #include #include "operators.h" +#include "metal_compute.h" // TODO: match constants on metal -void RotaryPosEmb_cuda_forward(Matrix3D query, Matrix3D key, 
Matrix3D cos, Matrix3D sin, int start_idx, int len) { +void RotaryPosEmb_metal_forward(Matrix3D query, Matrix3D key, Matrix3D cos, Matrix3D sin, int start_idx, int len) { struct matmul_params params; params.A.row = query.m_dim_y; params.A.column = query.m_dim_z; @@ -18,7 +19,17 @@ void RotaryPosEmb_cuda_forward(Matrix3D query, Matrix3D key, Matrix3 params.int32_zero_point = this->zero_point.m_data; params.block_size = QK; - matmul::MatmulOperator op = matmul::MatmulOperator(); - op.rope_metal(¶ms, query.m_dim_x, query.m_dim_y, query.m_dim_z, n_past, n_dims, mode, n_orig_ctx, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow ); - + params.n_orig_ctx = 1; + params.n_past = 1; + params.n_dims = 3; + params.mode = 1; + params.freq_base = 1; + params.freq_scale = 1; + params.ext_factor = 1; + params.attn_factor = 1; + params.beta_fast = 1; + params.beta_slow = 1; + params.op = METAL_KERNEL_ROPE; + add_node(¶ms); + return; } diff --git a/llm/src/ops/metal/batch_add.cc b/llm/src/ops/metal/batch_add.cc index 6f35a5d9..8fc17233 100644 --- a/llm/src/ops/metal/batch_add.cc +++ b/llm/src/ops/metal/batch_add.cc @@ -1,7 +1,6 @@ #include "operators.h" #include "metal_compute.h" -// done void batch_Add(const Matrix3D &input, const Matrix3D &input2, Matrix3D &output) { const struct metal_params params; @@ -17,8 +16,6 @@ void batch_Add(const Matrix3D &input, const Matrix3D &input2, Matr params.A.data_ptr = input.m_data; params.B.data_ptr = input2.m_data; params.C.data_ptr = output.m_data; - struct metal_cgraph *graph = new (struct metal_cgraph); - graph->n_nodes = 1; - graph->mm_nodes[0] = params; - metal_graph_compute(METAL_KERNEL_BATCH_ADD, graph); + params.op = METAL_KERNEL_BATCH_ADD; + add_node(¶ms); } diff --git a/llm/src/ops/metal/embedding.cc b/llm/src/ops/metal/embedding.cc index d7bc12f6..8ddba94b 100644 --- a/llm/src/ops/metal/embedding.cc +++ b/llm/src/ops/metal/embedding.cc @@ -7,20 +7,20 @@ void load_Embedding_params_metal(Embedding_cuda& op, 
std::string prefix) { } // TODO: implement metal side -void Embedding_cuda::forward(Matrix3D input_id, Matrix3D output) { +void Embedding_metal::forward(Matrix3D input_id, Matrix3D output) { PROFILE_START(profile_name); assert(input_id.m_dim_x == 1); assert(input_id.m_dim_y == 1); assert(input_id.m_dim_z == output.m_dim_y); assert(output.m_dim_z == this->embed_dim); - struct metal_constants op_constants = new (struct metal_constants); - op_constants.embed_dim = this->embed_dim; - struct metal_cgraph *graph = new (struct metal_cgraph); - graph->n_nodes = 1; - graph->input_id = input_id; - graph->output = output; - graph->lookup = this->lookup.m_data; - metal_graph_compute(METAL_KERNEL_EMBEDDING, graph); + const struct metal_params params; + + params.A.int32_data_ptr = input.m_data; + params.B.data_ptr = this->lookup.m_data; + params.C.half_data_ptr = output.m_data; + params.op = METAL_KERNEL_EMBEDDING; + params.embed_dim = this->embed_dim; + add_node(¶ms); PROFILE_END(profile_name); } \ No newline at end of file diff --git a/llm/src/ops/metal/linear.cc b/llm/src/ops/metal/linear.cc index 61e93b55..1908799c 100644 --- a/llm/src/ops/metal/linear.cc +++ b/llm/src/ops/metal/linear.cc @@ -1,6 +1,7 @@ #include #include "operators.h" #include "utils.h" +#include "metal_compute.h" // TODO: incorporate gemv from llama.cpp @@ -18,7 +19,7 @@ void Linear_half_int4::forward(const Matrix3D &x, Matrix3D assert(output.m_dim_z > num_thread); assert(output.m_dim_z % (num_thread * 2) == 0); // unroll column by 2 - struct matmul_params params; + struct metal_params params; params.A.row = x.m_dim_y; params.A.column = x.m_dim_z; params.A.half_data_ptr = x.m_data; @@ -32,9 +33,8 @@ void Linear_half_int4::forward(const Matrix3D &x, Matrix3D params.half_scales = this->scale.m_data; params.int32_zero_point = this->zero_point.m_data; params.block_size = QK; - - matmul::MatmulOperator op = matmul::MatmulOperator(); - op.mat_mul_int4_f32_metal(¶ms); //BUG: gemv and matmul int4? 
(llama.cpp matmul needed) + params.op = METAL_KERNEL_MUL_MM_INT4_F32; + add_node(¶ms); PROFILE_END(profile_name); return; diff --git a/llm/src/ops/metal/softmax.cc b/llm/src/ops/metal/softmax.cc index 3e250536..6bde6067 100644 --- a/llm/src/ops/metal/softmax.cc +++ b/llm/src/ops/metal/softmax.cc @@ -13,11 +13,7 @@ void softmax(Matrix3D input, Matrix3D output) { params.C.row = output.m_dim_y; params.C.column = output.m_dim_z; params.C.data_ptr = output.m_data; - - struct metal_constants op_constants = new (struct metal_constants); - op_constants.scales = this->scales; - struct metal_cgraph *graph = new (struct metal_cgraph); - graph->n_nodes = 1; - graph->mm_nodes[0] = params; - metal_graph_compute(METAL_KERNEL_SOFT_MAX, graph); + params.scale = this->scales; + params.op = METAL_KERNEL_SOFT_MAX; + add_node(¶ms); } \ No newline at end of file From 3ac955ca2801bc7118852505a828b27fa5935222 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Fri, 26 Apr 2024 11:59:25 -0400 Subject: [PATCH 33/37] update rope --- kernels/metal/metal_compute.cc | 33 ++++++++++--------------------- llm/src/ops/metal/RotaryPosEmb.cc | 20 +++++++++---------- 2 files changed, 20 insertions(+), 33 deletions(-) diff --git a/kernels/metal/metal_compute.cc b/kernels/metal/metal_compute.cc index 17a41d25..72a33f90 100644 --- a/kernels/metal/metal_compute.cc +++ b/kernels/metal/metal_compute.cc @@ -310,23 +310,23 @@ static enum status metal_graph_compute(struct metal_cgraph * mg) { struct matrix src0 = curr_node->A; struct matrix src1 = curr_node->B; struct matrix dst = curr_node->C; - // TODO: ne[0], nb[0] calculation & order - const int64_t ne00 = src0.row; - const int64_t ne01 = src0.column; - const int64_t ne02 = (curr_node && curr_node->bs != 0) ? curr_node->bs : 1; + // TODO: double check the placement of parameters + const int64_t ne00 = src0.row; // k + const int64_t ne01 = src0.column; // n + const int64_t ne02 = (curr_node && curr_node->bs != 0) ? 
curr_node->bs : 1; // bs const int64_t ne03 = 1; - const uint64_t nb00 = (op == METAL_KERNEL_MUL_MM_INT4_F32) || (op == METAL_KERNEL_MUL_MV_INT4_F32) ? sizeof(uint8_t) : sizeof(float); + const uint64_t nb00 = sizeof(unsigned char); const uint64_t nb01 = nb00*ne00/block_size; const uint64_t nb02 = nb01*ne01; const uint64_t nb03 = nb02*ne02; - const int64_t ne10 = src1.row; - const int64_t ne11 = src1.column; - const int64_t ne12 = (curr_node && curr_node->bs != 0) ? curr_node->bs : 1; + const int64_t ne10 = src1.row; // k + const int64_t ne11 = src1.column; // m + const int64_t ne12 = (curr_node && curr_node->bs != 0) ? curr_node->bs : 1; // bs const int64_t ne13 = 1; - const uint64_t nb10 = sizeof(float); + const uint64_t nb10 = sizeof(unsigned char); const uint64_t nb11 = nb10*ne10; const uint64_t nb12 = nb11*ne11; const uint64_t nb13 = nb12*ne12; @@ -336,7 +336,7 @@ static enum status metal_graph_compute(struct metal_cgraph * mg) { const int64_t ne2 = (curr_node && curr_node->bs != 0) ? curr_node->bs : 1; const int64_t ne3 = 1; - const uint64_t nb0 = sizeof(float); + const uint64_t nb0 = sizeof(unsigned char); const uint64_t nb1 = nb0*ne0; const uint64_t nb2 = nb1*ne1; const uint64_t nb3 = nb2*ne2; @@ -405,15 +405,6 @@ static enum status metal_graph_compute(struct metal_cgraph * mg) { break; case (METAL_KERNEL_RMS_NORM): int nth = 32; // SIMD width - const int64_t ne00 = src0.row; - const int64_t ne01 = src0.column; - const int64_t ne02 = 1; - const int64_t ne03 = 1; - // TODO: nb00 should be half? 
- const uint64_t nb00 = sizeof(half); - const uint64_t nb01 = nb00*ne00; - const uint64_t nb02 = nb01*ne01; - const uint64_t nb03 = nb02*ne02; MTL::Buffer *id_src0 = getBufferfromPtr(src0.half_data_ptr); MTL::Buffer *id_src1 = getBufferfromPtr(src1.half_data_ptr); MTL::Buffer *id_dst = getBufferfromPtr(dst.half_data_ptr); @@ -441,10 +432,6 @@ static enum status metal_graph_compute(struct metal_cgraph * mg) { } encoder->setComputePipelineState(ctx->kernels[op].pipeline); } - const int64_t ne00 = src0.row; - const int64_t ne01 = src0.column; - const int64_t ne02 = 1; - const int64_t ne03 = 1; const float scale = curr_node->scale; MTL::Buffer *id_src0 = getBufferfromPtr(src0.half_data_ptr); MTL::Buffer *id_src1 = getBufferfromPtr(src1.data_ptr); diff --git a/llm/src/ops/metal/RotaryPosEmb.cc b/llm/src/ops/metal/RotaryPosEmb.cc index d9b48f4e..ce49f409 100644 --- a/llm/src/ops/metal/RotaryPosEmb.cc +++ b/llm/src/ops/metal/RotaryPosEmb.cc @@ -19,16 +19,16 @@ void RotaryPosEmb_metal_forward(Matrix3D query, Matrix3D key, Matrix params.int32_zero_point = this->zero_point.m_data; params.block_size = QK; - params.n_orig_ctx = 1; - params.n_past = 1; - params.n_dims = 3; - params.mode = 1; - params.freq_base = 1; - params.freq_scale = 1; - params.ext_factor = 1; - params.attn_factor = 1; - params.beta_fast = 1; - params.beta_slow = 1; + params.n_orig_ctx = 4096; + params.n_past = 0; + params.n_dims = 128; + params.mode = 0; + params.freq_base = 10000.0; + params.freq_scale = 1.0; + params.ext_factor = 0.0; + params.attn_factor = 1.0; + params.beta_fast = 32.0; + params.beta_slow = 1.0; params.op = METAL_KERNEL_ROPE; add_node(¶ms); return; From 46d96276ecc3741771ccc53ec7cdab70fa079099 Mon Sep 17 00:00:00 2001 From: RaymondWang0 Date: Wed, 1 May 2024 12:44:57 -0400 Subject: [PATCH 34/37] update makefile --- kernels/metal/metal_compute.cc | 1 + llm/Makefile | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/kernels/metal/metal_compute.cc 
b/kernels/metal/metal_compute.cc index 72a33f90..19e8396d 100644 --- a/kernels/metal/metal_compute.cc +++ b/kernels/metal/metal_compute.cc @@ -5,6 +5,7 @@ #include #include +#include #include #include "metal_compute.h" diff --git a/llm/Makefile b/llm/Makefile index 6a2b87b0..6fd1609b 100644 --- a/llm/Makefile +++ b/llm/Makefile @@ -64,8 +64,11 @@ $(info Detected CUDA_PATH: $(CUDA_HOME)) LIB_SRC_CUDA_CC = $(wildcard $(LIB_DIR)/cuda/*.cc) LIB_SRC_CUDA_CU = $(wildcard $(LIB_DIR)/cuda/*.cu) $(wildcard src/*.cu) $(wildcard src/nn_modules/cuda/*.cu) $(wildcard src/ops/cuda/*.cu) INCLUDE_DIRS += -I./include/ops/cuda +else ifdef USE_METAL +$(info Metal is available!) + LIB_SRC += $(wildcard src/nn_modules/metal/*.cc) else -$(info CUDA is unavailable!) +$(info CUDA and Metal are unavailable!) LIB_SRC += $(wildcard src/nn_modules/non_cuda/*.cc) endif @@ -105,7 +108,7 @@ else ifeq ($(shell uname -p),arm) # For ARM A-series (such as Mac M1) with Metal GPU ifdef USE_METAL LIB_ACC_INC = -I$(LIB_DIR)/metal/include -I$(LIB_DIR)/metal/metal-cpp -I$(LIB_DIR)/metal - LIB_SRC += $(wildcard $(LIB_DIR)/metal/*.cc) + LIB_SRC += $(wildcard $(LIB_DIR)/metal/*.cc) $(wildcard src/nn_modules/metal/*.cc) $(wildcard src/ops/cuda/*.cu) INCLUDE_DIRS += -I/opt/homebrew/opt/boost/include $(LIB_ACC_INC) LIB += -framework Metal -framework Foundation -framework MetalKit TARGET += default.metallib library.air @@ -190,6 +193,9 @@ $(TEST_TARGET_GENERAL): %: tests/%.cc $(OBJS) ifdef CUDA_AVAILABLE $(TEST_TARGET_IF_CUDA): %: tests/cuda/%.cu $(OBJS) $(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -o $@ $^ $(LIB) $(LDFLAGS) +else ifdef USE_METAL +$(TEST_TARGET_IF_CUDA): %: tests/metal/%.cc $(OBJS) + $(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -o $@ $^ $(LIB) $(LDFLAGS) else $(TEST_TARGET_IF_CUDA): %: tests/non_cuda/%.cc $(OBJS) $(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -o $@ $^ $(LIB) $(LDFLAGS) @@ -210,6 +216,12 @@ profile_Int4llamaForCausalLM: tests/cuda/test_Int4llamaForCausalLM.cu $(PROFILE_ profile_ops: tests/cuda/test_ops.cu 
$(PROFILE_OBJS) $(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -DPROFILER -o $@ $^ $(LIB) $(LDFLAGS) +else ifdef USE_METAL +profile_Int4llamaForCausalLM: tests/metal/test_Int4llamaForCausalLM.cc $(PROFILE_OBJS) + $(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -DPROFILER -o $@ $^ $(LIB) $(LDFLAGS) + +profile_ops: tests/metal/test_ops.cc $(PROFILE_OBJS) + $(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -DPROFILER -o $@ $^ $(LIB) $(LDFLAGS) else profile_Int4llamaForCausalLM: tests/non_cuda/test_Int4llamaForCausalLM.cc $(PROFILE_OBJS) $(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -DPROFILER -o $@ $^ $(LIB) $(LDFLAGS) From 0d4cd667127d78f53c7cf6e41291b852acb0de72 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Sun, 12 May 2024 17:20:25 -0400 Subject: [PATCH 35/37] reorganize metal --- kernels/matmul.h | 8 ++ kernels/metal/{ => include}/metal_compute.h | 7 +- kernels/metal/{ => src}/matmul_f32_f32.cc | 2 +- kernels/metal/{ => src}/matmul_int4_f32.cc | 2 +- kernels/metal/{ => src}/matvec_f32_f32.cc | 2 +- kernels/metal/{ => src}/matvec_int4_f32.cc | 2 +- kernels/metal/{ => src}/metal_compute.cc | 22 +++-- kernels/metal/{ => src}/op.metal | 13 ++- llm/Makefile | 5 +- llm/include/nn_modules/Int4llamaAttention.h | 22 +++++ .../nn_modules/Int4llamaAttentionMetal.h | 89 ------------------- llm/include/nn_modules/Int4llamaDecoder.h | 17 ++++ .../nn_modules/Int4llamaDecoderLayer.h | 22 ++++- .../nn_modules/Int4llamaDecoderLayerMetal.h | 87 ------------------ .../nn_modules/Int4llamaDecoderMetal.h | 69 -------------- llm/include/nn_modules/Int4llamaForCausalLM.h | 13 +++ .../nn_modules/Int4llamaForCausalLMMetal.h | 61 ------------- llm/include/operators.h | 13 ++- llm/include/ops/linear.h | 39 ++++++++ llm/include/ops/metal/BMM_F16T.h | 15 ++++ llm/include/ops/metal/Embedding.h | 20 +++++ llm/include/ops/metal/LlamaRMSNorm.h | 14 +++ llm/include/ops/metal/RotaryPosEmb.h | 26 ++++++ llm/include/ops/metal/batch_add.h | 4 + llm/include/ops/metal/reduction.h | 1 + llm/include/ops/metal/softmax.h | 4 + llm/include/utils.h | 5 ++ 
...ttentionMetal.cc => Int4llamaAttention.cc} | 88 +++++++----------- ...amaDecoderMetal.cc => Int4llamaDecoder.cc} | 24 ++--- ...LayerMetal.cc => Int4llamaDecoderLayer.cc} | 27 ++---- ...usalLMMetal.cc => Int4llamaForCausalLM.cc} | 13 ++- ...LLaMAGenerateMetal.cc => LLaMAGenerate.cc} | 0 llm/src/ops/metal/BMM_F16T.cc | 2 +- llm/src/ops/metal/LlamaRMSNorm.cc | 2 +- llm/src/ops/metal/RotaryPosEmb.cc | 4 +- llm/src/ops/metal/batch_add.cc | 10 +-- llm/src/ops/metal/embedding.cc | 4 +- llm/src/ops/metal/softmax.cc | 5 +- llm/src/utils.cc | 37 +++++++- 39 files changed, 355 insertions(+), 445 deletions(-) rename kernels/metal/{ => include}/metal_compute.h (94%) rename kernels/metal/{ => src}/matmul_f32_f32.cc (89%) rename kernels/metal/{ => src}/matmul_int4_f32.cc (89%) rename kernels/metal/{ => src}/matvec_f32_f32.cc (89%) rename kernels/metal/{ => src}/matvec_int4_f32.cc (89%) rename kernels/metal/{ => src}/metal_compute.cc (96%) rename kernels/metal/{ => src}/op.metal (98%) delete mode 100644 llm/include/nn_modules/Int4llamaAttentionMetal.h delete mode 100644 llm/include/nn_modules/Int4llamaDecoderLayerMetal.h delete mode 100644 llm/include/nn_modules/Int4llamaDecoderMetal.h delete mode 100644 llm/include/nn_modules/Int4llamaForCausalLMMetal.h create mode 100644 llm/include/ops/metal/BMM_F16T.h create mode 100644 llm/include/ops/metal/Embedding.h create mode 100644 llm/include/ops/metal/LlamaRMSNorm.h create mode 100644 llm/include/ops/metal/RotaryPosEmb.h create mode 100644 llm/include/ops/metal/batch_add.h create mode 100644 llm/include/ops/metal/reduction.h create mode 100644 llm/include/ops/metal/softmax.h rename llm/src/nn_modules/metal/{Int4llamaAttentionMetal.cc => Int4llamaAttention.cc} (78%) rename llm/src/nn_modules/metal/{Int4llamaDecoderMetal.cc => Int4llamaDecoder.cc} (91%) rename llm/src/nn_modules/metal/{Int4llamaDecoderLayerMetal.cc => Int4llamaDecoderLayer.cc} (88%) rename llm/src/nn_modules/metal/{Int4llamaForCausalLMMetal.cc => 
Int4llamaForCausalLM.cc} (81%) rename llm/src/nn_modules/metal/{LLaMAGenerateMetal.cc => LLaMAGenerate.cc} (100%) diff --git a/kernels/matmul.h b/kernels/matmul.h index 148d1faf..fb655e90 100644 --- a/kernels/matmul.h +++ b/kernels/matmul.h @@ -16,6 +16,9 @@ typedef half_float::half naive_float16_t; #include #include typedef half float16_t; +#elif defined(QM_METAL) +typedef half_float::half float16_t; +typedef float16_t half; #elif defined(__ARM_NEON) typedef __fp16 float16_t; #elif defined(__x86_64__) @@ -99,6 +102,11 @@ struct thread_args { int start_i, end_i, blk_size; }; +// #ifdef QM_METAL +// #include "metal/include/metal_compute.h" +// // typedef half_float::half half; +// #endif + #define MAX(A, B) ((A) > (B) ? (A) : (B)) #define MIN(A, B) ((A) < (B) ? (A) : (B)) diff --git a/kernels/metal/metal_compute.h b/kernels/metal/include/metal_compute.h similarity index 94% rename from kernels/metal/metal_compute.h rename to kernels/metal/include/metal_compute.h index 16ad5e1b..0f0e86c3 100644 --- a/kernels/metal/metal_compute.h +++ b/kernels/metal/include/metal_compute.h @@ -1,7 +1,7 @@ #ifndef METAL_COMPUTE_H #define METAL_COMPUTE_H -#include "../matmul.h" +#include "../../matmul.h" #include "operators.h" #include "Foundation/Foundation.hpp" #include "Metal/Metal.hpp" @@ -23,6 +23,7 @@ enum { }; enum metal_kernel_type { + METAL_KERNEL_FLOAT2HALF, METAL_KERNEL_HALF2FLOAT, METAL_KERNEL_PREPARE_DECODER_ATTENTION_MASK_HALF, METAL_KERNEL_SILUMUL_HALF, @@ -105,7 +106,7 @@ struct metal_params { int beta_fast; int beta_slow; // - int sqlen, past_sqlen, num_heads, head_dim, input_m_dim_z; + int sqlen, past_sqlen, num_heads, head_dim, input_m_dim_z, tgz ; }; struct metal_cgraph{ @@ -117,7 +118,7 @@ struct metal_cgraph{ void *allocateSharedMem(size_t size); void init(); static void metal_free(struct metal_context * ctx); -static enum status metal_graph_compute(metal_kernel_type op, +enum status metal_graph_compute( struct metal_cgraph * metal_data); void add_node(const 
struct metal_params * new_node); diff --git a/kernels/metal/matmul_f32_f32.cc b/kernels/metal/src/matmul_f32_f32.cc similarity index 89% rename from kernels/metal/matmul_f32_f32.cc rename to kernels/metal/src/matmul_f32_f32.cc index d29e6ff0..98ac0e89 100644 --- a/kernels/metal/matmul_f32_f32.cc +++ b/kernels/metal/src/matmul_f32_f32.cc @@ -1,4 +1,4 @@ -#include "metal_compute.h" +#include "../include/metal_compute.h" namespace matmul { void mat_mul_f32_f32_metal(const struct matmul_params *params){ struct metal_cgraph *graph = new (struct metal_cgraph); diff --git a/kernels/metal/matmul_int4_f32.cc b/kernels/metal/src/matmul_int4_f32.cc similarity index 89% rename from kernels/metal/matmul_int4_f32.cc rename to kernels/metal/src/matmul_int4_f32.cc index 9eee2426..79ad3bc6 100644 --- a/kernels/metal/matmul_int4_f32.cc +++ b/kernels/metal/src/matmul_int4_f32.cc @@ -1,4 +1,4 @@ -#include "metal_compute.h" +#include "../include/metal_compute.h" namespace matmul { void mat_mul_int4_f32_metal(const struct matmul_params *params){ struct metal_cgraph *graph = new (struct metal_cgraph); diff --git a/kernels/metal/matvec_f32_f32.cc b/kernels/metal/src/matvec_f32_f32.cc similarity index 89% rename from kernels/metal/matvec_f32_f32.cc rename to kernels/metal/src/matvec_f32_f32.cc index 9d8a5ab9..2abd779f 100644 --- a/kernels/metal/matvec_f32_f32.cc +++ b/kernels/metal/src/matvec_f32_f32.cc @@ -1,4 +1,4 @@ -#include "metal_compute.h" +#include "../include/metal_compute.h" namespace matmul { void mat_vec_f32_f32_metal(const struct matmul_params *params){ struct metal_cgraph *graph = new (struct metal_cgraph); diff --git a/kernels/metal/matvec_int4_f32.cc b/kernels/metal/src/matvec_int4_f32.cc similarity index 89% rename from kernels/metal/matvec_int4_f32.cc rename to kernels/metal/src/matvec_int4_f32.cc index a6c95702..c8f843dd 100644 --- a/kernels/metal/matvec_int4_f32.cc +++ b/kernels/metal/src/matvec_int4_f32.cc @@ -1,4 +1,4 @@ -#include "metal_compute.h" +#include 
"../include/metal_compute.h" namespace matmul { void mat_vec_int4_f32_metal(const struct matmul_params *params){ struct metal_cgraph *graph = new (struct metal_cgraph); diff --git a/kernels/metal/metal_compute.cc b/kernels/metal/src/metal_compute.cc similarity index 96% rename from kernels/metal/metal_compute.cc rename to kernels/metal/src/metal_compute.cc index 19e8396d..dea9903e 100644 --- a/kernels/metal/metal_compute.cc +++ b/kernels/metal/src/metal_compute.cc @@ -7,12 +7,8 @@ #include #include -#include "metal_compute.h" +#include "../include/metal_compute.h" -#undef MIN -#undef MAX -#define MIN(a, b) ((a) < (b) ? (a) : (b)) -#define MAX(a, b) ((a) > (b) ? (a) : (b)) #define block_size 32 void *allocateSharedMem(size_t size) { @@ -103,6 +99,9 @@ void init() { // simd_sum and simd_max requires MTLGPUFamilyApple7 // TODO: syntax error + METAL_ADD_KERNEL(METAL_KERNEL_FLOAT2HALF "float2half", true); + METAL_ADD_KERNEL(METAL_KERNEL_HALF2FLOAT, "half2float", true); + METAL_ADD_KERNEL(METAL_KERNEL_PREPARE_DECODER_ATTENTION_MASK_HALF, "kernel_prepare_decoder_attention_mask_half", true); METAL_ADD_KERNEL(METAL_KERNEL_SILUMUL_HALF, "SiLuMul_half", true); METAL_ADD_KERNEL(METAL_KERNEL_ADD_HALF, "add_half", true); METAL_ADD_KERNEL(METAL_KERNEL_SHAPE_QKV, "shape_qkv", true); @@ -149,7 +148,7 @@ static void metal_free(struct metal_context * ctx) { free(ctx); } -static enum status metal_graph_compute(struct metal_cgraph * mg) { +enum status metal_graph_compute(struct metal_cgraph * mg) { // in TinyChatEngine, inputs are operations and grouped tensors MTL::ComputePassDescriptor* edesc = MTL::ComputePassDescriptor::computePassDescriptor(); edesc->setDispatchType(MTL::DispatchTypeSerial); @@ -186,6 +185,17 @@ static enum status metal_graph_compute(struct metal_cgraph * mg) { continue; } switch (op) { + case (METAL_KERNEL_FLOAT2FLOAT): + MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).data_ptr); + MTL::Buffer *id_dst = getBufferfromPtr((curr_node->B).half_data_ptr); + 
encoder->setComputePipelineState(ctx->kernels[op].pipeline); + encoder->setBuffer(id_src0, offs_src0, 0); + encoder->setBuffer(id_dst, offs_src1, 1); + encoder->setBytes(&curr_node->sqlen, sizeof(int), 2); + MTL::Size ThreadperGroup = MTL::Size::Make(1024, 1, 1); + MTL::Size ThreadgroupsperGrid = MTL::Size::Make((curr_node->sqlen + 1024 - 1) / 1024, 1, 1); + encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); + break; case (METAL_KERNEL_HALF2FLOAT): MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); MTL::Buffer *id_dst = getBufferfromPtr((curr_node->B).data_ptr); diff --git a/kernels/metal/op.metal b/kernels/metal/src/op.metal similarity index 98% rename from kernels/metal/op.metal rename to kernels/metal/src/op.metal index 3efd0549..1bd09ea4 100644 --- a/kernels/metal/op.metal +++ b/kernels/metal/src/op.metal @@ -4,8 +4,6 @@ using namespace metal; -using namespace metal; - #define N_SIMDWIDTH 32 #define MAX(x, y) ((x) > (y) ? (x) : (y)) #define MIN(x, y) ((x) < (y) ? 
(x) : (y)) @@ -24,7 +22,16 @@ typedef struct { uint8_t qs[QK4_0 / 2]; // nibbles / quants } block_q4_0; -kernel void half2float(device const half* halfArray [[buffer(0)]], +kernel void kernel_float2half(device const half* floatArray [[buffer(0)]], + device float* halfArray [[buffer(1)]], + constant int& N [[buffer(2)]], + uint index [[thread_position_in_grid]]) { + if (index < N) { + halfArray[index] = floatArray[index]; // Implicit conversion from half to float + } +} + +kernel void kernel_half2float(device const half* halfArray [[buffer(0)]], device float* floatArray [[buffer(1)]], constant int& N [[buffer(2)]], uint index [[thread_position_in_grid]]) { diff --git a/llm/Makefile b/llm/Makefile index 6fd1609b..6186450c 100644 --- a/llm/Makefile +++ b/llm/Makefile @@ -107,8 +107,8 @@ else ifeq ($(shell uname -p),arm) LIB += -L/opt/homebrew/opt/boost/lib # For ARM A-series (such as Mac M1) with Metal GPU ifdef USE_METAL - LIB_ACC_INC = -I$(LIB_DIR)/metal/include -I$(LIB_DIR)/metal/metal-cpp -I$(LIB_DIR)/metal - LIB_SRC += $(wildcard $(LIB_DIR)/metal/*.cc) $(wildcard src/nn_modules/metal/*.cc) $(wildcard src/ops/cuda/*.cu) + LIB_ACC_INC = -I$(LIB_DIR)/metal/include -I$(LIB_DIR)/metal/metal-cpp -I$(LIB_DIR)/metal/src + LIB_SRC += $(wildcard $(LIB_DIR)/metal/*.cc) $(wildcard src/nn_modules/metal/*.cc) INCLUDE_DIRS += -I/opt/homebrew/opt/boost/include $(LIB_ACC_INC) LIB += -framework Metal -framework Foundation -framework MetalKit TARGET += default.metallib library.air @@ -131,7 +131,6 @@ else endif # $(info $(LIB_SRC)) - SRC_DIR = src SRC = $(wildcard src/*.cc) SRC += $(wildcard src/nn_modules/*.cc) diff --git a/llm/include/nn_modules/Int4llamaAttention.h b/llm/include/nn_modules/Int4llamaAttention.h index 316cd975..9a41c468 100644 --- a/llm/include/nn_modules/Int4llamaAttention.h +++ b/llm/include/nn_modules/Int4llamaAttention.h @@ -8,6 +8,10 @@ struct Int4llamaAttention_output { Matrix3D attn_output; Matrix3D attn_probs_reshaped; std::pair, Matrix3D> past_key_value; 
+#elif defined(QM_METAL) + Matrix3D attn_output; + Matrix3D attn_probs_reshaped; + std::pair, Matrix3D> past_key_value; #else Matrix3D attn_output; Matrix3D attn_probs_reshaped; @@ -23,6 +27,12 @@ struct Int4llamaAttention_input { Matrix3D attention_mask; Matrix3D past_key, past_value; + Int4llamaAttention_input(Matrix3D hidden_states_, Matrix3D attention_mask_, int layer_idx_) +#elif defined(QM_METAL) + Matrix3D hidden_states; + Matrix3D attention_mask; + Matrix3D past_key, past_value; + Int4llamaAttention_input(Matrix3D hidden_states_, Matrix3D attention_mask_, int layer_idx_) #else Matrix3D hidden_states; @@ -38,6 +48,10 @@ struct Int4llamaAttention_input { Int4llamaAttention_input(Matrix3D hidden_states_, Matrix3D attention_mask_, Matrix3D past_key_, Matrix3D past_value_, bool has_past_key_value_, int layer_idx_) +#elif defined(QM_METAL) + Int4llamaAttention_input(Matrix3D hidden_states_, Matrix3D attention_mask_, + Matrix3D past_key_, Matrix3D past_value_, bool has_past_key_value_, + int layer_idx_) #else Int4llamaAttention_input(Matrix3D hidden_states_, Matrix3D attention_mask_, Matrix3D past_key_, Matrix3D past_value_, bool has_past_key_value_, int layer_idx_) @@ -65,6 +79,9 @@ class Int4llamaAttention { #ifdef QM_CUDA void free_cuda_memory(); half *cos_buf = nullptr, *sin_buf = nullptr; +#elif defined(QM_METAL) + void metal_free(); + half *cos_buf = nullptr, *sin_buf = nullptr; #else float *cos_buf = nullptr, *sin_buf = nullptr; #endif @@ -77,6 +94,11 @@ class Int4llamaAttention { RotaryPosEmb_cuda rotary_pos_emb; BMM_F16T qk_bmm, pv_bmm; int max_sqlen; +#elif defined(QM_METAL) + Linear_half_int4 o_proj, qkv_proj; + RotaryPosEmb_metal rotary_pos_emb; + BMM_F16T qk_bmm, pv_bmm; + int max_sqlen; #else Linear_FP_int4 k_proj, v_proj, q_proj, o_proj, qkv_proj; RotaryPosEmb rotary_pos_emb; diff --git a/llm/include/nn_modules/Int4llamaAttentionMetal.h b/llm/include/nn_modules/Int4llamaAttentionMetal.h deleted file mode 100644 index b0abe976..00000000 --- 
a/llm/include/nn_modules/Int4llamaAttentionMetal.h +++ /dev/null @@ -1,89 +0,0 @@ -#include - -#include "common.h" -#include "operators.h" - -struct Int4llamaAttention_output { -#ifdef QM_METAL - Matrix3D attn_output; - Matrix3D attn_probs_reshaped; - std::pair, Matrix3D> past_key_value; -#else - Matrix3D attn_output; - Matrix3D attn_probs_reshaped; - std::pair, Matrix3D> past_key_value; -#endif -}; - -struct Int4llamaAttention_input { - bool has_past_key_value = false; - int layer_idx; -#ifdef QM_METAL - Matrix3D hidden_states; - Matrix3D attention_mask; - Matrix3D past_key, past_value; - - Int4llamaAttention_input(Matrix3D hidden_states_, Matrix3D attention_mask_, int layer_idx_) -#else - Matrix3D hidden_states; - Matrix3D attention_mask; - Matrix3D past_key, past_value; - - Int4llamaAttention_input(Matrix3D hidden_states_, Matrix3D attention_mask_, int layer_idx_) -#endif - : hidden_states(hidden_states_), attention_mask(attention_mask_), layer_idx(layer_idx_) { - } - -#ifdef QM_METAL - Int4llamaAttention_input(Matrix3D hidden_states_, Matrix3D attention_mask_, - Matrix3D past_key_, Matrix3D past_value_, bool has_past_key_value_, - int layer_idx_) -#else - Int4llamaAttention_input(Matrix3D hidden_states_, Matrix3D attention_mask_, Matrix3D past_key_, - Matrix3D past_value_, bool has_past_key_value_, int layer_idx_) -#endif - : hidden_states(hidden_states_), - attention_mask(attention_mask_), - past_key(past_key_), - past_value(past_value_), - has_past_key_value(has_past_key_value_), - layer_idx(layer_idx_) { - } -}; - -class Int4llamaAttention { - public: - Int4llamaAttention(std::string param_path, const struct model_config config, int layer_idx); - Int4llamaAttention() {} - static void initialized_memory(const struct model_config config); - struct Int4llamaAttention_output forward(std::string param_path, const struct Int4llamaAttention_input &input); - -#if !(DEC_SHARED_MEM) - int *q_weight = nullptr, *k_weight = nullptr, *v_weight = nullptr, *o_weight = 
nullptr, *qkv_weight = nullptr; -#endif - -#ifdef QM_METAL - void free_cuda_memory(); - half *cos_buf = nullptr, *sin_buf = nullptr; -#else - float *cos_buf = nullptr, *sin_buf = nullptr; -#endif - - private: - std::string profile_name = "Int4llamaAttention"; - int embed_dim, num_heads, head_dim; -#ifdef QM_METAL - Linear_half_int4 o_proj, qkv_proj; - RotaryPosEmb_cuda rotary_pos_emb; - BMM_F16T qk_bmm, pv_bmm; - int max_sqlen; -#else - Linear_FP_int4 k_proj, v_proj, q_proj, o_proj, qkv_proj; - RotaryPosEmb rotary_pos_emb; - BMM_F32T qk_bmm, pv_bmm; - void unshape(Matrix3D shaped, Matrix3D unshape, int sqlen); - void shape(Matrix3D unshape, Matrix3D shaped, int sqlen); - void shape_qkv(Matrix3D unshape, Matrix3D shaped_q, Matrix3D shaped_k, - Matrix3D shaped_v, int sqlen); -#endif -}; diff --git a/llm/include/nn_modules/Int4llamaDecoder.h b/llm/include/nn_modules/Int4llamaDecoder.h index 7b5c57f2..3ddada1b 100644 --- a/llm/include/nn_modules/Int4llamaDecoder.h +++ b/llm/include/nn_modules/Int4llamaDecoder.h @@ -10,6 +10,9 @@ struct Int4llamaDecoder_output { #ifdef QM_CUDA Matrix3D last_hidden_state; std::vector> past_keys, past_values; +#elif defined(QM_METAL) + Matrix3D last_hidden_state; + std::vector> past_keys, past_values; #else Matrix3D last_hidden_state; std::vector> past_keys, past_values; @@ -20,6 +23,8 @@ struct Int4llamaDecoder_input { bool has_past_keys_values; #ifdef QM_CUDA std::vector> past_keys, past_values; +#elif defined(QM_METAL) + std::vector> past_keys, past_values; #else std::vector> past_keys, past_values; #endif @@ -28,6 +33,9 @@ struct Int4llamaDecoder_input { #ifdef QM_CUDA Int4llamaDecoder_input(Matrix3D input_ids_, std::vector> past_keys_, std::vector> past_values_) +#elif defined(QM_METAL) + Int4llamaDecoder_input(Matrix3D input_ids_, std::vector> past_keys_, + std::vector> past_values_) #else Int4llamaDecoder_input(Matrix3D input_ids_, std::vector> past_keys_, std::vector> past_values_) @@ -52,6 +60,15 @@ class Int4llamaDecoder { 
Embedding embed_tokens; LlamaRMSNorm_cuda norm; + float16_t* attention_mask_buf = nullptr; + float16_t* last_hidden_states_buf = nullptr; + float* hidden_states_buf = nullptr; + float16_t* hidden_states_half_buf = nullptr; +#elif defined(QM_METAL) + void metal_free(); + Embedding embed_tokens; + LlamaRMSNorm_metal norm; + float16_t* attention_mask_buf = nullptr; float16_t* last_hidden_states_buf = nullptr; float* hidden_states_buf = nullptr; diff --git a/llm/include/nn_modules/Int4llamaDecoderLayer.h b/llm/include/nn_modules/Int4llamaDecoderLayer.h index e90fbabe..17931ad1 100644 --- a/llm/include/nn_modules/Int4llamaDecoderLayer.h +++ b/llm/include/nn_modules/Int4llamaDecoderLayer.h @@ -8,6 +8,13 @@ struct Int4llamaDecoderLayer_output { Matrix3D attentions; std::pair, Matrix3D> past_key_value; + Int4llamaDecoderLayer_output(Matrix3D hidden_states_, Matrix3D attentions_, + std::pair, Matrix3D> past_key_value_) { +#elif defined(QM_METAL) + Matrix3D hidden_states; + Matrix3D attentions; + std::pair, Matrix3D> past_key_value; + Int4llamaDecoderLayer_output(Matrix3D hidden_states_, Matrix3D attentions_, std::pair, Matrix3D> past_key_value_) { #else @@ -30,6 +37,12 @@ struct Int4llamaDecoderLayer_input { Matrix3D attention_mask; Matrix3D past_key, past_value; + Int4llamaDecoderLayer_input(Matrix3D hidden_states_, Matrix3D attention_mask_) { +#elif defined(QM_METAL) + Matrix3D hidden_states; + Matrix3D attention_mask; + Matrix3D past_key, past_value; + Int4llamaDecoderLayer_input(Matrix3D hidden_states_, Matrix3D attention_mask_) { #else Matrix3D hidden_states; @@ -46,11 +59,14 @@ struct Int4llamaDecoderLayer_input { #ifdef QM_CUDA Int4llamaDecoderLayer_input(Matrix3D hidden_states_, Matrix3D attention_mask_, Matrix3D past_key_, Matrix3D past_value_){ +#elif defined(QM_METAL) + Int4llamaDecoderLayer_input(Matrix3D hidden_states_, Matrix3D attention_mask_, + Matrix3D past_key_, Matrix3D past_value_){ #else Int4llamaDecoderLayer_input(Matrix3D &hidden_states_, Matrix3D 
&attention_mask_, Matrix3D past_key_, Matrix3D past_value_) { #endif - hidden_states = hidden_states_; + hidden_states = hidden_states_; attention_mask = attention_mask_; past_key = past_key_; past_value = past_value_; @@ -73,6 +89,10 @@ class Int4llamaDecoderLayer { void free_cuda_memory(); LlamaRMSNorm_cuda input_layernorm, post_attention_layernorm; Linear_half_int4 gate_proj, down_proj, up_proj; +#elif defined(QM_METAL) + void metal_free(); + LlamaRMSNorm_metal input_layernorm, post_attention_layernorm; + Linear_half_int4 gate_proj, down_proj, up_proj; #if !(DEC_SHARED_MEM) int *gate_proj_weight = nullptr, *down_proj_weight = nullptr, *up_proj_weight = nullptr; diff --git a/llm/include/nn_modules/Int4llamaDecoderLayerMetal.h b/llm/include/nn_modules/Int4llamaDecoderLayerMetal.h deleted file mode 100644 index 1537d1da..00000000 --- a/llm/include/nn_modules/Int4llamaDecoderLayerMetal.h +++ /dev/null @@ -1,87 +0,0 @@ -#include "Int4llamaAttention.h" -#include "common.h" -#include "operators.h" - -struct Int4llamaDecoderLayer_output { -#ifdef QM_METAL - Matrix3D hidden_states; - Matrix3D attentions; - std::pair, Matrix3D> past_key_value; - - Int4llamaDecoderLayer_output(Matrix3D hidden_states_, Matrix3D attentions_, - std::pair, Matrix3D> past_key_value_) { -#else - Matrix3D hidden_states; - Matrix3D attentions; - std::pair, Matrix3D> past_key_value; - - Int4llamaDecoderLayer_output(Matrix3D hidden_states_, Matrix3D attentions_, - std::pair, Matrix3D> past_key_value_) { -#endif - hidden_states = hidden_states_; - attentions = attentions_; - past_key_value = past_key_value_; - }; -}; -struct Int4llamaDecoderLayer_input { - bool has_past_key_value = false; -#ifdef QM_METAL - Matrix3D hidden_states; - Matrix3D attention_mask; - Matrix3D past_key, past_value; - - Int4llamaDecoderLayer_input(Matrix3D hidden_states_, Matrix3D attention_mask_) { -#else - Matrix3D hidden_states; - Matrix3D attention_mask; - Matrix3D past_key, past_value; - - 
Int4llamaDecoderLayer_input(Matrix3D &hidden_states_, Matrix3D &attention_mask_) { -#endif - hidden_states = hidden_states_; - attention_mask = attention_mask_; - has_past_key_value = false; - } - -#ifdef QM_METAL - Int4llamaDecoderLayer_input(Matrix3D hidden_states_, Matrix3D attention_mask_, - Matrix3D past_key_, Matrix3D past_value_){ -#else - Int4llamaDecoderLayer_input(Matrix3D &hidden_states_, Matrix3D &attention_mask_, - Matrix3D past_key_, Matrix3D past_value_) { -#endif - hidden_states = hidden_states_; - attention_mask = attention_mask_; - past_key = past_key_; - past_value = past_value_; - has_past_key_value = true; -} -} -; - -class Int4llamaDecoderLayer { - public: - Int4llamaDecoderLayer(std::string param_path, const struct model_config config, int layer_idx); - Int4llamaDecoderLayer(){}; - struct Int4llamaDecoderLayer_output forward(std::string param_path, const struct Int4llamaDecoderLayer_input &input, int layer_idx); - - std::string profile_name = "Int4llamaDecoderLayer"; - int embed_dim, num_attention_heads, hidden_dim, layer_idx; - float rms_norm_eps; - Int4llamaAttention attn; -#ifdef QM_METAL - void free_cuda_memory(); - LlamaRMSNorm_metal input_layernorm, post_attention_layernorm; - Linear_half_int4 gate_proj, down_proj, up_proj; - -#if !(DEC_SHARED_MEM) - int *gate_proj_weight = nullptr, *down_proj_weight = nullptr, *up_proj_weight = nullptr; -#endif - -#else - LlamaRMSNorm input_layernorm, post_attention_layernorm; // from torch_int.nn - Linear_FP_int4 gate_proj, down_proj, up_proj; -#endif - float *input_layernorm_weight_ptr = nullptr; - float *post_attention_layernorm_ptr = nullptr; -}; diff --git a/llm/include/nn_modules/Int4llamaDecoderMetal.h b/llm/include/nn_modules/Int4llamaDecoderMetal.h deleted file mode 100644 index 5314f58a..00000000 --- a/llm/include/nn_modules/Int4llamaDecoderMetal.h +++ /dev/null @@ -1,69 +0,0 @@ -#include -#include -#include - -#include "Int4llamaDecoderLayer.h" -#include "common.h" -#include "operators.h" - 
-struct Int4llamaDecoder_output { -#ifdef QM_METAL - Matrix3D last_hidden_state; - std::vector> past_keys, past_values; -#else - Matrix3D last_hidden_state; - std::vector> past_keys, past_values; -#endif -}; -struct Int4llamaDecoder_input { - Matrix3D input_ids; - bool has_past_keys_values; -#ifdef QM_METAL - std::vector> past_keys, past_values; -#else - std::vector> past_keys, past_values; -#endif - - Int4llamaDecoder_input(Matrix3D input_ids_) : input_ids(input_ids_) { has_past_keys_values = false; } -#ifdef QM_METAL - Int4llamaDecoder_input(Matrix3D input_ids_, std::vector> past_keys_, - std::vector> past_values_) -#else - Int4llamaDecoder_input(Matrix3D input_ids_, std::vector> past_keys_, - std::vector> past_values_) -#endif - : input_ids(input_ids_), past_keys(past_keys_), past_values(past_values_) { - has_past_keys_values = true; - } -}; - -class Int4llamaDecoder { - public: - Int4llamaDecoder(std::string param_path, const struct model_config config); - Int4llamaDecoder(){}; - Matrix3D prepare_decoder_attention_mask(int length, int past_length); - struct Int4llamaDecoder_output forward(std::string param_path, const struct Int4llamaDecoder_input& input); - int voc_size, embed_dim, padding_idx, hidden_dim, num_heads; - float rms_norm_eps; - std::vector layers; - std::string profile_name = "Int4llamaDecoder"; -#ifdef QM_METAL - void free_cuda_memory(); - Embedding embed_tokens; - LlamaRMSNorm_metal norm; - - float16_t* attention_mask_buf = nullptr; - float16_t* last_hidden_states_buf = nullptr; - float* hidden_states_buf = nullptr; - float16_t* hidden_states_half_buf = nullptr; -#else - Embedding embed_tokens; - LlamaRMSNorm norm; - - float* attention_mask_buf; - float* pos_embeds_buf; - float* last_hidden_states_buf; - float* hidden_states_buf; -#endif - float* norm_weight_ptr = nullptr; -}; diff --git a/llm/include/nn_modules/Int4llamaForCausalLM.h b/llm/include/nn_modules/Int4llamaForCausalLM.h index 80f03002..e066980c 100644 --- 
a/llm/include/nn_modules/Int4llamaForCausalLM.h +++ b/llm/include/nn_modules/Int4llamaForCausalLM.h @@ -4,6 +4,8 @@ struct Int4LlamaForCausalLM_output { Matrix3D logits; #ifdef QM_CUDA std::vector> past_keys, past_values; +#elif defined(QM_METAL) + std::vector> past_keys, past_values; #else std::vector> past_keys, past_values; #endif @@ -13,6 +15,8 @@ struct Int4LlamaForCausalLM_input { bool has_past_keys_values; #ifdef QM_CUDA std::vector> past_keys, past_values; +#elif defined(QM_METAL) + std::vector> past_keys, past_values; #else std::vector> past_keys, past_values; #endif @@ -22,6 +26,9 @@ struct Int4LlamaForCausalLM_input { #ifdef QM_CUDA Int4LlamaForCausalLM_input(Matrix3D input_ids_, std::vector> past_keys_, std::vector> past_values_) +#elif defined(QM_METAL) + Int4LlamaForCausalLM_input(Matrix3D input_ids_, std::vector> past_keys_, + std::vector> past_values_) #else Int4LlamaForCausalLM_input(Matrix3D input_ids_, std::vector> past_keys_, std::vector> past_values_) @@ -41,6 +48,10 @@ class Int4LlamaForCausalLM { void free_cuda_memory(); int* lm_head_weight = nullptr; float16_t* logits_output_half = nullptr; +#elif defined(QM_METAL) + void metal_free(); + int* lm_head_weight = nullptr; + float16_t* logits_output_half = nullptr; #else uint8_t* lm_head_weight; #endif @@ -50,6 +61,8 @@ class Int4LlamaForCausalLM { Int4llamaDecoder decoder; #ifdef QM_CUDA Linear_half_int4 lm_head; +#elif defined(QM_METAL) + Linear_half_int4 lm_head; #else Linear_FP_int4 lm_head; #endif diff --git a/llm/include/nn_modules/Int4llamaForCausalLMMetal.h b/llm/include/nn_modules/Int4llamaForCausalLMMetal.h deleted file mode 100644 index 5b6e1b09..00000000 --- a/llm/include/nn_modules/Int4llamaForCausalLMMetal.h +++ /dev/null @@ -1,61 +0,0 @@ -#include "Int4llamaDecoder.h" - -struct Int4LlamaForCausalLM_output { - Matrix3D logits; -#ifdef QM_METAL - std::vector> past_keys, past_values; -#else - std::vector> past_keys, past_values; -#endif -}; -struct Int4LlamaForCausalLM_input { - 
Matrix3D input_ids; - bool has_past_keys_values; -#ifdef QM_METAL - std::vector> past_keys, past_values; -#else - std::vector> past_keys, past_values; -#endif - - Int4LlamaForCausalLM_input() {} - Int4LlamaForCausalLM_input(Matrix3D input_ids_) : input_ids(input_ids_) { has_past_keys_values = false; } -#ifdef QM_METAL - Int4LlamaForCausalLM_input(Matrix3D input_ids_, std::vector> past_keys_, - std::vector> past_values_) -#else - Int4LlamaForCausalLM_input(Matrix3D input_ids_, std::vector> past_keys_, - std::vector> past_values_) -#endif - : input_ids(input_ids_), past_keys(past_keys_), past_values(past_values_) { - has_past_keys_values = true; - } -}; - -class Int4LlamaForCausalLM { - public: - Int4LlamaForCausalLM(std::string param_path, const struct model_config config); - Int4LlamaForCausalLM(){}; - struct Int4LlamaForCausalLM_output forward(std::string param_path, const struct Int4LlamaForCausalLM_input& input); - float* logits_output = nullptr; -#ifdef QM_METAL - void free_cuda_memory(); - int* lm_head_weight = nullptr; - float16_t* logits_output_half = nullptr; -#else - uint8_t* lm_head_weight; -#endif - - private: - std::string profile_name = "Int4LlamaForCausalLM"; - Int4llamaDecoder decoder; -#ifdef QM_METAL - Linear_half_int4 lm_head; -#else - Linear_FP_int4 lm_head; -#endif -}; - - -// 1. modified the code to be suitable for Metal -// 2. investigate the problem of waituntilcompleted (multiple encoders in order inside command buffer) -// 3. 
found more kernels needed \ No newline at end of file diff --git a/llm/include/operators.h b/llm/include/operators.h index 5ec27963..bf4a2a8e 100644 --- a/llm/include/operators.h +++ b/llm/include/operators.h @@ -44,13 +44,12 @@ __global__ void softmax_cuda(Matrix3D input, Matrix3D outp #endif #ifdef QM_METAL -#include "ops/metal/BMM_F16T.cuh" -#include "ops/metal/Embedding.cuh" -#include "ops/metal/LlamaRMSNorm.cuh" -#include "ops/metal/RotaryPosEmb.cuh" - -void batch_Add_metal(const Matrix3D input, const Matrix3D input2, Matrix3D output); -void softmax_metal(Matrix3D input, Matrix3D output); +#include "ops/metal/BMM_F16T.h" +#include "ops/metal/Embedding.h" +#include "ops/metal/LlamaRMSNorm.h" +#include "ops/metal/RotaryPosEmb.h" +#include "ops/metal/batch_add.h" +#include "ops/metal/softmax.h" #endif #endif // OPERATORS_H diff --git a/llm/include/ops/linear.h b/llm/include/ops/linear.h index 37c3d623..32115479 100644 --- a/llm/include/ops/linear.h +++ b/llm/include/ops/linear.h @@ -218,4 +218,43 @@ class Linear_half_int4 { }; #endif +#ifdef QM_METAL +class Linear_half_int4 { + public: + Linear_half_int4(Matrix3D weight_, std::string weight_path) : weight(weight_) { + int output_channel = this->weight.m_dim_y, input_channel = this->weight.m_dim_z * 8; + + float16_t *scale_ptr; + // float16_t *offset_ptr; // TODO: Currently, we don't need offset + int *zero_point_ptr; + // length of int8_t weight = elements / 2 + // length of scales/offset = elements / QK = weight / (QK/2) + // length of zero_point = 1 + // assert((weight.m_dim_z * 8) % (QK) == 0); + allocate_aligned_memory(scale_ptr, output_channel * calculate_zeros_width(input_channel, QK) * 8 * sizeof(float16_t)); + // allocate_aligned_memory(offset_ptr, (this->weight.length() * 8 * sizeof(float16_t)) / QK); // TODO: Currently, we don't need offset + // Currently, we don't need offset + allocate_aligned_memory(zero_point_ptr, output_channel * calculate_zeros_width(input_channel, QK) * sizeof(int)); + + scale = 
Matrix3D(scale_ptr, 1, output_channel, calculate_zeros_width(input_channel, QK) * 8); + // offset = Matrix3D(offset_ptr, x, y, z); // TODO: Currently, we don't need offset + zero_point = Matrix3D(zero_point_ptr, 1, output_channel, calculate_zeros_width(input_channel, QK)); + weight.load((weight_path + "/weight_int4.bin").c_str()); + // offset.load((weight_path + "/offset_int4.bin").c_str()); // TODO: Currently, we don't need offset + scale.load((weight_path + "/scaling_factor_int4.bin").c_str()); + zero_point.load((weight_path + "/zero_point_int4.bin").c_str()); + }; + Linear_half_int4(){}; + // void forward(const Matrix3D &x, Matrix3D &output); + void forward(const Matrix3D &x, Matrix3D &output); + Matrix3D weight; + Matrix3D scale; + Matrix3D offset; // TODO: Currently, we don't need offset + Matrix3D zero_point; + + private: + std::string profile_name = "Linear_half_int4"; +}; +#endif + #endif diff --git a/llm/include/ops/metal/BMM_F16T.h b/llm/include/ops/metal/BMM_F16T.h new file mode 100644 index 00000000..87738150 --- /dev/null +++ b/llm/include/ops/metal/BMM_F16T.h @@ -0,0 +1,15 @@ +#include "utils.h" +#include "common.h" + +class BMM_F16T{ +public: + BMM_F16T(half _alpha); + BMM_F16T(){}; + void forward(const Matrix3D &x, const Matrix3D &weight, Matrix3D &output); // TODO: convert weight to half + void forward_weight_untransposed(const Matrix3D &a, const Matrix3D &weight, Matrix3D &c); + half alpha; +private: + std::string profile_name = "BMM_F16T"; +}; + +void load_BMM_F16T(BMM_F16T &op, std::string prefix); diff --git a/llm/include/ops/metal/Embedding.h b/llm/include/ops/metal/Embedding.h new file mode 100644 index 00000000..878021e1 --- /dev/null +++ b/llm/include/ops/metal/Embedding.h @@ -0,0 +1,20 @@ +#include +#include "utils.h" +#include "common.h" + +class Embedding_metal { + public: + Embedding_metal(int embed_dim_, int voc_size_, int padding_idx_, Matrix3D lookup_) + : embed_dim(embed_dim_), voc_size(voc_size_), padding_idx(padding_idx_), 
lookup(lookup_) { + assert(lookup_.m_dim_y == voc_size_); + assert(lookup_.m_dim_z == embed_dim_); + } + Embedding_metal(){}; + void forward(Matrix3D input_id, Matrix3D output); + int embed_dim, voc_size, padding_idx; + Matrix3D lookup; +private: + std::string profile_name = "Embedding"; +}; + +void load_Embedding_params_metal(Embedding_metal &op, std::string prefix); diff --git a/llm/include/ops/metal/LlamaRMSNorm.h b/llm/include/ops/metal/LlamaRMSNorm.h new file mode 100644 index 00000000..3c77795f --- /dev/null +++ b/llm/include/ops/metal/LlamaRMSNorm.h @@ -0,0 +1,14 @@ +#include "utils.h" +#include "common.h" + +class LlamaRMSNorm_metal { + public: + LlamaRMSNorm_metal(Matrix3D _weight) : weight(_weight){}; + LlamaRMSNorm_metal(){}; + void forward(const Matrix3D &x, Matrix3D &output, float eps); + Matrix3D weight; + // half half_eps = 6.10352e-05; + + private: + std::string profile_name = "LlamaRMSNorm_metal"; +}; \ No newline at end of file diff --git a/llm/include/ops/metal/RotaryPosEmb.h b/llm/include/ops/metal/RotaryPosEmb.h new file mode 100644 index 00000000..dbfb2595 --- /dev/null +++ b/llm/include/ops/metal/RotaryPosEmb.h @@ -0,0 +1,26 @@ +#include + +#include "utils.h" +#include "common.h" + +class RotaryPosEmb_metal +{ +public: + RotaryPosEmb_metal(Matrix3D _cos, Matrix3D _sin, std::string path) + { + sin = _sin; + cos = _cos; + read_to_array_half((path + "/cos_cached_half.bin").c_str(), cos.m_data, cos.length()); + read_to_array_half((path + "/sin_cached_half.bin").c_str(), sin.m_data, sin.length()); + }; + RotaryPosEmb_metal(){}; + void forward(Matrix3D &key, Matrix3D &value, int start_idx, int len); + Matrix3D cos, sin; + +private: + std::string profile_name = "RotaryPosEmb_metal"; +}; + +void load_RotaryPosEmb_metal(RotaryPosEmb_metal &op, std::string prefix); + +void RotaryPosEmb_metal_forward(Matrix3D query, Matrix3D key, Matrix3D cos, Matrix3D sin, int start_idx, int len); diff --git a/llm/include/ops/metal/batch_add.h 
b/llm/include/ops/metal/batch_add.h new file mode 100644 index 00000000..5f491d1f --- /dev/null +++ b/llm/include/ops/metal/batch_add.h @@ -0,0 +1,4 @@ +#include "utils.h" +#include "common.h" + +void batch_Add_metal(const Matrix3D input, const Matrix3D input2, Matrix3D output); \ No newline at end of file diff --git a/llm/include/ops/metal/reduction.h b/llm/include/ops/metal/reduction.h new file mode 100644 index 00000000..943cafe6 --- /dev/null +++ b/llm/include/ops/metal/reduction.h @@ -0,0 +1 @@ +// place_holder \ No newline at end of file diff --git a/llm/include/ops/metal/softmax.h b/llm/include/ops/metal/softmax.h new file mode 100644 index 00000000..0bd9ba40 --- /dev/null +++ b/llm/include/ops/metal/softmax.h @@ -0,0 +1,4 @@ +#include "utils.h" +#include "common.h" + +void softmax(Matrix3D input, Matrix3D output); \ No newline at end of file diff --git a/llm/include/utils.h b/llm/include/utils.h index 8e94fd92..9b3cc5d7 100644 --- a/llm/include/utils.h +++ b/llm/include/utils.h @@ -48,6 +48,11 @@ void print_first_k_elelment(std::string name, const int32_t* arr, int k, int sta void print_first_k_elelment(std::string name, const float* arr, int k, int start_idx = 0); #ifdef QM_METAL +typedef half_float::half float16_t; +typedef float16_t half; +int make_divisible_c(int c, int divisor); +int calculate_zeros_width(int in_features, int group_size=128, int pack_num=8); +void read_to_array_half(const char* path, half* array, int size); template void allocate_aligned_memory(T*& ptr, size_t size); #else diff --git a/llm/src/nn_modules/metal/Int4llamaAttentionMetal.cc b/llm/src/nn_modules/metal/Int4llamaAttention.cc similarity index 78% rename from llm/src/nn_modules/metal/Int4llamaAttentionMetal.cc rename to llm/src/nn_modules/metal/Int4llamaAttention.cc index 1d80ee2d..593ab41d 100644 --- a/llm/src/nn_modules/metal/Int4llamaAttentionMetal.cc +++ b/llm/src/nn_modules/metal/Int4llamaAttention.cc @@ -56,7 +56,7 @@ Int4llamaAttention::Int4llamaAttention(std::string 
param_path, const struct mode half qk_bmm_alpha; read_to_array_half((param_path + "/qk_bmm/alpha_half.bin").c_str(), &qk_bmm_alpha, 1); this->qk_bmm = BMM_F16T(qk_bmm_alpha); - this->pv_bmm = BMM_F16T(__float2half(1.0f)); + this->pv_bmm = BMM_F16T((half)(1.0f)); //float2half? this->embed_dim = config.embed_dim; this->num_heads = config.num_heads; @@ -66,7 +66,7 @@ Int4llamaAttention::Int4llamaAttention(std::string param_path, const struct mode } void shape_qkv(Matrix3D qkv_states_unshape, Matrix3D query_states, Matrix3D key_states, Matrix3D value_states, int num_heads, int sqlen, int head_dim){ - const struct metal_params params; + struct metal_params params; params.A.half_data_ptr = qkv_states_unshape.m_data; params.B.half_data_ptr = query_states.m_data; @@ -80,7 +80,7 @@ void shape_qkv(Matrix3D qkv_states_unshape, Matrix3D query } void unshape(Matrix3D attn_output, Matrix3D attn_output_transpose, int num_heads, int sqlen, int head_dim){ - const struct metal_params params; + struct metal_params params; params.A.half_data_ptr = attn_output.m_data; params.B.half_data_ptr = attn_output_transpose.m_data; @@ -92,7 +92,7 @@ void unshape(Matrix3D attn_output, Matrix3D attn_output_tr } void check_inf_half(Matrix3D attn_weights){ - const struct metal_params params; + struct metal_params params; params.A.half_data_ptr = attn_weights.m_data; params.sqlen = attn_weights.length(); @@ -102,12 +102,12 @@ void check_inf_half(Matrix3D attn_weights){ } void transpose_1_2idx(Matrix3D final_value_states, Matrix3D value_states_transpose, int num_heads, int sqlen, int head_dim, int tgz){ - const struct metal_params params; + struct metal_params params; params.A.half_data_ptr = final_value_states.m_data; params.A.row = final_value_states.m_dim_x; params.A.column = final_value_states.m_dim_y; - params.m_dim_z = final_value_states.m_dim_z; + params.input_m_dim_z = final_value_states.m_dim_z; params.B.half_data_ptr = value_states_transpose.m_data; params.B.row = 
value_states_transpose.m_dim_x; params.B.column = value_states_transpose.m_dim_y; @@ -158,33 +158,33 @@ struct Int4llamaAttention_output Int4llamaAttention::forward(std::string param_p int start_idx = 0; if (input.has_past_key_value) start_idx = input.past_key.m_dim_y; - dim3 grid(num_heads, 1, 1); - dim3 block(sqlen, 1, 1); + // dim3 grid(num_heads, 1, 1); + // dim3 block(sqlen, 1, 1); // METAL: ROPE metal RotaryPosEmb_metal_forward(query_states, key_states, this->rotary_pos_emb.cos, this->rotary_pos_emb.sin, start_idx, sqlen); // int tgz = sqlen; - if (input.has_past_key_value) { - // assert(input.past_key.m_dim_z == this->head_dim); - // tgz += input.past_key.m_dim_y; - float16_t *val_ptr = ret_value_states, *key_ptr = ret_key_states; - int past_block = input.past_key.m_dim_y * input.past_key.m_dim_z; - int sq_block = sqlen * this->head_dim; -#pragma unroll - for (int i = 0; i < input.past_key.m_dim_x; i++) { - cudaMemcpyAsync(val_ptr, &input.past_value.m_data[past_block * i], past_block * sizeof(float16_t), cudaMemcpyDeviceToDevice); - val_ptr += past_block; - cudaMemcpyAsync(val_ptr, &value_states.m_data[sq_block * i], sq_block * sizeof(float16_t), cudaMemcpyDeviceToDevice); - val_ptr += sq_block; - cudaMemcpyAsync(key_ptr, &input.past_key.m_data[past_block * i], past_block * sizeof(float16_t), cudaMemcpyDeviceToDevice); - key_ptr += past_block; - cudaMemcpyAsync(key_ptr, &key_states.m_data[sq_block * i], sq_block * sizeof(float16_t), cudaMemcpyDeviceToDevice); - key_ptr += sq_block; - } - } else { - cudaMemcpyAsync(ret_value_states, value_states_arr, (this->num_heads * tgz * this->head_dim) * sizeof(float16_t), cudaMemcpyDeviceToDevice); - cudaMemcpyAsync(ret_key_states, key_states_arr, (this->num_heads * tgz * this->head_dim) * sizeof(float16_t), cudaMemcpyDeviceToDevice); - } +// if (input.has_past_key_value) { +// // assert(input.past_key.m_dim_z == this->head_dim); +// // tgz += input.past_key.m_dim_y; +// float16_t *val_ptr = ret_value_states, *key_ptr = 
ret_key_states; +// int past_block = input.past_key.m_dim_y * input.past_key.m_dim_z; +// int sq_block = sqlen * this->head_dim; +// #pragma unroll +// for (int i = 0; i < input.past_key.m_dim_x; i++) { +// cudaMemcpyAsync(val_ptr, &input.past_value.m_data[past_block * i], past_block * sizeof(float16_t), cudaMemcpyDeviceToDevice); +// val_ptr += past_block; +// cudaMemcpyAsync(val_ptr, &value_states.m_data[sq_block * i], sq_block * sizeof(float16_t), cudaMemcpyDeviceToDevice); +// val_ptr += sq_block; +// cudaMemcpyAsync(key_ptr, &input.past_key.m_data[past_block * i], past_block * sizeof(float16_t), cudaMemcpyDeviceToDevice); +// key_ptr += past_block; +// cudaMemcpyAsync(key_ptr, &key_states.m_data[sq_block * i], sq_block * sizeof(float16_t), cudaMemcpyDeviceToDevice); +// key_ptr += sq_block; +// } +// } else { +// cudaMemcpyAsync(ret_value_states, value_states_arr, (this->num_heads * tgz * this->head_dim) * sizeof(float16_t), cudaMemcpyDeviceToDevice); +// cudaMemcpyAsync(ret_key_states, key_states_arr, (this->num_heads * tgz * this->head_dim) * sizeof(float16_t), cudaMemcpyDeviceToDevice); +// } Matrix3D attn_weights(attn_weights_arr, this->num_heads, sqlen, tgz); this->qk_bmm.forward(query_states, final_key_states, attn_weights); @@ -194,7 +194,7 @@ struct Int4llamaAttention_output Int4llamaAttention::forward(std::string param_p // (sqlen + threadsPerBlock2.y - 1) / threadsPerBlock2.y, // (tgz + threadsPerBlock2.z - 1) / threadsPerBlock2.z); // METAL: Metal - batch_Add(attn_weights, input.attention_mask, attn_weights); + batch_Add_metal(attn_weights, input.attention_mask, attn_weights); int threadsPerBlock_1D = 1024; int blocksPerGrid_1D =(attn_weights.length() + threadsPerBlock_1D - 1) / threadsPerBlock_1D; @@ -202,8 +202,8 @@ struct Int4llamaAttention_output Int4llamaAttention::forward(std::string param_p check_inf_half(attn_weights); Matrix3D attn_probs(attn_weights_arr, this->num_heads, sqlen, tgz); - dim3 threadsPerBlock3(64, 16); - dim3 
numBlocks3((this->num_heads + threadsPerBlock3.x - 1) / threadsPerBlock3.x, (sqlen + threadsPerBlock3.y - 1) / threadsPerBlock3.y); + // dim3 threadsPerBlock3(64, 16); + // dim3 numBlocks3((this->num_heads + threadsPerBlock3.x - 1) / threadsPerBlock3.x, (sqlen + threadsPerBlock3.y - 1) / threadsPerBlock3.y); // METAL: Metal softmax(attn_weights, attn_probs); @@ -232,26 +232,4 @@ struct Int4llamaAttention_output Int4llamaAttention::forward(std::string param_p PROFILE_END(profile_name); return output; -} - -void Int4llamaAttention::free_cuda_memory() { - free_aligned_memory_gpu(attn_weights_arr); - free_aligned_memory_gpu(attn_output_half_arr); - free_aligned_memory_gpu(attn_output_arr); - free_aligned_memory_gpu(attn_output_transpose_arr); - free_aligned_memory_gpu(key_states_arr); - free_aligned_memory_gpu(value_states_arr); - free_aligned_memory_gpu(query_states_arr); - free_aligned_memory_gpu(value_states_transpose_arr); - free_aligned_memory_gpu(key_states_arr_cache); - free_aligned_memory_gpu(value_states_arr_cache); - free_aligned_memory_gpu(cos_buf); - free_aligned_memory_gpu(sin_buf); - free_aligned_memory_gpu(o_weight); - free_aligned_memory_gpu(qkv_states_unshape_arr); - - if(cache_num) { - free(cache_num); - cache_num = nullptr; - } -} +} \ No newline at end of file diff --git a/llm/src/nn_modules/metal/Int4llamaDecoderMetal.cc b/llm/src/nn_modules/metal/Int4llamaDecoder.cc similarity index 91% rename from llm/src/nn_modules/metal/Int4llamaDecoderMetal.cc rename to llm/src/nn_modules/metal/Int4llamaDecoder.cc index 8073fef2..9f65d9fe 100644 --- a/llm/src/nn_modules/metal/Int4llamaDecoderMetal.cc +++ b/llm/src/nn_modules/metal/Int4llamaDecoder.cc @@ -6,8 +6,7 @@ #include "utils.h" void prepare_decoder_attention_mask_half(Matrix3D causal_attention_mask, int length, int past_length){ - const struct metal_params params; - + struct metal_params params; params.A.half_data_ptr = causal_attention_mask.m_data; params.sqlen = length; params.past_sqlen = 
past_length; @@ -15,6 +14,15 @@ void prepare_decoder_attention_mask_half(Matrix3D causal_attention_ma add_node(¶ms); } +void float2half(Matrix3D hidden_states_buf, Matrix3D hidden_states_half_buf, int sq_embed){ + struct metal_params params; + params.A.data_ptr = hidden_states_buf.m_data; + params.B.half_data_ptr = hidden_states_half_buf.m_data; + params.sqlen = sq_embed; + params.op = METAL_KERNEL_FLOAT2HALF; + add_node(¶ms); +} + Int4llamaDecoder::Int4llamaDecoder(std::string param_path, const struct model_config config) { allocate_aligned_memory(attention_mask_buf, config.max_sqlen * config.max_sqlen * sizeof(float16_t)); @@ -66,7 +74,7 @@ struct Int4llamaDecoder_output Int4llamaDecoder::forward(std::string param_path, int threadsPerBlock_1D = 1024; int blocksPerGrid =(sqlen * this->embed_dim + threadsPerBlock_1D - 1) / threadsPerBlock_1D; // METAL: more kernels - float2half<<>>(hidden_states_buf, hidden_states_half_buf, sqlen * this->embed_dim); + float2half(hidden_states_float, hidden_states, sqlen * this->embed_dim); if (input.has_past_keys_values) { past_key_values_length = input.past_keys[0].m_dim_y; @@ -107,12 +115,4 @@ struct Int4llamaDecoder_output Int4llamaDecoder::forward(std::string param_path, PROFILE_END(profile_name); return output; -} - -void Int4llamaDecoder::free_cuda_memory() { - free_aligned_memory_gpu(attention_mask_buf); - free_aligned_memory_gpu(last_hidden_states_buf); - free_aligned_memory_gpu(hidden_states_buf); - free_aligned_memory_gpu(hidden_states_half_buf); - free_aligned_memory_gpu(norm_weight_ptr); -} +} \ No newline at end of file diff --git a/llm/src/nn_modules/metal/Int4llamaDecoderLayerMetal.cc b/llm/src/nn_modules/metal/Int4llamaDecoderLayer.cc similarity index 88% rename from llm/src/nn_modules/metal/Int4llamaDecoderLayerMetal.cc rename to llm/src/nn_modules/metal/Int4llamaDecoderLayer.cc index cb5942f7..827316ee 100644 --- a/llm/src/nn_modules/metal/Int4llamaDecoderLayerMetal.cc +++ 
b/llm/src/nn_modules/metal/Int4llamaDecoderLayer.cc @@ -9,9 +9,8 @@ static float16_t *up_proj_arr = nullptr; static float16_t *down_proj_arr = nullptr; static float16_t *hidden_states_arr = nullptr; -void add_half(Matrix3D a, Matrix3D b, Matrix3D c){ - const struct metal_params params; - +void add_half(Matrix3D a, Matrix3D b, Matrix3D c, int num_heads){ + struct metal_params params; params.A.half_data_ptr = a.m_data; params.B.half_data_ptr = b.m_data; params.C.half_data_ptr = c.m_data; @@ -22,11 +21,11 @@ void add_half(Matrix3D a, Matrix3D b, Matrix3D } void SiLuMul_half(Matrix3D gate_proj, Matrix3D up_proj){ - const struct metal_params params; + struct metal_params params; params.A.half_data_ptr = gate_proj.m_data; params.B.half_data_ptr = up_proj.m_data; - params.sqlen = a.length(); + params.sqlen = gate_proj.length(); params.op = METAL_KERNEL_SILUMUL_HALF; add_node(¶ms); } @@ -90,7 +89,7 @@ struct Int4llamaDecoderLayer_output Int4llamaDecoderLayer::forward(std::string p int threadsPerBlock = 1024; int blocksPerGrid =(input.hidden_states.length() + threadsPerBlock - 1) / threadsPerBlock; // METAL: add interface - add_half(input.hidden_states, attn_output.attn_output, residual_add); + add_half(input.hidden_states, attn_output.attn_output, residual_add, this->num_attention_heads); Matrix3D post_attention_layernorm(final_layer_norm_arr, input.hidden_states.m_dim_x, input.hidden_states.m_dim_y, input.hidden_states.m_dim_z); @@ -112,7 +111,7 @@ struct Int4llamaDecoderLayer_output Int4llamaDecoderLayer::forward(std::string p int blocksPerGrid3 =(residual_add.length() + threadsPerBlock - 1) / threadsPerBlock; // METAL: add interface - add_half(residual_add, down_proj, residual_add); + add_half(residual_add, down_proj, residual_add, this->num_attention_heads); struct Int4llamaDecoderLayer_output output(residual_add, attn_output.attn_probs_reshaped, attn_output.past_key_value); @@ -120,17 +119,3 @@ struct Int4llamaDecoderLayer_output 
Int4llamaDecoderLayer::forward(std::string p return output; } - -void Int4llamaDecoderLayer::free_cuda_memory() { - free_aligned_memory_gpu(hidden_states_half_arr); - free_aligned_memory_gpu(final_layer_norm_arr); - free_aligned_memory_gpu(gate_proj_arr); - free_aligned_memory_gpu(up_proj_arr); - free_aligned_memory_gpu(down_proj_arr); - free_aligned_memory_gpu(hidden_states_arr); - free_aligned_memory_gpu(input_layernorm_weight_ptr); - free_aligned_memory_gpu(post_attention_layernorm_ptr); - free_aligned_memory_gpu(gate_proj_weight); - free_aligned_memory_gpu(down_proj_weight); - free_aligned_memory_gpu(up_proj_weight); -} diff --git a/llm/src/nn_modules/metal/Int4llamaForCausalLMMetal.cc b/llm/src/nn_modules/metal/Int4llamaForCausalLM.cc similarity index 81% rename from llm/src/nn_modules/metal/Int4llamaForCausalLMMetal.cc rename to llm/src/nn_modules/metal/Int4llamaForCausalLM.cc index 1775cf00..de645ec6 100644 --- a/llm/src/nn_modules/metal/Int4llamaForCausalLMMetal.cc +++ b/llm/src/nn_modules/metal/Int4llamaForCausalLM.cc @@ -4,10 +4,10 @@ #include "operators.h" #include "utils.h" -void half2float(const half* halfArray, float* floatArray, int N){ - const struct metal_params params; +void half2float(const float16_t* halfArray, float* floatArray, int N){ + struct metal_params params; - params.A.half_data_ptr = halfArray; + params.A.half_data_ptr = (float16_t*) halfArray; params.B.data_ptr = floatArray; params.sqlen = N; params.op = METAL_KERNEL_HALF2FLOAT; @@ -44,13 +44,10 @@ struct Int4LlamaForCausalLM_output Int4LlamaForCausalLM::forward(std::string par this->lm_head.forward(decoder_output.last_hidden_state, logits_half); Matrix3D logits(logits_output, 1, sqlen, this->decoder.voc_size); - int threadsPerBlock_1D = 1024; - int blocksPerGrid =(sqlen * this->decoder.voc_size + threadsPerBlock_1D - 1) / threadsPerBlock_1D; - // METAL: more kernels needed - half2float<<>>(logits_output_half, logits_output, sqlen * this->decoder.voc_size); + 
half2float(logits_output_half, logits_output, sqlen * this->decoder.voc_size); // waituntilcompleted - metal_completed(); + metal_graph_compute(mgraph); struct Int4LlamaForCausalLM_output LMoutput = {logits, decoder_output.past_keys, decoder_output.past_values}; PROFILE_END(profile_name); diff --git a/llm/src/nn_modules/metal/LLaMAGenerateMetal.cc b/llm/src/nn_modules/metal/LLaMAGenerate.cc similarity index 100% rename from llm/src/nn_modules/metal/LLaMAGenerateMetal.cc rename to llm/src/nn_modules/metal/LLaMAGenerate.cc diff --git a/llm/src/ops/metal/BMM_F16T.cc b/llm/src/ops/metal/BMM_F16T.cc index 5f54a0d1..76681593 100644 --- a/llm/src/ops/metal/BMM_F16T.cc +++ b/llm/src/ops/metal/BMM_F16T.cc @@ -1,4 +1,4 @@ -#include "operators.h" +#include "../../../include/operators.h" #include "utils.h" #include "metal_compute.h" diff --git a/llm/src/ops/metal/LlamaRMSNorm.cc b/llm/src/ops/metal/LlamaRMSNorm.cc index 358e8c5a..8ff673ad 100644 --- a/llm/src/ops/metal/LlamaRMSNorm.cc +++ b/llm/src/ops/metal/LlamaRMSNorm.cc @@ -1,6 +1,6 @@ #include #include -#include "operators.h" +#include "../../../include/operators.h" #include "utils.h" #include "metal_compute.h" diff --git a/llm/src/ops/metal/RotaryPosEmb.cc b/llm/src/ops/metal/RotaryPosEmb.cc index ce49f409..47a02d59 100644 --- a/llm/src/ops/metal/RotaryPosEmb.cc +++ b/llm/src/ops/metal/RotaryPosEmb.cc @@ -1,8 +1,8 @@ #include -#include "operators.h" +#include "../../../include/operators.h" +#include "utils.h" #include "metal_compute.h" -// TODO: match constants on metal void RotaryPosEmb_metal_forward(Matrix3D query, Matrix3D key, Matrix3D cos, Matrix3D sin, int start_idx, int len) { struct matmul_params params; params.A.row = query.m_dim_y; diff --git a/llm/src/ops/metal/batch_add.cc b/llm/src/ops/metal/batch_add.cc index 8fc17233..706e1465 100644 --- a/llm/src/ops/metal/batch_add.cc +++ b/llm/src/ops/metal/batch_add.cc @@ -1,21 +1,19 @@ #include "operators.h" +#include "utils.h" #include "metal_compute.h" -void 
batch_Add(const Matrix3D &input, const Matrix3D &input2, Matrix3D &output) { - const struct metal_params params; +void batch_Add_metal(const Matrix3D &input, const Matrix3D &input2, Matrix3D &output) { + struct metal_params params; params.A.row = input.m_dim_y; params.A.column = input.m_dim_z; params.A.fp16_data_ptr = input.m_data; params.B.row = input2.m_dim_z; params.B.column = input2.m_dim_y; - params.B.int32_data_ptr = input2.m_data; + params.B.fp16_data_ptr = input2.m_data; params.C.row = output.m_dim_y; params.C.column = output.m_dim_z; params.C.fp16_data_ptr = output.m_data; - params.A.data_ptr = input.m_data; - params.B.data_ptr = input2.m_data; - params.C.data_ptr = output.m_data; params.op = METAL_KERNEL_BATCH_ADD; add_node(¶ms); } diff --git a/llm/src/ops/metal/embedding.cc b/llm/src/ops/metal/embedding.cc index 8ddba94b..0fd201ef 100644 --- a/llm/src/ops/metal/embedding.cc +++ b/llm/src/ops/metal/embedding.cc @@ -1,8 +1,8 @@ -#include "operators.h" +#include "../../../include/operators.h" #include "utils.h" #include "metal_compute.h" -void load_Embedding_params_metal(Embedding_cuda& op, std::string prefix) { +void load_Embedding_params_metal(Embedding_metal& op, std::string prefix) { op.lookup.load((prefix + "/weight.bin").c_str()); } diff --git a/llm/src/ops/metal/softmax.cc b/llm/src/ops/metal/softmax.cc index 6bde6067..91153931 100644 --- a/llm/src/ops/metal/softmax.cc +++ b/llm/src/ops/metal/softmax.cc @@ -1,8 +1,9 @@ #include -#include "operators.h" +#include "../../../include/operators.h" +#include "utils.h" -void softmax(Matrix3D input, Matrix3D output) { +void softmax_metal(Matrix3D input, Matrix3D output) { const struct metal_params params; params.A.row = input.m_dim_y; params.A.column = input.m_dim_z; diff --git a/llm/src/utils.cc b/llm/src/utils.cc index 7a9f23c2..03f01727 100644 --- a/llm/src/utils.cc +++ b/llm/src/utils.cc @@ -24,6 +24,39 @@ void read_to_array(const char* path, T* array, int size) { } } +void read_to_array_half(const char* 
path, half* array, int size) { + std::ifstream infile(path, std::ios::binary | std::ios::in); + if (infile.fail()) { + std::cout << strerror(errno) << ": " << path << std::endl; + throw("Expected error..."); + } else { + infile.read(reinterpret_cast(array), size * sizeof(half)); + infile.close(); + } +} + +int make_divisible_c(int c, int divisor) { + return (c + divisor - 1) / divisor; +} + +int calculate_zeros_width(int in_features, int group_size, int pack_num) { + int size_multiplier; + + if (group_size >= 128) { + size_multiplier = 1; + } else if (group_size == 64) { + size_multiplier = 2; + } else if (group_size == 32) { + size_multiplier = 4; + } else { + throw std::runtime_error("The group_size of calculate_zeros_width should be 128, 64 or 32."); + } + + int base_width = make_divisible_c(in_features / group_size, pack_num); + base_width = make_divisible_c(base_width, size_multiplier) * size_multiplier; + return base_width; +} + struct max_error_info { int idx; float a1, a2; @@ -186,12 +219,12 @@ void print_first_k_elelment(std::string name, const float* arr, int k, int start // 2. make a mapping (unordered_map) between MTL::Buffer and memory address // 3. when GPU want to access some address space, use the table to get the corresponding MTL::Buffer object // Befenits: not to worry about memory alignment, better performance -#include "matmul_metal_int4_imp.h" +#include "metal_compute.h" template void allocate_aligned_memory(T*& ptr, size_t size) { // allocate and get the pointer - void* void_ptr = MetalMatmulInt4IMP::allocateSharedMem(size); + void* void_ptr = allocateSharedMem(size); if (void_ptr == NULL) { std::cerr << "Metal memory allocation failed." 
<< std::endl; exit(-1); From 562d32aac9d32a0c7844a60d4fc3ee0e8e0000a6 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Thu, 16 May 2024 11:36:05 -0400 Subject: [PATCH 36/37] minor fix --- kernels/matmul.h | 9 ++++----- llm/src/nn_modules/metal/Int4llamaAttention.cc | 6 +++--- llm/src/nn_modules/metal/Int4llamaDecoder.cc | 2 +- llm/src/nn_modules/metal/Int4llamaDecoderLayer.cc | 8 ++++---- llm/src/nn_modules/metal/Int4llamaForCausalLM.cc | 2 +- llm/src/ops/metal/linear.cc | 2 -- 6 files changed, 13 insertions(+), 16 deletions(-) diff --git a/kernels/matmul.h b/kernels/matmul.h index fb655e90..7ca85c6a 100644 --- a/kernels/matmul.h +++ b/kernels/matmul.h @@ -102,11 +102,10 @@ struct thread_args { int start_i, end_i, blk_size; }; -// #ifdef QM_METAL -// #include "metal/include/metal_compute.h" -// // typedef half_float::half half; -// #endif - +#ifdef QM_METAL +#include "metal/include/metal_compute.h" +// typedef half_float::half half; +#endif #define MAX(A, B) ((A) > (B) ? (A) : (B)) #define MIN(A, B) ((A) < (B) ? 
(A) : (B)) diff --git a/llm/src/nn_modules/metal/Int4llamaAttention.cc b/llm/src/nn_modules/metal/Int4llamaAttention.cc index 593ab41d..53480065 100644 --- a/llm/src/nn_modules/metal/Int4llamaAttention.cc +++ b/llm/src/nn_modules/metal/Int4llamaAttention.cc @@ -42,7 +42,7 @@ Int4llamaAttention::Int4llamaAttention(std::string param_path, const struct mode allocate_aligned_memory(o_weight, (config.embed_dim * config.embed_dim * sizeof(int)) / 8); allocate_aligned_memory(qkv_weight, (config.embed_dim * config.embed_dim * 3 * sizeof(int)) / 8); this->o_proj = Linear_half_int4(Matrix3D(o_weight, 1, config.embed_dim, config.embed_dim / 8), - param_path + "/o_proj"); + param_path + "/o_proj"); // TODO: type match this->qkv_proj = Linear_half_int4(Matrix3D(qkv_weight, 1, config.embed_dim, config.embed_dim * 3 / 8), param_path + "/qkv_proj"); @@ -196,8 +196,8 @@ struct Int4llamaAttention_output Int4llamaAttention::forward(std::string param_p // METAL: Metal batch_Add_metal(attn_weights, input.attention_mask, attn_weights); - int threadsPerBlock_1D = 1024; - int blocksPerGrid_1D =(attn_weights.length() + threadsPerBlock_1D - 1) / threadsPerBlock_1D; + // int threadsPerBlock_1D = 1024; + // int blocksPerGrid_1D =(attn_weights.length() + threadsPerBlock_1D - 1) / threadsPerBlock_1D; // METAL: more kernels needed check_inf_half(attn_weights); diff --git a/llm/src/nn_modules/metal/Int4llamaDecoder.cc b/llm/src/nn_modules/metal/Int4llamaDecoder.cc index 9f65d9fe..406596ef 100644 --- a/llm/src/nn_modules/metal/Int4llamaDecoder.cc +++ b/llm/src/nn_modules/metal/Int4llamaDecoder.cc @@ -40,7 +40,7 @@ Int4llamaDecoder::Int4llamaDecoder(std::string param_path, const struct model_co // Embedding Matrix3D embweight(new float[voc_size * embed_dim], 1, voc_size, embed_dim); // METAL: Metal Embedding - this->embed_tokens = Embedding(embed_dim, voc_size, padding_idx, embweight); // METAL: how to deal with aliasing + this->embed_tokens = Embedding(embed_dim, voc_size, padding_idx, embweight); 
// METAL load_Embedding_params(this->embed_tokens, param_path + "/embed_tokens"); allocate_aligned_memory(norm_weight_ptr, embed_dim * sizeof(float)); diff --git a/llm/src/nn_modules/metal/Int4llamaDecoderLayer.cc b/llm/src/nn_modules/metal/Int4llamaDecoderLayer.cc index 827316ee..5d82c68a 100644 --- a/llm/src/nn_modules/metal/Int4llamaDecoderLayer.cc +++ b/llm/src/nn_modules/metal/Int4llamaDecoderLayer.cc @@ -86,8 +86,8 @@ struct Int4llamaDecoderLayer_output Int4llamaDecoderLayer::forward(std::string p Matrix3D residual_add(hidden_states_half_arr, input.hidden_states.m_dim_x, input.hidden_states.m_dim_y, input.hidden_states.m_dim_z); - int threadsPerBlock = 1024; - int blocksPerGrid =(input.hidden_states.length() + threadsPerBlock - 1) / threadsPerBlock; + // int threadsPerBlock = 1024; + // int blocksPerGrid =(input.hidden_states.length() + threadsPerBlock - 1) / threadsPerBlock; // METAL: add interface add_half(input.hidden_states, attn_output.attn_output, residual_add, this->num_attention_heads); @@ -102,14 +102,14 @@ struct Int4llamaDecoderLayer_output Int4llamaDecoderLayer::forward(std::string p Matrix3D up_proj(up_proj_arr, input.hidden_states.m_dim_x, input.hidden_states.m_dim_y, this->hidden_dim); this->up_proj.forward(post_attention_layernorm, up_proj); - int blocksPerGrid2 =(gate_proj.length() + threadsPerBlock - 1) / threadsPerBlock; + // int blocksPerGrid2 =(gate_proj.length() + threadsPerBlock - 1) / threadsPerBlock; // METAL: add interface SiLuMul_half(gate_proj, up_proj); Matrix3D down_proj(down_proj_arr, input.hidden_states.m_dim_x, input.hidden_states.m_dim_y, this->embed_dim); this->down_proj.forward(gate_proj, down_proj); - int blocksPerGrid3 =(residual_add.length() + threadsPerBlock - 1) / threadsPerBlock; + // int blocksPerGrid3 =(residual_add.length() + threadsPerBlock - 1) / tchreadsPerBlock; // METAL: add interface add_half(residual_add, down_proj, residual_add, this->num_attention_heads); diff --git 
a/llm/src/nn_modules/metal/Int4llamaForCausalLM.cc b/llm/src/nn_modules/metal/Int4llamaForCausalLM.cc index de645ec6..65d0e4f2 100644 --- a/llm/src/nn_modules/metal/Int4llamaForCausalLM.cc +++ b/llm/src/nn_modules/metal/Int4llamaForCausalLM.cc @@ -46,7 +46,7 @@ struct Int4LlamaForCausalLM_output Int4LlamaForCausalLM::forward(std::string par Matrix3D logits(logits_output, 1, sqlen, this->decoder.voc_size); half2float(logits_output_half, logits_output, sqlen * this->decoder.voc_size); - // waituntilcompleted + // compute all metal nodes metal_graph_compute(mgraph); struct Int4LlamaForCausalLM_output LMoutput = {logits, decoder_output.past_keys, decoder_output.past_values}; PROFILE_END(profile_name); diff --git a/llm/src/ops/metal/linear.cc b/llm/src/ops/metal/linear.cc index 1908799c..87899638 100644 --- a/llm/src/ops/metal/linear.cc +++ b/llm/src/ops/metal/linear.cc @@ -3,8 +3,6 @@ #include "utils.h" #include "metal_compute.h" - -// TODO: incorporate gemv from llama.cpp void Linear_half_int4::forward(const Matrix3D &x, Matrix3D &output) { const int num_thread = 8; Matrix3D b = this->weight; From a2690016737770bdb51ff845c49c2a16d0f211e4 Mon Sep 17 00:00:00 2001 From: DerrickYLJ Date: Fri, 24 May 2024 16:15:08 -0400 Subject: [PATCH 37/37] add op source --- .../{src => old_metal}/MetalMatmulInt4.cpp | 0 .../MetalMatmulInt4.hpp | 0 .../metal/{include => old_metal}/opParams.h | 0 kernels/metal/src/matmul_f32_f32.cc | 5 +- kernels/metal/src/matmul_int4_f32.cc | 5 +- kernels/metal/src/matvec_f32_f32.cc | 5 +- kernels/metal/src/matvec_int4_f32.cc | 5 +- kernels/metal/src/metal_compute.cc | 362 +++++++++--------- kernels/metal/src/op.metal | 36 -- llm/Makefile | 6 +- llm/src/ops/metal/BMM_F16T.cc | 4 +- llm/src/ops/metal/LlamaRMSNorm.cc | 10 +- llm/src/ops/metal/RotaryPosEmb.cc | 74 ++-- llm/src/ops/metal/embedding.cc | 5 +- llm/src/ops/metal/softmax.cc | 9 +- 15 files changed, 259 insertions(+), 267 deletions(-) rename kernels/metal/{src => old_metal}/MetalMatmulInt4.cpp 
(100%) rename kernels/metal/{include => old_metal}/MetalMatmulInt4.hpp (100%) rename kernels/metal/{include => old_metal}/opParams.h (100%) diff --git a/kernels/metal/src/MetalMatmulInt4.cpp b/kernels/metal/old_metal/MetalMatmulInt4.cpp similarity index 100% rename from kernels/metal/src/MetalMatmulInt4.cpp rename to kernels/metal/old_metal/MetalMatmulInt4.cpp diff --git a/kernels/metal/include/MetalMatmulInt4.hpp b/kernels/metal/old_metal/MetalMatmulInt4.hpp similarity index 100% rename from kernels/metal/include/MetalMatmulInt4.hpp rename to kernels/metal/old_metal/MetalMatmulInt4.hpp diff --git a/kernels/metal/include/opParams.h b/kernels/metal/old_metal/opParams.h similarity index 100% rename from kernels/metal/include/opParams.h rename to kernels/metal/old_metal/opParams.h diff --git a/kernels/metal/src/matmul_f32_f32.cc b/kernels/metal/src/matmul_f32_f32.cc index 98ac0e89..e47a54c0 100644 --- a/kernels/metal/src/matmul_f32_f32.cc +++ b/kernels/metal/src/matmul_f32_f32.cc @@ -1,9 +1,6 @@ #include "../include/metal_compute.h" namespace matmul { void mat_mul_f32_f32_metal(const struct matmul_params *params){ - struct metal_cgraph *graph = new (struct metal_cgraph); - graph->n_nodes = 1; - graph->mm_nodes[0] = (const metal_params *) params; - metal_graph_compute(METAL_KERNEL_MUL_MM_F32_F32, graph); + // placeholder } } \ No newline at end of file diff --git a/kernels/metal/src/matmul_int4_f32.cc b/kernels/metal/src/matmul_int4_f32.cc index 79ad3bc6..263b36a1 100644 --- a/kernels/metal/src/matmul_int4_f32.cc +++ b/kernels/metal/src/matmul_int4_f32.cc @@ -1,9 +1,6 @@ #include "../include/metal_compute.h" namespace matmul { void mat_mul_int4_f32_metal(const struct matmul_params *params){ - struct metal_cgraph *graph = new (struct metal_cgraph); - graph->n_nodes = 1; - graph->mm_nodes[0] = (const metal_params *) params; - metal_graph_compute(METAL_KERNEL_MUL_MM_INT4_F32, graph); + // placeholder } } \ No newline at end of file diff --git 
a/kernels/metal/src/matvec_f32_f32.cc b/kernels/metal/src/matvec_f32_f32.cc index 2abd779f..5f02e6d3 100644 --- a/kernels/metal/src/matvec_f32_f32.cc +++ b/kernels/metal/src/matvec_f32_f32.cc @@ -1,9 +1,6 @@ #include "../include/metal_compute.h" namespace matmul { void mat_vec_f32_f32_metal(const struct matmul_params *params){ - struct metal_cgraph *graph = new (struct metal_cgraph); - graph->n_nodes = 1; - graph->mm_nodes[0] = (const metal_params *) params; - metal_graph_compute(METAL_KERNEL_MUL_MV_F32_F32, graph); + // placeholder } } \ No newline at end of file diff --git a/kernels/metal/src/matvec_int4_f32.cc b/kernels/metal/src/matvec_int4_f32.cc index c8f843dd..982766ae 100644 --- a/kernels/metal/src/matvec_int4_f32.cc +++ b/kernels/metal/src/matvec_int4_f32.cc @@ -1,9 +1,6 @@ #include "../include/metal_compute.h" namespace matmul { void mat_vec_int4_f32_metal(const struct matmul_params *params){ - struct metal_cgraph *graph = new (struct metal_cgraph); - graph->n_nodes = 1; - graph->mm_nodes[0] = (const metal_params *) params; - metal_graph_compute(METAL_KERNEL_MUL_MV_INT4_F32, graph); + // placeholder } } \ No newline at end of file diff --git a/kernels/metal/src/metal_compute.cc b/kernels/metal/src/metal_compute.cc index dea9903e..22aaf358 100644 --- a/kernels/metal/src/metal_compute.cc +++ b/kernels/metal/src/metal_compute.cc @@ -75,54 +75,57 @@ void init() { ctx->support_simdgroup_mm = ctx->device->supportsFamily((MTL::GPUFamily)MTLGPUFamilyApple7); - // load kernels - { - NS::Error *error = nullptr; - for (int i = 0; i < METAL_KERNEL_TYPE_COUNT; ++i) { - ctx->kernels[i].pipeline = nullptr; - } -#define METAL_ADD_KERNEL(e, name, supported) \ +// Assuming necessary headers and namespaces are included + +// load kernels +{ + NS::Error *error = nullptr; + for (int i = 0; i < METAL_KERNEL_TYPE_COUNT; ++i) { + ctx->kernels[i].pipeline = nullptr; + } + + #define METAL_ADD_KERNEL(e, name, supported) \ if (supported) { \ struct metal_kernel * kernel = 
&ctx->kernels[e]; \ - const char * str = "kernel_" + name; \ - auto str = NS::String::string(str, NS::ASCIIStringEncoding); \ + std::string kernel_name = "kernel_"; \ + kernel_name += name; \ + auto str = NS::String::string(kernel_name.c_str(), NS::ASCIIStringEncoding); \ MTL::Function * metal_function = metal_library->newFunction(str); \ kernel->pipeline = ctx->device->newComputePipelineState(metal_function, &error); \ metal_function->release(); \ if (error) { \ - printf("load pipeline error"); \ - return nullptr; \ - } \ + printf("load pipeline error\n"); \ + } \ } else { \ - printf("kernel name not supported "); \ + printf("kernel name not supported\n"); \ } - // simd_sum and simd_max requires MTLGPUFamilyApple7 - // TODO: syntax error - METAL_ADD_KERNEL(METAL_KERNEL_FLOAT2HALF "float2half", true); - METAL_ADD_KERNEL(METAL_KERNEL_HALF2FLOAT, "half2float", true); - METAL_ADD_KERNEL(METAL_KERNEL_PREPARE_DECODER_ATTENTION_MASK_HALF, "kernel_prepare_decoder_attention_mask_half", true); - METAL_ADD_KERNEL(METAL_KERNEL_SILUMUL_HALF, "SiLuMul_half", true); - METAL_ADD_KERNEL(METAL_KERNEL_ADD_HALF, "add_half", true); - METAL_ADD_KERNEL(METAL_KERNEL_SHAPE_QKV, "shape_qkv", true); - METAL_ADD_KERNEL(METAL_KERNEL_UNSHAPE, "unshape", true); - METAL_ADD_KERNEL(METAL_KERNEL_TRANSPOSE_1_2IDX, "transpose_1_2idx", true); - METAL_ADD_KERNEL(METAL_KERNEL_CHECK_INF_HALF, "check_inf_half", true); - METAL_ADD_KERNEL(METAL_KERNEL_EMBEDDING, "embedding", true); - METAL_ADD_KERNEL(METAL_KERNEL_BATCH_ADD, "batch_add", true); - METAL_ADD_KERNEL(METAL_KERNEL_RELU, "relu", true); - METAL_ADD_KERNEL(METAL_KERNEL_SILU, "silu", true); - METAL_ADD_KERNEL(METAL_KERNEL_GELU, "gelu", true); - METAL_ADD_KERNEL(METAL_KERNEL_GELU_QUICK, "gelu_quick", true); - METAL_ADD_KERNEL(METAL_KERNEL_RMS_NORM, "rms_norm", true); - METAL_ADD_KERNEL(METAL_KERNEL_SOFT_MAX, "soft_max", true); - METAL_ADD_KERNEL(METAL_KERNEL_SOFT_MAX_4, "soft_max_4", true); - METAL_ADD_KERNEL(METAL_KERNEL_ROPE, "rope", true); - 
METAL_ADD_KERNEL(METAL_KERNEL_MUL_MM_INT4_F32, "mul_mm_int4_f32", true); - METAL_ADD_KERNEL(METAL_KERNEL_MUL_MV_INT4_F32, "mul_mv_int4_f32", true); - METAL_ADD_KERNEL(METAL_KERNEL_MUL_MM_F32_F32, "mul_mm_f32_f32", true); - METAL_ADD_KERNEL(METAL_KERNEL_MUL_MV_F32_F32, "mul_mv_f32_f32", true); - } + // simd_sum and simd_max requires MTLGPUFamilyApple7 + METAL_ADD_KERNEL(METAL_KERNEL_FLOAT2HALF, "float2half", true); + METAL_ADD_KERNEL(METAL_KERNEL_HALF2FLOAT, "half2float", true); + METAL_ADD_KERNEL(METAL_KERNEL_PREPARE_DECODER_ATTENTION_MASK_HALF, "kernel_prepare_decoder_attention_mask_half", true); + METAL_ADD_KERNEL(METAL_KERNEL_SILUMUL_HALF, "SiLuMul_half", true); + METAL_ADD_KERNEL(METAL_KERNEL_ADD_HALF, "add_half", true); + METAL_ADD_KERNEL(METAL_KERNEL_SHAPE_QKV, "shape_qkv", true); + METAL_ADD_KERNEL(METAL_KERNEL_UNSHAPE, "unshape", true); + METAL_ADD_KERNEL(METAL_KERNEL_TRANSPOSE_1_2IDX, "transpose_1_2idx", true); + METAL_ADD_KERNEL(METAL_KERNEL_CHECK_INF_HALF, "check_inf_half", true); + METAL_ADD_KERNEL(METAL_KERNEL_EMBEDDING, "embedding", true); + METAL_ADD_KERNEL(METAL_KERNEL_BATCH_ADD, "batch_add", true); + METAL_ADD_KERNEL(METAL_KERNEL_RELU, "relu", true); + METAL_ADD_KERNEL(METAL_KERNEL_SILU, "silu", true); + METAL_ADD_KERNEL(METAL_KERNEL_GELU, "gelu", true); + METAL_ADD_KERNEL(METAL_KERNEL_GELU_QUICK, "gelu_quick", true); + METAL_ADD_KERNEL(METAL_KERNEL_RMS_NORM, "rms_norm", true); + METAL_ADD_KERNEL(METAL_KERNEL_SOFT_MAX, "soft_max", true); + METAL_ADD_KERNEL(METAL_KERNEL_SOFT_MAX_4, "soft_max_4", true); + METAL_ADD_KERNEL(METAL_KERNEL_ROPE, "rope", true); + METAL_ADD_KERNEL(METAL_KERNEL_MUL_MM_INT4_F32, "mul_mm_int4_f32", true); + METAL_ADD_KERNEL(METAL_KERNEL_MUL_MV_INT4_F32, "mul_mv_int4_f32", true); + METAL_ADD_KERNEL(METAL_KERNEL_MUL_MM_F32_F32, "mul_mm_f32_f32", true); + METAL_ADD_KERNEL(METAL_KERNEL_MUL_MV_F32_F32, "mul_mv_f32_f32", true); +} + metal_library->release(); has_init = true; } @@ -167,12 +170,19 @@ enum status 
metal_graph_compute(struct metal_cgraph * mg) { MTL::CommandBuffer **command_buffers = command_buffer_builder; for (int iter = 0; iter < n_cb; ++iter){ const int cb_idx = iter; + int nth = 0; size_t offs_src0 = 0; size_t offs_src1 = 0; size_t offs_src2 = 0; size_t offs_dst = 0; MTL::CommandBuffer *command_buffer = command_buffers[cb_idx]; MTL::ComputeCommandEncoder *encoder = command_buffer->computeCommandEncoder(edesc); + MTL::Buffer *id_src0 = nullptr; + MTL::Buffer *id_src1 = nullptr; + MTL::Buffer *id_src2 = nullptr; + MTL::Buffer *id_dst = nullptr; + MTL::Size ThreadperGroup; + MTL::Size ThreadpergroupsperGrid; const int node_start = (cb_idx + 0) * n_nodes_per_cb; const int node_end = MIN((cb_idx == n_cb - 1) ? n_nodes : (cb_idx + 1) * n_nodes_per_cb, n_nodes); @@ -184,68 +194,107 @@ enum status metal_graph_compute(struct metal_cgraph * mg) { encoder->memoryBarrier(MTL::BarrierScopeBuffers); continue; } + struct matrix src0 = curr_node ? curr_node->A : (struct matrix){0}; // Initialize to default if curr_node is NULL + struct matrix src1 = curr_node ? curr_node->B : (struct matrix){0}; // Initialize to default if curr_node is NULL + struct matrix dst = curr_node ? curr_node->C : (struct matrix){0}; // Initialize to default if curr_node is NULL + + // Validity check to ensure src0, src1, and dst are valid matrices + bool is_src0_valid = curr_node && (curr_node->A.row > 0) && (curr_node->A.column > 0); + bool is_src1_valid = curr_node && (curr_node->B.row > 0) && (curr_node->B.column > 0); + bool is_dst_valid = curr_node && (curr_node->C.row > 0) && (curr_node->C.column > 0); + + // TODO: double check the placement of parameters + const int64_t ne00 = is_src0_valid ? src0.row : 0; // k + const int64_t ne01 = is_src0_valid ? src0.column : 0; // n + const int64_t ne02 = (curr_node && curr_node->bs != 0) ? curr_node->bs : 1; // bs + const int64_t ne03 = 1; + + const uint64_t nb00 = is_src0_valid ? sizeof(unsigned char) :0; + const uint64_t nb01 = is_src0_valid ? 
nb00*ne00/block_size :0; + const uint64_t nb02 = is_src0_valid ? nb01*ne01 :0; + const uint64_t nb03 = is_src0_valid ? nb02*ne02 :0; + + const int64_t ne10 = is_src1_valid ? src1.row : 0; // k + const int64_t ne11 = is_src1_valid ? src1.column : 0; // m + const int64_t ne12 = (curr_node && curr_node->bs != 0) ? curr_node->bs : 1; // bs + const int64_t ne13 = 1; + + const uint64_t nb10 = is_src1_valid ? sizeof(unsigned char) : 0; + const uint64_t nb11 = is_src1_valid ? nb10*ne10 : 0; + const uint64_t nb12 = is_src1_valid ? nb11*ne11 : 0; + const uint64_t nb13 = is_src1_valid ? nb12*ne12 : 0; + + const int64_t ne0 = is_dst_valid ? dst.row : 0; + const int64_t ne1 = is_dst_valid ? dst.column : 0; + const int64_t ne2 = (curr_node && curr_node->bs != 0) ? curr_node->bs : 1; + const int64_t ne3 = 1; + + const uint64_t nb0 = is_dst_valid ? sizeof(unsigned char) : 0; + const uint64_t nb1 = is_dst_valid ? nb0*ne0 : 0; + const uint64_t nb2 = is_dst_valid ? nb1*ne1 : 0; + const uint64_t nb3 = is_dst_valid ? 
nb2*ne2 : 0; switch (op) { - case (METAL_KERNEL_FLOAT2FLOAT): - MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).data_ptr); - MTL::Buffer *id_dst = getBufferfromPtr((curr_node->B).half_data_ptr); + case (METAL_KERNEL_FLOAT2HALF): + {id_src0 = getBufferfromPtr((curr_node->A).data_ptr); + id_dst = getBufferfromPtr((curr_node->B).half_data_ptr); encoder->setComputePipelineState(ctx->kernels[op].pipeline); encoder->setBuffer(id_src0, offs_src0, 0); encoder->setBuffer(id_dst, offs_src1, 1); encoder->setBytes(&curr_node->sqlen, sizeof(int), 2); - MTL::Size ThreadperGroup = MTL::Size::Make(1024, 1, 1); - MTL::Size ThreadgroupsperGrid = MTL::Size::Make((curr_node->sqlen + 1024 - 1) / 1024, 1, 1); - encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); - break; + ThreadperGroup = MTL::Size::Make(1024, 1, 1); + ThreadpergroupsperGrid = MTL::Size::Make((curr_node->sqlen + 1024 - 1) / 1024, 1, 1); + encoder->dispatchThreadgroups(ThreadpergroupsperGrid, ThreadperGroup); + break;} case (METAL_KERNEL_HALF2FLOAT): - MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); - MTL::Buffer *id_dst = getBufferfromPtr((curr_node->B).data_ptr); + {id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); + id_dst = getBufferfromPtr((curr_node->B).data_ptr); encoder->setComputePipelineState(ctx->kernels[op].pipeline); encoder->setBuffer(id_src0, offs_src0, 0); encoder->setBuffer(id_dst, offs_src1, 1); encoder->setBytes(&curr_node->sqlen, sizeof(int), 2); - MTL::Size ThreadperGroup = MTL::Size::Make(1024, 1, 1); - MTL::Size ThreadgroupsperGrid = MTL::Size::Make((curr_node->sqlen + 1024 - 1) / 1024, 1, 1); - encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); - break; + ThreadperGroup = MTL::Size::Make(1024, 1, 1); + ThreadpergroupsperGrid = MTL::Size::Make((curr_node->sqlen + 1024 - 1) / 1024, 1, 1); + encoder->dispatchThreadgroups(ThreadpergroupsperGrid, ThreadperGroup); + break;} case (METAL_KERNEL_PREPARE_DECODER_ATTENTION_MASK_HALF): - 
MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); + {id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); encoder->setComputePipelineState(ctx->kernels[op].pipeline); encoder->setBuffer(id_src0, offs_src0, 0); encoder->setBytes(&curr_node->sqlen, sizeof(int), 1); encoder->setBytes(&curr_node->past_sqlen, sizeof(int), 2); - MTL::Size ThreadperGroup = MTL::Size::Make(32, 32, 1); - MTL::Size ThreadgroupsperGrid = MTL::Size::Make((curr_node->sqlen - curr_node->past_sqlen + 32 - 1) / 32, + ThreadperGroup = MTL::Size::Make(32, 32, 1); + ThreadpergroupsperGrid = MTL::Size::Make((curr_node->sqlen - curr_node->past_sqlen + 32 - 1) / 32, (curr_node->sqlen + 32 - 1) / 32, 1); - encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); - break; + encoder->dispatchThreadgroups(ThreadpergroupsperGrid, ThreadperGroup); + break;} case (METAL_KERNEL_SILUMUL_HALF): - MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); - MTL::Buffer *id_src1 = getBufferfromPtr((curr_node->B).half_data_ptr); + {id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); + id_src1 = getBufferfromPtr((curr_node->B).half_data_ptr); encoder->setComputePipelineState(ctx->kernels[op].pipeline); encoder->setBuffer(id_src0, offs_src0, 0); encoder->setBuffer(id_src1, offs_src1, 1); encoder->setBytes(&curr_node->sqlen, sizeof(int), 2); - MTL::Size ThreadperGroup = MTL::Size::Make(1024, 1, 1); - MTL::Size ThreadgroupsperGrid = MTL::Size::Make((curr_node->sqlen + 1024 - 1) / 1024, 1, 1); - encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); - break; + ThreadperGroup = MTL::Size::Make(1024, 1, 1); + ThreadpergroupsperGrid = MTL::Size::Make((curr_node->sqlen + 1024 - 1) / 1024, 1, 1); + encoder->dispatchThreadgroups(ThreadpergroupsperGrid, ThreadperGroup); + break;} case (METAL_KERNEL_ADD_HALF): - MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); - MTL::Buffer *id_src1 = getBufferfromPtr((curr_node->B).half_data_ptr); - 
MTL::Buffer *id_src2 = getBufferfromPtr((curr_node->C).half_data_ptr); + {id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); + id_src1 = getBufferfromPtr((curr_node->B).half_data_ptr); + id_src2 = getBufferfromPtr((curr_node->C).half_data_ptr); encoder->setComputePipelineState(ctx->kernels[op].pipeline); encoder->setBuffer(id_src0, offs_src0, 0); encoder->setBuffer(id_src1, offs_src1, 1); encoder->setBuffer(id_src2, offs_src2, 2); encoder->setBytes(&curr_node->sqlen, sizeof(int), 3); - MTL::Size ThreadperGroup = MTL::Size::Make(1024, 1, 1); - MTL::Size ThreadgroupsperGrid = MTL::Size::Make((curr_node->sqlen + 1024 - 1) / 1024, 1, 1); - encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); - break; + ThreadperGroup = MTL::Size::Make(1024, 1, 1); + ThreadpergroupsperGrid = MTL::Size::Make((curr_node->sqlen + 1024 - 1) / 1024, 1, 1); + encoder->dispatchThreadgroups(ThreadpergroupsperGrid, ThreadperGroup); + break;} case (METAL_KERNEL_SHAPE_QKV): - MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); //input_ids int - MTL::Buffer *id_src1 = getBufferfromPtr((curr_node->B).half_data_ptr); //output half - MTL::Buffer *id_src2 = getBufferfromPtr((curr_node->C).half_data_ptr); + {id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); //input_ids int + id_src1 = getBufferfromPtr((curr_node->B).half_data_ptr); //output half + id_src2 = getBufferfromPtr((curr_node->C).half_data_ptr); MTL::Buffer *id_src3 = getBufferfromPtr((curr_node->D).half_data_ptr); encoder->setComputePipelineState(ctx->kernels[op].pipeline); encoder->setBuffer(id_src0, offs_src0, 0); @@ -255,30 +304,30 @@ enum status metal_graph_compute(struct metal_cgraph * mg) { encoder->setBytes(&curr_node->num_heads, sizeof(int), 4); encoder->setBytes(&curr_node->sqlen, sizeof(int), 5); encoder->setBytes(&curr_node->head_dim, sizeof(int), 6); - MTL::Size ThreadperGroup = MTL::Size::Make(16, 1, 64); - MTL::Size ThreadgroupsperGrid = MTL::Size::Make((curr_node->num_heads + 16 - 
1) / 16, - (curr_node->sqlen + 1 - 1) / 1y, + ThreadperGroup = MTL::Size::Make(16, 1, 64); + ThreadpergroupsperGrid = MTL::Size::Make((curr_node->num_heads + 16 - 1) / 16, + (curr_node->sqlen + 1 - 1) / 1, (curr_node->head_dim + 64 - 1) / 64); - encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); - break; + encoder->dispatchThreadgroups(ThreadpergroupsperGrid, ThreadperGroup); + break;} case (METAL_KERNEL_UNSHAPE): - MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); - MTL::Buffer *id_src1 = getBufferfromPtr((curr_node->B).half_data_ptr); + {id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); + id_src1 = getBufferfromPtr((curr_node->B).half_data_ptr); encoder->setComputePipelineState(ctx->kernels[op].pipeline); encoder->setBuffer(id_src0, offs_src0, 0); encoder->setBuffer(id_src1, offs_src1, 1); encoder->setBytes(&curr_node->num_heads, sizeof(int), 2); encoder->setBytes(&curr_node->sqlen, sizeof(int), 3); encoder->setBytes(&curr_node->head_dim, sizeof(int), 4); - MTL::Size ThreadperGroup = MTL::Size::Make(16, 1, 64); - MTL::Size ThreadgroupsperGrid = MTL::Size::Make((curr_node->num_heads + 16 - 1) / 16, + ThreadperGroup = MTL::Size::Make(16, 1, 64); + ThreadpergroupsperGrid = MTL::Size::Make((curr_node->num_heads + 16 - 1) / 16, (curr_node->sqlen + 1 - 1) / 1, (curr_node->head_dim + 64 - 1) / 64); - encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); - break; + encoder->dispatchThreadgroups(ThreadpergroupsperGrid, ThreadperGroup); + break;} case (METAL_KERNEL_TRANSPOSE_1_2IDX): - MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); - MTL::Buffer *id_src1 = getBufferfromPtr((curr_node->B).half_data_ptr); + {id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); + id_src1 = getBufferfromPtr((curr_node->B).half_data_ptr); encoder->setComputePipelineState(ctx->kernels[op].pipeline); encoder->setBuffer(id_src0, offs_src0, 0); encoder->setBuffer(id_src1, offs_src1, 1); @@ -287,24 +336,24 @@ 
enum status metal_graph_compute(struct metal_cgraph * mg) { encoder->setBytes(&curr_node->input_m_dim_z, sizeof(int), 4); encoder->setBytes(&curr_node->B.row, sizeof(int), 5); encoder->setBytes(&curr_node->B.column, sizeof(int), 6); - MTL::Size ThreadperGroup = MTL::Size::Make(8, 4, 32); - MTL::Size ThreadgroupsperGrid = MTL::Size::Make((curr_node->num_heads + 8 - 1) / 8, + ThreadperGroup = MTL::Size::Make(8, 4, 32); + ThreadpergroupsperGrid = MTL::Size::Make((curr_node->num_heads + 8 - 1) / 8, (curr_node->tgz + 4 - 1) / 4, (curr_node->head_dim + 32 - 1) / 32); - encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); - break; + encoder->dispatchThreadgroups(ThreadpergroupsperGrid, ThreadperGroup); + break;} case (METAL_KERNEL_CHECK_INF_HALF): - MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); + { id_src0 = getBufferfromPtr((curr_node->A).half_data_ptr); encoder->setComputePipelineState(ctx->kernels[op].pipeline); encoder->setBuffer(id_src0, offs_src0, 0); encoder->setBytes(&curr_node->sqlen, sizeof(int), 1); - MTL::Size ThreadperGroup = MTL::Size::Make(1024, 1, 1); - MTL::Size ThreadgroupsperGrid = MTL::Size::Make((curr_node->sqlen + 1024 - 1) /1024, 1, 1); - encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); - break; + ThreadperGroup = MTL::Size::Make(1024, 1, 1); + ThreadpergroupsperGrid = MTL::Size::Make((curr_node->sqlen + 1024 - 1) /1024, 1, 1); + encoder->dispatchThreadgroups(ThreadpergroupsperGrid, ThreadperGroup); + break;} case (METAL_KERNEL_EMBEDDING): - MTL::Buffer *id_src0 = getBufferfromPtr((curr_node->A).int32_data_ptr); //input_ids int - MTL::Buffer *id_dst = getBufferfromPtr((curr_node->C).half_data_ptr); //output half + {id_src0 = getBufferfromPtr((curr_node->A).int32_data_ptr); //input_ids int + id_dst = getBufferfromPtr((curr_node->C).half_data_ptr); //output half MTL::Buffer *id_lookup = getBufferfromPtr((curr_node->B).data_ptr); //fp32 
encoder->setComputePipelineState(ctx->kernels[op].pipeline); encoder->setBuffer(id_src0, offs_src0, 0); @@ -313,51 +362,18 @@ enum status metal_graph_compute(struct metal_cgraph * mg) { encoder->setBytes(&curr_node->embed_dim, sizeof(int), 3); int threadsPerBlock = 1024; int blocksPerGrid = (curr_node->A.column + threadsPerBlock - 1) / threadsPerBlock; - MTL::Size ThreadperGroup = MTL::Size::Make(threadsPerBlock, 1, 1); - MTL::Size ThreadgroupsperGrid = MTL::Size::Make((curr_node->A.column + threadsPerBlock - 1) / threadsPerBlock, 1, 1); + ThreadperGroup = MTL::Size::Make(threadsPerBlock, 1, 1); + ThreadpergroupsperGrid = MTL::Size::Make((curr_node->A.column + threadsPerBlock - 1) / threadsPerBlock, 1, 1); // Dispatch the kernel - encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); - break; - struct matrix src0 = curr_node->A; - struct matrix src1 = curr_node->B; - struct matrix dst = curr_node->C; - // TODO: double check the placement of parameters - const int64_t ne00 = src0.row; // k - const int64_t ne01 = src0.column; // n - const int64_t ne02 = (curr_node && curr_node->bs != 0) ? curr_node->bs : 1; // bs - const int64_t ne03 = 1; - - const uint64_t nb00 = sizeof(unsigned char); - const uint64_t nb01 = nb00*ne00/block_size; - const uint64_t nb02 = nb01*ne01; - const uint64_t nb03 = nb02*ne02; - - const int64_t ne10 = src1.row; // k - const int64_t ne11 = src1.column; // m - const int64_t ne12 = (curr_node && curr_node->bs != 0) ? curr_node->bs : 1; // bs - const int64_t ne13 = 1; - - const uint64_t nb10 = sizeof(unsigned char); - const uint64_t nb11 = nb10*ne10; - const uint64_t nb12 = nb11*ne11; - const uint64_t nb13 = nb12*ne12; - - const int64_t ne0 = dst.row; - const int64_t ne1 = dst.column; - const int64_t ne2 = (curr_node && curr_node->bs != 0) ? 
curr_node->bs : 1; - const int64_t ne3 = 1; - - const uint64_t nb0 = sizeof(unsigned char); - const uint64_t nb1 = nb0*ne0; - const uint64_t nb2 = nb1*ne1; - const uint64_t nb3 = nb2*ne2; + encoder->dispatchThreadgroups(ThreadpergroupsperGrid, ThreadperGroup); + break;} case METAL_KERNEL_MUL_MM_INT4_F32: case METAL_KERNEL_MUL_MV_INT4_F32: case METAL_KERNEL_MUL_MM_F32_F32: case METAL_KERNEL_MUL_MV_F32_F32: - MTL::Buffer *id_src0 = (op == METAL_KERNEL_MUL_MM_INT4_F32) || (op == METAL_KERNEL_MUL_MV_INT4_F32) ? getBufferfromPtr(src0.int4_data_ptr) : getBufferfromPtr(src0.data_ptr); - MTL::Buffer *id_src1 = getBufferfromPtr(src1.data_ptr); - MTL::Buffer *id_dst = getBufferfromPtr(dst.data_ptr); + { id_src0 = (op == METAL_KERNEL_MUL_MM_INT4_F32) || (op == METAL_KERNEL_MUL_MV_INT4_F32) ? getBufferfromPtr(src0.int4_data_ptr) : getBufferfromPtr(src0.data_ptr); + id_src1 = getBufferfromPtr(src1.data_ptr); + id_dst = getBufferfromPtr(dst.data_ptr); const uint r2 = ne12/ne02; const uint r3 = ne13/ne03; int ne11_mm_min = 1; @@ -383,42 +399,42 @@ enum status metal_graph_compute(struct metal_cgraph * mg) { encoder->setBytes(&r2, sizeof(r2), 13); encoder->setBytes(&r3, sizeof(r3), 14); encoder->setThreadgroupMemoryLength(8192, 0); - MTL::Size ThreadperGroup = MTL::Size::Make(128, 1, 1); - MTL::Size ThreadgroupsperGrid = MTL::Size::Make((ne11 + 31)/32, (ne01 + 63)/64, ne12*ne13); // from https://github.com/ggerganov/llama.cpp/blob/d5ab29757ebc59a30f03e408294ec20628a6374e/ggml-metal.m#L1405 + ThreadperGroup = MTL::Size::Make(128, 1, 1); + ThreadpergroupsperGrid = MTL::Size::Make((ne11 + 31)/32, (ne01 + 63)/64, ne12*ne13); // from https://github.com/ggerganov/llama.cpp/blob/d5ab29757ebc59a30f03e408294ec20628a6374e/ggml-metal.m#L1405 // Dispatch the kernel - encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); + encoder->dispatchThreadgroups(ThreadpergroupsperGrid, ThreadperGroup); } - break; + break;} case (METAL_KERNEL_BATCH_ADD): - MTL::Buffer *id_src0 = 
getBufferfromPtr(src0.fp16_data_ptr); - MTL::Buffer *id_src1 = getBufferfromPtr(src1.int32_data_ptr); - MTL::Buffer *id_dst = getBufferfromPtr(dst.fp16_data_ptr); + {id_src0 = getBufferfromPtr(src0.fp16_data_ptr); + id_src1 = getBufferfromPtr(src1.int32_data_ptr); + id_dst = getBufferfromPtr(dst.fp16_data_ptr); encoder->setComputePipelineState(ctx->kernels[op].pipeline); encoder->setBuffer(id_src0, offs_src0, 0); encoder->setBuffer(id_src1, offs_src1, 1); encoder->setBuffer(id_dst, offs_src2, 2); - MTL::Size ThreadperGroup = MTL::Size::Make(src0.row, src0.column, 1); - MTL::Size ThreadgroupsperGrid = MTL::Size::Make(1, 1, 1); - encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); - break; + ThreadperGroup = MTL::Size::Make(src0.row, src0.column, 1); + ThreadpergroupsperGrid = MTL::Size::Make(1, 1, 1); + encoder->dispatchThreadgroups(ThreadpergroupsperGrid, ThreadperGroup); + break;} case (METAL_KERNEL_RELU): case (METAL_KERNEL_SILU): case (METAL_KERNEL_GELU): case (METAL_KERNEL_GELU_QUICK): - MTL::Buffer *id_src0 = getBufferfromPtr(src0.data_ptr); - MTL::Buffer *id_dst = getBufferfromPtr(dst.data_ptr); + {id_src0 = getBufferfromPtr(src0.data_ptr); + id_dst = getBufferfromPtr(dst.data_ptr); encoder->setComputePipelineState(ctx->kernels[op].pipeline); encoder->setBuffer(id_src0, offs_src0, 0); encoder->setBuffer(id_dst, offs_src2, 1); - MTL::Size ThreadperGroup = MTL::Size::Make(src0.length,1, 1); - MTL::Size ThreadgroupsperGrid = MTL::Size::Make(1, 1, 1); - encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); - break; + ThreadperGroup = MTL::Size::Make(src0.length(), 1, 1); + ThreadpergroupsperGrid = MTL::Size::Make(1, 1, 1); + encoder->dispatchThreadgroups(ThreadpergroupsperGrid, ThreadperGroup); + break;} case (METAL_KERNEL_RMS_NORM): - int nth = 32; // SIMD width - MTL::Buffer *id_src0 = getBufferfromPtr(src0.half_data_ptr); - MTL::Buffer *id_src1 = getBufferfromPtr(src1.half_data_ptr); - MTL::Buffer *id_dst = 
getBufferfromPtr(dst.half_data_ptr); + {nth = 32; // SIMD width + id_src0 = getBufferfromPtr(src0.half_data_ptr); + id_src1 = getBufferfromPtr(src1.half_data_ptr); + id_dst = getBufferfromPtr(dst.half_data_ptr); // TODO: add src1 (weights) encoder->setComputePipelineState(ctx->kernels[op].pipeline); encoder->setBuffer(id_src0, offs_src0, 0); @@ -428,10 +444,10 @@ enum status metal_graph_compute(struct metal_cgraph * mg) { encoder->setBytes(&(curr_node->eps), sizeof(curr_node->eps), 4); encoder->setThreadgroupMemoryLength(32*sizeof(float), 0); encoder->dispatchThreadgroups(MTL::Size::Make(src0.row, 1, 1), MTL::Size::Make(src0.row, 1, 1)); - break; + break;} case (METAL_KERNEL_SOFT_MAX): case (METAL_KERNEL_SOFT_MAX_4): - int nth = 32; // SIMD width + {nth = 32; // SIMD width if (ne00%4 == 0) { while (nth < ne00/4 && nth < 256) { nth *= 2; @@ -443,10 +459,10 @@ enum status metal_graph_compute(struct metal_cgraph * mg) { } encoder->setComputePipelineState(ctx->kernels[op].pipeline); } - const float scale = curr_node->scale; - MTL::Buffer *id_src0 = getBufferfromPtr(src0.half_data_ptr); - MTL::Buffer *id_src1 = getBufferfromPtr(src1.data_ptr); - MTL::Buffer *id_dst = getBufferfromPtr(dst.data_ptr); + float scale = curr_node->scale; + id_src0 = getBufferfromPtr(src0.half_data_ptr); + id_src1 = getBufferfromPtr(src1.data_ptr); + id_dst = getBufferfromPtr(dst.data_ptr); encoder->setBuffer(id_src0, offs_src0, 0); encoder->setBuffer(id_src1, offs_src1, 1); encoder->setBuffer(id_dst, offs_src2, 2); @@ -456,12 +472,12 @@ enum status metal_graph_compute(struct metal_cgraph * mg) { encoder->setBytes(&scale, sizeof(scale), 6); encoder->setThreadgroupMemoryLength(32*sizeof(float), 0); encoder->dispatchThreadgroups(MTL::Size::Make(ne01*ne02*ne03, 1, 1), MTL::Size::Make(nth, 1, 1)); - break; + break;} case (METAL_KERNEL_ROPE): - MTL::Buffer *id_src0 = getBufferfromPtr(src0.half_data_ptr); - MTL::Buffer *id_src1 = getBufferfromPtr(src1.int32_data_ptr); - MTL::Buffer *id_dst = 
getBufferfromPtr(dst.half_data_ptr); - const int nth = MIN(1024, ne00); + {id_src0 = getBufferfromPtr(src0.half_data_ptr); + id_src1 = getBufferfromPtr(src1.int32_data_ptr); + id_dst = getBufferfromPtr(dst.half_data_ptr); + nth = MIN(1024, ne00); const int n_past = curr_node->n_past; //((int32_t *) dst.op_params)[0]; const int n_dims = curr_node->n_dims; //((int32_t *) dst.op_params)[1]; @@ -509,11 +525,11 @@ enum status metal_graph_compute(struct metal_cgraph * mg) { encoder->setBytes(&beta_fast, sizeof(float), 27); encoder->setBytes(&beta_slow, sizeof(float), 28); - MTL::Size ThreadperGroup = MTL::Size::Make(nth, 1, 1); - MTL::Size ThreadgroupsperGrid = MTL::Size::Make(ne01, ne02, ne03); // from https://github.com/ggerganov/llama.cpp/blob/1b496a745c315022df2d919374052e6004ced8d3/ggml-metal.m#L2240 + ThreadperGroup = MTL::Size::Make(nth, 1, 1); + ThreadpergroupsperGrid = MTL::Size::Make(ne01, ne02, ne03); // from https://github.com/ggerganov/llama.cpp/blob/1b496a745c315022df2d919374052e6004ced8d3/ggml-metal.m#L2240 // Dispatch the kernel - encoder->dispatchThreadgroups(ThreadgroupsperGrid, ThreadperGroup); - break; + encoder->dispatchThreadgroups(ThreadpergroupsperGrid, ThreadperGroup); + break;} } if (encoder!=nullptr){ encoder->endEncoding(); diff --git a/kernels/metal/src/op.metal b/kernels/metal/src/op.metal index 1bd09ea4..770fab52 100644 --- a/kernels/metal/src/op.metal +++ b/kernels/metal/src/op.metal @@ -504,42 +504,6 @@ static void rope_yarn_corr_dims( dims[1] = min(n_dims - 1.0f, ceil(rope_yarn_corr_factor(n_dims, n_orig_ctx, beta_slow, freq_base))); } -// typedef void (rope_t)( -// device const void * src0, -// device const int32_t * src1, -// device float * dst, -// constant int64_t & ne00, -// constant int64_t & ne01, -// constant int64_t & ne02, -// constant int64_t & ne03, -// constant uint64_t & nb00, -// constant uint64_t & nb01, -// constant uint64_t & nb02, -// constant uint64_t & nb03, -// constant int64_t & ne0, -// constant int64_t & ne1, -// 
constant int64_t & ne2, -// constant int64_t & ne3, -// constant uint64_t & nb0, -// constant uint64_t & nb1, -// constant uint64_t & nb2, -// constant uint64_t & nb3, -// constant int & n_past, -// constant int & n_dims, -// constant int & mode, -// constant int & n_orig_ctx, -// constant float & freq_base, -// constant float & freq_scale, -// constant float & ext_factor, -// constant float & attn_factor, -// constant float & beta_fast, -// constant float & beta_slow, -// uint tiitg[[thread_index_in_threadgroup]], -// uint3 tptg[[threads_per_threadgroup]], -// uint3 tgpig[[threadgroup_position_in_grid]]); - -// // TODO: to be fixed -// template kernel void kernel_rope( device const void * src0, device const int32_t * src1, diff --git a/llm/Makefile b/llm/Makefile index 6186450c..41ba1c7f 100644 --- a/llm/Makefile +++ b/llm/Makefile @@ -67,6 +67,8 @@ $(info Detected CUDA_PATH: $(CUDA_HOME)) else ifdef USE_METAL $(info Metal is available!) LIB_SRC += $(wildcard src/nn_modules/metal/*.cc) + LIB_SRC += $(wildcard src/ops/metal/*.cc) + INCLUDE_DIRS += -I./include/ops/metal else $(info CUDA and Metal are unavailable!) 
LIB_SRC += $(wildcard src/nn_modules/non_cuda/*.cc) @@ -107,8 +109,8 @@ else ifeq ($(shell uname -p),arm) LIB += -L/opt/homebrew/opt/boost/lib # For ARM A-series (such as Mac M1) with Metal GPU ifdef USE_METAL - LIB_ACC_INC = -I$(LIB_DIR)/metal/include -I$(LIB_DIR)/metal/metal-cpp -I$(LIB_DIR)/metal/src - LIB_SRC += $(wildcard $(LIB_DIR)/metal/*.cc) $(wildcard src/nn_modules/metal/*.cc) + LIB_ACC_INC = -I$(LIB_DIR)/metal/include -I$(LIB_DIR)/metal/metal-cpp + LIB_SRC += $(wildcard $(LIB_DIR)/metal/src/*.cc) $(wildcard src/nn_modules/metal/*.cc) INCLUDE_DIRS += -I/opt/homebrew/opt/boost/include $(LIB_ACC_INC) LIB += -framework Metal -framework Foundation -framework MetalKit TARGET += default.metallib library.air diff --git a/llm/src/ops/metal/BMM_F16T.cc b/llm/src/ops/metal/BMM_F16T.cc index 76681593..e4d96135 100644 --- a/llm/src/ops/metal/BMM_F16T.cc +++ b/llm/src/ops/metal/BMM_F16T.cc @@ -16,7 +16,7 @@ void BMM_F16T::forward(const Matrix3D &a, const Matrix3D &weight, Ma assert(a.m_dim_y == c.m_dim_y); // m assert(b.m_dim_y == c.m_dim_z); // n - const struct metal_params params; + struct metal_params params; params.A.row = a.m_dim_y; params.A.column = a.m_dim_z; params.A.half_data_ptr = a.m_data; @@ -44,7 +44,7 @@ void BMM_F16T::forward_weight_untransposed(const Matrix3D &a, const Matrix assert(a.m_dim_y == c.m_dim_y); // m assert(b.m_dim_z == c.m_dim_z); // n - struct matmul_params params; + struct metal_params params; params.A.row = a.m_dim_y; params.A.column = a.m_dim_z; params.A.half_data_ptr = a.m_data; diff --git a/llm/src/ops/metal/LlamaRMSNorm.cc b/llm/src/ops/metal/LlamaRMSNorm.cc index 8ff673ad..5f2b31a0 100644 --- a/llm/src/ops/metal/LlamaRMSNorm.cc +++ b/llm/src/ops/metal/LlamaRMSNorm.cc @@ -4,15 +4,15 @@ #include "utils.h" #include "metal_compute.h" -// TODO: modify metal for weights +// TODO: src1 should be weights void LlamaRMSNorm_metal::forward(const Matrix3D &x, Matrix3D &output, float eps) { - const struct metal_params params; + struct 
metal_params params; params.A.row = x.m_dim_y; params.A.column = x.m_dim_z; params.A.half_data_ptr = x.m_data; - params.B.row = b.m_dim_z; // k - params.B.column = b.m_dim_y; // n - params.B.half_data_ptr = b.m_data; + params.B.row = x.m_dim_z; + params.B.column = x.m_dim_y; + params.B.half_data_ptr = x.m_data; params.C.row = output.m_dim_y; params.C.column = output.m_dim_z; params.C.half_data_ptr = output.m_data; diff --git a/llm/src/ops/metal/RotaryPosEmb.cc b/llm/src/ops/metal/RotaryPosEmb.cc index 47a02d59..c71a6278 100644 --- a/llm/src/ops/metal/RotaryPosEmb.cc +++ b/llm/src/ops/metal/RotaryPosEmb.cc @@ -4,32 +4,54 @@ #include "metal_compute.h" void RotaryPosEmb_metal_forward(Matrix3D query, Matrix3D key, Matrix3D cos, Matrix3D sin, int start_idx, int len) { - struct matmul_params params; - params.A.row = query.m_dim_y; - params.A.column = query.m_dim_z; - params.A.half_data_ptr = query.m_data; - params.B.row = key.m_dim_z; // k - params.B.column = key.m_dim_y; // n - params.B.int32_data_ptr = key.m_data; - params.C.row = output.m_dim_y; - params.C.column = output.m_dim_z; - params.C.half_data_ptr = output.m_data; - params.opt_params.num_thread = num_thread; - params.half_scales = this->scale.m_data; - params.int32_zero_point = this->zero_point.m_data; - params.block_size = QK; + + struct metal_params params_query; + struct metal_params params_key; + params_query.A.row = query.m_dim_y; + params_query.A.column = query.m_dim_z; + params_query.A.half_data_ptr = query.m_data; + params_query.B.row = key.m_dim_z; // k + params_query.B.column = key.m_dim_y; // n + params_query.B.half_data_ptr = key.m_data; + params_query.C.row = query.m_dim_y; + params_query.C.column = query.m_dim_z; + params_query.C.half_data_ptr = query.m_data; - params.n_orig_ctx = 4096; - params.n_past = 0; - params.n_dims = 128; - params.mode = 0; - params.freq_base = 10000.0; - params.freq_scale = 1.0; - params.ext_factor = 0.0; - params.attn_factor = 1.0; - params.beta_fast = 32.0; -
params.beta_slow = 1.0; - params.op = METAL_KERNEL_ROPE; - add_node(&params); + params_query.n_orig_ctx = 4096; + params_query.n_past = 0; + params_query.n_dims = 128; + params_query.mode = 0; + params_query.freq_base = 10000.0; + params_query.freq_scale = 1.0; + params_query.ext_factor = 0.0; + params_query.attn_factor = 1.0; + params_query.beta_fast = 32.0; + params_query.beta_slow = 1.0; + params_query.op = METAL_KERNEL_ROPE; + add_node(&params_query); + + // TO DO: src1: indices + params_key.A.row = key.m_dim_y; + params_key.A.column = key.m_dim_z; + params_key.A.half_data_ptr = key.m_data; + params_key.B.row = key.m_dim_z; // k + params_key.B.column = key.m_dim_y; // n + params_key.B.half_data_ptr = key.m_data; + params_key.C.row = key.m_dim_y; + params_key.C.column = key.m_dim_z; + params_key.C.half_data_ptr = key.m_data; + + params_key.n_orig_ctx = 4096; + params_key.n_past = 0; + params_key.n_dims = 128; + params_key.mode = 0; + params_key.freq_base = 10000.0; + params_key.freq_scale = 1.0; + params_key.ext_factor = 0.0; + params_key.attn_factor = 1.0; + params_key.beta_fast = 32.0; + params_key.beta_slow = 1.0; + params_key.op = METAL_KERNEL_ROPE; + add_node(&params_key); /* queue the key node; params_query was already queued above */ return; } diff --git a/llm/src/ops/metal/embedding.cc b/llm/src/ops/metal/embedding.cc index 0fd201ef..ed94e68c 100644 --- a/llm/src/ops/metal/embedding.cc +++ b/llm/src/ops/metal/embedding.cc @@ -6,7 +6,6 @@ void load_Embedding_params_metal(Embedding_metal& op, std::string prefix) { op.lookup.load((prefix + "/weight.bin").c_str()); } -// TODO: implement metal side void Embedding_metal::forward(Matrix3D input_id, Matrix3D output) { PROFILE_START(profile_name); assert(input_id.m_dim_x == 1); @@ -14,9 +13,9 @@ void Embedding_metal::forward(Matrix3D input_id, Matrix3D output) { assert(input_id.m_dim_z == output.m_dim_y); assert(output.m_dim_z == this->embed_dim); - const struct metal_params params; + struct metal_params params; - params.A.int32_data_ptr = input.m_data; + params.A.int32_data_ptr =
input_id.m_data; params.B.data_ptr = this->lookup.m_data; params.C.half_data_ptr = output.m_data; params.op = METAL_KERNEL_EMBEDDING; diff --git a/llm/src/ops/metal/softmax.cc b/llm/src/ops/metal/softmax.cc index 91153931..16a59d6a 100644 --- a/llm/src/ops/metal/softmax.cc +++ b/llm/src/ops/metal/softmax.cc @@ -1,20 +1,21 @@ #include #include "../../../include/operators.h" #include "utils.h" +#include "metal_compute.h" void softmax_metal(Matrix3D input, Matrix3D output) { - const struct metal_params params; + struct metal_params params; params.A.row = input.m_dim_y; params.A.column = input.m_dim_z; params.A.half_data_ptr = input.m_data; params.B.row = input.m_dim_z; // k params.B.column = input.m_dim_y; // n - params.B.data_ptr = input.m_data; + params.B.half_data_ptr = input.m_data; params.C.row = output.m_dim_y; params.C.column = output.m_dim_z; - params.C.data_ptr = output.m_data; - params.scale = this->scales; + params.C.half_data_ptr = output.m_data; + params.scale = 0.088388; /* ~= 1/sqrt(128); presumably 1/sqrt(head_dim) - TODO confirm and derive instead of hard-coding */ params.op = METAL_KERNEL_SOFT_MAX; add_node(&params); } \ No newline at end of file