Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
* LICENSE file in the root directory of this source tree.
*/

#import <ExecuTorch/ExecuTorch.h>

#import "ExecuTorchLLMConfig.h"

NS_ASSUME_NONNULL_BEGIN
Expand All @@ -29,6 +31,16 @@ __attribute__((deprecated("This API is experimental.")))
__attribute__((objc_subclassing_restricted))
@interface ExecuTorchLLMImage : NSObject<NSCopying>

/**
Initializes an image container from a tensor.

This is the designated initializer. The implementation checks that the tensor
is non-nil, has exactly three dimensions, and has dtype Byte or Float. The
tensor object itself is stored by the container; its contents are not copied
by this initializer. Exposed to Swift as init(_:).

@param tensor A tensor with shape {C, H, W} and dtype Byte or Float.
@return An initialized ExecuTorchLLMImage instance.
*/
- (instancetype)initWithTensor:(ExecuTorchTensor *)tensor
NS_DESIGNATED_INITIALIZER
NS_SWIFT_NAME(init(_:));

/**
Initializes an image container with the provided data and dimensions.

Expand All @@ -41,16 +53,21 @@ __attribute__((objc_subclassing_restricted))
- (instancetype)initWithData:(NSData *)data
width:(NSInteger)width
height:(NSInteger)height
channels:(NSInteger)channels
NS_DESIGNATED_INITIALIZER;
channels:(NSInteger)channels;

/**
Initializes an image container with the provided float data and dimensions.

@param data Float image buffer.
@param width Image width in pixels.
@param height Image height in pixels.
@param channels Number of channels.
@return An initialized ExecuTorchLLMImage instance.
*/
- (instancetype)initWithFloatData:(NSData *)data
width:(NSInteger)width
height:(NSInteger)height
channels:(NSInteger)channels
NS_DESIGNATED_INITIALIZER;

@property(nonatomic, readonly) NSData *data;
channels:(NSInteger)channels;

/**
Image width in pixels, read from the last dimension of the backing
{C, H, W} tensor.
*/
@property(nonatomic, readonly) NSInteger width;

Expand All @@ -60,6 +77,8 @@ __attribute__((objc_subclassing_restricted))

/**
YES when the backing tensor's dtype is Float, NO otherwise.
*/
@property(nonatomic, readonly) BOOL isFloat;

/**
The tensor backing this image, with shape {C, H, W} and dtype Byte or Float.
*/
@property(nonatomic, readonly) ExecuTorchTensor *tensor;

+ (instancetype)new NS_UNAVAILABLE;
- (instancetype)init NS_UNAVAILABLE;

Expand All @@ -73,6 +92,16 @@ __attribute__((deprecated("This API is experimental.")))
__attribute__((objc_subclassing_restricted))
@interface ExecuTorchLLMAudio : NSObject<NSCopying>

/**
Initializes an audio features container from a tensor.

This is the designated initializer. The implementation checks that the tensor
is non-nil, has exactly three dimensions, and has dtype Byte or Float. The
tensor object itself is stored by the container; its contents are not copied
by this initializer. Exposed to Swift as init(_:).

@param tensor A tensor with shape {batchSize, bins, frames} and dtype Byte or Float.
@return An initialized ExecuTorchLLMAudio instance.
*/
- (instancetype)initWithTensor:(ExecuTorchTensor *)tensor
NS_DESIGNATED_INITIALIZER
NS_SWIFT_NAME(init(_:));

/**
Initializes an audio features container with the provided data and shape.

Expand All @@ -85,16 +114,21 @@ __attribute__((objc_subclassing_restricted))
- (instancetype)initWithData:(NSData *)data
batchSize:(NSInteger)batchSize
bins:(NSInteger)bins
frames:(NSInteger)frames
NS_DESIGNATED_INITIALIZER;
frames:(NSInteger)frames;

/**
Initializes an audio features container with the provided float data and shape.

@param data Float feature buffer.
@param batchSize Batch dimension size.
@param bins Number of frequency bins.
@param frames Number of time frames.
@return An initialized ExecuTorchLLMAudio instance.
*/
- (instancetype)initWithFloatData:(NSData *)data
batchSize:(NSInteger)batchSize
bins:(NSInteger)bins
frames:(NSInteger)frames
NS_DESIGNATED_INITIALIZER;

@property(nonatomic, readonly) NSData *data;
frames:(NSInteger)frames;

/**
Batch dimension size, read from the first dimension of the backing
{batchSize, bins, frames} tensor.
*/
@property(nonatomic, readonly) NSInteger batchSize;

Expand All @@ -104,6 +138,8 @@ __attribute__((objc_subclassing_restricted))

/**
YES when the backing tensor's dtype is Float, NO otherwise.
*/
@property(nonatomic, readonly) BOOL isFloat;

/**
The tensor backing these audio features, with shape {batchSize, bins, frames}
and dtype Byte or Float.
*/
@property(nonatomic, readonly) ExecuTorchTensor *tensor;

+ (instancetype)new NS_UNAVAILABLE;
- (instancetype)init NS_UNAVAILABLE;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,35 +21,60 @@ @interface ExecuTorchLLMConfig ()

@end

@implementation ExecuTorchLLMImage
@implementation ExecuTorchLLMImage {
ExecuTorchTensor *_tensor;
}

// Designated initializer. Verifies that the tensor is non-nil, rank-3 and of
// dtype Byte or Float, then keeps a reference to it as the backing storage.
- (instancetype)initWithTensor:(ExecuTorchTensor *)tensor {
  ET_CHECK(tensor);
  self = [super init];
  if (!self) {
    return nil;
  }
  // The accessors index shape[0..2], so the rank must be exactly three.
  ET_CHECK_MSG(tensor.shape.count == 3, "Image tensor must be rank-3 {C,H,W}");
  // Read the dtype once and accept only the two supported element types.
  ExecuTorchDataType dataType = tensor.dataType;
  ET_CHECK_MSG(dataType == ExecuTorchDataTypeByte || dataType == ExecuTorchDataTypeFloat,
               "Image tensor must be Byte or Float");
  _tensor = tensor;
  return self;
}

- (instancetype)initWithData:(NSData *)data
width:(NSInteger)width
height:(NSInteger)height
channels:(NSInteger)channels {
if (self = [super init]) {
_data = [data copy];
_width = width;
_height = height;
_channels = channels;
_isFloat = NO;
}
return self;
return [self initWithTensor:[[ExecuTorchTensor alloc]
initWithData:data
shape:@[@(channels), @(height), @(width)]
dataType:ExecuTorchDataTypeByte]];
}

- (instancetype)initWithFloatData:(NSData *)data
width:(NSInteger)width
height:(NSInteger)height
channels:(NSInteger)channels {
self = [super init];
if (self) {
_data = [data copy];
_width = width;
_height = height;
_channels = channels;
_isFloat = YES;
}
return self;
return [self initWithTensor:[[ExecuTorchTensor alloc]
initWithData:data
shape:@[@(channels), @(height), @(width)]
dataType:ExecuTorchDataTypeFloat]];
}

// Width is the third entry of the {C, H, W} shape.
- (NSInteger)width {
  return [_tensor.shape[2] integerValue];
}

// Height is the second entry of the {C, H, W} shape.
- (NSInteger)height {
  return [_tensor.shape[1] integerValue];
}

// Channel count is the first entry of the {C, H, W} shape.
- (NSInteger)channels {
  return [_tensor.shape[0] integerValue];
}

// Reports whether the backing tensor holds Float elements.
- (BOOL)isFloat {
  const ExecuTorchDataType elementType = _tensor.dataType;
  return elementType == ExecuTorchDataTypeFloat;
}

// Returns the backing tensor stored by the initializer.
- (ExecuTorchTensor *)tensor {
  return self->_tensor;
}

- (id)copyWithZone:(NSZone *)zone {
Expand All @@ -58,35 +83,60 @@ - (id)copyWithZone:(NSZone *)zone {

@end

@implementation ExecuTorchLLMAudio
@implementation ExecuTorchLLMAudio {
ExecuTorchTensor *_tensor;
}

// Designated initializer. Verifies that the tensor is non-nil, rank-3 and of
// dtype Byte or Float, then keeps a reference to it as the backing storage.
- (instancetype)initWithTensor:(ExecuTorchTensor *)tensor {
  ET_CHECK(tensor);
  self = [super init];
  if (!self) {
    return nil;
  }
  // The accessors index shape[0..2], so the rank must be exactly three.
  ET_CHECK_MSG(tensor.shape.count == 3, "Audio tensor must be rank-3 {B,bins,frames}");
  // Read the dtype once and accept only the two supported element types.
  ExecuTorchDataType dataType = tensor.dataType;
  ET_CHECK_MSG(dataType == ExecuTorchDataTypeByte || dataType == ExecuTorchDataTypeFloat,
               "Audio tensor must be Byte or Float");
  _tensor = tensor;
  return self;
}

- (instancetype)initWithData:(NSData *)data
batchSize:(NSInteger)batchSize
bins:(NSInteger)bins
frames:(NSInteger)frames {
if (self = [super init]) {
_data = [data copy];
_batchSize = batchSize;
_bins = bins;
_frames = frames;
_isFloat = NO;
}
return self;
return [self initWithTensor:
[[ExecuTorchTensor alloc] initWithData:data
shape:@[@(batchSize), @(bins), @(frames)]
dataType:ExecuTorchDataTypeByte]];
}

- (instancetype)initWithFloatData:(NSData *)data
batchSize:(NSInteger)batchSize
bins:(NSInteger)bins
frames:(NSInteger)frames {
self = [super init];
if (self) {
_data = [data copy];
_batchSize = batchSize;
_bins = bins;
_frames = frames;
_isFloat = YES;
}
return self;
return [self initWithTensor:
[[ExecuTorchTensor alloc] initWithData:data
shape:@[@(batchSize), @(bins), @(frames)]
dataType:ExecuTorchDataTypeFloat]];
}

// Batch size is the first entry of the {batchSize, bins, frames} shape.
- (NSInteger)batchSize {
  return [_tensor.shape[0] integerValue];
}

// Bin count is the second entry of the {batchSize, bins, frames} shape.
- (NSInteger)bins {
  return [_tensor.shape[1] integerValue];
}

// Frame count is the third entry of the {batchSize, bins, frames} shape.
- (NSInteger)frames {
  return [_tensor.shape[2] integerValue];
}

// Reports whether the backing tensor holds Float elements.
- (BOOL)isFloat {
  const ExecuTorchDataType elementType = _tensor.dataType;
  return elementType == ExecuTorchDataTypeFloat;
}

// Returns the backing tensor stored by the initializer.
- (ExecuTorchTensor *)tensor {
  return self->_tensor;
}

- (id)copyWithZone:(NSZone *)zone {
Expand Down Expand Up @@ -208,54 +258,16 @@ - (BOOL)generateWithInputs:(NSArray<ExecuTorchLLMMultimodalInput *> *)inputs
case ExecuTorchLLMMultimodalInputTypeText:
nativeInputs.emplace_back(llm::MultimodalInput(input.text.UTF8String));
break;
case ExecuTorchLLMMultimodalInputTypeImage: {
ExecuTorchLLMImage *image = input.image;
if (image.isFloat) {
const float *buffer = (const float *)image.data.bytes;
size_t elementCount = (size_t)image.data.length / sizeof(float);
std::vector<float> data(buffer, buffer + elementCount);
nativeInputs.emplace_back(llm::MultimodalInput(llm::Image(
std::move(data),
(int32_t)image.width,
(int32_t)image.height,
(int32_t)image.channels
)));
} else {
const uint8_t *buffer = (const uint8_t *)image.data.bytes;
std::vector<uint8_t> data(buffer, buffer + image.data.length);
nativeInputs.emplace_back(llm::MultimodalInput(llm::Image(
std::move(data),
(int32_t)image.width,
(int32_t)image.height,
(int32_t)image.channels
)));
}
case ExecuTorchLLMMultimodalInputTypeImage:
nativeInputs.emplace_back(llm::MultimodalInput(llm::Image(
make_tensor_ptr(*reinterpret_cast<TensorPtr *>(input.image.tensor.nativeInstance))
)));
break;
}
case ExecuTorchLLMMultimodalInputTypeAudio: {
ExecuTorchLLMAudio *audio = input.audio;
if (audio.isFloat) {
const float *buffer = (const float *)audio.data.bytes;
size_t elementCount = (size_t)audio.data.length / sizeof(float);
std::vector<float> data(buffer, buffer + elementCount);
nativeInputs.emplace_back(llm::MultimodalInput(llm::Audio(
std::move(data),
(int32_t)audio.batchSize,
(int32_t)audio.bins,
(int32_t)audio.frames
)));
} else {
const uint8_t *buffer = (const uint8_t *)audio.data.bytes;
std::vector<uint8_t> data(buffer, buffer + audio.data.length);
nativeInputs.emplace_back(llm::MultimodalInput(llm::Audio(
std::move(data),
(int32_t)audio.batchSize,
(int32_t)audio.bins,
(int32_t)audio.frames
)));
}
case ExecuTorchLLMMultimodalInputTypeAudio:
nativeInputs.emplace_back(llm::MultimodalInput(llm::Audio(
make_tensor_ptr(*reinterpret_cast<TensorPtr *>(input.audio.tensor.nativeInstance))
)));
break;
}
default: {
if (error) {
*error = [NSError errorWithDomain:ExecuTorchLLMErrorDomain
Expand Down
Loading
Loading