Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

macOS 13 support #40

Merged
merged 18 commits into from
Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,15 @@ on:

jobs:
build-and-test:
runs-on: macos-14
strategy:
matrix:
os: [macos-13-xlarge, macos-14]
include:
- os: macos-13-xlarge
ios-version: "16.1" # oldest available version
- os: macos-14
ios-version: "17.2" # latest available version
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: maxim-lobanov/setup-xcode@v1
Expand Down Expand Up @@ -40,14 +48,16 @@ jobs:
run: |
set -o pipefail
xcodebuild clean build-for-testing -scheme whisperkit-Package -destination generic/platform=iOS | xcpretty
xcodebuild test -only-testing WhisperKitTests/UnitTests -scheme whisperkit-Package -destination "platform=iOS Simulator,OS=17.2,name=iPhone 15" | xcpretty
xcodebuild test -only-testing WhisperKitTests/UnitTests -scheme whisperkit-Package -destination "platform=iOS Simulator,OS=${{ matrix.ios-version }},name=iPhone 15" | xcpretty
- name: Build and Test - watchOS
if: matrix.os == 'macos-14'
run: |
set -o pipefail
xcodebuild clean build-for-testing -scheme whisperkit-Package -destination generic/platform=watchOS | xcpretty
xcodebuild test -only-testing WhisperKitTests/UnitTests -scheme whisperkit-Package -destination "platform=watchOS Simulator,OS=10.2,name=Apple Watch Ultra 2 (49mm)" | xcpretty
- name: Build and Test - visionOS
if: matrix.os == 'macos-14'
run: |
set -o pipefail
xcodebuild clean build-for-testing -scheme whisperkit-Package -destination generic/platform=visionOS | xcpretty
xcodebuild test -only-testing WhisperKitTests/UnitTests -scheme whisperkit-Package -destination "platform=visionOS Simulator,name=Apple Vision Pro" | xcpretty
xcodebuild test -only-testing WhisperKitTests/UnitTests -scheme whisperkit-Package -destination "platform=visionOS Simulator,name=Apple Vision Pro" | xcpretty
15 changes: 7 additions & 8 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,8 @@ import PackageDescription
let package = Package(
name: "whisperkit",
platforms: [
.iOS(.v17),
.macOS(.v14),
.watchOS(.v10),
.visionOS(.v1)
.iOS(.v16),
.macOS(.v13),
],
products: [
.library(
Expand All @@ -18,7 +16,8 @@ let package = Package(
),
.executable(
name: "transcribe",
targets: ["WhisperKitCLI"])
targets: ["WhisperKitCLI"]
),
],
dependencies: [
.package(url: "https://github.com/huggingface/swift-transformers.git", exact: "0.1.2"),
Expand All @@ -35,7 +34,7 @@ let package = Package(
name: "WhisperKitCLI",
dependencies: [
"WhisperKit",
.product(name: "ArgumentParser", package: "swift-argument-parser")
.product(name: "ArgumentParser", package: "swift-argument-parser"),
]
),
.testTarget(
Expand All @@ -51,11 +50,11 @@ let package = Package(
"Makefile",
"README.md",
"LICENSE",
"CONTRIBUTING.md"
"CONTRIBUTING.md",
],
resources: [
.process("Tests/WhisperKitTests/Resources"),
.copy("Models/whisperkit-coreml")
.copy("Models/whisperkit-coreml"),
]
),
]
Expand Down
2 changes: 1 addition & 1 deletion Sources/WhisperKit/Core/AudioEncoder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public protocol AudioEncoding {
func encodeFeatures(_ features: MLMultiArray) async throws -> MLMultiArray?
}

@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class AudioEncoder: AudioEncoding, WhisperMLModel {
public var model: MLModel?

Expand Down
31 changes: 28 additions & 3 deletions Sources/WhisperKit/Core/AudioProcessor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ public extension AudioProcessing {
}
}

@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class AudioProcessor: NSObject, AudioProcessing {
public var audioEngine: AVAudioEngine?
public var audioSamples: ContiguousArray<Float> = []
Expand Down Expand Up @@ -302,7 +302,32 @@ public class AudioProcessor: NSObject, AudioProcessing {
}

/// Asks for permission to record audio and reports whether it was granted.
///
/// On macOS 14 / iOS 17 and newer (and on any other platform matched by the `*` wildcard)
/// the request is delegated to `AVAudioApplication.requestRecordPermission()`.
/// On older systems it falls back to `AVCaptureDevice`:
/// - `.notDetermined`: requests access and returns the user's answer.
/// - `.authorized`: returns `true` without prompting.
/// - `.restricted`, `.denied`, or an unknown status: logs an error and returns `false`.
///
/// - Returns: `true` if recording is permitted, otherwise `false`. The watchOS-only
///   fallback branch returns `true` unconditionally because `AVCaptureDevice` is not
///   available there.
public static func requestRecordPermission() async -> Bool {
    // `AVAudioApplication` must only be touched behind this availability gate; calling it
    // unconditionally would break the macOS 13 / iOS 16 targets and request permission twice.
    if #available(macOS 14, iOS 17, *) {
        return await AVAudioApplication.requestRecordPermission()
    } else {
        #if os(watchOS)
        // watchOS does not support AVCaptureDevice
        return true
        #else
        let microphoneStatus = AVCaptureDevice.authorizationStatus(for: .audio)
        switch microphoneStatus {
        case .notDetermined:
            // Bridge the completion-handler API into async/await; the handler is
            // expected to fire exactly once, so the continuation is resumed once.
            return await withCheckedContinuation { continuation in
                AVCaptureDevice.requestAccess(for: .audio) { granted in
                    continuation.resume(returning: granted)
                }
            }
        case .restricted, .denied:
            Logging.error("Microphone access denied")
            return false
        case .authorized:
            return true
        @unknown default:
            Logging.error("Unknown authorization status")
            return false
        }
        #endif
    }
}

deinit {
Expand All @@ -312,7 +337,7 @@ public class AudioProcessor: NSObject, AudioProcessing {

// MARK: - Streaming

@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public extension AudioProcessor {
/// We have a new buffer, process and store it.
/// NOTE: Assumes audio is 16 kHz mono
Expand Down
3 changes: 3 additions & 0 deletions Sources/WhisperKit/Core/AudioStreamTranscriber.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import Foundation

@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public extension AudioStreamTranscriber {
struct State {
public var isRecording: Bool = false
Expand All @@ -17,9 +18,11 @@ public extension AudioStreamTranscriber {
}
}

/// Closure type that receives two `AudioStreamTranscriber.State` values and returns nothing.
/// NOTE(review): presumably invoked as (previous state, new state) when the transcriber's
/// state changes — confirm the argument order against the call site.
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public typealias AudioStreamTranscriberCallback = (AudioStreamTranscriber.State, AudioStreamTranscriber.State) -> Void

/// Responsible for streaming audio from the microphone, processing it, and transcribing it in real-time.
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public actor AudioStreamTranscriber {
private var state: AudioStreamTranscriber.State = .init() {
didSet {
Expand Down
2 changes: 1 addition & 1 deletion Sources/WhisperKit/Core/FeatureExtractor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public protocol FeatureExtracting {
func logMelSpectrogram(fromAudio inputAudio: MLMultiArray) async throws -> MLMultiArray?
}

@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class FeatureExtractor: FeatureExtracting, WhisperMLModel {
public var model: MLModel?

Expand Down
6 changes: 3 additions & 3 deletions Sources/WhisperKit/Core/LogitsFilter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ public protocol LogitsFiltering {
func filterLogits(_ logits: MLMultiArray, withTokens tokens: [Int]) -> MLMultiArray
}

@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class SuppressTokensFilter: LogitsFiltering {
let suppressTokens: [Int]
private let suppressTokenIndexes: [[NSNumber]]
Expand All @@ -25,7 +25,7 @@ public class SuppressTokensFilter: LogitsFiltering {
}
}

@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class SuppressBlankFilter: LogitsFiltering {
let suppressBlankTokens: [Int]
let sampleBegin: Int
Expand All @@ -46,7 +46,7 @@ public class SuppressBlankFilter: LogitsFiltering {
}
}

@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class TimestampRulesFilter: LogitsFiltering {
let tokenizer: Tokenizer
let sampleBegin: Int
Expand Down
16 changes: 8 additions & 8 deletions Sources/WhisperKit/Core/Models.swift
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import NaturalLanguage
import Tokenizers

#if os(watchOS) || arch(arm64)
@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public typealias FloatType = Float16
#else
public typealias FloatType = Float
Expand Down Expand Up @@ -200,7 +200,7 @@ public struct DecodingCache {
/// - logProbThreshold: If the average log probability over sampled tokens is below this value, treat as failed.
/// - noSpeechThreshold: If the no speech probability is higher than this value AND the average log
/// probability over sampled tokens is below `logProbThreshold`, consider the segment as silent.
@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public struct DecodingOptions {
public var verbose: Bool
public var task: DecodingTask
Expand Down Expand Up @@ -489,7 +489,7 @@ public class MelSpectrogramInput: MLFeatureProvider {
}

/// Model Prediction Output Type
@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class MelSpectrogramOutput: MLFeatureProvider {
/// Source provided by CoreML
private let provider: MLFeatureProvider
Expand Down Expand Up @@ -526,7 +526,7 @@ public class MelSpectrogramOutput: MLFeatureProvider {
// MARK: AudioEncoder

/// Model Prediction Input Type
@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class AudioEncoderInput: MLFeatureProvider {
/// melspectrogram_features as 1 × {80,128} × 1 × 3000 4-dimensional array of floats
public var melspectrogram_features: MLMultiArray
Expand All @@ -552,7 +552,7 @@ public class AudioEncoderInput: MLFeatureProvider {
}

/// Model Prediction Output Type
@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class AudioEncoderOutput: MLFeatureProvider {
/// Source provided by CoreML
private let provider: MLFeatureProvider
Expand Down Expand Up @@ -589,7 +589,7 @@ public class AudioEncoderOutput: MLFeatureProvider {
// MARK: TextDecoder

/// Model Prediction Input Type
@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class TextDecoderInput: MLFeatureProvider {
/// input_ids as 1 element vector of 32-bit integers
public var input_ids: MLMultiArray
Expand Down Expand Up @@ -657,7 +657,7 @@ public class TextDecoderInput: MLFeatureProvider {
}

/// Model Prediction Output Type
@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class TextDecoderOutput: MLFeatureProvider {
/// Source provided by CoreML
private let provider: MLFeatureProvider
Expand Down Expand Up @@ -764,7 +764,7 @@ public class TextDecoderCachePrefillInput: MLFeatureProvider {
}

/// Model Prediction Output Type
@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class TextDecoderCachePrefillOutput: MLFeatureProvider {
/// Source provided by CoreML
private let provider: MLFeatureProvider
Expand Down
4 changes: 2 additions & 2 deletions Sources/WhisperKit/Core/SegmentSeeker.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import CoreML
import Foundation
import Tokenizers

@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public protocol SegmentSeeking {
func findSeekPointAndSegments(
decodingResult: DecodingResult,
Expand Down Expand Up @@ -34,7 +34,7 @@ public protocol SegmentSeeking {
) throws -> [TranscriptionSegment]?
}

@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class SegmentSeeker: SegmentSeeking {
public init() {}

Expand Down
6 changes: 3 additions & 3 deletions Sources/WhisperKit/Core/TextDecoder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import Accelerate
import CoreML
import Tokenizers

@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public protocol TextDecoding {
var tokenizer: Tokenizer? { get set }
var prefillData: WhisperMLModel? { get set }
Expand Down Expand Up @@ -43,7 +43,7 @@ public protocol TextDecoding {
)
}

@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public extension TextDecoding {
func prepareDecoderInputs(withPrompt initialPrompt: [Int]) -> DecodingInputs? {
let tokenShape = [NSNumber(value: 1), NSNumber(value: initialPrompt.count)]
Expand Down Expand Up @@ -234,7 +234,7 @@ public class TextDecoderContextPrefill: WhisperMLModel {
public var model: MLModel?
}

@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class TextDecoder: TextDecoding, WhisperMLModel {
public var model: MLModel?
public var tokenizer: Tokenizer?
Expand Down
2 changes: 1 addition & 1 deletion Sources/WhisperKit/Core/TokenSampler.swift
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public struct SamplingResult {
public var completed: Bool
}

@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class GreedyTokenSampler: TokenSampling {
public var temperature: FloatType
public var eotToken: Int
Expand Down
1 change: 1 addition & 0 deletions Sources/WhisperKit/Core/Utils.swift
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ extension MLMultiArray {
}
}

@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
func initMLMultiArray(shape: [NSNumber], dataType: MLMultiArrayDataType, initialValue: Any) -> MLMultiArray {
let multiArray = try! MLMultiArray(shape: shape, dataType: dataType)

Expand Down
3 changes: 2 additions & 1 deletion Sources/WhisperKit/Core/WhisperKit.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@ import Hub
import TensorUtils
import Tokenizers

/// A type that can produce a `TranscriptionResult` from audio, supplied either as a
/// file path or as an in-memory array of `Float` samples.
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public protocol Transcriber {
/// Transcribes the audio located at `audioPath`.
/// - Parameters:
///   - audioPath: Path of the audio input, as a string.
///   - decodeOptions: Optional decoding configuration; `nil` is allowed.
///   - callback: A `TranscriptionCallback` passed through to the implementation.
///     NOTE(review): presumably used for progress reporting — confirm against the conforming type.
/// - Returns: The transcription result, or `nil` (the conditions for `nil` are defined by the conforming type).
/// - Throws: Any error raised by the conforming implementation.
func transcribe(audioPath: String, decodeOptions: DecodingOptions?, callback: TranscriptionCallback) async throws -> TranscriptionResult?
/// Transcribes audio samples that are already in memory.
/// - Parameters:
///   - audioArray: The audio samples as `Float` values.
///     NOTE(review): sample rate and channel layout are not specified here — confirm with the implementation.
///   - decodeOptions: Optional decoding configuration; `nil` is allowed.
///   - callback: A `TranscriptionCallback` passed through to the implementation.
/// - Returns: The transcription result, or `nil` (the conditions for `nil` are defined by the conforming type).
/// - Throws: Any error raised by the conforming implementation.
func transcribe(audioArray: [Float], decodeOptions: DecodingOptions?, callback: TranscriptionCallback) async throws -> TranscriptionResult?
}

@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class WhisperKit: Transcriber {
/// Models
public var modelVariant: ModelVariant = .tiny
Expand Down
2 changes: 1 addition & 1 deletion Sources/WhisperKitCLI/transcribe.swift
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import Foundation

import WhisperKit

@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
@main
struct WhisperKitCLI: AsyncParsableCommand {
@Option(help: "Path to audio file")
Expand Down
2 changes: 1 addition & 1 deletion Tests/WhisperKitTests/FunctionalTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import CoreML
@testable import WhisperKit
import XCTest

@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
final class FunctionalTests: XCTestCase {
func testInitLarge() async {
let modelPath = largev3ModelPath()
Expand Down
Loading