From e9a6c149c5025b640695583c78105ef469dc9ef7 Mon Sep 17 00:00:00 2001
From: Chase Farmer <cgfarmer4@gmail.com>
Date: Wed, 6 Mar 2024 14:44:32 -0800
Subject: [PATCH 1/9] Implement selecting input device (#51)

* add microphone assignment

* PR Feedback: Clean up code fork with alias, move Picker to controls view.
---
 .../WhisperAX/Views/ContentView.swift         |  57 +++++--
 Sources/WhisperKit/Core/AudioProcessor.swift  | 153 ++++++++++++++++--
 2 files changed, 189 insertions(+), 21 deletions(-)

diff --git a/Examples/WhisperAX/WhisperAX/Views/ContentView.swift b/Examples/WhisperAX/WhisperAX/Views/ContentView.swift
index 0df7640e..2bb6982e 100644
--- a/Examples/WhisperAX/WhisperAX/Views/ContentView.swift
+++ b/Examples/WhisperAX/WhisperAX/Views/ContentView.swift
@@ -12,6 +12,9 @@ import AVFoundation
 
 struct ContentView: View {
     @State var whisperKit: WhisperKit? = nil
+    #if os(macOS)
+    @State var audioDevices: [AudioDevice]? = nil
+    #endif
     @State var isRecording: Bool = false
     @State var isTranscribing: Bool = false
     @State var currentText: String = ""
@@ -24,7 +27,8 @@ struct ContentView: View {
     @State private var availableModels: [String] = []
     @State private var availableLanguages: [String] = []
     @State private var disabledModels: [String] = WhisperKit.recommendedModels().disabled
-
+    
+    @AppStorage("selectedAudioInput") private var selectedAudioInput: String = "No Audio Input"
     @AppStorage("selectedModel") private var selectedModel: String = WhisperKit.recommendedModels().default
     @AppStorage("selectedTab") private var selectedTab: String = "Transcribe"
     @AppStorage("selectedTask") private var selectedTask: String = "transcribe"
@@ -302,7 +306,6 @@ struct ContentView: View {
     }
 
     // MARK: - Controls
-
     var controlsView: some View {
         VStack {
             basicSettingsView
@@ -417,14 +420,39 @@ struct ContentView: View {
                             .buttonStyle(BorderlessButtonStyle())
                             .disabled(modelState != .loaded)
                             .frame(minWidth: 0, maxWidth: .infinity)
-
-                            Button {
-                                showAdvancedOptions.toggle()
-                            } label: {
-                                Label("Settings", systemImage: "slider.horizontal.3")
+                            
+                            VStack {
+                                Button {
+                                    showAdvancedOptions.toggle()
+                                } label: {
+                                    Label("Settings", systemImage: "slider.horizontal.3")
+                                }
+                                .frame(minWidth: 0, maxWidth: .infinity)
+                                .buttonStyle(.borderless)
+                                
+                                #if os(macOS)
+                                HStack {
+                                    if let audioDevices = audioDevices, audioDevices.count > 0 {
+                                        Picker("", selection: $selectedAudioInput) {
+                                            ForEach(audioDevices, id: \.self) { device in
+                                                Text(device.name).tag(device.name)
+                                            }
+                                        }
+                                        .frame(minWidth: 80)
+                                        .disabled(isRecording)
+                                    }
+                                }
+                                .onAppear {
+                                    audioDevices = AudioProcessor.getAudioDevices()
+                                    if let audioDevices = audioDevices,
+                                       !audioDevices.isEmpty,
+                                       selectedAudioInput == "No Audio Input",
+                                       let device = audioDevices.first {
+                                        selectedAudioInput = device.name
+                                    }
+                                }
+                                #endif
                             }
-                            .frame(minWidth: 0, maxWidth: .infinity)
-                            .buttonStyle(.borderless)
                         }
                     default:
                         EmptyView()
@@ -854,8 +882,17 @@ struct ContentView: View {
                     print("Microphone access was not granted.")
                     return
                 }
+                
+                var deviceId: DeviceID?
+                #if os(macOS)
+                if self.selectedAudioInput != "No Audio Input",
+                   let devices = self.audioDevices,
+                   let device = devices.first(where: {$0.name == selectedAudioInput}) {
+                    deviceId = device.id
+                }
+                #endif
 
-                try? audioProcessor.startRecordingLive { _ in
+                try? audioProcessor.startRecordingLive(inputDeviceID: deviceId) { _ in
                     DispatchQueue.main.async {
                         bufferEnergy = whisperKit?.audioProcessor.relativeEnergy ?? []
                     }
diff --git a/Sources/WhisperKit/Core/AudioProcessor.swift b/Sources/WhisperKit/Core/AudioProcessor.swift
index 248396fd..4909f26c 100644
--- a/Sources/WhisperKit/Core/AudioProcessor.swift
+++ b/Sources/WhisperKit/Core/AudioProcessor.swift
@@ -6,6 +6,18 @@ import AVFoundation
 import CoreAudio
 import CoreML
 
+/// Identifier type for an audio input device: Core Audio's `AudioDeviceID` on macOS, `String` on other platforms.
+#if os(macOS)
+public typealias DeviceID = AudioDeviceID
+#else
+public typealias DeviceID = String 
+#endif
+
+public struct AudioDevice: Identifiable, Hashable {
+    public let id: DeviceID
+    public let name: String
+}
+ 
 public protocol AudioProcessing {
     /// Loads audio data from a specified file path.
     /// - Parameter audioFilePath: The file path of the audio file.
@@ -40,8 +52,8 @@ public protocol AudioProcessing {
     var relativeEnergyWindow: Int { get set }
 
     /// Starts recording audio from the specified input device, resetting the previous state
-    func startRecordingLive(callback: (([Float]) -> Void)?) throws
-
+    func startRecordingLive(inputDeviceID: DeviceID?, callback: (([Float]) -> Void)?) throws
+    
     /// Pause recording
     func pauseRecording()
 
@@ -51,8 +63,8 @@ public protocol AudioProcessing {
 
 /// Overrideable default methods for AudioProcessing
 public extension AudioProcessing {
-    func startRecordingLive(callback: (([Float]) -> Void)?) throws {
-        try startRecordingLive(callback: callback)
+    func startRecordingLive(inputDeviceID: DeviceID? = nil, callback: (([Float]) -> Void)?) throws {
+        try startRecordingLive(inputDeviceID: inputDeviceID, callback: callback)
     }
 
     static func padOrTrimAudio(fromArray audioArray: [Float], startAt startIndex: Int = 0, toLength frameLength: Int = 480_000, saveSegment: Bool = false) -> MLMultiArray? {
@@ -304,6 +316,94 @@ public class AudioProcessor: NSObject, AudioProcessing {
     public static func requestRecordPermission() async -> Bool {
         await AVAudioApplication.requestRecordPermission()
     }
+    
+    #if os(macOS)
+    /// Returns the Core Audio devices that report at least one input channel.
+    /// - Returns: Input-capable devices. If the system device list cannot be read,
+    ///   the failure is logged and an empty list is returned.
+    public static func getAudioDevices() -> [AudioDevice] {
+        var devices = [AudioDevice]()
+        var propertySize: UInt32 = 0
+        var status: OSStatus = noErr
+        // Get the number of devices
+        var propertyAddress = AudioObjectPropertyAddress(
+            mSelector: kAudioHardwarePropertyDevices,
+            mScope: kAudioObjectPropertyScopeGlobal,
+            mElement: kAudioObjectPropertyElementMain
+        )
+        status = AudioObjectGetPropertyDataSize(
+            AudioObjectID(kAudioObjectSystemObject),
+            &propertyAddress,
+            0,
+            nil,
+            &propertySize
+        )
+        if status != noErr {
+            Logging.error("Error: Unable to get the number of audio devices.")
+            return devices
+        }
+
+        // Get the device IDs
+        let deviceCount = Int(propertySize) / MemoryLayout<AudioDeviceID>.size
+        var deviceIDs = [AudioDeviceID](repeating: 0, count: deviceCount)
+        status = AudioObjectGetPropertyData(
+            AudioObjectID(kAudioObjectSystemObject),
+            &propertyAddress,
+            0,
+            nil,
+            &propertySize,
+            &deviceIDs
+        )
+        if status != noErr {
+            Logging.error("Error: Unable to get the audio device IDs.")
+            return devices
+        }
+
+        // Get device info for each device
+        for deviceID in deviceIDs {
+            var deviceName: String = ""
+            var inputChannels: Int = 0
+            // Get device name; the address is shared, so restore global scope (set to input scope further down)
+            var propertySize: UInt32 = UInt32(MemoryLayout<Unmanaged<CFString>?>.size)
+            var name: Unmanaged<CFString>? = nil
+            propertyAddress.mSelector = kAudioDevicePropertyDeviceNameCFString
+            propertyAddress.mScope = kAudioObjectPropertyScopeGlobal
+            status = AudioObjectGetPropertyData(
+                deviceID,
+                &propertyAddress,
+                0,
+                nil,
+                &propertySize,
+                &name
+            )
+            // The caller owns the returned CFString, so consume the +1 reference instead of leaking it.
+            if status == noErr, let deviceNameCF = name?.takeRetainedValue() as String? {
+                deviceName = deviceNameCF
+            }
+            // Get input channels
+            propertyAddress.mSelector = kAudioDevicePropertyStreamConfiguration
+            propertyAddress.mScope = kAudioDevicePropertyScopeInput
+            status = AudioObjectGetPropertyDataSize(deviceID, &propertyAddress, 0, nil, &propertySize)
+            // AudioBufferList is variable-length (one AudioBuffer per stream): allocate the
+            // reported byte size so Core Audio never writes past a single-buffer allocation.
+            if status == noErr, Int(propertySize) >= MemoryLayout<AudioBufferList>.size {
+                let rawBufferList = UnsafeMutableRawPointer.allocate(byteCount: Int(propertySize), alignment: MemoryLayout<AudioBufferList>.alignment)
+                defer { rawBufferList.deallocate() }
+                status = AudioObjectGetPropertyData(deviceID, &propertyAddress, 0, nil, &propertySize, rawBufferList)
+                if status == noErr {
+                    let bufferList = UnsafeMutableAudioBufferListPointer(rawBufferList.assumingMemoryBound(to: AudioBufferList.self))
+                    for buffer in bufferList {
+                        inputChannels += Int(buffer.mNumberChannels)
+                    }
+                }
+            }
+            if inputChannels > 0 {
+                devices.append(AudioDevice(id: deviceID, name: deviceName))
+            }
+        }
+        return devices
+    }
+    #endif
 
     deinit {
         stopRecording()
@@ -336,10 +436,43 @@ public extension AudioProcessor {
             Logging.debug("Current audio size: \(self.audioSamples.count) samples, most recent buffer: \(buffer.count) samples, most recent energy: \(newEnergy)")
         }
     }
-
-    func setupEngine() throws -> AVAudioEngine {
+    
+    #if os(macOS)
+    /// Points the input node's underlying audio unit at the given Core Audio device.
+    /// Failures are logged rather than thrown.
+    func assignAudioInput(inputNode: AVAudioInputNode, inputDeviceID: AudioDeviceID) {
+        guard let unit = inputNode.audioUnit else {
+            Logging.error("Failed to access the audio unit of the input node.")
+            return
+        }
+        // AudioUnitSetProperty takes the value by pointer, so copy it into a mutable local.
+        var deviceID = inputDeviceID
+        let status = AudioUnitSetProperty(
+            unit,
+            kAudioOutputUnitProperty_CurrentDevice,
+            kAudioUnitScope_Global,
+            0,
+            &deviceID,
+            UInt32(MemoryLayout<AudioDeviceID>.size)
+        )
+        guard status == noErr else {
+            Logging.error("Error setting Audio Unit property: \(status)")
+            return
+        }
+        Logging.info("Successfully set input device.")
+    }
+    #endif
+    
+    func setupEngine(inputDeviceID: DeviceID? = nil) throws -> AVAudioEngine {
         let audioEngine = AVAudioEngine()
         let inputNode = audioEngine.inputNode
+        
+        #if os(macOS)
+        if let inputDeviceID = inputDeviceID {
+            assignAudioInput(inputNode: inputNode, inputDeviceID: inputDeviceID)
+        }
+        #endif
+        
         let inputFormat = inputNode.outputFormat(forBus: 0)
 
         // Desired format (16,000 Hz, 1 channel)
@@ -384,14 +517,12 @@ public extension AudioProcessor {
             audioSamples.removeFirst(audioSamples.count - keep)
         }
     }
-
-    func startRecordingLive(callback: (([Float]) -> Void)? = nil) throws {
+    
+    func startRecordingLive(inputDeviceID: DeviceID? = nil, callback: (([Float]) -> Void)? = nil) throws {
         audioSamples = []
         audioEnergy = []
 
-        // TODO: implement selecting input device
-
-        audioEngine = try setupEngine()
+        audioEngine = try setupEngine(inputDeviceID: inputDeviceID)
 
         // Set the callback
         audioBufferCallback = callback

From 880f95e88f4c234042d275ef1edb82ecc57507f7 Mon Sep 17 00:00:00 2001
From: Zach Nagengast <zacharynagengast@gmail.com>
Date: Wed, 6 Mar 2024 17:45:03 -0800
Subject: [PATCH 2/9] macOS 13 support (#40)

* Initial macOS 13 support

* Handle watchos case

* Test on macos 13 and 14 runners

* Update test script for macos 13 simulators

* Use m1 macos 13 runners

* Use m1 macos 13 runners with appropriate matrix

* Use m1 macos 13 runners with appropriate matrix

* Reduce ios requirement to 16

* Only test watchos on macos 14

* Add ios to the avaudioapplication test

* Add note on version choices

* Remove missing os versions from Package.swift

* Add missing os checks

* Remove excess os checks causing warnings

* Test mel with cpuonly on simulator
---
 .github/workflows/unit-tests.yml              | 16 ++++++++--
 Package.swift                                 | 15 +++++----
 Sources/WhisperKit/Core/AudioEncoder.swift    |  2 +-
 Sources/WhisperKit/Core/AudioProcessor.swift  | 31 +++++++++++++++++--
 .../Core/AudioStreamTranscriber.swift         |  3 ++
 .../WhisperKit/Core/FeatureExtractor.swift    |  2 +-
 Sources/WhisperKit/Core/LogitsFilter.swift    |  6 ++--
 Sources/WhisperKit/Core/Models.swift          | 16 +++++-----
 Sources/WhisperKit/Core/SegmentSeeker.swift   |  4 +--
 Sources/WhisperKit/Core/TextDecoder.swift     |  6 ++--
 Sources/WhisperKit/Core/TokenSampler.swift    |  2 +-
 Sources/WhisperKit/Core/Utils.swift           |  1 +
 Sources/WhisperKit/Core/WhisperKit.swift      |  3 +-
 Sources/WhisperKitCLI/transcribe.swift        |  2 +-
 Tests/WhisperKitTests/FunctionalTests.swift   |  2 +-
 Tests/WhisperKitTests/UnitTests.swift         | 11 ++++---
 16 files changed, 82 insertions(+), 40 deletions(-)

diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index 7526d95c..5167fed2 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -9,7 +9,15 @@ on:
 
 jobs:
   build-and-test:
-    runs-on: macos-14
+    strategy:
+      matrix:
+        os: [macos-13-xlarge, macos-14]
+        include:
+          - os: macos-13-xlarge
+            ios-version: "16.1" # oldest available version
+          - os: macos-14
+            ios-version: "17.2" # latest available version
+    runs-on: ${{ matrix.os }}
     steps:
     - uses: actions/checkout@v4
     - uses: maxim-lobanov/setup-xcode@v1
@@ -40,14 +48,16 @@ jobs:
       run: |
         set -o pipefail
         xcodebuild clean build-for-testing -scheme whisperkit-Package -destination generic/platform=iOS | xcpretty
-        xcodebuild test -only-testing WhisperKitTests/UnitTests -scheme whisperkit-Package -destination "platform=iOS Simulator,OS=17.2,name=iPhone 15" | xcpretty
+        xcodebuild test -only-testing WhisperKitTests/UnitTests -scheme whisperkit-Package -destination "platform=iOS Simulator,OS=${{ matrix.ios-version }},name=iPhone 15" | xcpretty
     - name: Build and Test - watchOS
+      if: matrix.os == 'macos-14'
       run: |
         set -o pipefail
         xcodebuild clean build-for-testing -scheme whisperkit-Package -destination generic/platform=watchOS | xcpretty
         xcodebuild test -only-testing WhisperKitTests/UnitTests -scheme whisperkit-Package -destination "platform=watchOS Simulator,OS=10.2,name=Apple Watch Ultra 2 (49mm)" | xcpretty
     - name: Build and Test - visionOS
+      if: matrix.os == 'macos-14'
       run: |
         set -o pipefail
         xcodebuild clean build-for-testing -scheme whisperkit-Package -destination generic/platform=visionOS | xcpretty
-        xcodebuild test -only-testing WhisperKitTests/UnitTests -scheme whisperkit-Package -destination "platform=visionOS Simulator,name=Apple Vision Pro" | xcpretty
\ No newline at end of file
+        xcodebuild test -only-testing WhisperKitTests/UnitTests -scheme whisperkit-Package -destination "platform=visionOS Simulator,name=Apple Vision Pro" | xcpretty
diff --git a/Package.swift b/Package.swift
index 057482c2..b4887194 100644
--- a/Package.swift
+++ b/Package.swift
@@ -6,10 +6,8 @@ import PackageDescription
 let package = Package(
     name: "whisperkit",
     platforms: [
-        .iOS(.v17),
-        .macOS(.v14),
-        .watchOS(.v10),
-        .visionOS(.v1)
+        .iOS(.v16),
+        .macOS(.v13),
     ],
     products: [
         .library(
@@ -18,7 +16,8 @@ let package = Package(
         ),
         .executable(
             name: "transcribe",
-            targets: ["WhisperKitCLI"])
+            targets: ["WhisperKitCLI"]
+        ),
     ],
     dependencies: [
         .package(url: "https://github.com/huggingface/swift-transformers.git", exact: "0.1.2"),
@@ -35,7 +34,7 @@ let package = Package(
             name: "WhisperKitCLI",
             dependencies: [
                 "WhisperKit",
-                .product(name: "ArgumentParser", package: "swift-argument-parser")
+                .product(name: "ArgumentParser", package: "swift-argument-parser"),
             ]
         ),
         .testTarget(
@@ -51,11 +50,11 @@ let package = Package(
                 "Makefile",
                 "README.md",
                 "LICENSE",
-                "CONTRIBUTING.md"
+                "CONTRIBUTING.md",
             ],
             resources: [
                 .process("Tests/WhisperKitTests/Resources"),
-                .copy("Models/whisperkit-coreml")
+                .copy("Models/whisperkit-coreml"),
             ]
         ),
     ]
diff --git a/Sources/WhisperKit/Core/AudioEncoder.swift b/Sources/WhisperKit/Core/AudioEncoder.swift
index bd32fad6..73061157 100644
--- a/Sources/WhisperKit/Core/AudioEncoder.swift
+++ b/Sources/WhisperKit/Core/AudioEncoder.swift
@@ -14,7 +14,7 @@ public protocol AudioEncoding {
     func encodeFeatures(_ features: MLMultiArray) async throws -> MLMultiArray?
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class AudioEncoder: AudioEncoding, WhisperMLModel {
     public var model: MLModel?
 
diff --git a/Sources/WhisperKit/Core/AudioProcessor.swift b/Sources/WhisperKit/Core/AudioProcessor.swift
index 4909f26c..0231fba4 100644
--- a/Sources/WhisperKit/Core/AudioProcessor.swift
+++ b/Sources/WhisperKit/Core/AudioProcessor.swift
@@ -143,7 +143,7 @@ public extension AudioProcessing {
     }
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class AudioProcessor: NSObject, AudioProcessing {
     public var audioEngine: AVAudioEngine?
     public var audioSamples: ContiguousArray<Float> = []
@@ -314,7 +314,32 @@ public class AudioProcessor: NSObject, AudioProcessing {
     }
 
     public static func requestRecordPermission() async -> Bool {
-        await AVAudioApplication.requestRecordPermission()
+        if #available(macOS 14, iOS 17, *) {
+            return await AVAudioApplication.requestRecordPermission()
+        } else {
+            #if os(watchOS)
+            // watchOS does not support AVCaptureDevice
+            return true
+            #else
+            let microphoneStatus = AVCaptureDevice.authorizationStatus(for: .audio)
+            switch microphoneStatus {
+                case .notDetermined:
+                return await withCheckedContinuation { continuation in
+                    AVCaptureDevice.requestAccess(for: .audio) { granted in
+                        continuation.resume(returning: granted)
+                    }
+                }
+                case .restricted, .denied:
+                Logging.error("Microphone access denied")
+                return false
+                case .authorized:
+                return true
+                @unknown default:
+                Logging.error("Unknown authorization status")
+                return false
+            }
+            #endif
+        }
     }
     
     #if os(macOS)
@@ -412,7 +437,7 @@ public class AudioProcessor: NSObject, AudioProcessing {
 
 // MARK: - Streaming
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public extension AudioProcessor {
     /// We have a new buffer, process and store it.
     /// NOTE: Assumes audio is 16khz mono
diff --git a/Sources/WhisperKit/Core/AudioStreamTranscriber.swift b/Sources/WhisperKit/Core/AudioStreamTranscriber.swift
index c3c158ff..779a25d3 100644
--- a/Sources/WhisperKit/Core/AudioStreamTranscriber.swift
+++ b/Sources/WhisperKit/Core/AudioStreamTranscriber.swift
@@ -3,6 +3,7 @@
 
 import Foundation
 
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public extension AudioStreamTranscriber {
     struct State {
         public var isRecording: Bool = false
@@ -17,9 +18,11 @@ public extension AudioStreamTranscriber {
     }
 }
 
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public typealias AudioStreamTranscriberCallback = (AudioStreamTranscriber.State, AudioStreamTranscriber.State) -> Void
 
 /// Responsible for streaming audio from the microphone, processing it, and transcribing it in real-time.
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public actor AudioStreamTranscriber {
     private var state: AudioStreamTranscriber.State = .init() {
         didSet {
diff --git a/Sources/WhisperKit/Core/FeatureExtractor.swift b/Sources/WhisperKit/Core/FeatureExtractor.swift
index 78544f72..c44e56ab 100644
--- a/Sources/WhisperKit/Core/FeatureExtractor.swift
+++ b/Sources/WhisperKit/Core/FeatureExtractor.swift
@@ -12,7 +12,7 @@ public protocol FeatureExtracting {
     func logMelSpectrogram(fromAudio inputAudio: MLMultiArray) async throws -> MLMultiArray?
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class FeatureExtractor: FeatureExtracting, WhisperMLModel {
     public var model: MLModel?
 
diff --git a/Sources/WhisperKit/Core/LogitsFilter.swift b/Sources/WhisperKit/Core/LogitsFilter.swift
index e71f0186..6bd60a37 100644
--- a/Sources/WhisperKit/Core/LogitsFilter.swift
+++ b/Sources/WhisperKit/Core/LogitsFilter.swift
@@ -9,7 +9,7 @@ public protocol LogitsFiltering {
     func filterLogits(_ logits: MLMultiArray, withTokens tokens: [Int]) -> MLMultiArray
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class SuppressTokensFilter: LogitsFiltering {
     let suppressTokens: [Int]
     private let suppressTokenIndexes: [[NSNumber]]
@@ -25,7 +25,7 @@ public class SuppressTokensFilter: LogitsFiltering {
     }
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class SuppressBlankFilter: LogitsFiltering {
     let suppressBlankTokens: [Int]
     let sampleBegin: Int
@@ -46,7 +46,7 @@ public class SuppressBlankFilter: LogitsFiltering {
     }
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class TimestampRulesFilter: LogitsFiltering {
     let tokenizer: Tokenizer
     let sampleBegin: Int
diff --git a/Sources/WhisperKit/Core/Models.swift b/Sources/WhisperKit/Core/Models.swift
index 2048089f..a1d8196b 100644
--- a/Sources/WhisperKit/Core/Models.swift
+++ b/Sources/WhisperKit/Core/Models.swift
@@ -7,7 +7,7 @@ import NaturalLanguage
 import Tokenizers
 
 #if os(watchOS) || arch(arm64)
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public typealias FloatType = Float16
 #else
 public typealias FloatType = Float
@@ -200,7 +200,7 @@ public struct DecodingCache {
 ///   - logProbThreshold: If the average log probability over sampled tokens is below this value, treat as failed.
 ///   - noSpeechThreshold: If the no speech probability is higher than this value AND the average log
 ///                        probability over sampled tokens is below `logProbThreshold`, consider the segment as silent.
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public struct DecodingOptions {
     public var verbose: Bool
     public var task: DecodingTask
@@ -489,7 +489,7 @@ public class MelSpectrogramInput: MLFeatureProvider {
 }
 
 /// Model Prediction Output Type
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class MelSpectrogramOutput: MLFeatureProvider {
     /// Source provided by CoreML
     private let provider: MLFeatureProvider
@@ -526,7 +526,7 @@ public class MelSpectrogramOutput: MLFeatureProvider {
 // MARK: AudioEncoder
 
 /// Model Prediction Input Type
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class AudioEncoderInput: MLFeatureProvider {
     /// melspectrogram_features as 1 × {80,128} × 1 × 3000 4-dimensional array of floats
     public var melspectrogram_features: MLMultiArray
@@ -552,7 +552,7 @@ public class AudioEncoderInput: MLFeatureProvider {
 }
 
 /// Model Prediction Output Type
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class AudioEncoderOutput: MLFeatureProvider {
     /// Source provided by CoreML
     private let provider: MLFeatureProvider
@@ -589,7 +589,7 @@ public class AudioEncoderOutput: MLFeatureProvider {
 // MARK: TextDecoder
 
 /// Model Prediction Input Type
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class TextDecoderInput: MLFeatureProvider {
     /// input_ids as 1 element vector of 32-bit integers
     public var input_ids: MLMultiArray
@@ -657,7 +657,7 @@ public class TextDecoderInput: MLFeatureProvider {
 }
 
 /// Model Prediction Output Type
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class TextDecoderOutput: MLFeatureProvider {
     /// Source provided by CoreML
     private let provider: MLFeatureProvider
@@ -764,7 +764,7 @@ public class TextDecoderCachePrefillInput: MLFeatureProvider {
 }
 
 /// Model Prediction Output Type
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class TextDecoderCachePrefillOutput: MLFeatureProvider {
     /// Source provided by CoreML
     private let provider: MLFeatureProvider
diff --git a/Sources/WhisperKit/Core/SegmentSeeker.swift b/Sources/WhisperKit/Core/SegmentSeeker.swift
index 8e78415f..e37f8e63 100644
--- a/Sources/WhisperKit/Core/SegmentSeeker.swift
+++ b/Sources/WhisperKit/Core/SegmentSeeker.swift
@@ -6,7 +6,7 @@ import CoreML
 import Foundation
 import Tokenizers
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public protocol SegmentSeeking {
     func findSeekPointAndSegments(
         decodingResult: DecodingResult,
@@ -34,7 +34,7 @@ public protocol SegmentSeeking {
     ) throws -> [TranscriptionSegment]?
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class SegmentSeeker: SegmentSeeking {
     public init() {}
 
diff --git a/Sources/WhisperKit/Core/TextDecoder.swift b/Sources/WhisperKit/Core/TextDecoder.swift
index 97f87323..6575b32a 100644
--- a/Sources/WhisperKit/Core/TextDecoder.swift
+++ b/Sources/WhisperKit/Core/TextDecoder.swift
@@ -5,7 +5,7 @@ import Accelerate
 import CoreML
 import Tokenizers
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public protocol TextDecoding {
     var tokenizer: Tokenizer? { get set }
     var prefillData: WhisperMLModel? { get set }
@@ -43,7 +43,7 @@ public protocol TextDecoding {
     )
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public extension TextDecoding {
     func prepareDecoderInputs(withPrompt initialPrompt: [Int]) -> DecodingInputs? {
         let tokenShape = [NSNumber(value: 1), NSNumber(value: initialPrompt.count)]
@@ -234,7 +234,7 @@ public class TextDecoderContextPrefill: WhisperMLModel {
     public var model: MLModel?
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class TextDecoder: TextDecoding, WhisperMLModel {
     public var model: MLModel?
     public var tokenizer: Tokenizer?
diff --git a/Sources/WhisperKit/Core/TokenSampler.swift b/Sources/WhisperKit/Core/TokenSampler.swift
index d06b69d6..19470543 100644
--- a/Sources/WhisperKit/Core/TokenSampler.swift
+++ b/Sources/WhisperKit/Core/TokenSampler.swift
@@ -16,7 +16,7 @@ public struct SamplingResult {
     public var completed: Bool
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class GreedyTokenSampler: TokenSampling {
     public var temperature: FloatType
     public var eotToken: Int
diff --git a/Sources/WhisperKit/Core/Utils.swift b/Sources/WhisperKit/Core/Utils.swift
index e90bcc6d..193b2168 100644
--- a/Sources/WhisperKit/Core/Utils.swift
+++ b/Sources/WhisperKit/Core/Utils.swift
@@ -39,6 +39,7 @@ extension MLMultiArray {
     }
 }
 
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 func initMLMultiArray(shape: [NSNumber], dataType: MLMultiArrayDataType, initialValue: Any) -> MLMultiArray {
     let multiArray = try! MLMultiArray(shape: shape, dataType: dataType)
 
diff --git a/Sources/WhisperKit/Core/WhisperKit.swift b/Sources/WhisperKit/Core/WhisperKit.swift
index 649674b9..9628a9fc 100644
--- a/Sources/WhisperKit/Core/WhisperKit.swift
+++ b/Sources/WhisperKit/Core/WhisperKit.swift
@@ -9,12 +9,13 @@ import Hub
 import TensorUtils
 import Tokenizers
 
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public protocol Transcriber {
     func transcribe(audioPath: String, decodeOptions: DecodingOptions?, callback: TranscriptionCallback) async throws -> TranscriptionResult?
     func transcribe(audioArray: [Float], decodeOptions: DecodingOptions?, callback: TranscriptionCallback) async throws -> TranscriptionResult?
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class WhisperKit: Transcriber {
     /// Models
     public var modelVariant: ModelVariant = .tiny
diff --git a/Sources/WhisperKitCLI/transcribe.swift b/Sources/WhisperKitCLI/transcribe.swift
index d2d73a6a..7a1c571a 100644
--- a/Sources/WhisperKitCLI/transcribe.swift
+++ b/Sources/WhisperKitCLI/transcribe.swift
@@ -7,7 +7,7 @@ import Foundation
 
 import WhisperKit
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 @main
 struct WhisperKitCLI: AsyncParsableCommand {
     @Option(help: "Path to audio file")
diff --git a/Tests/WhisperKitTests/FunctionalTests.swift b/Tests/WhisperKitTests/FunctionalTests.swift
index e783adfb..44fd92f3 100644
--- a/Tests/WhisperKitTests/FunctionalTests.swift
+++ b/Tests/WhisperKitTests/FunctionalTests.swift
@@ -5,7 +5,7 @@ import CoreML
 @testable import WhisperKit
 import XCTest
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 final class FunctionalTests: XCTestCase {
     func testInitLarge() async {
         let modelPath = largev3ModelPath()
diff --git a/Tests/WhisperKitTests/UnitTests.swift b/Tests/WhisperKitTests/UnitTests.swift
index 96f02e93..c6781d21 100644
--- a/Tests/WhisperKitTests/UnitTests.swift
+++ b/Tests/WhisperKitTests/UnitTests.swift
@@ -7,7 +7,7 @@ import Tokenizers
 @testable import WhisperKit
 import XCTest
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 final class UnitTests: XCTestCase {
     func testInit() async {
         let whisperKit = try? await WhisperKit(prewarm: false, load: false, download: false)
@@ -226,7 +226,9 @@ final class UnitTests: XCTestCase {
     }
 
     func testWindowing() async {
-        let computeOptions = ModelComputeOptions()
+        let computeOptions = ModelComputeOptions(
+            melCompute: .cpuOnly
+        )
         let whisperKit = try? await WhisperKit(modelFolder: tinyModelPath(), computeOptions: computeOptions, verbose: true, logLevel: .debug)
 
         guard let audioFilePath = Bundle.module.path(forResource: "jfk", ofType: "wav") else {
@@ -373,7 +375,7 @@ final class UnitTests: XCTestCase {
             XCTFail("Failed to transcribe")
             return
         }
-        XCTAssertEqual(result.text.prefix(4), "東京は晴")
+        XCTAssertEqual(result.text.prefix(3), "東京は")
     }
 
     func testNoTimestamps() async {
@@ -831,6 +833,7 @@ final class UnitTests: XCTestCase {
 
 // MARK: Helpers
 
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 extension MLMultiArray {
     /// Create `MLMultiArray` of shape [1, 1, arr.count] and fill up the last
     /// dimension with with values from arr.
@@ -858,7 +861,7 @@ extension MLMultiArray {
     }
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 extension XCTestCase {
     func transcribe(with variant: ModelVariant, options: DecodingOptions, audioFile: String = "jfk.wav", file: StaticString = #file, line: UInt = #line) async throws -> TranscriptionResult? {
         var modelPath = tinyModelPath()

From 20c90018a8948e5c323abf8c85d1fbda8ef21bdc Mon Sep 17 00:00:00 2001
From: Jan Krukowski <jan.krukowski@geneva.com>
Date: Thu, 7 Mar 2024 16:27:25 +0100
Subject: [PATCH 3/9] Fixed Conformance of 'Float16' warning (#58)

* fixed warnings

* removed fixme
---
 Sources/WhisperKit/Core/Models.swift       | 8 ++++++--
 Sources/WhisperKit/Core/TokenSampler.swift | 2 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/Sources/WhisperKit/Core/Models.swift b/Sources/WhisperKit/Core/Models.swift
index a1d8196b..e9a7b2bd 100644
--- a/Sources/WhisperKit/Core/Models.swift
+++ b/Sources/WhisperKit/Core/Models.swift
@@ -1,18 +1,22 @@
 //  For licensing see accompanying LICENSE.md file.
 //  Copyright © 2024 Argmax, Inc. All rights reserved.
 
+import Accelerate
 import CoreML
 import Hub
 import NaturalLanguage
 import Tokenizers
 
-#if os(watchOS) || arch(arm64)
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
+#if !((os(macOS) || targetEnvironment(macCatalyst)) && arch(x86_64))
 public typealias FloatType = Float16
 #else
 public typealias FloatType = Float
 #endif
 
+#if (os(macOS) || targetEnvironment(macCatalyst)) && arch(arm64)
+extension Float16: BNNSScalar {}
+#endif
+
 // MARK: - CoreML
 
 public protocol WhisperMLModel {
diff --git a/Sources/WhisperKit/Core/TokenSampler.swift b/Sources/WhisperKit/Core/TokenSampler.swift
index 19470543..931331a4 100644
--- a/Sources/WhisperKit/Core/TokenSampler.swift
+++ b/Sources/WhisperKit/Core/TokenSampler.swift
@@ -44,7 +44,7 @@ public class GreedyTokenSampler: TokenSampling {
 
             let logitsDescriptor = BNNSNDArrayDescriptor(
                 data: logitsRawPointer,
-                scalarType: FloatType.self, // FIXME: Float16 here breaks in swift 6
+                scalarType: FloatType.self,
                 shape: .vector(logits.count, stride: 1)
             )!
 

From df15f89289d5ac7b57138ef5b6fc7328c1cb178f Mon Sep 17 00:00:00 2001
From: Finn Voorhees <finnvoorhees@gmail.com>
Date: Thu, 7 Mar 2024 15:46:20 +0000
Subject: [PATCH 4/9] Fix memory leak from non-async MLModel prediction (#56)

---
 Sources/WhisperKit/Core/AudioEncoder.swift     |  2 +-
 Sources/WhisperKit/Core/FeatureExtractor.swift |  2 +-
 Sources/WhisperKit/Core/TextDecoder.swift      |  4 ++--
 Sources/WhisperKit/Core/Utils.swift            | 15 +++++++++++++++
 4 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/Sources/WhisperKit/Core/AudioEncoder.swift b/Sources/WhisperKit/Core/AudioEncoder.swift
index 73061157..8b66b1f2 100644
--- a/Sources/WhisperKit/Core/AudioEncoder.swift
+++ b/Sources/WhisperKit/Core/AudioEncoder.swift
@@ -46,7 +46,7 @@ public class AudioEncoder: AudioEncoding, WhisperMLModel {
 
         try Task.checkCancellation()
 
-        let outputFeatures = try await model.prediction(from: modelInputs, options: MLPredictionOptions())
+        let outputFeatures = try await model.asyncPrediction(from: modelInputs, options: MLPredictionOptions())
 
         let output = AudioEncoderOutput(features: outputFeatures)
 
diff --git a/Sources/WhisperKit/Core/FeatureExtractor.swift b/Sources/WhisperKit/Core/FeatureExtractor.swift
index c44e56ab..4838fa06 100644
--- a/Sources/WhisperKit/Core/FeatureExtractor.swift
+++ b/Sources/WhisperKit/Core/FeatureExtractor.swift
@@ -35,7 +35,7 @@ public class FeatureExtractor: FeatureExtracting, WhisperMLModel {
 
         try Task.checkCancellation()
 
-        let outputFeatures = try await model.prediction(from: modelInputs, options: MLPredictionOptions())
+        let outputFeatures = try await model.asyncPrediction(from: modelInputs, options: MLPredictionOptions())
 
         let output = MelSpectrogramOutput(features: outputFeatures)
 
diff --git a/Sources/WhisperKit/Core/TextDecoder.swift b/Sources/WhisperKit/Core/TextDecoder.swift
index 6575b32a..2e1fbd1c 100644
--- a/Sources/WhisperKit/Core/TextDecoder.swift
+++ b/Sources/WhisperKit/Core/TextDecoder.swift
@@ -176,7 +176,7 @@ public extension TextDecoding {
 
         try Task.checkCancellation()
 
-        let outputFeatures = try await prefillModel.prediction(from: modelInputs, options: MLPredictionOptions())
+        let outputFeatures = try await prefillModel.asyncPrediction(from: modelInputs, options: MLPredictionOptions())
 
         let output = TextDecoderCachePrefillOutput(features: outputFeatures)
 
@@ -291,7 +291,7 @@ public class TextDecoder: TextDecoding, WhisperMLModel {
 
         try Task.checkCancellation()
 
-        let outputFeatures = try await model.prediction(from: modelInputs, options: MLPredictionOptions())
+        let outputFeatures = try await model.asyncPrediction(from: modelInputs, options: MLPredictionOptions())
 
         let output = TextDecoderOutput(features: outputFeatures)
 
diff --git a/Sources/WhisperKit/Core/Utils.swift b/Sources/WhisperKit/Core/Utils.swift
index 193b2168..ce614902 100644
--- a/Sources/WhisperKit/Core/Utils.swift
+++ b/Sources/WhisperKit/Core/Utils.swift
@@ -39,6 +39,21 @@ extension MLMultiArray {
     }
 }
 
+extension MLModel {
+    /// Awaits a prediction via the async `prediction(from:options:)` when the availability check passes;
+    /// otherwise runs the non-async `prediction(from:options:)` inside a `Task` and awaits its value.
+    func asyncPrediction(from input: MLFeatureProvider, options: MLPredictionOptions) async throws -> MLFeatureProvider {
+        guard #available(macOS 14, iOS 17, watchOS 10, visionOS 1, *) else {
+            // Fallback path for OS versions that fail the check above.
+            let pending = Task {
+                try prediction(from: input, options: options)
+            }
+            return try await pending.value
+        }
+        return try await prediction(from: input, options: options)
+    }
+}
+
 @available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 func initMLMultiArray(shape: [NSNumber], dataType: MLMultiArrayDataType, initialValue: Any) -> MLMultiArray {
     let multiArray = try! MLMultiArray(shape: shape, dataType: dataType)

From d08fb1b67567d92b481adfedf2a6b42d51ab2c58 Mon Sep 17 00:00:00 2001
From: Finn Voorhees <finnvoorhees@gmail.com>
Date: Fri, 8 Mar 2024 00:27:34 +0000
Subject: [PATCH 5/9] Expose downloadBase in WhisperKit init (#57)

---
 Sources/WhisperKit/Core/WhisperKit.swift | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Sources/WhisperKit/Core/WhisperKit.swift b/Sources/WhisperKit/Core/WhisperKit.swift
index 9628a9fc..f0678c7a 100644
--- a/Sources/WhisperKit/Core/WhisperKit.swift
+++ b/Sources/WhisperKit/Core/WhisperKit.swift
@@ -49,6 +49,7 @@ public class WhisperKit: Transcriber {
 
     public init(
         model: String? = nil,
+        downloadBase: URL? = nil,
         modelRepo: String? = nil,
         modelFolder: String? = nil,
         computeOptions: ModelComputeOptions? = nil,
@@ -74,7 +75,7 @@ public class WhisperKit: Transcriber {
         Logging.shared.logLevel = verbose ? logLevel : .none
         currentTimings = TranscriptionTimings()
 
-        try await setupModels(model: model, modelRepo: modelRepo, modelFolder: modelFolder, download: download)
+        try await setupModels(model: model, downloadBase: downloadBase, modelRepo: modelRepo, modelFolder: modelFolder, download: download)
 
         if let prewarm = prewarm, prewarm {
             Logging.info("Prewarming models...")
@@ -179,7 +180,7 @@ public class WhisperKit: Transcriber {
     }
 
     /// Sets up the model folder either from a local path or by downloading from a repository.
-    public func setupModels(model: String?, modelRepo: String?, modelFolder: String?, download: Bool) async throws {
+    public func setupModels(model: String?, downloadBase: URL? = nil, modelRepo: String?, modelFolder: String?, download: Bool) async throws {
         // Determine the model variant to use
         let modelVariant = model ?? WhisperKit.recommendedModels().default
 
@@ -189,7 +190,7 @@ public class WhisperKit: Transcriber {
         } else if download {
             let repo = modelRepo ?? "argmaxinc/whisperkit-coreml"
             do {
-                let hubModelFolder = try await Self.download(variant: modelVariant, from: repo)
+                let hubModelFolder = try await Self.download(variant: modelVariant, downloadBase: downloadBase, from: repo)
                 self.modelFolder = hubModelFolder!
             } catch {
                 // Handle errors related to model downloading

From c0dc2644ae1313ea2a1da9c32257e7ebdee1a46f Mon Sep 17 00:00:00 2001
From: ZachNagengast <znagengast@gmail.com>
Date: Fri, 8 Mar 2024 14:58:07 -0800
Subject: [PATCH 6/9] Disable early stopping during prefill tokens, fixing #63

---
 Sources/WhisperKit/Core/TextDecoder.swift | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Sources/WhisperKit/Core/TextDecoder.swift b/Sources/WhisperKit/Core/TextDecoder.swift
index 2e1fbd1c..bc22446e 100644
--- a/Sources/WhisperKit/Core/TextDecoder.swift
+++ b/Sources/WhisperKit/Core/TextDecoder.swift
@@ -484,7 +484,7 @@ public class TextDecoder: TextDecoding, WhisperMLModel {
 
                 // Call the callback if it is provided
                 if let shouldContinue = callback?(result) {
-                    if !shouldContinue {
+                    if !shouldContinue && !isPrefill {
                         Logging.debug("Early stopping")
                         break
                     }

From 7fcda5194d0d64dce6bb6b4136b0f52b70dcd28e Mon Sep 17 00:00:00 2001
From: ZachNagengast <znagengast@gmail.com>
Date: Fri, 8 Mar 2024 14:58:39 -0800
Subject: [PATCH 7/9] Enable ipad interface for example app #60

---
 Examples/WhisperAX/WhisperAX.xcodeproj/project.pbxproj | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Examples/WhisperAX/WhisperAX.xcodeproj/project.pbxproj b/Examples/WhisperAX/WhisperAX.xcodeproj/project.pbxproj
index 4a4d4bcc..743e7730 100644
--- a/Examples/WhisperAX/WhisperAX.xcodeproj/project.pbxproj
+++ b/Examples/WhisperAX/WhisperAX.xcodeproj/project.pbxproj
@@ -872,9 +872,10 @@
 				SDKROOT = auto;
 				SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
 				SUPPORTS_MACCATALYST = NO;
+				SUPPORTS_XR_DESIGNED_FOR_IPHONE_IPAD = YES;
 				SWIFT_EMIT_LOC_STRINGS = YES;
 				SWIFT_VERSION = 5.0;
-				TARGETED_DEVICE_FAMILY = 1;
+				TARGETED_DEVICE_FAMILY = "1,2";
 			};
 			name = Debug;
 		};
@@ -915,9 +916,10 @@
 				SDKROOT = auto;
 				SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
 				SUPPORTS_MACCATALYST = NO;
+				SUPPORTS_XR_DESIGNED_FOR_IPHONE_IPAD = YES;
 				SWIFT_EMIT_LOC_STRINGS = YES;
 				SWIFT_VERSION = 5.0;
-				TARGETED_DEVICE_FAMILY = 1;
+				TARGETED_DEVICE_FAMILY = "1,2";
 			};
 			name = Release;
 		};

From ccdd77d56d3ae4d5636251f88dc801678d029a82 Mon Sep 17 00:00:00 2001
From: Chase Farmer <cgfarmer4@gmail.com>
Date: Fri, 8 Mar 2024 15:19:36 -0800
Subject: [PATCH 8/9] Add audio device selector to transcribe + take a stab at
 Delete/Retry models (#54)

* Add audio devices to transcribe view

* Add delete and retry buttons

* Adjust mic device picker location

* RM retry

---------

Co-authored-by: ZachNagengast <znagengast@gmail.com>
---
 .../WhisperAX/Views/ContentView.swift         | 148 ++++++++++++------
 1 file changed, 100 insertions(+), 48 deletions(-)

diff --git a/Examples/WhisperAX/WhisperAX/Views/ContentView.swift b/Examples/WhisperAX/WhisperAX/Views/ContentView.swift
index 2bb6982e..92fe7fb6 100644
--- a/Examples/WhisperAX/WhisperAX/Views/ContentView.swift
+++ b/Examples/WhisperAX/WhisperAX/Views/ContentView.swift
@@ -246,6 +246,16 @@ struct ContentView: View {
                             .progressViewStyle(CircularProgressViewStyle())
                             .scaleEffect(0.5)
                     }
+                    
+                    Button(action: {
+                        deleteModel()
+                    }, label: {
+                        Image(systemName: "trash")
+                    })
+                    .help("Delete model")
+                    .buttonStyle(BorderlessButtonStyle())
+                    .disabled(localModels.count == 0)
+                    .disabled(!localModels.contains(selectedModel))
 
                     #if os(macOS)
                     Button(action: {
@@ -306,6 +316,33 @@ struct ContentView: View {
     }
 
     // MARK: - Controls
+    /// Input-device picker; outside macOS the `Group` is compiled with no content.
+    var audioDevicesView: some View {
+        Group {
+            #if os(macOS)
+            HStack {
+                // Show the picker only once at least one device has been loaded.
+                if let devices = audioDevices, !devices.isEmpty {
+                    Picker("", selection: $selectedAudioInput) {
+                        ForEach(devices, id: \.self) { device in
+                            Text(device.name).tag(device.name)
+                        }
+                    }
+                    .frame(width: 250)
+                    .disabled(isRecording)
+                }
+            }
+            .onAppear {
+                audioDevices = AudioProcessor.getAudioDevices()
+                // `.first` is nil for a missing or empty list, so no separate emptiness check is needed.
+                if selectedAudioInput == "No Audio Input", let firstDevice = audioDevices?.first {
+                    selectedAudioInput = firstDevice.name
+                }
+            }
+            #endif
+        }
+    }
+    
     var controlsView: some View {
         VStack {
             basicSettingsView
@@ -321,7 +358,13 @@ struct ContentView: View {
                                     Label("Reset", systemImage: "arrow.clockwise")
                                 }
                                 .buttonStyle(.borderless)
+
+                                Spacer()
+
+                                audioDevicesView
+
                                 Spacer()
+
                                 Button {
                                     showAdvancedOptions.toggle()
                                 } label: {
@@ -395,63 +438,50 @@ struct ContentView: View {
                             }
                         }
                     case "Stream":
-                        HStack {
-                            Button {
-                                resetState()
-                            } label: {
-                                Label("Reset", systemImage: "arrow.clockwise")
-                            }
-                            .frame(minWidth: 0, maxWidth: .infinity)
-                            .buttonStyle(.borderless)
-
-                            Button {
-                                withAnimation {
-                                    toggleRecording(shouldLoop: true)
-                                }
-                            } label: {
-                                Image(systemName: !isRecording ? "record.circle" : "stop.circle.fill")
-                                    .resizable()
-                                    .scaledToFit()
-                                    .frame(width: 70, height: 70)
-                                    .padding()
-                                    .foregroundColor(modelState != .loaded ? .gray : .red)
-                            }
-                            .contentTransition(.symbolEffect(.replace))
-                            .buttonStyle(BorderlessButtonStyle())
-                            .disabled(modelState != .loaded)
-                            .frame(minWidth: 0, maxWidth: .infinity)
-                            
-                            VStack {
+                        VStack {
+                            HStack {
                                 Button {
-                                    showAdvancedOptions.toggle()
+                                    resetState()
                                 } label: {
-                                    Label("Settings", systemImage: "slider.horizontal.3")
+                                    Label("Reset", systemImage: "arrow.clockwise")
                                 }
                                 .frame(minWidth: 0, maxWidth: .infinity)
                                 .buttonStyle(.borderless)
-                                
-                                #if os(macOS)
-                                HStack {
-                                    if let audioDevices = audioDevices, audioDevices.count > 0 {
-                                        Picker("", selection: $selectedAudioInput) {
-                                            ForEach(audioDevices, id: \.self) { device in
-                                                Text(device.name).tag(device.name)
-                                            }
-                                        }
-                                        .frame(minWidth: 80)
-                                        .disabled(isRecording)
+
+                                Spacer()
+
+                                audioDevicesView
+
+                                Spacer()
+
+                                VStack {
+                                    Button {
+                                        showAdvancedOptions.toggle()
+                                    } label: {
+                                        Label("Settings", systemImage: "slider.horizontal.3")
                                     }
+                                    .frame(minWidth: 0, maxWidth: .infinity)
+                                    .buttonStyle(.borderless)
                                 }
-                                .onAppear {
-                                    audioDevices = AudioProcessor.getAudioDevices()
-                                    if let audioDevices = audioDevices,
-                                       !audioDevices.isEmpty,
-                                       selectedAudioInput == "No Audio Input",
-                                       let device = audioDevices.first {
-                                        selectedAudioInput = device.name
+                            }
+
+                            HStack {
+                                Button {
+                                    withAnimation {
+                                        toggleRecording(shouldLoop: true)
                                     }
+                                } label: {
+                                    Image(systemName: !isRecording ? "record.circle" : "stop.circle.fill")
+                                        .resizable()
+                                        .scaledToFit()
+                                        .frame(width: 70, height: 70)
+                                        .padding()
+                                        .foregroundColor(modelState != .loaded ? .gray : .red)
                                 }
-                                #endif
+                                .contentTransition(.symbolEffect(.replace))
+                                .buttonStyle(BorderlessButtonStyle())
+                                .disabled(modelState != .loaded)
+                                .frame(minWidth: 0, maxWidth: .infinity)
                             }
                         }
                     default:
@@ -779,6 +809,10 @@ struct ContentView: View {
                 try await whisperKit.loadModels()
 
                 await MainActor.run {
+                    if !localModels.contains(model) {
+                        localModels.append(model)
+                    }
+                    
                     availableLanguages = whisperKit.tokenizer?.langauges.map { $0.key }.sorted() ?? ["english"]
                     loadingProgressValue = 1.0
                     modelState = whisperKit.modelState
@@ -786,6 +820,24 @@ struct ContentView: View {
             }
         }
     }
+    
+    /// Removes the selected model's folder from disk and marks the model unloaded.
+    /// Does nothing when the selected model is not listed in `localModels`.
+    func deleteModel() {
+        guard let modelIndex = localModels.firstIndex(of: selectedModel) else { return }
+
+        let folderName = "openai_whisper-\(selectedModel)"
+        let folderURL = URL(fileURLWithPath: localModelPath).appendingPathComponent(folderName)
+
+        do {
+            try FileManager.default.removeItem(at: folderURL)
+            // State is only updated once the removal has succeeded.
+            localModels.remove(at: modelIndex)
+            modelState = .unloaded
+        } catch {
+            print("Error deleting model: \(error)")
+        }
+    }
 
     func updateProgressBar(targetProgress: Float, maxTime: TimeInterval) async {
         let initialProgress = loadingProgressValue

From bfa357e897dadae0dca6314424bb119149e375d2 Mon Sep 17 00:00:00 2001
From: bharat9806 <60149810+bharat9806@users.noreply.github.com>
Date: Sat, 9 Mar 2024 05:29:21 +0530
Subject: [PATCH 9/9] Issue - 42 WhisperKit support simulator fixed (#52)

* Issue - 42 WhisperKit support simulator fixed

* Issue - 42 enhancement done

* Setup AVAudioSession, which fixes crash in simulators

* Add availability checks for model compute options

---------

Co-authored-by: ZachNagengast <znagengast@gmail.com>
---
 .../xcshareddata/swiftpm/Package.resolved     |  3 ++-
 .../WhisperAXExampleView.swift                |  2 +-
 Sources/WhisperKit/Core/AudioProcessor.swift  | 24 ++++++++++++++++++-
 Sources/WhisperKit/Core/Models.swift          |  8 +++++++
 Sources/WhisperKit/Core/Utils.swift           | 11 +++++++++
 5 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/Examples/WhisperAX/WhisperAX.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/Examples/WhisperAX/WhisperAX.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved
index 307759dd..3877d56d 100644
--- a/Examples/WhisperAX/WhisperAX.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved
+++ b/Examples/WhisperAX/WhisperAX.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved
@@ -1,4 +1,5 @@
 {
+  "originHash" : "cd17206b47bb810af9459722192530e3838d8e6629a970988e32a432aaa05f6e",
   "pins" : [
     {
       "identity" : "networkimage",
@@ -37,5 +38,5 @@
       }
     }
   ],
-  "version" : 2
+  "version" : 3
 }
diff --git a/Examples/WhisperAX/WhisperAXWatchApp/WhisperAXExampleView.swift b/Examples/WhisperAX/WhisperAXWatchApp/WhisperAXExampleView.swift
index 5e206781..2809c834 100644
--- a/Examples/WhisperAX/WhisperAXWatchApp/WhisperAXExampleView.swift
+++ b/Examples/WhisperAX/WhisperAXWatchApp/WhisperAXExampleView.swift
@@ -73,7 +73,7 @@ struct WhisperAXWatchView: View {
 
     var body: some View {
         NavigationSplitView {
-            if WhisperKit.deviceName().hasPrefix("Watch7") {
+            if WhisperKit.deviceName().hasPrefix("Watch7") || WhisperKit.isRunningOnSimulator {
                 modelSelectorView
                     .navigationTitle("WhisperAX")
                     .navigationBarTitleDisplayMode(.automatic)
diff --git a/Sources/WhisperKit/Core/AudioProcessor.swift b/Sources/WhisperKit/Core/AudioProcessor.swift
index 0231fba4..a746135c 100644
--- a/Sources/WhisperKit/Core/AudioProcessor.swift
+++ b/Sources/WhisperKit/Core/AudioProcessor.swift
@@ -487,7 +487,27 @@ public extension AudioProcessor {
         }
     }
     #endif
-    
+
+    /// Sets the shared `AVAudioSession` to `.playAndRecord` and activates it; the body is compiled out on macOS.
+    /// - Throws: `WhisperError.audioProcessingFailed` when the category or activation call fails.
+    func setupAudioSessionForDevice() throws {
+        #if !os(macOS) // AVAudioSession is not available on macOS
+        #if os(watchOS) // watchOS does not support .defaultToSpeaker
+        let categoryOptions: AVAudioSession.CategoryOptions = .mixWithOthers
+        #else
+        let categoryOptions: AVAudioSession.CategoryOptions = [.defaultToSpeaker, .allowBluetooth]
+        #endif
+
+        do {
+            let session = AVAudioSession.sharedInstance()
+            try session.setCategory(.playAndRecord, options: categoryOptions)
+            try session.setActive(true, options: .notifyOthersOnDeactivation)
+        } catch let error as NSError {
+            throw WhisperError.audioProcessingFailed("Failed to set up audio session: \(error)")
+        }
+        #endif
+    }
+
     func setupEngine(inputDeviceID: DeviceID? = nil) throws -> AVAudioEngine {
         let audioEngine = AVAudioEngine()
         let inputNode = audioEngine.inputNode
@@ -546,6 +566,8 @@ public extension AudioProcessor {
     func startRecordingLive(inputDeviceID: DeviceID? = nil, callback: (([Float]) -> Void)? = nil) throws {
         audioSamples = []
         audioEnergy = []
+        
+        try? setupAudioSessionForDevice()
 
         audioEngine = try setupEngine(inputDeviceID: inputDeviceID)
 
diff --git a/Sources/WhisperKit/Core/Models.swift b/Sources/WhisperKit/Core/Models.swift
index e9a7b2bd..50e92283 100644
--- a/Sources/WhisperKit/Core/Models.swift
+++ b/Sources/WhisperKit/Core/Models.swift
@@ -126,6 +126,7 @@ public enum ModelState: CustomStringConvertible {
     }
 }
 
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public struct ModelComputeOptions {
     public var melCompute: MLComputeUnits
     public var audioEncoderCompute: MLComputeUnits
@@ -138,6 +139,13 @@ public struct ModelComputeOptions {
         textDecoderCompute: MLComputeUnits = .cpuAndNeuralEngine,
         prefillCompute: MLComputeUnits = .cpuOnly
     ) {
+        if WhisperKit.isRunningOnSimulator {
+            self.melCompute = .cpuOnly
+            self.audioEncoderCompute = .cpuOnly
+            self.textDecoderCompute = .cpuOnly
+            self.prefillCompute = .cpuOnly
+            return
+        }
         self.melCompute = melCompute
         self.audioEncoderCompute = audioEncoderCompute
         self.textDecoderCompute = textDecoderCompute
diff --git a/Sources/WhisperKit/Core/Utils.swift b/Sources/WhisperKit/Core/Utils.swift
index ce614902..8d9e5202 100644
--- a/Sources/WhisperKit/Core/Utils.swift
+++ b/Sources/WhisperKit/Core/Utils.swift
@@ -238,6 +238,17 @@ extension Process {
 }
 #endif
 
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
+public extension WhisperKit {
+    static var isRunningOnSimulator: Bool { // true only in builds compiled for a simulator target
+        #if targetEnvironment(simulator) // resolved at compile time, not probed at runtime
+        return true
+        #else
+        return false
+        #endif
+    }
+}
+
 public func resolveAbsolutePath(_ inputPath: String) -> String {
     let fileManager = FileManager.default