Skip to content

Commit

Permalink
Merge branch 'argmaxinc:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
metropol authored Mar 9, 2024
2 parents f1cff8d + bfa357e commit 1340625
Show file tree
Hide file tree
Showing 20 changed files with 422 additions and 96 deletions.
16 changes: 13 additions & 3 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,15 @@ on:

jobs:
build-and-test:
runs-on: macos-14
strategy:
matrix:
os: [macos-13-xlarge, macos-14]
include:
- os: macos-13-xlarge
ios-version: "16.1" # oldest available version
- os: macos-14
ios-version: "17.2" # latest available version
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: maxim-lobanov/setup-xcode@v1
Expand Down Expand Up @@ -40,14 +48,16 @@ jobs:
run: |
set -o pipefail
xcodebuild clean build-for-testing -scheme whisperkit-Package -destination generic/platform=iOS | xcpretty
xcodebuild test -only-testing WhisperKitTests/UnitTests -scheme whisperkit-Package -destination "platform=iOS Simulator,OS=17.2,name=iPhone 15" | xcpretty
xcodebuild test -only-testing WhisperKitTests/UnitTests -scheme whisperkit-Package -destination "platform=iOS Simulator,OS=${{ matrix.ios-version }},name=iPhone 15" | xcpretty
- name: Build and Test - watchOS
if: matrix.os == 'macos-14'
run: |
set -o pipefail
xcodebuild clean build-for-testing -scheme whisperkit-Package -destination generic/platform=watchOS | xcpretty
xcodebuild test -only-testing WhisperKitTests/UnitTests -scheme whisperkit-Package -destination "platform=watchOS Simulator,OS=10.2,name=Apple Watch Ultra 2 (49mm)" | xcpretty
- name: Build and Test - visionOS
if: matrix.os == 'macos-14'
run: |
set -o pipefail
xcodebuild clean build-for-testing -scheme whisperkit-Package -destination generic/platform=visionOS | xcpretty
xcodebuild test -only-testing WhisperKitTests/UnitTests -scheme whisperkit-Package -destination "platform=visionOS Simulator,name=Apple Vision Pro" | xcpretty
xcodebuild test -only-testing WhisperKitTests/UnitTests -scheme whisperkit-Package -destination "platform=visionOS Simulator,name=Apple Vision Pro" | xcpretty
6 changes: 4 additions & 2 deletions Examples/WhisperAX/WhisperAX.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -872,9 +872,10 @@
SDKROOT = auto;
SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
SUPPORTS_MACCATALYST = NO;
SUPPORTS_XR_DESIGNED_FOR_IPHONE_IPAD = YES;
SWIFT_EMIT_LOC_STRINGS = YES;
SWIFT_VERSION = 5.0;
TARGETED_DEVICE_FAMILY = 1;
TARGETED_DEVICE_FAMILY = "1,2";
};
name = Debug;
};
Expand Down Expand Up @@ -915,9 +916,10 @@
SDKROOT = auto;
SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
SUPPORTS_MACCATALYST = NO;
SUPPORTS_XR_DESIGNED_FOR_IPHONE_IPAD = YES;
SWIFT_EMIT_LOC_STRINGS = YES;
SWIFT_VERSION = 5.0;
TARGETED_DEVICE_FAMILY = 1;
TARGETED_DEVICE_FAMILY = "1,2";
};
name = Release;
};
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"originHash" : "cd17206b47bb810af9459722192530e3838d8e6629a970988e32a432aaa05f6e",
"pins" : [
{
"identity" : "networkimage",
Expand Down Expand Up @@ -37,5 +38,5 @@
}
}
],
"version" : 2
"version" : 3
}
153 changes: 121 additions & 32 deletions Examples/WhisperAX/WhisperAX/Views/ContentView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ import AVFoundation

struct ContentView: View {
@State var whisperKit: WhisperKit? = nil
#if os(macOS)
@State var audioDevices: [AudioDevice]? = nil
#endif
@State var isRecording: Bool = false
@State var isTranscribing: Bool = false
@State var currentText: String = ""
Expand All @@ -24,7 +27,8 @@ struct ContentView: View {
@State private var availableModels: [String] = []
@State private var availableLanguages: [String] = []
@State private var disabledModels: [String] = WhisperKit.recommendedModels().disabled


@AppStorage("selectedAudioInput") private var selectedAudioInput: String = "No Audio Input"
@AppStorage("selectedModel") private var selectedModel: String = WhisperKit.recommendedModels().default
@AppStorage("selectedTab") private var selectedTab: String = "Transcribe"
@AppStorage("selectedTask") private var selectedTask: String = "transcribe"
Expand Down Expand Up @@ -242,6 +246,16 @@ struct ContentView: View {
.progressViewStyle(CircularProgressViewStyle())
.scaleEffect(0.5)
}

Button(action: {
deleteModel()
}, label: {
Image(systemName: "trash")
})
.help("Delete model")
.buttonStyle(BorderlessButtonStyle())
.disabled(localModels.count == 0)
.disabled(!localModels.contains(selectedModel))

#if os(macOS)
Button(action: {
Expand Down Expand Up @@ -302,7 +316,33 @@ struct ContentView: View {
}

// MARK: - Controls

/// Audio input selector shown in the control rows.
///
/// On macOS this renders a picker bound to `selectedAudioInput`, listing the
/// names of the devices held in `audioDevices`. On other platforms the
/// `#if os(macOS)` branch is compiled out and the `Group` is empty.
var audioDevicesView: some View {
    Group {
        #if os(macOS)
        HStack {
            // The picker is only shown once at least one device is known.
            if let devices = audioDevices, !devices.isEmpty {
                Picker("", selection: $selectedAudioInput) {
                    ForEach(devices, id: \.self) { inputDevice in
                        // Devices are selected by name, so the name doubles as the tag.
                        Text(inputDevice.name).tag(inputDevice.name)
                    }
                }
                .frame(width: 250)
                // The input cannot be switched while a recording is in progress.
                .disabled(isRecording)
            }
        }
        .onAppear {
            // Refresh the device list each time the view appears.
            audioDevices = AudioProcessor.getAudioDevices()

            // While the stored selection is still the placeholder value,
            // default to the first device in the list (if there is one).
            if selectedAudioInput == "No Audio Input",
               let firstDevice = audioDevices?.first {
                selectedAudioInput = firstDevice.name
            }
        }
        #endif
    }
}

var controlsView: some View {
VStack {
basicSettingsView
Expand All @@ -318,7 +358,13 @@ struct ContentView: View {
Label("Reset", systemImage: "arrow.clockwise")
}
.buttonStyle(.borderless)

Spacer()

audioDevicesView

Spacer()

Button {
showAdvancedOptions.toggle()
} label: {
Expand Down Expand Up @@ -392,39 +438,51 @@ struct ContentView: View {
}
}
case "Stream":
HStack {
Button {
resetState()
} label: {
Label("Reset", systemImage: "arrow.clockwise")
}
.frame(minWidth: 0, maxWidth: .infinity)
.buttonStyle(.borderless)
VStack {
HStack {
Button {
resetState()
} label: {
Label("Reset", systemImage: "arrow.clockwise")
}
.frame(minWidth: 0, maxWidth: .infinity)
.buttonStyle(.borderless)

Spacer()

audioDevicesView

Spacer()

Button {
withAnimation {
toggleRecording(shouldLoop: true)
VStack {
Button {
showAdvancedOptions.toggle()
} label: {
Label("Settings", systemImage: "slider.horizontal.3")
}
.frame(minWidth: 0, maxWidth: .infinity)
.buttonStyle(.borderless)
}
} label: {
Image(systemName: !isRecording ? "record.circle" : "stop.circle.fill")
.resizable()
.scaledToFit()
.frame(width: 70, height: 70)
.padding()
.foregroundColor(modelState != .loaded ? .gray : .red)
}
.contentTransition(.symbolEffect(.replace))
.buttonStyle(BorderlessButtonStyle())
.disabled(modelState != .loaded)
.frame(minWidth: 0, maxWidth: .infinity)

Button {
showAdvancedOptions.toggle()
} label: {
Label("Settings", systemImage: "slider.horizontal.3")

HStack {
Button {
withAnimation {
toggleRecording(shouldLoop: true)
}
} label: {
Image(systemName: !isRecording ? "record.circle" : "stop.circle.fill")
.resizable()
.scaledToFit()
.frame(width: 70, height: 70)
.padding()
.foregroundColor(modelState != .loaded ? .gray : .red)
}
.contentTransition(.symbolEffect(.replace))
.buttonStyle(BorderlessButtonStyle())
.disabled(modelState != .loaded)
.frame(minWidth: 0, maxWidth: .infinity)
}
.frame(minWidth: 0, maxWidth: .infinity)
.buttonStyle(.borderless)
}
default:
EmptyView()
Expand Down Expand Up @@ -751,13 +809,35 @@ struct ContentView: View {
try await whisperKit.loadModels()

await MainActor.run {
if !localModels.contains(model) {
localModels.append(model)
}

availableLanguages = whisperKit.tokenizer?.langauges.map { $0.key }.sorted() ?? ["english"]
loadingProgressValue = 1.0
modelState = whisperKit.modelState
}
}
}
}

/// Deletes the folder of the currently selected model from disk.
///
/// Does nothing when `selectedModel` is not listed in `localModels`.
/// On success the model is removed from `localModels` and `modelState`
/// is set to `.unloaded`. If the removal throws, the error is printed
/// and no state is changed.
func deleteModel() {
    // Only models tracked as local can be deleted.
    guard localModels.contains(selectedModel) else { return }

    let folderName = "openai_whisper-\(selectedModel)"
    let modelFolder = URL(fileURLWithPath: localModelPath).appendingPathComponent(folderName)

    do {
        try FileManager.default.removeItem(at: modelFolder)
    } catch {
        print("Error deleting model: \(error)")
        return
    }

    // Drop the first matching entry so the list no longer reports the model as local.
    if let position = localModels.firstIndex(of: selectedModel) {
        localModels.remove(at: position)
    }

    modelState = .unloaded
}

func updateProgressBar(targetProgress: Float, maxTime: TimeInterval) async {
let initialProgress = loadingProgressValue
Expand Down Expand Up @@ -854,8 +934,17 @@ struct ContentView: View {
print("Microphone access was not granted.")
return
}

var deviceId: DeviceID?
#if os(macOS)
if self.selectedAudioInput != "No Audio Input",
let devices = self.audioDevices,
let device = devices.first(where: {$0.name == selectedAudioInput}) {
deviceId = device.id
}
#endif

try? audioProcessor.startRecordingLive { _ in
try? audioProcessor.startRecordingLive(inputDeviceID: deviceId) { _ in
DispatchQueue.main.async {
bufferEnergy = whisperKit?.audioProcessor.relativeEnergy ?? []
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ struct WhisperAXWatchView: View {

var body: some View {
NavigationSplitView {
if WhisperKit.deviceName().hasPrefix("Watch7") {
if WhisperKit.deviceName().hasPrefix("Watch7") || WhisperKit.isRunningOnSimulator {
modelSelectorView
.navigationTitle("WhisperAX")
.navigationBarTitleDisplayMode(.automatic)
Expand Down
15 changes: 7 additions & 8 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,8 @@ import PackageDescription
let package = Package(
name: "whisperkit",
platforms: [
.iOS(.v17),
.macOS(.v14),
.watchOS(.v10),
.visionOS(.v1)
.iOS(.v16),
.macOS(.v13),
],
products: [
.library(
Expand All @@ -18,7 +16,8 @@ let package = Package(
),
.executable(
name: "transcribe",
targets: ["WhisperKitCLI"])
targets: ["WhisperKitCLI"]
),
],
dependencies: [
.package(url: "https://github.com/huggingface/swift-transformers.git", exact: "0.1.2"),
Expand All @@ -35,7 +34,7 @@ let package = Package(
name: "WhisperKitCLI",
dependencies: [
"WhisperKit",
.product(name: "ArgumentParser", package: "swift-argument-parser")
.product(name: "ArgumentParser", package: "swift-argument-parser"),
]
),
.testTarget(
Expand All @@ -51,11 +50,11 @@ let package = Package(
"Makefile",
"README.md",
"LICENSE",
"CONTRIBUTING.md"
"CONTRIBUTING.md",
],
resources: [
.process("Tests/WhisperKitTests/Resources"),
.copy("Models/whisperkit-coreml")
.copy("Models/whisperkit-coreml"),
]
),
]
Expand Down
4 changes: 2 additions & 2 deletions Sources/WhisperKit/Core/AudioEncoder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public protocol AudioEncoding {
func encodeFeatures(_ features: MLMultiArray) async throws -> MLMultiArray?
}

@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class AudioEncoder: AudioEncoding, WhisperMLModel {
public var model: MLModel?

Expand Down Expand Up @@ -46,7 +46,7 @@ public class AudioEncoder: AudioEncoding, WhisperMLModel {

try Task.checkCancellation()

let outputFeatures = try await model.prediction(from: modelInputs, options: MLPredictionOptions())
let outputFeatures = try await model.asyncPrediction(from: modelInputs, options: MLPredictionOptions())

let output = AudioEncoderOutput(features: outputFeatures)

Expand Down
Loading

0 comments on commit 1340625

Please sign in to comment.