Skip to content

Commit

Permalink
fixed compilation error
Browse files Browse the repository at this point in the history
  • Loading branch information
jkrukowski committed Apr 15, 2024
1 parent a39cf65 commit 083fe58
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 125 deletions.
244 changes: 121 additions & 123 deletions Sources/WhisperKit/Core/Models.swift
Original file line number Diff line number Diff line change
Expand Up @@ -906,128 +906,6 @@ public class TextDecoderCachePrefillOutput: MLFeatureProvider {
}
}

// MARK: Languages

@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public extension WhisperKit {
    /// Lookup table from a lowercase language name to its short language code.
    ///
    /// Several names may share one code (for example `"burmese"` and `"myanmar"`
    /// both map to `"my"`), so the values of this dictionary are not unique.
    /// This is a computed property: the dictionary literal is evaluated on every access.
    static var languages: [String: String] {
        return [
            "english": "en",
            "chinese": "zh",
            "german": "de",
            "spanish": "es",
            "russian": "ru",
            "korean": "ko",
            "french": "fr",
            "japanese": "ja",
            "portuguese": "pt",
            "turkish": "tr",
            "polish": "pl",
            "catalan": "ca",
            "dutch": "nl",
            "arabic": "ar",
            "swedish": "sv",
            "italian": "it",
            "indonesian": "id",
            "hindi": "hi",
            "finnish": "fi",
            "vietnamese": "vi",
            "hebrew": "he",
            "ukrainian": "uk",
            "greek": "el",
            "malay": "ms",
            "czech": "cs",
            "romanian": "ro",
            "danish": "da",
            "hungarian": "hu",
            "tamil": "ta",
            "norwegian": "no",
            "thai": "th",
            "urdu": "ur",
            "croatian": "hr",
            "bulgarian": "bg",
            "lithuanian": "lt",
            "latin": "la",
            "maori": "mi",
            "malayalam": "ml",
            "welsh": "cy",
            "slovak": "sk",
            "telugu": "te",
            "persian": "fa",
            "latvian": "lv",
            "bengali": "bn",
            "serbian": "sr",
            "azerbaijani": "az",
            "slovenian": "sl",
            "kannada": "kn",
            "estonian": "et",
            "macedonian": "mk",
            "breton": "br",
            "basque": "eu",
            "icelandic": "is",
            "armenian": "hy",
            "nepali": "ne",
            "mongolian": "mn",
            "bosnian": "bs",
            "kazakh": "kk",
            "albanian": "sq",
            "swahili": "sw",
            "galician": "gl",
            "marathi": "mr",
            "punjabi": "pa",
            "sinhala": "si",
            "khmer": "km",
            "shona": "sn",
            "yoruba": "yo",
            "somali": "so",
            "afrikaans": "af",
            "occitan": "oc",
            "georgian": "ka",
            "belarusian": "be",
            "tajik": "tg",
            "sindhi": "sd",
            "gujarati": "gu",
            "amharic": "am",
            "yiddish": "yi",
            "lao": "lo",
            "uzbek": "uz",
            "faroese": "fo",
            "haitian creole": "ht",
            "pashto": "ps",
            "turkmen": "tk",
            "nynorsk": "nn",
            "maltese": "mt",
            "sanskrit": "sa",
            "luxembourgish": "lb",
            "myanmar": "my",
            "tibetan": "bo",
            "tagalog": "tl",
            "malagasy": "mg",
            "assamese": "as",
            "tatar": "tt",
            "hawaiian": "haw",
            "lingala": "ln",
            "hausa": "ha",
            "bashkir": "ba",
            "javanese": "jw",
            "sundanese": "su",
            "cantonese": "yue",
            // The remaining entries are additional names whose codes already appear above.
            "burmese": "my",
            "valencian": "ca",
            "flemish": "nl",
            "haitian": "ht",
            "letzeburgesch": "lb",
            "pushto": "ps",
            "panjabi": "pa",
            "moldavian": "ro",
            "moldovan": "ro",
            "sinhalese": "si",
            "castilian": "es",
            "mandarin": "zh",
        ]
    }
}

// MARK: SpecialTokens

public struct SpecialTokens {
Expand Down Expand Up @@ -1099,7 +977,7 @@ struct WhisperTokenizerWrapper: WhisperTokenizer {
self.tokenizer = tokenizer
self.specialTokens = specialTokens
self.allLanguageTokens = Set(
WhisperKit.languages
Constants.languages
.compactMap { tokenizer.convertTokenToId("<|\($0.value)|>") }
.filter { $0 > specialTokens.specialTokenBegin }
)
Expand Down Expand Up @@ -1248,3 +1126,123 @@ extension WhisperTokenizerWrapper {
static var defaultNoTimestampsToken: Int { 50363 }
static var defaultTimeTokenBegin: Int { 50364 }
}

// MARK: Constants

/// Namespace for library-wide constant values.
///
/// Declared as a case-less enum, so it cannot be instantiated.
public enum Constants {
    /// Lookup table from a lowercase language name to its short language code.
    ///
    /// Several names may share one code (for example `"burmese"` and `"myanmar"`
    /// both map to `"my"`), so the values of this dictionary are not unique.
    public static let languages: [String: String] = [
        "english": "en",
        "chinese": "zh",
        "german": "de",
        "spanish": "es",
        "russian": "ru",
        "korean": "ko",
        "french": "fr",
        "japanese": "ja",
        "portuguese": "pt",
        "turkish": "tr",
        "polish": "pl",
        "catalan": "ca",
        "dutch": "nl",
        "arabic": "ar",
        "swedish": "sv",
        "italian": "it",
        "indonesian": "id",
        "hindi": "hi",
        "finnish": "fi",
        "vietnamese": "vi",
        "hebrew": "he",
        "ukrainian": "uk",
        "greek": "el",
        "malay": "ms",
        "czech": "cs",
        "romanian": "ro",
        "danish": "da",
        "hungarian": "hu",
        "tamil": "ta",
        "norwegian": "no",
        "thai": "th",
        "urdu": "ur",
        "croatian": "hr",
        "bulgarian": "bg",
        "lithuanian": "lt",
        "latin": "la",
        "maori": "mi",
        "malayalam": "ml",
        "welsh": "cy",
        "slovak": "sk",
        "telugu": "te",
        "persian": "fa",
        "latvian": "lv",
        "bengali": "bn",
        "serbian": "sr",
        "azerbaijani": "az",
        "slovenian": "sl",
        "kannada": "kn",
        "estonian": "et",
        "macedonian": "mk",
        "breton": "br",
        "basque": "eu",
        "icelandic": "is",
        "armenian": "hy",
        "nepali": "ne",
        "mongolian": "mn",
        "bosnian": "bs",
        "kazakh": "kk",
        "albanian": "sq",
        "swahili": "sw",
        "galician": "gl",
        "marathi": "mr",
        "punjabi": "pa",
        "sinhala": "si",
        "khmer": "km",
        "shona": "sn",
        "yoruba": "yo",
        "somali": "so",
        "afrikaans": "af",
        "occitan": "oc",
        "georgian": "ka",
        "belarusian": "be",
        "tajik": "tg",
        "sindhi": "sd",
        "gujarati": "gu",
        "amharic": "am",
        "yiddish": "yi",
        "lao": "lo",
        "uzbek": "uz",
        "faroese": "fo",
        "haitian creole": "ht",
        "pashto": "ps",
        "turkmen": "tk",
        "nynorsk": "nn",
        "maltese": "mt",
        "sanskrit": "sa",
        "luxembourgish": "lb",
        "myanmar": "my",
        "tibetan": "bo",
        "tagalog": "tl",
        "malagasy": "mg",
        "assamese": "as",
        "tatar": "tt",
        "hawaiian": "haw",
        "lingala": "ln",
        "hausa": "ha",
        "bashkir": "ba",
        "javanese": "jw",
        "sundanese": "su",
        "cantonese": "yue",
        // The remaining entries are additional names whose codes already appear above.
        "burmese": "my",
        "valencian": "ca",
        "flemish": "nl",
        "haitian": "ht",
        "letzeburgesch": "lb",
        "pushto": "ps",
        "panjabi": "pa",
        "moldavian": "ro",
        "moldovan": "ro",
        "sinhalese": "si",
        "castilian": "es",
        "mandarin": "zh",
    ]
}
4 changes: 2 additions & 2 deletions Sources/WhisperKitCLI/Transcribe.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ struct Transcribe: AsyncParsableCommand {

mutating func validate() throws {
if let language = cliArguments.language {
if !WhisperKit.languages.values.contains(language) {
throw ValidationError("Invalid language code \"\(language)\". Supported languages: \(WhisperKit.languages.values)")
if !Constants.languages.values.contains(language) {
throw ValidationError("Invalid language code \"\(language)\". Supported languages: \(Constants.languages.values)")
}
}
}
Expand Down

0 comments on commit 083fe58

Please sign in to comment.