refactor: get all scripts to be POSIX Compliant #1725

Merged 16 commits on Jan 12, 2024
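The changes below all follow the same pattern: bash-only constructs (the `function` keyword, arrays, `[[ ... ]]` tests, `local`) are replaced with portable POSIX sh equivalents. A minimal, self-contained sketch of those substitutions, written for illustration rather than taken from the diff itself:

    #!/bin/sh
    # Illustrative only: the POSIX idioms this PR applies across the model scripts.

    # a whitespace-separated string replaces a bash array
    models="tiny base small"

    # POSIX function definition: no `function` keyword, no `local`
    list_models() {
        for m in $models; do        # word splitting replaces "${models[@]}"
            printf " %s" "$m"
        done
        printf "\n"
    }

    # membership test via grep instead of a [[ ... =~ ... ]] regex match
    model="base"
    if ! echo "$models" | grep -q -w "$model"; then
        printf "Invalid model: %s\n" "$model"
        exit 1
    fi

    list_models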
50 changes: 25 additions & 25 deletions models/download-coreml-model.sh
@@ -1,4 +1,4 @@
#!/bin/bash
#!/bin/sh

# This script downloads Whisper model files that have already been converted to Core ML format.
# This way you don't have to convert them yourself.
@@ -7,76 +7,76 @@ src="https://huggingface.co/datasets/ggerganov/whisper.cpp-coreml"
pfx="resolve/main/ggml"

# get the path of this script
function get_script_path() {
get_script_path() {
if [ -x "$(command -v realpath)" ]; then
echo "$(dirname $(realpath $0))"
dirname "$(realpath "$0")"
else
local ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)"
echo "$ret"
_ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 || exit ; pwd -P)"
echo "$_ret"
fi
}

models_path="$(get_script_path)"

# Whisper models
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
models="tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3"

# list available models
function list_models {
printf "\n"
printf " Available models:"
for model in "${models[@]}"; do
printf " $model"
done
printf "\n\n"
list_models() {
printf "\n"
printf " Available models:"
for model in $models; do
printf " %s" "$models"
done
printf "\n\n"
}

if [ "$#" -ne 1 ]; then
printf "Usage: $0 <model>\n"
printf "Usage: %s <model>\n" "$0"
list_models

exit 1
fi

model=$1

if [[ ! " ${models[@]} " =~ " ${model} " ]]; then
printf "Invalid model: $model\n"
if ! echo "$models" | grep -q -w "$model"; then
printf "Invalid model: %s\n" "$model"
list_models

exit 1
fi

# download Core ML model

printf "Downloading Core ML model $model from '$src' ...\n"
printf "Downloading Core ML model %s from '%s' ...\n" "$model" "$src"

cd $models_path
cd "$models_path" || exit

if [ -f "ggml-$model.mlmodel" ]; then
printf "Model $model already exists. Skipping download.\n"
printf "Model %s already exists. Skipping download.\n" "$model"
exit 0
fi

if [ -x "$(command -v wget)" ]; then
wget --quiet --show-progress -O ggml-$model.mlmodel $src/$pfx-$model.mlmodel
wget --quiet --show-progress -O ggml-"$model".mlmodel $src/$pfx-"$model".mlmodel
elif [ -x "$(command -v curl)" ]; then
curl -L --output ggml-$model.mlmodel $src/$pfx-$model.mlmodel
curl -L --output ggml-"$model".mlmodel $src/$pfx-"$model".mlmodel
else
printf "Either wget or curl is required to download models.\n"
exit 1
fi


if [ $? -ne 0 ]; then
printf "Failed to download Core ML model $model \n"
printf "Failed to download Core ML model %s \n" "$model"
printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
exit 1
fi

printf "Done! Model '$model' saved in 'models/ggml-$model.mlmodel'\n"
printf "Done! Model '%s' saved in 'models/ggml-%s.mlmodel'\n" "$model" "$model"
printf "Run the following command to compile it:\n\n"
printf " $ xcrun coremlc compile ./models/ggml-$model.mlmodel ./models\n\n"
printf " $ xcrun coremlc compile ./models/ggml-%s.mlmodel ./models\n\n" "$model"
printf "You can now use it like this:\n\n"
printf " $ ./main -m models/ggml-$model.bin -f samples/jfk.wav\n"
printf " $ ./main -m models/ggml-%s.bin -f samples/jfk.wav\n" "$model"
printf "\n"
87 changes: 44 additions & 43 deletions models/download-ggml-model.sh
@@ -1,4 +1,4 @@
#!/bin/bash
#!/bin/sh

# This script downloads Whisper model files that have already been converted to ggml format.
# This way you don't have to convert them yourself.
@@ -10,102 +10,103 @@ src="https://huggingface.co/ggerganov/whisper.cpp"
pfx="resolve/main/ggml"

# get the path of this script
function get_script_path() {
get_script_path() {
if [ -x "$(command -v realpath)" ]; then
echo "$(dirname "$(realpath "$0")")"
dirname "$(realpath "$0")"
else
local ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)"
echo "$ret"
_ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 || exit ; pwd -P)"
echo "$_ret"
fi
}

models_path="${2:-$(get_script_path)}"

# Whisper models
models=(
"tiny.en"
"tiny"
"tiny-q5_1"
"tiny.en-q5_1"
"base.en"
"base"
"base-q5_1"
"base.en-q5_1"
"small.en"
"small.en-tdrz"
"small"
"small-q5_1"
"small.en-q5_1"
"medium"
"medium.en"
"medium-q5_0"
"medium.en-q5_0"
"large-v1"
"large-v2"
"large-v3"
"large-v3-q5_0"
)
models="tiny.en
tiny
tiny-q5_1
tiny.en-q5_1
base.en
base
base-q5_1
base.en-q5_1
small.en
small.en-tdrz
small
small-q5_1
small.en-q5_1
medium
medium.en
medium-q5_0
medium.en-q5_0
large-v1
large-v2
large-v3
large-v3-q5_0"

# list available models
function list_models {
list_models() {
printf "\n"
printf " Available models:"
for model in "${models[@]}"; do
printf " $model"
for model in $models; do
printf " %s" "$model"
done
printf "\n\n"
}

if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
printf "Usage: $0 <model> [models_path]\n"
printf "Usage: %s <model> [models_path]\n" "$0"
list_models

exit 1
fi

model=$1

if [[ ! " ${models[@]} " =~ " ${model} " ]]; then
printf "Invalid model: $model\n"
if ! echo "$models" | grep -q -w "$model"; then
printf "Invalid model: %s\n" "$model"
list_models

exit 1
fi

# check if model contains `tdrz` and update the src and pfx accordingly
if [[ $model == *"tdrz"* ]]; then
if echo "$model" | grep -q "tdrz"; then
src="https://huggingface.co/akashmjn/tinydiarize-whisper.cpp"
pfx="resolve/main/ggml"
fi

echo "$model" | grep -q '^"tdrz"*$'

# download ggml model

printf "Downloading ggml model $model from '$src' ...\n"
printf "Downloading ggml model %s from '%s' ...\n" "$model" "$src"

cd "$models_path"
cd "$models_path" || exit

if [ -f "ggml-$model.bin" ]; then
printf "Model $model already exists. Skipping download.\n"
printf "Model %s already exists. Skipping download.\n" "$model"
exit 0
fi

if [ -x "$(command -v wget)" ]; then
wget --no-config --quiet --show-progress -O ggml-$model.bin $src/$pfx-$model.bin
wget --no-config --quiet --show-progress -O ggml-"$model".bin $src/$pfx-"$model".bin
elif [ -x "$(command -v curl)" ]; then
curl -L --output ggml-$model.bin $src/$pfx-$model.bin
curl -L --output ggml-"$model".bin $src/$pfx-"$model".bin
else
printf "Either wget or curl is required to download models.\n"
exit 1
fi


if [ $? -ne 0 ]; then
printf "Failed to download ggml model $model \n"
printf "Failed to download ggml model %s \n" "$model"
printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
exit 1
fi

printf "Done! Model '$model' saved in '$models_path/ggml-$model.bin'\n"

printf "Done! Model '%s' saved in '%s/ggml-%s.bin'\n" "$model" "$models_path" "$model"
printf "You can now use it like this:\n\n"
printf " $ ./main -m $models_path/ggml-$model.bin -f samples/jfk.wav\n"
printf " $ ./main -m %s/ggml-%s.bin -f samples/jfk.wav\n" "$models_path" "$model"
printf "\n"
4 changes: 2 additions & 2 deletions models/generate-coreml-interface.sh
@@ -1,12 +1,12 @@
#!/bin/bash
#!/bin/sh
#
# This generates:
# - coreml/whisper-encoder-impl.h and coreml/whisper-encoder-impl.m
# - coreml/whisper-decoder-impl.h and coreml/whisper-decoder-impl.m
#

wd=$(dirname "$0")
cd "$wd/../"
cd "$wd/../" || exit

python3 models/convert-whisper-to-coreml.py --model tiny.en

20 changes: 10 additions & 10 deletions models/generate-coreml-model.sh
@@ -1,12 +1,12 @@
#!/bin/bash
#!/bin/sh

# Usage: ./generate-coreml-model.sh <model-name>
if [ $# -eq 0 ]; then
echo "No model name supplied"
echo "Usage for Whisper models: ./generate-coreml-model.sh <model-name>"
echo "Usage for HuggingFace models: ./generate-coreml-model.sh -h5 <model-name> <model-path>"
exit 1
elif [[ "$1" == "-h5" && $# != 3 ]]; then
elif [ "$1" = "-h5" ] && [ $# != 3 ]; then
echo "No model name and model path supplied for a HuggingFace model"
echo "Usage for HuggingFace models: ./generate-coreml-model.sh -h5 <model-name> <model-path>"
exit 1
@@ -15,20 +15,20 @@ fi
mname="$1"

wd=$(dirname "$0")
cd "$wd/../"
cd "$wd/../" || exit

if [[ $mname == "-h5" ]]; then
if [ "$mname" = "-h5" ]; then
mname="$2"
mpath="$3"
echo $mpath
python3 models/convert-h5-to-coreml.py --model-name $mname --model-path $mpath --encoder-only True
echo "$mpath"
python3 models/convert-h5-to-coreml.py --model-name "$mname" --model-path "$mpath" --encoder-only True
else
python3 models/convert-whisper-to-coreml.py --model $mname --encoder-only True --optimize-ane True
python3 models/convert-whisper-to-coreml.py --model "$mname" --encoder-only True --optimize-ane True
fi

xcrun coremlc compile models/coreml-encoder-${mname}.mlpackage models/
rm -rf models/ggml-${mname}-encoder.mlmodelc
mv -v models/coreml-encoder-${mname}.mlmodelc models/ggml-${mname}-encoder.mlmodelc
xcrun coremlc compile models/coreml-encoder-"${mname}".mlpackage models/
rm -rf models/ggml-"${mname}"-encoder.mlmodelc
mv -v models/coreml-encoder-"${mname}".mlmodelc models/ggml-"${mname}"-encoder.mlmodelc

# TODO: decoder (sometime in the future maybe)
#xcrun coremlc compile models/whisper-decoder-${mname}.mlpackage models/
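generate-coreml-model.sh also shows how compound `[[ ... && ... ]]` tests translate: POSIX `[` has no `&&` operator of its own, so the conditions become two separate `[ ]` invocations joined by the shell's `&&`, and every expansion is quoted so names and paths containing spaces survive word splitting. A reduced sketch of that argument handling (illustrative, mirroring the diff rather than reproducing it):

    #!/bin/sh
    # bash: if [[ "$1" == "-h5" && $# != 3 ]]; then ...
    if [ "$1" = "-h5" ] && [ "$#" != 3 ]; then
        echo "Usage: $0 -h5 <model-name> <model-path>"
        exit 1
    fi

    mname="$2"
    mpath="$3"

    # quoting keeps model names and paths with spaces intact
    python3 models/convert-h5-to-coreml.py --model-name "$mname" --model-path "$mpath" --encoder-only True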