Skip to content

Commit

Permalink
ci : add whisper test
Browse files Browse the repository at this point in the history
ggml-ci
  • Loading branch information
ggerganov committed Sep 15, 2023
1 parent 1472fed commit a1f6ca4
Showing 1 changed file with 64 additions and 32 deletions.
96 changes: 64 additions & 32 deletions ci/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -161,76 +161,107 @@ function gg_sum_gpt_2 {
gg_printf '```\n'
}

# mnist

# Convert the bundled MNIST state dict to ggml f32 and smoke-test both the
# direct runner and the exported-graph runner.
# Globals: SRC (repo root), OUT (log dir), ci (step name — set by the CI driver).
# NOTE(review): assumes the MNIST model files are already present under
# examples/mnist/models/mnist — confirm against repo setup instructions.
function gg_run_mnist {
    cd "${SRC}" || exit 1

    cd build-ci-release || exit 1

    set -e

    mkdir -p models/mnist
    python3 ../examples/mnist/convert-h5-to-ggml.py ../examples/mnist/models/mnist/mnist_model.state_dict

    model_f32="./models/mnist/ggml-model-f32.bin"
    samples="../examples/mnist/models/mnist/t10k-images.idx3-ubyte"

    # first command runs and exports "mnist.ggml", the second command runs the exported model

    (time ./bin/mnist     "${model_f32}" "${samples}" ) 2>&1 | tee -a "$OUT/${ci}-mnist.log"
    (time ./bin/mnist-cpu ./mnist.ggml   "${samples}" ) 2>&1 | tee -a "$OUT/${ci}-mnist.log"

    set +e
}

# Summarize the MNIST CI step for the report: section header, recorded exit
# status, and the captured run log, emitted via the project's gg_printf helper
# (presumably appends to the markdown summary — defined elsewhere in this file).
# Globals: ci (step name), OUT (log dir).
function gg_sum_mnist {
    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'MNIST\n'
    # exit status written by the gg_run wrapper
    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
    gg_printf '```\n'
    gg_printf '%s\n' "$(cat $OUT/${ci}-mnist.log)"
    gg_printf '```\n'
}

# whisper

# Download the Whisper base.en model plus a sample WAV and run a short
# transcription as a smoke test.
# Globals: SRC (repo root), OUT (log dir), ci (step name — set by the CI driver).
function gg_run_whisper {
    cd "${SRC}" || exit 1

    # cached downloads via the project's gg_wget helper
    gg_wget models-mnt/whisper/ https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin
    gg_wget models-mnt/whisper/ https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav

    cd build-ci-release || exit 1

    set -e

    path_models="../models-mnt/whisper/"
    model_f16="${path_models}/ggml-base.en.bin"
    audio_0="${path_models}/jfk.wav"

    # '|| true' keeps the step green even if transcription fails — intentional:
    # this run only collects the log; the status check happens elsewhere
    (time ./bin/whisper -m "${model_f16}" -f "${audio_0}" 2>&1 | tee -a "$OUT/${ci}-main.log") || true

    set +e
}

(time ./bin/mnist ${model_f32} ${samples} ) 2>&1 | tee -a $OUT/${ci}-mnist.log
(time ./bin/mnist-cpu ./mnist.ggml ${samples} ) 2>&1 | tee -a $OUT/${ci}-mnist.log
# Summarize the Whisper CI step for the report: section header, recorded exit
# status, and the captured transcription log, emitted via the project's
# gg_printf helper (presumably appends to the markdown summary — defined
# elsewhere in this file).
# Globals: ci (step name), OUT (log dir) — assumed set by the CI driver.
function gg_sum_whisper {
gg_printf '### %s\n\n' "${ci}"

gg_printf 'Runs short Whisper transcription\n'
# exit status written by the gg_run wrapper
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
gg_printf '```\n'
gg_printf '%s\n' "$(cat $OUT/${ci}-main.log)"
gg_printf '```\n'
}

# mpt

# Download MPT-7B, convert to ggml f16, quantize to q4_0, and run a short
# text-generation smoke test on both models.
# Globals: SRC (repo root), OUT (log dir), ci (step name — set by the CI driver).
# Fix: the two 'cd' calls ran before 'set -e' and were unchecked — a failed cd
# would execute everything in the wrong directory. Also quoted all expansions
# (SC2086) so paths with spaces cannot word-split.
function gg_run_mpt {
    cd "${SRC}" || exit 1

    # model config + tokenizer (raw) and weight shards (resolve), cached by gg_wget
    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/config.json
    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/tokenizer.json
    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/tokenizer_config.json
    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/pytorch_model.bin.index.json
    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/configuration_mpt.py
    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/resolve/main/pytorch_model-00001-of-00002.bin
    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/resolve/main/pytorch_model-00002-of-00002.bin

    cd build-ci-release || exit 1

    set -e

    path_models="../models-mnt/mpt/7B"
    model_f16="${path_models}/ggml-model-f16.bin"
    model_q4_0="${path_models}/ggml-model-q4_0.bin"

    # HF checkpoint -> ggml f16, then quantize f16 -> q4_0
    python3 ../examples/mpt/convert-h5-to-ggml.py "${path_models}" 1
    ./bin/mpt-quantize "${model_f16}" "${model_q4_0}" q4_0

    (time ./bin/mpt --model "${model_f16}"  -s 1234 -n 64 -p "I believe the meaning of life is") 2>&1 | tee -a "$OUT/${ci}-tg.log"
    (time ./bin/mpt --model "${model_q4_0}" -s 1234 -n 64 -p "I believe the meaning of life is") 2>&1 | tee -a "$OUT/${ci}-tg.log"

    set +e
}

# Summarize the MPT CI step for the report: section header, recorded exit
# status, and the captured generation log, emitted via the project's gg_printf
# helper (presumably appends to the markdown summary — defined elsewhere in
# this file).
# Globals: ci (step name), OUT (log dir).
function gg_sum_mpt {
    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'Runs short MPT text generation\n'
    # exit status written by the gg_run wrapper
    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
    gg_printf '```\n'
    gg_printf '%s\n' "$(cat $OUT/${ci}-tg.log)"
    gg_printf '```\n'
}

Expand All @@ -252,6 +283,7 @@ test $ret -eq 0 && gg_run ctest_debug
test $ret -eq 0 && gg_run ctest_release
test $ret -eq 0 && gg_run gpt_2
test $ret -eq 0 && gg_run mnist
test $ret -eq 0 && gg_run whisper

if [ -z $GG_BUILD_LOW_PERF ]; then
test $ret -eq 0 && gg_run mpt
Expand Down

0 comments on commit a1f6ca4

Please sign in to comment.