Super Simple Whisper Server #1380

Merged 11 commits on Nov 20, 2023. Showing changes from 7 commits.
1 change: 1 addition & 0 deletions examples/CMakeLists.txt
@@ -64,6 +64,7 @@ elseif(CMAKE_JS_VERSION)
else()
add_subdirectory(main)
add_subdirectory(stream)
+add_subdirectory(server)
add_subdirectory(command)
add_subdirectory(bench)
add_subdirectory(quantize)
6 changes: 6 additions & 0 deletions examples/server/CMakeLists.txt
@@ -0,0 +1,6 @@
set(TARGET server)
add_executable(${TARGET} server.cpp httplib.h json.hpp)

include(DefaultTargetOptions)

target_link_libraries(${TARGET} PRIVATE common whisper ${CMAKE_THREAD_LIBS_INIT})
59 changes: 59 additions & 0 deletions examples/server/README.md
@@ -0,0 +1,59 @@
# whisper.cpp http server

Simple HTTP server. WAV files are passed to the inference model via HTTP requests.

```
./server -h

usage: ./bin/server [options]

options:
  -h,        --help              [default] show this help message and exit
  -t N,      --threads N         [4      ] number of threads to use during computation
  -p N,      --processors N      [1      ] number of processors to use during computation
  -ot N,     --offset-t N        [0      ] time offset in milliseconds
  -on N,     --offset-n N        [0      ] segment index offset
  -d N,      --duration N        [0      ] duration of audio to process in milliseconds
  -mc N,     --max-context N     [-1     ] maximum number of text context tokens to store
  -ml N,     --max-len N         [0      ] maximum segment length in characters
  -sow,      --split-on-word     [false  ] split on word rather than on token
  -bo N,     --best-of N         [2      ] number of best candidates to keep
  -bs N,     --beam-size N       [-1     ] beam size for beam search
  -wt N,     --word-thold N      [0.01   ] word timestamp probability threshold
  -et N,     --entropy-thold N   [2.40   ] entropy threshold for decoder fail
  -lpt N,    --logprob-thold N   [-1.00  ] log probability threshold for decoder fail
  -debug,    --debug-mode        [false  ] enable debug mode (e.g. dump log_mel)
  -tr,       --translate         [false  ] translate from source language to english
  -di,       --diarize           [false  ] stereo audio diarization
  -tdrz,     --tinydiarize       [false  ] enable tinydiarize (requires a tdrz model)
  -nf,       --no-fallback       [false  ] do not use temperature fallback while decoding
  -ps,       --print-special     [false  ] print special tokens
  -pc,       --print-colors      [false  ] print colors
  -pp,       --print-progress    [false  ] print progress
  -nt,       --no-timestamps     [false  ] do not print timestamps
  -l LANG,   --language LANG     [en     ] spoken language ('auto' for auto-detect)
  -dl,       --detect-language   [false  ] exit after automatically detecting language
             --prompt PROMPT     [       ] initial prompt
  -m FNAME,  --model FNAME       [models/ggml-base.en.bin] model path
  -oved D,   --ov-e-device DNAME [CPU    ] the OpenVINO device used for encode inference
  --host HOST,                   [127.0.0.1] Hostname/IP address for the server
  --port PORT,                   [8080   ] Port number for the server
```

## request examples

**/inference**
```
curl 127.0.0.1:8080/inference \
-H "Content-Type: multipart/form-data" \
-F file="<file-path>" \
-F temperature="0.2" \
-F response-format="json"
```
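
For reference, the same request can be issued from C++ using the cpp-httplib and nlohmann/json headers that the server itself vendors (httplib.h, json.hpp). The sketch below is illustrative rather than part of this PR: the WAV path is a placeholder, and it assumes the JSON response exposes the transcription under a "text" key.

```
#include "httplib.h"
#include "json.hpp"

#include <fstream>
#include <iostream>
#include <sstream>

int main() {
    // read the WAV file into memory (placeholder path)
    std::ifstream wav("samples/jfk.wav", std::ios::binary);
    std::stringstream buf;
    buf << wav.rdbuf();

    httplib::Client cli("127.0.0.1", 8080);

    // each item is { name, content, filename, content_type },
    // mirroring the -F fields of the curl example above
    httplib::MultipartFormDataItems items = {
        { "file",            buf.str(), "jfk.wav", "audio/wav" },
        { "temperature",     "0.2",     "",        ""          },
        { "response-format", "json",    "",        ""          },
    };

    auto res = cli.Post("/inference", items);
    if (res && res->status == 200) {
        // assumption: the transcription is returned as {"text": "..."}
        auto j = nlohmann::json::parse(res->body);
        std::cout << j.value("text", "") << std::endl;
    } else {
        std::cerr << "request failed" << std::endl;
    }
    return 0;
}
```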

**/load**
```
curl 127.0.0.1:8080/load \
-H "Content-Type: multipart/form-data" \
-F model="<path-to-model-file>"
```
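
A matching C++ sketch for /load (again hypothetical, with a placeholder model path). Note that, as in the curl example, the model path is sent as a plain text field, so it is presumably resolved on the server's own filesystem:

```
#include "httplib.h"

#include <iostream>

int main() {
    httplib::Client cli("127.0.0.1", 8080);

    // "model" carries the server-side path as a text field,
    // mirroring -F model="<path-to-model-file>" above
    httplib::MultipartFormDataItems items = {
        { "model", "models/ggml-base.en.bin", "", "" },
    };

    auto res = cli.Post("/load", items);
    if (res) {
        std::cout << "status: " << res->status << std::endl;
    }
    return 0;
}
```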