Skip to content

Commit ad93962

Browse files
cebtenzzre, m18coppola, and Michael Coppola
authored
server : add parameter -tb N, --threads-batch N (#3584) (#3768)
Co-authored-by: Michael Coppola <m18coppola@gmail.com>
Co-authored-by: Michael Coppola <info@michaeljcoppola.com>
1 parent 1717521 commit ad93962

File tree

1 file changed

+19
-9
lines changed

1 file changed

+19
-9
lines changed

examples/server/server.cpp

+19 -9
Original file line number | Diff line number | Diff line change
@@ -1749,15 +1749,16 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
17491749
printf("usage: %s [options]\n", argv0);
17501750
printf("\n");
17511751
printf("options:\n");
1752-
printf(" -h, --help show this help message and exit\n");
1753-
printf(" -v, --verbose verbose output (default: %s)\n", server_verbose ? "enabled" : "disabled");
1754-
printf(" -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
1755-
printf(" -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx);
1756-
printf(" --rope-freq-base N RoPE base frequency (default: loaded from model)\n");
1757-
printf(" --rope-freq-scale N RoPE frequency scaling factor (default: loaded from model)\n");
1758-
printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
1759-
printf(" --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n");
1760-
printf(" not recommended: doubles context memory required and no measurable increase in quality\n");
1752+
printf(" -h, --help show this help message and exit\n");
1753+
printf(" -v, --verbose verbose output (default: %s)\n", server_verbose ? "enabled" : "disabled");
1754+
printf(" -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
1755+
printf(" -tb N, --threads-batch N number of threads to use during batch and prompt processing (default: same as --threads)\n");
1756+
printf(" -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx);
1757+
printf(" --rope-freq-base N RoPE base frequency (default: loaded from model)\n");
1758+
printf(" --rope-freq-scale N RoPE frequency scaling factor (default: loaded from model)\n");
1759+
printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
1760+
printf(" --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n");
1761+
printf(" not recommended: doubles context memory required and no measurable increase in quality\n");
17611762
if (llama_mlock_supported())
17621763
{
17631764
printf(" --mlock force system to keep model in RAM rather than swapping or compressing\n");
@@ -1907,6 +1908,15 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
19071908
}
19081909
params.n_threads = std::stoi(argv[i]);
19091910
}
1911+
else if (arg == "--threads-batch" || arg == "-tb")
1912+
{
1913+
if (++i >= argc)
1914+
{
1915+
invalid_param = true;
1916+
break;
1917+
}
1918+
params.n_threads_batch = std::stoi(argv[i]);
1919+
}
19101920
else if (arg == "-b" || arg == "--batch-size")
19111921
{
19121922
if (++i >= argc)

0 commit comments

Comments
 (0)