Correctly implement credentialed CORS for the server #4513

Closed · wants to merge 18 commits
10 changes: 10 additions & 0 deletions examples/server/README.md
@@ -329,6 +329,14 @@ bash chat.sh
API example using Python Flask: [api_like_OAI.py](api_like_OAI.py)
This example must be used with server.cpp

Requirements:

```shell
pip install flask flask-cors fschat # flask-cors and fschat are optional: flask-cors enables cross-origin requests, fschat provides chat template integration
```

Run the server:

```sh
python api_like_OAI.py
```
@@ -338,6 +346,8 @@ After running the API server, you can use it in Python by setting the API base URL

```python
openai.api_base = "http://<Your api-server IP>:port"
```

For better integration with the model, it is recommended to use the `--chat-prompt-model` parameter when starting the server, rather than relying solely on parameters like `--user-name`. This parameter accepts model names registered in [FastChat/conversation.py](https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py), for example `llama-2`.
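For instance, a start-up invocation could look like the sketch below (the flag comes from this README; everything else is left at its defaults):

```sh
python api_like_OAI.py --chat-prompt-model llama-2
```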

Then you can use llama.cpp as an OpenAI-compatible **chat.completion** or **text_completion** API.
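As a rough sketch, a request against the OpenAI-compatible chat endpoint could then look like this (host and port are assumptions, not values taken from this PR):

```sh
curl http://127.0.0.1:8081/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "Hello"}]}'
```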

### Extending or building alternative Web Front End
24 changes: 18 additions & 6 deletions examples/server/server.cpp
@@ -2710,9 +2710,15 @@ int main(int argc, char **argv)
return false;
};

svr.set_default_headers({{"Server", "llama.cpp"},
{"Access-Control-Allow-Origin", "*"},
{"Access-Control-Allow-Headers", "content-type"}});
svr.set_default_headers({{"Server", "llama.cpp"}});

// CORS preflight
svr.Options(R"(.*)", [&llama, &validate_api_key](const httplib::Request &req, httplib::Response &res) {
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
res.set_header("Access-Control-Allow-Credentials", "true");
res.set_header("Access-Control-Allow-Methods", "POST");
res.set_header("Access-Control-Allow-Headers", "*");
});
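The preflight handler can be exercised by hand; a minimal sketch, assuming the server listens on its default `127.0.0.1:8080`:

```sh
# the response should echo the Origin back and include Access-Control-Allow-Credentials: true
curl -i -X OPTIONS http://127.0.0.1:8080/completion \
    -H "Origin: http://example.com" \
    -H "Access-Control-Request-Method: POST" \
    -H "Access-Control-Request-Headers: content-type"
```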

// this is only called if no index.html is found in the public --path
svr.Get("/", [](const httplib::Request &, httplib::Response &res)
@@ -2744,7 +2750,7 @@ int main(int argc, char **argv)

svr.Get("/props", [&llama](const httplib::Request & /*req*/, httplib::Response &res)
{
res.set_header("Access-Control-Allow-Origin", "*");
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
json data = {
{ "user_name", llama.name_user.c_str() },
{ "assistant_name", llama.name_assistant.c_str() }
@@ -2754,6 +2760,7 @@ int main(int argc, char **argv)

svr.Post("/completion", [&llama, &validate_api_key](const httplib::Request &req, httplib::Response &res)
{
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
if (!validate_api_key(req, res)) {
return;
}
@@ -2821,10 +2828,9 @@ int main(int argc, char **argv)
}
});
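With the Origin echoed per route, a credentialed completion request would look roughly like this (the `Authorization` header only matters if the server was started with an API key; origin and key are placeholders):

```sh
curl http://127.0.0.1:8080/completion \
    -H "Origin: http://example.com" \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer YOUR-API-KEY" \
    -d '{"prompt": "Building a website can be done in 10 simple steps:", "n_predict": 16}'
```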



svr.Get("/v1/models", [&params](const httplib::Request&, httplib::Response& res)
{
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
std::time_t t = std::time(0);

json models = {
@@ -2842,9 +2848,11 @@ int main(int argc, char **argv)
res.set_content(models.dump(), "application/json; charset=utf-8");
});
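The model listing can be checked the same way; note that this route does not call `validate_api_key` in the code above:

```sh
curl http://127.0.0.1:8080/v1/models
```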


// TODO: add mount point without "/v1" prefix -- how?
svr.Post("/v1/chat/completions", [&llama, &validate_api_key](const httplib::Request &req, httplib::Response &res)
{
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
if (!validate_api_key(req, res)) {
return;
}
@@ -2918,6 +2926,7 @@ int main(int argc, char **argv)

svr.Post("/infill", [&llama, &validate_api_key](const httplib::Request &req, httplib::Response &res)
{
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
if (!validate_api_key(req, res)) {
return;
}
@@ -2990,6 +2999,7 @@ int main(int argc, char **argv)

svr.Post("/tokenize", [&llama](const httplib::Request &req, httplib::Response &res)
{
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
const json body = json::parse(req.body);
std::vector<llama_token> tokens;
if (body.count("content") != 0)
@@ -3002,6 +3012,7 @@ int main(int argc, char **argv)

svr.Post("/detokenize", [&llama](const httplib::Request &req, httplib::Response &res)
{
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
const json body = json::parse(req.body);
std::string content;
if (body.count("tokens") != 0)
@@ -3016,6 +3027,7 @@ int main(int argc, char **argv)

svr.Post("/embedding", [&llama](const httplib::Request &req, httplib::Response &res)
{
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
const json body = json::parse(req.body);
json prompt;
if (body.count("content") != 0)