NOTE(review): this copy of the patch had its line breaks collapsed and every angle-bracket token removed
(template arguments and the <think>/<tool_call>/<arg_key>/<arg_value> literals). They are restored below from
the surrounding comments and the length constants used by the parser (7, 8, 10, 12); the indentation of
context lines is assumed. Verify against the upstream change before applying.

diff --git a/common/chat.cpp b/common/chat.cpp
index 8587140e1ff0a..5c08e65cdc97f 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -217,7 +217,10 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
                         msg_part.text = part.at("text");
                         msg.content_parts.push_back(msg_part);
                     }
-                } else if (!content.is_null()) {
+                } else if (content.is_null()) {
+                    // Handle null content by setting it to empty string
+                    msg.content = "";
+                } else {
                     throw std::runtime_error("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
                 }
             }
@@ -306,7 +309,7 @@ json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msg
                 }
             }
         } else {
-            jmsg["content"] = json(); // null
+            jmsg["content"] = ""; // empty string instead of null
         }
         if (!msg.reasoning_content.empty()) {
             jmsg["reasoning_content"] = msg.reasoning_content;
@@ -640,6 +643,7 @@ const char * common_chat_format_name(common_chat_format format) {
         case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS";
         case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
         case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
+        case COMMON_CHAT_FORMAT_GLM_4_5: return "GLM 4.5";
         default:
             throw std::runtime_error("Unknown chat format");
     }
@@ -1902,6 +1906,402 @@ static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
     }
 }
 
+// Build the prompt, preserved tokens, extra stop words and (when tools are supplied) a lazy
+// tool-call grammar for templates that use the GLM 4.5 <tool_call>/<arg_key>/<arg_value> format.
+static common_chat_params common_chat_params_init_glm_4_5(const common_chat_template & tmpl, const struct templates_params & inputs) {
+    common_chat_params data;
+
+    // Build Minja inputs
+    minja::chat_template_inputs ti;
+    ti.messages = inputs.messages;
+    ti.tools = inputs.tools.empty() ? json() : inputs.tools;
+    ti.add_generation_prompt = inputs.add_generation_prompt;
+    ti.extra_context = inputs.extra_context;
+
+    // Minja options: only the object-arguments polyfill stays enabled, every other polyfill is disabled
+    minja::chat_template_options topts;
+    topts.apply_polyfills = true;
+    topts.polyfill_tools = false;
+    topts.polyfill_tool_call_examples = false;
+    topts.polyfill_tool_calls = false;
+    topts.polyfill_tool_responses = false;
+    topts.polyfill_system_role = false;
+    topts.polyfill_object_arguments = true;
+    topts.polyfill_typed_content = false;
+    topts.use_bos_token = true;
+    topts.use_eos_token = true;
+
+    std::string prompt = tmpl.apply(ti, topts);
+
+    // match the existing trimming behavior
+    if (inputs.add_bos && string_starts_with(prompt, tmpl.bos_token())) {
+        prompt.erase(0, tmpl.bos_token().size());
+    }
+    if (inputs.add_eos && string_ends_with(prompt, tmpl.eos_token())) {
+        prompt.erase(prompt.size() - tmpl.eos_token().size());
+    }
+
+    // add GLM preserved tokens
+    data.preserved_tokens = {
+        "<|endoftext|>",
+        "[MASK]",
+        "[gMASK]",
+        "[sMASK]",
+        "<sop>",
+        "<eop>",
+        "<|system|>",
+        "<|user|>",
+        "<|assistant|>",
+        "<|observation|>",
+        "<|begin_of_image|>",
+        "<|end_of_image|>",
+        "<|begin_of_video|>",
+        "<|end_of_video|>",
+        "<|begin_of_audio|>",
+        "<|end_of_audio|>",
+        "<|begin_of_transcription|>",
+        "<|end_of_transcription|>",
+        "<|code_prefix|>",
+        "<|code_middle|>",
+        "<|code_suffix|>",
+        "/nothink",
+        "<think>",
+        "</think>",
+        "<tool_call>",
+        "</tool_call>",
+        "<arg_key>",
+        "</arg_key>",
+        "<arg_value>",
+        "</arg_value>"
+    };
+
+    // extra GLM 4.5 stop word
+    data.additional_stops.insert(data.additional_stops.end(), {
+        "<|user|>",
+        "<|observation|>"
+    });
+
+    // build grammar for tool call
+    if (inputs.tools.is_array() && !inputs.tools.empty()) {
+        // GLM 4.5 uses format: <tool_call>function_name\n<arg_key>key</arg_key>\n<arg_value>value</arg_value>\n</tool_call>
+        data.grammar = build_grammar([&](const common_grammar_builder &builder) {
+            std::vector<std::string> tool_rules;
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                std::string name = function.at("name");
+                auto parameters = function.at("parameters");
+                builder.resolve_refs(parameters);
+
+                std::string param_rules;
+                if (parameters.contains("properties")) {
+                    for (const auto & [key, value] : parameters.at("properties").items()) {
+                        if (value.contains("type") && value["type"].is_string() && value["type"] == "string") {
+                            param_rules += "\"<arg_key>" + key + "</arg_key>\\n<arg_value>\" ( string-arg-val | " + builder.add_schema(name + "-arg-" + key, value) + " ) \"</arg_value>\\n\" ";
+                        } else {
+                            param_rules += "\"<arg_key>" + key + "</arg_key>\\n<arg_value>\" " + builder.add_schema(name + "-arg-" + key, value) + " \"</arg_value>\\n\" ";
+                        }
+                    }
+                }
+
+                tool_rules.push_back(builder.add_rule(name + "-call", "\"\\n<tool_call>\" \"" + name + "\\n\" " + param_rules + " \"</tool_call>\""));
+            });
+            builder.add_rule("string-arg-val", "( [^<] | \"<\" ( [^/] | \"/\" ( [^a] | \"a\" ( [^r] | \"r\" ( [^g] | \"g\" ( [^_] | \"_\" ( [^v] | \"v\" ( [^a] | \"a\" ( [^l] | \"l\" ( [^u] | \"u\" ( [^e] | \"e\" ( [^>] ) ) ) ) ) ) ) ) ) ) ) )*");
+            builder.add_rule("root", string_join(tool_rules, " | "));
+        });
+
+        // grammar trigger for tool call
+        data.grammar_lazy = true;
+        data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "\n<tool_call>" });
+    }
+
+    data.prompt = prompt;
+    data.format = COMMON_CHAT_FORMAT_GLM_4_5;
+    return data;
+}
+
+// Parse GLM 4.5 output: split <think> reasoning from content and turn each
+// <tool_call>name\n<arg_key>k</arg_key>\n<arg_value>v</arg_value>\n</tool_call> block into a tool call.
+// Throws common_chat_msg_partial_exception when the input stops in the middle of a tool call.
+static void common_chat_parse_glm_4_5(common_chat_msg_parser & builder) {
+    if (!builder.syntax().parse_tool_calls) {
+        builder.consume_spaces();
+        builder.try_parse_reasoning("<think>", "</think>");
+        builder.add_content(builder.consume_rest());
+        return;
+    }
+
+    constexpr auto rstrip = [](std::string &s){
+        s.resize(std::distance(s.begin(), std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base()));
+    };
+    // Erase substring from l to r (r = index of the last character to drop), along with additional spaces nearby; leaves "\n\n" as separator unless the erased range starts at 0; returns the index where the following text now starts
+    constexpr auto erase_spaces = [](auto &str, auto l, auto r){
+        // walk left over the whitespace in front of the tag; when l steps below 0 it wraps around and fails the bound check
+        while (--l < str.size() && std::isspace(static_cast<unsigned char>(str[l])));
+        ++l;
+        while (++r < str.size() && std::isspace(static_cast<unsigned char>(str[r])));
+        str[l] = '\n';
+        str[l + 1] = '\n';
+        if (l != 0) l += 2;
+        str.erase(l, r - l);
+        return l;
+    };
+    // Handle unclosed </think> from content
+    constexpr auto filter_unclosed_think = [erase_spaces](auto &content , auto &&builder){
+        auto &syntax = std::forward<decltype(builder)>(builder).syntax();
+        if (syntax.reasoning_format == COMMON_REASONING_FORMAT_NONE || syntax.reasoning_in_content) return;
+        if (auto pos = content.rfind("</think>"); pos != std::string::npos) {
+            //if (std::forward<decltype(builder)>(builder).is_partial()) {
+                // Streaming: delete all </think> token
+                while (pos != std::string::npos) {
+                    pos = erase_spaces(content, pos, pos + 7);
+                    pos = content.rfind("</think>", pos);
+                }
+            //} else {
+            //    // Non-streaming: Put all messages before </think> in reasoning content
+            //    std::forward<decltype(builder)>(builder).add_reasoning_content(string_strip(content.substr(0, pos)));
+            //    content.erase(0, pos + 8);
+            //}
+        }
+    };
+    // Drop substring from needle to end from a JSON
+    constexpr auto partial_json = [](std::string &json_str, std::string_view needle = "GLM_4_5_PARTIAL_FLAG"){
+        auto pos = json_str.rfind(needle);
+        if (pos == std::string::npos) {
+            return false;
+        }
+        for (auto i = pos + needle.size(); i < json_str.size(); ++i) {
+            unsigned char ch = static_cast<unsigned char>(json_str[i]);
+            if (ch != '\'' && ch != '"' && ch != '}' && ch != ':' && !std::isspace(ch)) {
+                return false;
+            }
+        }
+        if (pos != 0 && json_str[pos - 1] == '"') {
+            --pos;
+        }
+        json_str.resize(pos);
+        return true;
+    };
+    // Helper to generate a partial argument JSON
+    constexpr auto gen_partial_json = [partial_json](auto &&set_partial_arg, auto &&arguments, auto &&builder, auto &&function_name){
+        std::forward<decltype(set_partial_arg)>(set_partial_arg)(std::forward<decltype(builder)>(builder).consume_rest(), "GLM_4_5_PARTIAL_FLAG");
+        auto tool_str = std::forward<decltype(arguments)>(arguments).dump();
+        if (partial_json(tool_str)) {
+            if (std::forward<decltype(builder)>(builder).add_tool_call(std::forward<decltype(function_name)>(function_name), "", tool_str)) {
+                return;
+            }
+        }
+        LOG_DBG("Failed to parse partial GLM 4.5 tool call, fallback to non-partial: %s\n", tool_str.c_str());
+    };
+
+    // GLM 4.5 uses format: <tool_call>function_name\n<arg_key>key</arg_key>\n<arg_value>value</arg_value>\n</tool_call>
+    std::string half_reasoning("");
+    while (auto tc = builder.try_find_literal("\n<tool_call>")) {
+        auto &content = tc->prelude;
+
+        if (half_reasoning.size() != 0) {
+            if (auto pos = content.find("</think>"); pos == std::string::npos) {
+                half_reasoning += content + "\n<tool_call>";
+                continue;
+            } else {
+                auto reasoning_content = content.substr(0, pos);
+                rstrip(reasoning_content);
+                if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
+                    if (builder.result().content.size() != 0) {
+                        builder.add_content("\n\n");
+                    }
+                    builder.add_content("<think>");
+                    builder.add_content(half_reasoning);
+                    builder.add_content(reasoning_content);
+                    builder.add_content("</think>");
+                } else {
+                    builder.add_reasoning_content(half_reasoning);
+                    builder.add_reasoning_content(reasoning_content);
+                }
+                content.erase(0, pos + 8);
+                half_reasoning.clear();
+            }
+        }
+
+        // Handle multiple think block (scanned right to left; stop after index 0, where "think_start - 1" would wrap to npos and find the same opener again)
+        bool toolcall_in_think = false;
+        for (auto think_start = content.rfind("<think>"); think_start != std::string::npos; think_start = think_start == 0 ? std::string::npos : content.rfind("<think>", think_start - 1)) {
+            if (auto think_end = content.find("</think>", think_start + 7); think_end != std::string::npos) {
+                if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
+                    auto reasoning_content = string_strip(content.substr(think_start + 7, think_end - think_start - 7));
+                    builder.add_reasoning_content(reasoning_content);
+                    think_start = erase_spaces(content, think_start, think_end + 7);
+                }
+            } else {
+                // This <tool_call> start is in thinking block, skip this tool call
+                auto pos = think_start + 7;
+                // skip the whitespace after <think> without stepping over the first reasoning character
+                while (pos < content.size() && std::isspace(static_cast<unsigned char>(content[pos]))) ++pos;
+                half_reasoning = content.substr(pos) + "\n<tool_call>";
+                content.resize(think_start);
+                toolcall_in_think = true;
+            }
+        }
+        rstrip(content);
+
+        // Handle unclosed </think> token
+        filter_unclosed_think(content, builder);
+
+        // Strip if needed
+        if (content.size() > 0 && std::isspace(static_cast<unsigned char>(content[0]))) {
+            content = string_strip(content);
+        }
+
+        // Add content
+        if (content.size() != 0) {
+            // If there are multiple content blocks
+            if (builder.result().content.size() != 0) {
+                builder.add_content("\n\n");
+            }
+            builder.add_content(content);
+        }
+
+        // This <tool_call> start is in thinking block, skip this tool call
+        if (toolcall_in_think) {
+            continue;
+        }
+
+        builder.consume_spaces();
+        auto func_name = builder.try_find_literal("<arg_key>");
+        if (!func_name) {
+            func_name = builder.try_find_literal("</tool_call>");
+        }
+        if (!func_name) {
+            // Partial tool name not supported
+            throw common_chat_msg_partial_exception("incomplete tool_call");
+        }
+
+        // If GLM generate multiple tool call and the first tool call has no argument
+        if (func_name->prelude.find("</tool_call>") != std::string::npos){
+            // rewind to the start of the prelude, whatever the length of the literal that matched
+            builder.move_to(func_name->groups[0].begin - func_name->prelude.size());
+            func_name = builder.try_find_literal("</tool_call>");
+        }
+
+        // Parse tool name
+        builder.move_to(func_name->groups[0].begin);
+        std::string function_name = func_name->prelude;
+        rstrip(function_name);
+
+        // Argument JSON
+        json arguments = json::object();
+
+        // Helper to generate a partial argument JSON
+        const auto gen_partial_args = [&](auto &&set_partial_arg){
+            gen_partial_json(std::forward<decltype(set_partial_arg)>(set_partial_arg), arguments, builder, function_name);
+        };
+
+        // Parse all arg_key/arg_value pairs
+        while (builder.try_consume_literal("<arg_key>")) {
+            // Parse arg_key
+            auto key_res = builder.try_find_literal("</arg_key>");
+            if (!key_res) {
+                gen_partial_args([&](auto &&rest, auto &&needle){arguments[rest + needle] = "";});
+                throw common_chat_msg_partial_exception("Expected </arg_key> after <arg_key>");
+            }
+            if (key_res->groups[0].end - key_res->groups[0].begin != 10) {
+                gen_partial_args([&](auto &&, auto &&needle){arguments[key_res->prelude + needle] = "";});
+                throw common_chat_msg_partial_exception("Expected </arg_key> after <arg_key>");
+            }
+            auto &key = key_res->prelude;
+            builder.consume_spaces();
+
+            // Parse arg_value
+            if (!builder.try_consume_literal("<arg_value>")) {
+                gen_partial_args([&](auto &&, auto &&needle){arguments[key] = needle;});
+                throw common_chat_msg_partial_exception("Expected <arg_value> after </arg_key>");
+            }
+            auto val_start = builder.pos();
+
+            // Test if arg_val is a partial JSON
+            std::optional<common_json> value_json = std::nullopt;
+            try { value_json = builder.try_consume_json(); }
+            catch (const std::runtime_error&) { builder.move_to(val_start); }
+
+            // If it is a JSON and followed by </arg_value>, parse as json
+            // cannot support streaming because it may change to plain text
+            if (value_json) {
+                builder.consume_spaces();
+                if (builder.pos() == builder.input().size()) {
+                    gen_partial_args([&](auto &&, auto &&needle){arguments[key] = needle;});
+                    LOG_DBG("GLM 4.5 partial JSON arg_value: %s\n", value_json->json.dump().c_str());
+                    throw common_chat_msg_partial_exception("JSON arg_value detected. Waiting for more tokens for validations.");
+                }
+                if (builder.try_consume_literal("</arg_value>") && value_json->healing_marker.marker.empty()) {
+                    arguments[key] = value_json->json;
+                } else {
+                    builder.move_to(val_start);
+                }
+            }
+
+            // If not, parse as plain text
+            if (val_start == builder.pos()) {
+                if (auto value_plain = builder.try_find_literal("</arg_value>")) {
+                    if (value_plain->groups[0].end - value_plain->groups[0].begin != 12) {
+                        gen_partial_args([&](auto &&, auto &&needle){arguments[key] = value_plain->prelude + needle;});
+                        throw common_chat_msg_partial_exception("Expected </arg_value> after <arg_value>");
+                    }
+                    arguments[key] = value_plain->prelude;
+                } else {
+                    gen_partial_args([&](auto &&rest, auto &&needle){arguments[key] = rest + needle;});
+                    throw common_chat_msg_partial_exception("Expected </arg_value> after <arg_value>");
+                }
+            }
+            builder.consume_spaces();
+        }
+
+        // Consume closing tag
+        if (!builder.try_consume_literal("</tool_call>")) {
+            auto tool_call_arg = arguments.dump();
+            rstrip(tool_call_arg);
+            if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') {
+                tool_call_arg.resize(tool_call_arg.size() - 1);
+            }
+            rstrip(tool_call_arg);
+            builder.add_tool_call(function_name, "", tool_call_arg);
+            throw common_chat_msg_partial_exception("Expected </tool_call> after </arg_value>");
+        }
+
+        // Add the parsed tool call
+        if (!builder.add_tool_call(function_name, "", arguments.dump())) {
+            throw common_chat_msg_partial_exception("Failed to add GLM tool call");
+        }
+    }
+
+    builder.consume_spaces();
+    while (builder.pos() != builder.input().size()) {
+        builder.try_parse_reasoning("<think>", "</think>");
+        builder.consume_spaces();
+        std::string content;
+        if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
+            content = builder.consume_rest();
+        } else {
+            if (auto rsn = builder.try_find_literal("<think>")) {
+                builder.move_to(rsn->groups[0].begin);
+                content = std::move(rsn->prelude);
+            } else {
+                content = builder.consume_rest();
+            }
+            filter_unclosed_think(content, builder);
+        }
+        rstrip(content);
+        if (content.size() != 0) {
+            if (builder.result().content.size() != 0) {
+                builder.add_content("\n\n");
+            }
+            builder.add_content(content);
+        }
+        if (!builder.try_consume_literal("<think>")) {
+            break;
+        }
+        builder.move_to(builder.pos() - 7);
+    }
+}
+
 static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
     LOG_DBG("%s\n", __func__);
     common_chat_params data;
@@ -2723,6 +3123,11 @@ static common_chat_params common_chat_templates_apply_jinja(
         return common_chat_params_init_granite(tmpl, params);
     }
 
+    // GLM 4.5: detect by <arg_key> and <arg_value> tags (check before Hermes since both use <tool_call>)
+    if (src.find("[gMASK]") != std::string::npos && src.find("<arg_key>") != std::string::npos && src.find("<arg_value>") != std::string::npos && params.json_schema.is_null()) {
+        return common_chat_params_init_glm_4_5(tmpl, params);
+    }
+
     // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
     if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
         return common_chat_params_init_hermes_2_pro(tmpl, params);
@@ -2926,6 +3331,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
         case COMMON_CHAT_FORMAT_APERTUS:
             common_chat_parse_apertus(builder);
             break;
+        case COMMON_CHAT_FORMAT_GLM_4_5:
+            common_chat_parse_glm_4_5(builder);
+            break;
         default:
             throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
     }
diff --git a/common/chat.h b/common/chat.h
index f7b36ec711df4..6bcfed9f7bcdb 100644
--- a/common/chat.h
+++ b/common/chat.h
@@ -116,6 +116,7 @@ enum common_chat_format {
     COMMON_CHAT_FORMAT_SEED_OSS,
     COMMON_CHAT_FORMAT_NEMOTRON_V2,
     COMMON_CHAT_FORMAT_APERTUS,
+    COMMON_CHAT_FORMAT_GLM_4_5,
 
     COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
 };
diff --git a/common/json-partial.cpp b/common/json-partial.cpp
index 919927dc32446..aaf11310ab8a3 100644
--- a/common/json-partial.cpp
+++ b/common/json-partial.cpp
@@ -297,8 +297,25 @@ bool common_json_parse(
                 it = temptative_end;
                 return true;
             }
-            // TODO: handle unclosed top-level primitive if the stack was empty but we got an error (e.g. "tru", "\"", etc...)
-            // fprintf(stderr, "Closing: TODO\n");
+            // handle unclosed top-level primitive
+            if (err_loc.position != 0 && !healing_marker.empty() && err_loc.stack.empty()) {
+                std::string str(it, temptative_end);
+                const auto & magic_seed = out.healing_marker.marker = healing_marker;
+                if (can_parse(str + "\"")) {
+                    // Was inside an string
+                    str += (out.healing_marker.json_dump_marker = magic_seed) + "\"";
+                } else if (!str.empty() && str[str.length() - 1] == '\\' && can_parse(str + "\\\"")) {
+                    // Was inside an string after an escape
+                    str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\"";
+                } else {
+                    // TODO: handle more unclosed top-level primitive if the stack was empty but we got an error (e.g. "tru", "\"", etc...)
+                    // fprintf(stderr, "Closing: TODO\n");
+                    return false;
+                }
+                out.json = json::parse(str);
+                it = temptative_end;
+                return true;
+            }
             return false;
         }
         out.json = json::parse(it, end);
diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp
index dd9b51a9e50fd..9cf63dee93331 100644
--- a/common/json-schema-to-grammar.cpp
+++ b/common/json-schema-to-grammar.cpp
@@ -944,6 +944,9 @@ class SchemaConverter {
             return _add_rule(rule_name, out.str());
         } else if (schema.empty() || schema_type == "object") {
             return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object")));
+        } else if (schema_type.is_null() && schema.contains("not") && schema["not"].is_object() && schema["not"].empty()) {
+            // librechat returns not:{}, which does nothing.
+            return "";
         } else {
             if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
                 _errors.push_back("Unrecognized schema: " + schema.dump());