diff --git a/.github/chatmodes/custom_models.chatmode.md b/.github/chatmodes/custom_models.chatmode.md
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/common/chat.cpp b/common/chat.cpp
index 60805ab3b53f5..3c4fd78fa6a00 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -203,7 +203,10 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
                 msg_part.text = part.at("text");
                 msg.content_parts.push_back(msg_part);
             }
-        } else if (!content.is_null()) {
+        } else if (content.is_null()) {
+            // Handle null content by setting it to empty string
+            msg.content = "";
+        } else {
             throw std::runtime_error("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
         }
     }
@@ -292,7 +295,7 @@ json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msg
            }
        }
    } else {
-        jmsg["content"] = json(); // null
+        jmsg["content"] = ""; // empty string instead of null
    }
    if (!msg.reasoning_content.empty()) {
        jmsg["reasoning_content"] = msg.reasoning_content;
    }
@@ -607,6 +610,7 @@ const char * common_chat_format_name(common_chat_format format) {
         case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
         case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
         case COMMON_CHAT_FORMAT_GPT_OSS: return "GPT-OSS";
+        case COMMON_CHAT_FORMAT_GLM_4_5: return "GLM 4.5";
         default:
             throw std::runtime_error("Unknown chat format");
     }
@@ -1325,6 +1329,210 @@ static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
     }
 }
 
+static common_chat_params common_chat_params_init_glm_4_5(const common_chat_template & tmpl, const struct templates_params & inputs) {
+    LOG_INF("%s: initializing GLM-4.5 chat params\n", __func__);
+    common_chat_params data;
+
+    // Configure template inputs
+    minja::chat_template_inputs tmpl_inputs;
+    tmpl_inputs.messages = inputs.messages;
+    tmpl_inputs.tools = inputs.tools.empty() ? json() : inputs.tools;
+    tmpl_inputs.add_generation_prompt = inputs.add_generation_prompt;
+    tmpl_inputs.extra_context = inputs.extra_context;
+    tmpl_inputs.now = inputs.now; // Use the consistent timestamp from params
+
+    // Configure template options to disable polyfills and enforce native XML format
+    minja::chat_template_options opts;
+    opts.apply_polyfills = false; // Hard disable all polyfills
+
+    // The prompt is generated here
+    data.prompt = tmpl.apply(tmpl_inputs, opts);
+    data.format = COMMON_CHAT_FORMAT_GLM_4_5;
+
+    data.preserved_tokens = {
+        "<|system|>", "<|assistant|>", "<|observation|>",
+        "<think>", "</think>", "<tool_call>", "</tool_call>",
+        "<arg_key>", "</arg_key>", "<arg_value>", "</arg_value>",
+        "<tool_response>", "</tool_response>",
+    };
+
+    // Store tools schema for type-aware parsing later
+    data.tools_schema = inputs.tools;
+
+    LOG_INF("%s: GLM-4.5 native XML format enforced\n", __func__);
+    return data;
+}
+
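+// Illustrative shape of the model output consumed by common_chat_parse_glm_4_5
+// below (the get_weather tool and its argument are hypothetical examples, not
+// real tools):
+//
+//   <think>reasoning...</think>
+//   <tool_call>get_weather
+//   <arg_key>city</arg_key>
+//   <arg_value>Paris</arg_value>
+//   </tool_call><|observation|>
+//
+// Keys and values arrive as raw text; argument types are recovered from the
+// tool's JSON schema via get_expected_type().
+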
+static void common_chat_parse_glm_4_5(common_chat_msg_parser & builder) {
+
+    auto get_expected_type = [&](const std::string & tool_name, const std::string & param_name) -> std::string {
+        // Access tools schema from builder syntax
+        const auto & tools_schema = builder.syntax().tools_schema;
+        if (tools_schema.is_array()) {
+            for (const auto & tool : tools_schema) {
+                if (tool.contains("function") && tool["function"]["name"] == tool_name) {
+                    auto params = tool["function"]["parameters"];
+                    if (params.contains("properties") && params["properties"].contains(param_name)) {
+                        return params["properties"][param_name].value("type", "string");
+                    }
+                }
+            }
+        }
+        return "string"; // Default fallback
+    };
+
+    auto handle_tool_call_end = [&](common_chat_msg_parser & builder, auto end_pos) {
+        builder.move_to(end_pos);
+        builder.consume_literal("</tool_call>");
+
+        size_t obs_pos = builder.input().find("<|observation|>", builder.pos());
+        if (obs_pos != std::string::npos) {
+            if (obs_pos > builder.pos()) {
+                std::string content = builder.input().substr(builder.pos(), obs_pos - builder.pos());
+                builder.add_content(content);
+            }
+
+            builder.move_to(obs_pos);
+            builder.consume_literal("<|observation|>");
+        } else {
+            std::string remaining = builder.consume_rest();
+            if (!remaining.empty()) builder.add_content(remaining);
+        }
+    };
+
+    builder.consume_spaces();
+    builder.try_parse_reasoning("<think>", "</think>");
+
+    size_t curr_pos = builder.pos();
+    while (builder.input().find("<tool_call>", builder.pos()) != std::string::npos) {
+        size_t tool_call_start = builder.input().find("<tool_call>", builder.pos());
+        if (tool_call_start > builder.pos()) {
+            std::string content = builder.input().substr(builder.pos(), tool_call_start - builder.pos());
+            builder.add_content(content);
+        }
+
+        size_t tool_call_end = builder.input().find("</tool_call>", tool_call_start);
+        if (tool_call_end == std::string::npos) return;
+
+        builder.move_to(tool_call_start);
+        builder.consume_literal("<tool_call>");
+        builder.consume_spaces();
+
+        size_t arg_key_start = builder.input().find("<arg_key>", builder.pos());
+        if (arg_key_start == std::string::npos || arg_key_start > tool_call_end) {
+            std::string function_content = builder.input().substr(builder.pos(), tool_call_end - builder.pos());
+            std::string function_name = string_strip(function_content);
+
+            if (!builder.add_tool_call(function_name, "", "{}")) {
+                LOG_INF("%s: failed to add tool call\n", __func__);
+            }
+            handle_tool_call_end(builder, tool_call_end);
+        } else {
+            std::string function_content = builder.input().substr(builder.pos(), arg_key_start - builder.pos());
+            std::string function_name = string_strip(function_content);
+
+            json args_json = json::object();
+            builder.move_to(arg_key_start);
+
+            while (builder.pos() < tool_call_end && builder.input().substr(builder.pos()).rfind("<arg_key>", 0) == 0) {
+                if (!builder.try_consume_literal("<arg_key>")) break;
+
+                auto key_close = builder.try_find_literal("</arg_key>");
+                if (!key_close || key_close->groups[0].end > tool_call_end) {
+                    throw common_chat_msg_partial_exception("incomplete tool call (arg_key)");
+                }
+                std::string key = string_strip(key_close->prelude);
+
+                builder.consume_spaces();
+                if (!builder.try_consume_literal("<arg_value>")) {
+                    throw common_chat_msg_partial_exception("incomplete tool call (arg_value)");
+                }
+
+                auto value_close = builder.try_find_literal("</arg_value>");
+                if (!value_close || value_close->groups[0].end > tool_call_end) {
+                    throw common_chat_msg_partial_exception("incomplete tool call (arg_value content)");
+                }
+                std::string value = string_strip(value_close->prelude);
+
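+                // Type-aware coercion: values are plain text on the wire, so map
+                // them using the schema's declared type. Illustrative mapping
+                // (hypothetical parameter types):
+                //   "integer"/"number": "42" -> 42, "3.14" -> 3.14
+                //   "boolean":          "true"/"1" -> true, "false"/"0" -> false
+                //   "array"/"object":   "[1, 2]" -> JSON-parsed value
+                // On any parse failure the raw string is kept as a fallback.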
+                std::string expected_type = get_expected_type(function_name, key);
+                json parsed_value;
+
+                if (expected_type == "integer" || expected_type == "number") {
+                    try {
+                        if (value.find('.') != std::string::npos) {
+                            parsed_value = std::stod(value);
+                        } else {
+                            parsed_value = std::stoll(value);
+                        }
+                    } catch (const std::exception &) {
+                        LOG_WRN("%s: Failed to parse '%s' as a number for key '%s', falling back to string.\n", __func__, value.c_str(), key.c_str());
+                        parsed_value = value;
+                    }
+                } else if (expected_type == "boolean") {
+                    std::string lower_val = value;
+                    std::transform(lower_val.begin(), lower_val.end(), lower_val.begin(),
+                                   [](unsigned char c) { return std::tolower(c); });
+                    if (lower_val == "true" || lower_val == "1") {
+                        parsed_value = true;
+                    } else if (lower_val == "false" || lower_val == "0") {
+                        parsed_value = false;
+                    } else {
+                        LOG_WRN("%s: Ambiguous boolean value '%s' for key '%s', falling back to string.\n", __func__, value.c_str(), key.c_str());
+                        parsed_value = value;
+                    }
+                } else if (expected_type == "array" || expected_type == "object") {
+                    try {
+                        parsed_value = json::parse(value);
+                    } catch (const json::parse_error &) {
+                        LOG_WRN("%s: Failed to parse '%s' as JSON for key '%s', falling back to raw string.\n", __func__, value.c_str(), key.c_str());
+                        parsed_value = value;
+                    }
+                } else {
+                    // Default case is "string".
+                    parsed_value = value;
+                }
+
+                args_json[key] = parsed_value;
+                builder.consume_spaces();
+            }
+
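+            // Illustrative case (hypothetical output): a lone argument such as
+            //   <arg_key>args</arg_key><arg_value>{"city": "Paris"}</arg_value>
+            // carries a whole JSON object as a string; unwrap it so it becomes
+            // the argument object itself.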
+            // This is a special case to handle when the model outputs a single JSON object as a string
+            if (args_json.size() == 1) {
+                const auto key = args_json.begin().key();
+                auto & value = args_json.begin().value();
+                if (value.is_string()) {
+                    try {
+                        json unpacked_json = json::parse(value.get<std::string>());
+                        if (unpacked_json.is_object()) {
+                            args_json = unpacked_json;
+                        }
+                    } catch (const std::exception &) {
+                        // Not a valid JSON string, proceed as normal
+                    }
+                }
+            }
+
+            if (!builder.add_tool_call(function_name, "", args_json.dump())) {
+                LOG_INF("%s: failed to add tool call with arguments\n", __func__);
+            } else {
+                LOG_INF("%s: successfully added tool call with arguments\n", __func__);
+            }
+            handle_tool_call_end(builder, tool_call_end);
+        }
+
+        if (curr_pos == builder.pos()) {
+            LOG_INF("%s: no progress in parsing, stopping to avoid infinite loop\n", __func__);
+            break;
+        }
+        curr_pos = builder.pos();
+    }
+
+    if (builder.pos() < builder.input().size()) {
+        builder.add_content(builder.consume_rest());
+    }
+}
+
 static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
     LOG_DBG("%s\n", __func__);
     common_chat_params data;
@@ -1805,6 +2013,11 @@ static common_chat_params common_chat_templates_apply_jinja(
         return common_chat_params_init_command_r7b(tmpl, params);
     }
 
+    // GLM 4.5: detect by <arg_key> and <arg_value> tags (check before Hermes since both use <tool_call>)
+    if (src.find("[gMASK]") != std::string::npos && src.find("<arg_key>") != std::string::npos && src.find("<arg_value>") != std::string::npos && params.json_schema.is_null()) {
+        return common_chat_params_init_glm_4_5(tmpl, params);
+    }
+
     // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
     if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
         return common_chat_params_init_hermes_2_pro(tmpl, params);
     }
@@ -1969,6 +2182,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
         case COMMON_CHAT_FORMAT_GPT_OSS:
             common_chat_parse_gpt_oss(builder);
             break;
+        case COMMON_CHAT_FORMAT_GLM_4_5:
+            common_chat_parse_glm_4_5(builder);
+            break;
         default:
             throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
     }
diff --git a/common/chat.h b/common/chat.h
index b014f9f0aaeb4..625b50d053622 100644
--- a/common/chat.h
+++ b/common/chat.h
@@ -3,6 +3,7 @@
 #pragma once
 
 #include "common.h"
+#include <nlohmann/json.hpp>
 #include <chrono>
 #include <string>
 #include <vector>
@@ -110,6 +111,7 @@ enum common_chat_format {
     COMMON_CHAT_FORMAT_HERMES_2_PRO,
     COMMON_CHAT_FORMAT_COMMAND_R7B,
     COMMON_CHAT_FORMAT_GPT_OSS,
+    COMMON_CHAT_FORMAT_GLM_4_5,
 
     COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
 };
@@ -141,6 +143,7 @@ struct common_chat_params {
     std::vector<common_grammar_trigger> grammar_triggers;
     std::vector<std::string> preserved_tokens;
     std::vector<std::string> additional_stops;
+    nlohmann::ordered_json tools_schema = nlohmann::ordered_json(); // Schema for tools to pass to parser
 };
 
 struct common_chat_syntax {
@@ -150,6 +153,7 @@ struct common_chat_syntax {
     bool reasoning_in_content = false;
     bool thinking_forced_open = false;
     bool parse_tool_calls = true;
+    nlohmann::ordered_json tools_schema = nlohmann::ordered_json(); // Schema for tools to enable type-aware parsing
 };
 
 // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
diff --git a/models/templates/glm_4_5.jinja b/models/templates/glm_4_5.jinja
new file mode 100644
index 0000000000000..08fb5d1f97f0f
--- /dev/null
+++ b/models/templates/glm_4_5.jinja
@@ -0,0 +1,119 @@
+[gMASK]<sop>
+{%- if tools -%}
+<|system|>
+# Tools
+
+You may call one or more functions to assist with the user query.
+
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{% for tool in tools %}
+{{ tool | tojson }}
+{% endfor %}
+</tools>
+
+For each function call, output the function name and arguments within the following XML format:
+<tool_call>{function-name}
+<arg_key>{arg-key-1}</arg_key>
+<arg_value>{arg-value-1}</arg_value>
+<arg_key>{arg-key-2}</arg_key>
+<arg_value>{arg-value-2}</arg_value>
+...
+</tool_call>{%- endif -%}
+{%- macro visible_text(content) -%}
+    {%- if content is string -%}
+        {{- content }}
+    {%- elif content is iterable and content is not mapping -%}
+        {%- for item in content -%}
+            {%- if item is mapping and item.type == 'text' -%}
+                {{- item.text }}
+            {%- elif item is string -%}
+                {{- item }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{- content }}
+    {%- endif -%}
+{%- endmacro -%}
+{%- set ns = namespace(last_user_index=-1) %}
+{%- for m in messages %}
+    {%- if m.role == 'user' %}
+        {% set ns.last_user_index = loop.index0 -%}
+    {%- endif %}
+{%- endfor %}
+{% for m in messages %}
+{%- if m.role == 'user' -%}<|user|>
+{%- set user_content = visible_text(m.content) -%}
+{{ user_content }}
+{{- '/nothink' if (enable_thinking is defined and not enable_thinking and not user_content.endswith("/nothink")) else '' -}}
+{%- elif m.role == 'assistant' -%}
+<|assistant|>
+{%- set reasoning_content = '' %}
+{%- set content = visible_text(m.content) %}
+{%- if m.reasoning_content is string %}
+    {%- set reasoning_content = m.reasoning_content %}
+{%- else %}
+    {%- if '</think>' in content %}
+        {%- set parts = content.split('</think>') -%}
+        {%- set before_first_close = parts | first -%}
+        {%- set inner_parts = before_first_close.rstrip('\n').split('<think>') -%}
+        {%- set extracted_reasoning = inner_parts | last -%}
+        {%- set reasoning_content = extracted_reasoning.lstrip('\n') -%}
+        {%- set after_last_close = parts | last -%}
+        {%- set content = after_last_close.lstrip('\n') -%}
+    {%- endif %}
+{%- endif %}
+{%- if loop.index0 > ns.last_user_index and reasoning_content -%}
+{{ '\n<think>' + reasoning_content.strip() + '</think>' }}
+{%- else -%}
+{{ '\n<think></think>' }}
+{%- endif -%}
+{%- if content.strip() -%}
+{{ '\n' + content.strip() }}
+{%- endif -%}
+{% if m.tool_calls %}
+{% for tc in m.tool_calls %}
+{%- if tc.function %}
+    {%- set tc_obj = tc.function %}
+{%- else %}
+    {%- set tc_obj = tc %}
+{%- endif %}
+{{ '\n<tool_call>' + tc_obj.name }}
+{%- if tc_obj.arguments is mapping -%}
+    {%- for k, v in tc_obj.arguments.items() -%}
+
+<arg_key>{{ k }}</arg_key>
+<arg_value>{{ v | tojson if v is not string else v }}</arg_value>
+    {%- endfor -%}
+{%- else -%}
+    {#- Arguments came as string - this shouldn't happen with polyfills disabled -#}
+    {#- Output as single argument for debugging -#}
+
+<arg_key>raw_arguments</arg_key>
+<arg_value>{{ tc_obj.arguments }}</arg_value>
+{%- endif -%}</tool_call>
+{% endfor %}
+{% endif %}
+{%- elif m.role == 'tool' -%}
+{%- if m.content is string -%}
+{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+    {{- '<|observation|>' }}
+{%- endif %}
+{{- '\n<tool_response>\n' }}
+{{- m.content }}
+{{- '\n</tool_response>' }}
+{%- else -%}
+<|observation|>{% for tr in m.content %}
+
+<tool_response>
+{{ tr.output if tr.output is defined else tr }}
+</tool_response>{% endfor -%}
+{% endif -%}
+{%- elif m.role == 'system' -%}
+<|system|>
+{{ visible_text(m.content) }}
+{%- endif -%}
+{%- endfor -%}
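+{#- Illustrative note: when enable_thinking is false the generation prompt below pre-fills an empty <think></think> block, matching what try_parse_reasoning() in the C++ parser expects. -#}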
+{%- if add_generation_prompt -%}
+<|assistant|>{{- '\n<think></think>' if (enable_thinking is defined and not enable_thinking) else '' -}}
+{%- endif -%}
\ No newline at end of file
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 73c98bfa207fc..5918ff1f4594a 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -294,6 +294,7 @@ static void test_templates(const struct common_chat_templates * tmpls, const std
         common_chat_syntax syntax;
         syntax.format = data.params.format;
         syntax.reasoning_format = reasoning_format;
+        syntax.tools_schema = data.params.tools_schema;
         const auto msg = common_chat_parse(data.delta, /* is_partial= */ false, syntax);
         assert_msg_equals(test_message, msg);
     }
diff --git a/tools/server/server.cpp b/tools/server/server.cpp
index a255d481a4d1c..a70508b3568e3 100644
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -387,6 +387,7 @@ struct server_task {
             params.oaicompat_chat_syntax.reasoning_in_content = params.stream && (params_base.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
             params.oaicompat_chat_syntax.thinking_forced_open = json_value(data, "thinking_forced_open", false);
             params.oaicompat_chat_syntax.parse_tool_calls = json_value(data, "parse_tool_calls", false);
+            params.oaicompat_chat_syntax.tools_schema = json_value(data, "tools_schema", nlohmann::ordered_json());
         }
 
         {
diff --git a/tools/server/utils.hpp b/tools/server/utils.hpp
index f3dfc8225da4d..84fb72a6fb5c1 100644
--- a/tools/server/utils.hpp
+++ b/tools/server/utils.hpp
@@ -848,6 +848,9 @@ static json oaicompat_chat_params_parse(
         }
     }
 
+    // Store tools schema for parser
+    llama_params["tools_schema"] = chat_params.tools_schema;
+
     return llama_params;
 }