Skip to content

Commit fd3a317

Browse files
authored
Merge branch 'ggml-org:master' into acc_type_vec2
2 parents a4375dc + f9bc66c commit fd3a317

File tree

72 files changed

+14712
-5888
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+14712
-5888
lines changed

.github/workflows/build.yml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,39 @@ jobs:
387387
cd build
388388
ctest -L main --verbose
389389
390+
ubuntu-24-cmake-vulkan-deb:
391+
runs-on: ubuntu-24.04
392+
393+
steps:
394+
- name: Clone
395+
id: checkout
396+
uses: actions/checkout@v4
397+
398+
- name: ccache
399+
uses: ggml-org/ccache-action@v1.2.16
400+
with:
401+
key: ubuntu-24-cmake-vulkan-deb
402+
evict-old-files: 1d
403+
404+
- name: Dependencies
405+
id: depends
406+
run: |
407+
sudo apt-get install -y glslc libvulkan-dev libcurl4-openssl-dev
408+
409+
- name: Configure
410+
id: cmake_configure
411+
run: |
412+
cmake -B build \
413+
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
414+
-DGGML_BACKEND_DL=ON \
415+
-DGGML_CPU_ALL_VARIANTS=ON \
416+
-DGGML_VULKAN=ON
417+
418+
- name: Build
419+
id: cmake_build
420+
run: |
421+
cmake --build build -j $(nproc)
422+
390423
ubuntu-24-cmake-vulkan:
391424
runs-on: ubuntu-24.04
392425

common/arg.cpp

Lines changed: 162 additions & 134 deletions
Large diffs are not rendered by default.

common/chat-parser.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parse
432432
if (is_arguments_path({})) {
433433
// Entire JSON is the arguments and was parsed fully.
434434
return consume_json_result {
435-
partial->json.dump(),
435+
partial->json.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true),
436436
/* .is_partial = */ false,
437437
};
438438
}
@@ -444,7 +444,7 @@ std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parse
444444
std::vector<std::string> path;
445445
std::function<json(const json &)> remove_unsupported_healings_and_dump_args = [&](const json & j) -> json {
446446
if (is_arguments_path(path)) {
447-
auto arguments = j.dump();
447+
auto arguments = j.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true);
448448
if (is_partial() && !partial->healing_marker.marker.empty()) {
449449
auto idx = arguments.find(partial->healing_marker.json_dump_marker);
450450
if (idx != std::string::npos) {

common/common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,7 @@ struct common_params {
426426
int32_t n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool)
427427
int32_t n_cache_reuse = 0; // min chunk size to reuse from the cache via KV shifting
428428
int32_t n_ctx_checkpoints = 8; // max number of context checkpoints per slot
429-
int32_t cache_ram_mib = 8192; // 0 = no limit, 1 = 1 MiB, etc.
429+
int32_t cache_ram_mib = 8192; // -1 = no limit, 0 = disabled, 1 = 1 MiB, etc.
430430

431431
std::string hostname = "127.0.0.1";
432432
std::string public_path = ""; // NOLINT

common/json-partial.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <nlohmann/json.hpp>
66

77
#include <string>
8+
#include <regex>
89

910
using json = nlohmann::ordered_json;
1011

@@ -168,6 +169,47 @@ bool common_json_parse(
168169
}
169170
}
170171

172+
// Matches a potentially partial unicode escape sequence, e.g. \u, \uX, \uXX, \uXXX, \uXXXX
173+
static const std::regex partial_unicode_regex(R"(\\u(?:[0-9a-fA-F](?:[0-9a-fA-F](?:[0-9a-fA-F](?:[0-9a-fA-F])?)?)?)?$)");
174+
175+
auto is_high_surrogate = [&](const std::string & s) {
176+
// Check whether s starts like a (possibly partial) escape of a high surrogate (U+D800-U+DBFF), i.e. \uD8.. through \uDB..
177+
return s.length() >= 4 &&
178+
s[0] == '\\' && s[1] == 'u' &&
179+
std::tolower(s[2]) == 'd' &&
180+
(s[3] == '8' || s[3] == '9' || std::tolower(s[3]) == 'a' || std::tolower(s[3]) == 'b');
181+
};
182+
183+
// Initialize the unicode marker to a low surrogate to handle the edge case
184+
// where a high surrogate (U+D800-U+DBFF) is immediately followed by a
185+
// backslash (\)
186+
std::string unicode_marker_padding = "udc00";
187+
std::smatch last_unicode_seq;
188+
189+
if (std::regex_search(str, last_unicode_seq, partial_unicode_regex)) {
190+
std::smatch second_last_seq;
191+
std::string prelude = str.substr(0, last_unicode_seq.position());
192+
193+
// Pad the escape sequence with 0s until it forms a complete sequence of 6 characters
194+
unicode_marker_padding = std::string(6 - last_unicode_seq.length(), '0');
195+
196+
if (is_high_surrogate(last_unicode_seq.str())) {
197+
// If the sequence is a partial match for a high surrogate, add a low surrogate (U+DC00-U+DFFF)
198+
unicode_marker_padding += "\\udc00";
199+
} else if (std::regex_search(prelude, second_last_seq, partial_unicode_regex)) {
200+
if (is_high_surrogate(second_last_seq.str())) {
201+
// If this follows a high surrogate, pad it to be a low surrogate
202+
if (last_unicode_seq.length() == 2) {
203+
unicode_marker_padding = "dc00";
204+
} else if (last_unicode_seq.length() == 3) {
205+
unicode_marker_padding = "c00";
206+
} else {
207+
// The original unicode_marker_padding is already padded with 0s
208+
}
209+
}
210+
}
211+
}
212+
171213
const auto & magic_seed = out.healing_marker.marker = healing_marker;//"$llama.cpp.json$";
172214

173215
if (err_loc.stack.back().type == COMMON_JSON_STACK_ELEMENT_KEY) {
@@ -186,6 +228,9 @@ bool common_json_parse(
186228
} else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\"" + closing)) {
187229
// Was inside an object value string after an escape
188230
str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\"" + closing;
231+
} else if (can_parse(str + unicode_marker_padding + "\"" + closing)) {
232+
// Was inside an object value string after a partial unicode escape
233+
str += (out.healing_marker.json_dump_marker = unicode_marker_padding + magic_seed) + "\"" + closing;
189234
} else {
190235
// find last :
191236
auto last_pos = str.find_last_of(':');
@@ -205,6 +250,9 @@ bool common_json_parse(
205250
} else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\"" + closing)) {
206251
// Was inside an array value string after an escape
207252
str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\"" + closing;
253+
} else if (can_parse(str + unicode_marker_padding + "\"" + closing)) {
254+
// Was inside an array value string after a partial unicode escape
255+
str += (out.healing_marker.json_dump_marker = unicode_marker_padding + magic_seed) + "\"" + closing;
208256
} else if (!was_maybe_number() && can_parse(str + ", 1" + closing)) {
209257
// Had just finished a value
210258
str += (out.healing_marker.json_dump_marker = ",\"" + magic_seed) + "\"" + closing;
@@ -230,6 +278,9 @@ bool common_json_parse(
230278
} else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\": 1" + closing)) {
231279
// Was inside an object key string after an escape
232280
str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\": 1" + closing;
281+
} else if (can_parse(str + unicode_marker_padding + "\": 1" + closing)) {
282+
// Was inside an object key string after a partial unicode escape
283+
str += (out.healing_marker.json_dump_marker = unicode_marker_padding + magic_seed) + "\": 1" + closing;
233284
} else {
234285
auto last_pos = str.find_last_of(':');
235286
if (last_pos == std::string::npos) {

convert_hf_to_gguf.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5966,20 +5966,12 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
59665966
class JambaModel(TextModel):
59675967
model_arch = gguf.MODEL_ARCH.JAMBA
59685968

5969-
def get_vocab_base_pre(self, tokenizer) -> str:
5970-
del tokenizer # unused
5971-
5972-
return "gpt-2"
5973-
59745969
def set_vocab(self):
59755970
if (self.dir_model / "tokenizer.model").is_file():
5976-
# Using Jamba's tokenizer.json causes errors on model load
5977-
# (something about "byte not found in vocab"),
5978-
# but there's a working tokenizer.model
59795971
self._set_vocab_sentencepiece()
59805972
else:
5981-
# Some Jamba models only have a tokenizer.json, which works.
5982-
self._set_vocab_gpt2()
5973+
self._set_vocab_llama_hf()
5974+
self.gguf_writer.add_add_space_prefix(False)
59835975

59845976
def set_gguf_parameters(self):
59855977
d_model = self.find_hparam(["hidden_size", "mamba_d_model"])

docs/ops.md

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ Legend:
3131
| CONV_TRANSPOSE_1D ||||||||||
3232
| CONV_TRANSPOSE_2D ||||||||||
3333
| COS ||||| 🟡 ||| 🟡 ||
34-
| COUNT_EQUAL ||||||| |||
34+
| COUNT_EQUAL ||||||| |||
3535
| CPY || 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 ||
3636
| CROSS_ENTROPY_LOSS ||||||||||
3737
| CROSS_ENTROPY_LOSS_BACK ||||||||||
@@ -51,7 +51,7 @@ Legend:
5151
| GET_ROWS || 🟡 || 🟡 || 🟡 | 🟡 | 🟡 ||
5252
| GET_ROWS_BACK ||| 🟡 | 🟡 ||||||
5353
| GROUP_NORM ||||||||||
54-
| GROUP_NORM_MUL_ADD ||||||| |||
54+
| GROUP_NORM_MUL_ADD ||||||| |||
5555
| HARDSIGMOID |||| 🟡 | 🟡 || 🟡 |||
5656
| HARDSWISH |||| 🟡 | 🟡 || 🟡 |||
5757
| IM2COL ||||| 🟡 |||||
@@ -65,11 +65,11 @@ Legend:
6565
| MUL_MAT_ID || 🟡 |||| 🟡 | 🟡 |||
6666
| NEG |||| 🟡 | 🟡 || 🟡 |||
6767
| NORM ||||| 🟡 ||| 🟡 ||
68-
| NORM_MUL_ADD ||||||| |||
68+
| NORM_MUL_ADD ||||||| |||
6969
| OPT_STEP_ADAMW ||||||||||
7070
| OPT_STEP_SGD ||||||||||
7171
| OUT_PROD | 🟡 || 🟡 | 🟡 ||| 🟡 |||
72-
| PAD ||||||| |||
72+
| PAD ||||||| 🟡 |||
7373
| PAD_REFLECT_1D ||||||||||
7474
| POOL_2D || 🟡 ||||||||
7575
| REGLU ||||| 🟡 ||| 🟡 ||
@@ -92,19 +92,21 @@ Legend:
9292
| SILU |||| 🟡 | 🟡 | 🟡 | 🟡 | 🟡 ||
9393
| SILU_BACK ||||||||||
9494
| SIN ||||| 🟡 ||| 🟡 ||
95-
| SOFTCAP ||||||| |||
96-
| SOFT_MAX || 🟡 ||||| 🟡 |||
97-
| SOFT_MAX_BACK ||| 🟡 | 🟡 ||| |||
95+
| SOFTCAP ||||||| |||
96+
| SOFT_MAX || 🟡 ||||| |||
97+
| SOFT_MAX_BACK ||| 🟡 | 🟡 ||| 🟡 |||
9898
| SQR ||||| 🟡 ||| 🟡 ||
9999
| SQRT ||||| 🟡 |||||
100100
| SSM_CONV ||||||||||
101101
| SSM_SCAN ||||||||||
102102
| STEP |||| 🟡 | 🟡 || 🟡 |||
103103
| SUB ||||| 🟡 | 🟡 ||||
104104
| SUM ||||||||||
105-
| SUM_ROWS ||||||| |||
105+
| SUM_ROWS ||||||| 🟡 |||
106106
| SWIGLU ||||| 🟡 ||| 🟡 ||
107107
| SWIGLU_OAI ||||||||||
108108
| TANH |||| 🟡 | 🟡 || 🟡 | 🟡 ||
109109
| TIMESTEP_EMBEDDING ||||||||||
110+
| TOPK_MOE ||||||||||
110111
| UPSCALE || 🟡 ||| 🟡 || 🟡 |||
112+
| XIELU ||||||||||

0 commit comments

Comments
 (0)