Commit 85107b6

fix conflicts

1 parent b913e89 commit 85107b6

File tree

13 files changed: +304 -212 lines

convert_hf_to_gguf.py

Lines changed: 0 additions & 29 deletions

@@ -3538,25 +3538,16 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        return super().modify_tensors(data_torch, name, bid)


-<<<<<<< HEAD
@ModelBase.register("Qwen3VLForConditionalGeneration", "Qwen3VLMoeForConditionalGeneration")
class Qwen3VLVisionModel(MmprojModel):
-=======
-@ModelBase.register("Qwen3VLMoeForConditionalGeneration")
-class Qwen3VLMoeVisionModel(MmprojModel):
->>>>>>> remote-JJJYmmm/qwen3vl-1022
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        assert self.has_vision_encoder
        assert self.hparams_vision is not None

        # Compute image_size if not present
        if "image_size" not in self.hparams_vision:
-<<<<<<< HEAD
            # For Qwen3VL/Qwen3VLMoe, compute from num_position_embeddings
-=======
-            # For Qwen3VLMoe, compute from num_position_embeddings
->>>>>>> remote-JJJYmmm/qwen3vl-1022
            num_pos = self.hparams_vision.get("num_position_embeddings", 2304)
            patch_size = self.hparams_vision.get("patch_size", 16)
            # num_position_embeddings = (image_size / patch_size) ** 2

@@ -3601,15 +3592,10 @@ def set_gguf_parameters(self):
        self.gguf_writer.add_vision_deepstack_layers(self.deepstack_layers)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-<<<<<<< HEAD
        # Skip text model tensors - they go in the text model file
        if name.startswith("model.language_model.") or name.startswith("lm_head."):
            return []

-=======
-        del bid # unused
-
->>>>>>> remote-JJJYmmm/qwen3vl-1022
        if name.startswith("model.visual."):
            name = name.replace("model.visual.", "visual.", 1)


@@ -3666,13 +3652,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
            ]

        if name == "visual.patch_embed.proj.bias":
-<<<<<<< HEAD
            # Include the bias - it's used by the C++ code
            return [(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH] + ".bias", data_torch)]
-=======
-            # Skip bias for Qwen3VL - the C++ code expects it to be null
-            return []
->>>>>>> remote-JJJYmmm/qwen3vl-1022

        if name.startswith("visual."):
            if ".qkv." in name:

@@ -3695,12 +3676,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:

            return [(self.map_tensor_name(name), data_torch)]

-<<<<<<< HEAD
        # Fall back to parent class for other tensors
        return super().modify_tensors(data_torch, name, bid)
-=======
-        return []
->>>>>>> remote-JJJYmmm/qwen3vl-1022

@ModelBase.register("InternVisionModel")
class InternVisionModel(MmprojModel):

@@ -4026,7 +4003,6 @@ def set_vocab(self):
        super().set_vocab()


-<<<<<<< HEAD
@ModelBase.register("Qwen3VLForConditionalGeneration")
class Qwen3VLTextModel(Qwen3Model):
    model_arch = gguf.MODEL_ARCH.QWEN3VL

@@ -4056,8 +4032,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        return super().modify_tensors(data_torch, name, bid)


-=======
->>>>>>> remote-JJJYmmm/qwen3vl-1022
@ModelBase.register("Qwen3VLMoeForConditionalGeneration")
class Qwen3VLMoeTextModel(Qwen3MoeModel):
    model_arch = gguf.MODEL_ARCH.QWEN3VLMOE

@@ -4079,16 +4053,13 @@ def set_gguf_parameters(self):

        logger.info(f"MRoPE sections: {mrope_section[:4]}")

-<<<<<<< HEAD
    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        # Skip vision tensors - they go in the mmproj file
        if name.startswith("model.visual."):
            return []

        return super().modify_tensors(data_torch, name, bid)
-=======
->>>>>>> remote-JJJYmmm/qwen3vl-1022

@ModelBase.register("GPT2LMHeadModel")
class GPT2Model(TextModel):
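
Note on the resolution above: the HEAD side is kept throughout, so a single Qwen3VLVisionModel handles both Qwen3VL and Qwen3VLMoe, the patch-embed bias is exported for the C++ side, and tensors are routed between the mmproj and text-model outputs rather than dropped. A minimal Python sketch of that routing convention (route_tensor is a hypothetical helper, not part of the script):

# Hypothetical helper mirroring the split resolved above: vision tensors go to
# the mmproj file, everything else stays with the text model.
def route_tensor(name: str) -> str:
    if name.startswith("model.visual."):
        return "mmproj"   # converted by Qwen3VLVisionModel.modify_tensors
    if name.startswith("model.language_model.") or name.startswith("lm_head."):
        return "text"     # skipped by the vision converter, kept by the text converters
    return "text"         # remaining tensors fall back to the parent class

assert route_tensor("model.visual.patch_embed.proj.bias") == "mmproj"
assert route_tensor("lm_head.weight") == "text"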

ggml/src/ggml-cpu/ops.cpp

Lines changed: 0 additions & 11 deletions

@@ -5516,16 +5516,6 @@ static void ggml_mrope_cache_init(

        float theta = theta_t;

-<<<<<<< HEAD
-        if (sector >= sections[0] && sector < sec_w) {
-            theta = theta_h;
-        }
-        else if (sector >= sec_w && sector < sec_w + sections[2]) {
-            theta = theta_w;
-        }
-        else if (sector >= sec_w + sections[2]) {
-            theta = theta_e;
-=======
        if (is_interleaved_mrope) {
            // thwthwthw...ttt
            if (sector % 3 == 1 && sector < 3 * sections[1]) {

@@ -5545,7 +5535,6 @@ static void ggml_mrope_cache_init(
            else if (sector >= sec_w + sections[2]) {
                theta = theta_e;
            }
->>>>>>> remote-JJJYmmm/qwen3vl-1022
        }

        rope_yarn(
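
Note: the resolution keeps the incoming interleaved M-RoPE branch (comment "thwthwthw...ttt") and drops the HEAD-only contiguous sector mapping. A rough Python sketch of how interleaved sectors map to axes, assuming sections holds the [t, h, w, e] section sizes as in the surrounding code and that the w axis follows the same modular pattern shown for h (illustration only, not the ggml implementation):

def interleaved_axis(sector: int, sections: list[int]) -> str:
    # Pattern "thwthwthw...ttt": h and w sectors are interleaved with t in
    # groups of three; sectors beyond those groups are treated as temporal (t).
    if sector % 3 == 1 and sector < 3 * sections[1]:
        return "h"
    if sector % 3 == 2 and sector < 3 * sections[2]:
        return "w"
    return "t"

# e.g. sections = [24, 20, 20, 0] yields t, h, w, t, h, w, ... for the first sectors
print("".join(interleaved_axis(s, [24, 20, 20, 0]) for s in range(9)))  # thwthwthw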

gguf-py/gguf/constants.py

Lines changed: 0 additions & 9 deletions

@@ -352,10 +352,7 @@ class MODEL_ARCH(IntEnum):
    QWEN2VL = auto()
    QWEN3 = auto()
    QWEN3MOE = auto()
-<<<<<<< HEAD
    QWEN3VL = auto()
-=======
->>>>>>> remote-JJJYmmm/qwen3vl-1022
    QWEN3VLMOE = auto()
    PHI2 = auto()
    PHI3 = auto()

@@ -706,10 +703,7 @@ class MODEL_TENSOR(IntEnum):
    MODEL_ARCH.QWEN2VL: "qwen2vl",
    MODEL_ARCH.QWEN3: "qwen3",
    MODEL_ARCH.QWEN3MOE: "qwen3moe",
-<<<<<<< HEAD
    MODEL_ARCH.QWEN3VL: "qwen3vl",
-=======
->>>>>>> remote-JJJYmmm/qwen3vl-1022
    MODEL_ARCH.QWEN3VLMOE: "qwen3vlmoe",
    MODEL_ARCH.PHI2: "phi2",
    MODEL_ARCH.PHI3: "phi3",

@@ -1518,7 +1512,6 @@ class MODEL_TENSOR(IntEnum):
        MODEL_TENSOR.FFN_DOWN_EXP,
        MODEL_TENSOR.FFN_UP_EXP,
    ],
-<<<<<<< HEAD
    MODEL_ARCH.QWEN3VL: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,

@@ -1536,8 +1529,6 @@ class MODEL_TENSOR(IntEnum):
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
    ],
-=======
->>>>>>> remote-JJJYmmm/qwen3vl-1022
    MODEL_ARCH.QWEN3VLMOE: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,

include/llama.h

Lines changed: 1 addition & 1 deletion

@@ -232,7 +232,7 @@ extern "C" {

        llama_token * token;
        float * embd;
-        llama_pos * pos; // first `n_tokens` elements are always linearly increasing position for traditional llm
+        llama_pos * pos;
        int32_t * n_seq_id;
        llama_seq_id ** seq_id;
        int8_t * logits; // TODO: rename this to "output"

src/llama-arch.cpp

Lines changed: 0 additions & 6 deletions

@@ -32,10 +32,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
    { LLM_ARCH_QWEN2VL, "qwen2vl" },
    { LLM_ARCH_QWEN3, "qwen3" },
    { LLM_ARCH_QWEN3MOE, "qwen3moe" },
-<<<<<<< HEAD
    { LLM_ARCH_QWEN3_VL, "qwen3vl" },
-=======
->>>>>>> remote-JJJYmmm/qwen3vl-1022
    { LLM_ARCH_QWEN3_VL_MOE, "qwen3vlmoe" },
    { LLM_ARCH_PHI2, "phi2" },
    { LLM_ARCH_PHI3, "phi3" },

@@ -785,7 +782,6 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
        },
    },
    {
-<<<<<<< HEAD
        LLM_ARCH_QWEN3_VL,
        {
            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },

@@ -805,8 +801,6 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
        },
    },
    {
-=======
->>>>>>> remote-JJJYmmm/qwen3vl-1022
        LLM_ARCH_QWEN3_VL_MOE,
        {
            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },

src/llama-arch.h

Lines changed: 0 additions & 3 deletions

@@ -36,10 +36,7 @@ enum llm_arch {
    LLM_ARCH_QWEN2VL,
    LLM_ARCH_QWEN3,
    LLM_ARCH_QWEN3MOE,
-<<<<<<< HEAD
    LLM_ARCH_QWEN3_VL,
-=======
->>>>>>> remote-JJJYmmm/qwen3vl-1022
    LLM_ARCH_QWEN3_VL_MOE,
    LLM_ARCH_PHI2,
    LLM_ARCH_PHI3,

src/llama-batch.cpp

Lines changed: 19 additions & 3 deletions

@@ -259,7 +259,23 @@ bool llama_batch_allocr::init(
        const llama_pos p0 = memory ? memory->seq_pos_max(s) : -1;

        if (p0 >= 0) {
-            if (seq_pos_min(s) != p0 + 1) {
+            bool ok = true;
+
+            if (batch.token) {
+                if (seq_pos_min(s) != p0 + 1) {
+                    ok = false;
+                }
+            } else {
+                assert(batch.embd);
+
+                // for embeddings (typically used as vision input), we allow them to have repeating positions
+                // ref: https://github.com/ggml-org/llama.cpp/issues/13694#issuecomment-2983871762
+                if (seq_pos_min(s) != p0 && seq_pos_min(s) != p0 + 1) {
+                    ok = false;
+                }
+            }
+
+            if (!ok) {
                LLAMA_LOG_ERROR(
                    "%s: the tokens of sequence %d in the input batch have inconsistent sequence positions:\n"
                    " - the last position stored in the memory module of the context (i.e. the KV cache) for sequence %d is X = %d\n"

@@ -639,7 +655,7 @@ llama_ubatch llama_batch_allocr::ubatch_add(const std::vector<int32_t> & idxs, u

    auto udata = std::make_shared<llama_ubatch::data_t>();

-    const int32_t n_pos_cur = batch.embd ? (n_pos_per_embd + 1) : 1;
+    const int32_t n_pos_cur = batch.embd ? n_pos_per_embd : 1;

    const int64_t n_embd_all = batch.embd ? (int64_t) n_tokens*n_embd : 0;
    const int64_t n_pos_all = (int64_t) n_tokens*n_pos_cur;

@@ -665,7 +681,7 @@ llama_ubatch llama_batch_allocr::ubatch_add(const std::vector<int32_t> & idxs, u
        }

        for (int j = 0; j < n_pos_cur; ++j) {
-            udata->pos[j * n_tokens + i] = batch.pos[j * batch.n_tokens + idxs[i]];
+            udata->pos[j*n_tokens + i] = batch.pos[j*batch.n_tokens + idxs[i]];
        }

        udata->n_seq_id[i] = batch.n_seq_id[idxs[i]];
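
Note: the new check only relaxes position continuity for embedding batches. Token batches must still start at exactly p0 + 1, while embedding inputs (typically vision patches with M-RoPE positions) may also start at p0, i.e. repeat the last stored position. A small Python sketch of the accepted cases, where p0 is the last position in the KV cache for the sequence (helper name is illustrative):

def positions_ok(seq_pos_min: int, p0: int, is_embedding: bool) -> bool:
    if p0 < 0:                # nothing stored for this sequence yet, no check
        return True
    if not is_embedding:      # token batch: must continue right after p0
        return seq_pos_min == p0 + 1
    # embedding batch: repeating the last position is also accepted
    return seq_pos_min in (p0, p0 + 1)

assert positions_ok(11, 10, is_embedding=False)
assert positions_ok(10, 10, is_embedding=True)
assert not positions_ok(10, 10, is_embedding=False)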

src/llama-graph.cpp

Lines changed: 1 addition & 7 deletions

@@ -54,13 +54,7 @@ void llm_graph_input_pos::set_input(const llama_ubatch * ubatch) {
            }
            ggml_backend_tensor_set(pos, pos_data.data(), 0, pos_data.size()*ggml_element_size(pos));
        } else {
-            llama_pos * pos_ptr = ubatch->pos;
-            // Normally, ubatch->pos stores linearly increasing position
-            // However, some multi-modal models requires special position embedding (e.g. M-Rope in qwen2vl and qwen2.5vl)
-            // But linearly increasing position is still needed for proper causal attention masking
-            // So we store both of them: the first n_tokens elements are not changed, while model-specific positions are appended after that.
-            if (ubatch->embd && n_pos_per_embd > 1) pos_ptr += n_tokens; // use mrope positions
-            ggml_backend_tensor_set(pos, pos_ptr, 0, n_tokens * n_pos_per_embd * ggml_element_size(pos));
+            ggml_backend_tensor_set(pos, ubatch->pos, 0, n_tokens*n_pos_per_embd*ggml_element_size(pos));
        }
    }
}
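
Note: together with the llama-batch.cpp change, this drops the extra row of linearly increasing positions that used to be prepended for M-RoPE embedding batches; ubatch->pos now holds exactly n_pos_per_embd rows of n_tokens positions each, and the graph input copies them as-is. A sketch of what such a buffer might contain for a 2x2 grid of image-patch embeddings, assuming four M-RoPE components (t, h, w, e) as suggested by the mrope_section logging above (values are purely illustrative):

n_tokens = 4          # a 2x2 grid of image patches
n_pos_per_embd = 4    # assumed M-RoPE components: t, h, w, e
t0 = 10               # patches of one image may share the same temporal position

# row-major layout matching udata->pos[j*n_tokens + i]: component j of token i
pos = [
    t0, t0, t0, t0,   # t: repeated across the image
    0,  0,  1,  1,    # h: patch row index
    0,  1,  0,  1,    # w: patch column index
    0,  0,  0,  0,    # e: unused here
]
assert len(pos) == n_tokens * n_pos_per_embd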

src/llama-graph.h

Lines changed: 0 additions & 1 deletion

@@ -687,7 +687,6 @@ struct llm_graph_context {
    ggml_tensor * build_inp_pos_bucket_enc() const;
    ggml_tensor * build_inp_pos_bucket_dec() const;
    ggml_tensor * build_pos_bias(ggml_tensor * pos_bucket, ggml_tensor * attn_rel_b) const;
-
    ggml_tensor * build_qwen3vl_inp_embd(ggml_tensor * tok_embd) const;

    //
