Skip to content

Commit

Permalink
Merge pull request vllm-project#3 from beagleski/bapatra/bugfix-longrope-type
Browse files Browse the repository at this point in the history

minor change for LongRoPE config to account for the rename of the rope-scaling type from "longrope" to "su"
  • Loading branch information
codedecde authored May 4, 2024
2 parents 63b9bb8 + e1dd365 commit 561d5a8
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
2 changes: 1 addition & 1 deletion vllm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -972,7 +972,7 @@ def _get_and_verify_max_len(
derived_max_model_len = default_max_len

rope_scaling = getattr(hf_config, "rope_scaling", None)
if rope_scaling is not None and rope_scaling["type"] != "longrope":
if rope_scaling is not None and rope_scaling["type"] not in ("longrope", "su"):
assert "factor" in rope_scaling
scaling_factor = rope_scaling["factor"]
if rope_scaling["type"] == "yarn":
Expand Down
6 changes: 5 additions & 1 deletion vllm/model_executor/models/phi3small/phi3small.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,11 @@ def compute_logits(self, hidden_states: torch.Tensor,
sampling_metadata: SamplingMetadata) -> torch.Tensor:
logits = self.logits_processor(self.lm_head.weight, hidden_states,
sampling_metadata)
if self.dummy_token_indices is not None:
if self.dummy_token_indices is not None and logits is not None:
# In case of tensor-parallelism, the logit processor under the hood
# does a `tensor_model_parallel_gather`, so that the vocab multiplication
# happens only on rank 0. For all other ranks, the logits are returned as
# None. Hence only the rank with non-None logits should fill the dummy tokens with -inf.
logits.index_fill_(-1, self.dummy_token_indices, -torch.inf)
return logits

Expand Down

0 comments on commit 561d5a8

Please sign in to comment.