From c3d3ea9b3a0a8833b699b1739cac334b4306a85d Mon Sep 17 00:00:00 2001 From: KerfuffleV2 Date: Sun, 3 Sep 2023 01:41:26 -0600 Subject: [PATCH] gguf: Fix special vocab handling when id < 0 --- gguf-py/gguf/gguf.py | 4 ++-- gguf-py/pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py index b1bc4205bb8b0..d377cd56d88e7 100644 --- a/gguf-py/gguf/gguf.py +++ b/gguf-py/gguf/gguf.py @@ -801,7 +801,7 @@ def try_load_from_tokenizer_json(self, path: Path) -> bool: else: continue for maybe_token_id in (atok.get('id') for atok in added_tokens if atok.get('content') == tc_content): - if isinstance(maybe_token_id, int): + if isinstance(maybe_token_id, int) and maybe_token_id >= 0: self.special_token_ids[typ] = maybe_token_id break return True @@ -814,7 +814,7 @@ def try_load_from_config_json(self, path: Path) -> bool: config = json.load(f) for typ in self.special_token_types: maybe_token_id = config.get(f'{typ}_token_id') - if isinstance(maybe_token_id, int): + if isinstance(maybe_token_id, int) and maybe_token_id >= 0: self.special_token_ids[typ] = maybe_token_id return True diff --git a/gguf-py/pyproject.toml b/gguf-py/pyproject.toml index 26f792b14c46a..8da60de1b3f3f 100644 --- a/gguf-py/pyproject.toml +++ b/gguf-py/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "gguf" -version = "0.3.1" +version = "0.3.2" description = "Write ML models in GGUF for GGML" authors = ["GGML "] packages = [