From e98915d9d05b0e0936c3f8bc761960824794fb5a Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Thu, 9 Mar 2023 23:58:10 +0200 Subject: [PATCH] Fix unknown tokens (#10) --- src/tokenizers.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tokenizers.js b/src/tokenizers.js index 9419f30c2c37..f89d5ba5a69c 100644 --- a/src/tokenizers.js +++ b/src/tokenizers.js @@ -27,11 +27,11 @@ class TokenizerModel extends Callable { throw Error("encode should be implemented in subclass.") } convert_tokens_to_ids(tokens) { - return tokens.map(t => this.tokens_to_ids[t] ?? this.config.unk_token_id); + return tokens.map(t => this.tokens_to_ids[t] ?? this.unk_token_id); } convert_ids_to_tokens(ids) { - return ids.map(i => this.vocab[i] ?? this.config.unk_token); + return ids.map(i => this.vocab[i] ?? this.unk_token); } }