Skip to content

Commit

Permalink
fix prompt tokenization logic
Browse files Browse the repository at this point in the history
  • Loading branch information
ensan-hcl committed Dec 4, 2023
1 parent 4fa44e8 commit 39d6b36
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
5 changes: 3 additions & 2 deletions examples/batched.swift/Sources/main.swift
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,10 @@ print("decoded \(n_decode) tokens in \(String(format: "%.2f", Double(t_main_end
llama_print_timings(context)

private func tokenize(text: String, add_bos: Bool) -> [llama_token] {
let n_tokens = text.count + (add_bos ? 1 : 0)
let utf8Count = text.utf8.count
let n_tokens = utf8Count + (add_bos ? 1 : 0)
let tokens = UnsafeMutablePointer<llama_token>.allocate(capacity: n_tokens)
let tokenCount = llama_tokenize(model, text, Int32(text.count), tokens, Int32(n_tokens), add_bos, /*special tokens*/ false)
let tokenCount = llama_tokenize(model, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, /*special tokens*/ false)
var swiftTokens: [llama_token] = []
for i in 0 ..< tokenCount {
swiftTokens.append(tokens[Int(i)])
Expand Down
5 changes: 3 additions & 2 deletions examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,10 @@ actor LlamaContext {
}

private func tokenize(text: String, add_bos: Bool) -> [llama_token] {
let n_tokens = text.count + (add_bos ? 1 : 0)
let utf8Count = text.utf8.count
let n_tokens = utf8Count + (add_bos ? 1 : 0)
let tokens = UnsafeMutablePointer<llama_token>.allocate(capacity: n_tokens)
let tokenCount = llama_tokenize(model, text, Int32(text.count), tokens, Int32(n_tokens), add_bos, false)
let tokenCount = llama_tokenize(model, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, false)

var swiftTokens: [llama_token] = []
for i in 0..<tokenCount {
Expand Down

0 comments on commit 39d6b36

Please sign in to comment.