Skip to content

Commit

Permalink
switch tokenizer implementation with pure js and more compatible js-tiktoken
Browse files Browse the repository at this point in the history
  • Loading branch information
masterkain committed May 17, 2023
1 parent f39279c commit 3ec62b8
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 12 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@
"test:prettier": "prettier '**/*.{js,jsx,ts,tsx}' --check"
},
"dependencies": {
- "@dqbd/tiktoken": "^1.0.7",
"cac": "^6.7.14",
"conf": "^11.0.1",
"eventsource-parser": "^1.0.0",
+ "js-tiktoken": "^1.0.5",
"keyv": "^4.5.2",
"p-timeout": "^6.1.1",
"quick-lru": "^6.1.1",
Expand Down
17 changes: 9 additions & 8 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions src/tokenizer.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
- import { get_encoding } from '@dqbd/tiktoken'
+ import { getEncoding } from 'js-tiktoken'

// TODO: make this configurable
- const tokenizer = get_encoding('cl100k_base')
+ const tokenizer = getEncoding('cl100k_base')

export function encode(input: string): Uint32Array {
- return tokenizer.encode(input)
+ return new Uint32Array(tokenizer.encode(input))
}

0 comments on commit 3ec62b8

Please sign in to comment.