1+ use base64;
12use fancy_regex:: Regex ;
23use mlua:: prelude:: * ;
34use rustc_hash:: FxHashMap as HashMap ;
@@ -6,7 +7,6 @@ use std::fs::File;
67use std:: io:: { BufRead , BufReader } ;
78use std:: sync:: { Arc , Mutex } ;
89use std:: thread;
9- use base64;
1010
1111#[ cfg( feature = "multithreading" ) ]
1212const MAX_NUM_THREADS : usize = 128 ;
@@ -203,7 +203,7 @@ pub fn tiktoken_core(lua: &mlua::Lua) -> LuaResult<LuaTable> {
203203 Ok ( ( ) )
204204 } ,
205205 ) ?;
206- let _encode = lua. create_function ( move |_, text : String | encode ( & * state2, text) ) ?;
206+ let _encode = lua. create_function ( move |_, text : mlua :: String | encode ( & * state2, text) ) ?;
207207
208208 let exports = lua. create_table ( ) ?;
209209 exports. set ( "new" , _new) ?;
@@ -261,7 +261,8 @@ fn new(
261261 } ) ;
262262}
263263
264- fn encode ( state : & State , text : String ) -> LuaResult < ( Vec < usize > , usize , usize ) > {
264+ fn encode ( state : & State , text : mlua:: String ) -> LuaResult < ( Vec < usize > , usize , usize ) > {
265+ let encoded_str = String :: from_utf8_lossy ( text. as_bytes ( ) ) ;
265266 let allowed_special = HashSet :: new ( ) ;
266267 let max_tokens = None ;
267268 Ok ( state
@@ -270,7 +271,7 @@ fn encode(state: &State, text: String) -> LuaResult<(Vec<usize>, usize, usize)>
270271 . unwrap ( )
271272 . as_ref ( )
272273 . unwrap ( )
273- . _encode_native ( & text , & allowed_special, max_tokens) )
274+ . _encode_native ( & encoded_str , & allowed_special, max_tokens) )
274275}
275276
276277pub struct CoreBPENative {
0 commit comments