@@ -32,7 +32,7 @@ int main(int argc, char ** argv){
     // tokenize the prompt
     const bool add_bos = llama_should_add_bos_token(model);
 
-    const char * static_input_file = "./wikitext-2-raw/wiki.train.raw";
+    const char * static_input_file = "./wikitext-103-raw/wiki.train.raw";
     std::ifstream file(static_input_file);
     if (!file) {
         fprintf(stderr, "error: failed to open file '%s'\n", static_input_file);
@@ -58,7 +58,6 @@ int main(int argc, char ** argv){
 
         const int i_start = std::max(inp_size - nnew, ngram_size);
         const int64_t t_start_ms = ggml_time_ms();
-        int percentage_done = 0;
         for (int i = i_start; i < inp_size; ++i) {
             const int ngram_start = i - ngram_size;
             uint64_t ngram = inp_data[ngram_start];
@@ -83,21 +82,20 @@ int main(int argc, char ** argv){
                 }
             }
 
-            if (i >= inp_size*(percentage_done + 1)/100) {
-                ++percentage_done;
-
+            if (i % 10000000 == 0) {
                 const int64_t t_now_ms = ggml_time_ms();
-                const int64_t eta_ms   = (100 - percentage_done) * (t_now_ms - t_start_ms) / percentage_done;
+                const int64_t eta_ms   = (inp_size - i) * (t_now_ms - t_start_ms) / i;
                 const int64_t eta_min  = eta_ms / (60*1000);
                 const int64_t eta_s    = (eta_ms - eta_min) / 1000;
 
-                fprintf(stderr, "lookup-create: %02d%% done, ETA: %02ld:%02ld\n", percentage_done, eta_min, eta_s);
+                fprintf(stderr, "lookup-create: hashing %d/%d done, ETA: %02ld:%02ld\n", i, inp_size, eta_min, eta_s);
             }
         }
     };
 
     all_token_hashmap atc;
     update_hashmaps(&atc, inp_static.data(), inp_static.size(), inp_static.size());
+    fprintf(stderr, "lookup-create: hashing done, writing file\n");
 
     std::ofstream file_out("lookup.bin", std::ios::binary);
     for (std::pair<uint64_t, token_hashmap> item : atc) {