3131
3232
3333_iron_chat = r"""
34- _____ _____ ____ _ _ _____ _ _
35- |_ _| __ \ / __ \| \ | | / ____| | | |
36- ______ | | | |__) | | | | \| | | | | |__ __ _| |_ ______
37- |______| | | | _ /| | | | . ` | | | | '_ \ / _` | __| |______|
38- _| |_| | \ \| |__| | |\ | | |____| | | | (_| | |_
39- |_____|_| \_\\____/|_| \_| \_____|_| |_|\__,_|\__|
40-
41- ___ _ _ _ _ _ __ __ ___ ___ _ ___
42- | _ \_ _ _____ _(_)__| |___ __| | | |__ _ _ /_\ | \/ | \ | _ \ /_\ | \
43- | _/ '_/ _ \ V / / _` / -_) _` | | '_ \ || | / _ \| |\/| | |) | | / / _ \| |) |
44- |_| |_| \___/\_/|_\__,_\___\__,_| |_.__/\_, | /_/ \_\_| |_|___/ |_|_\/_/ \_\___/
45- |__/
34+ /$$$$$$ /$$$$$$$ /$$$$$$ /$$ /$$
35+ |_ $$_/| $$__ $$ /$$__ $$| $$$ | $$
36+ | $$ | $$ \ $$| $$ \ $$| $$$$| $$
37+ | $$ | $$$$$$$/| $$ | $$| $$ $$ $$
38+ | $$ | $$__ $$| $$ | $$| $$ $$$$
39+ | $$ | $$ \ $$| $$ | $$| $$\ $$$
40+ /$$$$$$| $$ | $$| $$$$$$/| $$ \ $$
41+ |______/|__/ |__/ \______/ |__/ \__/
42+
43+
44+ /$$ /$$ /$$$$$$ /$$ /$$ /$$$$$$
45+ | $$ | $$ /$$__ $$| $$$ /$$$ /$$__ $$
46+ | $$ | $$ | $$ \ $$| $$$$ /$$$$| $$ \ $$
47+ | $$ | $$ | $$$$$$$$| $$ $$/$$ $$| $$$$$$$$
48+ | $$ | $$ | $$__ $$| $$ $$$| $$| $$__ $$
49+ | $$ | $$ | $$ | $$| $$\ $ | $$| $$ | $$
50+ | $$$$$$$$| $$$$$$$$| $$ | $$| $$ \/ | $$| $$ | $$
51+ |________/|________/|__/ |__/|__/ |__/|__/ |__/
4652"""
4753
4854
@@ -191,7 +197,7 @@ def inference(
191197
192198 print (f"Starting text generation..." )
193199 print (f"Generating { num_tokens } tokens..." )
194- print ("=" * 60 )
200+ print ("=" * 55 )
195201
196202 prefill_end_time = None
197203
@@ -222,7 +228,7 @@ def set_prefill_time():
222228 tokens_per_second = (num_tokens - 1 ) / post_prefill_time if num_tokens > 1 else 0
223229 time_per_token = total_time / (num_tokens - 1 ) if num_tokens > 1 else prefill_time
224230
225- print ("=" * 60 )
231+ print ("=" * 55 )
226232 print (" TIMING RESULTS:" )
227233 print (f" Total time: { total_time :.4f} seconds" )
228234 print (f" Prefill time: { prefill_time :.4f} seconds" )
@@ -233,7 +239,7 @@ def set_prefill_time():
233239 if num_tokens > 0
234240 else " Time per token: N/A"
235241 )
236- print ("=" * 60 )
242+ print ("=" * 55 )
237243
238244 logging .info (f"Generation time: { total_time :.4f} sec" )
239245 logging .info (f"Total wall clock time: { total_time :.4f} sec" )
0 commit comments