beautification

andrej · andrej · commit 8941533f7c0b · 2025-11-14T15:32:49.000-07:00
diff --git a/examples/llama_3.2_1b/inference.py b/examples/llama_3.2_1b/inference.py
@@ -31,18 +31,24 @@
 
 
 _iron_chat = r"""
-           _____ _____   ____  _   _     _____ _           _            
-          |_   _|  __ \ / __ \| \ | |   / ____| |         | |           
-  ______    | | | |__) | |  | |  \| |  | |    | |__   __ _| |_   ______ 
- |______|   | | |  _  /| |  | | . ` |  | |    | '_ \ / _` | __| |______|
-           _| |_| | \ \| |__| | |\  |  | |____| | | | (_| | |_          
-          |_____|_|  \_\\____/|_| \_|   \_____|_| |_|\__,_|\__|         
-           
-  ___             _    _        _   _             _   __  __ ___    ___    _   ___  
- | _ \_ _ _____ _(_)__| |___ __| | | |__ _  _    /_\ |  \/  |   \  | _ \  /_\ |   \ 
- |  _/ '_/ _ \ V / / _` / -_) _` | | '_ \ || |  / _ \| |\/| | |) | |   / / _ \| |) |
- |_| |_| \___/\_/|_\__,_\___\__,_| |_.__/\_, | /_/ \_\_|  |_|___/  |_|_\/_/ \_\___/ 
-                                         |__/                                       
+        /$$$$$$ /$$$$$$$   /$$$$$$  /$$   /$$
+       |_  $$_/| $$__  $$ /$$__  $$| $$$ | $$
+         | $$  | $$  \ $$| $$  \ $$| $$$$| $$
+         | $$  | $$$$$$$/| $$  | $$| $$ $$ $$
+         | $$  | $$__  $$| $$  | $$| $$  $$$$
+         | $$  | $$  \ $$| $$  | $$| $$\  $$$
+        /$$$$$$| $$  | $$|  $$$$$$/| $$ \  $$
+       |______/|__/  |__/ \______/ |__/  \__/
+                                            
+                                            
+ /$$       /$$        /$$$$$$  /$$      /$$  /$$$$$$ 
+| $$      | $$       /$$__  $$| $$$    /$$$ /$$__  $$
+| $$      | $$      | $$  \ $$| $$$$  /$$$$| $$  \ $$
+| $$      | $$      | $$$$$$$$| $$ $$/$$ $$| $$$$$$$$
+| $$      | $$      | $$__  $$| $$  $$$| $$| $$__  $$
+| $$      | $$      | $$  | $$| $$\  $ | $$| $$  | $$
+| $$$$$$$$| $$$$$$$$| $$  | $$| $$ \/  | $$| $$  | $$
+|________/|________/|__/  |__/|__/     |__/|__/  |__/
 """
 
 
@@ -191,7 +197,7 @@ def inference(
 
     print(f"Starting text generation...")
     print(f"Generating {num_tokens} tokens...")
-    print("=" * 60)
+    print("=" * 55)
 
     prefill_end_time = None
 
@@ -222,7 +228,7 @@ def set_prefill_time():
     tokens_per_second = (num_tokens - 1) / post_prefill_time if num_tokens > 1 else 0
     time_per_token = total_time / (num_tokens - 1) if num_tokens > 1 else prefill_time
 
-    print("=" * 60)
+    print("=" * 55)
     print(" TIMING RESULTS:")
     print(f"  Total time: {total_time:.4f} seconds")
     print(f"  Prefill time: {prefill_time:.4f} seconds")
@@ -233,7 +239,7 @@ def set_prefill_time():
         if num_tokens > 0
         else "  Time per token: N/A"
     )
-    print("=" * 60)
+    print("=" * 55)
 
     logging.info(f"Generation time: {total_time:.4f} sec")
     logging.info(f"Total wall clock time: {total_time:.4f} sec")
diff --git a/examples/llama_3.2_1b/src/model_with_json.py b/examples/llama_3.2_1b/src/model_with_json.py
@@ -72,11 +72,23 @@ def format_option(name, value):
             return f"{name} {checkmark}"
         return f"{name}: {value}"
 
+    dont_print = {"dtype"}
+    # The following options are mutually exclusive, e.g. regular and fused MHA
+    # cannot be enabled at the same time. But it looks bad to have red Xs,
+    # indicating things are running on the CPU when they are not. So, we only
+    # print one of these mutually exclusive options.
+    if cfg["use_aie_fused_mha"]:
+        dont_print |= {"use_aie_regular_mha"}
+    else:
+        dont_print |= {"use_aie_fused_mha"}
+
     console.print(
         "AIE Configuration ([green]✔[/green] = AIE NPU / [red]✘[/red] = CPU):",
         style="bold underline",
     )
     for option_key, (option_ty, option_default, option_name) in config_options.items():
+        if option_key in dont_print:
+            continue
         console.print(format_option(option_name, cfg.get(option_key, option_default)))
     console.print("")