@@ -72,6 +72,26 @@ def run_maverick_serving(model: str):
7272 raise
7373
7474
def get_rope_layers_config(model_path: str) -> list[int]:
    """
    Read the interleaved-RoPE layer flags from a HuggingFace ``config.json``.

    Args:
        model_path: Path to the local directory containing the reduced
            Maverick model checkpoint (must hold a ``config.json``).

    Returns:
        One flag per layer: 1 means the layer uses RoPE (and local
        attention), 0 means it does not.
    """
    config_file = Path(model_path) / "config.json"
    model_config = json.loads(config_file.read_text())
    no_rope_layers = model_config["text_config"]["no_rope_layers"]
    print(f"Found no_rope_layers: {no_rope_layers}")
    return no_rope_layers
93+
94+
7595def create_reduced_maverick_model (
7696 original_model_name :
7797 str = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8" ,
@@ -80,7 +100,7 @@ def create_reduced_maverick_model(
80100 num_experts : int = 4 ,
81101 vision_layers : int = 2 ,
82102 force_recreate : bool = False ,
83- ) -> tuple [ str , list [ int ]] :
103+ ) -> str :
84104 """
85105 Create a reduced-layer version of the Maverick model.
86106
@@ -93,22 +113,13 @@ def create_reduced_maverick_model(
93113 force_recreate: Whether to recreate if output_dir already exists
94114
95115 Returns:
96- Tuple of:
97- - Path to the created reduced model directory
98- - List of 0 or 1 indicating whether each layer uses RoPE and local attn
99- 0 indicates that RoPE is not used while 1 indicates that RoPE is used.
116+ Path to the created reduced model directory
100117 """
101118
102119 print (
103120 f"Creating reduced Maverick model with { text_layers } text layers and "
104121 f"{ vision_layers } vision layers..." )
105122
106- print ("Loading original model configuration..." )
107- original_config = AutoConfig .from_pretrained (original_model_name ,
108- trust_remote_code = True )
109- text_config = original_config .to_dict ()["text_config" ]
110- no_rope_layers = text_config ["no_rope_layers" ]
111-
112123 # Create output directory
113124 output_path = Path (output_dir )
114125 if output_path .exists ():
@@ -117,11 +128,14 @@ def create_reduced_maverick_model(
117128 else :
118129 print (f"Output directory { output_dir } already exists. "
119130 "Use --force-recreate to overwrite." )
120- return str (output_path ), no_rope_layers
131+ return str (output_path )
121132
122133 output_path .mkdir (parents = True , exist_ok = True )
123134
124135 try :
136+ print ("Loading original model configuration..." )
137+ original_config = AutoConfig .from_pretrained (original_model_name ,
138+ trust_remote_code = True )
125139 print ("Creating reduced configuration..." )
126140 reduced_config = create_reduced_config (original_config , text_layers ,
127141 num_experts , vision_layers )
@@ -149,7 +163,7 @@ def create_reduced_maverick_model(
149163 print (f"Could not copy generation config: { e } " )
150164
151165 print (f"Successfully created reduced Maverick model at { output_path } " )
152- return str (output_path ), no_rope_layers
166+ return str (output_path )
153167
154168 except Exception as e :
155169 print (f"Error creating reduced model: { e } " )
@@ -586,7 +600,7 @@ def test_dummy_maverick(
586600 monkeypatch .setenv ("VLLM_USE_V1" , "1" )
587601 monkeypatch .setenv ("VLLM_ENABLE_V1_MULTIPROCESSING" , "0" )
588602
589- model_path , rope_layers = create_reduced_maverick_model (
603+ model_path = create_reduced_maverick_model (
590604 original_model_name = original_model_name ,
591605 output_dir = output_dir ,
592606 text_layers = text_layers ,
@@ -597,6 +611,8 @@ def test_dummy_maverick(
597611
598612 print (f"\n Reduced model created successfully at: { model_path } " )
599613
614+ rope_layers = get_rope_layers_config (model_path )
615+
600616 llm = LLM (
601617 model = model_path ,
602618 trust_remote_code = True ,
0 commit comments