
Commit aae1dc7

Bump transformers and torch (#117)
Pin bumps:
- torch to 20250725
- torchao to 20250730
- executorch to 20250730
- transformers to 4.54.1
1 parent 36e3dd5 commit aae1dc7

File tree

16 files changed: +158 additions, -187 deletions


.github/workflows/test_models.yml

Lines changed: 3 additions & 2 deletions

@@ -34,9 +34,10 @@ jobs:
       fail-fast: false
       matrix:
         test-modeling: ${{ fromJson(needs.discover-tests.outputs.model_names) }}
-        executorch-version: ['0.6.0', 'nightly']
+        executorch-version: ['0.7.0', 'nightly']
         python-version: ['3.11']
-        os: [macos-15, ubuntu-22.04]
+        # os: [macos-15, ubuntu-22.04]  # TODO(#122): Re-enable the mac tests after fixing seg fault.
+        os: [ubuntu-22.04]

     # Custom job name, now shortened and cleaner
     name: ${{ matrix.test-modeling }} (et=${{ matrix.executorch-version }}, py=${{ matrix.python-version }}, ${{ matrix.os }})

install_dev.py

Lines changed: 8 additions & 8 deletions

@@ -5,21 +5,21 @@

 def install_torch_nightly_deps():
     """Install torch related dependencies from pinned nightly"""
-    EXECUTORCH_NIGHTLY_VERSION = "dev20250625"
-    TORCHAO_NIGHTLY_VERSION = "dev20250620"
+    EXECUTORCH_NIGHTLY_VERSION = "dev20250730"
+    TORCHAO_NIGHTLY_VERSION = "dev20250730"
     # Torch nightly is aligned with pinned nightly in https://github.com/pytorch/executorch/blob/main/install_requirements.py#L74
-    TORCH_NIGHTLY_VERSION = "dev20250601"
+    TORCH_NIGHTLY_VERSION = "dev20250725"
     subprocess.check_call(
         [
             sys.executable,
             "-m",
             "pip",
             "install",
-            f"executorch==0.7.0.{EXECUTORCH_NIGHTLY_VERSION}",
-            f"torch==2.8.0.{TORCH_NIGHTLY_VERSION}",
-            f"torchvision==0.23.0.{TORCH_NIGHTLY_VERSION}",
+            f"executorch==0.8.0.{EXECUTORCH_NIGHTLY_VERSION}",
+            f"torch==2.9.0.{TORCH_NIGHTLY_VERSION}",
+            f"torchvision==0.24.0.{TORCH_NIGHTLY_VERSION}",
             f"torchaudio==2.8.0.{TORCH_NIGHTLY_VERSION}",
-            f"torchao==0.12.0.{TORCHAO_NIGHTLY_VERSION}",
+            f"torchao==0.13.0.{TORCHAO_NIGHTLY_VERSION}",
             "--extra-index-url",
             "https://download.pytorch.org/whl/nightly/cpu",
         ]
@@ -34,7 +34,7 @@ def install_dep_from_source():
             "-m",
             "pip",
             "install",
-            "git+https://github.com/huggingface/transformers@896e9cea1ade521b2648f4798218550f6c72190c#egg=transformers",  # 4.53.1
+            "git+https://github.com/huggingface/transformers@9c641dc16154964e5ffc0c13e9ec6aaffa295ed6#egg=transformers",  # 4.54.1
         ]
     )
     subprocess.check_call(
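
Not part of the commit, but a quick way to confirm that an environment actually picked up these pins after running install_dev.py is to read the installed package metadata. The sketch below uses only the standard library and mirrors the package names pinned above.

# Minimal sketch: print the installed versions of the pinned packages.
from importlib.metadata import PackageNotFoundError, version

for pkg in ("torch", "torchvision", "torchaudio", "torchao", "executorch", "transformers"):
    try:
        print(f"{pkg}=={version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")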

optimum/executorch/attentions/custom_kv_cache.py

Lines changed: 32 additions & 35 deletions

@@ -54,12 +54,12 @@ def __init__(

         # Create a list of CustomKVCache instances, one per layer
         self.kv_cache = torch.nn.ModuleList()
-        for _ in range(config.num_hidden_layers):
+        for layer in self.layers:
             layer_cache = CustomKVCache(
-                max_batch_size=self.max_batch_size,
-                max_context_length=self.max_cache_len,
-                n_heads=self.num_key_value_heads,
-                head_dim=self.head_dim,
+                max_batch_size=layer.max_batch_size,
+                max_context_length=layer.max_cache_len,
+                n_heads=layer.num_heads,
+                head_dim=layer.head_dim,
                 dtype=dtype,
             )
             self.kv_cache.append(layer_cache)
@@ -202,32 +202,29 @@ def __init__(
             layer_device_map=layer_device_map,
         )

-        # make sure layer_device_map is none
         assert layer_device_map is None
         assert device is None or device == "cpu", "Device must be None or 'cpu'"

         self.cache_position = None
-        # Create a list of cache instances, one per layer
-        # Use CustomKVCache for global layers and CustomRingKVCache for sliding window layers
+        # Create a list of cache instances, one per layer.
+        # Use CustomKVCache for global layers and CustomRingKVCache for sliding window layers.
         self.kv_cache = torch.nn.ModuleList()
-        for layer_idx in range(config.num_hidden_layers):
-            # newer version of transfomer has is_sliding defined
-            # for HybridCache
-            if self.is_sliding[layer_idx]:
+        for layer in self.layers:
+            if layer.is_sliding:
                 # This is a sliding window layer
                 layer_cache = CustomRingKVCache(
-                    max_batch_size=self.max_batch_size,
-                    max_context_length=self.sliding_window_len,
-                    n_heads=self.num_key_value_heads,
-                    head_dim=self.head_dim,
+                    max_batch_size=layer.max_batch_size,
+                    max_context_length=layer.max_cache_len,
+                    n_heads=layer.num_heads,
+                    head_dim=layer.head_dim,
                     dtype=dtype,
                 )
             else:
                 layer_cache = CustomKVCache(
-                    max_batch_size=self.max_batch_size,
-                    max_context_length=self.max_cache_len,
-                    n_heads=self.num_key_value_heads,
-                    head_dim=self.head_dim,
+                    max_batch_size=layer.max_batch_size,
+                    max_context_length=layer.max_cache_len,
+                    n_heads=layer.num_heads,
+                    head_dim=layer.head_dim,
                     dtype=dtype,
                 )
             self.kv_cache.append(layer_cache)
@@ -284,7 +281,7 @@ def get_seq_length(self, layer_idx: Optional[int] = 0) -> int:

         # For CustomRingKVCache, we need to handle the sequence length differently
         layer_cache = self.kv_cache[layer_idx]
-        if self.is_sliding[layer_idx]:
+        if self.layers[layer_idx].is_sliding:
             # CustomRingKVCache cache_position_manager which
             # maintains cache position for each slot in the kv cache
             # we return the max position + 1 to indicate max position
@@ -308,7 +305,7 @@ def get_layer_cache(self, layer_idx: int):

 def replace_with_et_custom_kv_cache(module, config, generation_config, cache_dtype):
     """
-    Replace all KV caches in the module with ETCustomStaticCache.
+    Replace all KV caches in the module with ETCustomStaticCache or ETCustomHybridCache.
     This modifies the model in place.

     Args:
@@ -342,18 +339,18 @@ def _replace_with_et_custom_kv_cache(module, config, generation_config, cache_dt
     if getattr(module, "replace_cache", None) is not None:
         static_cache = ETCustomStaticCache(
             config=config,
-            max_batch_size=generation_config.cache_config.batch_size,
-            max_cache_len=generation_config.cache_config.max_cache_len,
-            device=generation_config.cache_config.device,
+            max_batch_size=generation_config.cache_config.get("batch_size"),
+            max_cache_len=generation_config.cache_config.get("max_cache_len"),
+            device=generation_config.cache_config.get("device"),
             dtype=cache_dtype,
         )
         module.replace_cache(static_cache)
     else:
         module.static_cache = ETCustomStaticCache(
             config=config,
-            max_batch_size=generation_config.cache_config.batch_size,
-            max_cache_len=generation_config.cache_config.max_cache_len,
-            device=generation_config.cache_config.device,
+            max_batch_size=generation_config.cache_config.get("batch_size"),
+            max_cache_len=generation_config.cache_config.get("max_cache_len"),
+            device=generation_config.cache_config.get("device"),
             dtype=cache_dtype,
         )
         # Dont know why we need to this even though
@@ -370,25 +367,25 @@ def _replace_with_et_custom_kv_cache(module, config, generation_config, cache_dt
     if getattr(module, "replace_cache", None) is not None:
         hybrid_cache = ETCustomHybridCache(
             config=config,
-            max_batch_size=generation_config.cache_config.batch_size,
-            max_cache_len=generation_config.cache_config.max_cache_len,
-            device=generation_config.cache_config.device,
+            max_batch_size=generation_config.cache_config.get("batch_size"),
+            max_cache_len=generation_config.cache_config.get("max_cache_len"),
+            device=generation_config.cache_config.get("device"),
             dtype=cache_dtype,
         )
         module.replace_cache(hybrid_cache)
     else:
         module.cache = ETCustomHybridCache(
             config=config,
-            max_batch_size=generation_config.cache_config.batch_size,
-            max_cache_len=generation_config.cache_config.max_cache_len,
-            device=generation_config.cache_config.device,
+            max_batch_size=generation_config.cache_config.get("batch_size"),
+            max_cache_len=generation_config.cache_config.get("max_cache_len"),
+            device=generation_config.cache_config.get("device"),
             dtype=cache_dtype,
         )
         # Register cache attributes for each layer
         for i in range(len(module.cache.kv_cache)):
             setattr(module, f"key_cache_{i}", module.cache.kv_cache[i].k_cache)
             setattr(module, f"value_cache_{i}", module.cache.kv_cache[i].v_cache)
-            if module.cache.is_sliding[i]:
+            if module.cache.layers[i].is_sliding:
                 # Register cache_positions as buffer for sliding window layers
                 # This prevents it from being traced as a constant
                 module.register_buffer(
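
The thread running through these changes: with transformers 4.54 the cache exposes per-layer entries via self.layers, each carrying its own max_batch_size, max_cache_len, num_heads, head_dim, and is_sliding flag, and generation_config.cache_config behaves like a mapping, so values are read with .get() instead of attribute access. The sketch below illustrates that selection logic in isolation; the Layer dataclass is a hypothetical stand-in, not a transformers or optimum class.

# Hypothetical stand-in for the per-layer metadata the newer transformers cache exposes.
from dataclasses import dataclass

@dataclass
class Layer:
    max_batch_size: int
    max_cache_len: int
    num_heads: int
    head_dim: int
    is_sliding: bool

def pick_cache_kinds(layers):
    # Sliding-window layers get a ring cache (CustomRingKVCache); global layers get a static one (CustomKVCache).
    return ["ring" if layer.is_sliding else "static" for layer in layers]

layers = [Layer(1, 1024, 8, 64, is_sliding=False), Layer(1, 512, 8, 64, is_sliding=True)]
print(pick_cache_kinds(layers))  # ['static', 'ring']

# cache_config is now dict-like, so values are read with .get() rather than attribute access.
cache_config = {"batch_size": 1, "max_cache_len": 1024, "device": "cpu"}
print(cache_config.get("batch_size"), cache_config.get("max_cache_len"), cache_config.get("device"))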

optimum/executorch/modeling.py

Lines changed: 47 additions & 6 deletions

@@ -16,6 +16,7 @@

 import logging
 import os
+import shutil
 from abc import ABC, abstractmethod
 from pathlib import Path
 from tempfile import TemporaryDirectory
@@ -24,6 +25,7 @@
 import torch
 from huggingface_hub import hf_hub_download
 from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
+from torch.ao.quantization.fx._decomposed import quantized_decomposed_lib  # noqa
 from transformers import (
     AutoModelForCausalLM,
     AutoModelForImageClassification,
@@ -102,6 +104,34 @@ def __init__(self, models: Dict[str, "ExecuTorchModule"], config: "PretrainedCon

         self.stats = Stats()

+        # Initialize cleanup tracking
+        self._temp_dir = None
+
+    def __del__(self):
+        """Clean up temporary files when the model instance is destroyed."""
+        self._cleanup_temp_resources()
+
+    def _cleanup_temp_resources(self):
+        """Clean up temporary directory and files."""
+        if hasattr(self, "_temp_dir") and self._temp_dir is not None:
+            try:
+                if hasattr(self._temp_dir, "cleanup"):
+                    # It's a TemporaryDirectory object
+                    logging.info(f"Cleaning up temporary directory: {self._temp_dir.name}")
+                    self._temp_dir.cleanup()
+                    logging.info("Temporary directory cleanup completed")
+                elif isinstance(self._temp_dir, (str, Path)):
+                    # It's a path
+                    logging.info(f"Cleaning up temporary path: {self._temp_dir}")
+                    shutil.rmtree(self._temp_dir, ignore_errors=True)
+                    logging.info("Temporary path cleanup completed")
+            except Exception as e:
+                # Log cleanup errors for debugging
+                logging.warning(f"Error during temp directory cleanup: {e}")
+                pass
+            finally:
+                self._temp_dir = None
+
     @abstractmethod
     def forward(self, *args, **kwargs):
         """
@@ -242,7 +272,7 @@ def _export(
         inferred_task = TasksManager.infer_task_from_model(cls.auto_model_class)
         logging.info(f"Inferred task from model class: {inferred_task}")

-        save_dir = TemporaryDirectory()
+        save_dir = TemporaryDirectory(prefix="executorch_export_")
         save_dir_path = Path(save_dir.name)

         # Export to ExecuTorch and save the pte file to the temporary directory
@@ -266,7 +296,7 @@ def _export(
         for name, _ in executorch_progs.items():
             models.update(cls._from_pretrained(save_dir_path, file_name=f"{name}.pte", config=config))

-        return models
+        return models, save_dir

     def _save_pretrained(self, save_directory):
         """
@@ -298,6 +328,7 @@ def from_pretrained(
             logger.info("Offline mode: setting `local_files_only=True`")
             local_files_only = True

+        # See if model was already exported to ExecuTorch and uploaded to the HuggingFace repo.
         _export = export
         try:
             if local_files_only and not os.path.isdir(model_id):
@@ -324,21 +355,21 @@ def from_pretrained(
                 if export:
                     logger.warning(
                         f"The model {model_id} was already converted to the ExecuTorch IR but got `export=True`, the model will be converted to ExecuTorch once again. "
-                        # "Don't forget to save the resulting model with `.save_pretrained()`"
                     )
                     _export = True
                 else:
                     logger.warning(
                         f"No ExecuTorch files were found for {model_id}, setting `export=True` to convert the model to the ExecuTorch IR. "
-                        # "Don't forget to save the resulting model with `.save_pretrained()`"
                     )
         except Exception as exception:
             logger.warning(
                 f"Could not infer whether the model was already converted or not to the ExecuTorch IR, keeping `export={export}`.\n{exception}"
             )

+        temp_dir = None
         if _export:
-            models_dict = cls._export(
+            logging.info(f"Exporting {model_id} to ExecuTorch program...")
+            models_dict, temp_dir = cls._export(
                 model_id=model_id,
                 config=config,
                 revision=revision,
@@ -351,6 +382,9 @@ def from_pretrained(
                 **kwargs,
             )
         else:
+            logging.info(
+                f"Pre-exported `.pte` artifact already exists in HuggingFace repo or provided file path for {model_id}, skipping export."
+            )
             models_dict = {}
             for pte_file in pte_files:
                 models_dict.update(
@@ -368,7 +402,14 @@ def from_pretrained(
                     )
                 )

-        return cls(models_dict, config)
+        model_instance = cls(models_dict, config)
+
+        # Store the TemporaryDirectory reference to prevent GC
+        if temp_dir is not None:
+            model_instance._temp_dir = temp_dir
+            logging.info(f"Stored temp directory reference in model: {temp_dir.name}")
+
+        return model_instance


 class ExecuTorchModelForSeq2SeqLM(ExecuTorchModelBase):
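
The reason _export now returns save_dir and from_pretrained stores it on the instance: a TemporaryDirectory removes its directory as soon as the object is garbage-collected, so without a live reference the freshly exported .pte files could disappear while the model still points at them. A small self-contained sketch of that lifetime behavior (plain Python, not the repo's classes):

# Sketch: a TemporaryDirectory's contents survive only while a reference to it is held.
from pathlib import Path
from tempfile import TemporaryDirectory

class Holder:
    """Stand-in for a model instance that keeps the temp dir alive."""
    def __init__(self, temp_dir):
        self._temp_dir = temp_dir  # holding the object prevents premature cleanup

save_dir = TemporaryDirectory(prefix="executorch_export_")
pte_path = Path(save_dir.name) / "model.pte"
pte_path.write_bytes(b"placeholder")  # stands in for the exported program

holder = Holder(save_dir)
del save_dir                  # the Holder still references the TemporaryDirectory
print(pte_path.exists())      # True: files persist while `holder` is alive

del holder                    # dropping the last reference lets its finalizer delete the directory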

optimum/exporters/executorch/__main__.py

Lines changed: 5 additions & 0 deletions

@@ -15,6 +15,7 @@
 """Entry point to the optimum.exporters.executorch command line."""

 import argparse
+import logging
 import os
 import warnings
 from pathlib import Path
@@ -130,10 +131,14 @@ def main_export(
     kwargs["force_download"] = force_download
     kwargs["config"] = config

+    # 1. Load model, apply source transformations, and torch.export() into a graph (ExportedProgram).
+    logging.info(f"Loading {model_name_or_path} and exporting to static graph...")
     recipe_kwargs = kwargs.pop("recipe_kwargs", {})

     model = task_func(model_name_or_path, **kwargs)

+    # 2. Export to ExecuTorch through ExecuTorch's lowering APIs.
+    logging.info(f"Lowering {model_name_or_path} to ExecuTorch...")
     if not os.path.exists(output_dir):
         os.makedirs(output_dir)
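
The two new log lines spell out the overall flow: the model is first loaded and captured into a static graph with torch.export(), then the graph is lowered and serialized by ExecuTorch. Below is a generic sketch of those two steps using the public torch.export and executorch.exir APIs; it assumes an executorch install and is not the exact optimum-executorch call chain, which routes through task and recipe functions.

# Generic two-step sketch: torch.export() capture, then ExecuTorch lowering to a .pte file.
import torch
from executorch.exir import to_edge

class TinyModel(torch.nn.Module):
    def forward(self, x):
        return torch.nn.functional.relu(x)

# 1. Capture the eager model into a static graph (ExportedProgram).
exported_program = torch.export.export(TinyModel(), (torch.randn(2, 4),))

# 2. Lower through ExecuTorch's APIs and serialize the program.
executorch_program = to_edge(exported_program).to_executorch()
with open("tiny_model.pte", "wb") as f:
    f.write(executorch_program.buffer)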

optimum/exporters/executorch/convert.py

Lines changed: 3 additions & 6 deletions

@@ -19,20 +19,17 @@
 from pathlib import Path
 from typing import Union

+from transformers.integrations.executorch import sdpa_mask_without_vmap
+from transformers.masking_utils import AttentionMaskInterface
 from transformers.modeling_utils import AttentionInterface

 from optimum.executorch.attentions.custom_sdpa import custom_sdpa_with_start_pos_forward
-from optimum.utils.import_utils import is_transformers_version

 from .recipe_registry import discover_recipes, recipe_registry


 AttentionInterface.register("custom_sdpa", custom_sdpa_with_start_pos_forward)
-if is_transformers_version(">=", "4.53.0.dev0"):
-    from transformers.integrations.executorch import sdpa_mask_without_vmap
-    from transformers.masking_utils import AttentionMaskInterface
-
-    AttentionMaskInterface.register("custom_sdpa", sdpa_mask_without_vmap)
+AttentionMaskInterface.register("custom_sdpa", sdpa_mask_without_vmap)


 def export_to_executorch(
