Refactor safetensors save/load helpers for Hugging Face integration

liangel-02 · liangel-02 · commit e3a6d23b7352 · 2025-09-04T08:14:55.000-07:00
diff --git a/test/prototype/safetensors/test_safetensors_support.py b/test/prototype/safetensors/test_safetensors_support.py
@@ -1,16 +1,18 @@
+import json
 import tempfile
 import unittest
 
 import torch
+from safetensors.torch import load_file, save_file
 from torch.testing._internal.common_utils import (
     TestCase,
     run_tests,
 )
 
 from torchao import quantize_
 from torchao.prototype.safetensors.safetensors_support import (
-    load_tensor_state_dict,
-    save_tensor_state_dict,
+    reconstruct_tensor_state_dict,
+    convert_tensor_state_dict
 )
 from torchao.quantization.granularity import PerRow
 from torchao.quantization.quant_api import Float8DynamicActivationFloat8WeightConfig
@@ -19,6 +21,18 @@
 )
 
 
+def load_data(file_path: str, device: str):
+    """Load tensors via safetensors and read ``__metadata__`` from the file header."""
+    import struct
+
+    tensors = load_file(file_path, device)
+    with open(file_path, "rb") as fh:
+        # The file starts with a little-endian u64 giving the JSON header length.
+        (header_len,) = struct.unpack("<Q", fh.read(8))
+        header = json.loads(fh.read(header_len))
+    return tensors, header.get("__metadata__", {})
+
+
 @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
 @unittest.skipIf(not is_sm_at_least_89(), "Need sm89+")
 class TestSafeTensors(TestCase):
@@ -32,8 +46,10 @@ def test_safetensors(self):
         ref_output = model(*example_inputs)
 
         with tempfile.NamedTemporaryFile() as f:
-            save_tensor_state_dict(model.state_dict(), f.name)
-            reconstructed_dict = load_tensor_state_dict(f.name, device="cuda")
+            tensors_dict, metadata = convert_tensor_state_dict(model.state_dict())
+            save_file(tensors_dict, f.name, metadata=metadata)
+            tensors_dict, metadata = load_data(file_path=f.name, device="cuda")
+            reconstructed_dict = reconstruct_tensor_state_dict(tensors_dict, metadata)
 
         model = torch.nn.Sequential(
             torch.nn.Linear(32, 256, dtype=torch.bfloat16, device="cuda")
diff --git a/torchao/prototype/safetensors/safetensors_support.py b/torchao/prototype/safetensors/safetensors_support.py
@@ -1,9 +1,8 @@
 import json
 import logging
-from typing import Dict
+from typing import Any, Dict
 
 import torch
-from safetensors.torch import load_file, save_file
 
 from torchao.prototype.safetensors.safetensors_serialization import (
     Float8TensorAttributeJSONEncoder,
@@ -14,17 +13,51 @@
 logger: logging.Logger = logging.getLogger(__name__)
 
 
-def load_tensor_state_dict(file_path: str, device: str):
+def reconstruct_tensor_state_dict(
+    tensors_data: Dict[str, Any],
+    metadata: Dict[str, Any],
+):
     """
-    Load a dictionary of tensor subclasses from a safetensors file.
+    Recover a tensor subclass state dict from the provided torch.Tensor data and metadata.
+    This function is used after loading a previously saved model state dict (saved with safetensors.save_file) to reconstruct the tensor subclass structure.
+
+    For example, given a previously converted tensors_data and metadata:
+    tensors_data = {
+        '0.weight:qdata': torch.Tensor(...),
+        '0.weight:scale': torch.Tensor(...),
+        '0.bias:_data': torch.Tensor(...),
+    }
+    metadata = {
+        '0.weight': {
+            '_type': 'Float8Tensor',
+            '_data': {
+                'block_size': [1,32],
+                ...
+            }
+        },
+        '0.bias': {
+            '_type': 'torch.Tensor',
+        },
+        'tensor_names': ['0.weight', '0.bias']
+    }
+
+    We recover the structure of the original state dict:
+    tensor_dict = {
+        '0.weight': Float8Tensor(
+            qdata=torch.Tensor(...),
+            scale=torch.Tensor(...),
+            block_size=[1,32],
+            ...),
+        '0.bias': torch.Tensor(...),
+    }
 
     For torch.Tensors, we load:
         - _data: the tensor data
         - _type: the tensor type
 
     For Float8Tensor, we load:
         - tensor_data: qdata and scale
-        - tensor_attributes:
+        - tensor_attributes (metadata):
             - block_size
             - mm_config
             - hp_value_lb
@@ -34,20 +67,13 @@ def load_tensor_state_dict(file_path: str, device: str):
             - dtype
 
     Args:
-        file_path: Path to the safetensors file
+        tensors_data: Tensor data, keyed as '<tensor_name>:<data_name>'
+        metadata: Tensor attributes keyed by tensor name, plus a 'tensor_names' entry
 
     Returns:
         Dictionary of reconstructed tensor subclasses
     """
-    loaded_tensors = load_file(file_path, device)
-
-    with open(file_path, "rb") as f:
-        import struct
-
-        header_size = struct.unpack("<Q", f.read(8))[0]
-        header_bytes = f.read(header_size)
-        header = json.loads(header_bytes)
-        metadata = header.get("__metadata__", {})
+    combined_data = {**tensors_data, **metadata}
 
     if "tensor_names" not in metadata:
         raise ValueError("No tensors found")
@@ -57,7 +83,7 @@ def load_tensor_state_dict(file_path: str, device: str):
 
     for tensor_name in tensor_names:
         tensor_tensors = {}
-        for key, value in loaded_tensors.items():
+        for key, value in combined_data.items():
             if key.startswith(f"{tensor_name}:"):
                 # Remove the prefix
                 tensor_tensors[key[len(tensor_name) + 1 :]] = value
@@ -73,18 +99,45 @@ def load_tensor_state_dict(file_path: str, device: str):
         else:
             raise ValueError(f"Unsupported tensor type: {tensor_type}")
 
-    logger.info(
-        f"Loaded {len(tensor_names)} tensor subclasses from {file_path} with metadata"
-    )
     return result
 
 
-def save_tensor_state_dict(
-    tensor_dict: Dict[str, Dict[str, torch.Tensor]],
-    file_path: str,
+def convert_tensor_state_dict(
+    tensors_dict: Dict[str, Dict[str, torch.Tensor]],
 ):
     """
-    Save a dictionary of tensor subclasses with appropriate metadata.
+    Convert a dictionary of tensor subclasses so that it is compatible with safetensors.save_file.
+    We deconstruct the tensor subclass structure into torch.Tensor data and metadata.
+
+    For example, given something like:
+    tensor_dict = {
+        '0.weight': Float8Tensor(
+            qdata=torch.Tensor(...),
+            scale=torch.Tensor(...),
+            block_size=[1,32],
+            ...),
+        '0.bias': torch.Tensor(...),
+    }
+
+    We convert this to:
+    tensors_data = {
+        '0.weight:qdata': torch.Tensor(...),
+        '0.weight:scale': torch.Tensor(...),
+        '0.bias:_data': torch.Tensor(...),
+    }
+    metadata = {
+        '0.weight': {
+            '_type': 'Float8Tensor',
+            '_data': {
+                'block_size': [1,32],
+                ...
+            }
+        },
+        '0.bias': {
+            '_type': 'torch.Tensor',
+        },
+        'tensor_names': ['0.weight', '0.bias']
+    }
 
     For torch.Tensors, we save:
         - _data: the tensor data
@@ -105,22 +158,21 @@ def save_tensor_state_dict(
 
     Args:
         tensor_dict: Dictionary of tensor subclasses to save, with keys as tensor names
-        file_path: Path where to save the tensors
     """
 
-    combined_metadata = {}
-    combined_tensors_dict = {}
+    metadata = {}
+    tensors_data = {}
 
-    for tensor_name, tensor in tensor_dict.items():
+    for tensor_name, tensor in tensors_dict.items():
         if isinstance(tensor, Float8Tensor):
-            tensors_dict = {}
+            tensor_dict = {}
             for tensor_data_name in tensor.tensor_data_names:
-                tensors_dict[tensor_data_name] = getattr(tensor, tensor_data_name)
+                tensor_dict[tensor_data_name] = getattr(tensor, tensor_data_name)
 
-            metadata = json.dumps(tensor, cls=Float8TensorAttributeJSONEncoder)
+            tensor_metadata = json.dumps(tensor, cls=Float8TensorAttributeJSONEncoder)
         elif type(tensor) is torch.Tensor:
-            tensors_dict = {"_data": tensor}
-            metadata = json.dumps({"_type": torch.Tensor.__name__})
+            tensor_dict = {"_data": tensor}
+            tensor_metadata = json.dumps({"_type": torch.Tensor.__name__})
         else:
             raise ValueError(f"Unsupported tensor type: {type(tensor)}")
 
@@ -129,15 +181,11 @@ def save_tensor_state_dict(
             f"{tensor_name}:{key}": (
                 value.detach().clone() if isinstance(value, torch.Tensor) else value
             )
-            for key, value in tensors_dict.items()
+            for key, value in tensor_dict.items()
         }
 
-        combined_metadata[tensor_name] = metadata
-        combined_tensors_dict.update(prefixed_tensors_dict)
-
-    combined_metadata["tensor_names"] = json.dumps(list(tensor_dict.keys()))
+        metadata[tensor_name] = tensor_metadata
+        tensors_data.update(prefixed_tensors_dict)
 
-    save_file(combined_tensors_dict, file_path, metadata=combined_metadata)
-    logger.info(
-        f"Saved {len(tensor_dict)} tensor subclasses to {file_path} with metadata"
-    )
+    metadata["tensor_names"] = json.dumps(list(tensors_dict.keys()))
+    return tensors_data, metadata