pytorch
diff --git a/.github/workflows/torchao_experimental_test.yml b/.github/workflows/torchao_experimental_test.yml
Lines changed: 2 additions & 1 deletion
diff --git a/CITATION.cff b/CITATION.cff
Lines changed: 1 addition & 1 deletion
diff --git a/README.md b/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/benchmarks/benchmark_blockwise_scaled_linear_triton.py b/benchmarks/benchmark_blockwise_scaled_linear_triton.py
Lines changed: 1 addition & 1 deletion
diff --git a/benchmarks/float8/bench_grouped_mm.py b/benchmarks/float8/bench_grouped_mm.py
Lines changed: 1 addition & 34 deletions
diff --git a/docs/source/tutorials_source/pt2e_quant_openvino_inductor.rst b/docs/source/tutorials_source/pt2e_quant_openvino_inductor.rst
Lines changed: 7 additions & 4 deletions
diff --git a/test/quantization/test_config_serialization.py b/test/core/test_config.py (renamed)
Lines changed: 14 additions & 0 deletions
diff --git a/test/float8/test_base.py b/test/float8/test_base.py
Lines changed: 3 additions & 7 deletions
@@ -53,6 +53,7 @@ jobs:
           pytest torchao/experimental/tests/test_int8_dynamic_activation_intx_weight.py
           python torchao/experimental/tests/test_embedding_xbit_quantizer.py
           python torchao/experimental/tests/test_quant_passes.py
+          pytest -s test/prototype/test_dynamic_activation_lut.py
       - name: Run kernels/cpu/aarch64/tests
         if: runner.os == 'macOS'
         run: |
@@ -106,7 +107,7 @@ jobs:
   #         conda run -n test-mps-ops-env pip install torch --index-url "https://download.pytorch.org/whl/nightly/cpu"
   #     - name: Print torch version
   #       run: |
-          
+
   #         conda run -n test-mps-ops-env python -c "import torch; print(torch.__version__)"
   #     - name: Install requirements
   #       run: |
 
@@ -4,6 +4,6 @@ message: "If you use this software, please cite it as below."
 type: software
 authors:
   - given-names: "torchao maintainers and contributors"
-url: "https//github.com/pytorch/torchao"
+url: "https://github.com/pytorch/ao"
 license: "BSD-3-Clause"
 date-released: "2024-10-25"
@@ -278,7 +278,7 @@ If you find the torchao library useful, please cite it in your work as below.
 @software{torchao,
   title={TorchAO: PyTorch-Native Training-to-Serving Model Optimization},
   author={torchao},
-  url={https://github.com/pytorch/torchao},
+  url={https://github.com/pytorch/ao},
   license={BSD-3-Clause},
   month={oct},
   year={2024}
 
@@ -13,7 +13,7 @@
     from triton.testing import do_bench
 
     from torchao.float8.float8_utils import compute_error
-    from torchao.prototype.blockwise_fp8.blockwise_quantization import (
+    from torchao.prototype.blockwise_fp8_inference.blockwise_quantization import (
         blockwise_fp8_gemm,
         fp8_blockwise_act_quant,
         fp8_blockwise_weight_quant,
 
@@ -3,14 +3,14 @@
 #
 # This source code is licensed under the BSD 3-Clause license found in the
 # LICENSE file in the root directory of this source tree.
-import random
 from typing import Optional
 
 import fire
 import pandas as pd
 import torch
 from utils import do_benchmarks, get_name_to_moe_shapes_iter
 
+from torchao.prototype.moe_training.utils import generate_jagged_offs
 from torchao.testing.training.roofline_utils import get_specs
 
 
@@ -146,39 +146,6 @@ def do_scaled_grouped_mm(A, B):
         data_df.to_csv(out_filename)
 
 
-def generate_jagged_offs(E, M, dtype=torch.int32, device="cuda"):
-    """
-    Generates a tensor of length E, containing random values divisible by 16,
-    from 0 to M, in sorted order, and where the final value in the tensor is always M.
-    Args:
-        E (int): The length of the tensor.
-        M (int): The maximum value in the tensor.
-    Returns:
-        torch.Tensor: A tensor of length E with the specified properties.
-    """
-    # Ensure M is divisible by 16
-    if M % 16 != 0:
-        raise ValueError("M must be divisible by 16")
-
-    # Generate a list of possible values
-    possible_values = [i for i in range(0, M + 1, 16)]
-
-    # If E is larger than the number of possible values, raise an error
-    if E > len(possible_values):
-        raise ValueError("E cannot be larger than the number of possible values")
-
-    # Randomly select E - 1 values from the possible values (excluding M)
-    selected_values = torch.tensor(random.sample(possible_values[:-1], E - 1))
-
-    # Append M to the selected values
-    selected_values = torch.cat((selected_values, torch.tensor([M])))
-
-    # Sort the selected values
-    selected_values, _ = torch.sort(selected_values)
-
-    return selected_values.to(dtype).to(device)
-
-
 def main() -> None:
     fire.Fire(run)
 
 
@@ -74,7 +74,7 @@ OpenVINO and NNCF could be easily installed via `pip distribution <https://docs.
 .. code-block:: bash
 
     pip install -U pip
-    pip install openvino, nncf
+    pip install openvino nncf
 
 
 1. Capture FX Graph
@@ -84,7 +84,6 @@ We will start by performing the necessary imports, capturing the FX Graph from t
 
 .. code-block:: python
 
-    import copy
     import openvino.torch
     import torch
     import torchvision.models as models
@@ -106,7 +105,7 @@ We will start by performing the necessary imports, capturing the FX Graph from t
     example_inputs = (x,)
 
     # Capture the FX Graph to be quantized
-    with torch.no_grad(), nncf.torch.disable_patching():
+    with torch.no_grad():
         exported_model = torch.export.export(model, example_inputs).module()
 
 
@@ -204,7 +203,7 @@ After that the FX Graph can utilize OpenVINO optimizations using `torch.compile(
 
 .. code-block:: python
 
-    with torch.no_grad(), nncf.torch.disable_patching():
+    with torch.no_grad():
         optimized_model = torch.compile(quantized_model, backend="openvino")
 
         # Running some benchmark
@@ -235,6 +234,10 @@ These advanced NNCF algorithms can be accessed via the NNCF `quantize_pt2e` API:
 
 
     calibration_dataset = nncf.Dataset(calibration_loader, transform_fn)
+
+    with torch.no_grad():
+        exported_model = torch.export.export(model, example_inputs).module()
+
     quantized_model = quantize_pt2e(
         exported_model, quantizer, calibration_dataset, smooth_quant=True, fast_bias_correction=False
     )
 
@@ -187,5 +187,19 @@ def test_version_mismatch():
             config_from_dict(reconstructable)
 
 
+def test_default_version():
+    """Make sure the default version of a new config inheriting from AOBaseConfig is
+    always 1, because that is the default VERSION all children have when they have not
+    explicitly defined a VERSION class variable.
+    """
+
+    @dataclass
+    class DummyConfig(AOBaseConfig):
+        pass
+
+    config = DummyConfig()
+    assert config.VERSION == 1, "Default version must be 1"
+
+
 if __name__ == "__main__":
     pytest.main([__file__])
@@ -8,7 +8,6 @@
 import random
 import re
 import unittest
-import warnings
 
 import pytest
 import torch
@@ -381,6 +380,9 @@ def test_linear_from_config_params(
         "linear_dtype", [torch.bfloat16, torch.float16, torch.float32]
     )
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    @unittest.skipIf(
+        torch.cuda.is_available() and not is_sm_at_least_90(), "CUDA capability < 9.0"
+    )
     @skip_if_rocm("ROCm enablement in progress")
     def test_linear_from_recipe(
         self,
@@ -389,12 +391,6 @@ def test_linear_from_recipe(
         linear_dtype: torch.dtype,
         linear_bias: bool,
     ):
-        if torch.cuda.get_device_capability() < (9, 0):
-            warnings.warn(
-                f"CUDA capability {torch.cuda.get_device_capability()} < (9.0)"
-            )
-            pytest.skip()
-
         x = torch.randn(*x_shape, device="cuda", dtype=linear_dtype)
         m_ref = nn.Linear(16, 32, bias=linear_bias, device="cuda", dtype=linear_dtype)
         config = Float8LinearConfig.from_recipe_name(recipe_name)