DRAFT: allow for multiple hardware backends #1077
Changes from 18 commits
@@ -0,0 +1,11 @@
```python
from ..cextension import lib
from ._base import COOSparseTensor
from .nvidia import CudaBackend

_backend = CudaBackend(lib) if lib else None
```
Review comment: I think the initialization of the backend should happen on-demand, not eagerly and implicitly during import time. This is exacerbated by the re-export in `bitsandbytes/__init__.py`. I think

```python
_backend: Backend | None = None

def get_backend() -> Backend:
    if not _backend:
        _backend = CudaBackend()
```

would be the better API. (Note that I also think …)
```python
# TODO: this should actually be done in `cextension.py` and potentially with .get_instance()
# for now this is just a simplifying assumption
#
# Notes from Tim:
# backend = CUDABackend.get_instance()
# -> CUDASetup -> lib -> backend.clib = lib
```
@@ -0,0 +1,143 @@
```python
import torch


class COOSparseTensor:
    def __init__(self, rows, cols, nnz, rowidx, colidx, values):
```
Review comment: Missing types?
```python
        assert rowidx.dtype == torch.int32
        assert colidx.dtype == torch.int32
        assert values.dtype == torch.float16
        assert values.numel() == nnz
        assert rowidx.numel() == nnz
        assert colidx.numel() == nnz
```

Review comment on lines +6 to +11: If these are critical to happen during runtime, they shouldn't be `assert`s, since asserts are stripped when Python runs with `-O`.
```python
        self.rows = rows
        self.cols = cols
        self.nnz = nnz
        self.rowidx = rowidx
        self.colidx = colidx
        self.values = values
```

Review comment on lines +4 to +18: This looks like it should be a `dataclass`.
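As a sketch of that suggestion (my composition, not part of the PR), the same class could be a dataclass with the runtime checks moved into `__post_init__`, which also addresses the point above about `assert`s:

```python
from dataclasses import dataclass

import torch


@dataclass
class COOSparseTensor:
    rows: int
    cols: int
    nnz: int
    rowidx: torch.Tensor
    colidx: torch.Tensor
    values: torch.Tensor

    def __post_init__(self):
        # Explicit raises survive `python -O`, unlike bare asserts.
        if self.rowidx.dtype != torch.int32 or self.colidx.dtype != torch.int32:
            raise TypeError("rowidx and colidx must be int32 tensors")
        if self.values.dtype != torch.float16:
            raise TypeError("values must be a float16 tensor")
        if not (self.rowidx.numel() == self.colidx.numel() == self.values.numel() == self.nnz):
            raise ValueError("rowidx, colidx and values must each have nnz elements")
```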
```python


class BackendInterface:
```

Review comment: This doesn't smell like an interface, it smells like a base class, so:

Suggested change:
```diff
-class BackendInterface:
+class Backend:
```
```python
    _instance = None

    def __new__(cls, lib=None):
        if cls._instance is None:
            if lib is None:
                raise ValueError(
                    "A 'lib' binary must be provided during the first initialization of BackendInterface."
                )
            cls._instance = super().__new__(cls)
            # Set the binary name during the first and only instantiation.
            cls._instance.lib = lib
```
Review comment on lines +24 to +33: It's not super clear what exactly `lib` is supposed to be here.
```python
        else:
            if lib is not None:
                raise ValueError(
                    "The BackendInterface singleton has already been initialized with a 'lib' value. Re-initialization with a new 'lib' value is not allowed."
                )
        return cls._instance
```
Review comment on lines +22 to +39: I really do think this is unnecessary and will complicate e.g. testing. The canonical way to get at the backend instance would be the above `get_backend()` function.
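To make the testing concern concrete (my illustration; the module path and the `FakeBackend` stub are hypothetical, and it assumes the `get_backend()` pattern sketched earlier):

```python
from unittest import mock


class FakeBackend:
    """Minimal stand-in used only by tests."""


with mock.patch("bitsandbytes.backends._backend", FakeBackend()):
    backend = get_backend()  # returns the injected FakeBackend
```

With the `__new__`-based singleton, a test would instead have to reach into `BackendInterface._instance`, and the first `lib` passed in sticks for the life of the process.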
```python
    def check_matmul(
        self,
        A,
        B,
        out=None,
        transposed_A=False,
        transposed_B=False,
        expected_type=torch.int8,
    ):
        """
        Checks if the matrix multiplication between A and B can be performed, considering their shapes,
        whether they are transposed, and their data types. It also determines the shape of the output tensor.

        Parameters:
        - A (torch.Tensor): The first matrix in the multiplication.
        - B (torch.Tensor): The second matrix in the multiplication.
        - out (torch.Tensor, optional): The output tensor to store the result of the multiplication. Default is None.
        - transposed_A (bool, optional): Indicates if matrix A is transposed. Default is False.
        - transposed_B (bool, optional): Indicates if matrix B is transposed. Default is False.
        - expected_type (torch.dtype, optional): The expected data type of matrices A and B. Default is torch.int8.

        Returns:
        - tuple: The shape of the output tensor resulting from the matrix multiplication.

        Raises:
        - TypeError: If the data types of A or B do not match the expected type.
        - ValueError: If the dimensions of A and B are not compatible for matrix multiplication.
        """
        raise NotImplementedError
```
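For orientation (my sketch, not from the PR), a minimal 2-D-only implementation consistent with this docstring could look like:

```python
def check_matmul(self, A, B, out=None, transposed_A=False, transposed_B=False, expected_type=torch.int8):
    if A.dtype != expected_type or B.dtype != expected_type:
        raise TypeError(f"expected {expected_type}, got {A.dtype} and {B.dtype}")
    # Effective shapes after the requested transpositions (2-D case only;
    # a real implementation would also handle batched inputs and `out`).
    m = A.shape[1] if transposed_A else A.shape[0]
    k_a = A.shape[0] if transposed_A else A.shape[1]
    k_b = B.shape[1] if transposed_B else B.shape[0]
    n = B.shape[0] if transposed_B else B.shape[1]
    if k_a != k_b:
        raise ValueError(f"inner dimensions do not match: {k_a} != {k_b}")
    return (m, n)
```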
```python
    # 8-bit matmul interface
    def coo_zeros(self, rows, cols, nnz, device, dtype=torch.half):
```

Review comment on lines +71 to +72: Is this expected to be actually overridden by a specific backend type? If not, maybe it should be a `classmethod` on `COOSparseTensor` instead:

```python
    @classmethod
    def zeros(cls, rows, cols, nnz, device, dtype=torch.half) -> "COOSparseTensor":
```

```python
        rowidx = torch.zeros((nnz,), dtype=torch.int32, device=device)
        colidx = torch.zeros((nnz,), dtype=torch.int32, device=device)
        values = torch.zeros((nnz,), dtype=dtype, device=device)

        return COOSparseTensor(rows, cols, nnz, rowidx, colidx, values)
```
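Combining the reviewer's suggested signature with the body above (my composition, not code from the PR):

```python
class COOSparseTensor:
    ...

    @classmethod
    def zeros(cls, rows, cols, nnz, device, dtype=torch.half) -> "COOSparseTensor":
        rowidx = torch.zeros((nnz,), dtype=torch.int32, device=device)
        colidx = torch.zeros((nnz,), dtype=torch.int32, device=device)
        values = torch.zeros((nnz,), dtype=dtype, device=device)
        return cls(rows, cols, nnz, rowidx, colidx, values)
```

Using `cls(...)` rather than naming the class keeps the constructor working for subclasses.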
```python
    def get_colrow_absmax(
        self, A, row_stats=None, col_stats=None, nnz_block_ptr=None, threshold=0.0
    ):
        raise NotImplementedError

    def double_quant(
        self,
        A,
        col_stats=None,
        row_stats=None,
        out_col=None,
        out_row=None,
        threshold=0.0,
    ):
        raise NotImplementedError

    def extract_outliers(self, *args, **kwargs):
        raise NotImplementedError

    def igemmlt(self, *args, **kwargs):
        raise NotImplementedError

    def mm_dequant(self, *args, **kwargs):
        raise NotImplementedError
```
Review comment on lines +95 to +102: There should be no APIs defined with just `*args, **kwargs`. All in all, now that we have the opportunity, the API shape should be really thought out; I, for one, am a great proponent of making some functions at least partially kwarg-only, so it's impossible to accidentally pass things in the wrong order etc.

Reply: Totally agree, I also like locking APIs with at least the `*` marker. Yes, the `*args, **kwargs` are only scaffolding to make the point that these methods are still to come and will be part of the API (based on Tim's input).
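As an illustration of the kwarg-only point (my sketch, reusing the `double_quant` signature already in this diff): a bare `*` in the parameter list forces callers to name the optional buffers:

```python
def double_quant(
    self,
    A,
    *,  # everything after this must be passed by keyword
    col_stats=None,
    row_stats=None,
    out_col=None,
    out_row=None,
    threshold=0.0,
):
    ...

# backend.double_quant(A, rs, cs)                      # now a TypeError
# backend.double_quant(A, row_stats=rs, col_stats=cs)  # explicit and safe
```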
```python
    # k-bit quantization interface
    def create_quant_map(self, interface, quant_name):
        """
        Below functions should be abstracted into a general method
        "create_quant_map(interface, "quant_name")", so we can call e.g.
        create_quant_map(..., quant_name='normal'):
        - 'create_dynamic_map'
        - 'create_fp8_map'
        - 'create_linear_map'
        - 'create_normal_map'
        - 'create_quantile_map'
        """
        raise NotImplementedError
```
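One way to realize the abstraction described in this docstring (a hypothetical sketch; it assumes the existing `create_*_map` helpers from `bitsandbytes.functional` are in scope):

```python
def create_quant_map(self, quant_name, **kwargs):
    # Dispatch table built from the helpers listed in the docstring.
    creators = {
        "dynamic": create_dynamic_map,
        "fp8": create_fp8_map,
        "linear": create_linear_map,
        "normal": create_normal_map,
        "quantile": create_quantile_map,
    }
    try:
        return creators[quant_name](**kwargs)
    except KeyError:
        raise ValueError(f"Unknown quant map: {quant_name!r}") from None
```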
```python
    def estimate_quantiles(self, *args, **kwargs):
        raise NotImplementedError

    def dequantize_blockwise(self, *args, **kwargs):
        raise NotImplementedError

    def quantize_blockwise(self, *args, **kwargs):
        raise NotImplementedError

    # 4-bit matmul interface
    def dequantize_4bit(self, *args, **kwargs):
        raise NotImplementedError

    def quantize_4bit(self, *args, **kwargs):
        raise NotImplementedError

    def gemv_4bit(self, *args, **kwargs):
        raise NotImplementedError

    # 8-bit optimizer interface
    def optimizer_update_32bit(self, *args, **kwargs):
        """This is needed for tests"""
        raise NotImplementedError("Subclasses must implement 'optimizer_update_32bit'.")

    def optimizer_update_8bit_blockwise(self, *args, **kwargs):
        raise NotImplementedError
```
@@ -0,0 +1,54 @@
```python
import ctypes
from typing import Optional

import torch
```
```python
def pre_call(device):
    prev_device = torch.cuda.current_device()
    torch.cuda.set_device(device)
    return prev_device


def post_call(prev_device):
    torch.cuda.set_device(prev_device)
```
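A typical call pattern for this pair (my illustration; `lib.some_kernel` is a hypothetical native entry point, and `get_ptr` is defined below) saves and restores the active CUDA device around a kernel launch:

```python
prev_device = pre_call(A.device)   # switch to the input tensor's device
try:
    lib.some_kernel(get_ptr(A))    # hypothetical C call on that device
finally:
    post_call(prev_device)         # always restore the previous device
```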
```python
def get_ptr(A: Optional[torch.Tensor]) -> Optional[ctypes.c_void_p]:
    """
    Get the ctypes pointer from a PyTorch Tensor.

    Parameters
    ----------
    A : torch.Tensor
        The PyTorch tensor.

    Returns
    -------
    ctypes.c_void_p
    """
    if A is None:
        return None
    else:
        return ctypes.c_void_p(A.data.data_ptr())
```
```python
def is_on_gpu(tensors):
```

Review comment: Types? This seems like it should be called something other than `is_on_gpu`, given that it raises instead of just returning a boolean.
```python
    on_gpu = True
    gpu_ids = set()
    for t in tensors:
        if t is None:
            continue  # NULL pointers are fine
        is_paged = getattr(t, "is_paged", False)
        on_gpu &= t.device.type == "cuda" or is_paged
        if not is_paged:
            gpu_ids.add(t.device.index)
    if not on_gpu:
        raise TypeError(
            f"All input tensors need to be on the same GPU, but found some tensors to not be on a GPU:\n {[(t.shape, t.device) for t in tensors]}"
        )
    if len(gpu_ids) > 1:
        raise TypeError(
            f"Input tensors need to be on the same GPU, but found the following tensor and device combinations:\n {[(t.shape, t.device) for t in tensors]}"
        )
    return on_gpu
```
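Addressing the "Types?" question, a typed signature (my sketch) might read:

```python
from typing import Iterable, Optional

def is_on_gpu(tensors: Iterable[Optional[torch.Tensor]]) -> bool:
    ...
```

Since the function raises on failure and otherwise always returns `True`, a checker-style name returning `None` (e.g. an `ensure_`-prefixed one, as the reviewer hints) would arguably fit better than a boolean predicate.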
Review comment: I don't think this renaming re-export should be here. Is there an excellent reason for lay users of the library to be able to do `from bitsandbytes import backend`?