Skip to content

Conversation

wenhuach21
Copy link
Contributor

@wenhuach21 wenhuach21 commented Sep 25, 2025

  • support 16 bits in options
  • support gguf
  • support mixed data types; the algorithm can mix them, but exporting cannot
  • gguf group_size fallback
  • check gguf scale_dtype
  • layer_config ut
  • support FP8 model
  • AutoScheme could also be patched by group_size, etc, fill with default value
  • gguf could not be mixed with other formats
  • support non_str dataset
  • support shared_layers
  • support naive methods
  • test_large_model

wenhuach21 and others added 30 commits September 25, 2025 14:14
@wenhuach21 wenhuach21 changed the title [WIP]try to enable auto_scheme API [WIP]support automatic mixed bits assignment Oct 9, 2025
@wenhuach21
Copy link
Contributor Author

@xin3he @n1ck-guo
please have a review of this function first

def compute_layer_bits(
    layer: torch.nn.Module,
    ignore_scale_zp_bits: bool = False,
) -> tuple[int, float]:
    """Compute total and average bitwidth for a single quantized layer.

    Counts the raw weight bits plus the overhead of quantization metadata:
    either per-group scales/zero-points, or GGUF-style double quantization
    (per-group scale/min quantized to ``super_bits`` plus 32-bit values per
    super-group).

    Args:
        layer: A PyTorch layer carrying quantization attributes (``bits``,
            ``group_size``, and optionally ``super_group_size``/``super_bits``).
        ignore_scale_zp_bits: Whether to ignore scale/zero-point overhead and
            count only the weight bits themselves.

    Returns:
        A tuple ``(total_bits, avg_bits)`` where ``avg_bits`` always equals
        ``total_bits / n_param``.

    Raises:
        ValueError: If ``group_size`` is negative and not the sentinel ``-1``.
    """
    n_param = layer.weight.numel()
    weight_bits = getattr(layer, "bits", 16)
    group_size = getattr(layer, "group_size", 128)
    super_group_size = getattr(layer, "super_group_size", None)
    super_weight_bits = getattr(layer, "super_bits", None)

    # Unquantized layer, or caller asked to ignore scale/zp overhead.
    if weight_bits >= 16 or ignore_scale_zp_bits:
        # GGUF stores nominally 16-bit (unquantized) tensors as fp32, so reset
        # to 32 bits. This must only apply to genuinely unquantized layers:
        # a low-bit GGUF layer evaluated with ignore_scale_zp_bits=True still
        # costs its real weight bits (the original unconditionally returned 32
        # here whenever super_bits was set).
        if super_weight_bits is not None and weight_bits >= 16:
            return 32 * n_param, 32.0
        # avg must equal total / n_param, i.e. the weight bitwidth (the
        # original hard-coded 16.0, which was wrong for low-bit layers when
        # ignoring scale/zp overhead).
        return weight_bits * n_param, float(weight_bits)

    in_features, out_features = get_layer_features(layer)

    # Determine number of quantization groups from the group size.
    if group_size > 0:
        # ceil(in_features / group_size) groups per output row. The extra
        # parentheses matter: multiplying before the floor-division (as the
        # original did) over-counts groups whenever in_features is not a
        # multiple of group_size.
        n_group = out_features * ((in_features + group_size - 1) // group_size)
    elif group_size == 0:
        n_group = 1  # per-tensor quantization: a single group
    elif group_size == -1:
        n_group = out_features  # per-output-channel quantization
    else:
        raise ValueError(f"Invalid group_size {group_size}")

    # Auxiliary bits: scales/zero-points, or double-quantization metadata.
    if not super_group_size:
        scale_bits = 16  # fp16 scale per group
        zp_bits = weight_bits  # zero-point stored at the weight precision
        aux_total_bits = n_group * (scale_bits + zp_bits)
    else:
        # GGUF k-quant style: per-group scale and min quantized to super_bits…
        aux_total_bits = n_group * super_weight_bits * 2
        n_super_group = (n_group + super_group_size - 1) // super_group_size
        # …plus one 32-bit scale and one 32-bit min per super-group.
        aux_total_bits += n_super_group * 32 * 2

    total_bits = weight_bits * n_param + aux_total_bits
    return total_bits, total_bits / n_param

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

1 participant