Revert "Support Fake GroupWise Quant (PaddlePaddle#61900)"
This reverts commit 2175de0.
hanhaowen-mt committed May 13, 2024
1 parent f6964b4 commit b78d1b0
Showing 4 changed files with 3 additions and 221 deletions.
39 changes: 2 additions & 37 deletions python/paddle/nn/quant/format.py
@@ -46,14 +46,7 @@ def from_quanter(quanter):
 
 
 class LinearQuanter(Layer):
-    def __init__(
-        self,
-        scales,
-        zero_point=None,
-        quant_axis=None,
-        bit_length=8,
-        group_size=128,
-    ):
+    def __init__(self, scales, zero_point=None, quant_axis=None, bit_length=8):
         super().__init__()
         scales = paddle.to_tensor(scales, dtype="float32")
         scale_attr = paddle.framework.ParamAttr(
@@ -72,21 +65,9 @@ def __init__(
         )
         self._quant_axis = -1 if quant_axis is None else quant_axis
         self._bit_length = bit_length
-        self._group_size = group_size
 
     def forward(self, input):
         if in_dynamic_mode():
-            if len(self._scales.shape) > 1:
-                bnt = (1 << (self._bit_length - 1)) - 1
-                new_s = paddle.repeat_interleave(
-                    self._scales, self._group_size, 0
-                )
-                quant_weight = paddle.clip(
-                    paddle.round(input.cast('float32') / new_s * bnt),
-                    -bnt - 1,
-                    bnt,
-                )
-                return quant_weight.cast(input.dtype)
             return _C_ops.quantize_linear(
                 input.cast('float32'),
                 self._scales,
@@ -124,14 +105,7 @@ def from_quanter(quanter):
 
 
 class LinearDequanter(Layer):
-    def __init__(
-        self,
-        scales,
-        zero_point=None,
-        quant_axis=None,
-        bit_length=8,
-        group_size=128,
-    ):
+    def __init__(self, scales, zero_point=None, quant_axis=None, bit_length=8):
         super().__init__()
         scales = paddle.to_tensor(scales, dtype="float32")
         scale_attr = paddle.framework.ParamAttr(
@@ -150,18 +124,9 @@ def __init__(
        )
         self._quant_axis = -1 if quant_axis is None else quant_axis
         self._bit_length = bit_length
-        self._group_size = group_size
 
     def forward(self, input):
         if in_dynamic_mode():
-            if len(self._scales.shape) > 1:
-                bnt = (1 << (self._bit_length - 1)) - 1
-                new_s = paddle.repeat_interleave(
-                    self._scales, self._group_size, 0
-                )
-                quant_dequant_weight = input.cast('float32') / bnt * new_s
-                return quant_dequant_weight.cast(input.dtype)
-
             return _C_ops.dequantize_linear(
                 input.cast('float32'),
                 self._scales,
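
For reference, the reverted dynamic-mode branch fake-quantizes a weight by broadcasting one scale per group of group_size rows via repeat_interleave. A minimal standalone sketch of that round trip (the shapes and the abs-max scale computation are illustrative assumptions, not code from this repository):

import paddle

# Illustrative setup: a [256, 64] weight quantized in groups of 128 rows,
# giving a 2-D scales tensor of shape [2, 64] (one scale row per group).
group_size, bit_length = 128, 8
weight = paddle.randn([256, 64])
scales = paddle.stack(
    [c.abs().max(axis=0) for c in paddle.split(weight, 2, axis=0)]
)

bnt = (1 << (bit_length - 1)) - 1                        # 127 for 8 bits
new_s = paddle.repeat_interleave(scales, group_size, 0)  # back to [256, 64]

# Quantize as in the removed LinearQuanter branch: scale, round, clip.
q = paddle.clip(paddle.round(weight / new_s * bnt), -bnt - 1, bnt)

# Dequantize as in the removed LinearDequanter branch.
w_hat = q / bnt * new_s
print(float((weight - w_hat).abs().max()))  # small round-trip error

Keeping a separate scale per group prevents an outlier row from inflating the quantization step for the whole tensor, which is the usual motivation for group-wise schemes.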
3 changes: 1 addition & 2 deletions python/paddle/quantization/observers/__init__.py
@@ -14,6 +14,5 @@
 # limitations under the License.
 
 from .abs_max import AbsmaxObserver
-from .groupwise import GroupWiseWeightObserver
 
-__all__ = ["AbsmaxObserver", "GroupWiseWeightObserver"]
+__all__ = ["AbsmaxObserver"]
113 changes: 0 additions & 113 deletions python/paddle/quantization/observers/groupwise.py

This file was deleted.
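
The deleted observer's source is collapsed in this view. Judging from the 2-D scales consumed by the reverted LinearQuanter branch above, a GroupWiseWeightObserver of this kind computes one abs-max scale per group_size slice along axis 0; a hypothetical sketch (function name and signature are assumptions, not the deleted code):

import paddle

def groupwise_absmax_scales(weight, group_size=128):
    # Split axis 0 into contiguous groups and take the abs-max of each,
    # yielding a [num_groups, *weight.shape[1:]] scale tensor -- the 2-D
    # layout the reverted group-wise branch expected.
    num_groups = weight.shape[0] // group_size
    chunks = paddle.split(weight, num_groups, axis=0)
    return paddle.stack([c.abs().max(axis=0) for c in chunks])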

69 changes: 0 additions & 69 deletions test/quantization/test_groupwise.py

This file was deleted.
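
Likewise, the deleted test's source is collapsed. A smoke test for the feature might have looked roughly like the following; it runs only against the pre-revert tree, since this revert removes the group_size parameter (everything besides the two class names is an assumption):

import paddle
from paddle.nn.quant.format import LinearDequanter, LinearQuanter

# Group-wise abs-max scales: one [64]-wide scale row per 128-row group.
weight = paddle.randn([256, 64])
scales = paddle.stack(
    [c.abs().max(axis=0) for c in paddle.split(weight, 2, axis=0)]
)

quanter = LinearQuanter(scales, bit_length=8, group_size=128)
dequanter = LinearDequanter(scales, bit_length=8, group_size=128)
restored = dequanter(quanter(weight))
assert float((weight - restored).abs().max()) < 0.1  # loose round-trip bound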
