remove old dataloader & generator from quantization #55754

Merged
55 changes: 11 additions & 44 deletions python/paddle/static/quantization/post_training_quantization.py
@@ -23,12 +23,10 @@
except:
from .utils import tqdm

from inspect import isgeneratorfunction

from paddle.fluid.framework import IrGraph, _get_var

from ... import io, static
from ...fluid import reader
from ...framework import core
from ...utils import unique_name
from ..log_helper import get_logger
@@ -171,16 +169,16 @@ def __init__(
When all parameters were saved in a single binary file, set it
as the real filename. If parameters were saved in separate files,
set it as 'None'. Default is 'None'.
batch_generator(Python Generator): The batch generator provides
batch_generator(Python Generator, deprecated): The batch generator provides
calibration data for DataLoader, and it returns a batch every
time. Note that only one of sample_generator and batch_generator
should be set. Besides, batch_generator supports lod tensor.
sample_generator(Python Generator): The sample generator provides
sample_generator(Python Generator, deprecated): The sample generator provides
calibration data for DataLoader, and it only returns a sample every
time. Note that only one of sample_generator and batch_generator
should be set. Besides, sample_generator does not support lod tensor.
data_loader(Python Generator, Paddle.io.DataLoader, optional): The
Generator or DataLoader provides calibration data, and it could
data_loader(Paddle.io.DataLoader): The
DataLoader provides calibration data, and it could
return a batch every time.
batch_size(int, optional): The batch size of DataLoader. Default is 10.
batch_nums(int, optional): If batch_nums is not None, the number of
@@ -309,22 +307,12 @@ def __init__(

# Check inputs
assert executor is not None, "The executor cannot be None."
assert any(
[gen is not None]
for gen in [sample_generator, batch_generator, data_loader]
), (
"The sample_generator, batch_generator "
"and data_loader cannot be None in the same time."
)
if data_loader is not None:
assert isinstance(
data_loader,
(
io.DataLoader,
type(isgeneratorfunction),
reader.GeneratorLoader,
),
), "data_loader only accepts `paddle.io.DataLoader` or Generator instance."
assert data_loader is not None, "data_loader cannot be None."

assert isinstance(
data_loader, io.DataLoader
), "data_loader only accepts `paddle.io.DataLoader`."

assert batch_size > 0, "The batch_size should be greater than 0."
assert (
algo in self._support_algo_type
@@ -615,29 +603,8 @@ def _load_model_data(self):
for var_name in self._feed_list
]

if self._data_loader is not None:
self._batch_nums = (
self._batch_nums if self._batch_nums else len(self._data_loader)
)
return
self._data_loader = reader.DataLoader.from_generator(
feed_list=feed_vars, capacity=3 * self._batch_size, iterable=True
)
if self._sample_generator is not None:
self._data_loader.set_sample_generator(
self._sample_generator,
batch_size=self._batch_size,
drop_last=True,
places=self._place,
)
elif self._batch_generator is not None:
self._data_loader.set_batch_generator(
self._batch_generator, places=self._place
)
self._batch_nums = (
self._batch_nums
if self._batch_nums
else len(list(self._data_loader))
self._batch_nums if self._batch_nums else len(self._data_loader)
)

def _optimize_fp32_model(self):
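With these changes, `data_loader` becomes the only supported calibration input: the constructor now asserts it is a `paddle.io.DataLoader`, and `_batch_nums` falls back to `len(data_loader)`. A minimal sketch of the new calling convention, modeled on the updated tests below — the dataset contents, the feed name "img", and the model path are illustrative assumptions, not part of this PR:

import numpy as np
import paddle
from paddle.static.quantization import PostTrainingQuantization

paddle.enable_static()

# Hypothetical calibration set: each sample is a dict keyed by the model's
# feed variable name (assumed to be "img" here), as in the updated tests.
class CalibDataset(paddle.io.Dataset):
    def __getitem__(self, idx):
        return {"img": np.random.rand(1, 28, 28).astype('float32')}

    def __len__(self):
        return 100

dataset = CalibDataset()
loader = paddle.io.DataLoader(
    dataset,
    batch_sampler=paddle.io.BatchSampler(dataset, batch_size=10),
    places=paddle.static.cpu_places(),
)

exe = paddle.static.Executor(paddle.CPUPlace())
ptq = PostTrainingQuantization(
    executor=exe,
    model_dir="path/to/inference_model",  # illustrative path
    data_loader=loader,  # now mandatory; generators are no longer accepted
    batch_nums=None,  # left as None, it defaults to len(loader)
)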
34 changes: 32 additions & 2 deletions test/quantization/test_post_training_quantization_mnist.py
@@ -30,6 +30,23 @@
np.random.seed(0)


class TransedMnistDataSet(paddle.io.Dataset):
def __init__(self, mnist_data):
self.mnist_data = mnist_data

def __getitem__(self, idx):
img = (
np.array(self.mnist_data[idx][0])
.astype('float32')
.reshape(1, 28, 28)
)
batch = img / 127.5 - 1.0
return {"img": batch}

def __len__(self):
return len(self.mnist_data)


class TestPostTrainingQuantization(unittest.TestCase):
def setUp(self):
self.root_path = tempfile.TemporaryDirectory()
@@ -217,14 +234,27 @@ def generate_quantized_model(
):
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)
val_reader = paddle.dataset.mnist.train()

train_dataset = paddle.vision.datasets.MNIST(
mode='train', transform=None
)
train_dataset = TransedMnistDataSet(train_dataset)
BatchSampler = paddle.io.BatchSampler(
train_dataset, batch_size=batch_size
)
val_data_generator = paddle.io.DataLoader(
train_dataset,
batch_sampler=BatchSampler,
places=paddle.static.cpu_places(),
)

ptq = PostTrainingQuantization(
executor=exe,
model_dir=model_path,
model_filename=model_filename,
params_filename=params_filename,
sample_generator=val_reader,
sample_generator=None,
data_loader=val_data_generator,
batch_size=batch_size,
batch_nums=batch_nums,
algo=algo,
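The dict key returned by `__getitem__` must match the feed variable name of the model under test ("img" for this model, "x" for the while-model test below). As a quick sanity check — not part of the PR — the normalization in `TransedMnistDataSet` maps raw pixel values from [0, 255] into [-1.0, 1.0]:

import numpy as np

# Stand-in pixel values covering the full uint8 range.
img = np.arange(0, 256, dtype='float32').reshape(1, 16, 16)
batch = img / 127.5 - 1.0
assert batch.min() == -1.0 and batch.max() == 1.0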
46 changes: 31 additions & 15 deletions test/quantization/test_post_training_quantization_while.py
@@ -29,6 +29,23 @@
np.random.seed(0)


class TransedMnistDataSet(paddle.io.Dataset):
def __init__(self, mnist_data):
self.mnist_data = mnist_data

def __getitem__(self, idx):
img = (
np.array(self.mnist_data[idx][0])
.astype('float32')
.reshape(1, 28, 28)
)
batch = img / 127.5 - 1.0
return {"x": batch}

def __len__(self):
return len(self.mnist_data)


class TestPostTrainingQuantization(unittest.TestCase):
def setUp(self):
self.download_path = 'int8/download'
@@ -132,28 +149,30 @@ def generate_quantized_model(
is_optimize_model=False,
batch_size=10,
batch_nums=10,
is_data_loader=False,
):
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)
val_reader = paddle.dataset.mnist.train()

def val_data_generator():
batches = []
for data in val_reader():
batches.append(data[0].reshape(1, 28, 28))
if len(batches) == batch_size:
batches = np.asarray(batches)
yield {"x": batches}
batches = []
train_dataset = paddle.vision.datasets.MNIST(
mode='train', transform=None
)
train_dataset = TransedMnistDataSet(train_dataset)
BatchSampler = paddle.io.BatchSampler(
train_dataset, batch_size=batch_size
)
val_data_generator = paddle.io.DataLoader(
train_dataset,
batch_sampler=BatchSampler,
places=paddle.static.cpu_places(),
)

ptq = PostTrainingQuantization(
executor=exe,
model_dir=model_path,
model_filename='model.pdmodel',
params_filename='model.pdiparams',
sample_generator=val_reader if not is_data_loader else None,
data_loader=val_data_generator if is_data_loader else None,
sample_generator=None,
data_loader=val_data_generator,
batch_size=batch_size,
batch_nums=batch_nums,
algo=algo,
@@ -183,7 +202,6 @@ def run_test(
batch_size=10,
infer_iterations=10,
quant_iterations=5,
is_data_loader=False,
):
origin_model_path = self.download_model(data_url, data_md5, model_name)

@@ -210,7 +228,6 @@ def run_test(
is_optimize_model,
batch_size,
quant_iterations,
is_data_loader=is_data_loader,
)

print(
@@ -442,7 +459,6 @@ def test_post_training_abs_max(self):
batch_size,
infer_iterations,
quant_iterations,
is_data_loader=True,
)


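Continuing the construction sketch after the first file, the quantizer is then driven the same way the tests do; `quantize()` and `save_quantized_model()` are the existing `PostTrainingQuantization` entry points, and the output path is illustrative:

# Run calibration over the DataLoader, then persist the quantized program.
ptq.quantize()
ptq.save_quantized_model(
    "path/to/quant_model",  # illustrative output path
    model_filename='model.pdmodel',
    params_filename='model.pdiparams',
)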