
Commit 7499cd8

Merge branch 'master' into xinhe/patch-2

2 parents 4ee86c1 + 02233fb

File tree

22 files changed: +229 -115 lines

.azure-pipelines/scripts/ut/3x/run_3x_ort.sh

Lines changed: 1 addition & 0 deletions

@@ -13,6 +13,7 @@ export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverag
 inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__[0])')
 cd /neural-compressor/test || exit 1
 find ./3x/onnxrt/* -name "test*.py" | sed 's,\.\/,coverage run --source='"${inc_path}"' --append ,g' | sed 's/$/ --verbose/'> run.sh
+find ./3x/common/* -name "test*.py" | sed 's,\.\/,coverage run --source='"${inc_path}"' --append ,g' | sed 's/$/ --verbose/'>> run.sh

 LOG_DIR=/neural-compressor/log_dir
 mkdir -p ${LOG_DIR}
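
For context (the same one-line change is made in the torch and tensorflow variants below): the find|sed pipeline rewrites each discovered test file into one coverage invocation per line of the generated run.sh. Assuming a hypothetical test file test/3x/common/test_utility.py, and writing <inc_path> for the installed neural_compressor package path, the appended run.sh line would read:

    coverage run --source=<inc_path> --append 3x/common/test_utility.py --verbose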

.azure-pipelines/scripts/ut/3x/run_3x_pt.sh

Lines changed: 1 addition & 0 deletions

@@ -13,6 +13,7 @@ export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverag
 inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__[0])')
 cd /neural-compressor/test || exit 1
 find ./3x/torch/* -name "test*.py" | sed 's,\.\/,coverage run --source='"${inc_path}"' --append ,g' | sed 's/$/ --verbose/'> run.sh
+find ./3x/common/* -name "test*.py" | sed 's,\.\/,coverage run --source='"${inc_path}"' --append ,g' | sed 's/$/ --verbose/'>> run.sh

 LOG_DIR=/neural-compressor/log_dir
 mkdir -p ${LOG_DIR}

.azure-pipelines/scripts/ut/3x/run_3x_tf.sh

Lines changed: 1 addition & 0 deletions

@@ -13,6 +13,7 @@ export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverag
 inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__[0])')
 cd /neural-compressor/test || exit 1
 find ./3x/tensorflow/* -name "test*.py" | sed 's,\.\/,coverage run --source='"${inc_path}"' --append ,g' | sed 's/$/ --verbose/'> run.sh
+find ./3x/common/* -name "test*.py" | sed 's,\.\/,coverage run --source='"${inc_path}"' --append ,g' | sed 's/$/ --verbose/'>> run.sh

 LOG_DIR=/neural-compressor/log_dir
 mkdir -p ${LOG_DIR}

.azure-pipelines/ut-3x-pt.yml

Lines changed: 1 addition & 0 deletions

@@ -11,6 +11,7 @@ pr:
     - neural_compressor/common
     - neural_compressor/torch
     - test/3x/torch
+    - test/3x/common
     - setup.py
     - requirements_pt.txt
     - .azure-pipelines/scripts/ut/3x/collect_log_3x.sh

examples/.config/model_params_pytorch.json

Lines changed: 1 addition & 15 deletions

@@ -492,13 +492,6 @@
       "main_script": "run_clm_no_trainer.py",
       "batch_size": 8
     },
-    "opt_125m_woq_gptq_debug_int4":{
-      "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/llm",
-      "dataset_location": "",
-      "input_model": "",
-      "main_script": "run_clm_no_trainer.py",
-      "batch_size": 8
-    },
     "opt_125m_woq_teq":{
       "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/llm",
       "dataset_location": "",
@@ -583,13 +576,6 @@
       "main_script": "run_clm_no_trainer.py",
       "batch_size": 1
     },
-    "gpt_j_woq_gptq_debug_int4":{
-      "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/llm",
-      "dataset_location": "",
-      "input_model": "",
-      "main_script": "run_clm_no_trainer.py",
-      "batch_size": 1
-    },
     "gpt_j_woq_gptq_int4":{
       "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/llm",
       "dataset_location": "",
@@ -618,7 +604,7 @@
       "main_script": "run_clm_no_trainer.py",
       "batch_size": 1
     },
-    "falcon_7b_woq_gptq_debug_int4":{
+    "falcon_7b_woq_gptq_int4":{
       "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/llm",
       "dataset_location": "",
       "input_model": "",

examples/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/main.py

Lines changed: 2 additions & 0 deletions

@@ -153,6 +153,8 @@ def download_url(url, root, filename=None, md5=None): # pragma: no cover
         md5 (str): the md5 string.
     """
     import urllib
+    import ssl
+    ssl._create_default_https_context = ssl._create_unverified_context
     root = os.path.expanduser(root)
     if not filename:
         filename = os.path.basename(url)
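
The added lines replace urllib's default HTTPS context with an unverified one for the whole process, which unblocks dataset downloads on hosts with broken certificate chains at the cost of skipping certificate checks. A minimal sketch of the effect (the URL and filename are placeholders, not from this repo):

    import ssl
    import urllib.request

    # Process-wide opt-out of TLS certificate verification; tolerable here only
    # because download_url can still verify the file against an md5 checksum.
    ssl._create_default_https_context = ssl._create_unverified_context
    urllib.request.urlretrieve("https://example.com/mnist-8.onnx", "mnist-8.onnx")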

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/README.md

Lines changed: 6 additions & 11 deletions

@@ -61,10 +61,9 @@ python run_clm_no_trainer.py \
     --woq_scheme asym \
     --woq_group_size 128 \
     --gptq_pad_max_length 2048 \
-    --gptq_use_max_length \
-    --gptq_debug
+    --gptq_use_max_length
 ```
-**Notes**: Weight-only quantization based on fake quantization is previewly supported and supports RTN, GPTQ[1], AWQ[2], TEQ algorithms. For more details, please refer to [link](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization_weight_only.md). Our GPTQ API support various CLMs including GPTJ, OPTs, Blooms, Llamas, Falcons, MPTs, ChatGLMs, etc. Simply replace the "--model" argument with other models to quantize different CLMs with GPTQ.
+**Notes**: Weight-only quantization based on fake quantization is supported in preview, including RTN, GPTQ[1], AWQ[2], TEQ algorithms. For more details, please refer to [link](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization_weight_only.md). Our GPTQ API support various CLMs including GPTJ, OPTs, Blooms, Llamas, Falcons, MPTs, ChatGLMs, etc. Simply replace the "--model" argument with other models to quantize different CLMs with GPTQ.


 #### Accuracy with lm_eval
@@ -111,8 +110,7 @@ python run_clm_no_trainer.py \
     --woq_scheme asym \
     --woq_group_size 128 \
     --gptq_pad_max_length 2048 \
-    --gptq_use_max_length \
-    --gptq_debug
+    --gptq_use_max_length
 ```

 #### Accuracy with lm_eval
@@ -158,8 +156,7 @@ python run_clm_no_trainer.py \
     --woq_scheme asym \
     --woq_group_size 128 \
     --gptq_pad_max_length 2048 \
-    --gptq_use_max_length \
-    --gptq_debug
+    --gptq_use_max_length
 ```

 #### Accuracy with lm_eval
@@ -202,8 +199,7 @@ python run_clm_no_trainer.py \
     --woq_scheme asym \
     --woq_group_size 128 \
     --gptq_pad_max_length 2048 \
-    --gptq_use_max_length \
-    --gptq_debug
+    --gptq_use_max_length
 ```
 #### Accuracy with lm_eval
 ```bash
@@ -244,8 +240,7 @@ python run_clm_no_trainer.py \
     --woq_scheme asym \
     --woq_group_size 128 \
     --gptq_pad_max_length 2048 \
-    --gptq_use_max_length \
-    --gptq_debug
+    --gptq_use_max_length
 ```
 #### Accuracy with lm_eval
 ```bash

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_benchmark.sh

Lines changed: 6 additions & 6 deletions

@@ -79,10 +79,10 @@ function run_benchmark {
         model_name_or_path="facebook/opt-125m"
         approach="weight_only"
         extra_cmd=$extra_cmd" --woq_algo GPTQ"
-    elif [ "${topology}" = "opt_125m_woq_gptq_debug_int4" ]; then
+    elif [ "${topology}" = "opt_125m_woq_gptq_int4" ]; then
         model_name_or_path="facebook/opt-125m"
         approach="weight_only"
-        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_scheme asym --woq_group_size 128 --gptq_use_max_length --gptq_debug"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_scheme asym --woq_group_size 128 --gptq_use_max_length"
     elif [ "${topology}" = "opt_125m_woq_teq" ]; then
         model_name_or_path="facebook/opt-125m"
         approach="weight_only"
@@ -106,17 +106,17 @@ function run_benchmark {
         model_name_or_path="EleutherAI/gpt-j-6b"
         approach="weight_only"
         extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_enable_mse_search"
-    elif [ "${topology}" = "gpt_j_woq_gptq_debug_int4" ]; then
+    elif [ "${topology}" = "gpt_j_woq_gptq_int4" ]; then
         model_name_or_path="EleutherAI/gpt-j-6b"
         approach="weight_only"
-        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --gptq_use_max_length --gptq_debug"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --gptq_use_max_length"
     elif [ "${topology}" = "falcon_7b_sq" ]; then
         model_name_or_path="tiiuae/falcon-7b-instruct"
         extra_cmd=$extra_cmd" --sq --alpha 0.5"
-    elif [ "${topology}" = "falcon_7b_woq_gptq_debug_int4" ]; then
+    elif [ "${topology}" = "falcon_7b_woq_gptq_int4" ]; then
         model_name_or_path="tiiuae/falcon-7b-instruct"
         approach="weight_only"
-        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --gptq_use_max_length --gptq_debug"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --gptq_use_max_length"
     fi

     python -u run_clm_no_trainer.py \

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py

Lines changed: 3 additions & 36 deletions

@@ -77,7 +77,6 @@
 parser.add_argument('--gptq_pad_max_length', type=int, default=2048, help='Calibration dataset sequence max length, \
                     this should align with your model config, \
                     and your dataset builder args: args.pad_max_length')
-parser.add_argument('--gptq_debug', action='store_true', help='Whether to use debug model ')
 parser.add_argument('--gptq_static_groups', action='store_true', help='Use determined group to do quantization')
 # ==============code generation args===========
 parser.add_argument("--code_generation", action="store_true")
@@ -292,35 +291,6 @@ def calib_func(prepared_model):
         op_name_dict=op_name_dict,
         recipes=recipes,
     )
-
-    # for test on various models, keep the code of directly call gptq_quantize
-    if args.gptq_debug:
-
-        from neural_compressor.adaptor.torch_utils.weight_only import gptq_quantize
-
-        gptq_conf = {
-            ".*": {
-                'wbits': args.woq_bits,  # 1-8 bits
-                'group_size': args.woq_group_size,  # -1 (per-channel)
-                'sym': (args.woq_scheme == "sym"),
-                'act_order': args.gptq_actorder,
-                'static_groups': args.gptq_static_groups,
-            }
-        }
-        q_model_gptq_debug, gptq_config = gptq_quantize(
-            user_model,
-            weight_config=gptq_conf,
-            dataloader=calib_dataloader,
-            nsamples=args.gptq_nsamples,
-            use_max_length=args.gptq_use_max_length,
-            pad_max_length=args.gptq_pad_max_length,
-        )
-
-        # save the fake quantized model
-        os.makedirs(args.output_dir, exist_ok=True)
-        torch.save(q_model_gptq_debug, os.path.join(args.output_dir, "gptq_best_model.pt"))
-        exit(0)
-
 else:
     if re.search("gpt", user_model.config.model_type):
         op_type_dict = {
@@ -371,12 +341,9 @@ def eval_func(model):
     if args.ipex:
         user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)))
     else:
-        if args.gptq_debug:
-            user_model = torch.load(os.path.join(args.output_dir, "gptq_best_model.pt"))
-        else:
-            user_model, _ = get_user_model()
-            kwargs = {'weight_only': True} if args.approach == 'weight_only' else {}
-            user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)), user_model, **kwargs)
+        user_model, _ = get_user_model()
+        kwargs = {'weight_only': True} if args.approach == 'weight_only' else {}
+        user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)), user_model, **kwargs)
 else:
     user_model, _ = get_user_model()
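
With the debug path removed, saved weight-only checkpoints are reloaded only through the unified load() call kept above. A minimal sketch of that flow, assuming the helper is neural_compressor.utils.pytorch.load (as the script's own call suggests) and using a placeholder model id and output directory:

    import os
    from transformers import AutoModelForCausalLM
    from neural_compressor.utils.pytorch import load

    # Placeholder assumptions: model id and the directory the script saved to.
    output_dir = "./saved_results"
    user_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")

    # weight_only=True restores the weight-only quantized weights into the
    # freshly constructed FP32 model, mirroring the kwargs built above.
    user_model = load(os.path.abspath(os.path.expanduser(output_dir)), user_model, weight_only=True)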

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_gptj_mlperf_int4.py

Lines changed: 1 addition & 1 deletion

@@ -315,7 +315,7 @@ def forward(self, *inp, **kwargs):
     'percdamp': 0.01,
     'act_order':args.act_order,
     'block_size': args.block_size,
-    'nsampeles': args.nsamples,
+    'nsamples': args.nsamples,
     'use_max_length': args.use_max_length,
     'pad_max_length': args.pad_max_length
 },
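
This one-character fix matters because the value of args.nsamples was stored under the misspelled key 'nsampeles', so whatever reads the 'nsamples' entry of this config would not have found it. For reference, a sketch of the corrected config fragment (values are illustrative placeholders, not this script's actual arguments):

    # Illustrative GPTQ weight-config fragment with the corrected key.
    gptq_conf = {
        'percdamp': 0.01,      # Hessian dampening factor
        'act_order': False,    # quantize columns in activation order
        'block_size': 128,
        'nsamples': 128,       # calibration sample count (was 'nsampeles')
        'use_max_length': True,
        'pad_max_length': 2048,
    }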
