@@ -77,7 +77,6 @@
 parser.add_argument('--gptq_pad_max_length', type=int, default=2048, help='Calibration dataset sequence max length, \
                     this should align with your model config, \
                     and your dataset builder args: args.pad_max_length')
-parser.add_argument('--gptq_debug', action='store_true', help='Whether to use debug model ')
 parser.add_argument('--gptq_static_groups', action='store_true', help='Use determined group to do quantization')
 # ==============code generation args===========
 parser.add_argument("--code_generation", action="store_true")
@@ -292,35 +291,6 @@ def calib_func(prepared_model): |
             op_name_dict=op_name_dict,
             recipes=recipes,
         )
-
-        # for test on various models, keep the code of directly call gptq_quantize
-        if args.gptq_debug:
-
-            from neural_compressor.adaptor.torch_utils.weight_only import gptq_quantize
-
-            gptq_conf = {
-                ".*": {
-                    'wbits': args.woq_bits,  # 1-8 bits
-                    'group_size': args.woq_group_size,  # -1 (per-channel)
-                    'sym': (args.woq_scheme == "sym"),
-                    'act_order': args.gptq_actorder,
-                    'static_groups': args.gptq_static_groups,
-                }
-            }
-            q_model_gptq_debug, gptq_config = gptq_quantize(
-                user_model,
-                weight_config=gptq_conf,
-                dataloader=calib_dataloader,
-                nsamples=args.gptq_nsamples,
-                use_max_length=args.gptq_use_max_length,
-                pad_max_length=args.gptq_pad_max_length,
-            )
-
-            # save the fake quantized model
-            os.makedirs(args.output_dir, exist_ok=True)
-            torch.save(q_model_gptq_debug, os.path.join(args.output_dir, "gptq_best_model.pt"))
-            exit(0)
-
     else:
         if re.search("gpt", user_model.config.model_type):
             op_type_dict = {
@@ -371,12 +341,9 @@ def eval_func(model): |
     if args.ipex:
         user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)))
     else:
-        if args.gptq_debug:
-            user_model = torch.load(os.path.join(args.output_dir, "gptq_best_model.pt"))
-        else:
-            user_model, _ = get_user_model()
-            kwargs = {'weight_only': True} if args.approach == 'weight_only' else {}
-            user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)), user_model, **kwargs)
+        user_model, _ = get_user_model()
+        kwargs = {'weight_only': True} if args.approach == 'weight_only' else {}
+        user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)), user_model, **kwargs)
 else:
     user_model, _ = get_user_model()
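
Taken together, these hunks remove the --gptq_debug escape hatch: GPTQ weight-only quantization now always flows through the unified PostTrainingQuantConfig / quantization.fit path that appears as context in the second hunk, and model loading no longer needs a torch.load special case. A minimal sketch of that surviving path follows, assuming the neural_compressor 2.x API and reusing the script's user_model, calib_dataloader, and args; the concrete bit/group/recipe values are illustrative stand-ins for the script's woq_* and gptq_* CLI flags, not the script verbatim:

    from neural_compressor import PostTrainingQuantConfig, quantization

    # Sketch only: the knobs from the deleted debug block map onto the
    # weight-only op_type_dict plus the "gptq_args" recipe.
    conf = PostTrainingQuantConfig(
        approach="weight_only",
        op_type_dict={
            ".*": {                      # match every op type
                "weight": {
                    "bits": 4,           # illustrative; the script passes args.woq_bits
                    "group_size": 128,   # illustrative; args.woq_group_size (-1 = per-channel)
                    "scheme": "sym",     # illustrative; args.woq_scheme
                    "algorithm": "GPTQ",
                },
            },
        },
        recipes={
            "gptq_args": {
                "act_order": False,      # args.gptq_actorder
                "nsamples": 128,         # args.gptq_nsamples
                "use_max_length": True,  # args.gptq_use_max_length
                "pad_max_length": 2048,  # args.gptq_pad_max_length
                "static_groups": False,  # args.gptq_static_groups
            },
        },
    )
    q_model = quantization.fit(user_model, conf, calib_dataloader=calib_dataloader)
    q_model.save(args.output_dir)  # reloaded later via load(..., weight_only=True)

Because the quantized model is saved and restored through neural_compressor's own load(...) with weight_only=True (third hunk), the separate gptq_best_model.pt checkpoint that the debug path produced is no longer needed.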
|
|