From 5df30e1e28de487db565d7fdc69b15ad7bf25fd1 Mon Sep 17 00:00:00 2001
From: Bhuvanesh Sridharan
Date: Thu, 19 Sep 2024 16:24:00 +0530
Subject: [PATCH 1/2] fix: fixing use of custom calibration dataset for
 smoothquant in llama

---
 examples/llama/convert_checkpoint.py | 14 ++++++++++++++
 tensorrt_llm/models/convert_utils.py |  4 ++--
 tensorrt_llm/models/llama/convert.py | 10 ++++++++--
 tensorrt_llm/models/llama/model.py   |  4 +++-
 4 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/examples/llama/convert_checkpoint.py b/examples/llama/convert_checkpoint.py
index 820db5b2d..f97edcb04 100644
--- a/examples/llama/convert_checkpoint.py
+++ b/examples/llama/convert_checkpoint.py
@@ -91,6 +91,18 @@ def parse_arguments():
         help=
         "The huggingface dataset name or the local directory of the dataset for calibration."
     )
+    parser.add_argument(
+        "--calib_size",
+        type=int,
+        default=512,
+        help="Number of datapoints to use for calibration. Default is 512. Set to -1 to use the whole dataset.",
+    )
+    parser.add_argument(
+        "--calib_max_seq_length",
+        type=int,
+        default=512,
+        help="Max sequence length to use for calibration. Default is 512.",
+    )
     parser.add_argument(
         "--smoothquant",
         "-sq",
@@ -402,6 +414,8 @@ def convert_and_save_hf(args):
                                   quant_config=quant_config,
                                   device='cpu' if args.load_model_on_cpu else 'cuda',
                                   calib_dataset=args.calib_dataset,
+                                  calib_batches=args.calib_size,
+                                  calib_max_seq_length=args.calib_max_seq_length,
                                   **override_fields)
     else:
         # When not loading by shard, preload one complete model and then slice per rank weights from this
diff --git a/tensorrt_llm/models/convert_utils.py b/tensorrt_llm/models/convert_utils.py
index be587fd5f..c81518178 100644
--- a/tensorrt_llm/models/convert_utils.py
+++ b/tensorrt_llm/models/convert_utils.py
@@ -254,8 +254,8 @@ def has_safetensors(model_dir: str):
 
 def load_calib_dataset(dataset_name_or_dir: str,
                        config_name: Optional[str] = None,
-                       split: Optional[str] = None,
-                       key: Optional[str] = None,
+                       split: Optional[str] = "train",  # default split value in hf datasets object
+                       key: Optional[str] = "text",  # default key value in hf datasets object
                        trust_remote_code=True,
                        **kwargs):
     if config_name is None:
diff --git a/tensorrt_llm/models/llama/convert.py b/tensorrt_llm/models/llama/convert.py
index a1f3b7c38..7788a55c9 100644
--- a/tensorrt_llm/models/llama/convert.py
+++ b/tensorrt_llm/models/llama/convert.py
@@ -1084,7 +1084,10 @@ def quantize(hf_model_dir: str,
              output_dir: str,
              config: LLaMAConfig,
              device: str = 'cuda',
-             calib_dataset: str = 'cnn_dailymail'):
+             calib_dataset: str = 'cnn_dailymail',
+             calib_batches: int = 512,
+             calib_max_seq_length: int = 512,
+             ):
     '''
         Quantize the save the model as TRT-LLM checkpoint to output_dir
     '''
@@ -1118,7 +1121,10 @@
 
     dataset = load_calib_dataset(calib_dataset)
 
-    act_range = capture_activation_range(hf_model, tokenizer, dataset)
+    if calib_batches == -1:  # use the whole dataset if calib_batches is -1
+        calib_batches = len(dataset)
+
+    act_range = capture_activation_range(hf_model, tokenizer, dataset, num_samples=calib_batches, seq_len=calib_max_seq_length)
     qkv_para, smoother = {}, {}
     if use_smooth_quant:
         smooth_llama_model(hf_model, act_range, quant_config.smoothquant_val,
diff --git a/tensorrt_llm/models/llama/model.py b/tensorrt_llm/models/llama/model.py
index 93111df4c..352bc33d7 100644
--- a/tensorrt_llm/models/llama/model.py
+++ b/tensorrt_llm/models/llama/model.py
@@ -446,7 +446,9 @@ def quantize(
                      output_dir,
                      config=config,
                      device=device,
-                     calib_dataset=calib_dataset)
+                     calib_dataset=calib_dataset,
+                     calib_batches=calib_batches,
+                     calib_max_seq_length=calib_max_seq_length,)
         else:
             raise ValueError(
                 f"The quant_config ({quant_config}) does not require calibration, try {cls.__name__}.from_hugging_face instead."

From 67aa530ae5971467557dff40ff0409efda78b0c5 Mon Sep 17 00:00:00 2001
From: Bhuvanesh Sridharan
Date: Sat, 28 Sep 2024 11:48:28 +0530
Subject: [PATCH 2/2] fix: Correctly setting default split and key without
 overriding the values in DEFAULT_HF_DATASET_META

---
 tensorrt_llm/models/convert_utils.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorrt_llm/models/convert_utils.py b/tensorrt_llm/models/convert_utils.py
index c81518178..980ac32ab 100644
--- a/tensorrt_llm/models/convert_utils.py
+++ b/tensorrt_llm/models/convert_utils.py
@@ -249,13 +249,14 @@ def has_safetensors(model_dir: str):
     'ccdv/cnn_dailymail': ('3.0.0', 'train', 'article'),
     'cnn_dailymail': ('3.0.0', 'train', 'article'),
     'lambada': (None, 'validation', 'text'),
+    '': (None, 'train', 'text'),
 }
 
 
 def load_calib_dataset(dataset_name_or_dir: str,
                        config_name: Optional[str] = None,
-                       split: Optional[str] = "train",  # default split value in hf datasets object
-                       key: Optional[str] = "text",  # default key value in hf datasets object
+                       split: Optional[str] = None,  # default split value will be 'train' in hf datasets object
+                       key: Optional[str] = None,  # default key value will be 'text' in hf datasets object
                        trust_remote_code=True,
                        **kwargs):
     if config_name is None:
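
For reference, a minimal sketch of the calibration-sizing behaviour PATCH 1/2
introduces. This is illustrative Python, not TensorRT-LLM code: quantize_sketch
and its list-based dataset are hypothetical stand-ins; only the parameter names
(calib_batches, calib_max_seq_length) and the -1 sentinel come from the diff.

# Illustrative sketch only: quantize_sketch and the list-based "dataset" are
# hypothetical stand-ins, not TensorRT-LLM APIs.
def quantize_sketch(calib_batches: int = 512,
                    calib_max_seq_length: int = 512) -> int:
    dataset = list(range(1000))  # stand-in for load_calib_dataset(...)

    # Sentinel from the patch: -1 means "calibrate on the whole dataset".
    if calib_batches == -1:
        calib_batches = len(dataset)

    # Stand-in for capture_activation_range(..., num_samples=calib_batches,
    # seq_len=calib_max_seq_length): only the first calib_batches samples are
    # used, each truncated to calib_max_seq_length tokens.
    return len(dataset[:calib_batches])

assert quantize_sketch(calib_batches=-1) == 1000  # whole dataset
assert quantize_sketch(calib_batches=256) == 256  # capped at --calib_size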
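
Similarly, a sketch of the split/key resolution that PATCH 2/2 restores.
resolve_dataset_meta is a hypothetical helper (the real logic lives inside
load_calib_dataset and may differ); the table rows are copied from the diff,
and the rule that defaults must not override per-dataset metadata follows the
commit message.

from typing import Optional, Tuple

# Hypothetical helper mirroring the lookup in load_calib_dataset; the '' row
# acts as the fallback for datasets not listed, e.g. a local custom dataset.
DEFAULT_HF_DATASET_META = {
    'ccdv/cnn_dailymail': ('3.0.0', 'train', 'article'),
    'cnn_dailymail': ('3.0.0', 'train', 'article'),
    'lambada': (None, 'validation', 'text'),
    '': (None, 'train', 'text'),
}

def resolve_dataset_meta(name: str,
                         config_name: Optional[str] = None,
                         split: Optional[str] = None,
                         key: Optional[str] = None) -> Tuple:
    # Known datasets keep their own row (lambada stays on its 'validation'
    # split); anything else falls back to 'train'/'text' via the '' row.
    meta = DEFAULT_HF_DATASET_META.get(name, DEFAULT_HF_DATASET_META[''])
    return (config_name if config_name is not None else meta[0],
            split if split is not None else meta[1],
            key if key is not None else meta[2])

assert resolve_dataset_meta('lambada') == (None, 'validation', 'text')
assert resolve_dataset_meta('./my_local_dataset') == (None, 'train', 'text')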