refine docs, update requirements (#1493)
Signed-off-by: Zhang, Weiwei1 <weiwei1.zhang@intel.com>
WeiweiZhang1 authored Dec 22, 2023
1 parent 699d644 commit 9d41a7d
Showing 5 changed files with 50 additions and 86 deletions.
@@ -6,20 +6,24 @@ AutoRound is an advanced weight-only quantization algorithm, based on SignRound.
## Prerequisites
- Python 3.9 or higher


- The required transformers version varies across models. For reference, the table below lists the transformers version used to run each model in our experiments.
| Model | Transformers version |
| :----: | :----: |
-| EleutherAI/gpt-j-6b | 4.28/4.30/4.34 |
-| huggyllama/llama-7b | 4.28/4.30/4.34 |
-| meta-llama/Llama-2-7b-hf | 4.30/4.34 |
-| facebook/opt-6.7b | 4.28/4.30/4.34 |
-| tiiuae/falcon-7b | 4.28/4.30/4.34 |
-| mosaicml/mpt-7b | 4.28/4.30/4.34 |
-| bigscience/bloom-7b1 | 4.28/4.30/4.34 |
+| EleutherAI/gpt-j-6b | 4.28/4.30/4.34/4.36 |
+| huggyllama/llama-7b | 4.28/4.30/4.34/4.36 |
+| meta-llama/Llama-2-7b-hf | 4.30/4.34/4.36 |
+| facebook/opt-6.7b | 4.28/4.30/4.34/4.36 |
+| tiiuae/falcon-7b | 4.28/4.30/4.34/4.36 |
+| mosaicml/mpt-7b | 4.28/4.30/4.34/4.36 |
+| bigscience/bloom-7b1 | 4.28/4.30/4.34/4.36 |
| baichuan-inc/Baichuan-7B | 4.28/4.30 |
-| Qwen/Qwen-7B | 4.28/4.30/4.34 |
-| THUDM/chatglm2-6b | 4.28/4.30 |
-| mistralai/Mistral-7B-v0.1 | 4.34 |
+| Qwen/Qwen-7B | 4.28/4.30/4.34/4.36 |
+| THUDM/chatglm3-6b | 4.34/4.36 |
+| mistralai/Mistral-7B-v0.1 | 4.34/4.36 |

+Please note that all experiments in the SignRound+ technical report were conducted using transformers version 4.34.1.
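For example, to pin the environment to the report's setup (a minimal sketch assuming a pip-based install):

```bash
# pin transformers to the version used in the SignRound+ technical report
pip install transformers==4.34.1
```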



## Installation
@@ -42,7 +46,7 @@ Include the flag `--adam`. Note that AdamW may be slightly less effective than S

- **Running the Original SignRound:**
```bash
CUDA_VISIBLE_DEVICES=0 python3 main.py --model_name facebook/opt-125m --amp --num_bits 4 --group_size -1 --iters 400 --lr 0.0025 --minmax_lr 0.0025
```
It's recommended to use `--enable_minmax_tuning`.
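A sketch of the recommended invocation, reusing the arguments above with the tuning flag appended:

```bash
# original SignRound command plus the recommended min-max tuning flag
CUDA_VISIBLE_DEVICES=0 python3 main.py --model_name facebook/opt-125m --amp --num_bits 4 --group_size -1 --iters 400 --lr 0.0025 --minmax_lr 0.0025 --enable_minmax_tuning
```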

Expand All @@ -64,3 +68,4 @@ If you find SignRound useful for your research, please cite our paper:
year={2023}
}
```

@@ -7,6 +7,7 @@
from parse_results import result_parser
import pprint
import json
+import re
import shutil
import transformers
import time
@@ -158,12 +159,6 @@ def simple_evaluate(
+ ".db",
)

-# if isinstance(lm.tokenizer, transformers.LlamaTokenizerFast):
-#     if lm.tokenizer.pad_token is None:
-#         lm.tokenizer.add_special_tokens({'pad_token': '[PAD]'})
-#     else:
-#         lm.tokenizer.pad_token = '[PAD]'

task_dict = lm_eval.tasks.get_task_dict(tasks)

if check_integrity:
@@ -238,14 +233,6 @@ def eval_model(output_dir=None, model=None, tokenizer=None,
if each in tasks:
external_tasks.append(each)
tasks.remove(each)
-#
-# lm = lm_eval.models.get_model("hf-causal-experimental").create_from_arg_string(
-#     model_args,
-#     {
-#         "batch_size": eval_bs,
-#         "max_batch_size": eval_bs,
-#         "device": device}
-# )

results = {}
model = None
@@ -254,41 +241,23 @@
try:
num_fewshot = fewshots_dict[mark][tmp_tasks]
task_names = lm_eval.utils.pattern_match([tmp_tasks], ALL_TASKS)
-# task_dict = get_task_dict(task_names)

# for lm-eval internal tasks
print(f'********* {tmp_tasks} evaluate ************')
task_s = time.time()
for shot in num_fewshot:
-# tmp_results = evaluator.evaluate(
-#     lm=lm,
-#     task_dict=task_dict,
-#     num_fewshot=shot,
-#     limit=limit,
-#     bootstrap_iters=100000,
-#     description_dict=None,
-#     decontamination_ngrams_path=None,
-#     write_out=False,
-#     output_base_path=None,
-# )
-# tmp_results, model = simple_evaluate(model="hf-causal", model_args=model_args, tasks=task_names,
-#                                      num_fewshot=shot, limit=limit,batch_size=eval_bs,max_batch_size=eval_bs)

model_args = f'pretrained={output_dir},tokenizer="{output_dir}",dtype={dtype},use_accelerate={use_accelerate},trust_remote_code=True'
model_type = "hf-causal-experimental"
# else:
# model_args = f'pretrained={output_dir},tokenizer="{output_dir}",dtype={dtype}'
# model_type = "hf-causal"

+if bool(re.search("chatglm", output_dir.lower())):
+    model_args = f'pretrained={output_dir},tokenizer={output_dir},dtype={dtype},trust_remote_code=True'
+    model_type = "hf-causal"
+else:
+    model_args = f'pretrained={output_dir},tokenizer={output_dir},dtype={dtype},use_accelerate={use_accelerate},trust_remote_code=True'
+    model_type = "hf-causal-experimental"

if "wikitext" in task_names:
tmp_eval_bs = 1
else:
tmp_eval_bs = eval_bs

tmp_results, lm = simple_evaluate(model=model_type, model_args=model_args, tasks=task_names,
num_fewshot=shot, limit=limit, batch_size=tmp_eval_bs,
max_batch_size=tmp_eval_bs, lm=lm)
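# simple_evaluate hands back the lm object it builds, and lm is passed in again
# on the next iteration, so the loaded model is reused across (task, shot)
# pairs rather than reloaded each time.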

sub_name = f'{tmp_tasks} {shot}-shot'
print(f'{sub_name}: ')
pprint.pprint(tmp_results["results"])
@@ -299,8 +268,6 @@
print(str(e))
continue

-# if isinstance(lm.tokenizer, transformers.LlamaTokenizerFast):
-#     lm.tokenizer = transformers.AutoTokenizer.from_pretrained(output_dir, use_fast=False)
tokenizer = transformers.AutoTokenizer.from_pretrained(output_dir, use_fast=False, trust_remote_code=True)
model = lm.model
# for external tasks
@@ -369,14 +336,9 @@ def eval_model(output_dir=None, model=None, tokenizer=None,
new_dict[new_key] = data[sub_key][sub_sub_key]

import pandas as pd

df = pd.DataFrame(data=new_dict, index=[0])

df.to_excel(excel_file)

-# if output_dir == "./tmp_signround":
-#     shutil.rmtree(output_dir)


if __name__ == "__main__":
import time
@@ -392,35 +354,15 @@ def eval_model(output_dir=None, model=None, tokenizer=None,

args = parser.parse_args()
s = time.time()
-# 'wikitext2', 'ptb-new', 'c4-new', 'lambada_openai',
-# 'hellaswag', 'winogrande', 'piqa', 'coqa', 'drop', 'gsm8k','truthfulqa_mc',
-# "lambada_openai": [0],
-# "hellaswag": [0],
-# "winogrande": [0],
-# "piqa": [0],
-# "hendrycksTest-*": [0],
-# "wikitext": [0],
-# "truthfulqa_mc": [0],
-# "openbookqa": [0],
-# "boolq": [0],
-# "rte": [0],
-# "arc_easy": [0],
-# "arc_challenge": [0],

-test_tasks = [
-    "hendrycksTest-*", 'lambada_openai', "wikitext2", "ptb-new", "c4_new"
-
-]

+test_tasks = ['wikitext2', 'ptb-new', 'c4-new', 'lambada_openai', 'hellaswag', 'winogrande', 'piqa',
+              "hendrycksTest-*", "wikitext", "truthfulqa_mc", "openbookqa", "boolq", "rte", "arc_easy", "arc_challenge"]
+test_tasks = ['wikitext2', 'ptb-new', 'c4-new', 'lambada_openai', 'hellaswag', 'winogrande', 'piqa',
+              ]
excel_name = (args.model_name).split('/')[-1] + ".xlsx"

-# test_tasks = ['wikitext2', 'ptb-new', 'c4-new', 'lambada_openai']
eval_model(output_dir=args.model_name,
tasks=test_tasks,
eval_bs=args.bs, eval_orig_float=True, limit=None, excel_file=excel_name)

print("cost time: ", time.time() - s)

@@ -1,11 +1,17 @@
-transformers==4.34.1
+transformers==4.36.0
torch==2.0.1
git+https://github.com/EleutherAI/lm-evaluation-harness.git@008fc2a23245c40384f2312718433eeb1e0f87a9
fsspec==2023.9.2
tiktoken
transformers_stream_generator
peft
sentencepiece
einops

+## The following is for Intel Neural Compressor
+schema
+py-cpuinfo
+prettytable
+Pillow
+opencv-python-headless
pycocotools
@@ -3,11 +3,20 @@ This is a sample code for SignRound ([arxiv](https://arxiv.org/abs/2309.05516)),
![overview](./overview.png)



# Prerequisite
-python 3.9 or higher
+- python 3.9 or higher

+- The required transformers version varies across models. For reference, the table below lists the transformers version used to run each model in our experiments.
+| Model | Transformers version |
+| :----: | :----: |
+| decapoda-research/llama-7b-hf | 4.28 |
+| huggyllama/llama-7b | 4.28/4.30/4.34/4.36 |
+| meta-llama/Llama-2-7b-hf | 4.28/4.30/4.34/4.36 |
+| facebook/opt-6.7b | 4.28/4.30/4.34/4.36 |
+| bigscience/bloom-7b1 | 4.28/4.30/4.34/4.36 |

+Please note that all experimental data in the paper is based on transformers version 4.28.1. The Hugging Face source for llama-7b-hf mentioned in the paper, 'decapoda-research/llama-7b-hf', is currently unavailable. You may opt for 'huggyllama/llama-7b' as an alternative, but please be aware that this replacement might yield slight differences in results.
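For example, to approximate the paper's environment with the suggested substitute model (a sketch assuming pip and the 4.28.1 version noted above; the flags follow the example in the Run section below):

```bash
# pin the transformers version used for the paper, then run on the substitute model
pip install transformers==4.28.1
CUDA_VISIBLE_DEVICES=0 python3 signround.py --model_name huggyllama/llama-7b --amp
```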

pip install -r requirements.txt


# Run
@@ -24,7 +33,7 @@ CUDA_VISIBLE_DEVICES=0 python3 signround.py --model_name facebook/opt-125m --amp
## Known issue
To address the original lambada evaluation bug in the old version of lm-eval, we have incorporated the lm-eval from Intel Extension for Transformers (ITREX). This change may lead to slight variations in results.

To reproduce our results in the paper, please install ITREX:

```bash
pip install intel-extension-for-transformers
@@ -41,3 +50,5 @@ If you find SignRound useful or relevant to your research, please kindly cite our paper:
}
```



@@ -1,5 +1,5 @@
accelerate
datasets==2.12.0
torch==1.13.1
-transformers==4.30.0
-git+https://github.com/EleutherAI/lm-evaluation-harness.git@e81d3cce155e93ba2445068767c738891ad97024
+transformers==4.36.0
+git+https://github.com/EleutherAI/lm-evaluation-harness.git@008fc2a23245c40384f2312718433eeb1e0f87a9
