Longbench: NV code to ipex-llm #11662

Merged: 14 commits, Sep 18, 2024
49 changes: 41 additions & 8 deletions python/llm/dev/benchmark/LongBench/README.md
@@ -1,10 +1,13 @@
# LongBench Benchmark Test

LongBench is the first benchmark for bilingual, multitask, and comprehensive assessment of long context understanding capabilities of large language models. This benchmark implementation is adapted from [THUDM/LongBench](https://github.com/THUDM/LongBench) and [experiments/LongBench](https://github.com/FasterDecoding/SnapKV/tree/main/experiments/LongBench)
Reviewer comment (Contributor): SnapKV/experiments/LongBench

Reviewer comment (Contributor): Add .


LongBench Benchmark allows users to run the LongBench benchmark tests and record the results in JSON files. Users can provide models and related information in `config.yaml` and the `config` directory.
Reviewer comment (Contributor): LongBench is the first benchmark for bilingual, multitask, and comprehensive assessment of long context understanding capabilities of large language models. This benchmark implementation is adapted from xxxxx(https://github.com/THUDM/LongBench) and xxxx(https://github.com/FasterDecoding/SnapKV/tree/main/experiments/LongBench)

Reviewer comment (Contributor): Please remove this line.


Before running, make sure to have [ipex-llm](../../../../../README.md) installed.

## Dependencies
## Environment Preparation

Before running, make sure to have [ipex-llm](../../../../../README.md) installed.

```bash
pip install omegaconf
pip install fuzzywuzzy
pip install rouge
```

Reviewer comment (Contributor): Add Load Data section. (The suggested Hugging Face `load_dataset` snippet is the same one that now appears in the section below.)

### Load Data

You can download and load the LongBench data through the Hugging Face datasets ([🤗 HF Repo](https://huggingface.co/datasets/THUDM/LongBench)):

```python
from datasets import load_dataset

datasets = ["narrativeqa", "qasper", "multifieldqa_en", "multifieldqa_zh", "hotpotqa", "2wikimqa", "musique", \
            "dureader", "gov_report", "qmsum", "multi_news", "vcsum", "trec", "triviaqa", "samsum", "lsht", \
            "passage_count", "passage_retrieval_en", "passage_retrieval_zh", "lcc", "repobench-p"]

for dataset in datasets:
    data = load_dataset('THUDM/LongBench', dataset, split='test')
    data = load_dataset('THUDM/LongBench', f"{dataset}_e", split='test')
```
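
For a quick sanity check after downloading, a single task can be loaded and one example inspected. This is a minimal sketch; the field names (`input`, `context`, `answers`, `length`) follow the published LongBench schema and are not defined in this PR:

```python
from datasets import load_dataset

# Load one LongBench task and look at a single test example.
data = load_dataset('THUDM/LongBench', 'qasper', split='test')
sample = data[0]

print(sample.keys())          # expected fields include 'input', 'context', 'answers', 'length'
print(sample['length'])       # context length as reported by LongBench
print(sample['input'][:200])  # the question / instruction for this example
```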

## config

### `config.yaml`
@@ -28,9 +49,9 @@
model_name:
# - "chatglm4-9b"
# - "qwen2-7b-instruct"

# whether or not to test the full-kv score
# whether test the full-kv
full_kv: True
# whether or not to open optimize_model
# Whether apply model optimization
optimize_model: True
# dtype of the model
dtype: 'fp16'
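
Since `omegaconf` is among the installed dependencies, the configuration can presumably be loaded and inspected as below. This is only a sketch under that assumption; it uses just the keys visible in the fragment above:

```python
from omegaconf import OmegaConf

# Load the benchmark configuration; the keys mirror the config.yaml fragment above.
conf = OmegaConf.load("config.yaml")

print(conf.model_name)      # the list of models to test
print(conf.full_kv)         # whether to also run the full-KV baseline
print(conf.optimize_model)  # whether to apply ipex-llm model optimization
print(conf.dtype)           # model dtype, e.g. 'fp16'
```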
@@ -78,16 +99,28 @@
Some JSON files are saved in the `config` dir. They can be divided into three kinds

#### About compress-kv

The rest json files are about compress-kv.
The rest JSON files are compress-kv test configurations.

## Run

There are two Python scripts for users to run.
Reviewer comment (Contributor):
  1. Configure the config.yaml and run pred.py and you can obtain the output of the model under pred/ folder corresponding to the model name.
  2. Run the evaluation code eval.py, you can get the evaluation results on all datasets in result.json.


- `pred.py`: This script will give the output of the models configged in the `config.yaml`
1. Configure `config.yaml` and run `pred.py`; the model outputs are saved under the `pred/` folder corresponding to the model name.

- `eval.py`: This script calculates the score of each case.
2. Run the evaluation script `eval.py` to get the evaluation results on all datasets in `result.json`, as sketched after the note below.

> [!Note]
>
> To run the tests and get the scores in one go, please run `test_and_eval.sh`
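
Once `eval.py` has finished, the aggregated scores can be inspected programmatically. A minimal sketch, assuming `result.json` sits in the working directory and maps dataset names to scores (the exact layout is not shown in this PR):

```python
import json

# Read the scores produced by eval.py (assumed structure: {dataset_name: score}).
with open("result.json") as f:
    results = json.load(f)

for dataset, score in sorted(results.items()):
    print(f"{dataset:>25}: {score}")
```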

## Citation

```bibtex
@article{bai2023longbench,
title={LongBench: A Bilingual, Multitask Benchmark for Long Context Understanding},
author={Bai, Yushi and Lv, Xin and Zhang, Jiajie and Lyu, Hongchang and Tang, Jiankai and Huang, Zhidian and Du, Zhengxiao and Liu, Xiao and Zeng, Aohan and Hou, Lei and Dong, Yuxiao and Tang, Jie and Li, Juanzi},
journal={arXiv preprint arXiv:2308.14508},
year={2023}
}

```
19 changes: 19 additions & 0 deletions python/llm/dev/benchmark/LongBench/eval.py
@@ -1,3 +1,22 @@
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This file is adapted from
# https://github.com/insuhan/hyper-attn/blob/main/benchmark_patch_llm.py
Reviewer comment (Contributor): Please modify the corresponding file path.

#

import os
import json
import argparse
19 changes: 19 additions & 0 deletions python/llm/dev/benchmark/LongBench/metrics.py
@@ -1,3 +1,22 @@
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This file is adapted from
# https://github.com/insuhan/hyper-attn/blob/main/benchmark_patch_llm.py
Reviewer comment (Contributor): Please modify the corresponding file path.

#

import re
import string

19 changes: 19 additions & 0 deletions python/llm/dev/benchmark/LongBench/pred.py
@@ -1,3 +1,22 @@
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This file is adapted from
# https://github.com/insuhan/hyper-attn/blob/main/benchmark_patch_llm.py
Reviewer comment (Contributor): Please modify the corresponding file path.

#

import os
from transformers import AutoTokenizer
from ipex_llm.transformers import AutoModelForCausalLM