From 5959b46b25f795588b1a221787bb63d292ccaf5f Mon Sep 17 00:00:00 2001
From: Egor Krivov
Date: Wed, 21 Feb 2024 15:59:54 +0100
Subject: [PATCH 1/5] switched to ipex_cpu

---
 tests/conda-envs/ipex.yaml | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tests/conda-envs/ipex.yaml b/tests/conda-envs/ipex.yaml
index d136704..51b4f96 100644
--- a/tests/conda-envs/ipex.yaml
+++ b/tests/conda-envs/ipex.yaml
@@ -3,12 +3,17 @@ channels:
   - intel
   - conda-forge
 dependencies:
-  - intel-aikit-pytorch
-  - pytorch>=2.0.1=*_xpu_*
-  - intel-extension-for-pytorch
   - datasets
   - accelerate
   - sentencepiece
 # The following packages are required to run benchmarks
   - sqlalchemy>=2.0.0
   - pytest
+  - pip
+  - --index-url https://download.pytorch.org/whl/cpu
+  - torch
+  - torchvision
+  - torchaudio
+  - intel-extension-for-pytorch
+  - --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
+  - oneccl_bind_pt

From c77810baad853816a878e9aef40d544731cf39ff Mon Sep 17 00:00:00 2001
From: Egor Krivov
Date: Wed, 21 Feb 2024 16:05:04 +0000
Subject: [PATCH 2/5] updated env

---
 tests/conda-envs/ipex.yaml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/conda-envs/ipex.yaml b/tests/conda-envs/ipex.yaml
index 51b4f96..9691bde 100644
--- a/tests/conda-envs/ipex.yaml
+++ b/tests/conda-envs/ipex.yaml
@@ -3,6 +3,7 @@ channels:
   - intel
   - conda-forge
 dependencies:
+  - python=3.11
   - datasets
   - accelerate
   - sentencepiece
@@ -10,10 +11,12 @@ dependencies:
   - sqlalchemy>=2.0.0
   - pytest
   - pip
-  - --index-url https://download.pytorch.org/whl/cpu
+  - pip:
+  - --extra-index-url https://download.pytorch.org/whl/cpu
   - torch
   - torchvision
   - torchaudio
+  - transformers==4.35.2
   - intel-extension-for-pytorch
   - --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
   - oneccl_bind_pt

From b34bd20f4bb7dc204be7f89c6438cd85449070cd Mon Sep 17 00:00:00 2001
From: Egor Krivov
Date: Fri, 23 Feb 2024 14:04:07 +0100
Subject: [PATCH 3/5] ipex update

---
 dl_bench/llm.py   | 42 ++++++++++++++++++++++--------------------
 dl_bench/utils.py |  6 ++++--
 2 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/dl_bench/llm.py b/dl_bench/llm.py
index 906f581..8bcc38b 100644
--- a/dl_bench/llm.py
+++ b/dl_bench/llm.py
@@ -15,23 +15,23 @@
 
 
 def get_llm(name, dtype):
-    if name == "gptj":
-        model_name = "EleutherAI/gpt-j-6B"
-
-        model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=dtype)
-        tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
-    elif name == "llama2-13b":
-        kwargs = {}
-        if "HF_TOKEN" in os.environ:
-            kwargs["token"] = os.environ.get("HF_TOKEN")
-
-        model_name = "meta-llama/Llama-2-13b-hf"
-        model = LlamaForCausalLM.from_pretrained(
-            model_name, torch_dtype=dtype, **kwargs
-        )
-        tokenizer = LlamaTokenizer.from_pretrained(model_name, **kwargs)
-    else:
+    name2params = {
+        "gptj": ("EleutherAI/gpt-j-6B", AutoModelForCausalLM, AutoTokenizer),
+        "llama2-7b": ("meta-llama/Llama-2-7b-hf", LlamaForCausalLM, LlamaTokenizer),
+        "llama2-13b": ("meta-llama/Llama-2-13b-hf", LlamaForCausalLM, LlamaTokenizer),
+    }
+
+    if name not in name2params:
         raise ValueError("Unsupported model name")
+
+    kwargs = {}
+    if name.startswith("llama2") and "HF_TOKEN" in os.environ:
+        kwargs = {"HF_TOKEN": os.environ.get("HF_TOKEN")}
+
+    model_name, M, T = name2params[name]
+
+    model = M.from_pretrained(model_name, torch_dtype=dtype, **kwargs)
+    tokenizer = T.from_pretrained(model_name)
 
     return tokenizer, model
 
@@ -39,7 +39,7 @@ class LlmBenchmark(Benchmark):
     def __init__(self, params) -> None:
         name = params.get("name", "gptj")
         dtype = params.get("dtype")
-        self.batch_size = params.get("batch_size", 1)
+        self.batch_size = int(params.get("batch_size", 1))
         self.n_iter = params.get("n_iter", 5)
         self.warmup_batches = params.get("warmup", 2)
 
@@ -75,26 +75,28 @@ def inference(self, backend):
         # self.flops_per_sample = get_macs(self.model, self.in_shape, backend) * 2
 
         self.model = backend.prepare_eval_transformer(self.model)
-        self.model.eval()
 
         enabled = backend.dtype != torch.float32
 
         n_items = 0
         outputs = []
         fw_times = []
-        self.model.eval()
+
+        # Ipex gives error with eval, other backends have no effect
+        # self.model.eval()
         for i in range(self.n_iter):
             print(f"Epoch {i+1}/{self.n_iter}")
             cast = torch.autocast(enabled=enabled, device_type=backend.device_name)
             with torch.inference_mode(), cast:
                 tokens, total_time = self.generate(backend)
 
+            print(f"Fw time: {total_time:.1f}")
+
             if i < self.warmup_batches:
                 # We restart timer because that was just a warmup
                 start = get_time()
                 continue
 
-            print(f"Fw time: {total_time:.1f}")
             fw_times.append(total_time)
             n_items += math.prod(tokens.shape)
             outputs.append(tokens)

diff --git a/dl_bench/utils.py b/dl_bench/utils.py
index 09da649..e269fe7 100644
--- a/dl_bench/utils.py
+++ b/dl_bench/utils.py
@@ -132,7 +132,7 @@ def prepare_eval_transformer(self, model):
             model = model.to(memory_format=torch.channels_last)
         model.to(self.device)
 
-        with torch.inference_mode():
+        with torch.no_grad():
            model.eval()
            return self._compile_transformer_model(
                self.compile_mode, model, dtype=self.dtype
@@ -160,7 +160,9 @@ def _compile_transformer_model(compile_mode, model, dtype=torch.bfloat16):
         import intel_extension_for_pytorch as ipex
 
         params = {} if dtype != torch.bfloat16 else {"dtype": torch.bfloat16}
-        compiled_model = ipex.optimize_transformers(model, **params)
+        #compiled_model = ipex.llm.optimize(model, **params, inplace=True, deployment_mode=True)
+        compiled_model = ipex.llm.optimize(model, **params)
+        # compiled_model = ipex.optimize_transformers(model, **params)
         print("Compiled with ipex")
     elif compile_mode == "ipex_onednn_graph":
         raise NotImplementedError()

From 991a8b5fb17fba6d65373e6bd1e6ea300cbddaff Mon Sep 17 00:00:00 2001
From: Egor Krivov
Date: Fri, 23 Feb 2024 14:57:19 +0000
Subject: [PATCH 4/5] fixed bug

---
 dl_bench/llm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dl_bench/llm.py b/dl_bench/llm.py
index 8bcc38b..5972804 100644
--- a/dl_bench/llm.py
+++ b/dl_bench/llm.py
@@ -26,7 +26,7 @@ def get_llm(name, dtype):
 
     kwargs = {}
     if name.startswith("llama2") and "HF_TOKEN" in os.environ:
-        kwargs = {"HF_TOKEN": os.environ.get("HF_TOKEN")}
+        kwargs = {"token": os.environ.get("HF_TOKEN")}
 
     model_name, M, T = name2params[name]
 

From fbe4d66d362a26de9593593e97ee291ce9bfd7a7 Mon Sep 17 00:00:00 2001
From: Egor Krivov
Date: Mon, 26 Feb 2024 14:02:51 +0100
Subject: [PATCH 5/5] added old xpu file

---
 tests/conda-envs/ipex-xpu.yaml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 tests/conda-envs/ipex-xpu.yaml

diff --git a/tests/conda-envs/ipex-xpu.yaml b/tests/conda-envs/ipex-xpu.yaml
new file mode 100644
index 0000000..d136704
--- /dev/null
+++ b/tests/conda-envs/ipex-xpu.yaml
@@ -0,0 +1,14 @@
+name: ipex
+channels:
+  - intel
+  - conda-forge
+dependencies:
+  - intel-aikit-pytorch
+  - pytorch>=2.0.1=*_xpu_*
+  - intel-extension-for-pytorch
+  - datasets
+  - accelerate
+  - sentencepiece
+# The following packages are required to run benchmarks
+  - sqlalchemy>=2.0.0
+  - pytest
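
Note (a sketch, not part of the patches): patch 3 moves the IPEX backend from ipex.optimize_transformers() to ipex.llm.optimize(), the API that superseded it in recent IPEX releases. The standalone reproduction below assumes intel-extension-for-pytorch >= 2.2 built for CPU (the wheel installed by patches 1-2) and uses the benchmark's gpt-j checkpoint and bfloat16 default for illustration.

    import torch
    import intel_extension_for_pytorch as ipex
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Mirrors get_llm("gptj", torch.bfloat16) followed by the "ipex"
    # branch of _compile_transformer_model.
    model = AutoModelForCausalLM.from_pretrained(
        "EleutherAI/gpt-j-6B", torch_dtype=torch.bfloat16
    )
    model = ipex.llm.optimize(model, dtype=torch.bfloat16)

    tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
    inputs = tokenizer("The benchmark measures", return_tensors="pt")

    # Generate under inference_mode + autocast, as LlmBenchmark.inference does.
    with torch.inference_mode(), torch.autocast(device_type="cpu", enabled=True):
        tokens = model.generate(**inputs, max_new_tokens=32)
    print(tokenizer.decode(tokens[0]))

The CPU environment itself is created the usual way: conda env create -f tests/conda-envs/ipex.yaml.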