diff --git a/llm/llama3/xpu/_sources/index.md.txt b/llm/llama3/xpu/_sources/index.md.txt
index 8cf6babda..52f3878d4 100644
--- a/llm/llama3/xpu/_sources/index.md.txt
+++ b/llm/llama3/xpu/_sources/index.md.txt
@@ -1,6 +1,6 @@
 # Intel® Extension for PyTorch* Large Language Model (LLM) Feature Get Started For Llama 3 models
 
-Intel® Extension for PyTorch* provides dedicated optimization for running Llama 3 models on Intel® Core™ Ultra Processors with Intel® Arc™ Graphics, including weight-only quantization (WOQ), Rotary Position Embedding fusion, etc. You are welcomed to have a try with these optimizations on Intel® Core™ Ultra Processors with Intel® Arc™ Graphics.
+Intel® Extension for PyTorch* provides dedicated optimizations for running Llama 3 models on Intel® Core™ Ultra Processors with Intel® Arc™ Graphics, including weight-only quantization (WOQ), Rotary Position Embedding fusion, and more. You are welcome to try these optimizations on this platform. This document shows how to run Llama 3 with a preview version of Intel® Extension for PyTorch*.
 
 # 1. Environment Setup
 
@@ -126,4 +126,4 @@ python run_generation_gpu_woq_for_llama.py --model ${PATH/TO/MODEL} --accuracy -
 ```
 
 ## Miscellaneous Tips
-Intel® Extension for PyTorch* also provides dedicated optimization for many other Large Language Models (LLM), which covers a set of data types for supporting various scenarios. For more details, please check [Large Language Models (LLM) Optimizations Overview](https://intel.github.io/intel-extension-for-pytorch/xpu/latest/tutorials/llm.html).
+Intel® Extension for PyTorch* also provides dedicated optimizations for many other Large Language Models (LLMs), covering a range of data types to support various scenarios. For more details, please check [Large Language Models (LLM) Optimizations Overview](https://intel.github.io/intel-extension-for-pytorch/xpu/latest/tutorials/llm.html). To replicate Llama 3 performance numbers on the Intel® Arc™ A770, please take advantage of [IPEX-LLM](https://github.com/intel-analytics/ipex-llm).
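The Miscellaneous Tips addition above points readers to IPEX-LLM for replicating Llama 3 numbers on the Intel® Arc™ A770. As a rough sketch of what that path looks like, the snippet below loads Llama 3 through IPEX-LLM's Hugging Face-style loader with INT4 weight-only quantization; it is not part of the doc change, the model path is a placeholder, and flags may vary across IPEX-LLM releases.

```python
# Sketch: Llama 3 INT4 inference via IPEX-LLM on an Intel GPU (illustrative only).
# Assumes the ipex-llm XPU wheels are installed; the model path is a placeholder.
import torch
from transformers import AutoTokenizer
from ipex_llm.transformers import AutoModelForCausalLM  # drop-in HF-style loader

model_path = "meta-llama/Meta-Llama-3-8B-Instruct"  # placeholder: local dir or hub id

# load_in_4bit=True applies weight-only INT4 quantization while loading
model = AutoModelForCausalLM.from_pretrained(model_path, load_in_4bit=True)
model = model.to("xpu")  # move the quantized model to the Arc GPU

tokenizer = AutoTokenizer.from_pretrained(model_path)
inputs = tokenizer("What is weight-only quantization?", return_tensors="pt").to("xpu")

with torch.inference_mode():
    out = model.generate(inputs.input_ids, max_new_tokens=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```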

diff --git a/llm/llama3/xpu/genindex.html b/llm/llama3/xpu/genindex.html
index 8ba6f073c..6baebd760 100644
--- a/llm/llama3/xpu/genindex.html
+++ b/llm/llama3/xpu/genindex.html
diff --git a/llm/llama3/xpu/index.html b/llm/llama3/xpu/index.html
index ad8a202ec..8cfc781c8 100644
--- a/llm/llama3/xpu/index.html
+++ b/llm/llama3/xpu/index.html
@@ -95,7 +95,7 @@
 Intel® Extension for PyTorch* Large Language Model (LLM) Feature Get Started For Llama 3 models
-Intel® Extension for PyTorch* provides dedicated optimization for running Llama 3 models on Intel® Core™ Ultra Processors with Intel® Arc™ Graphics, including weight-only quantization (WOQ), Rotary Position Embedding fusion, etc. You are welcomed to have a try with these optimizations on Intel® Core™ Ultra Processors with Intel® Arc™ Graphics.
+Intel® Extension for PyTorch* provides dedicated optimizations for running Llama 3 models on Intel® Core™ Ultra Processors with Intel® Arc™ Graphics, including weight-only quantization (WOQ), Rotary Position Embedding fusion, and more. You are welcome to try these optimizations on this platform. This document shows how to run Llama 3 with a preview version of Intel® Extension for PyTorch*.
 1. Environment Setup
@@ -246,7 +246,7 @@
 2.1.3 Validate Llama 3 WOQ INT4 Accuracy on Windows 11 Home
 Miscellaneous Tips
-Intel® Extension for PyTorch* also provides dedicated optimization for many other Large Language Models (LLM), which covers a set of data types for supporting various scenarios. For more details, please check Large Language Models (LLM) Optimizations Overview.
+Intel® Extension for PyTorch* also provides dedicated optimizations for many other Large Language Models (LLMs), covering a range of data types to support various scenarios. For more details, please check Large Language Models (LLM) Optimizations Overview. To replicate Llama 3 performance numbers on the Intel® Arc™ A770, please take advantage of IPEX-LLM.
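For readers skimming the WOQ INT4 sections named in these hunks, the underlying idea is to store weights as 4-bit codes plus a per-group scale and zero point, then dequantize on the fly at compute time. The toy PyTorch sketch below illustrates the arithmetic only; it is not the packed kernel Intel® Extension for PyTorch* actually ships, and the group size of 32 is an arbitrary illustrative choice.

```python
import torch

def quantize_int4(w: torch.Tensor, group_size: int = 32):
    """Per-group asymmetric INT4 quantization (conceptual sketch only)."""
    groups = w.reshape(-1, group_size)
    w_min = groups.min(dim=1, keepdim=True).values
    w_max = groups.max(dim=1, keepdim=True).values
    scale = (w_max - w_min).clamp(min=1e-8) / 15.0  # 4-bit codes span 0..15
    q = torch.round((groups - w_min) / scale).clamp(0, 15).to(torch.uint8)
    return q, scale, w_min

def dequantize_int4(q, scale, w_min, shape):
    """Recover approximate full-precision weights from the INT4 codes."""
    return (q.float() * scale + w_min).reshape(shape)

w = torch.randn(128, 64)
q, scale, w_min = quantize_int4(w)
w_hat = dequantize_int4(q, scale, w_min, w.shape)
# Reconstruction error is bounded by half a quantization step per group
print((w - w_hat).abs().max().item())
```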
diff --git a/llm/llama3/xpu/search.html b/llm/llama3/xpu/search.html
index 23910ca1b..6d397ae19 100644
--- a/llm/llama3/xpu/search.html
+++ b/llm/llama3/xpu/search.html
diff --git a/llm/llama3/xpu/searchindex.js b/llm/llama3/xpu/searchindex.js
index 2f737807b..5dfc719c7 100644
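Since the updated pages center on the conda-based setup with pre-built XPU wheels, a quick post-install sanity check is worth running before the generation scripts. The snippet below is a suggestion rather than part of the official docs; it assumes the torch and intel_extension_for_pytorch XPU wheels from the environment-setup section are installed.

```python
# Suggested post-install sanity check (not from the official docs).
import torch
import intel_extension_for_pytorch as ipex  # registers the 'xpu' device with PyTorch

print(f"torch {torch.__version__}, ipex {ipex.__version__}")

if torch.xpu.is_available():
    print("XPU device:", torch.xpu.get_device_name(0))
    # Tiny smoke test: run one matmul on the GPU
    x = torch.randn(64, 64, device="xpu")
    print("matmul OK:", (x @ x).shape)
else:
    print("No XPU detected; check the GPU driver and oneAPI runtime")
```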