pytorch
diff --git a/‎.ci/scripts/test_llama.sh‎
Lines changed: 1 addition & 1 deletion b/‎.ci/scripts/test_llama.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎backends/cadence/aot/compiler.py‎
Lines changed: 1 addition & 2 deletions b/‎backends/cadence/aot/compiler.py‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎examples/mediatek/requirements.txt‎ renamed to ‎backends/mediatek/requirements.txt‎ b/‎examples/mediatek/requirements.txt‎ renamed to ‎backends/mediatek/requirements.txt‎
diff --git a/‎backends/mediatek/scripts/README.md‎
Lines changed: 34 additions & 15 deletions b/‎backends/mediatek/scripts/README.md‎
Lines changed: 34 additions & 15 deletions
diff --git a/‎backends/qualcomm/README.md‎
Lines changed: 64 additions & 0 deletions b/‎backends/qualcomm/README.md‎
Lines changed: 64 additions & 0 deletions
@@ -188,7 +188,7 @@ EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
 echo "Exporting ${EXPORTED_MODEL_NAME}"
 EXPORT_ARGS="-c ${CHECKPOINT_FILE_NAME} -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME} -kv"
 if [[ "${XNNPACK}" == "ON" ]]; then
-  EXPORT_ARGS="${EXPORT_ARGS} -X -qmode 8da4w -G 128"
+  EXPORT_ARGS="${EXPORT_ARGS} -X --xnnpack-extended-ops -qmode 8da4w -G 128"
 fi
 if [[ "${CUSTOM}" == "ON" ]]; then
   EXPORT_ARGS="${EXPORT_ARGS} --use_sdpa_with_kv_cache"
 
@@ -30,7 +30,6 @@
 )
 from executorch.backends.transforms.remove_clone_ops import RemoveCloneOpsTransform
 from executorch.exir import EdgeCompileConfig, EdgeProgramManager, to_edge
-from torch._export import capture_pre_autograd_graph
 from torch.ao.quantization.pt2e.export_utils import model_is_exported
 from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
 
@@ -58,7 +57,7 @@ def convert_pt2(
     """
 
     # Export with dynamo
-    model_gm = capture_pre_autograd_graph(model, inputs)
+    model_gm = torch.export.export_for_training(model, inputs).module()
 
     if model_gm_has_SDPA(model_gm):  # pyre-fixme[6]
         # Decompose SDPA
 
@@ -10,41 +10,60 @@ Before you begin, ensure you have the following prerequisites installed and conf
 
 - **Download Buck2**: Obtain Buck2 from the official [releases page](https://github.com/facebook/buck2/releases/tag/2024-02-01).
 - **Add to PATH**: Extract the downloaded file and add the directory to your system's `$PATH` environment variable.
-```bash
-export PATH=<path_to_buck>:$PATH
-```
+   ```bash
+   export PATH=<path_to_buck>:$PATH
+   ```
 
 ### 2. Android NDK
 
 - **Download Android NDK**: Acquire the Android NDK from the [Android developer site](https://developer.android.com/ndk/downloads).
 - **Set NDK Path**: Ensure that the `$ANDROID_NDK` environment variable is set to the path where the NDK is located.
-```bash
-export ANDROID_NDK=<path_to_android_ndk>
-```
+   ```bash
+   export ANDROID_NDK=<path_to_android_ndk>
+   ```
 
 ### 3. MediaTek ExercuTorch Libraries
 
-Download the following libraries from MediaTek's NeuroPilot portal (link to be added):
+Download [NeuroPilot Express SDK](https://neuropilot.mediatek.com/resources/public/npexpress/en/docs/npexpress) from MediaTek's NeuroPilot portal:
 
 - `libneuronusdk_adapter.mtk.so`: This universal SDK contains the implementation required for executing target-dependent code on the MediaTek chip.
 - `libneuron_buffer_allocator.so`: This utility library is designed for allocating DMA buffers necessary for model inference.
-```bash
-export NEURON_BUFFER_ALLOCATOR_LIB=<path_to_buffer_allocator>
-```
+- `mtk_converter-8.8.0.dev20240723+public.d1467db9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl`: This library preprocesses the model into a MediaTek representation.
+- `mtk_neuron-8.2.2-py3-none-linux_x86_64.whl`: This library converts the model to binaries.
 
 ## Setup
 
-Follow the steps below to set up your build environment:
+Follow the steps below to set up your build environment:
+
+1. **Setup ExecuTorch Environment**: Refer to the [Setting up ExecuTorch](https://pytorch.org/executorch/stable/getting-started-setup) guide for detailed instructions on setting up the ExecuTorch environment.
+
+2. **Setup MediaTek Backend Environment**
+- Install the dependencies. Make sure you are inside the `backends/mediatek/` directory.
+   ```bash
+   pip3 install -r requirements.txt
+   ```
+- Install the two `.whl` packages downloaded from the NeuroPilot portal
+   ```bash
+   pip3 install mtk_neuron-8.2.2-py3-none-linux_x86_64.whl
+   pip3 install mtk_converter-8.8.0.dev20240723+public.d1467db9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
+   ```
+- Set environment variables for building the backend
+   ```bash
+   export NEURON_BUFFER_ALLOCATOR_LIB=<path_to_buffer_allocator>
+   ```
 
-1. **ExercuTorch Official Tutorial**: Refer to the [Setting up ExercuTorch](https://pytorch.org/executorch/stable/getting-started-setup) guide for detailed instructions on setting up the ExercuTorch environment.
+## Build
 
-2. **Build Script**: Once the prerequisites are in place, run the `mtk_build.sh` script to start the build process.
+1. **Build MediaTek Backend**: Once the prerequisites are in place, run the `mtk_build.sh` script to start the build process. The MediaTek backend will be built under `cmake-android-out/backends/` as `libneuron_backend.so`.
 
    ```bash
    ./mtk_build.sh
    ```
-3. **Push MediaTek universal SDK to the device**: push libneuronusdk_adapter.mtk.so to the phone and export it to the `$LD_LIBRARY_PATH` environment variable before executing ExercuTorch with MediaTek backend.
+
+## Run
+
+1. **Push MediaTek universal SDK and MediaTek backend to the device**: Push `libneuronusdk_adapter.mtk.so` and `libneuron_backend.so` to the phone, and add the directories containing them to the `$LD_LIBRARY_PATH` environment variable before executing ExecuTorch with the MediaTek backend.
 
    ```bash
-   export LD_LIBRARY_PATH=<path_to_usdk>:$LD_LIBRARY_PATH
+   export LD_LIBRARY_PATH=<path_to_usdk>:<path_to_neuron_backend>:$LD_LIBRARY_PATH
    ```
@@ -73,3 +73,67 @@ examples/qualcomm
 Please see this [README.md](../../examples/qualcomm/README.md).
 
 Further, an example build script is provided as [build.sh](scripts/build.sh).
+
+## Issues
+If you encounter a problem and would like it addressed, please include reproduction information so that maintainers can investigate. Please also follow the [policy](../../CONTRIBUTING.md#issues) when filing issues.
+
+## Pull Requests
+PRs that help improve the codebase are always welcome. Before submitting changes, please do the following:
+
+- **Check the Coding Style**:<br/>
+    Make sure your code follows the [style guides](../../CONTRIBUTING.md#coding-style) and passes the [lint checks](../../CONTRIBUTING.md#lintrunner).
+
+- **Add Unit Tests**:<br/>
+    The following is an example of adding a test case after [creating a new operator builder](builders/README.md). Navigate to the `backends/qualcomm/tests` folder and put a minimal example module in `model.py`, e.g.:
+    ```python
+    class IndexPut(torch.nn.Module):
+        ...
+
+    # please insert implementation in alphabetical order
+    class LayerNorm(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.layer_norm = torch.nn.LayerNorm([768], eps=1e-6)
+
+        def forward(self, x):
+            return self.layer_norm(x)
+
+
+    class LeakyReLUDefault(torch.nn.Module):
+        ...
+    ```
+    Also extend sections `TestQNNFloatingPointOperator`, `TestQNNQuantizedOperator` in `test_qnn_delegate.py`. e.g.:
+    ```python
+    class TestQNNQuantizedOperator(TestQNN):
+        def test_qnn_backend_interpolate_nearest_2d(self):
+            ...
+
+        # please insert implementation in alphabetical order
+        def test_qnn_backend_layer_norm(self):
+            module = LayerNorm()  # noqa: F405
+            sample_input = (torch.randn(196, 768),)
+            module = self.get_qdq_module(module, sample_input)
+            self.lower_module_and_test_output(module, sample_input)
+
+        def test_qnn_backend_leaky_relu(self):
+            ...
+    ```
+
+- **Verify Unit Test Results**:<br/>
+    ```bash
+    cd $PATH_TO_EXECUTORCH
+    # example usage of performing unit test
+    python backends/qualcomm/tests/test_qnn_delegate.py -k TestQNNQuantizedOperator.test_qnn_backend_layer_norm -s $DEVICE_SERIAL -m SM8650 -b build-android/ -a $PATH_TO_TEST_ARTIFACTS
+    ```
+    The test graph is expected to have 1 delegated node with only placeholders / output nodes being left. Check the execution report for more information.
+
+- **Code Reviews**:<br/>
+    Please ping the authors of Qualcomm AI Engine Direct related PRs for review. Possible candidates are listed below:
+    - [chiwwang](https://github.com/chiwwang)
+    - [shewu-quic](https://github.com/shewu-quic)
+    - [chunit-quic](https://github.com/chunit-quic)
+    - [winskuo-quic](https://github.com/winskuo-quic)
+    - [chuntl](https://github.com/chuntl)
+    - [haowhsu-quic](https://github.com/haowhsu-quic)
+
+Thanks again for your contribution!